Merge tag 'nbd-for-4.6' of git://git.pengutronix.de/git/mpa/linux-nbd into for-4...
author Jens Axboe <axboe@fb.com>
Thu, 3 Mar 2016 13:52:13 +0000 (06:52 -0700)
committer Jens Axboe <axboe@fb.com>
Thu, 3 Mar 2016 13:52:13 +0000 (06:52 -0700)
NBD for 4.6

Markus writes:

This pull request contains 7 patches for 4.6.

Patch 1 fixes some unnecessarily complicated debugfs code that I introduced a
few releases ago.

Patch 2 removes the much-criticised use of signals within NBD to kill the NBD
threads after a timeout. That mechanism had been in place for years; it is now
replaced by simply killing the TCP connection.
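
The shape of that change, for context: when a request times out, the driver no
longer delivers SIGKILL to its kernel threads; it shuts the socket down, so the
blocked sendmsg()/recvmsg() calls fail and the threads exit on their own. Below
is a minimal sketch of the idea in C; the struct layout and the helper name are
illustrative assumptions, not code copied from the series.

	#include <linux/net.h>		/* kernel_sock_shutdown() */
	#include <linux/spinlock.h>

	/* Minimal stand-in for the real device state; illustrative only. */
	struct nbd_device {
		struct socket *sock;
		spinlock_t sock_lock;
	};

	/*
	 * Timeout path, sketched: instead of signalling the receiver thread,
	 * tear down the TCP connection.  Pending transfers then error out and
	 * the worker threads terminate cleanly.
	 */
	static void nbd_kill_connection(struct nbd_device *nbd)
	{
		spin_lock(&nbd->sock_lock);
		if (nbd->sock)
			kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
		spin_unlock(&nbd->sock_lock);
	}

Tearing down the connection also has the benefit that the server side observes
the failure, which a signal sent to the local thread never communicated.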

Patches 3-6 are some smaller cleanups.

Patch 7 adds uevents for userspace, so that udev/systemd can react to
connected NBD devices.
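
The kernel-side hook for this is small: emitting a "change" uevent on the
disk's device object is enough for a udev rule (for example one matching
ACTION=="change", KERNEL=="nbd*") or a systemd unit to pick up the state
change. A hedged sketch, assuming a plain KOBJ_CHANGE event is what the patch
sends:

	#include <linux/genhd.h>	/* struct gendisk, disk_to_dev() */
	#include <linux/kobject.h>	/* kobject_uevent(), KOBJ_CHANGE */

	/*
	 * Sketch: tell userspace that /dev/nbdX changed state (connected or
	 * disconnected).  Whether the series attaches extra environment
	 * variables to the event is left open here.
	 */
	static void nbd_notify_userspace(struct gendisk *disk)
	{
		kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
	}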

277 files changed:
.mailmap
Documentation/Intel-IOMMU.txt
Documentation/virtual/kvm/api.txt
Makefile
arch/arm/boot/compressed/Makefile
arch/arm/include/uapi/asm/unistd.h
arch/arm/kernel/calls.S
arch/arm64/Makefile
arch/arm64/configs/defconfig
arch/arm64/include/asm/pgtable.h
arch/arm64/kernel/head.S
arch/arm64/kernel/image.h
arch/arm64/mm/dump.c
arch/arm64/mm/kasan_init.c
arch/arm64/mm/pageattr.c
arch/arm64/mm/proc-macros.S
arch/arm64/mm/proc.S
arch/mips/bcm63xx/nvram.c
arch/mips/include/asm/mach-bcm63xx/bcm63xx_nvram.h
arch/powerpc/include/asm/book3s/64/hash.h
arch/powerpc/include/asm/book3s/64/pgtable.h
arch/powerpc/include/asm/kvm_host.h
arch/powerpc/include/asm/systbl.h
arch/powerpc/include/asm/unistd.h
arch/powerpc/include/uapi/asm/unistd.h
arch/powerpc/kernel/eeh_pe.c
arch/powerpc/kernel/misc_64.S
arch/powerpc/kernel/module_64.c
arch/powerpc/kvm/book3s_64_mmu.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/kvm/powerpc.c
arch/powerpc/mm/mem.c
arch/powerpc/perf/power8-pmu.c
arch/s390/include/asm/irqflags.h
arch/s390/include/asm/kvm_host.h
arch/s390/include/asm/pci_io.h
arch/s390/include/asm/processor.h
arch/s390/include/asm/ptrace.h
arch/s390/include/uapi/asm/unistd.h
arch/s390/kernel/compat_wrapper.c
arch/s390/kernel/crash_dump.c
arch/s390/kernel/debug.c
arch/s390/kernel/dumpstack.c
arch/s390/kernel/early.c
arch/s390/kernel/ftrace.c
arch/s390/kernel/ipl.c
arch/s390/kernel/kprobes.c
arch/s390/kernel/perf_event.c
arch/s390/kernel/process.c
arch/s390/kernel/ptrace.c
arch/s390/kernel/setup.c
arch/s390/kernel/signal.c
arch/s390/kernel/smp.c
arch/s390/kernel/stacktrace.c
arch/s390/kernel/syscalls.S
arch/s390/kernel/traps.c
arch/s390/kvm/Kconfig
arch/s390/kvm/Makefile
arch/s390/kvm/guestdbg.c
arch/s390/kvm/kvm-s390.c
arch/s390/mm/fault.c
arch/s390/mm/init.c
arch/s390/mm/mmap.c
arch/s390/mm/pgtable.c
arch/s390/numa/numa.c
arch/s390/oprofile/backtrace.c
arch/s390/pci/pci.c
arch/s390/pci/pci_event.c
arch/sh/include/asm/barrier.h
arch/x86/Kconfig
arch/x86/include/asm/irq.h
arch/x86/include/asm/pgtable_types.h
arch/x86/kernel/apic/io_apic.c
arch/x86/kernel/apic/vector.c
arch/x86/kernel/apic/x2apic_uv_x.c
arch/x86/kernel/cpu/perf_event_intel.c
arch/x86/kernel/cpu/perf_event_intel_uncore.c
arch/x86/kernel/cpu/perf_event_intel_uncore.h
arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
arch/x86/kernel/head64.c
arch/x86/kernel/irq.c
arch/x86/mm/pageattr.c
arch/x86/platform/efi/quirks.c
arch/x86/platform/intel-mid/intel-mid.c
arch/x86/platform/intel-quark/imr.c
block/blk-merge.c
block/blk-mq-sysfs.c
block/blk-mq.c
block/blk-mq.h
block/cfq-iosched.c
drivers/acpi/video_detect.c
drivers/base/platform-msi.c
drivers/base/platform.c
drivers/base/power/domain.c
drivers/clocksource/Kconfig
drivers/clocksource/tcb_clksrc.c
drivers/cpufreq/cpufreq-dt.c
drivers/cpufreq/cpufreq.c
drivers/cpufreq/cpufreq_governor.c
drivers/cpufreq/pxa2xx-cpufreq.c
drivers/cpuidle/coupled.c
drivers/cpuidle/cpuidle.c
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
drivers/gpu/drm/amd/amdgpu/tonga_dpm.c
drivers/gpu/drm/amd/powerplay/amd_powerplay.c
drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
drivers/gpu/drm/drm_atomic_helper.c
drivers/gpu/drm/etnaviv/common.xml.h
drivers/gpu/drm/etnaviv/etnaviv_drv.c
drivers/gpu/drm/etnaviv/etnaviv_drv.h
drivers/gpu/drm/etnaviv/etnaviv_dump.c
drivers/gpu/drm/etnaviv/etnaviv_gem.c
drivers/gpu/drm/etnaviv/etnaviv_gem.h
drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
drivers/gpu/drm/etnaviv/etnaviv_gpu.c
drivers/gpu/drm/etnaviv/etnaviv_gpu.h
drivers/gpu/drm/etnaviv/state_hi.xml.h
drivers/gpu/drm/radeon/dce6_afmt.c
drivers/gpu/drm/radeon/evergreen_hdmi.c
drivers/gpu/drm/radeon/evergreend.h
drivers/gpu/drm/radeon/radeon.h
drivers/gpu/drm/radeon/radeon_atombios.c
drivers/gpu/drm/radeon/radeon_audio.c
drivers/gpu/drm/radeon/radeon_audio.h
drivers/gpu/drm/radeon/radeon_display.c
drivers/gpu/drm/radeon/radeon_gem.c
drivers/gpu/drm/radeon/vce_v1_0.c
drivers/gpu/drm/rockchip/Makefile
drivers/gpu/drm/rockchip/dw-mipi-dsi.c
drivers/gpu/drm/rockchip/rockchip_drm_drv.c
drivers/gpu/drm/rockchip/rockchip_drm_fb.c
drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h
drivers/gpu/drm/rockchip/rockchip_drm_gem.c
drivers/gpu/drm/rockchip/rockchip_drm_vop.c
drivers/gpu/drm/vc4/vc4_v3d.c
drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
drivers/hwmon/dell-smm-hwmon.c
drivers/hwmon/fam15h_power.c
drivers/i2c/busses/i2c-designware-core.c
drivers/i2c/busses/i2c-piix4.c
drivers/iio/imu/adis_buffer.c
drivers/iommu/amd_iommu.c
drivers/iommu/intel-iommu.c
drivers/iommu/io-pgtable-arm.c
drivers/irqchip/Kconfig
drivers/irqchip/irq-atmel-aic-common.c
drivers/irqchip/irq-gic-v3-its.c
drivers/irqchip/irq-mxs.c
drivers/irqchip/irq-s3c24xx.c
drivers/nvme/host/Kconfig
drivers/nvme/host/Makefile
drivers/nvme/host/core.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/of/irq.c
drivers/pci/hotplug/acpiphp_glue.c
drivers/s390/cio/chp.c
drivers/s390/cio/chp.h
drivers/s390/cio/chsc.c
drivers/s390/crypto/zcrypt_error.h
drivers/s390/crypto/zcrypt_msgtype50.c
drivers/s390/crypto/zcrypt_msgtype6.c
drivers/scsi/hisi_sas/Kconfig
drivers/scsi/sd.c
drivers/scsi/sr.c
drivers/staging/panel/panel.c
drivers/staging/speakup/main.c
drivers/staging/speakup/selection.c
drivers/staging/speakup/serialio.c
drivers/tty/n_tty.c
drivers/tty/serial/8250/8250_pci.c
drivers/tty/tty_io.c
drivers/tty/tty_mutex.c
drivers/tty/vt/vt.c
drivers/usb/class/cdc-acm.c
drivers/usb/class/cdc-acm.h
drivers/usb/core/hub.c
drivers/usb/serial/cp210x.c
drivers/usb/serial/ftdi_sio.c
drivers/usb/serial/ftdi_sio_ids.h
drivers/usb/serial/mxu11x0.c
drivers/usb/serial/option.c
drivers/usb/serial/visor.c
drivers/vfio/vfio.c
drivers/virtio/virtio_pci_common.c
drivers/xen/tmem.c
fs/btrfs/async-thread.c
fs/btrfs/disk-io.c
fs/btrfs/free-space-tree.c
fs/btrfs/inode.c
fs/btrfs/relocation.c
fs/btrfs/sysfs.c
fs/btrfs/sysfs.h
fs/btrfs/tests/btrfs-tests.c
fs/btrfs/tests/extent-io-tests.c
fs/btrfs/tests/inode-tests.c
fs/btrfs/tree-log.c
fs/timerfd.c
include/drm/drm_atomic_helper.h
include/linux/blk-mq.h
include/linux/cleancache.h
include/linux/ftrace.h
include/linux/hrtimer.h
include/linux/iommu.h
include/linux/irqdomain.h
include/linux/perf_event.h
include/linux/swiotlb.h
include/linux/tty.h
include/trace/events/fence.h
include/uapi/drm/etnaviv_drm.h
kernel/bpf/arraymap.c
kernel/events/core.c
kernel/events/hw_breakpoint.c
kernel/events/ring_buffer.c
kernel/futex.c
kernel/irq/handle.c
kernel/irq/irqdomain.c
kernel/locking/rtmutex.c
kernel/pid.c
kernel/power/Kconfig
kernel/sched/core.c
kernel/sched/fair.c
kernel/sched/idle.c
kernel/seccomp.c
kernel/time/hrtimer.c
kernel/time/itimer.c
kernel/time/ntp.c
kernel/time/posix-timers.c
kernel/time/tick-sched.c
kernel/time/timer_list.c
kernel/trace/bpf_trace.c
kernel/trace/trace.c
lib/debugobjects.c
mm/cleancache.c
scripts/mod/modpost.c
security/keys/key.c
sound/core/Kconfig
sound/core/compress_offload.c
sound/core/seq/oss/seq_oss_init.c
sound/core/seq/oss/seq_oss_synth.c
sound/drivers/dummy.c
sound/firewire/bebob/bebob_stream.c
sound/isa/Kconfig
sound/pci/Kconfig
sound/pci/hda/hda_intel.c
sound/pci/hda/patch_hdmi.c
sound/sparc/Kconfig
sound/usb/quirks.c
tools/perf/Makefile.perf
tools/perf/arch/x86/tests/intel-cqm.c
tools/perf/config/Makefile
tools/perf/tests/make
tools/perf/ui/browsers/annotate.c
tools/perf/util/hist.c
tools/perf/util/session.c
tools/perf/util/stat.c
tools/perf/util/symbol.c
tools/testing/selftests/timers/valid-adjtimex.c
tools/virtio/asm/barrier.h
tools/virtio/linux/compiler.h [new file with mode: 0644]
tools/virtio/linux/kernel.h
tools/virtio/ringtest/Makefile [new file with mode: 0644]
tools/virtio/ringtest/README [new file with mode: 0644]
tools/virtio/ringtest/main.c [new file with mode: 0644]
tools/virtio/ringtest/main.h [new file with mode: 0644]
tools/virtio/ringtest/ring.c [new file with mode: 0644]
tools/virtio/ringtest/run-on-all.sh [new file with mode: 0755]
tools/virtio/ringtest/virtio_ring_0_9.c [new file with mode: 0644]
tools/virtio/ringtest/virtio_ring_poll.c [new file with mode: 0644]

index b1e9a97..7e6c533 100644
--- a/.mailmap
+++ b/.mailmap
@@ -21,6 +21,7 @@ Andrey Ryabinin <ryabinin.a.a@gmail.com> <a.ryabinin@samsung.com>
 Andrew Morton <akpm@linux-foundation.org>
 Andrew Vasquez <andrew.vasquez@qlogic.com>
 Andy Adamson <andros@citi.umich.edu>
+Antonio Ospite <ao2@ao2.it> <ao2@amarulasolutions.com>
 Archit Taneja <archit@ti.com>
 Arnaud Patard <arnaud.patard@rtp-net.org>
 Arnd Bergmann <arnd@arndb.de>
index 7b57fc0..49585b6 100644
@@ -3,7 +3,7 @@ Linux IOMMU Support
 
 The architecture spec can be obtained from the below location.
 
-http://www.intel.com/technology/virtualization/
+http://www.intel.com/content/dam/www/public/us/en/documents/product-specifications/vt-directed-io-spec.pdf
 
 This guide gives a quick cheat sheet for some basic understanding.
 
index 053f613..07e4cdf 100644
@@ -3025,7 +3025,7 @@ len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0
 and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
 which is the maximum number of possibly pending cpu-local interrupts.
 
-4.90 KVM_SMI
+4.96 KVM_SMI
 
 Capability: KVM_CAP_X86_SMM
 Architectures: x86
index c65fe37..6c1a3c2 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 5
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
 NAME = Blurry Fish Butt
 
 # *DOCUMENTATION*
index 4c23a68..7a6a58e 100644
@@ -106,6 +106,15 @@ ORIG_CFLAGS := $(KBUILD_CFLAGS)
 KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
 endif
 
+# -fstack-protector-strong triggers protection checks in this code,
+# but it is being used too early to link to meaningful stack_chk logic.
+nossp_flags := $(call cc-option, -fno-stack-protector)
+CFLAGS_atags_to_fdt.o := $(nossp_flags)
+CFLAGS_fdt.o := $(nossp_flags)
+CFLAGS_fdt_ro.o := $(nossp_flags)
+CFLAGS_fdt_rw.o := $(nossp_flags)
+CFLAGS_fdt_wip.o := $(nossp_flags)
+
 ccflags-y := -fpic -mno-single-pic-base -fno-builtin -I$(obj)
 asflags-y := -DZIMAGE
 
index ede692f..5dd2528 100644
 #define __NR_userfaultfd               (__NR_SYSCALL_BASE+388)
 #define __NR_membarrier                        (__NR_SYSCALL_BASE+389)
 #define __NR_mlock2                    (__NR_SYSCALL_BASE+390)
+#define __NR_copy_file_range           (__NR_SYSCALL_BASE+391)
 
 /*
  * The following SWIs are ARM private.
index ac368bb..dfc7cd6 100644
                CALL(sys_userfaultfd)
                CALL(sys_membarrier)
                CALL(sys_mlock2)
+               CALL(sys_copy_file_range)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted
index cd822d8..307237c 100644
@@ -27,6 +27,8 @@ $(warning LSE atomics not supported by binutils)
 endif
 
 KBUILD_CFLAGS  += -mgeneral-regs-only $(lseinstr)
+KBUILD_CFLAGS  += -fno-asynchronous-unwind-tables
+KBUILD_CFLAGS  += $(call cc-option, -mpc-relative-literal-loads)
 KBUILD_AFLAGS  += $(lseinstr)
 
 ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
index 18ca9fb..86581f7 100644
@@ -16,7 +16,6 @@ CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_MEMCG=y
 CONFIG_MEMCG_SWAP=y
-CONFIG_MEMCG_KMEM=y
 CONFIG_CGROUP_HUGETLB=y
 # CONFIG_UTS_NS is not set
 # CONFIG_IPC_NS is not set
@@ -37,15 +36,13 @@ CONFIG_ARCH_EXYNOS7=y
 CONFIG_ARCH_LAYERSCAPE=y
 CONFIG_ARCH_HISI=y
 CONFIG_ARCH_MEDIATEK=y
+CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_ROCKCHIP=y
 CONFIG_ARCH_SEATTLE=y
 CONFIG_ARCH_RENESAS=y
 CONFIG_ARCH_R8A7795=y
 CONFIG_ARCH_STRATIX10=y
 CONFIG_ARCH_TEGRA=y
-CONFIG_ARCH_TEGRA_132_SOC=y
-CONFIG_ARCH_TEGRA_210_SOC=y
-CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_SPRD=y
 CONFIG_ARCH_THUNDER=y
 CONFIG_ARCH_UNIPHIER=y
@@ -54,14 +51,19 @@ CONFIG_ARCH_XGENE=y
 CONFIG_ARCH_ZYNQMP=y
 CONFIG_PCI=y
 CONFIG_PCI_MSI=y
+CONFIG_PCI_IOV=y
+CONFIG_PCI_RCAR_GEN2_PCIE=y
 CONFIG_PCI_HOST_GENERIC=y
 CONFIG_PCI_XGENE=y
-CONFIG_SMP=y
+CONFIG_PCI_LAYERSCAPE=y
+CONFIG_PCI_HISI=y
+CONFIG_PCIE_QCOM=y
 CONFIG_SCHED_MC=y
 CONFIG_PREEMPT=y
 CONFIG_KSM=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
 CONFIG_CMA=y
+CONFIG_XEN=y
 CONFIG_CMDLINE="console=ttyAMA0"
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_COMPAT=y
@@ -100,7 +102,11 @@ CONFIG_PATA_OF_PLATFORM=y
 CONFIG_NETDEVICES=y
 CONFIG_TUN=y
 CONFIG_VIRTIO_NET=y
+CONFIG_AMD_XGBE=y
 CONFIG_NET_XGENE=y
+CONFIG_E1000E=y
+CONFIG_IGB=y
+CONFIG_IGBVF=y
 CONFIG_SKY2=y
 CONFIG_RAVB=y
 CONFIG_SMC91X=y
@@ -117,25 +123,23 @@ CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_DW=y
 CONFIG_SERIAL_8250_MT6577=y
 CONFIG_SERIAL_8250_UNIPHIER=y
+CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_AMBA_PL011=y
 CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
 CONFIG_SERIAL_SAMSUNG=y
-CONFIG_SERIAL_SAMSUNG_UARTS_4=y
-CONFIG_SERIAL_SAMSUNG_UARTS=4
 CONFIG_SERIAL_SAMSUNG_CONSOLE=y
+CONFIG_SERIAL_TEGRA=y
 CONFIG_SERIAL_SH_SCI=y
 CONFIG_SERIAL_SH_SCI_NR_UARTS=11
 CONFIG_SERIAL_SH_SCI_CONSOLE=y
-CONFIG_SERIAL_TEGRA=y
 CONFIG_SERIAL_MSM=y
 CONFIG_SERIAL_MSM_CONSOLE=y
-CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_XILINX_PS_UART=y
 CONFIG_SERIAL_XILINX_PS_UART_CONSOLE=y
 CONFIG_VIRTIO_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
-CONFIG_I2C=y
 CONFIG_I2C_QUP=y
+CONFIG_I2C_UNIPHIER_F=y
 CONFIG_I2C_RCAR=y
 CONFIG_SPI=y
 CONFIG_SPI_PL022=y
@@ -176,8 +180,6 @@ CONFIG_MMC_SDHCI_PLTFM=y
 CONFIG_MMC_SDHCI_TEGRA=y
 CONFIG_MMC_SPI=y
 CONFIG_MMC_DW=y
-CONFIG_MMC_DW_IDMAC=y
-CONFIG_MMC_DW_PLTFM=y
 CONFIG_MMC_DW_EXYNOS=y
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=y
@@ -187,28 +189,33 @@ CONFIG_LEDS_TRIGGER_HEARTBEAT=y
 CONFIG_LEDS_TRIGGER_CPU=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_EFI=y
+CONFIG_RTC_DRV_PL031=y
 CONFIG_RTC_DRV_XGENE=y
 CONFIG_DMADEVICES=y
-CONFIG_RCAR_DMAC=y
 CONFIG_QCOM_BAM_DMA=y
 CONFIG_TEGRA20_APB_DMA=y
+CONFIG_RCAR_DMAC=y
+CONFIG_VFIO=y
+CONFIG_VFIO_PCI=y
 CONFIG_VIRTIO_PCI=y
 CONFIG_VIRTIO_BALLOON=y
 CONFIG_VIRTIO_MMIO=y
+CONFIG_XEN_GNTDEV=y
+CONFIG_XEN_GRANT_DEV_ALLOC=y
 CONFIG_COMMON_CLK_CS2000_CP=y
 CONFIG_COMMON_CLK_QCOM=y
 CONFIG_MSM_GCC_8916=y
 CONFIG_HWSPINLOCK_QCOM=y
-# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_ARM_SMMU=y
 CONFIG_QCOM_SMEM=y
 CONFIG_QCOM_SMD=y
 CONFIG_QCOM_SMD_RPM=y
+CONFIG_ARCH_TEGRA_132_SOC=y
+CONFIG_ARCH_TEGRA_210_SOC=y
+CONFIG_HISILICON_IRQ_MBIGEN=y
 CONFIG_PHY_XGENE=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-# CONFIG_EXT3_FS_XATTR is not set
-CONFIG_EXT4_FS=y
 CONFIG_FANOTIFY=y
 CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA=y
@@ -239,6 +246,7 @@ CONFIG_LOCKUP_DETECTOR=y
 # CONFIG_FTRACE is not set
 CONFIG_MEMTEST=y
 CONFIG_SECURITY=y
+CONFIG_CRYPTO_ECHAINIV=y
 CONFIG_CRYPTO_ANSI_CPRNG=y
 CONFIG_ARM64_CRYPTO=y
 CONFIG_CRYPTO_SHA1_ARM64_CE=y
index 2d545d7..bf464de 100644
@@ -67,11 +67,11 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
 #define PROT_DEFAULT           (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
 #define PROT_SECT_DEFAULT      (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
 
-#define PROT_DEVICE_nGnRnE     (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
-#define PROT_DEVICE_nGnRE      (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE))
-#define PROT_NORMAL_NC         (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC))
-#define PROT_NORMAL_WT         (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_WT))
-#define PROT_NORMAL            (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL))
+#define PROT_DEVICE_nGnRnE     (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
+#define PROT_DEVICE_nGnRE      (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_NORMAL_NC         (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
+#define PROT_NORMAL_WT         (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT))
+#define PROT_NORMAL            (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
 
 #define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
 #define PROT_SECT_NORMAL       (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
@@ -81,7 +81,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
 
 #define PAGE_KERNEL            __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
 #define PAGE_KERNEL_RO         __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
-#define PAGE_KERNEL_ROX        __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
+#define PAGE_KERNEL_ROX                __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
 #define PAGE_KERNEL_EXEC       __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
 #define PAGE_KERNEL_EXEC_CONT  __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
 
@@ -153,6 +153,7 @@ extern struct page *empty_zero_page;
 #define pte_write(pte)         (!!(pte_val(pte) & PTE_WRITE))
 #define pte_exec(pte)          (!(pte_val(pte) & PTE_UXN))
 #define pte_cont(pte)          (!!(pte_val(pte) & PTE_CONT))
+#define pte_user(pte)          (!!(pte_val(pte) & PTE_USER))
 
 #ifdef CONFIG_ARM64_HW_AFDBM
 #define pte_hw_dirty(pte)      (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
@@ -163,8 +164,6 @@ extern struct page *empty_zero_page;
 #define pte_dirty(pte)         (pte_sw_dirty(pte) || pte_hw_dirty(pte))
 
 #define pte_valid(pte)         (!!(pte_val(pte) & PTE_VALID))
-#define pte_valid_user(pte) \
-       ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
 #define pte_valid_not_user(pte) \
        ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
 #define pte_valid_young(pte) \
@@ -278,13 +277,13 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep, pte_t pte)
 {
-       if (pte_valid_user(pte)) {
-               if (!pte_special(pte) && pte_exec(pte))
-                       __sync_icache_dcache(pte, addr);
+       if (pte_valid(pte)) {
                if (pte_sw_dirty(pte) && pte_write(pte))
                        pte_val(pte) &= ~PTE_RDONLY;
                else
                        pte_val(pte) |= PTE_RDONLY;
+               if (pte_user(pte) && pte_exec(pte) && !pte_special(pte))
+                       __sync_icache_dcache(pte, addr);
        }
 
        /*
index ffe9c2b..917d981 100644
@@ -514,9 +514,14 @@ CPU_LE(    movk    x0, #0x30d0, lsl #16    )       // Clear EE and E0E on LE systems
 #endif
 
        /* EL2 debug */
+       mrs     x0, id_aa64dfr0_el1             // Check ID_AA64DFR0_EL1 PMUVer
+       sbfx    x0, x0, #8, #4
+       cmp     x0, #1
+       b.lt    4f                              // Skip if no PMU present
        mrs     x0, pmcr_el0                    // Disable debug access traps
        ubfx    x0, x0, #11, #5                 // to EL2 and allow access to
        msr     mdcr_el2, x0                    // all PMU counters from EL1
+4:
 
        /* Stage-2 translation */
        msr     vttbr_el2, xzr
index bc2abb8..999633b 100644
 
 #ifdef CONFIG_EFI
 
+/*
+ * Prevent the symbol aliases below from being emitted into the kallsyms
+ * table, by forcing them to be absolute symbols (which are conveniently
+ * ignored by scripts/kallsyms) rather than section relative symbols.
+ * The distinction is only relevant for partial linking, and only for symbols
+ * that are defined within a section declaration (which is not the case for
+ * the definitions below) so the resulting values will be identical.
+ */
+#define KALLSYMS_HIDE(sym)     ABSOLUTE(sym)
+
 /*
  * The EFI stub has its own symbol namespace prefixed by __efistub_, to
  * isolate it from the kernel proper. The following symbols are legally
  * linked at. The routines below are all implemented in assembler in a
  * position independent manner
  */
-__efistub_memcmp               = __pi_memcmp;
-__efistub_memchr               = __pi_memchr;
-__efistub_memcpy               = __pi_memcpy;
-__efistub_memmove              = __pi_memmove;
-__efistub_memset               = __pi_memset;
-__efistub_strlen               = __pi_strlen;
-__efistub_strcmp               = __pi_strcmp;
-__efistub_strncmp              = __pi_strncmp;
-__efistub___flush_dcache_area  = __pi___flush_dcache_area;
+__efistub_memcmp               = KALLSYMS_HIDE(__pi_memcmp);
+__efistub_memchr               = KALLSYMS_HIDE(__pi_memchr);
+__efistub_memcpy               = KALLSYMS_HIDE(__pi_memcpy);
+__efistub_memmove              = KALLSYMS_HIDE(__pi_memmove);
+__efistub_memset               = KALLSYMS_HIDE(__pi_memset);
+__efistub_strlen               = KALLSYMS_HIDE(__pi_strlen);
+__efistub_strcmp               = KALLSYMS_HIDE(__pi_strcmp);
+__efistub_strncmp              = KALLSYMS_HIDE(__pi_strncmp);
+__efistub___flush_dcache_area  = KALLSYMS_HIDE(__pi___flush_dcache_area);
 
 #ifdef CONFIG_KASAN
-__efistub___memcpy             = __pi_memcpy;
-__efistub___memmove            = __pi_memmove;
-__efistub___memset             = __pi_memset;
+__efistub___memcpy             = KALLSYMS_HIDE(__pi_memcpy);
+__efistub___memmove            = KALLSYMS_HIDE(__pi_memmove);
+__efistub___memset             = KALLSYMS_HIDE(__pi_memset);
 #endif
 
-__efistub__text                        = _text;
-__efistub__end                 = _end;
-__efistub__edata               = _edata;
+__efistub__text                        = KALLSYMS_HIDE(_text);
+__efistub__end                 = KALLSYMS_HIDE(_end);
+__efistub__edata               = KALLSYMS_HIDE(_edata);
 
 #endif
 
index 5a22a11..0adbebb 100644
@@ -46,7 +46,7 @@ enum address_markers_idx {
        PCI_START_NR,
        PCI_END_NR,
        MODULES_START_NR,
-       MODUELS_END_NR,
+       MODULES_END_NR,
        KERNEL_SPACE_NR,
 };
 
index cf038c7..cab7a5b 100644
@@ -120,6 +120,7 @@ static void __init cpu_set_ttbr1(unsigned long ttbr1)
 void __init kasan_init(void)
 {
        struct memblock_region *reg;
+       int i;
 
        /*
         * We are going to perform proper setup of shadow memory.
@@ -155,6 +156,14 @@ void __init kasan_init(void)
                                pfn_to_nid(virt_to_pfn(start)));
        }
 
+       /*
+        * KAsan may reuse the contents of kasan_zero_pte directly, so we
+        * should make sure that it maps the zero page read-only.
+        */
+       for (i = 0; i < PTRS_PER_PTE; i++)
+               set_pte(&kasan_zero_pte[i],
+                       pfn_pte(virt_to_pfn(kasan_zero_page), PAGE_KERNEL_RO));
+
        memset(kasan_zero_page, 0, PAGE_SIZE);
        cpu_set_ttbr1(__pa(swapper_pg_dir));
        flush_tlb_all();
index 3571c73..cf62407 100644
@@ -57,6 +57,9 @@ static int change_memory_common(unsigned long addr, int numpages,
        if (end < MODULES_VADDR || end >= MODULES_END)
                return -EINVAL;
 
+       if (!numpages)
+               return 0;
+
        data.set_mask = set_mask;
        data.clear_mask = clear_mask;
 
index 146bd99..e6a30e1 100644
        b.lo    9998b
        dsb     \domain
        .endm
+
+/*
+ * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
+ */
+       .macro  reset_pmuserenr_el0, tmpreg
+       mrs     \tmpreg, id_aa64dfr0_el1        // Check ID_AA64DFR0_EL1 PMUVer
+       sbfx    \tmpreg, \tmpreg, #8, #4
+       cmp     \tmpreg, #1                     // Skip if no PMU present
+       b.lt    9000f
+       msr     pmuserenr_el0, xzr              // Disable PMU access from EL0
+9000:
+       .endm
index a3d867e..c164d2c 100644
@@ -117,7 +117,7 @@ ENTRY(cpu_do_resume)
         */
        ubfx    x11, x11, #1, #1
        msr     oslar_el1, x11
-       msr     pmuserenr_el0, xzr              // Disable PMU access from EL0
+       reset_pmuserenr_el0 x0                  // Disable PMU access from EL0
        mov     x0, x12
        dsb     nsh             // Make sure local tlb invalidation completed
        isb
@@ -154,7 +154,7 @@ ENTRY(__cpu_setup)
        msr     cpacr_el1, x0                   // Enable FP/ASIMD
        mov     x0, #1 << 12                    // Reset mdscr_el1 and disable
        msr     mdscr_el1, x0                   // access to the DCC from EL0
-       msr     pmuserenr_el0, xzr              // Disable PMU access from EL0
+       reset_pmuserenr_el0 x0                  // Disable PMU access from EL0
        /*
         * Memory region attributes for LPAE:
         *
index 5f2bc1e..05757ae 100644
@@ -19,6 +19,8 @@
 
 #include <bcm63xx_nvram.h>
 
+#define BCM63XX_DEFAULT_PSI_SIZE       64
+
 static struct bcm963xx_nvram nvram;
 static int mac_addr_used;
 
@@ -85,3 +87,12 @@ int bcm63xx_nvram_get_mac_address(u8 *mac)
        return 0;
 }
 EXPORT_SYMBOL(bcm63xx_nvram_get_mac_address);
+
+int bcm63xx_nvram_get_psi_size(void)
+{
+       if (nvram.psi_size > 0)
+               return nvram.psi_size;
+
+       return BCM63XX_DEFAULT_PSI_SIZE;
+}
+EXPORT_SYMBOL(bcm63xx_nvram_get_psi_size);
index 4e0b6bc..348df49 100644
@@ -30,4 +30,6 @@ u8 *bcm63xx_nvram_get_name(void);
  */
 int bcm63xx_nvram_get_mac_address(u8 *mac);
 
+int bcm63xx_nvram_get_psi_size(void);
+
 #endif /* BCM63XX_NVRAM_H */
index 06f17e7..8d1c816 100644
@@ -50,7 +50,9 @@
  * set of bits not changed in pmd_modify.
  */
 #define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
-                        _PAGE_ACCESSED | _PAGE_THP_HUGE)
+                        _PAGE_ACCESSED | _PAGE_THP_HUGE | _PAGE_PTE | \
+                        _PAGE_SOFT_DIRTY)
+
 
 #ifdef CONFIG_PPC_64K_PAGES
 #include <asm/book3s/64/hash-64k.h>
index 8204b0c..8d1c41d 100644
@@ -223,7 +223,6 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
 #define pmd_pfn(pmd)           pte_pfn(pmd_pte(pmd))
 #define pmd_dirty(pmd)         pte_dirty(pmd_pte(pmd))
 #define pmd_young(pmd)         pte_young(pmd_pte(pmd))
-#define pmd_dirty(pmd)         pte_dirty(pmd_pte(pmd))
 #define pmd_mkold(pmd)         pte_pmd(pte_mkold(pmd_pte(pmd)))
 #define pmd_wrprotect(pmd)     pte_pmd(pte_wrprotect(pmd_pte(pmd)))
 #define pmd_mkdirty(pmd)       pte_pmd(pte_mkdirty(pmd_pte(pmd)))
index 271fefb..9d08d8c 100644
@@ -38,8 +38,7 @@
 
 #define KVM_MAX_VCPUS          NR_CPUS
 #define KVM_MAX_VCORES         NR_CPUS
-#define KVM_USER_MEM_SLOTS 32
-#define KVM_MEM_SLOTS_NUM KVM_USER_MEM_SLOTS
+#define KVM_USER_MEM_SLOTS     512
 
 #ifdef CONFIG_KVM_MMIO
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
index 5654ece..3fa9df7 100644
@@ -383,3 +383,4 @@ SYSCALL(ni_syscall)
 SYSCALL(ni_syscall)
 SYSCALL(ni_syscall)
 SYSCALL(mlock2)
+SYSCALL(copy_file_range)
index 6a5ace5..1f2594d 100644
@@ -12,7 +12,7 @@
 #include <uapi/asm/unistd.h>
 
 
-#define NR_syscalls            379
+#define NR_syscalls            380
 
 #define __NR__exit __NR_exit
 
index 12a0565..940290d 100644
 #define __NR_userfaultfd       364
 #define __NR_membarrier                365
 #define __NR_mlock2            378
+#define __NR_copy_file_range   379
 
 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
index 8654cb1..ca9e537 100644
@@ -883,32 +883,29 @@ void eeh_pe_restore_bars(struct eeh_pe *pe)
 const char *eeh_pe_loc_get(struct eeh_pe *pe)
 {
        struct pci_bus *bus = eeh_pe_bus_get(pe);
-       struct device_node *dn = pci_bus_to_OF_node(bus);
+       struct device_node *dn;
        const char *loc = NULL;
 
-       if (!dn)
-               goto out;
+       while (bus) {
+               dn = pci_bus_to_OF_node(bus);
+               if (!dn) {
+                       bus = bus->parent;
+                       continue;
+               }
 
-       /* PHB PE or root PE ? */
-       if (pci_is_root_bus(bus)) {
-               loc = of_get_property(dn, "ibm,loc-code", NULL);
-               if (!loc)
+               if (pci_is_root_bus(bus))
                        loc = of_get_property(dn, "ibm,io-base-loc-code", NULL);
+               else
+                       loc = of_get_property(dn, "ibm,slot-location-code",
+                                             NULL);
+
                if (loc)
-                       goto out;
+                       return loc;
 
-               /* Check the root port */
-               dn = dn->child;
-               if (!dn)
-                       goto out;
+               bus = bus->parent;
        }
 
-       loc = of_get_property(dn, "ibm,loc-code", NULL);
-       if (!loc)
-               loc = of_get_property(dn, "ibm,slot-location-code", NULL);
-
-out:
-       return loc ? loc : "N/A";
+       return "N/A";
 }
 
 /**
index db475d4..f28754c 100644
@@ -701,31 +701,3 @@ _GLOBAL(kexec_sequence)
        li      r5,0
        blr     /* image->start(physid, image->start, 0); */
 #endif /* CONFIG_KEXEC */
-
-#ifdef CONFIG_MODULES
-#if defined(_CALL_ELF) && _CALL_ELF == 2
-
-#ifdef CONFIG_MODVERSIONS
-.weak __crc_TOC.
-.section "___kcrctab+TOC.","a"
-.globl __kcrctab_TOC.
-__kcrctab_TOC.:
-       .llong  __crc_TOC.
-#endif
-
-/*
- * Export a fake .TOC. since both modpost and depmod will complain otherwise.
- * Both modpost and depmod strip the leading . so we do the same here.
- */
-.section "__ksymtab_strings","a"
-__kstrtab_TOC.:
-       .asciz "TOC."
-
-.section "___ksymtab+TOC.","a"
-/* This symbol name is important: it's used by modpost to find exported syms */
-.globl __ksymtab_TOC.
-__ksymtab_TOC.:
-       .llong 0 /* .value */
-       .llong __kstrtab_TOC.
-#endif /* ELFv2 */
-#endif /* MODULES */
index 59663af..ac64ffd 100644
@@ -326,7 +326,10 @@ static void dedotify_versions(struct modversion_info *vers,
                }
 }
 
-/* Undefined symbols which refer to .funcname, hack to funcname (or .TOC.) */
+/*
+ * Undefined symbols which refer to .funcname, hack to funcname. Make .TOC.
+ * seem to be defined (value set later).
+ */
 static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
 {
        unsigned int i;
@@ -334,8 +337,11 @@ static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
        for (i = 1; i < numsyms; i++) {
                if (syms[i].st_shndx == SHN_UNDEF) {
                        char *name = strtab + syms[i].st_name;
-                       if (name[0] == '.')
+                       if (name[0] == '.') {
+                               if (strcmp(name+1, "TOC.") == 0)
+                                       syms[i].st_shndx = SHN_ABS;
                                memmove(name, name+1, strlen(name));
+                       }
                }
        }
 }
@@ -351,7 +357,7 @@ static Elf64_Sym *find_dot_toc(Elf64_Shdr *sechdrs,
        numsyms = sechdrs[symindex].sh_size / sizeof(Elf64_Sym);
 
        for (i = 1; i < numsyms; i++) {
-               if (syms[i].st_shndx == SHN_UNDEF
+               if (syms[i].st_shndx == SHN_ABS
                    && strcmp(strtab + syms[i].st_name, "TOC.") == 0)
                        return &syms[i];
        }
index 774a253..9bf7031 100644
@@ -377,15 +377,12 @@ no_seg_found:
 
 static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
 {
-       struct kvmppc_vcpu_book3s *vcpu_book3s;
        u64 esid, esid_1t;
        int slb_nr;
        struct kvmppc_slb *slbe;
 
        dprintk("KVM MMU: slbmte(0x%llx, 0x%llx)\n", rs, rb);
 
-       vcpu_book3s = to_book3s(vcpu);
-
        esid = GET_ESID(rb);
        esid_1t = GET_ESID_1T(rb);
        slb_nr = rb & 0xfff;
index cff207b..baeddb0 100644
@@ -833,6 +833,24 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
        vcpu->stat.sum_exits++;
 
+       /*
+        * This can happen if an interrupt occurs in the last stages
+        * of guest entry or the first stages of guest exit (i.e. after
+        * setting paca->kvm_hstate.in_guest to KVM_GUEST_MODE_GUEST_HV
+        * and before setting it to KVM_GUEST_MODE_HOST_HV).
+        * That can happen due to a bug, or due to a machine check
+        * occurring at just the wrong time.
+        */
+       if (vcpu->arch.shregs.msr & MSR_HV) {
+               printk(KERN_EMERG "KVM trap in HV mode!\n");
+               printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
+                       vcpu->arch.trap, kvmppc_get_pc(vcpu),
+                       vcpu->arch.shregs.msr);
+               kvmppc_dump_regs(vcpu);
+               run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+               run->hw.hardware_exit_reason = vcpu->arch.trap;
+               return RESUME_HOST;
+       }
        run->exit_reason = KVM_EXIT_UNKNOWN;
        run->ready_for_interrupt_injection = 1;
        switch (vcpu->arch.trap) {
index 3c6badc..6ee26de 100644
@@ -2153,7 +2153,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
        /* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
 2:     rlwimi  r5, r4, 5, DAWRX_DR | DAWRX_DW
-       rlwimi  r5, r4, 1, DAWRX_WT
+       rlwimi  r5, r4, 2, DAWRX_WT
        clrrdi  r4, r4, 3
        std     r4, VCPU_DAWR(r3)
        std     r5, VCPU_DAWRX(r3)
@@ -2404,6 +2404,8 @@ machine_check_realmode:
         * guest as machine check causing guest to crash.
         */
        ld      r11, VCPU_MSR(r9)
+       rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
+       bne     mc_cont                 /* if so, exit to host */
        andi.   r10, r11, MSR_RI        /* check for unrecoverable exception */
        beq     1f                      /* Deliver a machine check to guest */
        ld      r10, VCPU_PC(r9)
index 6fd2405..a3b182d 100644
@@ -919,21 +919,17 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
                                r = -ENXIO;
                                break;
                        }
-                       vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
+                       val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
                        break;
                case KVM_REG_PPC_VSCR:
                        if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
                                r = -ENXIO;
                                break;
                        }
-                       vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
+                       val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
                        break;
                case KVM_REG_PPC_VRSAVE:
-                       if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
-                               r = -ENXIO;
-                               break;
-                       }
-                       vcpu->arch.vrsave = set_reg_val(reg->id, val);
+                       val = get_reg_val(reg->id, vcpu->arch.vrsave);
                        break;
 #endif /* CONFIG_ALTIVEC */
                default:
@@ -974,17 +970,21 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
                                r = -ENXIO;
                                break;
                        }
-                       val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
+                       vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
                        break;
                case KVM_REG_PPC_VSCR:
                        if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
                                r = -ENXIO;
                                break;
                        }
-                       val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
+                       vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
                        break;
                case KVM_REG_PPC_VRSAVE:
-                       val = get_reg_val(reg->id, vcpu->arch.vrsave);
+                       if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+                               r = -ENXIO;
+                               break;
+                       }
+                       vcpu->arch.vrsave = set_reg_val(reg->id, val);
                        break;
 #endif /* CONFIG_ALTIVEC */
                default:
index 22d94c3..d0f0a51 100644
@@ -560,12 +560,12 @@ subsys_initcall(add_system_ram_resources);
  */
 int devmem_is_allowed(unsigned long pfn)
 {
+       if (page_is_rtas_user_buf(pfn))
+               return 1;
        if (iomem_is_exclusive(PFN_PHYS(pfn)))
                return 0;
        if (!page_is_ram(pfn))
                return 1;
-       if (page_is_rtas_user_buf(pfn))
-               return 1;
        return 0;
 }
 #endif /* CONFIG_STRICT_DEVMEM */
index 7d5e295..9958ba8 100644
@@ -816,7 +816,7 @@ static struct power_pmu power8_pmu = {
        .get_constraint         = power8_get_constraint,
        .get_alternatives       = power8_get_alternatives,
        .disable_pmc            = power8_disable_pmc,
-       .flags                  = PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_ARCH_207S,
+       .flags                  = PPMU_HAS_SIER | PPMU_ARCH_207S,
        .n_generic              = ARRAY_SIZE(power8_generic_events),
        .generic_events         = power8_generic_events,
        .cache_events           = &power8_cache_events,
index 16aa0c7..595a275 100644
@@ -8,6 +8,8 @@
 
 #include <linux/types.h>
 
+#define ARCH_IRQ_ENABLED       (3UL << (BITS_PER_LONG - 8))
+
 /* store then OR system mask. */
 #define __arch_local_irq_stosm(__or)                                   \
 ({                                                                     \
@@ -54,14 +56,17 @@ static inline notrace void arch_local_irq_enable(void)
        __arch_local_irq_stosm(0x03);
 }
 
+/* This only restores external and I/O interrupt state */
 static inline notrace void arch_local_irq_restore(unsigned long flags)
 {
-       __arch_local_irq_ssm(flags);
+       /* only disabled->disabled and disabled->enabled is valid */
+       if (flags & ARCH_IRQ_ENABLED)
+               arch_local_irq_enable();
 }
 
 static inline notrace bool arch_irqs_disabled_flags(unsigned long flags)
 {
-       return !(flags & (3UL << (BITS_PER_LONG - 8)));
+       return !(flags & ARCH_IRQ_ENABLED);
 }
 
 static inline notrace bool arch_irqs_disabled(void)
index 6742414..8959ebb 100644
@@ -546,7 +546,6 @@ struct kvm_vcpu_arch {
        struct kvm_s390_sie_block *sie_block;
        unsigned int      host_acrs[NUM_ACRS];
        struct fpu        host_fpregs;
-       struct fpu        guest_fpregs;
        struct kvm_s390_local_interrupt local_int;
        struct hrtimer    ckc_timer;
        struct kvm_s390_pgm_info pgm;
index 1a9a98d..69aa18b 100644
@@ -8,10 +8,13 @@
 #include <asm/pci_insn.h>
 
 /* I/O Map */
-#define ZPCI_IOMAP_MAX_ENTRIES         0x7fff
-#define ZPCI_IOMAP_ADDR_BASE           0x8000000000000000ULL
-#define ZPCI_IOMAP_ADDR_IDX_MASK       0x7fff000000000000ULL
-#define ZPCI_IOMAP_ADDR_OFF_MASK       0x0000ffffffffffffULL
+#define ZPCI_IOMAP_SHIFT               48
+#define ZPCI_IOMAP_ADDR_BASE           0x8000000000000000UL
+#define ZPCI_IOMAP_ADDR_OFF_MASK       ((1UL << ZPCI_IOMAP_SHIFT) - 1)
+#define ZPCI_IOMAP_MAX_ENTRIES                                                 \
+       ((ULONG_MAX - ZPCI_IOMAP_ADDR_BASE + 1) / (1UL << ZPCI_IOMAP_SHIFT))
+#define ZPCI_IOMAP_ADDR_IDX_MASK                                               \
+       (~ZPCI_IOMAP_ADDR_OFF_MASK - ZPCI_IOMAP_ADDR_BASE)
 
 struct zpci_iomap_entry {
        u32 fh;
@@ -21,8 +24,9 @@ struct zpci_iomap_entry {
 
 extern struct zpci_iomap_entry *zpci_iomap_start;
 
+#define ZPCI_ADDR(idx) (ZPCI_IOMAP_ADDR_BASE | ((u64) idx << ZPCI_IOMAP_SHIFT))
 #define ZPCI_IDX(addr)                                                         \
-       (((__force u64) addr & ZPCI_IOMAP_ADDR_IDX_MASK) >> 48)
+       (((__force u64) addr & ZPCI_IOMAP_ADDR_IDX_MASK) >> ZPCI_IOMAP_SHIFT)
 #define ZPCI_OFFSET(addr)                                                      \
        ((__force u64) addr & ZPCI_IOMAP_ADDR_OFF_MASK)
 
index f16debf..1c4fe12 100644
@@ -166,14 +166,14 @@ extern __vector128 init_task_fpu_regs[__NUM_VXRS];
  */
 #define start_thread(regs, new_psw, new_stackp) do {                   \
        regs->psw.mask  = PSW_USER_BITS | PSW_MASK_EA | PSW_MASK_BA;    \
-       regs->psw.addr  = new_psw | PSW_ADDR_AMODE;                     \
+       regs->psw.addr  = new_psw;                                      \
        regs->gprs[15]  = new_stackp;                                   \
        execve_tail();                                                  \
 } while (0)
 
 #define start_thread31(regs, new_psw, new_stackp) do {                 \
        regs->psw.mask  = PSW_USER_BITS | PSW_MASK_BA;                  \
-       regs->psw.addr  = new_psw | PSW_ADDR_AMODE;                     \
+       regs->psw.addr  = new_psw;                                      \
        regs->gprs[15]  = new_stackp;                                   \
        crst_table_downgrade(current->mm, 1UL << 31);                   \
        execve_tail();                                                  \
index f00cd35..99bc456 100644
@@ -149,7 +149,7 @@ static inline int test_pt_regs_flag(struct pt_regs *regs, int flag)
 #define arch_has_block_step()  (1)
 
 #define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0)
-#define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN)
+#define instruction_pointer(regs) ((regs)->psw.addr)
 #define user_stack_pointer(regs)((regs)->gprs[15])
 #define profile_pc(regs) instruction_pointer(regs)
 
@@ -161,7 +161,7 @@ static inline long regs_return_value(struct pt_regs *regs)
 static inline void instruction_pointer_set(struct pt_regs *regs,
                                           unsigned long val)
 {
-       regs->psw.addr = val | PSW_ADDR_AMODE;
+       regs->psw.addr = val;
 }
 
 int regs_query_register_offset(const char *name);
@@ -171,7 +171,7 @@ unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n);
 
 static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
 {
-       return regs->gprs[15] & PSW_ADDR_INSN;
+       return regs->gprs[15];
 }
 
 #endif /* __ASSEMBLY__ */
index 34ec202..ab3aa68 100644
 #define __NR_recvmsg           372
 #define __NR_shutdown          373
 #define __NR_mlock2            374
-#define NR_syscalls 375
+#define __NR_copy_file_range   375
+#define NR_syscalls 376
 
 /* 
  * There are some system calls that are not present on 64 bit, some
index fac4eed..ae2cda5 100644
@@ -177,3 +177,4 @@ COMPAT_SYSCALL_WRAP3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
 COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr, int __user *, usockaddr_len);
 COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len);
 COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags);
+COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags);
index a92b39f..3986c9f 100644
@@ -59,8 +59,6 @@ struct save_area * __init save_area_alloc(bool is_boot_cpu)
        struct save_area *sa;
 
        sa = (void *) memblock_alloc(sizeof(*sa), 8);
-       if (!sa)
-               return NULL;
        if (is_boot_cpu)
                list_add(&sa->list, &dump_save_areas);
        else
index 6fca0e4..c890a55 100644
@@ -1470,7 +1470,7 @@ debug_dflt_header_fn(debug_info_t * id, struct debug_view *view,
                except_str = "*";
        else
                except_str = "-";
-       caller = ((unsigned long) entry->caller) & PSW_ADDR_INSN;
+       caller = (unsigned long) entry->caller;
        rc += sprintf(out_buf, "%02i %011lld:%06lu %1u %1s %02i %p  ",
                      area, (long long)time_spec.tv_sec,
                      time_spec.tv_nsec / 1000, level, except_str,
index dc8e204..02bd02f 100644
@@ -34,22 +34,21 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high)
        unsigned long addr;
 
        while (1) {
-               sp = sp & PSW_ADDR_INSN;
                if (sp < low || sp > high - sizeof(*sf))
                        return sp;
                sf = (struct stack_frame *) sp;
-               addr = sf->gprs[8] & PSW_ADDR_INSN;
+               addr = sf->gprs[8];
                printk("([<%016lx>] %pSR)\n", addr, (void *)addr);
                /* Follow the backchain. */
                while (1) {
                        low = sp;
-                       sp = sf->back_chain & PSW_ADDR_INSN;
+                       sp = sf->back_chain;
                        if (!sp)
                                break;
                        if (sp <= low || sp > high - sizeof(*sf))
                                return sp;
                        sf = (struct stack_frame *) sp;
-                       addr = sf->gprs[8] & PSW_ADDR_INSN;
+                       addr = sf->gprs[8];
                        printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
                }
                /* Zero backchain detected, check for interrupt frame. */
@@ -57,7 +56,7 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high)
                if (sp <= low || sp > high - sizeof(*regs))
                        return sp;
                regs = (struct pt_regs *) sp;
-               addr = regs->psw.addr & PSW_ADDR_INSN;
+               addr = regs->psw.addr;
                printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
                low = sp;
                sp = regs->gprs[15];
index 20a5caf..c55576b 100644
@@ -252,14 +252,14 @@ static void early_pgm_check_handler(void)
        unsigned long addr;
 
        addr = S390_lowcore.program_old_psw.addr;
-       fixup = search_exception_tables(addr & PSW_ADDR_INSN);
+       fixup = search_exception_tables(addr);
        if (!fixup)
                disabled_wait(0);
        /* Disable low address protection before storing into lowcore. */
        __ctl_store(cr0, 0, 0);
        cr0_new = cr0 & ~(1UL << 28);
        __ctl_load(cr0_new, 0, 0);
-       S390_lowcore.program_old_psw.addr = extable_fixup(fixup)|PSW_ADDR_AMODE;
+       S390_lowcore.program_old_psw.addr = extable_fixup(fixup);
        __ctl_load(cr0, 0, 0);
 }
 
@@ -268,9 +268,9 @@ static noinline __init void setup_lowcore_early(void)
        psw_t psw;
 
        psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA;
-       psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_ext_handler;
+       psw.addr = (unsigned long) s390_base_ext_handler;
        S390_lowcore.external_new_psw = psw;
-       psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
+       psw.addr = (unsigned long) s390_base_pgm_handler;
        S390_lowcore.program_new_psw = psw;
        s390_base_pgm_handler_fn = early_pgm_check_handler;
 }
index e0eaf11..0f7bfeb 100644 (file)
@@ -203,7 +203,7 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
                goto out;
        if (unlikely(atomic_read(&current->tracing_graph_pause)))
                goto out;
-       ip = (ip & PSW_ADDR_INSN) - MCOUNT_INSN_SIZE;
+       ip -= MCOUNT_INSN_SIZE;
        trace.func = ip;
        trace.depth = current->curr_ret_stack + 1;
        /* Only trace if the calling function expects to. */
index 0a5a6b6..f20abdb 100644 (file)
@@ -2057,12 +2057,12 @@ void s390_reset_system(void)
        /* Set new machine check handler */
        S390_lowcore.mcck_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT;
        S390_lowcore.mcck_new_psw.addr =
-               PSW_ADDR_AMODE | (unsigned long) s390_base_mcck_handler;
+               (unsigned long) s390_base_mcck_handler;
 
        /* Set new program check handler */
        S390_lowcore.program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT;
        S390_lowcore.program_new_psw.addr =
-               PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
+               (unsigned long) s390_base_pgm_handler;
 
        /*
         * Clear subchannel ID and number to signal new kernel that no CCW or
index 389db56..250f597 100644 (file)
@@ -226,7 +226,7 @@ static void enable_singlestep(struct kprobe_ctlblk *kcb,
        __ctl_load(per_kprobe, 9, 11);
        regs->psw.mask |= PSW_MASK_PER;
        regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
-       regs->psw.addr = ip | PSW_ADDR_AMODE;
+       regs->psw.addr = ip;
 }
 NOKPROBE_SYMBOL(enable_singlestep);
 
@@ -238,7 +238,7 @@ static void disable_singlestep(struct kprobe_ctlblk *kcb,
        __ctl_load(kcb->kprobe_saved_ctl, 9, 11);
        regs->psw.mask &= ~PSW_MASK_PER;
        regs->psw.mask |= kcb->kprobe_saved_imask;
-       regs->psw.addr = ip | PSW_ADDR_AMODE;
+       regs->psw.addr = ip;
 }
 NOKPROBE_SYMBOL(disable_singlestep);
 
@@ -310,7 +310,7 @@ static int kprobe_handler(struct pt_regs *regs)
         */
        preempt_disable();
        kcb = get_kprobe_ctlblk();
-       p = get_kprobe((void *)((regs->psw.addr & PSW_ADDR_INSN) - 2));
+       p = get_kprobe((void *)(regs->psw.addr - 2));
 
        if (p) {
                if (kprobe_running()) {
@@ -460,7 +460,7 @@ static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
                        break;
        }
 
-       regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE;
+       regs->psw.addr = orig_ret_address;
 
        pop_kprobe(get_kprobe_ctlblk());
        kretprobe_hash_unlock(current, &flags);
@@ -490,7 +490,7 @@ NOKPROBE_SYMBOL(trampoline_probe_handler);
 static void resume_execution(struct kprobe *p, struct pt_regs *regs)
 {
        struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-       unsigned long ip = regs->psw.addr & PSW_ADDR_INSN;
+       unsigned long ip = regs->psw.addr;
        int fixup = probe_get_fixup_type(p->ainsn.insn);
 
        /* Check if the kprobes location is an enabled ftrace caller */
@@ -605,9 +605,9 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr)
                 * In case the user-specified fault handler returned
                 * zero, try to fix up.
                 */
-               entry = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+               entry = search_exception_tables(regs->psw.addr);
                if (entry) {
-                       regs->psw.addr = extable_fixup(entry) | PSW_ADDR_AMODE;
+                       regs->psw.addr = extable_fixup(entry);
                        return 1;
                }
 
@@ -683,7 +683,7 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
        memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs));
 
        /* setup return addr to the jprobe handler routine */
-       regs->psw.addr = (unsigned long) jp->entry | PSW_ADDR_AMODE;
+       regs->psw.addr = (unsigned long) jp->entry;
        regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
 
        /* r15 is the stack pointer */
index 61595c1..cfcba2d 100644
@@ -74,7 +74,7 @@ static unsigned long guest_is_user_mode(struct pt_regs *regs)
 
 static unsigned long instruction_pointer_guest(struct pt_regs *regs)
 {
-       return sie_block(regs)->gpsw.addr & PSW_ADDR_INSN;
+       return sie_block(regs)->gpsw.addr;
 }
 
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
@@ -231,29 +231,27 @@ static unsigned long __store_trace(struct perf_callchain_entry *entry,
        struct pt_regs *regs;
 
        while (1) {
-               sp = sp & PSW_ADDR_INSN;
                if (sp < low || sp > high - sizeof(*sf))
                        return sp;
                sf = (struct stack_frame *) sp;
-               perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN);
+               perf_callchain_store(entry, sf->gprs[8]);
                /* Follow the backchain. */
                while (1) {
                        low = sp;
-                       sp = sf->back_chain & PSW_ADDR_INSN;
+                       sp = sf->back_chain;
                        if (!sp)
                                break;
                        if (sp <= low || sp > high - sizeof(*sf))
                                return sp;
                        sf = (struct stack_frame *) sp;
-                       perf_callchain_store(entry,
-                                            sf->gprs[8] & PSW_ADDR_INSN);
+                       perf_callchain_store(entry, sf->gprs[8]);
                }
                /* Zero backchain detected, check for interrupt frame. */
                sp = (unsigned long) (sf + 1);
                if (sp <= low || sp > high - sizeof(*regs))
                        return sp;
                regs = (struct pt_regs *) sp;
-               perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN);
+               perf_callchain_store(entry, sf->gprs[8]);
                low = sp;
                sp = regs->gprs[15];
        }
index 114ee8b..2bba7df 100644
@@ -56,10 +56,10 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
                return 0;
        low = task_stack_page(tsk);
        high = (struct stack_frame *) task_pt_regs(tsk);
-       sf = (struct stack_frame *) (tsk->thread.ksp & PSW_ADDR_INSN);
+       sf = (struct stack_frame *) tsk->thread.ksp;
        if (sf <= low || sf > high)
                return 0;
-       sf = (struct stack_frame *) (sf->back_chain & PSW_ADDR_INSN);
+       sf = (struct stack_frame *) sf->back_chain;
        if (sf <= low || sf > high)
                return 0;
        return sf->gprs[8];
@@ -154,7 +154,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
                memset(&frame->childregs, 0, sizeof(struct pt_regs));
                frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT |
                                PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
-               frame->childregs.psw.addr = PSW_ADDR_AMODE |
+               frame->childregs.psw.addr =
                                (unsigned long) kernel_thread_starter;
                frame->childregs.gprs[9] = new_stackp; /* function */
                frame->childregs.gprs[10] = arg;
@@ -220,14 +220,14 @@ unsigned long get_wchan(struct task_struct *p)
                return 0;
        low = task_stack_page(p);
        high = (struct stack_frame *) task_pt_regs(p);
-       sf = (struct stack_frame *) (p->thread.ksp & PSW_ADDR_INSN);
+       sf = (struct stack_frame *) p->thread.ksp;
        if (sf <= low || sf > high)
                return 0;
        for (count = 0; count < 16; count++) {
-               sf = (struct stack_frame *) (sf->back_chain & PSW_ADDR_INSN);
+               sf = (struct stack_frame *) sf->back_chain;
                if (sf <= low || sf > high)
                        return 0;
-               return_address = sf->gprs[8] & PSW_ADDR_INSN;
+               return_address = sf->gprs[8];
                if (!in_sched_functions(return_address))
                        return return_address;
        }
index 01c37b3..49b1c13 100644
@@ -84,7 +84,7 @@ void update_cr_regs(struct task_struct *task)
                if (test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP))
                        new.control |= PER_EVENT_IFETCH;
                new.start = 0;
-               new.end = PSW_ADDR_INSN;
+               new.end = -1UL;
        }
 
        /* Take care of the PER enablement bit in the PSW. */
@@ -148,7 +148,7 @@ static inline unsigned long __peek_user_per(struct task_struct *child,
        else if (addr == (addr_t) &dummy->cr11)
                /* End address of the active per set. */
                return test_thread_flag(TIF_SINGLE_STEP) ?
-                       PSW_ADDR_INSN : child->thread.per_user.end;
+                       -1UL : child->thread.per_user.end;
        else if (addr == (addr_t) &dummy->bits)
                /* Single-step bit. */
                return test_thread_flag(TIF_SINGLE_STEP) ?
@@ -495,8 +495,6 @@ long arch_ptrace(struct task_struct *child, long request,
                }
                return 0;
        default:
-               /* Removing high order bit from addr (only for 31 bit). */
-               addr &= PSW_ADDR_INSN;
                return ptrace_request(child, request, addr, data);
        }
 }
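
The PER (program-event recording) range registers get the same treatment: "cover the whole address space" is now written as an end address of -1UL instead of PSW_ADDR_INSN. A hypothetical helper, not part of the patch, to make the intent explicit:

    /* Illustration only: arm PER over the entire 64-bit address range.
     * struct per_regs is the s390 control-register image with the
     * fields control, start and end.
     */
    static void per_cover_everything(struct per_regs *regs)
    {
            regs->start = 0;
            regs->end   = -1UL;     /* formerly spelled PSW_ADDR_INSN */
    }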
index c6878fb..9220db5 100644 (file)
@@ -301,25 +301,21 @@ static void __init setup_lowcore(void)
        BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * 4096);
        lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0);
        lc->restart_psw.mask = PSW_KERNEL_BITS;
-       lc->restart_psw.addr =
-               PSW_ADDR_AMODE | (unsigned long) restart_int_handler;
+       lc->restart_psw.addr = (unsigned long) restart_int_handler;
        lc->external_new_psw.mask = PSW_KERNEL_BITS |
                PSW_MASK_DAT | PSW_MASK_MCHECK;
-       lc->external_new_psw.addr =
-               PSW_ADDR_AMODE | (unsigned long) ext_int_handler;
+       lc->external_new_psw.addr = (unsigned long) ext_int_handler;
        lc->svc_new_psw.mask = PSW_KERNEL_BITS |
                PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
-       lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call;
+       lc->svc_new_psw.addr = (unsigned long) system_call;
        lc->program_new_psw.mask = PSW_KERNEL_BITS |
                PSW_MASK_DAT | PSW_MASK_MCHECK;
-       lc->program_new_psw.addr =
-               PSW_ADDR_AMODE | (unsigned long) pgm_check_handler;
+       lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
        lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
-       lc->mcck_new_psw.addr =
-               PSW_ADDR_AMODE | (unsigned long) mcck_int_handler;
+       lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
        lc->io_new_psw.mask = PSW_KERNEL_BITS |
                PSW_MASK_DAT | PSW_MASK_MCHECK;
-       lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler;
+       lc->io_new_psw.addr = (unsigned long) io_int_handler;
        lc->clock_comparator = -1ULL;
        lc->kernel_stack = ((unsigned long) &init_thread_union)
                + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
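
For reference, the object these assignments fill is the two-word program status word; dropping the AMODE OR is safe because psw.addr holds a plain 64-bit instruction address with no mode bits. Sketch of the type, assuming the uapi asm/ptrace.h layout of this era:

    typedef struct {
            unsigned long mask;     /* machine state: DAT, I/O, ext, ... */
            unsigned long addr;     /* instruction address, no mode bits */
    } __attribute__ ((aligned(8))) psw_t;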
index 028cc46..d82562c 100644 (file)
@@ -331,13 +331,13 @@ static int setup_frame(int sig, struct k_sigaction *ka,
        /* Set up to return from userspace.  If provided, use a stub
           already in userspace.  */
        if (ka->sa.sa_flags & SA_RESTORER) {
-               restorer = (unsigned long) ka->sa.sa_restorer | PSW_ADDR_AMODE;
+               restorer = (unsigned long) ka->sa.sa_restorer;
        } else {
                /* Signal frame without vector registers are short ! */
                __u16 __user *svc = (void __user *) frame + frame_size - 2;
                if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc))
                        return -EFAULT;
-               restorer = (unsigned long) svc | PSW_ADDR_AMODE;
+               restorer = (unsigned long) svc;
        }
 
        /* Set up registers for signal handler */
@@ -347,7 +347,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
        regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
                (PSW_USER_BITS & PSW_MASK_ASC) |
                (regs->psw.mask & ~PSW_MASK_ASC);
-       regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE;
+       regs->psw.addr = (unsigned long) ka->sa.sa_handler;
 
        regs->gprs[2] = sig;
        regs->gprs[3] = (unsigned long) &frame->sc;
@@ -394,13 +394,12 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
        /* Set up to return from userspace.  If provided, use a stub
           already in userspace.  */
        if (ksig->ka.sa.sa_flags & SA_RESTORER) {
-               restorer = (unsigned long)
-                       ksig->ka.sa.sa_restorer | PSW_ADDR_AMODE;
+               restorer = (unsigned long) ksig->ka.sa.sa_restorer;
        } else {
                __u16 __user *svc = &frame->svc_insn;
                if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc))
                        return -EFAULT;
-               restorer = (unsigned long) svc | PSW_ADDR_AMODE;
+               restorer = (unsigned long) svc;
        }
 
        /* Create siginfo on the signal stack */
@@ -426,7 +425,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
        regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
                (PSW_USER_BITS & PSW_MASK_ASC) |
                (regs->psw.mask & ~PSW_MASK_ASC);
-       regs->psw.addr = (unsigned long) ksig->ka.sa.sa_handler | PSW_ADDR_AMODE;
+       regs->psw.addr = (unsigned long) ksig->ka.sa.sa_handler;
 
        regs->gprs[2] = ksig->sig;
        regs->gprs[3] = (unsigned long) &frame->info;
index a13468b..3c65a8e 100644 (file)
@@ -623,8 +623,6 @@ void __init smp_save_dump_cpus(void)
                return;
        /* Allocate a page as dumping area for the store status sigps */
        page = memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, 1UL << 31);
-       if (!page)
-               panic("could not allocate memory for save area\n");
        /* Set multi-threading state to the previous system. */
        pcpu_set_smt(sclp.mtid_prev);
        boot_cpu_addr = stap();
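
The deleted NULL check was dead code: memblock_alloc_base() does not return on failure, it panics internally. Paraphrased sketch of the allocator's failure path, as found in mm/memblock.c of this era:

    phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align,
                                           phys_addr_t max_addr)
    {
            phys_addr_t alloc = __memblock_alloc_base(size, align, max_addr);

            if (alloc == 0)
                    panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n",
                          (unsigned long long) size, (unsigned long long) max_addr);
            return alloc;
    }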
index 1785cd8..5acba3c 100644 (file)
@@ -21,12 +21,11 @@ static unsigned long save_context_stack(struct stack_trace *trace,
        unsigned long addr;
 
        while(1) {
-               sp &= PSW_ADDR_INSN;
                if (sp < low || sp > high)
                        return sp;
                sf = (struct stack_frame *)sp;
                while(1) {
-                       addr = sf->gprs[8] & PSW_ADDR_INSN;
+                       addr = sf->gprs[8];
                        if (!trace->skip)
                                trace->entries[trace->nr_entries++] = addr;
                        else
@@ -34,7 +33,7 @@ static unsigned long save_context_stack(struct stack_trace *trace,
                        if (trace->nr_entries >= trace->max_entries)
                                return sp;
                        low = sp;
-                       sp = sf->back_chain & PSW_ADDR_INSN;
+                       sp = sf->back_chain;
                        if (!sp)
                                break;
                        if (sp <= low || sp > high - sizeof(*sf))
@@ -46,7 +45,7 @@ static unsigned long save_context_stack(struct stack_trace *trace,
                if (sp <= low || sp > high - sizeof(*regs))
                        return sp;
                regs = (struct pt_regs *)sp;
-               addr = regs->psw.addr & PSW_ADDR_INSN;
+               addr = regs->psw.addr;
                if (savesched || !in_sched_functions(addr)) {
                        if (!trace->skip)
                                trace->entries[trace->nr_entries++] = addr;
@@ -65,7 +64,7 @@ void save_stack_trace(struct stack_trace *trace)
        register unsigned long sp asm ("15");
        unsigned long orig_sp, new_sp;
 
-       orig_sp = sp & PSW_ADDR_INSN;
+       orig_sp = sp;
        new_sp = save_context_stack(trace, orig_sp,
                                    S390_lowcore.panic_stack - PAGE_SIZE,
                                    S390_lowcore.panic_stack, 1);
@@ -86,7 +85,7 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
        unsigned long sp, low, high;
 
-       sp = tsk->thread.ksp & PSW_ADDR_INSN;
+       sp = tsk->thread.ksp;
        low = (unsigned long) task_stack_page(tsk);
        high = (unsigned long) task_pt_regs(tsk);
        save_context_stack(trace, sp, low, high, 0);
index 5378c3e..293d8b9 100644 (file)
@@ -383,3 +383,4 @@ SYSCALL(sys_recvfrom,compat_sys_recvfrom)
 SYSCALL(sys_recvmsg,compat_sys_recvmsg)
 SYSCALL(sys_shutdown,sys_shutdown)
 SYSCALL(sys_mlock2,compat_sys_mlock2)
+SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */
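
The trailing comment pins the slot number: entry 375 in the table has to line up with the syscall number in the headers, which the same series bumps. Sketch of the matching definitions, assuming the usual s390 pairing:

    #define __NR_copy_file_range    375     /* uapi asm/unistd.h            */
    #define NR_syscalls             376     /* kernel asm/unistd.h, was 375 */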
index d69d648..017eb03 100644 (file)
@@ -32,8 +32,7 @@ static inline void __user *get_trap_ip(struct pt_regs *regs)
                address = *(unsigned long *)(current->thread.trap_tdb + 24);
        else
                address = regs->psw.addr;
-       return (void __user *)
-               ((address - (regs->int_code >> 16)) & PSW_ADDR_INSN);
+       return (void __user *) (address - (regs->int_code >> 16));
 }
 
 static inline void report_user_fault(struct pt_regs *regs, int signr)
@@ -46,7 +45,7 @@ static inline void report_user_fault(struct pt_regs *regs, int signr)
                return;
        printk("User process fault: interruption code %04x ilc:%d ",
               regs->int_code & 0xffff, regs->int_code >> 17);
-       print_vma_addr("in ", regs->psw.addr & PSW_ADDR_INSN);
+       print_vma_addr("in ", regs->psw.addr);
        printk("\n");
        show_regs(regs);
 }
@@ -69,13 +68,13 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
                report_user_fault(regs, si_signo);
         } else {
                 const struct exception_table_entry *fixup;
-                fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+               fixup = search_exception_tables(regs->psw.addr);
                 if (fixup)
-                       regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE;
+                       regs->psw.addr = extable_fixup(fixup);
                else {
                        enum bug_trap_type btt;
 
-                       btt = report_bug(regs->psw.addr & PSW_ADDR_INSN, regs);
+                       btt = report_bug(regs->psw.addr, regs);
                        if (btt == BUG_TRAP_TYPE_WARN)
                                return;
                        die(regs, str);
index 5fce52c..5ea5af3 100644 (file)
@@ -29,6 +29,7 @@ config KVM
        select HAVE_KVM_IRQFD
        select HAVE_KVM_IRQ_ROUTING
        select SRCU
+       select KVM_VFIO
        ---help---
          Support hosting paravirtualized guest machines using the SIE
          virtualization capability on the mainframe. This should work
index b3b5534..d42fa38 100644 (file)
@@ -7,7 +7,7 @@
 # as published by the Free Software Foundation.
 
 KVM := ../../../virt/kvm
-common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o $(KVM)/irqchip.o
+common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o $(KVM)/irqchip.o $(KVM)/vfio.o
 
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
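
The Kconfig select above and this Makefile hunk belong together: KVM_VFIO only has an effect once vfio.o is linked into the kvm module. What the extra object provides, roughly (a sketch of virt/kvm/vfio.c behaviour, for context):

    /* At module init, approximately:
     *
     *     kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
     *
     * after which user space can instantiate the device via
     * KVM_CREATE_DEVICE and hand VFIO group fds to KVM.
     */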
 
index 47518a3..d697312 100644 (file)
@@ -116,7 +116,7 @@ static void enable_all_hw_wp(struct kvm_vcpu *vcpu)
        if (*cr9 & PER_EVENT_STORE && *cr9 & PER_CONTROL_ALTERATION) {
                *cr9 &= ~PER_CONTROL_ALTERATION;
                *cr10 = 0;
-               *cr11 = PSW_ADDR_INSN;
+               *cr11 = -1UL;
        } else {
                *cr9 &= ~PER_CONTROL_ALTERATION;
                *cr9 |= PER_EVENT_STORE;
@@ -159,7 +159,7 @@ void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu)
                vcpu->arch.sie_block->gcr[0] &= ~0x800ul;
                vcpu->arch.sie_block->gcr[9] |= PER_EVENT_IFETCH;
                vcpu->arch.sie_block->gcr[10] = 0;
-               vcpu->arch.sie_block->gcr[11] = PSW_ADDR_INSN;
+               vcpu->arch.sie_block->gcr[11] = -1UL;
        }
 
        if (guestdbg_hw_bp_enabled(vcpu)) {
index 835d60b..4af21c7 100644 (file)
@@ -1423,44 +1423,18 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
        return 0;
 }
 
-/*
- * Backs up the current FP/VX register save area on a particular
- * destination.  Used to switch between different register save
- * areas.
- */
-static inline void save_fpu_to(struct fpu *dst)
-{
-       dst->fpc = current->thread.fpu.fpc;
-       dst->regs = current->thread.fpu.regs;
-}
-
-/*
- * Switches the FP/VX register save area from which to lazy
- * restore register contents.
- */
-static inline void load_fpu_from(struct fpu *from)
-{
-       current->thread.fpu.fpc = from->fpc;
-       current->thread.fpu.regs = from->regs;
-}
-
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
        /* Save host register state */
        save_fpu_regs();
-       save_fpu_to(&vcpu->arch.host_fpregs);
-
-       if (test_kvm_facility(vcpu->kvm, 129)) {
-               current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
-               /*
-                * Use the register save area in the SIE-control block
-                * for register restore and save in kvm_arch_vcpu_put()
-                */
-               current->thread.fpu.vxrs =
-                       (__vector128 *)&vcpu->run->s.regs.vrs;
-       } else
-               load_fpu_from(&vcpu->arch.guest_fpregs);
+       vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
+       vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
 
+       /* Depending on MACHINE_HAS_VX, data stored to vrs either
+        * has vector register or floating point register format.
+        */
+       current->thread.fpu.regs = vcpu->run->s.regs.vrs;
+       current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
        if (test_fp_ctl(current->thread.fpu.fpc))
                /* User space provided an invalid FPC, let's clear it */
                current->thread.fpu.fpc = 0;
@@ -1476,19 +1450,13 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
        atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
        gmap_disable(vcpu->arch.gmap);
 
+       /* Save guest register state */
        save_fpu_regs();
+       vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
 
-       if (test_kvm_facility(vcpu->kvm, 129))
-               /*
-                * kvm_arch_vcpu_load() set up the register save area to
-                * the &vcpu->run->s.regs.vrs and, thus, the vector registers
-                * are already saved.  Only the floating-point control must be
-                * copied.
-                */
-               vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
-       else
-               save_fpu_to(&vcpu->arch.guest_fpregs);
-       load_fpu_from(&vcpu->arch.host_fpregs);
+       /* Restore host register state */
+       current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
+       current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
 
        save_access_regs(vcpu->run->s.regs.acrs);
        restore_access_regs(vcpu->arch.host_acrs);
@@ -1506,8 +1474,9 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
        memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
        vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
        vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
-       vcpu->arch.guest_fpregs.fpc = 0;
-       asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
+       /* make sure the new fpc will be lazily loaded */
+       save_fpu_regs();
+       current->thread.fpu.fpc = 0;
        vcpu->arch.sie_block->gbea = 1;
        vcpu->arch.sie_block->pp = 0;
        vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
@@ -1648,17 +1617,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
        vcpu->arch.local_int.wq = &vcpu->wq;
        vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
 
-       /*
-        * Allocate a save area for floating-point registers.  If the vector
-        * extension is available, register contents are saved in the SIE
-        * control block.  The allocated save area is still required in
-        * particular places, for example, in kvm_s390_vcpu_store_status().
-        */
-       vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
-                                              GFP_KERNEL);
-       if (!vcpu->arch.guest_fpregs.fprs)
-               goto out_free_sie_block;
-
        rc = kvm_vcpu_init(vcpu, kvm, id);
        if (rc)
                goto out_free_sie_block;
@@ -1879,19 +1837,27 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 
 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
+       /* make sure the new values will be lazily loaded */
+       save_fpu_regs();
        if (test_fp_ctl(fpu->fpc))
                return -EINVAL;
-       memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
-       vcpu->arch.guest_fpregs.fpc = fpu->fpc;
-       save_fpu_regs();
-       load_fpu_from(&vcpu->arch.guest_fpregs);
+       current->thread.fpu.fpc = fpu->fpc;
+       if (MACHINE_HAS_VX)
+               convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
+       else
+               memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
        return 0;
 }
 
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
-       memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
-       fpu->fpc = vcpu->arch.guest_fpregs.fpc;
+       /* make sure we have the latest values */
+       save_fpu_regs();
+       if (MACHINE_HAS_VX)
+               convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
+       else
+               memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
+       fpu->fpc = current->thread.fpu.fpc;
        return 0;
 }
 
@@ -2396,6 +2362,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
 {
        unsigned char archmode = 1;
+       freg_t fprs[NUM_FPRS];
        unsigned int px;
        u64 clkcomp;
        int rc;
@@ -2411,8 +2378,16 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
                gpa = px;
        } else
                gpa -= __LC_FPREGS_SAVE_AREA;
-       rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
-                            vcpu->arch.guest_fpregs.fprs, 128);
+
+       /* manually convert vector registers if necessary */
+       if (MACHINE_HAS_VX) {
+               convert_vx_to_fp(fprs, current->thread.fpu.vxrs);
+               rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
+                                    fprs, 128);
+       } else {
+               rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
+                                    vcpu->run->s.regs.vrs, 128);
+       }
        rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
                              vcpu->run->s.regs.gprs, 128);
        rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
@@ -2420,7 +2395,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
        rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
                              &px, 4);
        rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
-                             &vcpu->arch.guest_fpregs.fpc, 4);
+                             &vcpu->run->s.regs.fpc, 4);
        rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
                              &vcpu->arch.sie_block->todpr, 4);
        rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
@@ -2443,19 +2418,7 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
         * it into the save area
         */
        save_fpu_regs();
-       if (test_kvm_facility(vcpu->kvm, 129)) {
-               /*
-                * If the vector extension is available, the vector registers
-                * which overlaps with floating-point registers are saved in
-                * the SIE-control block.  Hence, extract the floating-point
-                * registers and the FPC value and store them in the
-                * guest_fpregs structure.
-                */
-               vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
-               convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
-                                current->thread.fpu.vxrs);
-       } else
-               save_fpu_to(&vcpu->arch.guest_fpregs);
+       vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
        save_access_regs(vcpu->run->s.regs.acrs);
 
        return kvm_s390_store_status_unloaded(vcpu, addr);
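
Net effect of the kvm-s390.c rework: the separately allocated guest_fpregs save area is gone, guest FP/VX state lives in the user-visible kvm_run block, and it is swapped through current->thread.fpu so the existing lazy save_fpu_regs() machinery covers the guest as well. A condensed, illustrative pairing of the two halves (not literal kernel code):

    /* vcpu_load: stash host state, point the lazy-FPU area at kvm_run */
    save_fpu_regs();                                  /* flush host regs  */
    host_fpregs.fpc  = current->thread.fpu.fpc;
    host_fpregs.regs = current->thread.fpu.regs;
    current->thread.fpu.regs = vcpu->run->s.regs.vrs;
    current->thread.fpu.fpc  = vcpu->run->s.regs.fpc;

    /* vcpu_put: flush guest state into kvm_run, restore host pointers */
    save_fpu_regs();                                  /* flush guest regs */
    vcpu->run->s.regs.fpc    = current->thread.fpu.fpc;
    current->thread.fpu.fpc  = host_fpregs.fpc;
    current->thread.fpu.regs = host_fpregs.regs;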
index 1b903f6..791a414 100644 (file)
@@ -228,7 +228,7 @@ static inline void report_user_fault(struct pt_regs *regs, long signr)
                return;
        printk(KERN_ALERT "User process fault: interruption code %04x ilc:%d ",
               regs->int_code & 0xffff, regs->int_code >> 17);
-       print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN);
+       print_vma_addr(KERN_CONT "in ", regs->psw.addr);
        printk(KERN_CONT "\n");
        printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
               regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
@@ -256,9 +256,9 @@ static noinline void do_no_context(struct pt_regs *regs)
        const struct exception_table_entry *fixup;
 
        /* Are we prepared to handle this kernel fault?  */
-       fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+       fixup = search_exception_tables(regs->psw.addr);
        if (fixup) {
-               regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE;
+               regs->psw.addr = extable_fixup(fixup);
                return;
        }
 
index c722400..73e2903 100644 (file)
@@ -98,7 +98,7 @@ void __init paging_init(void)
        __ctl_load(S390_lowcore.kernel_asce, 1, 1);
        __ctl_load(S390_lowcore.kernel_asce, 7, 7);
        __ctl_load(S390_lowcore.kernel_asce, 13, 13);
-       arch_local_irq_restore(4UL << (BITS_PER_LONG - 8));
+       __arch_local_irq_stosm(0x04);
 
        sparse_memory_present_with_active_regions(MAX_NUMNODES);
        sparse_init();
index ea01477..45c4daa 100644 (file)
@@ -169,12 +169,12 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 
 int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags)
 {
-       if (is_compat_task() || (TASK_SIZE >= (1UL << 53)))
+       if (is_compat_task() || TASK_SIZE >= TASK_MAX_SIZE)
                return 0;
        if (!(flags & MAP_FIXED))
                addr = 0;
        if ((addr + len) >= TASK_SIZE)
-               return crst_table_upgrade(current->mm, 1UL << 53);
+               return crst_table_upgrade(current->mm, TASK_MAX_SIZE);
        return 0;
 }
 
@@ -189,9 +189,9 @@ s390_get_unmapped_area(struct file *filp, unsigned long addr,
        area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
        if (!(area & ~PAGE_MASK))
                return area;
-       if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) {
+       if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
                /* Upgrade the page table to 4 levels and retry. */
-               rc = crst_table_upgrade(mm, 1UL << 53);
+               rc = crst_table_upgrade(mm, TASK_MAX_SIZE);
                if (rc)
                        return (unsigned long) rc;
                area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
@@ -211,9 +211,9 @@ s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
        area = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags);
        if (!(area & ~PAGE_MASK))
                return area;
-       if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) {
+       if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
                /* Upgrade the page table to 4 levels and retry. */
-               rc = crst_table_upgrade(mm, 1UL << 53);
+               rc = crst_table_upgrade(mm, TASK_MAX_SIZE);
                if (rc)
                        return (unsigned long) rc;
                area = arch_get_unmapped_area_topdown(filp, addr, len,
index a809fa8..5109827 100644 (file)
@@ -55,7 +55,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
        unsigned long entry;
        int flush;
 
-       BUG_ON(limit > (1UL << 53));
+       BUG_ON(limit > TASK_MAX_SIZE);
        flush = 0;
 repeat:
        table = crst_table_alloc(mm);
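
TASK_MAX_SIZE gives the bare (1UL << 53) literal a name; the value is the ceiling imposed by four page-table levels on s390. Sketch of the definition, assumed to live in arch/s390/include/asm/processor.h:

    #define TASK_MAX_SIZE   (1UL << 53)     /* 4-level page-table limit */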
index 43f32ce..2794845 100644 (file)
@@ -57,9 +57,7 @@ static __init pg_data_t *alloc_node_data(void)
 {
        pg_data_t *res;
 
-       res = (pg_data_t *) memblock_alloc(sizeof(pg_data_t), 1);
-       if (!res)
-               panic("Could not allocate memory for node data!\n");
+       res = (pg_data_t *) memblock_alloc(sizeof(pg_data_t), 8);
        memset(res, 0, sizeof(pg_data_t));
        return res;
 }
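
Same rationale as the smp.c hunk earlier: memblock_alloc() panics internally on failure, so the explicit panic was unreachable. The alignment also grows from 1 to 8, giving the pg_data_t natural alignment instead of a byte-aligned placement. For reference, the allocator of this era is a thin wrapper (paraphrased):

    phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align)
    {
            /* panics on failure, see the memblock_alloc_base() sketch above */
            return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
    }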
@@ -162,7 +160,7 @@ static int __init numa_init_late(void)
                register_one_node(nid);
        return 0;
 }
-device_initcall(numa_init_late);
+arch_initcall(numa_init_late);
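
Promoting numa_init_late() from device_initcall to arch_initcall moves node registration several levels earlier, so the nodes exist before any device-level code looks them up. For orientation, the initcall levels run in this order (include/linux/init.h):

    /* early -> core -> postcore -> arch -> subsys -> fs -> device -> late */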
 
 static int __init parse_debug(char *parm)
 {
index 8a6811b..fe0bfe3 100644 (file)
@@ -16,24 +16,23 @@ __show_trace(unsigned int *depth, unsigned long sp,
        struct pt_regs *regs;
 
        while (*depth) {
-               sp = sp & PSW_ADDR_INSN;
                if (sp < low || sp > high - sizeof(*sf))
                        return sp;
                sf = (struct stack_frame *) sp;
                (*depth)--;
-               oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN);
+               oprofile_add_trace(sf->gprs[8]);
 
                /* Follow the backchain.  */
                while (*depth) {
                        low = sp;
-                       sp = sf->back_chain & PSW_ADDR_INSN;
+                       sp = sf->back_chain;
                        if (!sp)
                                break;
                        if (sp <= low || sp > high - sizeof(*sf))
                                return sp;
                        sf = (struct stack_frame *) sp;
                        (*depth)--;
-                       oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN);
+                       oprofile_add_trace(sf->gprs[8]);
 
                }
 
@@ -46,7 +45,7 @@ __show_trace(unsigned int *depth, unsigned long sp,
                        return sp;
                regs = (struct pt_regs *) sp;
                (*depth)--;
-               oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN);
+               oprofile_add_trace(sf->gprs[8]);
                low = sp;
                sp = regs->gprs[15];
        }
index 11d4f27..8f19c8f 100644 (file)
@@ -68,9 +68,12 @@ static struct airq_struct zpci_airq = {
        .isc = PCI_ISC,
 };
 
-/* I/O Map */
+#define ZPCI_IOMAP_ENTRIES                                             \
+       min(((unsigned long) CONFIG_PCI_NR_FUNCTIONS * PCI_BAR_COUNT),  \
+           ZPCI_IOMAP_MAX_ENTRIES)
+
 static DEFINE_SPINLOCK(zpci_iomap_lock);
-static DECLARE_BITMAP(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES);
+static unsigned long *zpci_iomap_bitmap;
 struct zpci_iomap_entry *zpci_iomap_start;
 EXPORT_SYMBOL_GPL(zpci_iomap_start);
 
@@ -265,27 +268,20 @@ void __iomem *pci_iomap_range(struct pci_dev *pdev,
                              unsigned long max)
 {
        struct zpci_dev *zdev = to_zpci(pdev);
-       u64 addr;
        int idx;
 
-       if ((bar & 7) != bar)
+       if (!pci_resource_len(pdev, bar))
                return NULL;
 
        idx = zdev->bars[bar].map_idx;
        spin_lock(&zpci_iomap_lock);
-       if (zpci_iomap_start[idx].count++) {
-               BUG_ON(zpci_iomap_start[idx].fh != zdev->fh ||
-                      zpci_iomap_start[idx].bar != bar);
-       } else {
-               zpci_iomap_start[idx].fh = zdev->fh;
-               zpci_iomap_start[idx].bar = bar;
-       }
        /* Detect overrun */
-       BUG_ON(!zpci_iomap_start[idx].count);
+       WARN_ON(!++zpci_iomap_start[idx].count);
+       zpci_iomap_start[idx].fh = zdev->fh;
+       zpci_iomap_start[idx].bar = bar;
        spin_unlock(&zpci_iomap_lock);
 
-       addr = ZPCI_IOMAP_ADDR_BASE | ((u64) idx << 48);
-       return (void __iomem *) addr + offset;
+       return (void __iomem *) ZPCI_ADDR(idx) + offset;
 }
 EXPORT_SYMBOL(pci_iomap_range);
 
@@ -297,12 +293,11 @@ EXPORT_SYMBOL(pci_iomap);
 
 void pci_iounmap(struct pci_dev *pdev, void __iomem *addr)
 {
-       unsigned int idx;
+       unsigned int idx = ZPCI_IDX(addr);
 
-       idx = (((__force u64) addr) & ~ZPCI_IOMAP_ADDR_BASE) >> 48;
        spin_lock(&zpci_iomap_lock);
        /* Detect underrun */
-       BUG_ON(!zpci_iomap_start[idx].count);
+       WARN_ON(!zpci_iomap_start[idx].count);
        if (!--zpci_iomap_start[idx].count) {
                zpci_iomap_start[idx].fh = 0;
                zpci_iomap_start[idx].bar = 0;
@@ -544,15 +539,15 @@ static void zpci_irq_exit(void)
 
 static int zpci_alloc_iomap(struct zpci_dev *zdev)
 {
-       int entry;
+       unsigned long entry;
 
        spin_lock(&zpci_iomap_lock);
-       entry = find_first_zero_bit(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES);
-       if (entry == ZPCI_IOMAP_MAX_ENTRIES) {
+       entry = find_first_zero_bit(zpci_iomap_bitmap, ZPCI_IOMAP_ENTRIES);
+       if (entry == ZPCI_IOMAP_ENTRIES) {
                spin_unlock(&zpci_iomap_lock);
                return -ENOSPC;
        }
-       set_bit(entry, zpci_iomap);
+       set_bit(entry, zpci_iomap_bitmap);
        spin_unlock(&zpci_iomap_lock);
        return entry;
 }
@@ -561,7 +556,7 @@ static void zpci_free_iomap(struct zpci_dev *zdev, int entry)
 {
        spin_lock(&zpci_iomap_lock);
        memset(&zpci_iomap_start[entry], 0, sizeof(struct zpci_iomap_entry));
-       clear_bit(entry, zpci_iomap);
+       clear_bit(entry, zpci_iomap_bitmap);
        spin_unlock(&zpci_iomap_lock);
 }
 
@@ -611,8 +606,7 @@ static int zpci_setup_bus_resources(struct zpci_dev *zdev,
                if (zdev->bars[i].val & 4)
                        flags |= IORESOURCE_MEM_64;
 
-               addr = ZPCI_IOMAP_ADDR_BASE + ((u64) entry << 48);
-
+               addr = ZPCI_ADDR(entry);
                size = 1UL << zdev->bars[i].size;
 
                res = __alloc_res(zdev, addr, size, flags);
@@ -873,23 +867,30 @@ static int zpci_mem_init(void)
        zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb),
                                16, 0, NULL);
        if (!zdev_fmb_cache)
-               goto error_zdev;
+               goto error_fmb;
 
-       /* TODO: use realloc */
-       zpci_iomap_start = kzalloc(ZPCI_IOMAP_MAX_ENTRIES * sizeof(*zpci_iomap_start),
-                                  GFP_KERNEL);
+       zpci_iomap_start = kcalloc(ZPCI_IOMAP_ENTRIES,
+                                  sizeof(*zpci_iomap_start), GFP_KERNEL);
        if (!zpci_iomap_start)
                goto error_iomap;
-       return 0;
 
+       zpci_iomap_bitmap = kcalloc(BITS_TO_LONGS(ZPCI_IOMAP_ENTRIES),
+                                   sizeof(*zpci_iomap_bitmap), GFP_KERNEL);
+       if (!zpci_iomap_bitmap)
+               goto error_iomap_bitmap;
+
+       return 0;
+error_iomap_bitmap:
+       kfree(zpci_iomap_start);
 error_iomap:
        kmem_cache_destroy(zdev_fmb_cache);
-error_zdev:
+error_fmb:
        return -ENOMEM;
 }
 
 static void zpci_mem_exit(void)
 {
+       kfree(zpci_iomap_bitmap);
        kfree(zpci_iomap_start);
        kmem_cache_destroy(zdev_fmb_cache);
 }
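
ZPCI_ADDR() and ZPCI_IDX() fold the shift arithmetic that the old code open-coded in pci_iomap_range(), pci_iounmap() and zpci_setup_bus_resources(). Sketch of the helpers, reconstructed from the expressions they replace (assumed to land in arch/s390/include/asm/pci_io.h):

    #define ZPCI_ADDR(idx)  (ZPCI_IOMAP_ADDR_BASE | ((u64) (idx) << 48))
    #define ZPCI_IDX(addr)  \
            (((__force u64) (addr) & ~ZPCI_IOMAP_ADDR_BASE) >> 48)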
index 369a3e0..b0e0475 100644 (file)
@@ -53,6 +53,11 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
 
        pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
               pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
+
+       if (!pdev)
+               return;
+
+       pdev->error_state = pci_channel_io_perm_failure;
 }
 
 void zpci_event_error(void *data)
index f887c64..8a84e05 100644 (file)
@@ -33,7 +33,6 @@
 #endif
 
 #define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
-#define smp_store_mb(var, value) __smp_store_mb(var, value)
 
 #include <asm-generic/barrier.h>
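
The deleted define was redundant: asm-generic/barrier.h, included right below it, already maps smp_store_mb() onto an architecture's __smp_store_mb() when one is provided. Paraphrased from the generic header of this era:

    #ifndef smp_store_mb
    #define smp_store_mb(var, value)  __smp_store_mb(var, value)
    #endif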
 
index 330e738..9af2e63 100644 (file)
@@ -509,11 +509,10 @@ config X86_INTEL_CE
 
 config X86_INTEL_MID
        bool "Intel MID platform support"
-       depends on X86_32
        depends on X86_EXTENDED_PLATFORM
        depends on X86_PLATFORM_DEVICES
        depends on PCI
-       depends on PCI_GOANY
+       depends on X86_64 || (PCI_GOANY && X86_32)
        depends on X86_IO_APIC
        select SFI
        select I2C
index 881b476..e7de5c9 100644 (file)
@@ -23,11 +23,13 @@ extern void irq_ctx_init(int cpu);
 
 #define __ARCH_HAS_DO_SOFTIRQ
 
+struct irq_desc;
+
 #ifdef CONFIG_HOTPLUG_CPU
 #include <linux/cpumask.h>
 extern int check_irq_vectors_for_cpu_disable(void);
 extern void fixup_irqs(void);
-extern void irq_force_complete_move(int);
+extern void irq_force_complete_move(struct irq_desc *desc);
 #endif
 
 #ifdef CONFIG_HAVE_KVM
@@ -37,7 +39,6 @@ extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void));
 extern void (*x86_platform_ipi_callback)(void);
 extern void native_init_IRQ(void);
 
-struct irq_desc;
 extern bool handle_irq(struct irq_desc *desc, struct pt_regs *regs);
 
 extern __visible unsigned int do_IRQ(struct pt_regs *regs);
index 04c27a0..4432ab7 100644 (file)
@@ -366,20 +366,18 @@ static inline enum page_cache_mode pgprot2cachemode(pgprot_t pgprot)
 }
 static inline pgprot_t pgprot_4k_2_large(pgprot_t pgprot)
 {
+       pgprotval_t val = pgprot_val(pgprot);
        pgprot_t new;
-       unsigned long val;
 
-       val = pgprot_val(pgprot);
        pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) |
                ((val & _PAGE_PAT) << (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT));
        return new;
 }
 static inline pgprot_t pgprot_large_2_4k(pgprot_t pgprot)
 {
+       pgprotval_t val = pgprot_val(pgprot);
        pgprot_t new;
-       unsigned long val;
 
-       val = pgprot_val(pgprot);
        pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) |
                          ((val & _PAGE_PAT_LARGE) >>
                           (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT));
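
Switching the temporaries to pgprotval_t matters on 32-bit PAE, where pgprotval_t is 64 bits wide while unsigned long is 32: the old code silently truncated the upper half of the protection bits, including the NX bit (bit 63), while shuffling PAT bits. Sketch of the width difference, assuming the usual type definitions:

    typedef u64 pgprotval_t;               /* 32-bit PAE builds          */
    /* typedef unsigned long pgprotval_t;     64-bit and non-PAE builds  */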
index f253218..fdb0fbf 100644 (file)
@@ -2521,6 +2521,7 @@ void __init setup_ioapic_dest(void)
 {
        int pin, ioapic, irq, irq_entry;
        const struct cpumask *mask;
+       struct irq_desc *desc;
        struct irq_data *idata;
        struct irq_chip *chip;
 
@@ -2536,7 +2537,9 @@ void __init setup_ioapic_dest(void)
                if (irq < 0 || !mp_init_irq_at_boot(ioapic, irq))
                        continue;
 
-               idata = irq_get_irq_data(irq);
+               desc = irq_to_desc(irq);
+               raw_spin_lock_irq(&desc->lock);
+               idata = irq_desc_get_irq_data(desc);
 
                /*
                 * Honour affinities which have been set in early boot
@@ -2550,6 +2553,7 @@ void __init setup_ioapic_dest(void)
                /* Might be lapic_chip for irq 0 */
                if (chip->irq_set_affinity)
                        chip->irq_set_affinity(idata, mask, false);
+               raw_spin_unlock_irq(&desc->lock);
        }
 }
 #endif
index 908cb37..3b670df 100644 (file)
@@ -31,7 +31,7 @@ struct apic_chip_data {
 struct irq_domain *x86_vector_domain;
 EXPORT_SYMBOL_GPL(x86_vector_domain);
 static DEFINE_RAW_SPINLOCK(vector_lock);
-static cpumask_var_t vector_cpumask;
+static cpumask_var_t vector_cpumask, vector_searchmask, searched_cpumask;
 static struct irq_chip lapic_controller;
 #ifdef CONFIG_X86_IO_APIC
 static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY];
@@ -118,35 +118,47 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
         */
        static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
        static int current_offset = VECTOR_OFFSET_START % 16;
-       int cpu, err;
+       int cpu, vector;
 
-       if (d->move_in_progress)
+       /*
+        * If there is still a move in progress or the previous move has not
+        * been cleaned up completely, tell the caller to come back later.
+        */
+       if (d->move_in_progress ||
+           cpumask_intersects(d->old_domain, cpu_online_mask))
                return -EBUSY;
 
        /* Only try and allocate irqs on cpus that are present */
-       err = -ENOSPC;
        cpumask_clear(d->old_domain);
+       cpumask_clear(searched_cpumask);
        cpu = cpumask_first_and(mask, cpu_online_mask);
        while (cpu < nr_cpu_ids) {
-               int new_cpu, vector, offset;
+               int new_cpu, offset;
 
+               /* Get the possible target cpus for @mask/@cpu from the apic */
                apic->vector_allocation_domain(cpu, vector_cpumask, mask);
 
+               /*
+                * Clear the offline cpus from @vector_cpumask for searching
+                * and verify whether the result overlaps with @mask. If true,
+                * then the call to apic->cpu_mask_to_apicid_and() will
+                * succeed as well. If not, no point in trying to find a
+                * vector in this mask.
+                */
+               cpumask_and(vector_searchmask, vector_cpumask, cpu_online_mask);
+               if (!cpumask_intersects(vector_searchmask, mask))
+                       goto next_cpu;
+
                if (cpumask_subset(vector_cpumask, d->domain)) {
-                       err = 0;
                        if (cpumask_equal(vector_cpumask, d->domain))
-                               break;
+                               goto success;
                        /*
-                        * New cpumask using the vector is a proper subset of
-                        * the current in use mask. So cleanup the vector
-                        * allocation for the members that are not used anymore.
+                        * Mark the cpus which are no longer in the mask for
+                        * cleanup.
                         */
-                       cpumask_andnot(d->old_domain, d->domain,
-                                      vector_cpumask);
-                       d->move_in_progress =
-                          cpumask_intersects(d->old_domain, cpu_online_mask);
-                       cpumask_and(d->domain, d->domain, vector_cpumask);
-                       break;
+                       cpumask_andnot(d->old_domain, d->domain, vector_cpumask);
+                       vector = d->cfg.vector;
+                       goto update;
                }
 
                vector = current_vector;
@@ -158,45 +170,60 @@ next:
                        vector = FIRST_EXTERNAL_VECTOR + offset;
                }
 
-               if (unlikely(current_vector == vector)) {
-                       cpumask_or(d->old_domain, d->old_domain,
-                                  vector_cpumask);
-                       cpumask_andnot(vector_cpumask, mask, d->old_domain);
-                       cpu = cpumask_first_and(vector_cpumask,
-                                               cpu_online_mask);
-                       continue;
-               }
+               /* If the search wrapped around, try the next cpu */
+               if (unlikely(current_vector == vector))
+                       goto next_cpu;
 
                if (test_bit(vector, used_vectors))
                        goto next;
 
-               for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) {
+               for_each_cpu(new_cpu, vector_searchmask) {
                        if (!IS_ERR_OR_NULL(per_cpu(vector_irq, new_cpu)[vector]))
                                goto next;
                }
                /* Found one! */
                current_vector = vector;
                current_offset = offset;
-               if (d->cfg.vector) {
+               /* Schedule the old vector for cleanup on all cpus */
+               if (d->cfg.vector)
                        cpumask_copy(d->old_domain, d->domain);
-                       d->move_in_progress =
-                          cpumask_intersects(d->old_domain, cpu_online_mask);
-               }
-               for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask)
+               for_each_cpu(new_cpu, vector_searchmask)
                        per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq);
-               d->cfg.vector = vector;
-               cpumask_copy(d->domain, vector_cpumask);
-               err = 0;
-               break;
-       }
+               goto update;
 
-       if (!err) {
-               /* cache destination APIC IDs into cfg->dest_apicid */
-               err = apic->cpu_mask_to_apicid_and(mask, d->domain,
-                                                  &d->cfg.dest_apicid);
+next_cpu:
+               /*
+                * We exclude the current @vector_cpumask from the requested
+                * @mask and try again with the next online cpu in the
+                * result. We cannot modify @mask, so we use @vector_cpumask
+                * as a temporary buffer here as it will be reassigned when
+                * calling apic->vector_allocation_domain() above.
+                */
+               cpumask_or(searched_cpumask, searched_cpumask, vector_cpumask);
+               cpumask_andnot(vector_cpumask, mask, searched_cpumask);
+               cpu = cpumask_first_and(vector_cpumask, cpu_online_mask);
+               continue;
        }
+       return -ENOSPC;
 
-       return err;
+update:
+       /*
+        * Exclude offline cpus from the cleanup mask and set the
+        * move_in_progress flag when the result is not empty.
+        */
+       cpumask_and(d->old_domain, d->old_domain, cpu_online_mask);
+       d->move_in_progress = !cpumask_empty(d->old_domain);
+       d->cfg.vector = vector;
+       cpumask_copy(d->domain, vector_cpumask);
+success:
+       /*
+        * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail
+        * as we already established, that mask & d->domain & cpu_online_mask
+        * is not empty.
+        */
+       BUG_ON(apic->cpu_mask_to_apicid_and(mask, d->domain,
+                                           &d->cfg.dest_apicid));
+       return 0;
 }
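
Condensed control flow of the reworked __assign_irq_vector(), for orientation (pseudocode in comment form, not literal kernel code):

    /*
     * for each candidate cpu in (mask & online):
     *     vector_cpumask    = apic->vector_allocation_domain(cpu, mask)
     *     vector_searchmask = vector_cpumask & cpu_online_mask
     *     if (!(vector_searchmask & mask))          -> next_cpu
     *     if (vector_cpumask is a subset of domain) -> success (equal) or
     *                                                  update (shrunk)
     *     scan vectors over vector_searchmask       -> update on first free
     * return -ENOSPC
     *
     * update:  old_domain &= online; move_in_progress = !empty;
     *          install the new vector and domain
     * success: cpu_mask_to_apicid_and() result is cached; it cannot fail
     *          because mask & domain & online was proven non-empty
     */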
 
 static int assign_irq_vector(int irq, struct apic_chip_data *data,
@@ -226,10 +253,8 @@ static int assign_irq_vector_policy(int irq, int node,
 static void clear_irq_vector(int irq, struct apic_chip_data *data)
 {
        struct irq_desc *desc;
-       unsigned long flags;
        int cpu, vector;
 
-       raw_spin_lock_irqsave(&vector_lock, flags);
        BUG_ON(!data->cfg.vector);
 
        vector = data->cfg.vector;
@@ -239,10 +264,13 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
        data->cfg.vector = 0;
        cpumask_clear(data->domain);
 
-       if (likely(!data->move_in_progress)) {
-               raw_spin_unlock_irqrestore(&vector_lock, flags);
+       /*
+        * If move is in progress or the old_domain mask is not empty,
+        * i.e. the cleanup IPI has not been processed yet, we need to remove
+        * the old references to desc from all cpus vector tables.
+        */
+       if (!data->move_in_progress && cpumask_empty(data->old_domain))
                return;
-       }
 
        desc = irq_to_desc(irq);
        for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) {
@@ -255,7 +283,6 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
                }
        }
        data->move_in_progress = 0;
-       raw_spin_unlock_irqrestore(&vector_lock, flags);
 }
 
 void init_irq_alloc_info(struct irq_alloc_info *info,
@@ -276,19 +303,24 @@ void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src)
 static void x86_vector_free_irqs(struct irq_domain *domain,
                                 unsigned int virq, unsigned int nr_irqs)
 {
+       struct apic_chip_data *apic_data;
        struct irq_data *irq_data;
+       unsigned long flags;
        int i;
 
        for (i = 0; i < nr_irqs; i++) {
                irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i);
                if (irq_data && irq_data->chip_data) {
+                       raw_spin_lock_irqsave(&vector_lock, flags);
                        clear_irq_vector(virq + i, irq_data->chip_data);
-                       free_apic_chip_data(irq_data->chip_data);
+                       apic_data = irq_data->chip_data;
+                       irq_domain_reset_irq_data(irq_data);
+                       raw_spin_unlock_irqrestore(&vector_lock, flags);
+                       free_apic_chip_data(apic_data);
 #ifdef CONFIG_X86_IO_APIC
                        if (virq + i < nr_legacy_irqs())
                                legacy_irq_data[virq + i] = NULL;
 #endif
-                       irq_domain_reset_irq_data(irq_data);
                }
        }
 }
@@ -406,6 +438,8 @@ int __init arch_early_irq_init(void)
        arch_init_htirq_domain(x86_vector_domain);
 
        BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL));
+       BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
+       BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL));
 
        return arch_early_ioapic_init();
 }
@@ -494,14 +528,7 @@ static int apic_set_affinity(struct irq_data *irq_data,
                return -EINVAL;
 
        err = assign_irq_vector(irq, data, dest);
-       if (err) {
-               if (assign_irq_vector(irq, data,
-                                     irq_data_get_affinity_mask(irq_data)))
-                       pr_err("Failed to recover vector for irq %d\n", irq);
-               return err;
-       }
-
-       return IRQ_SET_MASK_OK;
+       return err ? err : IRQ_SET_MASK_OK;
 }
 
 static struct irq_chip lapic_controller = {
@@ -513,20 +540,12 @@ static struct irq_chip lapic_controller = {
 #ifdef CONFIG_SMP
 static void __send_cleanup_vector(struct apic_chip_data *data)
 {
-       cpumask_var_t cleanup_mask;
-
-       if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
-               unsigned int i;
-
-               for_each_cpu_and(i, data->old_domain, cpu_online_mask)
-                       apic->send_IPI_mask(cpumask_of(i),
-                                           IRQ_MOVE_CLEANUP_VECTOR);
-       } else {
-               cpumask_and(cleanup_mask, data->old_domain, cpu_online_mask);
-               apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-               free_cpumask_var(cleanup_mask);
-       }
+       raw_spin_lock(&vector_lock);
+       cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
        data->move_in_progress = 0;
+       if (!cpumask_empty(data->old_domain))
+               apic->send_IPI_mask(data->old_domain, IRQ_MOVE_CLEANUP_VECTOR);
+       raw_spin_unlock(&vector_lock);
 }
 
 void send_cleanup_vector(struct irq_cfg *cfg)
@@ -570,12 +589,25 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
                        goto unlock;
 
                /*
-                * Check if the irq migration is in progress. If so, we
-                * haven't received the cleanup request yet for this irq.
+                * Nothing to cleanup if irq migration is in progress
+                * or this cpu is not set in the cleanup mask.
                 */
-               if (data->move_in_progress)
+               if (data->move_in_progress ||
+                   !cpumask_test_cpu(me, data->old_domain))
                        goto unlock;
 
+               /*
+                * We have two cases to handle here:
+                * 1) vector is unchanged but the target mask got reduced
+                * 2) vector and the target mask have changed
+                *
+                * #1 is obvious, but in #2 we have two vectors with the same
+                * irq descriptor: the old and the new vector. So we need to
+                * make sure that we only cleanup the old vector. The new
+                * vector has the current @vector number in the config and
+                * this cpu is part of the target mask. We better leave that
+                * one alone.
+                */
                if (vector == data->cfg.vector &&
                    cpumask_test_cpu(me, data->domain))
                        goto unlock;
@@ -593,6 +625,7 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
                        goto unlock;
                }
                __this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
+               cpumask_clear_cpu(me, data->old_domain);
 unlock:
                raw_spin_unlock(&desc->lock);
        }
@@ -621,12 +654,48 @@ void irq_complete_move(struct irq_cfg *cfg)
        __irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
 }
 
-void irq_force_complete_move(int irq)
+/*
+ * Called with @desc->lock held and interrupts disabled.
+ */
+void irq_force_complete_move(struct irq_desc *desc)
 {
-       struct irq_cfg *cfg = irq_cfg(irq);
+       struct irq_data *irqdata = irq_desc_get_irq_data(desc);
+       struct apic_chip_data *data = apic_chip_data(irqdata);
+       struct irq_cfg *cfg = data ? &data->cfg : NULL;
 
-       if (cfg)
-               __irq_complete_move(cfg, cfg->vector);
+       if (!cfg)
+               return;
+
+       __irq_complete_move(cfg, cfg->vector);
+
+       /*
+        * This is tricky. If the cleanup of @data->old_domain has not been
+        * done yet, then the following setaffinity call will fail with
+        * -EBUSY. This can leave the interrupt in a stale state.
+        *
+        * The cleanup cannot make progress because we hold @desc->lock. So in
+        * case @data->old_domain is not yet cleaned up, we need to drop the
+        * lock and acquire it again. @desc cannot go away, because the
+        * hotplug code holds the sparse irq lock.
+        */
+       raw_spin_lock(&vector_lock);
+       /* Clean out all offline cpus (including ourself) first. */
+       cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
+       while (!cpumask_empty(data->old_domain)) {
+               raw_spin_unlock(&vector_lock);
+               raw_spin_unlock(&desc->lock);
+               cpu_relax();
+               raw_spin_lock(&desc->lock);
+               /*
+                * Reevaluate apic_chip_data. It might have been cleared after
+                * we dropped @desc->lock.
+                */
+               data = apic_chip_data(irqdata);
+               if (!data)
+                       return;
+               raw_spin_lock(&vector_lock);
+       }
+       raw_spin_unlock(&vector_lock);
 }
 #endif
 
index d760c6b..624db00 100644 (file)
@@ -889,7 +889,10 @@ void __init uv_system_init(void)
                return;
        }
        pr_info("UV: Found %s hub\n", hub);
-       map_low_mmrs();
+
+       /* We now only need to map the MMRs on UV1 */
+       if (is_uv1_hub())
+               map_low_mmrs();
 
        m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
        m_val = m_n_config.s.m_skt;
index a667078..fed2ab1 100644 (file)
@@ -1960,7 +1960,8 @@ intel_bts_constraints(struct perf_event *event)
 
 static int intel_alt_er(int idx, u64 config)
 {
-       int alt_idx;
+       int alt_idx = idx;
+
        if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1))
                return idx;
 
@@ -2897,14 +2898,12 @@ static void intel_pmu_cpu_starting(int cpu)
                return;
 
        if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) {
-               void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
-
                for_each_cpu(i, topology_sibling_cpumask(cpu)) {
                        struct intel_shared_regs *pc;
 
                        pc = per_cpu(cpu_hw_events, i).shared_regs;
                        if (pc && pc->core_id == core_id) {
-                               *onln = cpuc->shared_regs;
+                               cpuc->kfree_on_online[0] = cpuc->shared_regs;
                                cpuc->shared_regs = pc;
                                break;
                        }
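
The alt_idx initializer closes an uninitialized-variable hole: when idx named neither RSP extra register, the old code went on to index x86_pmu.extra_regs[] with a never-assigned alt_idx. The function after the fix, condensed for illustration:

    static int intel_alt_er(int idx, u64 config)
    {
            int alt_idx = idx;                      /* the fix */

            if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1))
                    return idx;
            if (idx == EXTRA_REG_RSP_0)
                    alt_idx = EXTRA_REG_RSP_1;
            if (idx == EXTRA_REG_RSP_1)
                    alt_idx = EXTRA_REG_RSP_0;
            if (config & ~x86_pmu.extra_regs[alt_idx].valid_mask)
                    return idx;
            return alt_idx;
    }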
index f97f807..3bf41d4 100644 (file)
@@ -995,6 +995,9 @@ static int __init uncore_pci_init(void)
        case 87: /* Knights Landing */
                ret = knl_uncore_pci_init();
                break;
+       case 94: /* SkyLake */
+               ret = skl_uncore_pci_init();
+               break;
        default:
                return 0;
        }
index 07aa2d6..a7086b8 100644 (file)
@@ -336,6 +336,7 @@ int snb_uncore_pci_init(void);
 int ivb_uncore_pci_init(void);
 int hsw_uncore_pci_init(void);
 int bdw_uncore_pci_init(void);
+int skl_uncore_pci_init(void);
 void snb_uncore_cpu_init(void);
 void nhm_uncore_cpu_init(void);
 int snb_pci2phy_map_init(int devid);
index 0b93482..2bd030d 100644 (file)
@@ -8,6 +8,7 @@
 #define PCI_DEVICE_ID_INTEL_HSW_IMC    0x0c00
 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC  0x0a04
 #define PCI_DEVICE_ID_INTEL_BDW_IMC    0x1604
+#define PCI_DEVICE_ID_INTEL_SKL_IMC    0x191f
 
 /* SNB event control */
 #define SNB_UNC_CTL_EV_SEL_MASK                        0x000000ff
@@ -524,6 +525,14 @@ static const struct pci_device_id bdw_uncore_pci_ids[] = {
        { /* end: all zeroes */ },
 };
 
+static const struct pci_device_id skl_uncore_pci_ids[] = {
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* end: all zeroes */ },
+};
+
 static struct pci_driver snb_uncore_pci_driver = {
        .name           = "snb_uncore",
        .id_table       = snb_uncore_pci_ids,
@@ -544,6 +553,11 @@ static struct pci_driver bdw_uncore_pci_driver = {
        .id_table       = bdw_uncore_pci_ids,
 };
 
+static struct pci_driver skl_uncore_pci_driver = {
+       .name           = "skl_uncore",
+       .id_table       = skl_uncore_pci_ids,
+};
+
 struct imc_uncore_pci_dev {
        __u32 pci_id;
        struct pci_driver *driver;
@@ -558,6 +572,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
        IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver),    /* 4th Gen Core Processor */
        IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver),  /* 4th Gen Core ULT Mobile Processor */
        IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver),    /* 5th Gen Core U */
+       IMC_DEV(SKL_IMC, &skl_uncore_pci_driver),    /* 6th Gen Core */
        {  /* end marker */ }
 };
 
@@ -610,6 +625,11 @@ int bdw_uncore_pci_init(void)
        return imc_uncore_pci_init();
 }
 
+int skl_uncore_pci_init(void)
+{
+       return imc_uncore_pci_init();
+}
+
 /* end of Sandy Bridge uncore support */
 
 /* Nehalem uncore support */
index f129a9a..2c0f340 100644 (file)
@@ -192,5 +192,13 @@ void __init x86_64_start_reservations(char *real_mode_data)
 
        reserve_ebda_region();
 
+       switch (boot_params.hdr.hardware_subarch) {
+       case X86_SUBARCH_INTEL_MID:
+               x86_intel_mid_early_setup();
+               break;
+       default:
+               break;
+       }
+
        start_kernel();
 }
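
hardware_subarch arrives in the zero page from the boot loader; the switch keys on the subarch IDs, which follow the documented numbering (sketch of the enum, assuming the uapi asm/bootparam.h layout):

    enum x86_hardware_subarch {
            X86_SUBARCH_PC = 0,
            X86_SUBARCH_LGUEST,
            X86_SUBARCH_XEN,
            X86_SUBARCH_INTEL_MID,
            X86_SUBARCH_CE4100,
            X86_SUBARCH_MAX,
    };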
index f8062aa..61521dc 100644 (file)
@@ -462,7 +462,7 @@ void fixup_irqs(void)
                 * non intr-remapping case, we can't wait till this interrupt
                 * arrives at this cpu before completing the irq move.
                 */
-               irq_force_complete_move(irq);
+               irq_force_complete_move(desc);
 
                if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
                        break_affinity = 1;
@@ -470,6 +470,15 @@ void fixup_irqs(void)
                }
 
                chip = irq_data_get_irq_chip(data);
+               /*
+                * The interrupt descriptor might have been cleaned up
+                * already, but it is not yet removed from the radix tree
+                */
+               if (!chip) {
+                       raw_spin_unlock(&desc->lock);
+                       continue;
+               }
+
                if (!irqd_can_move_in_process_context(data) && chip->irq_mask)
                        chip->irq_mask(data);
 
index fc6a4c8..2440814 100644 (file)
@@ -33,7 +33,7 @@ struct cpa_data {
        pgd_t           *pgd;
        pgprot_t        mask_set;
        pgprot_t        mask_clr;
-       int             numpages;
+       unsigned long   numpages;
        int             flags;
        unsigned long   pfn;
        unsigned        force_split : 1;
@@ -1350,7 +1350,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
                 * CPA operation. Either a large page has been
                 * preserved or a single page update happened.
                 */
-               BUG_ON(cpa->numpages > numpages);
+               BUG_ON(cpa->numpages > numpages || !cpa->numpages);
                numpages -= cpa->numpages;
                if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY))
                        cpa->curpage++;
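
The widening of numpages pairs with the strengthened BUG_ON. Illustrative arithmetic for why it matters:

    /* As an int, a page count of 2^31 or more (8+ TiB of 4 KiB pages)
     * wraps negative, and "while (numpages)" in the caller misbehaves.
     * As an unsigned long, (1UL << 33) pages shifted by PAGE_SHIFT is
     * simply 1UL << 45 (32 TiB). The added "!cpa->numpages" check also
     * turns a zero-progress iteration into a BUG instead of an endless
     * loop.
     */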
index 1c7380d..2d66db8 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/memblock.h>
 #include <linux/bootmem.h>
 #include <linux/acpi.h>
+#include <linux/dmi.h>
 #include <asm/efi.h>
 #include <asm/uv/uv.h>
 
@@ -248,6 +249,16 @@ out:
        return ret;
 }
 
+static const struct dmi_system_id sgi_uv1_dmi[] = {
+       { NULL, "SGI UV1",
+               {       DMI_MATCH(DMI_PRODUCT_NAME,     "Stoutland Platform"),
+                       DMI_MATCH(DMI_PRODUCT_VERSION,  "1.0"),
+                       DMI_MATCH(DMI_BIOS_VENDOR,      "SGI.COM"),
+               }
+       },
+       { } /* NULL entry stops DMI scanning */
+};
+
 void __init efi_apply_memmap_quirks(void)
 {
        /*
@@ -260,10 +271,8 @@ void __init efi_apply_memmap_quirks(void)
                efi_unmap_memmap();
        }
 
-       /*
-        * UV doesn't support the new EFI pagetable mapping yet.
-        */
-       if (is_uv_system())
+       /* UV2+ BIOS has a fix for this issue.  UV1 still needs the quirk. */
+       if (dmi_check_system(sgi_uv1_dmi))
                set_bit(EFI_OLD_MEMMAP, &efi.flags);
 }
 
index 1bbc21e..90bb997 100644 (file)
@@ -138,7 +138,7 @@ static void intel_mid_arch_setup(void)
                intel_mid_ops = get_intel_mid_ops[__intel_mid_cpu_chip]();
        else {
                intel_mid_ops = get_intel_mid_ops[INTEL_MID_CPU_CHIP_PENWELL]();
-               pr_info("ARCH: Unknown SoC, assuming PENWELL!\n");
+               pr_info("ARCH: Unknown SoC, assuming Penwell!\n");
        }
 
 out:
@@ -214,12 +214,10 @@ static inline int __init setup_x86_intel_mid_timer(char *arg)
        else if (strcmp("lapic_and_apbt", arg) == 0)
                intel_mid_timer_options = INTEL_MID_TIMER_LAPIC_APBT;
        else {
-               pr_warn("X86 INTEL_MID timer option %s not recognised"
-                          " use x86_intel_mid_timer=apbt_only or lapic_and_apbt\n",
-                          arg);
+               pr_warn("X86 INTEL_MID timer option %s not recognised, use x86_intel_mid_timer=apbt_only or lapic_and_apbt\n",
+                       arg);
                return -EINVAL;
        }
        return 0;
 }
 __setup("x86_intel_mid_timer=", setup_x86_intel_mid_timer);
-
index c1bdafa..c61b6c3 100644 (file)
@@ -220,11 +220,12 @@ static int imr_dbgfs_state_show(struct seq_file *s, void *unused)
                if (imr_is_enabled(&imr)) {
                        base = imr_to_phys(imr.addr_lo);
                        end = imr_to_phys(imr.addr_hi) + IMR_MASK;
+                       size = end - base + 1;
                } else {
                        base = 0;
                        end = 0;
+                       size = 0;
                }
-               size = end - base;
                seq_printf(s, "imr%02i: base=%pa, end=%pa, size=0x%08zx "
                           "rmask=0x%08x, wmask=0x%08x, %s, %s\n", i,
                           &base, &end, size, imr.rmask, imr.wmask,
@@ -579,6 +580,7 @@ static void __init imr_fixup_memmap(struct imr_device *idev)
 {
        phys_addr_t base = virt_to_phys(&_text);
        size_t size = virt_to_phys(&__end_rodata) - base;
+       unsigned long start, end;
        int i;
        int ret;
 
@@ -586,18 +588,24 @@ static void __init imr_fixup_memmap(struct imr_device *idev)
        for (i = 0; i < idev->max_imr; i++)
                imr_clear(i);
 
+       start = (unsigned long)_text;
+       end = (unsigned long)__end_rodata - 1;
+
        /*
         * Set up a locked IMR around the physical extent of the kernel
         * from the beginning of the .text section to the end of the
         * .rodata section as one physically contiguous block.
+        *
+        * We don't round up @size since it is already PAGE_SIZE aligned.
+        * See vmlinux.lds.S for details.
         */
        ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, true);
        if (ret < 0) {
-               pr_err("unable to setup IMR for kernel: (%p - %p)\n",
-                       &_text, &__end_rodata);
+               pr_err("unable to setup IMR for kernel: %zu KiB (%lx - %lx)\n",
+                       size / 1024, start, end);
        } else {
-               pr_info("protecting kernel .text - .rodata: %zu KiB (%p - %p)\n",
-                       size / 1024, &_text, &__end_rodata);
+               pr_info("protecting kernel .text - .rodata: %zu KiB (%lx - %lx)\n",
+                       size / 1024, start, end);
        }
 
 }
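
The size fix follows from the IMR registers describing an inclusive byte range, so the covered length is end - base + 1; the old end - base under-reported every enabled IMR by one byte. Worked arithmetic with hypothetical register values:

    phys_addr_t base = 0x000ff000;
    phys_addr_t end  = 0x000fffff;    /* last byte inside the IMR */
    size_t size = end - base + 1;     /* 0x1000 bytes; end - base would give 0xfff */

Moving the assignment into the branches is needed because end - base + 1 would report 1 byte for a disabled IMR whose bounds are both zero.
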
index 1699df5..888a7fe 100644 (file)
@@ -70,6 +70,18 @@ static struct bio *blk_bio_write_same_split(struct request_queue *q,
        return bio_split(bio, q->limits.max_write_same_sectors, GFP_NOIO, bs);
 }
 
+static inline unsigned get_max_io_size(struct request_queue *q,
+                                      struct bio *bio)
+{
+       unsigned sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector);
+       unsigned mask = queue_logical_block_size(q) - 1;
+
+       /* aligned to logical block size */
+       sectors &= ~(mask >> 9);
+
+       return sectors;
+}
+
 static struct bio *blk_bio_segment_split(struct request_queue *q,
                                         struct bio *bio,
                                         struct bio_set *bs,
@@ -81,6 +93,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
        unsigned front_seg_size = bio->bi_seg_front_size;
        bool do_split = true;
        struct bio *new = NULL;
+       const unsigned max_sectors = get_max_io_size(q, bio);
 
        bio_for_each_segment(bv, bio, iter) {
                /*
@@ -90,20 +103,19 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
                if (bvprvp && bvec_gap_to_prev(q, bvprvp, bv.bv_offset))
                        goto split;
 
-               if (sectors + (bv.bv_len >> 9) >
-                               blk_max_size_offset(q, bio->bi_iter.bi_sector)) {
+               if (sectors + (bv.bv_len >> 9) > max_sectors) {
                        /*
                         * Consider this a new segment if we're splitting in
                         * the middle of this vector.
                         */
                        if (nsegs < queue_max_segments(q) &&
-                           sectors < blk_max_size_offset(q,
-                                               bio->bi_iter.bi_sector)) {
+                           sectors < max_sectors) {
                                nsegs++;
-                               sectors = blk_max_size_offset(q,
-                                               bio->bi_iter.bi_sector);
+                               sectors = max_sectors;
                        }
-                       goto split;
+                       if (sectors)
+                               goto split;
+                       /* Make this single bvec the first segment */
                }
 
                if (bvprvp && blk_queue_cluster(q)) {
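
get_max_io_size() hoists blk_max_size_offset() out of the per-bvec loop (it is constant for the bio's starting sector) and rounds the result down to a logical-block multiple so a split can never end mid-block. To see the mask arithmetic, assume a 4096-byte logical block and 512-byte sectors:

    unsigned lbs     = 4096;        /* queue_logical_block_size(q) */
    unsigned mask    = lbs - 1;     /* 0xfff */
    unsigned sectors = 261;         /* hypothetical limit for this offset */

    sectors &= ~(mask >> 9);        /* mask >> 9 == 7: clears the low three
                                       bits, rounding 261 down to 256 */

The added "if (sectors)" check handles a first bvec that is by itself larger than max_sectors: instead of splitting off zero sectors, that single bvec becomes the first segment.
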
index 1cf1878..431fdda 100644 (file)
@@ -408,17 +408,18 @@ void blk_mq_unregister_disk(struct gendisk *disk)
        blk_mq_enable_hotplug();
 }
 
+void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx)
+{
+       kobject_init(&hctx->kobj, &blk_mq_hw_ktype);
+}
+
 static void blk_mq_sysfs_init(struct request_queue *q)
 {
-       struct blk_mq_hw_ctx *hctx;
        struct blk_mq_ctx *ctx;
        int i;
 
        kobject_init(&q->mq_kobj, &blk_mq_ktype);
 
-       queue_for_each_hw_ctx(q, hctx, i)
-               kobject_init(&hctx->kobj, &blk_mq_hw_ktype);
-
        queue_for_each_ctx(q, ctx, i)
                kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
 }
index 4c0622f..645eb9e 100644 (file)
@@ -1742,31 +1742,6 @@ static int blk_mq_init_hctx(struct request_queue *q,
        return -1;
 }
 
-static int blk_mq_init_hw_queues(struct request_queue *q,
-               struct blk_mq_tag_set *set)
-{
-       struct blk_mq_hw_ctx *hctx;
-       unsigned int i;
-
-       /*
-        * Initialize hardware queues
-        */
-       queue_for_each_hw_ctx(q, hctx, i) {
-               if (blk_mq_init_hctx(q, set, hctx, i))
-                       break;
-       }
-
-       if (i == q->nr_hw_queues)
-               return 0;
-
-       /*
-        * Init failed
-        */
-       blk_mq_exit_hw_queues(q, set, i);
-
-       return 1;
-}
-
 static void blk_mq_init_cpu_queues(struct request_queue *q,
                                   unsigned int nr_hw_queues)
 {
@@ -1824,6 +1799,7 @@ static void blk_mq_map_swqueue(struct request_queue *q,
                        continue;
 
                hctx = q->mq_ops->map_queue(q, i);
+
                cpumask_set_cpu(i, hctx->cpumask);
                ctx->index_hw = hctx->nr_ctx;
                hctx->ctxs[hctx->nr_ctx++] = ctx;
@@ -1972,54 +1948,89 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 }
 EXPORT_SYMBOL(blk_mq_init_queue);
 
-struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
-                                                 struct request_queue *q)
+static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
+                                               struct request_queue *q)
 {
-       struct blk_mq_hw_ctx **hctxs;
-       struct blk_mq_ctx __percpu *ctx;
-       unsigned int *map;
-       int i;
-
-       ctx = alloc_percpu(struct blk_mq_ctx);
-       if (!ctx)
-               return ERR_PTR(-ENOMEM);
-
-       hctxs = kmalloc_node(set->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL,
-                       set->numa_node);
-
-       if (!hctxs)
-               goto err_percpu;
-
-       map = blk_mq_make_queue_map(set);
-       if (!map)
-               goto err_map;
+       int i, j;
+       struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx;
 
+       blk_mq_sysfs_unregister(q);
        for (i = 0; i < set->nr_hw_queues; i++) {
-               int node = blk_mq_hw_queue_to_node(map, i);
+               int node;
 
+               if (hctxs[i])
+                       continue;
+
+               node = blk_mq_hw_queue_to_node(q->mq_map, i);
                hctxs[i] = kzalloc_node(sizeof(struct blk_mq_hw_ctx),
                                        GFP_KERNEL, node);
                if (!hctxs[i])
-                       goto err_hctxs;
+                       break;
 
                if (!zalloc_cpumask_var_node(&hctxs[i]->cpumask, GFP_KERNEL,
-                                               node))
-                       goto err_hctxs;
+                                               node)) {
+                       kfree(hctxs[i]);
+                       hctxs[i] = NULL;
+                       break;
+               }
 
                atomic_set(&hctxs[i]->nr_active, 0);
                hctxs[i]->numa_node = node;
                hctxs[i]->queue_num = i;
+
+               if (blk_mq_init_hctx(q, set, hctxs[i], i)) {
+                       free_cpumask_var(hctxs[i]->cpumask);
+                       kfree(hctxs[i]);
+                       hctxs[i] = NULL;
+                       break;
+               }
+               blk_mq_hctx_kobj_init(hctxs[i]);
        }
+       for (j = i; j < q->nr_hw_queues; j++) {
+               struct blk_mq_hw_ctx *hctx = hctxs[j];
+
+               if (hctx) {
+                       if (hctx->tags) {
+                               blk_mq_free_rq_map(set, hctx->tags, j);
+                               set->tags[j] = NULL;
+                       }
+                       blk_mq_exit_hctx(q, set, hctx, j);
+                       free_cpumask_var(hctx->cpumask);
+                       kobject_put(&hctx->kobj);
+                       kfree(hctx->ctxs);
+                       kfree(hctx);
+                       hctxs[j] = NULL;
+
+               }
+       }
+       q->nr_hw_queues = i;
+       blk_mq_sysfs_register(q);
+}
+
+struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
+                                                 struct request_queue *q)
+{
+       q->queue_ctx = alloc_percpu(struct blk_mq_ctx);
+       if (!q->queue_ctx)
+               return ERR_PTR(-ENOMEM);
+
+       q->queue_hw_ctx = kzalloc_node(nr_cpu_ids * sizeof(*(q->queue_hw_ctx)),
+                                               GFP_KERNEL, set->numa_node);
+       if (!q->queue_hw_ctx)
+               goto err_percpu;
+
+       q->mq_map = blk_mq_make_queue_map(set);
+       if (!q->mq_map)
+               goto err_map;
+
+       blk_mq_realloc_hw_ctxs(set, q);
+       if (!q->nr_hw_queues)
+               goto err_hctxs;
 
        INIT_WORK(&q->timeout_work, blk_mq_timeout_work);
        blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ);
 
        q->nr_queues = nr_cpu_ids;
-       q->nr_hw_queues = set->nr_hw_queues;
-       q->mq_map = map;
-
-       q->queue_ctx = ctx;
-       q->queue_hw_ctx = hctxs;
 
        q->mq_ops = set->ops;
        q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
@@ -2048,9 +2059,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 
        blk_mq_init_cpu_queues(q, set->nr_hw_queues);
 
-       if (blk_mq_init_hw_queues(q, set))
-               goto err_hctxs;
-
        get_online_cpus();
        mutex_lock(&all_q_mutex);
 
@@ -2064,17 +2072,11 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
        return q;
 
 err_hctxs:
-       kfree(map);
-       for (i = 0; i < set->nr_hw_queues; i++) {
-               if (!hctxs[i])
-                       break;
-               free_cpumask_var(hctxs[i]->cpumask);
-               kfree(hctxs[i]);
-       }
+       kfree(q->mq_map);
 err_map:
-       kfree(hctxs);
+       kfree(q->queue_hw_ctx);
 err_percpu:
-       free_percpu(ctx);
+       free_percpu(q->queue_ctx);
        return ERR_PTR(-ENOMEM);
 }
 EXPORT_SYMBOL(blk_mq_init_allocated_queue);
@@ -2282,9 +2284,13 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
                set->nr_hw_queues = 1;
                set->queue_depth = min(64U, set->queue_depth);
        }
+       /*
+        * There is no use for more h/w queues than cpus.
+        */
+       if (set->nr_hw_queues > nr_cpu_ids)
+               set->nr_hw_queues = nr_cpu_ids;
 
-       set->tags = kmalloc_node(set->nr_hw_queues *
-                                sizeof(struct blk_mq_tags *),
+       set->tags = kzalloc_node(nr_cpu_ids * sizeof(struct blk_mq_tags *),
                                 GFP_KERNEL, set->numa_node);
        if (!set->tags)
                return -ENOMEM;
@@ -2307,7 +2313,7 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
 {
        int i;
 
-       for (i = 0; i < set->nr_hw_queues; i++) {
+       for (i = 0; i < nr_cpu_ids; i++) {
                if (set->tags[i])
                        blk_mq_free_rq_map(set, set->tags[i], i);
        }
@@ -2339,6 +2345,35 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
        return ret;
 }
 
+void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
+{
+       struct request_queue *q;
+
+       if (nr_hw_queues > nr_cpu_ids)
+               nr_hw_queues = nr_cpu_ids;
+       if (nr_hw_queues < 1 || nr_hw_queues == set->nr_hw_queues)
+               return;
+
+       list_for_each_entry(q, &set->tag_list, tag_set_list)
+               blk_mq_freeze_queue(q);
+
+       set->nr_hw_queues = nr_hw_queues;
+       list_for_each_entry(q, &set->tag_list, tag_set_list) {
+               blk_mq_realloc_hw_ctxs(set, q);
+
+               if (q->nr_hw_queues > 1)
+                       blk_queue_make_request(q, blk_mq_make_request);
+               else
+                       blk_queue_make_request(q, blk_sq_make_request);
+
+               blk_mq_queue_reinit(q, cpu_online_mask);
+       }
+
+       list_for_each_entry(q, &set->tag_list, tag_set_list)
+               blk_mq_unfreeze_queue(q);
+}
+EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
+
 void blk_mq_disable_hotplug(void)
 {
        mutex_lock(&all_q_mutex);
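
blk_mq_update_nr_hw_queues() lets a driver resize its hardware-queue count at runtime: every queue sharing the tag set is frozen, blk_mq_realloc_hw_ctxs() allocates or tears down hctxs to match (sizing the tags and queue_hw_ctx arrays at nr_cpu_ids up front is what makes reuse possible), the make_request function is switched between the single- and multi-queue paths, and the CPU-to-queue map is rebuilt before the queues are unfrozen. A hypothetical caller, with invented driver names:

    /* Sketch: react to a controller reconfiguration by resizing the
     * shared tag set. Counts above nr_cpu_ids are clamped; counts below
     * 1 or equal to the current value are ignored by the helper. */
    static void mydrv_set_queue_count(struct mydrv_dev *mydev, int count)
    {
            blk_mq_update_nr_hw_queues(&mydev->tag_set, count);
    }
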
index eaede8e..9087b11 100644 (file)
@@ -57,6 +57,7 @@ extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int);
  */
 extern int blk_mq_sysfs_register(struct request_queue *q);
 extern void blk_mq_sysfs_unregister(struct request_queue *q);
+extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
 
 extern void blk_mq_rq_timed_out(struct request *req, bool reserved);
 
index 1f9093e..e3c591d 100644 (file)
@@ -632,6 +632,13 @@ static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg)
        return pblkg ? blkg_to_cfqg(pblkg) : NULL;
 }
 
+static inline bool cfqg_is_descendant(struct cfq_group *cfqg,
+                                     struct cfq_group *ancestor)
+{
+       return cgroup_is_descendant(cfqg_to_blkg(cfqg)->blkcg->css.cgroup,
+                                   cfqg_to_blkg(ancestor)->blkcg->css.cgroup);
+}
+
 static inline void cfqg_get(struct cfq_group *cfqg)
 {
        return blkg_get(cfqg_to_blkg(cfqg));
@@ -758,6 +765,11 @@ static void cfqg_stats_xfer_dead(struct cfq_group *cfqg)
 #else  /* CONFIG_CFQ_GROUP_IOSCHED */
 
 static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg) { return NULL; }
+static inline bool cfqg_is_descendant(struct cfq_group *cfqg,
+                                     struct cfq_group *ancestor)
+{
+       return true;
+}
 static inline void cfqg_get(struct cfq_group *cfqg) { }
 static inline void cfqg_put(struct cfq_group *cfqg) { }
 
@@ -2897,6 +2909,7 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 static void cfq_arm_slice_timer(struct cfq_data *cfqd)
 {
        struct cfq_queue *cfqq = cfqd->active_queue;
+       struct cfq_rb_root *st = cfqq->service_tree;
        struct cfq_io_cq *cic;
        unsigned long sl, group_idle = 0;
 
@@ -2947,8 +2960,13 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
                return;
        }
 
-       /* There are other queues in the group, don't do group idle */
-       if (group_idle && cfqq->cfqg->nr_cfqq > 1)
+       /*
+        * There are other queues in the group, or the group's think time
+        * is too large for this lone queue. Either way, skip group idle.
+        */
+       if (group_idle &&
+           (cfqq->cfqg->nr_cfqq > 1 ||
+            cfq_io_thinktime_big(cfqd, &st->ttime, true)))
                return;
 
        cfq_mark_cfqq_wait_request(cfqq);
@@ -3947,16 +3965,27 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
        if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq))
                return true;
 
-       if (new_cfqq->cfqg != cfqq->cfqg)
+       /*
+        * Treat ancestors of current cgroup the same way as current cgroup.
+        * For anybody else we disallow preemption to guarantee service
+        * fairness among cgroups.
+        */
+       if (!cfqg_is_descendant(cfqq->cfqg, new_cfqq->cfqg))
                return false;
 
        if (cfq_slice_used(cfqq))
                return true;
 
+       /*
+        * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
+        */
+       if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
+               return true;
+
+       WARN_ON_ONCE(cfqq->ioprio_class != new_cfqq->ioprio_class);
        /* Allow preemption only if we are idling on sync-noidle tree */
        if (cfqd->serving_wl_type == SYNC_NOIDLE_WORKLOAD &&
            cfqq_type(new_cfqq) == SYNC_NOIDLE_WORKLOAD &&
-           new_cfqq->service_tree->count == 2 &&
            RB_EMPTY_ROOT(&cfqq->sort_list))
                return true;
 
@@ -3967,12 +3996,6 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
        if ((rq->cmd_flags & REQ_PRIO) && !cfqq->prio_pending)
                return true;
 
-       /*
-        * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
-        */
-       if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
-               return true;
-
        /* An idle queue should not be idle now for some reason */
        if (RB_EMPTY_ROOT(&cfqq->sort_list) && !cfq_should_idle(cfqd, cfqq))
                return true;
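
cgroup_is_descendant() treats a cgroup as a descendant of itself, so the new gate keeps same-group preemption intact while limiting cross-group preemption to requests arriving from an ancestor. With a hypothetical hierarchy root -> A -> B:

    /* cfqg_is_descendant(cfqq->cfqg, new_cfqq->cfqg), illustrative: */
    /*   cfqq in B, new_cfqq in A  -> true:  ancestor may preempt      */
    /*   cfqq in A, new_cfqq in B  -> false: child may not preempt     */
    /*   both in A                 -> true:  same-group case as before */

The RT-class test moves above the WARN_ON_ONCE because differing ioprio classes are legitimate exactly in the RT-preempts-non-RT case, which now returns before the warning can fire.
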
index 90e2d54..1316ddd 100644 (file)
@@ -135,14 +135,6 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
                DMI_MATCH(DMI_PRODUCT_NAME, "UL30A"),
                },
        },
-       {
-       .callback = video_detect_force_vendor,
-       .ident = "Dell Inspiron 5737",
-       .matches = {
-               DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
-               DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 5737"),
-               },
-       },
 
        /*
         * These models have a working acpi_video backlight control, and using
index 47c4338..279e539 100644 (file)
@@ -284,6 +284,7 @@ out_free_priv_data:
 
        return err;
 }
+EXPORT_SYMBOL_GPL(platform_msi_domain_alloc_irqs);
 
 /**
  * platform_msi_domain_free_irqs - Free MSI interrupts for @dev
@@ -301,6 +302,7 @@ void platform_msi_domain_free_irqs(struct device *dev)
        msi_domain_free_irqs(dev->msi_domain, dev);
        platform_msi_free_descs(dev, 0, MAX_DEV_MSIS);
 }
+EXPORT_SYMBOL_GPL(platform_msi_domain_free_irqs);
 
 /**
  * platform_msi_get_host_data - Query the private data associated with
index 73d6e5d..f437afa 100644 (file)
@@ -558,10 +558,15 @@ static int platform_drv_probe(struct device *_dev)
                return ret;
 
        ret = dev_pm_domain_attach(_dev, true);
-       if (ret != -EPROBE_DEFER && drv->probe) {
-               ret = drv->probe(dev);
-               if (ret)
-                       dev_pm_domain_detach(_dev, true);
+       if (ret != -EPROBE_DEFER) {
+               if (drv->probe) {
+                       ret = drv->probe(dev);
+                       if (ret)
+                               dev_pm_domain_detach(_dev, true);
+               } else {
+                       /* don't fail when only dev_pm_domain_attach failed */
+                       ret = 0;
+               }
        }
 
        if (drv->prevent_deferred_probe && ret == -EPROBE_DEFER) {
index 6ac9a7f..784dbe8 100644 (file)
@@ -162,7 +162,7 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool timed)
 
 /**
  * genpd_queue_power_off_work - Queue up the execution of genpd_poweroff().
- * @genpd: PM domait to power off.
+ * @genpd: PM domain to power off.
  *
  * Queue up the execution of genpd_poweroff() unless it's already been done
  * before.
@@ -172,16 +172,15 @@ static void genpd_queue_power_off_work(struct generic_pm_domain *genpd)
        queue_work(pm_wq, &genpd->power_off_work);
 }
 
-static int genpd_poweron(struct generic_pm_domain *genpd);
-
 /**
  * __genpd_poweron - Restore power to a given PM domain and its masters.
  * @genpd: PM domain to power up.
+ * @depth: nesting count for lockdep.
  *
  * Restore power to @genpd and all of its masters so that it is possible to
  * resume a device belonging to it.
  */
-static int __genpd_poweron(struct generic_pm_domain *genpd)
+static int __genpd_poweron(struct generic_pm_domain *genpd, unsigned int depth)
 {
        struct gpd_link *link;
        int ret = 0;
@@ -196,11 +195,16 @@ static int __genpd_poweron(struct generic_pm_domain *genpd)
         * with it.
         */
        list_for_each_entry(link, &genpd->slave_links, slave_node) {
-               genpd_sd_counter_inc(link->master);
+               struct generic_pm_domain *master = link->master;
+
+               genpd_sd_counter_inc(master);
+
+               mutex_lock_nested(&master->lock, depth + 1);
+               ret = __genpd_poweron(master, depth + 1);
+               mutex_unlock(&master->lock);
 
-               ret = genpd_poweron(link->master);
                if (ret) {
-                       genpd_sd_counter_dec(link->master);
+                       genpd_sd_counter_dec(master);
                        goto err;
                }
        }
@@ -232,11 +236,12 @@ static int genpd_poweron(struct generic_pm_domain *genpd)
        int ret;
 
        mutex_lock(&genpd->lock);
-       ret = __genpd_poweron(genpd);
+       ret = __genpd_poweron(genpd, 0);
        mutex_unlock(&genpd->lock);
        return ret;
 }
 
+
 static int genpd_save_dev(struct generic_pm_domain *genpd, struct device *dev)
 {
        return GENPD_DEV_CALLBACK(genpd, int, save_state, dev);
@@ -484,7 +489,7 @@ static int pm_genpd_runtime_resume(struct device *dev)
        }
 
        mutex_lock(&genpd->lock);
-       ret = __genpd_poweron(genpd);
+       ret = __genpd_poweron(genpd, 0);
        mutex_unlock(&genpd->lock);
 
        if (ret)
@@ -1339,8 +1344,8 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd,
        if (!link)
                return -ENOMEM;
 
-       mutex_lock(&genpd->lock);
-       mutex_lock_nested(&subdomain->lock, SINGLE_DEPTH_NESTING);
+       mutex_lock(&subdomain->lock);
+       mutex_lock_nested(&genpd->lock, SINGLE_DEPTH_NESTING);
 
        if (genpd->status == GPD_STATE_POWER_OFF
            &&  subdomain->status != GPD_STATE_POWER_OFF) {
@@ -1363,8 +1368,8 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd,
                genpd_sd_counter_inc(genpd);
 
  out:
-       mutex_unlock(&subdomain->lock);
        mutex_unlock(&genpd->lock);
+       mutex_unlock(&subdomain->lock);
        if (ret)
                kfree(link);
        return ret;
@@ -1385,7 +1390,8 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
        if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(subdomain))
                return -EINVAL;
 
-       mutex_lock(&genpd->lock);
+       mutex_lock(&subdomain->lock);
+       mutex_lock_nested(&genpd->lock, SINGLE_DEPTH_NESTING);
 
        if (!list_empty(&subdomain->slave_links) || subdomain->device_count) {
                pr_warn("%s: unable to remove subdomain %s\n", genpd->name,
@@ -1398,22 +1404,19 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
                if (link->slave != subdomain)
                        continue;
 
-               mutex_lock_nested(&subdomain->lock, SINGLE_DEPTH_NESTING);
-
                list_del(&link->master_node);
                list_del(&link->slave_node);
                kfree(link);
                if (subdomain->status != GPD_STATE_POWER_OFF)
                        genpd_sd_counter_dec(genpd);
 
-               mutex_unlock(&subdomain->lock);
-
                ret = 0;
                break;
        }
 
 out:
        mutex_unlock(&genpd->lock);
+       mutex_unlock(&subdomain->lock);
 
        return ret;
 }
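
Two locking changes cooperate here. __genpd_poweron() now walks the master chain itself, taking each master's lock with mutex_lock_nested(depth + 1) so lockdep treats every level of the strictly ordered child-to-master chain as a distinct subclass rather than a recursive acquisition. The subdomain add/remove paths are switched to one consistent order, subdomain lock first with the genpd lock nested inside. A condensed view of the recursion pattern, error handling omitted:

    /* Sketch: caller holds genpd->lock at the given depth. */
    static int poweron_chain(struct generic_pm_domain *genpd, unsigned int depth)
    {
            struct gpd_link *link;

            list_for_each_entry(link, &genpd->slave_links, slave_node) {
                    mutex_lock_nested(&link->master->lock, depth + 1);
                    poweron_chain(link->master, depth + 1);  /* masters first */
                    mutex_unlock(&link->master->lock);
            }
            /* ...then power on genpd itself... */
            return 0;
    }
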
index 56777f0..33db740 100644 (file)
@@ -30,6 +30,8 @@ config CLKSRC_MMIO
 config DIGICOLOR_TIMER
        bool "Digicolor timer driver" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       select CLKSRC_MMIO
+       depends on HAS_IOMEM
        help
          Enables the support for the digicolor timer driver.
 
@@ -55,6 +57,7 @@ config ARMADA_370_XP_TIMER
        bool "Armada 370 and XP timer driver" if COMPILE_TEST
        depends on ARM
        select CLKSRC_OF
+       select CLKSRC_MMIO
        help
          Enables the support for the Armada 370 and XP timer driver.
 
@@ -76,6 +79,7 @@ config ORION_TIMER
 config SUN4I_TIMER
        bool "Sun4i timer driver" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       depends on HAS_IOMEM
        select CLKSRC_MMIO
        help
          Enables support for the Sun4i timer.
@@ -89,6 +93,7 @@ config SUN5I_HSTIMER
 
 config TEGRA_TIMER
        bool "Tegra timer driver" if COMPILE_TEST
+       select CLKSRC_MMIO
        depends on ARM
        help
          Enables support for the Tegra driver.
@@ -96,6 +101,7 @@ config TEGRA_TIMER
 config VT8500_TIMER
        bool "VT8500 timer driver" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       depends on HAS_IOMEM
        help
          Enables support for the VT8500 driver.
 
@@ -131,6 +137,7 @@ config CLKSRC_NOMADIK_MTU_SCHED_CLOCK
 config CLKSRC_DBX500_PRCMU
        bool "Clocksource PRCMU Timer" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       depends on HAS_IOMEM
        help
          Use the always on PRCMU Timer as clocksource
 
@@ -248,6 +255,7 @@ config CLKSRC_EXYNOS_MCT
 config CLKSRC_SAMSUNG_PWM
        bool "PWM timer drvier for Samsung S3C, S5P" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       depends on HAS_IOMEM
        help
          This is a new clocksource driver for the PWM timer found in
          Samsung S3C, S5P and Exynos SoCs, replacing an earlier driver
@@ -257,12 +265,14 @@ config CLKSRC_SAMSUNG_PWM
 config FSL_FTM_TIMER
        bool "Freescale FlexTimer Module driver" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       depends on HAS_IOMEM
        select CLKSRC_MMIO
        help
          Support for Freescale FlexTimer Module (FTM) timer.
 
 config VF_PIT_TIMER
        bool
+       select CLKSRC_MMIO
        help
          Support for Period Interrupt Timer on Freescale Vybrid Family SoCs.
 
@@ -360,6 +370,7 @@ config CLKSRC_TANGO_XTAL
 config CLKSRC_PXA
        bool "Clocksource for PXA or SA-11x0 platform" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       depends on HAS_IOMEM
        select CLKSRC_MMIO
        help
          This enables OST0 support available on PXA and SA-11x0
@@ -394,6 +405,7 @@ config CLKSRC_ST_LPC
        bool "Low power clocksource found in the LPC" if COMPILE_TEST
        select CLKSRC_OF if OF
        depends on HAS_IOMEM
+       select CLKSRC_MMIO
        help
          Enable this option to use the Low Power controller timer
          as clocksource.
index 6ee9140..4da2af9 100644 (file)
@@ -98,7 +98,8 @@ static int tc_shutdown(struct clock_event_device *d)
 
        __raw_writel(0xff, regs + ATMEL_TC_REG(2, IDR));
        __raw_writel(ATMEL_TC_CLKDIS, regs + ATMEL_TC_REG(2, CCR));
-       clk_disable(tcd->clk);
+       if (!clockevent_state_detached(d))
+               clk_disable(tcd->clk);
 
        return 0;
 }
index 9bc37c4..0ca74d0 100644 (file)
@@ -142,15 +142,16 @@ static int allocate_resources(int cpu, struct device **cdev,
 
 try_again:
        cpu_reg = regulator_get_optional(cpu_dev, reg);
-       if (IS_ERR(cpu_reg)) {
+       ret = PTR_ERR_OR_ZERO(cpu_reg);
+       if (ret) {
                /*
                 * If cpu's regulator supply node is present, but regulator is
                 * not yet registered, we should try deferring probe.
                 */
-               if (PTR_ERR(cpu_reg) == -EPROBE_DEFER) {
+               if (ret == -EPROBE_DEFER) {
                        dev_dbg(cpu_dev, "cpu%d regulator not ready, retry\n",
                                cpu);
-                       return -EPROBE_DEFER;
+                       return ret;
                }
 
                /* Try with "cpu-supply" */
@@ -159,18 +160,16 @@ try_again:
                        goto try_again;
                }
 
-               dev_dbg(cpu_dev, "no regulator for cpu%d: %ld\n",
-                       cpu, PTR_ERR(cpu_reg));
+               dev_dbg(cpu_dev, "no regulator for cpu%d: %d\n", cpu, ret);
        }
 
        cpu_clk = clk_get(cpu_dev, NULL);
-       if (IS_ERR(cpu_clk)) {
+       ret = PTR_ERR_OR_ZERO(cpu_clk);
+       if (ret) {
                /* put regulator */
                if (!IS_ERR(cpu_reg))
                        regulator_put(cpu_reg);
 
-               ret = PTR_ERR(cpu_clk);
-
                /*
                 * If cpu's clk node is present, but clock is not yet
                 * registered, we should try deferring probe.
index c35e7da..e979ec7 100644 (file)
@@ -48,11 +48,11 @@ static struct cpufreq_policy *next_policy(struct cpufreq_policy *policy,
                                          bool active)
 {
        do {
-               policy = list_next_entry(policy, policy_list);
-
                /* No more policies in the list */
-               if (&policy->policy_list == &cpufreq_policy_list)
+               if (list_is_last(&policy->policy_list, &cpufreq_policy_list))
                        return NULL;
+
+               policy = list_next_entry(policy, policy_list);
        } while (!suitable_policy(policy, active));
 
        return policy;
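
The reordered loop never forms a pointer from the list head: list_next_entry() applied to the last element computes container_of() on &cpufreq_policy_list itself, which is not embedded in a struct cpufreq_policy. The old code detected the end only after constructing that bogus pointer; testing list_is_last() before stepping means it never exists, even transiently:

    /* Generic form of the pattern (sketch): */
    if (list_is_last(&pos->node, &head))
            return NULL;                  /* end of list, nothing formed */
    pos = list_next_entry(pos, node);     /* safe: a successor exists */
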
index bab3a51..e0d1110 100644 (file)
@@ -387,16 +387,18 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy,
        if (!have_governor_per_policy())
                cdata->gdbs_data = dbs_data;
 
+       policy->governor_data = dbs_data;
+
        ret = sysfs_create_group(get_governor_parent_kobj(policy),
                                 get_sysfs_attr(dbs_data));
        if (ret)
                goto reset_gdbs_data;
 
-       policy->governor_data = dbs_data;
-
        return 0;
 
 reset_gdbs_data:
+       policy->governor_data = NULL;
+
        if (!have_governor_per_policy())
                cdata->gdbs_data = NULL;
        cdata->exit(dbs_data, !policy->governor->initialized);
@@ -417,16 +419,19 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy,
        if (!cdbs->shared || cdbs->shared->policy)
                return -EBUSY;
 
-       policy->governor_data = NULL;
        if (!--dbs_data->usage_count) {
                sysfs_remove_group(get_governor_parent_kobj(policy),
                                   get_sysfs_attr(dbs_data));
 
+               policy->governor_data = NULL;
+
                if (!have_governor_per_policy())
                        cdata->gdbs_data = NULL;
 
                cdata->exit(dbs_data, policy->governor->initialized == 1);
                kfree(dbs_data);
+       } else {
+               policy->governor_data = NULL;
        }
 
        free_common_dbs_info(policy, cdata);
index 1d99c97..0963772 100644 (file)
@@ -202,7 +202,7 @@ static void __init pxa_cpufreq_init_voltages(void)
        }
 }
 #else
-static int pxa_cpufreq_change_voltage(struct pxa_freqs *pxa_freq)
+static int pxa_cpufreq_change_voltage(const struct pxa_freqs *pxa_freq)
 {
        return 0;
 }
index 344058f..d5657d5 100644 (file)
@@ -119,7 +119,6 @@ struct cpuidle_coupled {
 
 #define CPUIDLE_COUPLED_NOT_IDLE       (-1)
 
-static DEFINE_MUTEX(cpuidle_coupled_lock);
 static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb);
 
 /*
index 046423b..f996efc 100644 (file)
@@ -153,7 +153,7 @@ int cpuidle_enter_freeze(struct cpuidle_driver *drv, struct cpuidle_device *dev)
         * be frozen safely.
         */
        index = find_deepest_state(drv, dev, UINT_MAX, 0, true);
-       if (index >= 0)
+       if (index > 0)
                enter_freeze_proper(drv, dev, index);
 
        return index;
index 313b0cc..82edf95 100644 (file)
@@ -2278,60 +2278,60 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_dpm_enable_bapm(adev, e) (adev)->pm.funcs->enable_bapm((adev), (e))
 
 #define amdgpu_dpm_get_temperature(adev) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->get_temperature((adev)->powerplay.pp_handle) : \
-             (adev)->pm.funcs->get_temperature((adev))
+             (adev)->pm.funcs->get_temperature((adev)))
 
 #define amdgpu_dpm_set_fan_control_mode(adev, m) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->set_fan_control_mode((adev)->powerplay.pp_handle, (m)) : \
-             (adev)->pm.funcs->set_fan_control_mode((adev), (m))
+             (adev)->pm.funcs->set_fan_control_mode((adev), (m)))
 
 #define amdgpu_dpm_get_fan_control_mode(adev) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->get_fan_control_mode((adev)->powerplay.pp_handle) : \
-             (adev)->pm.funcs->get_fan_control_mode((adev))
+             (adev)->pm.funcs->get_fan_control_mode((adev)))
 
 #define amdgpu_dpm_set_fan_speed_percent(adev, s) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->set_fan_speed_percent((adev)->powerplay.pp_handle, (s)) : \
-             (adev)->pm.funcs->set_fan_speed_percent((adev), (s))
+             (adev)->pm.funcs->set_fan_speed_percent((adev), (s)))
 
 #define amdgpu_dpm_get_fan_speed_percent(adev, s) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->get_fan_speed_percent((adev)->powerplay.pp_handle, (s)) : \
-             (adev)->pm.funcs->get_fan_speed_percent((adev), (s))
+             (adev)->pm.funcs->get_fan_speed_percent((adev), (s)))
 
 #define amdgpu_dpm_get_sclk(adev, l) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->get_sclk((adev)->powerplay.pp_handle, (l)) : \
-               (adev)->pm.funcs->get_sclk((adev), (l))
+               (adev)->pm.funcs->get_sclk((adev), (l)))
 
 #define amdgpu_dpm_get_mclk(adev, l)  \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->get_mclk((adev)->powerplay.pp_handle, (l)) : \
-             (adev)->pm.funcs->get_mclk((adev), (l))
+             (adev)->pm.funcs->get_mclk((adev), (l)))
 
 
 #define amdgpu_dpm_force_performance_level(adev, l) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->force_performance_level((adev)->powerplay.pp_handle, (l)) : \
-             (adev)->pm.funcs->force_performance_level((adev), (l))
+             (adev)->pm.funcs->force_performance_level((adev), (l)))
 
 #define amdgpu_dpm_powergate_uvd(adev, g) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->powergate_uvd((adev)->powerplay.pp_handle, (g)) : \
-             (adev)->pm.funcs->powergate_uvd((adev), (g))
+             (adev)->pm.funcs->powergate_uvd((adev), (g)))
 
 #define amdgpu_dpm_powergate_vce(adev, g) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->powergate_vce((adev)->powerplay.pp_handle, (g)) : \
-             (adev)->pm.funcs->powergate_vce((adev), (g))
+             (adev)->pm.funcs->powergate_vce((adev), (g)))
 
 #define amdgpu_dpm_debugfs_print_current_performance_level(adev, m) \
-       (adev)->pp_enabled ?                                            \
+       ((adev)->pp_enabled ?                                           \
              (adev)->powerplay.pp_funcs->print_current_performance_level((adev)->powerplay.pp_handle, (m)) : \
-             (adev)->pm.funcs->debugfs_print_current_performance_level((adev), (m))
+             (adev)->pm.funcs->debugfs_print_current_performance_level((adev), (m)))
 
 #define amdgpu_dpm_get_current_power_state(adev) \
        (adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle)
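
Each macro body is now a single parenthesized expression. Without the outer parentheses the conditional operator binds loosely, so arithmetic written around a macro call silently attaches to one arm only. Illustrative expansion using the first macro above:

    /* t = amdgpu_dpm_get_temperature(adev) / 1000;
     *
     * previously expanded, schematically, to
     *
     *     t = pp_enabled ? pp_get_temp(...) : pm_get_temp(...) / 1000;
     *
     * dividing only the pm arm. With the outer parentheses the whole
     * ternary becomes the operand of the division, as the caller expects. */
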
index 6f89f8e..b882e81 100644 (file)
@@ -478,9 +478,9 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
        struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
        unsigned i;
 
-       amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
-
        if (!error) {
+               amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
+
                /* Sort the buffer list from the smallest to largest buffer,
                 * which affects the order of buffers in the LRU list.
                 * This assures that the smallest buffers are added first
index cfb6caa..9191467 100644 (file)
@@ -333,6 +333,10 @@ int amdgpu_fbdev_init(struct amdgpu_device *adev)
        if (!adev->mode_info.mode_config_initialized)
                return 0;
 
+       /* don't init fbdev if there are no connectors */
+       if (list_empty(&adev->ddev->mode_config.connector_list))
+               return 0;
+
        /* select 8 bpp console on low vram cards */
        if (adev->mc.real_vram_size <= (32*1024*1024))
                bpp_sel = 8;
index c3ce103..a2a16ac 100644 (file)
@@ -399,7 +399,8 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
                }
                if (fpfn > bo->placements[i].fpfn)
                        bo->placements[i].fpfn = fpfn;
-               if (lpfn && lpfn < bo->placements[i].lpfn)
+               if (!bo->placements[i].lpfn ||
+                   (lpfn && lpfn < bo->placements[i].lpfn))
                        bo->placements[i].lpfn = lpfn;
                bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
        }
index 5ee9a06..b9d0d55 100644 (file)
@@ -99,13 +99,24 @@ static int amdgpu_pp_early_init(void *handle)
 
 #ifdef CONFIG_DRM_AMD_POWERPLAY
        switch (adev->asic_type) {
-               case CHIP_TONGA:
-               case CHIP_FIJI:
-                       adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false;
-                       break;
-               default:
-                       adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false;
-                       break;
+       case CHIP_TONGA:
+       case CHIP_FIJI:
+               adev->pp_enabled = (amdgpu_powerplay == 0) ? false : true;
+               break;
+       case CHIP_CARRIZO:
+       case CHIP_STONEY:
+               adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false;
+               break;
+       /* These chips don't have powerplay implementations */
+       case CHIP_BONAIRE:
+       case CHIP_HAWAII:
+       case CHIP_KABINI:
+       case CHIP_MULLINS:
+       case CHIP_KAVERI:
+       case CHIP_TOPAZ:
+       default:
+               adev->pp_enabled = false;
+               break;
        }
 #else
        adev->pp_enabled = false;
index 78e9b0f..d1f234d 100644 (file)
@@ -487,7 +487,7 @@ static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data)
        seq_printf(m, "rptr: 0x%08x [%5d]\n",
                   rptr, rptr);
 
-       rptr_next = ~0;
+       rptr_next = le32_to_cpu(*ring->next_rptr_cpu_addr);
 
        seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n",
                   ring->wptr, ring->wptr);
index aefc668..9599f75 100644 (file)
@@ -1282,7 +1282,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
        const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
                AMDGPU_VM_PTE_COUNT * 8);
-       unsigned pd_size, pd_entries, pts_size;
+       unsigned pd_size, pd_entries;
        int i, r;
 
        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
@@ -1300,8 +1300,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
        pd_entries = amdgpu_vm_num_pdes(adev);
 
        /* allocate page table array */
-       pts_size = pd_entries * sizeof(struct amdgpu_vm_pt);
-       vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
+       vm->page_tables = drm_calloc_large(pd_entries, sizeof(struct amdgpu_vm_pt));
        if (vm->page_tables == NULL) {
                DRM_ERROR("Cannot allocate memory for page table array\n");
                return -ENOMEM;
@@ -1361,7 +1360,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 
        for (i = 0; i < amdgpu_vm_num_pdes(adev); i++)
                amdgpu_bo_unref(&vm->page_tables[i].entry.robj);
-       kfree(vm->page_tables);
+       drm_free_large(vm->page_tables);
 
        amdgpu_bo_unref(&vm->page_directory);
        fence_put(vm->page_directory_fence);
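
The page-table array has one entry per page-directory entry, and on large address spaces amdgpu_vm_num_pdes() can yield an allocation too big for kzalloc() to satisfy reliably. drm_calloc_large() routes small requests to kcalloc() and larger ones to vmalloc-backed memory, which is why the free side must switch to drm_free_large() in the same patch. Its behavior at the time, simplified:

    /* Simplified sketch of drm_calloc_large() from this era: */
    static inline void *calloc_large_sketch(size_t nmemb, size_t size)
    {
            if (size != 0 && nmemb > SIZE_MAX / size)
                    return NULL;                       /* overflow check */
            if (size * nmemb <= PAGE_SIZE)
                    return kcalloc(nmemb, size, GFP_KERNEL);
            return vzalloc(size * nmemb);              /* vmalloc-backed */
    }
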
index 13235d8..95c0cdf 100644 (file)
@@ -4186,7 +4186,18 @@ static int gfx_v8_0_soft_reset(void *handle)
                gfx_v8_0_cp_gfx_enable(adev, false);
 
                /* Disable MEC parsing/prefetching */
-               /* XXX todo */
+               gfx_v8_0_cp_compute_enable(adev, false);
+
+               if (grbm_soft_reset || srbm_soft_reset) {
+                       tmp = RREG32(mmGMCON_DEBUG);
+                       tmp = REG_SET_FIELD(tmp,
+                                           GMCON_DEBUG, GFX_STALL, 1);
+                       tmp = REG_SET_FIELD(tmp,
+                                           GMCON_DEBUG, GFX_CLEAR, 1);
+                       WREG32(mmGMCON_DEBUG, tmp);
+
+                       udelay(50);
+               }
 
                if (grbm_soft_reset) {
                        tmp = RREG32(mmGRBM_SOFT_RESET);
@@ -4215,6 +4226,16 @@ static int gfx_v8_0_soft_reset(void *handle)
                        WREG32(mmSRBM_SOFT_RESET, tmp);
                        tmp = RREG32(mmSRBM_SOFT_RESET);
                }
+
+               if (grbm_soft_reset || srbm_soft_reset) {
+                       tmp = RREG32(mmGMCON_DEBUG);
+                       tmp = REG_SET_FIELD(tmp,
+                                           GMCON_DEBUG, GFX_STALL, 0);
+                       tmp = REG_SET_FIELD(tmp,
+                                           GMCON_DEBUG, GFX_CLEAR, 0);
+                       WREG32(mmGMCON_DEBUG, tmp);
+               }
+
                /* Wait a little for things to settle down */
                udelay(50);
                gfx_v8_0_print_status((void *)adev);
index f4a1346..0497784 100644 (file)
@@ -122,25 +122,12 @@ static int tonga_dpm_hw_fini(void *handle)
 
 static int tonga_dpm_suspend(void *handle)
 {
-       return 0;
+       return tonga_dpm_hw_fini(handle);
 }
 
 static int tonga_dpm_resume(void *handle)
 {
-       int ret;
-       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-       mutex_lock(&adev->pm.mutex);
-
-       ret = tonga_smu_start(adev);
-       if (ret) {
-               DRM_ERROR("SMU start failed\n");
-               goto fail;
-       }
-
-fail:
-       mutex_unlock(&adev->pm.mutex);
-       return ret;
+       return tonga_dpm_hw_init(handle);
 }
 
 static int tonga_dpm_set_clockgating_state(void *handle,
index 8f5d5ed..aa67244 100644 (file)
@@ -64,6 +64,11 @@ static int pp_sw_init(void *handle)
        if (ret == 0)
                ret = hwmgr->hwmgr_func->backend_init(hwmgr);
 
+       if (ret)
+               printk("amdgpu: powerplay initialization failed\n");
+       else
+               printk("amdgpu: powerplay initialized\n");
+
        return ret;
 }
 
index 873a8d2..ec222c6 100644 (file)
@@ -272,6 +272,9 @@ static int cz_start_smu(struct pp_smumgr *smumgr)
                                UCODE_ID_CP_MEC_JT1_MASK |
                                UCODE_ID_CP_MEC_JT2_MASK;
 
+       if (smumgr->chip_id == CHIP_STONEY)
+               fw_to_check &= ~(UCODE_ID_SDMA1_MASK | UCODE_ID_CP_MEC_JT2_MASK);
+
        cz_request_smu_load_fw(smumgr);
        cz_check_fw_load_finish(smumgr, fw_to_check);
 
@@ -282,7 +285,7 @@ static int cz_start_smu(struct pp_smumgr *smumgr)
        return ret;
 }
 
-static uint8_t cz_translate_firmware_enum_to_arg(
+static uint8_t cz_translate_firmware_enum_to_arg(struct pp_smumgr *smumgr,
                        enum cz_scratch_entry firmware_enum)
 {
        uint8_t ret = 0;
@@ -292,7 +295,10 @@ static uint8_t cz_translate_firmware_enum_to_arg(
                ret = UCODE_ID_SDMA0;
                break;
        case CZ_SCRATCH_ENTRY_UCODE_ID_SDMA1:
-               ret = UCODE_ID_SDMA1;
+               if (smumgr->chip_id == CHIP_STONEY)
+                       ret = UCODE_ID_SDMA0;
+               else
+                       ret = UCODE_ID_SDMA1;
                break;
        case CZ_SCRATCH_ENTRY_UCODE_ID_CP_CE:
                ret = UCODE_ID_CP_CE;
@@ -307,7 +313,10 @@ static uint8_t cz_translate_firmware_enum_to_arg(
                ret = UCODE_ID_CP_MEC_JT1;
                break;
        case CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2:
-               ret = UCODE_ID_CP_MEC_JT2;
+               if (smumgr->chip_id == CHIP_STONEY)
+                       ret = UCODE_ID_CP_MEC_JT1;
+               else
+                       ret = UCODE_ID_CP_MEC_JT2;
                break;
        case CZ_SCRATCH_ENTRY_UCODE_ID_GMCON_RENG:
                ret = UCODE_ID_GMCON_RENG;
@@ -396,7 +405,7 @@ static int cz_smu_populate_single_scratch_task(
        struct SMU_Task *task = &toc->tasks[cz_smu->toc_entry_used_count++];
 
        task->type = type;
-       task->arg = cz_translate_firmware_enum_to_arg(fw_enum);
+       task->arg = cz_translate_firmware_enum_to_arg(smumgr, fw_enum);
        task->next = is_last ? END_OF_TASK_LIST : cz_smu->toc_entry_used_count;
 
        for (i = 0; i < cz_smu->scratch_buffer_length; i++)
@@ -433,7 +442,7 @@ static int cz_smu_populate_single_ucode_load_task(
        struct SMU_Task *task = &toc->tasks[cz_smu->toc_entry_used_count++];
 
        task->type = TASK_TYPE_UCODE_LOAD;
-       task->arg = cz_translate_firmware_enum_to_arg(fw_enum);
+       task->arg = cz_translate_firmware_enum_to_arg(smumgr, fw_enum);
        task->next = is_last ? END_OF_TASK_LIST : cz_smu->toc_entry_used_count;
 
        for (i = 0; i < cz_smu->driver_buffer_length; i++)
@@ -509,8 +518,14 @@ static int cz_smu_construct_toc_for_vddgfx_exit(struct pp_smumgr *smumgr)
                                CZ_SCRATCH_ENTRY_UCODE_ID_CP_ME, false);
        cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
-       cz_smu_populate_single_ucode_load_task(smumgr,
+
+       if (smumgr->chip_id == CHIP_STONEY)
+               cz_smu_populate_single_ucode_load_task(smumgr,
+                               CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
+       else
+               cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2, false);
+
        cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_RLC_G, false);
 
@@ -551,7 +566,11 @@ static int cz_smu_construct_toc_for_bootup(struct pp_smumgr *smumgr)
 
        cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_SDMA0, false);
-       cz_smu_populate_single_ucode_load_task(smumgr,
+       if (smumgr->chip_id == CHIP_STONEY)
+               cz_smu_populate_single_ucode_load_task(smumgr,
+                               CZ_SCRATCH_ENTRY_UCODE_ID_SDMA0, false);
+       else
+               cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_SDMA1, false);
        cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_CP_CE, false);
@@ -561,7 +580,11 @@ static int cz_smu_construct_toc_for_bootup(struct pp_smumgr *smumgr)
                                CZ_SCRATCH_ENTRY_UCODE_ID_CP_ME, false);
        cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
-       cz_smu_populate_single_ucode_load_task(smumgr,
+       if (smumgr->chip_id == CHIP_STONEY)
+               cz_smu_populate_single_ucode_load_task(smumgr,
+                               CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
+       else
+               cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2, false);
        cz_smu_populate_single_ucode_load_task(smumgr,
                                CZ_SCRATCH_ENTRY_UCODE_ID_RLC_G, true);
@@ -618,7 +641,7 @@ static int cz_smu_populate_firmware_entries(struct pp_smumgr *smumgr)
 
        for (i = 0; i < sizeof(firmware_list)/sizeof(*firmware_list); i++) {
 
-               firmware_type = cz_translate_firmware_enum_to_arg(
+               firmware_type = cz_translate_firmware_enum_to_arg(smumgr,
                                        firmware_list[i]);
 
                ucode_id = cz_convert_fw_type_to_cgs(firmware_type);
index 57cccd6..7c52306 100644 (file)
@@ -946,9 +946,23 @@ static void wait_for_fences(struct drm_device *dev,
        }
 }
 
-static bool framebuffer_changed(struct drm_device *dev,
-                               struct drm_atomic_state *old_state,
-                               struct drm_crtc *crtc)
+/**
+ * drm_atomic_helper_framebuffer_changed - check if framebuffer has changed
+ * @dev: DRM device
+ * @old_state: atomic state object with old state structures
+ * @crtc: DRM crtc
+ *
+ * Checks whether the framebuffer used for this CRTC changes as a result of
+ * the atomic update.  This is useful for drivers which cannot use
+ * drm_atomic_helper_wait_for_vblanks() and need to reimplement its
+ * functionality.
+ *
+ * Returns:
+ * true if the framebuffer changed.
+ */
+bool drm_atomic_helper_framebuffer_changed(struct drm_device *dev,
+                                          struct drm_atomic_state *old_state,
+                                          struct drm_crtc *crtc)
 {
        struct drm_plane *plane;
        struct drm_plane_state *old_plane_state;
@@ -965,6 +979,7 @@ static bool framebuffer_changed(struct drm_device *dev,
 
        return false;
 }
+EXPORT_SYMBOL(drm_atomic_helper_framebuffer_changed);
 
 /**
  * drm_atomic_helper_wait_for_vblanks - wait for vblank on crtcs
@@ -999,7 +1014,8 @@ drm_atomic_helper_wait_for_vblanks(struct drm_device *dev,
                if (old_state->legacy_cursor_update)
                        continue;
 
-               if (!framebuffer_changed(dev, old_state, crtc))
+               if (!drm_atomic_helper_framebuffer_changed(dev,
+                               old_state, crtc))
                        continue;
 
                ret = drm_crtc_vblank_get(crtc);
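
Exporting the framebuffer-change test lets drivers that cannot use drm_atomic_helper_wait_for_vblanks(), for example because flips complete from a driver-specific worker, reuse the same skip condition. A hypothetical commit-tail fragment:

    /* Sketch: skip waiting on CRTCs whose framebuffer did not change. */
    for_each_crtc_in_state(old_state, crtc, old_crtc_state, i) {
            if (!drm_atomic_helper_framebuffer_changed(dev, old_state, crtc))
                    continue;
            /* ...driver-specific wait for this crtc's flip... */
    }
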
index 9e585d5..e881482 100644 (file)
@@ -8,8 +8,8 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
 git clone git://0x04.net/rules-ng-ng
 
 The rules-ng-ng source files this header was generated from are:
-- state_vg.xml (   5973 bytes, from 2015-03-25 11:26:01)
-- common.xml   (  18437 bytes, from 2015-03-25 11:27:41)
+- state_hi.xml (  24309 bytes, from 2015-12-12 09:02:53)
+- common.xml   (  18379 bytes, from 2015-12-12 09:02:53)
 
 Copyright (C) 2015
 */
@@ -30,15 +30,19 @@ Copyright (C) 2015
 #define ENDIAN_MODE_NO_SWAP                                    0x00000000
 #define ENDIAN_MODE_SWAP_16                                    0x00000001
 #define ENDIAN_MODE_SWAP_32                                    0x00000002
+#define chipModel_GC200                                                0x00000200
 #define chipModel_GC300                                                0x00000300
 #define chipModel_GC320                                                0x00000320
+#define chipModel_GC328                                                0x00000328
 #define chipModel_GC350                                                0x00000350
 #define chipModel_GC355                                                0x00000355
 #define chipModel_GC400                                                0x00000400
 #define chipModel_GC410                                                0x00000410
 #define chipModel_GC420                                                0x00000420
+#define chipModel_GC428                                                0x00000428
 #define chipModel_GC450                                                0x00000450
 #define chipModel_GC500                                                0x00000500
+#define chipModel_GC520                                                0x00000520
 #define chipModel_GC530                                                0x00000530
 #define chipModel_GC600                                                0x00000600
 #define chipModel_GC700                                                0x00000700
@@ -46,9 +50,16 @@ Copyright (C) 2015
 #define chipModel_GC860                                                0x00000860
 #define chipModel_GC880                                                0x00000880
 #define chipModel_GC1000                                       0x00001000
+#define chipModel_GC1500                                       0x00001500
 #define chipModel_GC2000                                       0x00002000
 #define chipModel_GC2100                                       0x00002100
+#define chipModel_GC2200                                       0x00002200
+#define chipModel_GC2500                                       0x00002500
+#define chipModel_GC3000                                       0x00003000
 #define chipModel_GC4000                                       0x00004000
+#define chipModel_GC5000                                       0x00005000
+#define chipModel_GC5200                                       0x00005200
+#define chipModel_GC6400                                       0x00006400
 #define RGBA_BITS_R                                            0x00000001
 #define RGBA_BITS_G                                            0x00000002
 #define RGBA_BITS_B                                            0x00000004
@@ -160,7 +171,7 @@ Copyright (C) 2015
 #define chipMinorFeatures2_UNK8                                        0x00000100
 #define chipMinorFeatures2_UNK9                                        0x00000200
 #define chipMinorFeatures2_UNK10                               0x00000400
-#define chipMinorFeatures2_SAMPLERBASE_16                      0x00000800
+#define chipMinorFeatures2_HALTI1                              0x00000800
 #define chipMinorFeatures2_UNK12                               0x00001000
 #define chipMinorFeatures2_UNK13                               0x00002000
 #define chipMinorFeatures2_UNK14                               0x00004000
@@ -189,7 +200,7 @@ Copyright (C) 2015
 #define chipMinorFeatures3_UNK5                                        0x00000020
 #define chipMinorFeatures3_UNK6                                        0x00000040
 #define chipMinorFeatures3_UNK7                                        0x00000080
-#define chipMinorFeatures3_UNK8                                        0x00000100
+#define chipMinorFeatures3_FAST_MSAA                           0x00000100
 #define chipMinorFeatures3_UNK9                                        0x00000200
 #define chipMinorFeatures3_BUG_FIXES10                         0x00000400
 #define chipMinorFeatures3_UNK11                               0x00000800
@@ -199,7 +210,7 @@ Copyright (C) 2015
 #define chipMinorFeatures3_UNK15                               0x00008000
 #define chipMinorFeatures3_UNK16                               0x00010000
 #define chipMinorFeatures3_UNK17                               0x00020000
-#define chipMinorFeatures3_UNK18                               0x00040000
+#define chipMinorFeatures3_ACE                                 0x00040000
 #define chipMinorFeatures3_UNK19                               0x00080000
 #define chipMinorFeatures3_UNK20                               0x00100000
 #define chipMinorFeatures3_UNK21                               0x00200000
@@ -207,7 +218,7 @@ Copyright (C) 2015
 #define chipMinorFeatures3_UNK23                               0x00800000
 #define chipMinorFeatures3_UNK24                               0x01000000
 #define chipMinorFeatures3_UNK25                               0x02000000
-#define chipMinorFeatures3_UNK26                               0x04000000
+#define chipMinorFeatures3_NEW_HZ                              0x04000000
 #define chipMinorFeatures3_UNK27                               0x08000000
 #define chipMinorFeatures3_UNK28                               0x10000000
 #define chipMinorFeatures3_UNK29                               0x20000000
@@ -229,9 +240,9 @@ Copyright (C) 2015
 #define chipMinorFeatures4_UNK13                               0x00002000
 #define chipMinorFeatures4_UNK14                               0x00004000
 #define chipMinorFeatures4_UNK15                               0x00008000
-#define chipMinorFeatures4_UNK16                               0x00010000
+#define chipMinorFeatures4_HALTI2                              0x00010000
 #define chipMinorFeatures4_UNK17                               0x00020000
-#define chipMinorFeatures4_UNK18                               0x00040000
+#define chipMinorFeatures4_SMALL_MSAA                          0x00040000
 #define chipMinorFeatures4_UNK19                               0x00080000
 #define chipMinorFeatures4_UNK20                               0x00100000
 #define chipMinorFeatures4_UNK21                               0x00200000
@@ -245,5 +256,37 @@ Copyright (C) 2015
 #define chipMinorFeatures4_UNK29                               0x20000000
 #define chipMinorFeatures4_UNK30                               0x40000000
 #define chipMinorFeatures4_UNK31                               0x80000000
+#define chipMinorFeatures5_UNK0                                        0x00000001
+#define chipMinorFeatures5_UNK1                                        0x00000002
+#define chipMinorFeatures5_UNK2                                        0x00000004
+#define chipMinorFeatures5_UNK3                                        0x00000008
+#define chipMinorFeatures5_UNK4                                        0x00000010
+#define chipMinorFeatures5_UNK5                                        0x00000020
+#define chipMinorFeatures5_UNK6                                        0x00000040
+#define chipMinorFeatures5_UNK7                                        0x00000080
+#define chipMinorFeatures5_UNK8                                        0x00000100
+#define chipMinorFeatures5_HALTI3                              0x00000200
+#define chipMinorFeatures5_UNK10                               0x00000400
+#define chipMinorFeatures5_UNK11                               0x00000800
+#define chipMinorFeatures5_UNK12                               0x00001000
+#define chipMinorFeatures5_UNK13                               0x00002000
+#define chipMinorFeatures5_UNK14                               0x00004000
+#define chipMinorFeatures5_UNK15                               0x00008000
+#define chipMinorFeatures5_UNK16                               0x00010000
+#define chipMinorFeatures5_UNK17                               0x00020000
+#define chipMinorFeatures5_UNK18                               0x00040000
+#define chipMinorFeatures5_UNK19                               0x00080000
+#define chipMinorFeatures5_UNK20                               0x00100000
+#define chipMinorFeatures5_UNK21                               0x00200000
+#define chipMinorFeatures5_UNK22                               0x00400000
+#define chipMinorFeatures5_UNK23                               0x00800000
+#define chipMinorFeatures5_UNK24                               0x01000000
+#define chipMinorFeatures5_UNK25                               0x02000000
+#define chipMinorFeatures5_UNK26                               0x04000000
+#define chipMinorFeatures5_UNK27                               0x08000000
+#define chipMinorFeatures5_UNK28                               0x10000000
+#define chipMinorFeatures5_UNK29                               0x20000000
+#define chipMinorFeatures5_UNK30                               0x40000000
+#define chipMinorFeatures5_UNK31                               0x80000000
 
 #endif /* COMMON_XML */
index 5c89ebb..e885898 100644 (file)
@@ -668,7 +668,6 @@ static struct platform_driver etnaviv_platform_driver = {
        .probe      = etnaviv_pdev_probe,
        .remove     = etnaviv_pdev_remove,
        .driver     = {
-               .owner  = THIS_MODULE,
                .name   = "etnaviv",
                .of_match_table = dt_match,
        },
index d6bd438..1cd6046 100644 (file)
@@ -85,7 +85,7 @@ struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev,
        struct dma_buf_attachment *attach, struct sg_table *sg);
 int etnaviv_gem_prime_pin(struct drm_gem_object *obj);
 void etnaviv_gem_prime_unpin(struct drm_gem_object *obj);
-void *etnaviv_gem_vaddr(struct drm_gem_object *obj);
+void *etnaviv_gem_vmap(struct drm_gem_object *obj);
 int etnaviv_gem_cpu_prep(struct drm_gem_object *obj, u32 op,
                struct timespec *timeout);
 int etnaviv_gem_cpu_fini(struct drm_gem_object *obj);
index bf8fa85..4a29eea 100644 (file)
@@ -201,7 +201,9 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 
                obj = vram->object;
 
+               mutex_lock(&obj->lock);
                pages = etnaviv_gem_get_pages(obj);
+               mutex_unlock(&obj->lock);
                if (pages) {
                        int j;
 
@@ -213,8 +215,8 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 
                iter.hdr->iova = cpu_to_le64(vram->iova);
 
-               vaddr = etnaviv_gem_vaddr(&obj->base);
-               if (vaddr && !IS_ERR(vaddr))
+               vaddr = etnaviv_gem_vmap(&obj->base);
+               if (vaddr)
                        memcpy(iter.data, vaddr, obj->base.size);
 
                etnaviv_core_dump_header(&iter, ETDUMP_BUF_BO, iter.data +
index 9f77c3b..4b519e4 100644 (file)
@@ -353,25 +353,39 @@ void etnaviv_gem_put_iova(struct etnaviv_gpu *gpu, struct drm_gem_object *obj)
        drm_gem_object_unreference_unlocked(obj);
 }
 
-void *etnaviv_gem_vaddr(struct drm_gem_object *obj)
+void *etnaviv_gem_vmap(struct drm_gem_object *obj)
 {
        struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
 
-       mutex_lock(&etnaviv_obj->lock);
-       if (!etnaviv_obj->vaddr) {
-               struct page **pages = etnaviv_gem_get_pages(etnaviv_obj);
-
-               if (IS_ERR(pages))
-                       return ERR_CAST(pages);
+       if (etnaviv_obj->vaddr)
+               return etnaviv_obj->vaddr;
 
-               etnaviv_obj->vaddr = vmap(pages, obj->size >> PAGE_SHIFT,
-                               VM_MAP, pgprot_writecombine(PAGE_KERNEL));
-       }
+       mutex_lock(&etnaviv_obj->lock);
+       /*
+        * Need to check again, as we might have raced with another thread
+        * while waiting for the mutex.
+        */
+       if (!etnaviv_obj->vaddr)
+               etnaviv_obj->vaddr = etnaviv_obj->ops->vmap(etnaviv_obj);
        mutex_unlock(&etnaviv_obj->lock);
 
        return etnaviv_obj->vaddr;
 }
 
+static void *etnaviv_gem_vmap_impl(struct etnaviv_gem_object *obj)
+{
+       struct page **pages;
+
+       lockdep_assert_held(&obj->lock);
+
+       pages = etnaviv_gem_get_pages(obj);
+       if (IS_ERR(pages))
+               return NULL;
+
+       return vmap(pages, obj->base.size >> PAGE_SHIFT,
+                       VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+}
+
 static inline enum dma_data_direction etnaviv_op_to_dma_dir(u32 op)
 {
        if (op & ETNA_PREP_READ)
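The reworked etnaviv_gem_vmap() above is a double-checked locking pattern: the first, unlocked test is the fast path for an already-mapped object, and the test is repeated under the mutex so that two racing callers cannot both invoke the ops->vmap() callback. A minimal sketch of the pattern follows; struct foo and do_vmap() are hypothetical stand-ins, not part of the patch.

/* Sketch only, assuming vaddr transitions NULL -> valid exactly once
 * and the mapping lives until the object is destroyed; that is what
 * makes the unlocked read on the fast path safe.
 */
static void *foo_vmap(struct foo *obj)
{
	if (obj->vaddr)			/* fast path, lock-free */
		return obj->vaddr;

	mutex_lock(&obj->lock);
	if (!obj->vaddr)		/* re-check: we may have lost a race */
		obj->vaddr = do_vmap(obj);	/* hypothetical helper */
	mutex_unlock(&obj->lock);

	return obj->vaddr;
}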
@@ -522,6 +536,7 @@ static void etnaviv_gem_shmem_release(struct etnaviv_gem_object *etnaviv_obj)
 static const struct etnaviv_gem_ops etnaviv_gem_shmem_ops = {
        .get_pages = etnaviv_gem_shmem_get_pages,
        .release = etnaviv_gem_shmem_release,
+       .vmap = etnaviv_gem_vmap_impl,
 };
 
 void etnaviv_gem_free_object(struct drm_gem_object *obj)
@@ -866,6 +881,7 @@ static void etnaviv_gem_userptr_release(struct etnaviv_gem_object *etnaviv_obj)
 static const struct etnaviv_gem_ops etnaviv_gem_userptr_ops = {
        .get_pages = etnaviv_gem_userptr_get_pages,
        .release = etnaviv_gem_userptr_release,
+       .vmap = etnaviv_gem_vmap_impl,
 };
 
 int etnaviv_gem_new_userptr(struct drm_device *dev, struct drm_file *file,
index a300b4b..ab5df81 100644 (file)
@@ -78,6 +78,7 @@ struct etnaviv_gem_object *to_etnaviv_bo(struct drm_gem_object *obj)
 struct etnaviv_gem_ops {
        int (*get_pages)(struct etnaviv_gem_object *);
        void (*release)(struct etnaviv_gem_object *);
+       void *(*vmap)(struct etnaviv_gem_object *);
 };
 
 static inline bool is_active(struct etnaviv_gem_object *etnaviv_obj)
index e94db4f..4e67395 100644 (file)
@@ -31,7 +31,7 @@ struct sg_table *etnaviv_gem_prime_get_sg_table(struct drm_gem_object *obj)
 
 void *etnaviv_gem_prime_vmap(struct drm_gem_object *obj)
 {
-       return etnaviv_gem_vaddr(obj);
+       return etnaviv_gem_vmap(obj);
 }
 
 void etnaviv_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
@@ -77,9 +77,17 @@ static void etnaviv_gem_prime_release(struct etnaviv_gem_object *etnaviv_obj)
        drm_prime_gem_destroy(&etnaviv_obj->base, etnaviv_obj->sgt);
 }
 
+static void *etnaviv_gem_prime_vmap_impl(struct etnaviv_gem_object *etnaviv_obj)
+{
+       lockdep_assert_held(&etnaviv_obj->lock);
+
+       return dma_buf_vmap(etnaviv_obj->base.import_attach->dmabuf);
+}
+
 static const struct etnaviv_gem_ops etnaviv_gem_prime_ops = {
        /* .get_pages should never be called */
        .release = etnaviv_gem_prime_release,
+       .vmap = etnaviv_gem_prime_vmap_impl,
 };
 
 struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev,
index 056a72e..a33162c 100644 (file)
@@ -72,6 +72,14 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value)
                *value = gpu->identity.minor_features3;
                break;
 
+       case ETNAVIV_PARAM_GPU_FEATURES_5:
+               *value = gpu->identity.minor_features4;
+               break;
+
+       case ETNAVIV_PARAM_GPU_FEATURES_6:
+               *value = gpu->identity.minor_features5;
+               break;
+
        case ETNAVIV_PARAM_GPU_STREAM_COUNT:
                *value = gpu->identity.stream_count;
                break;
@@ -112,6 +120,10 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value)
                *value = gpu->identity.num_constants;
                break;
 
+       case ETNAVIV_PARAM_GPU_NUM_VARYINGS:
+               *value = gpu->identity.varyings_count;
+               break;
+
        default:
                DBG("%s: invalid param: %u", dev_name(gpu->dev), param);
                return -EINVAL;
@@ -120,46 +132,56 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value)
        return 0;
 }
 
+
+#define etnaviv_is_model_rev(gpu, mod, rev) \
+       ((gpu)->identity.model == chipModel_##mod && \
+        (gpu)->identity.revision == rev)
+#define etnaviv_field(val, field) \
+       (((val) & field##__MASK) >> field##__SHIFT)
+
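The two helpers above replace the open-coded mask-and-shift pairs in the function below: etnaviv_is_model_rev(gpu, GC2000, 0x5108) expands to a comparison against chipModel_GC2000 and revision 0x5108, and etnaviv_field() extracts a register field by token-pasting the __MASK/__SHIFT suffixes. A worked expansion with a made-up field (FOO is illustrative, not a real register field):

/* Hypothetical field occupying bits [7:4]:
 *   #define FOO__MASK   0x000000f0
 *   #define FOO__SHIFT  4
 * then
 *   etnaviv_field(0x1234, FOO)
 * expands to
 *   ((0x1234 & 0x000000f0) >> 4) == 0x3
 */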
 static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
 {
        if (gpu->identity.minor_features0 &
            chipMinorFeatures0_MORE_MINOR_FEATURES) {
-               u32 specs[2];
+               u32 specs[4];
+               unsigned int streams;
 
                specs[0] = gpu_read(gpu, VIVS_HI_CHIP_SPECS);
                specs[1] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_2);
-
-               gpu->identity.stream_count =
-                       (specs[0] & VIVS_HI_CHIP_SPECS_STREAM_COUNT__MASK)
-                               >> VIVS_HI_CHIP_SPECS_STREAM_COUNT__SHIFT;
-               gpu->identity.register_max =
-                       (specs[0] & VIVS_HI_CHIP_SPECS_REGISTER_MAX__MASK)
-                               >> VIVS_HI_CHIP_SPECS_REGISTER_MAX__SHIFT;
-               gpu->identity.thread_count =
-                       (specs[0] & VIVS_HI_CHIP_SPECS_THREAD_COUNT__MASK)
-                               >> VIVS_HI_CHIP_SPECS_THREAD_COUNT__SHIFT;
-               gpu->identity.vertex_cache_size =
-                       (specs[0] & VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__MASK)
-                               >> VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__SHIFT;
-               gpu->identity.shader_core_count =
-                       (specs[0] & VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__MASK)
-                               >> VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__SHIFT;
-               gpu->identity.pixel_pipes =
-                       (specs[0] & VIVS_HI_CHIP_SPECS_PIXEL_PIPES__MASK)
-                               >> VIVS_HI_CHIP_SPECS_PIXEL_PIPES__SHIFT;
+               specs[2] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_3);
+               specs[3] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_4);
+
+               gpu->identity.stream_count = etnaviv_field(specs[0],
+                                       VIVS_HI_CHIP_SPECS_STREAM_COUNT);
+               gpu->identity.register_max = etnaviv_field(specs[0],
+                                       VIVS_HI_CHIP_SPECS_REGISTER_MAX);
+               gpu->identity.thread_count = etnaviv_field(specs[0],
+                                       VIVS_HI_CHIP_SPECS_THREAD_COUNT);
+               gpu->identity.vertex_cache_size = etnaviv_field(specs[0],
+                                       VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE);
+               gpu->identity.shader_core_count = etnaviv_field(specs[0],
+                                       VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT);
+               gpu->identity.pixel_pipes = etnaviv_field(specs[0],
+                                       VIVS_HI_CHIP_SPECS_PIXEL_PIPES);
                gpu->identity.vertex_output_buffer_size =
-                       (specs[0] & VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__MASK)
-                               >> VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__SHIFT;
-
-               gpu->identity.buffer_size =
-                       (specs[1] & VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__MASK)
-                               >> VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__SHIFT;
-               gpu->identity.instruction_count =
-                       (specs[1] & VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__MASK)
-                               >> VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__SHIFT;
-               gpu->identity.num_constants =
-                       (specs[1] & VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__MASK)
-                               >> VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__SHIFT;
+                       etnaviv_field(specs[0],
+                               VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE);
+
+               gpu->identity.buffer_size = etnaviv_field(specs[1],
+                                       VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE);
+               gpu->identity.instruction_count = etnaviv_field(specs[1],
+                                       VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT);
+               gpu->identity.num_constants = etnaviv_field(specs[1],
+                                       VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS);
+
+               gpu->identity.varyings_count = etnaviv_field(specs[2],
+                                       VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT);
+
+               /* This overrides the value from the older register if non-zero */
+               streams = etnaviv_field(specs[3],
+                                       VIVS_HI_CHIP_SPECS_4_STREAM_COUNT);
+               if (streams)
+                       gpu->identity.stream_count = streams;
        }
 
        /* Fill in the stream count if not specified */
@@ -173,7 +195,7 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
        /* Convert the register max value */
        if (gpu->identity.register_max)
                gpu->identity.register_max = 1 << gpu->identity.register_max;
-       else if (gpu->identity.model == 0x0400)
+       else if (gpu->identity.model == chipModel_GC400)
                gpu->identity.register_max = 32;
        else
                gpu->identity.register_max = 64;
@@ -181,10 +203,10 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
        /* Convert thread count */
        if (gpu->identity.thread_count)
                gpu->identity.thread_count = 1 << gpu->identity.thread_count;
-       else if (gpu->identity.model == 0x0400)
+       else if (gpu->identity.model == chipModel_GC400)
                gpu->identity.thread_count = 64;
-       else if (gpu->identity.model == 0x0500 ||
-                gpu->identity.model == 0x0530)
+       else if (gpu->identity.model == chipModel_GC500 ||
+                gpu->identity.model == chipModel_GC530)
                gpu->identity.thread_count = 128;
        else
                gpu->identity.thread_count = 256;
@@ -206,7 +228,7 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
        if (gpu->identity.vertex_output_buffer_size) {
                gpu->identity.vertex_output_buffer_size =
                        1 << gpu->identity.vertex_output_buffer_size;
-       } else if (gpu->identity.model == 0x0400) {
+       } else if (gpu->identity.model == chipModel_GC400) {
                if (gpu->identity.revision < 0x4000)
                        gpu->identity.vertex_output_buffer_size = 512;
                else if (gpu->identity.revision < 0x4200)
@@ -219,9 +241,8 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
 
        switch (gpu->identity.instruction_count) {
        case 0:
-               if ((gpu->identity.model == 0x2000 &&
-                    gpu->identity.revision == 0x5108) ||
-                   gpu->identity.model == 0x880)
+               if (etnaviv_is_model_rev(gpu, GC2000, 0x5108) ||
+                   gpu->identity.model == chipModel_GC880)
                        gpu->identity.instruction_count = 512;
                else
                        gpu->identity.instruction_count = 256;
@@ -242,6 +263,30 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
 
        if (gpu->identity.num_constants == 0)
                gpu->identity.num_constants = 168;
+
+       if (gpu->identity.varyings_count == 0) {
+               if (gpu->identity.minor_features1 & chipMinorFeatures1_HALTI0)
+                       gpu->identity.varyings_count = 12;
+               else
+                       gpu->identity.varyings_count = 8;
+       }
+
+       /*
+        * For some cores, two varyings are consumed for position, so the
+        * maximum varying count needs to be reduced by one.
+        */
+       if (etnaviv_is_model_rev(gpu, GC5000, 0x5434) ||
+           etnaviv_is_model_rev(gpu, GC4000, 0x5222) ||
+           etnaviv_is_model_rev(gpu, GC4000, 0x5245) ||
+           etnaviv_is_model_rev(gpu, GC4000, 0x5208) ||
+           etnaviv_is_model_rev(gpu, GC3000, 0x5435) ||
+           etnaviv_is_model_rev(gpu, GC2200, 0x5244) ||
+           etnaviv_is_model_rev(gpu, GC2100, 0x5108) ||
+           etnaviv_is_model_rev(gpu, GC2000, 0x5108) ||
+           etnaviv_is_model_rev(gpu, GC1500, 0x5246) ||
+           etnaviv_is_model_rev(gpu, GC880, 0x5107) ||
+           etnaviv_is_model_rev(gpu, GC880, 0x5106))
+               gpu->identity.varyings_count -= 1;
 }
 
 static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
@@ -251,12 +296,10 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
        chipIdentity = gpu_read(gpu, VIVS_HI_CHIP_IDENTITY);
 
        /* Special case for older graphic cores. */
-       if (((chipIdentity & VIVS_HI_CHIP_IDENTITY_FAMILY__MASK)
-            >> VIVS_HI_CHIP_IDENTITY_FAMILY__SHIFT) ==  0x01) {
-               gpu->identity.model    = 0x500; /* gc500 */
-               gpu->identity.revision =
-                       (chipIdentity & VIVS_HI_CHIP_IDENTITY_REVISION__MASK)
-                       >> VIVS_HI_CHIP_IDENTITY_REVISION__SHIFT;
+       if (etnaviv_field(chipIdentity, VIVS_HI_CHIP_IDENTITY_FAMILY) == 0x01) {
+               gpu->identity.model    = chipModel_GC500;
+               gpu->identity.revision = etnaviv_field(chipIdentity,
+                                        VIVS_HI_CHIP_IDENTITY_REVISION);
        } else {
 
                gpu->identity.model = gpu_read(gpu, VIVS_HI_CHIP_MODEL);
@@ -269,13 +312,12 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
                 * same.  Only for GC400 family.
                 */
                if ((gpu->identity.model & 0xff00) == 0x0400 &&
-                   gpu->identity.model != 0x0420) {
+                   gpu->identity.model != chipModel_GC420) {
                        gpu->identity.model = gpu->identity.model & 0x0400;
                }
 
                /* Another special case */
-               if (gpu->identity.model == 0x300 &&
-                   gpu->identity.revision == 0x2201) {
+               if (etnaviv_is_model_rev(gpu, GC300, 0x2201)) {
                        u32 chipDate = gpu_read(gpu, VIVS_HI_CHIP_DATE);
                        u32 chipTime = gpu_read(gpu, VIVS_HI_CHIP_TIME);
 
@@ -295,11 +337,13 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
        gpu->identity.features = gpu_read(gpu, VIVS_HI_CHIP_FEATURE);
 
        /* Disable fast clear on GC700. */
-       if (gpu->identity.model == 0x700)
+       if (gpu->identity.model == chipModel_GC700)
                gpu->identity.features &= ~chipFeatures_FAST_CLEAR;
 
-       if ((gpu->identity.model == 0x500 && gpu->identity.revision < 2) ||
-           (gpu->identity.model == 0x300 && gpu->identity.revision < 0x2000)) {
+       if ((gpu->identity.model == chipModel_GC500 &&
+            gpu->identity.revision < 2) ||
+           (gpu->identity.model == chipModel_GC300 &&
+            gpu->identity.revision < 0x2000)) {
 
                /*
                 * GC500 rev 1.x and GC300 rev < 2.0 don't have these
@@ -309,6 +353,8 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
                gpu->identity.minor_features1 = 0;
                gpu->identity.minor_features2 = 0;
                gpu->identity.minor_features3 = 0;
+               gpu->identity.minor_features4 = 0;
+               gpu->identity.minor_features5 = 0;
        } else
                gpu->identity.minor_features0 =
                                gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_0);
@@ -321,6 +367,10 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
                                gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_2);
                gpu->identity.minor_features3 =
                                gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_3);
+               gpu->identity.minor_features4 =
+                               gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_4);
+               gpu->identity.minor_features5 =
+                               gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_5);
        }
 
        /* GC600 idle register reports zero bits where modules aren't present */
@@ -441,10 +491,9 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
 {
        u16 prefetch;
 
-       if (gpu->identity.model == chipModel_GC320 &&
-           gpu_read(gpu, VIVS_HI_CHIP_TIME) != 0x2062400 &&
-           (gpu->identity.revision == 0x5007 ||
-            gpu->identity.revision == 0x5220)) {
+       if ((etnaviv_is_model_rev(gpu, GC320, 0x5007) ||
+            etnaviv_is_model_rev(gpu, GC320, 0x5220)) &&
+           gpu_read(gpu, VIVS_HI_CHIP_TIME) != 0x2062400) {
                u32 mc_memory_debug;
 
                mc_memory_debug = gpu_read(gpu, VIVS_MC_DEBUG_MEMORY) & ~0xff;
@@ -466,7 +515,7 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
                  VIVS_HI_AXI_CONFIG_ARCACHE(2));
 
        /* GC2000 rev 5108 needs a special bus config */
-       if (gpu->identity.model == 0x2000 && gpu->identity.revision == 0x5108) {
+       if (etnaviv_is_model_rev(gpu, GC2000, 0x5108)) {
                u32 bus_config = gpu_read(gpu, VIVS_MC_BUS_CONFIG);
                bus_config &= ~(VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG__MASK |
                                VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG__MASK);
@@ -511,8 +560,16 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 
        if (gpu->identity.model == 0) {
                dev_err(gpu->dev, "Unknown GPU model\n");
-               pm_runtime_put_autosuspend(gpu->dev);
-               return -ENXIO;
+               ret = -ENXIO;
+               goto fail;
+       }
+
+       /* Exclude VG cores with FE2.0 */
+       if (gpu->identity.features & chipFeatures_PIPE_VG &&
+           gpu->identity.features & chipFeatures_FE20) {
+               dev_info(gpu->dev, "Ignoring GPU with VG and FE2.0\n");
+               ret = -ENXIO;
+               goto fail;
        }
 
        ret = etnaviv_hw_reset(gpu);
@@ -539,10 +596,9 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
                goto fail;
        }
 
-       /* TODO: we will leak here memory - fix it! */
-
        gpu->mmu = etnaviv_iommu_new(gpu, iommu, version);
        if (!gpu->mmu) {
+               iommu_domain_free(iommu);
                ret = -ENOMEM;
                goto fail;
        }
@@ -552,7 +608,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
        if (!gpu->buffer) {
                ret = -ENOMEM;
                dev_err(gpu->dev, "could not create command buffer\n");
-               goto fail;
+               goto destroy_iommu;
        }
        if (gpu->buffer->paddr - gpu->memory_base > 0x80000000) {
                ret = -EINVAL;
@@ -582,6 +638,9 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 free_buffer:
        etnaviv_gpu_cmdbuf_free(gpu->buffer);
        gpu->buffer = NULL;
+destroy_iommu:
+       etnaviv_iommu_destroy(gpu->mmu);
+       gpu->mmu = NULL;
 fail:
        pm_runtime_mark_last_busy(gpu->dev);
        pm_runtime_put_autosuspend(gpu->dev);
@@ -642,6 +701,10 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
                   gpu->identity.minor_features2);
        seq_printf(m, "\t minor_features3: 0x%08x\n",
                   gpu->identity.minor_features3);
+       seq_printf(m, "\t minor_features4: 0x%08x\n",
+                  gpu->identity.minor_features4);
+       seq_printf(m, "\t minor_features5: 0x%08x\n",
+                  gpu->identity.minor_features5);
 
        seq_puts(m, "\tspecs\n");
        seq_printf(m, "\t stream_count:  %d\n",
@@ -664,6 +727,8 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
                        gpu->identity.instruction_count);
        seq_printf(m, "\t num_constants: %d\n",
                        gpu->identity.num_constants);
+       seq_printf(m, "\t varyings_count: %d\n",
+                       gpu->identity.varyings_count);
 
        seq_printf(m, "\taxi: 0x%08x\n", axi);
        seq_printf(m, "\tidle: 0x%08x\n", idle);
index c75d503..f233ac4 100644 (file)
@@ -46,6 +46,12 @@ struct etnaviv_chip_identity {
        /* Supported minor feature 3 fields. */
        u32 minor_features3;
 
+       /* Supported minor feature 4 fields. */
+       u32 minor_features4;
+
+       /* Supported minor feature 5 fields. */
+       u32 minor_features5;
+
        /* Number of streams supported. */
        u32 stream_count;
 
@@ -75,6 +81,9 @@ struct etnaviv_chip_identity {
 
        /* Buffer size */
        u32 buffer_size;
+
+       /* Number of varyings */
+       u8 varyings_count;
 };
 
 struct etnaviv_event {
index 0064f26..6a7de5f 100644 (file)
@@ -8,8 +8,8 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
 git clone git://0x04.net/rules-ng-ng
 
 The rules-ng-ng source files this header was generated from are:
-- state_hi.xml (  23420 bytes, from 2015-03-25 11:47:21)
-- common.xml   (  18437 bytes, from 2015-03-25 11:27:41)
+- state_hi.xml (  24309 bytes, from 2015-12-12 09:02:53)
+- common.xml   (  18437 bytes, from 2015-12-12 09:02:53)
 
 Copyright (C) 2015
 */
@@ -182,8 +182,25 @@ Copyright (C) 2015
 
 #define VIVS_HI_CHIP_MINOR_FEATURE_3                           0x00000088
 
+#define VIVS_HI_CHIP_SPECS_3                                   0x0000008c
+#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__MASK              0x000001f0
+#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__SHIFT             4
+#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT(x)                 (((x) << VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__MASK)
+#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__MASK              0x00000007
+#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__SHIFT             0
+#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT(x)                 (((x) << VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__MASK)
+
 #define VIVS_HI_CHIP_MINOR_FEATURE_4                           0x00000094
 
+#define VIVS_HI_CHIP_SPECS_4                                   0x0000009c
+#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__MASK                        0x0001f000
+#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__SHIFT               12
+#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT(x)                   (((x) << VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__MASK)
+
+#define VIVS_HI_CHIP_MINOR_FEATURE_5                           0x000000a0
+
+#define VIVS_HI_CHIP_PRODUCT_ID                                        0x000000a8
+
 #define VIVS_PM                                                        0x00000000
 
 #define VIVS_PM_POWER_CONTROLS                                 0x00000100
@@ -206,6 +223,11 @@ Copyright (C) 2015
 #define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_FE            0x00000001
 #define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_DE            0x00000002
 #define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_PE            0x00000004
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_SH            0x00000008
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_PA            0x00000010
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_SE            0x00000020
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_RA            0x00000040
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_TX            0x00000080
 
 #define VIVS_PM_PULSE_EATER                                    0x0000010c
 
index 6bfc463..367a916 100644 (file)
@@ -304,18 +304,10 @@ void dce6_dp_audio_set_dto(struct radeon_device *rdev,
                unsigned int div = (RREG32(DENTIST_DISPCLK_CNTL) &
                        DENTIST_DPREFCLK_WDIVIDER_MASK) >>
                        DENTIST_DPREFCLK_WDIVIDER_SHIFT;
-
-               if (div < 128 && div >= 96)
-                       div -= 64;
-               else if (div >= 64)
-                       div = div / 2 - 16;
-               else if (div >= 8)
-                       div /= 4;
-               else
-                       div = 0;
+               div = radeon_audio_decode_dfs_div(div);
 
                if (div)
-                       clock = rdev->clock.gpupll_outputfreq * 10 / div;
+                       clock = clock * 100 / div;
 
                WREG32(DCE8_DCCG_AUDIO_DTO1_PHASE, 24000);
                WREG32(DCE8_DCCG_AUDIO_DTO1_MODULE, clock);
index 9953356..3cf04a2 100644 (file)
@@ -289,6 +289,16 @@ void dce4_dp_audio_set_dto(struct radeon_device *rdev,
         * number (quotient of two integer numbers).  DCCG_AUDIO_DTOx_PHASE
         * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator
         */
+       if (ASIC_IS_DCE41(rdev)) {
+               unsigned int div = (RREG32(DCE41_DENTIST_DISPCLK_CNTL) &
+                       DENTIST_DPREFCLK_WDIVIDER_MASK) >>
+                       DENTIST_DPREFCLK_WDIVIDER_SHIFT;
+               div = radeon_audio_decode_dfs_div(div);
+
+               if (div)
+                       clock = 100 * clock / div;
+       }
+
        WREG32(DCCG_AUDIO_DTO1_PHASE, 24000);
        WREG32(DCCG_AUDIO_DTO1_MODULE, clock);
 }
index 4aa5f75..13b6029 100644 (file)
 #define DCCG_AUDIO_DTO1_CNTL              0x05cc
 #       define DCCG_AUDIO_DTO1_USE_512FBR_DTO (1 << 3)
 
+#define DCE41_DENTIST_DISPCLK_CNTL                     0x049c
+#       define DENTIST_DPREFCLK_WDIVIDER(x)            (((x) & 0x7f) << 24)
+#       define DENTIST_DPREFCLK_WDIVIDER_MASK          (0x7f << 24)
+#       define DENTIST_DPREFCLK_WDIVIDER_SHIFT         24
+
 /* DCE 4.0 AFMT */
 #define HDMI_CONTROL                         0x7030
 #       define HDMI_KEEPOUT_MODE             (1 << 0)
index 5ae6db9..78a51b3 100644 (file)
@@ -268,7 +268,7 @@ struct radeon_clock {
        uint32_t current_dispclk;
        uint32_t dp_extclk;
        uint32_t max_pixel_clock;
-       uint32_t gpupll_outputfreq;
+       uint32_t vco_freq;
 };
 
 /*
index 08fc1b5..de9a2ff 100644 (file)
@@ -1106,6 +1106,31 @@ union firmware_info {
        ATOM_FIRMWARE_INFO_V2_2 info_22;
 };
 
+union igp_info {
+       struct _ATOM_INTEGRATED_SYSTEM_INFO info;
+       struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2;
+       struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6;
+       struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7;
+       struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8;
+};
+
+static void radeon_atombios_get_dentist_vco_freq(struct radeon_device *rdev)
+{
+       struct radeon_mode_info *mode_info = &rdev->mode_info;
+       int index = GetIndexIntoMasterTable(DATA, IntegratedSystemInfo);
+       union igp_info *igp_info;
+       u8 frev, crev;
+       u16 data_offset;
+
+       if (atom_parse_data_header(mode_info->atom_context, index, NULL,
+                       &frev, &crev, &data_offset)) {
+               igp_info = (union igp_info *)(mode_info->atom_context->bios +
+                       data_offset);
+               rdev->clock.vco_freq =
+                       le32_to_cpu(igp_info->info_6.ulDentistVCOFreq);
+       }
+}
+
 bool radeon_atom_get_clock_info(struct drm_device *dev)
 {
        struct radeon_device *rdev = dev->dev_private;
@@ -1257,12 +1282,18 @@ bool radeon_atom_get_clock_info(struct drm_device *dev)
                rdev->mode_info.firmware_flags =
                        le16_to_cpu(firmware_info->info.usFirmwareCapability.susAccess);
 
-               if (ASIC_IS_DCE8(rdev)) {
-                       rdev->clock.gpupll_outputfreq =
+               if (ASIC_IS_DCE8(rdev))
+                       rdev->clock.vco_freq =
                                le32_to_cpu(firmware_info->info_22.ulGPUPLL_OutputFreq);
-                       if (rdev->clock.gpupll_outputfreq == 0)
-                               rdev->clock.gpupll_outputfreq = 360000; /* 3.6 GHz */
-               }
+               else if (ASIC_IS_DCE5(rdev))
+                       rdev->clock.vco_freq = rdev->clock.current_dispclk;
+               else if (ASIC_IS_DCE41(rdev))
+                       radeon_atombios_get_dentist_vco_freq(rdev);
+               else
+                       rdev->clock.vco_freq = rdev->clock.current_dispclk;
+
+               if (rdev->clock.vco_freq == 0)
+                       rdev->clock.vco_freq = 360000;  /* 3.6 GHz */
 
                return true;
        }
@@ -1270,14 +1301,6 @@ bool radeon_atom_get_clock_info(struct drm_device *dev)
        return false;
 }
 
-union igp_info {
-       struct _ATOM_INTEGRATED_SYSTEM_INFO info;
-       struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2;
-       struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6;
-       struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7;
-       struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8;
-};
-
 bool radeon_atombios_sideport_present(struct radeon_device *rdev)
 {
        struct radeon_mode_info *mode_info = &rdev->mode_info;
index 2c02e99..b214663 100644 (file)
@@ -739,9 +739,6 @@ static void radeon_audio_dp_mode_set(struct drm_encoder *encoder,
        struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
        struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
        struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
-       struct radeon_connector *radeon_connector = to_radeon_connector(connector);
-       struct radeon_connector_atom_dig *dig_connector =
-               radeon_connector->con_priv;
 
        if (!dig || !dig->afmt)
                return;
@@ -753,10 +750,7 @@ static void radeon_audio_dp_mode_set(struct drm_encoder *encoder,
                radeon_audio_write_speaker_allocation(encoder);
                radeon_audio_write_sad_regs(encoder);
                radeon_audio_write_latency_fields(encoder, mode);
-               if (rdev->clock.dp_extclk || ASIC_IS_DCE5(rdev))
-                       radeon_audio_set_dto(encoder, rdev->clock.default_dispclk * 10);
-               else
-                       radeon_audio_set_dto(encoder, dig_connector->dp_clock);
+               radeon_audio_set_dto(encoder, rdev->clock.vco_freq * 10);
                radeon_audio_set_audio_packet(encoder);
                radeon_audio_select_pin(encoder);
 
@@ -781,3 +775,15 @@ void radeon_audio_dpms(struct drm_encoder *encoder, int mode)
        if (radeon_encoder->audio && radeon_encoder->audio->dpms)
                radeon_encoder->audio->dpms(encoder, mode == DRM_MODE_DPMS_ON);
 }
+
+unsigned int radeon_audio_decode_dfs_div(unsigned int div)
+{
+       if (div >= 8 && div < 64)
+               return (div - 8) * 25 + 200;
+       else if (div >= 64 && div < 96)
+               return (div - 64) * 50 + 1600;
+       else if (div >= 96 && div < 128)
+               return (div - 96) * 100 + 3200;
+       else
+               return 0;
+}
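The new helper returns the DENTIST display-clock divider scaled by 100 (so a return of 3600 means a divider of 36.00), which is why the DCE4.1 and DCE6 callers compute clock = clock * 100 / div. Some sample points of the mapping, derived directly from the branches above:

/* radeon_audio_decode_dfs_div() worked examples:
 *   div =   8  ->   (8 -  8) * 25 +  200 =  200   (divide by  2.00)
 *   div =  64  ->  (64 - 64) * 50 + 1600 = 1600   (divide by 16.00)
 *   div = 100  -> (100 - 96) * 100 + 3200 = 3600  (divide by 36.00)
 *   div outside [8, 128)  ->  0, and the callers skip the scaling
 */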
index 059cc30..5c70cce 100644 (file)
@@ -79,5 +79,6 @@ void radeon_audio_fini(struct radeon_device *rdev);
 void radeon_audio_mode_set(struct drm_encoder *encoder,
        struct drm_display_mode *mode);
 void radeon_audio_dpms(struct drm_encoder *encoder, int mode);
+unsigned int radeon_audio_decode_dfs_div(unsigned int div);
 
 #endif
index b3bb923..298ea1c 100644 (file)
@@ -1670,8 +1670,10 @@ int radeon_modeset_init(struct radeon_device *rdev)
        /* setup afmt */
        radeon_afmt_init(rdev);
 
-       radeon_fbdev_init(rdev);
-       drm_kms_helper_poll_init(rdev->ddev);
+       if (!list_empty(&rdev->ddev->mode_config.connector_list)) {
+               radeon_fbdev_init(rdev);
+               drm_kms_helper_poll_init(rdev->ddev);
+       }
 
        /* do pm late init */
        ret = radeon_pm_late_init(rdev);
index 3dcc573..e26c963 100644 (file)
@@ -663,6 +663,7 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
        bo_va = radeon_vm_bo_find(&fpriv->vm, rbo);
        if (!bo_va) {
                args->operation = RADEON_VA_RESULT_ERROR;
+               radeon_bo_unreserve(rbo);
                drm_gem_object_unreference_unlocked(gobj);
                return -ENOENT;
        }
index 07a0d37..a01efe3 100644 (file)
@@ -178,12 +178,12 @@ int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data)
                return -EINVAL;
        }
 
-       for (i = 0; i < sign->num; ++i) {
-               if (sign->val[i].chip_id == chip_id)
+       for (i = 0; i < le32_to_cpu(sign->num); ++i) {
+               if (le32_to_cpu(sign->val[i].chip_id) == chip_id)
                        break;
        }
 
-       if (i == sign->num)
+       if (i == le32_to_cpu(sign->num))
                return -EINVAL;
 
        data += (256 - 64) / 4;
@@ -191,18 +191,18 @@ int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data)
        data[1] = sign->val[i].nonce[1];
        data[2] = sign->val[i].nonce[2];
        data[3] = sign->val[i].nonce[3];
-       data[4] = sign->len + 64;
+       data[4] = cpu_to_le32(le32_to_cpu(sign->len) + 64);
 
        memset(&data[5], 0, 44);
        memcpy(&data[16], &sign[1], rdev->vce_fw->size - sizeof(*sign));
 
-       data += data[4] / 4;
+       data += le32_to_cpu(data[4]) / 4;
        data[0] = sign->val[i].sigval[0];
        data[1] = sign->val[i].sigval[1];
        data[2] = sign->val[i].sigval[2];
        data[3] = sign->val[i].sigval[3];
 
-       rdev->vce.keyselect = sign->val[i].keyselect;
+       rdev->vce.keyselect = le32_to_cpu(sign->val[i].keyselect);
 
        return 0;
 }
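All of these conversions follow one rule: the signature block in the firmware image stores its 32-bit fields little-endian, so values must be brought into CPU byte order before any comparison or arithmetic, and converted back when written into the buffer the VCE block reads. The data[4] length fixup is the round-trip case; a sketch of the same three steps:

/* Endianness round-trip as in the data[4] fixup above:
 *
 *   __le32 stored = sign->len;            stored LE, as in the blob
 *   u32 len = le32_to_cpu(stored) + 64;   arithmetic in CPU order
 *   data[4] = cpu_to_le32(len);           back to device byte order
 *
 * On little-endian hosts the conversions compile to nothing; on
 * big-endian hosts they are byte swaps, which is what the unconverted
 * code got wrong.
 */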
index d1dc0f7..f6a809a 100644 (file)
@@ -2,11 +2,11 @@
 # Makefile for the drm device driver.  This driver provides support for the
 # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
 
-rockchipdrm-y := rockchip_drm_drv.o rockchip_drm_fb.o rockchip_drm_fbdev.o \
-               rockchip_drm_gem.o
+rockchipdrm-y := rockchip_drm_drv.o rockchip_drm_fb.o \
+               rockchip_drm_gem.o rockchip_drm_vop.o
+rockchipdrm-$(CONFIG_DRM_FBDEV_EMULATION) += rockchip_drm_fbdev.o
 
 obj-$(CONFIG_ROCKCHIP_DW_HDMI) += dw_hdmi-rockchip.o
 obj-$(CONFIG_ROCKCHIP_DW_MIPI_DSI) += dw-mipi-dsi.o
 
-obj-$(CONFIG_DRM_ROCKCHIP) += rockchipdrm.o rockchip_drm_vop.o \
-                               rockchip_vop_reg.o
+obj-$(CONFIG_DRM_ROCKCHIP) += rockchipdrm.o rockchip_vop_reg.o
index 7bfe243..f8f8f29 100644 (file)
@@ -461,10 +461,11 @@ static int dw_mipi_dsi_phy_init(struct dw_mipi_dsi *dsi)
 
 static int dw_mipi_dsi_get_lane_bps(struct dw_mipi_dsi *dsi)
 {
-       unsigned int bpp, i, pre;
+       unsigned int i, pre;
        unsigned long mpclk, pllref, tmp;
        unsigned int m = 1, n = 1, target_mbps = 1000;
        unsigned int max_mbps = dptdin_map[ARRAY_SIZE(dptdin_map) - 1].max_mbps;
+       int bpp;
 
        bpp = mipi_dsi_pixel_format_to_bpp(dsi->format);
        if (bpp < 0) {
index 8397d1b..a0d51cc 100644 (file)
@@ -55,14 +55,12 @@ int rockchip_drm_dma_attach_device(struct drm_device *drm_dev,
 
        return arm_iommu_attach_device(dev, mapping);
 }
-EXPORT_SYMBOL_GPL(rockchip_drm_dma_attach_device);
 
 void rockchip_drm_dma_detach_device(struct drm_device *drm_dev,
                                    struct device *dev)
 {
        arm_iommu_detach_device(dev);
 }
-EXPORT_SYMBOL_GPL(rockchip_drm_dma_detach_device);
 
 int rockchip_register_crtc_funcs(struct drm_crtc *crtc,
                                 const struct rockchip_crtc_funcs *crtc_funcs)
@@ -77,7 +75,6 @@ int rockchip_register_crtc_funcs(struct drm_crtc *crtc,
 
        return 0;
 }
-EXPORT_SYMBOL_GPL(rockchip_register_crtc_funcs);
 
 void rockchip_unregister_crtc_funcs(struct drm_crtc *crtc)
 {
@@ -89,7 +86,6 @@ void rockchip_unregister_crtc_funcs(struct drm_crtc *crtc)
 
        priv->crtc_funcs[pipe] = NULL;
 }
-EXPORT_SYMBOL_GPL(rockchip_unregister_crtc_funcs);
 
 static struct drm_crtc *rockchip_crtc_from_pipe(struct drm_device *drm,
                                                int pipe)
index f784488..3b8f652 100644 (file)
@@ -39,7 +39,6 @@ struct drm_gem_object *rockchip_fb_get_gem_obj(struct drm_framebuffer *fb,
 
        return rk_fb->obj[plane];
 }
-EXPORT_SYMBOL_GPL(rockchip_fb_get_gem_obj);
 
 static void rockchip_drm_fb_destroy(struct drm_framebuffer *fb)
 {
@@ -177,8 +176,23 @@ static void rockchip_crtc_wait_for_update(struct drm_crtc *crtc)
                crtc_funcs->wait_for_update(crtc);
 }
 
+/*
+ * We can't use drm_atomic_helper_wait_for_vblanks() because rk3288 and rk3066
+ * have hardware counters for neither vblanks nor scanlines, which results in
+ * a race where:
+ *                             | <-- HW vsync irq and reg take effect
+ *            plane_commit --> |
+ *     get_vblank and wait --> |
+ *                             | <-- handle_vblank, vblank->count + 1
+ *              cleanup_fb --> |
+ *             iommu crash --> |
+ *                             | <-- HW vsync irq and reg take effect
+ *
+ * This function is equivalent but uses rockchip_crtc_wait_for_update() instead
+ * of waiting for vblank_count to change.
+ */
 static void
-rockchip_atomic_wait_for_complete(struct drm_atomic_state *old_state)
+rockchip_atomic_wait_for_complete(struct drm_device *dev, struct drm_atomic_state *old_state)
 {
        struct drm_crtc_state *old_crtc_state;
        struct drm_crtc *crtc;
@@ -194,6 +208,10 @@ rockchip_atomic_wait_for_complete(struct drm_atomic_state *old_state)
                if (!crtc->state->active)
                        continue;
 
+               if (!drm_atomic_helper_framebuffer_changed(dev,
+                               old_state, crtc))
+                       continue;
+
                ret = drm_crtc_vblank_get(crtc);
                if (ret != 0)
                        continue;
@@ -241,7 +259,7 @@ rockchip_atomic_commit_complete(struct rockchip_atomic_commit *commit)
 
        drm_atomic_helper_commit_planes(dev, state, true);
 
-       rockchip_atomic_wait_for_complete(state);
+       rockchip_atomic_wait_for_complete(dev, state);
 
        drm_atomic_helper_cleanup_planes(dev, state);
 
index 50432e9..73718c5 100644 (file)
 #ifndef _ROCKCHIP_DRM_FBDEV_H
 #define _ROCKCHIP_DRM_FBDEV_H
 
+#ifdef CONFIG_DRM_FBDEV_EMULATION
 int rockchip_drm_fbdev_init(struct drm_device *dev);
 void rockchip_drm_fbdev_fini(struct drm_device *dev);
+#else
+static inline int rockchip_drm_fbdev_init(struct drm_device *dev)
+{
+       return 0;
+}
+
+static inline void rockchip_drm_fbdev_fini(struct drm_device *dev)
+{
+}
+#endif
 
 #endif /* _ROCKCHIP_DRM_FBDEV_H */
index d908321..18e0733 100644 (file)
@@ -234,13 +234,8 @@ int rockchip_gem_dumb_create(struct drm_file *file_priv,
        /*
         * align to 64 bytes since Mali requires it.
         */
-       min_pitch = ALIGN(min_pitch, 64);
-
-       if (args->pitch < min_pitch)
-               args->pitch = min_pitch;
-
-       if (args->size < args->pitch * args->height)
-               args->size = args->pitch * args->height;
+       args->pitch = ALIGN(min_pitch, 64);
+       args->size = args->pitch * args->height;
 
        rk_obj = rockchip_gem_create_with_handle(file_priv, dev, args->size,
                                                 &args->handle);
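With this change the kernel dictates pitch and size outright instead of merely raising them to a minimum, so oversized user-supplied values are no longer honored. Worked numbers, with width and bpp assumed for illustration:

/* Example: 1366 pixels wide at 32 bpp (values assumed):
 *   min_pitch   = 1366 * 4 = 5464 bytes
 *   args->pitch = ALIGN(5464, 64) = 5504
 *   args->size  = 5504 * args->height
 */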
index 46c2a8d..fd37054 100644 (file)
@@ -43,8 +43,8 @@
 
 #define REG_SET(x, base, reg, v, mode) \
                __REG_SET_##mode(x, base + reg.offset, reg.mask, reg.shift, v)
-#define REG_SET_MASK(x, base, reg, v, mode) \
-               __REG_SET_##mode(x, base + reg.offset, reg.mask, reg.shift, v)
+#define REG_SET_MASK(x, base, reg, mask, v, mode) \
+               __REG_SET_##mode(x, base + reg.offset, mask, reg.shift, v)
 
 #define VOP_WIN_SET(x, win, name, v) \
                REG_SET(x, win->base, win->phy->name, v, RELAXED)
 #define VOP_INTR_GET(vop, name) \
                vop_read_reg(vop, 0, &vop->data->ctrl->name)
 
-#define VOP_INTR_SET(vop, name, v) \
-               REG_SET(vop, 0, vop->data->intr->name, v, NORMAL)
+#define VOP_INTR_SET(vop, name, mask, v) \
+               REG_SET_MASK(vop, 0, vop->data->intr->name, mask, v, NORMAL)
 #define VOP_INTR_SET_TYPE(vop, name, type, v) \
        do { \
-               int i, reg = 0; \
+               int i, reg = 0, mask = 0; \
                for (i = 0; i < vop->data->intr->nintrs; i++) { \
-                       if (vop->data->intr->intrs[i] & type) \
+                       if (vop->data->intr->intrs[i] & type) { \
                                reg |= (v) << i; \
+                               mask |= 1 << i; \
+                       } \
                } \
-               VOP_INTR_SET(vop, name, reg); \
+               VOP_INTR_SET(vop, name, mask, reg); \
        } while (0)
 #define VOP_INTR_GET_TYPE(vop, name, type) \
                vop_get_intr_type(vop, &vop->data->intr->name, type)
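The extra mask argument is the point of this change: VOP_INTR_SET_TYPE() now rewrites only the bit positions of interrupts matching the requested type, whereas the old REG_SET-based version used the register's full field mask and so zeroed every non-matching interrupt bit. A worked expansion, with a hypothetical interrupt layout:

/* Assume nintrs = 4 and intrs[] = { INTR_A, INTR_B, INTR_C, INTR_D }:
 *   VOP_INTR_SET_TYPE(vop, enable, INTR_B | INTR_C, 1)
 * accumulates
 *   reg  = 0b0110   value bits for the matching interrupts
 *   mask = 0b0110   only these bit positions are written back
 * so the current state of INTR_A and INTR_D is preserved.
 */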
index 424d515..314ff71 100644 (file)
@@ -144,19 +144,16 @@ int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused)
 }
 #endif /* CONFIG_DEBUG_FS */
 
-/*
- * Asks the firmware to turn on power to the V3D engine.
- *
- * This may be doable with just the clocks interface, though this
- * packet does some other register setup from the firmware, too.
- */
 int
 vc4_v3d_set_power(struct vc4_dev *vc4, bool on)
 {
-       if (on)
-               return pm_generic_poweroff(&vc4->v3d->pdev->dev);
-       else
-               return pm_generic_resume(&vc4->v3d->pdev->dev);
+       /* XXX: This interface is needed for GPU reset, and the way to
+        * do it is to turn our power domain off and back on.  We
+        * can't just reset from within the driver, because the reset
+        * bits are in the power domain's register area, and get set
+        * during the poweron process.
+        */
+       return 0;
 }
 
 static void vc4_v3d_init_hw(struct drm_device *dev)
index c49812b..24fb348 100644 (file)
@@ -25,6 +25,7 @@
  *
  **************************************************************************/
 #include <linux/module.h>
+#include <linux/console.h>
 
 #include <drm/drmP.h>
 #include "vmwgfx_drv.h"
@@ -1538,6 +1539,12 @@ static int vmw_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 static int __init vmwgfx_init(void)
 {
        int ret;
+
+#ifdef CONFIG_VGA_CONSOLE
+       if (vgacon_text_force())
+               return -EINVAL;
+#endif
+
        ret = drm_pci_init(&driver, &vmw_pci_driver);
        if (ret)
                DRM_ERROR("Failed initializing DRM.\n");
index c848789..c43318d 100644 (file)
@@ -930,6 +930,17 @@ static struct dmi_system_id i8k_dmi_table[] __initdata = {
 MODULE_DEVICE_TABLE(dmi, i8k_dmi_table);
 
 static struct dmi_system_id i8k_blacklist_dmi_table[] __initdata = {
+       {
+               /*
+                * CPU fan speed going up and down on Dell Studio XPS 8000
+                * for unknown reasons.
+                */
+               .ident = "Dell Studio XPS 8000",
+               .matches = {
+                       DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Studio XPS 8000"),
+               },
+       },
        {
                /*
                 * CPU fan speed going up and down on Dell Studio XPS 8100
index f77eb97..4f695d8 100644 (file)
@@ -90,7 +90,15 @@ static ssize_t show_power(struct device *dev,
        pci_bus_read_config_dword(f4->bus, PCI_DEVFN(PCI_SLOT(f4->devfn), 5),
                                  REG_TDP_LIMIT3, &val);
 
-       tdp_limit = val >> 16;
+       /*
+        * On Carrizo and later platforms, ApmTdpLimit bit field
+        * is extended to 16:31 from 16:28.
+        */
+       if (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model >= 0x60)
+               tdp_limit = val >> 16;
+       else
+               tdp_limit = (val >> 16) & 0x1fff;
+
        curr_pwr_watts = ((u64)(tdp_limit +
                                data->base_tdp)) << running_avg_range;
        curr_pwr_watts -= running_avg_capture;
index ba9732c..10fbd6d 100644 (file)
@@ -874,7 +874,8 @@ int i2c_dw_probe(struct dw_i2c_dev *dev)
        i2c_set_adapdata(adap, dev);
 
        i2c_dw_disable_int(dev);
-       r = devm_request_irq(dev->dev, dev->irq, i2c_dw_isr, IRQF_SHARED,
+       r = devm_request_irq(dev->dev, dev->irq, i2c_dw_isr,
+                            IRQF_SHARED | IRQF_COND_SUSPEND,
                             dev_name(dev->dev), dev);
        if (r) {
                dev_err(dev->dev, "failure requesting irq %i: %d\n",
index e045985..93f2895 100644 (file)
@@ -137,10 +137,11 @@ static const struct dmi_system_id piix4_dmi_ibm[] = {
 };
 
 /* SB800 globals */
+static DEFINE_MUTEX(piix4_mutex_sb800);
 static const char *piix4_main_port_names_sb800[PIIX4_MAX_ADAPTERS] = {
-       "SDA0", "SDA2", "SDA3", "SDA4"
+       " port 0", " port 2", " port 3", " port 4"
 };
-static const char *piix4_aux_port_name_sb800 = "SDA1";
+static const char *piix4_aux_port_name_sb800 = " port 1";
 
 struct i2c_piix4_adapdata {
        unsigned short smba;
@@ -148,7 +149,6 @@ struct i2c_piix4_adapdata {
        /* SB800 */
        bool sb800_main;
        unsigned short port;
-       struct mutex *mutex;
 };
 
 static int piix4_setup(struct pci_dev *PIIX4_dev,
@@ -275,10 +275,12 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev,
        else
                smb_en = (aux) ? 0x28 : 0x2c;
 
+       mutex_lock(&piix4_mutex_sb800);
        outb_p(smb_en, SB800_PIIX4_SMB_IDX);
        smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1);
        outb_p(smb_en + 1, SB800_PIIX4_SMB_IDX);
        smba_en_hi = inb_p(SB800_PIIX4_SMB_IDX + 1);
+       mutex_unlock(&piix4_mutex_sb800);
 
        if (!smb_en) {
                smb_en_status = smba_en_lo & 0x10;
@@ -559,7 +561,7 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr,
        u8 port;
        int retval;
 
-       mutex_lock(adapdata->mutex);
+       mutex_lock(&piix4_mutex_sb800);
 
        outb_p(SB800_PIIX4_PORT_IDX, SB800_PIIX4_SMB_IDX);
        smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1);
@@ -574,7 +576,7 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr,
 
        outb_p(smba_en_lo, SB800_PIIX4_SMB_IDX + 1);
 
-       mutex_unlock(adapdata->mutex);
+       mutex_unlock(&piix4_mutex_sb800);
 
        return retval;
 }
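Both the probe-time reads in piix4_setup_sb800() and the per-transfer port switching above now serialize on the same static piix4_mutex_sb800, because the SB800 index/data pair is shared hardware state: writing SB800_PIIX4_SMB_IDX selects which register the following access to SB800_PIIX4_SMB_IDX + 1 hits, so an unserialized reader can be redirected mid-sequence. The guarded access pattern, as a sketch (read_indexed() is a hypothetical helper, not in the patch):

static u8 read_indexed(u8 index)
{
	u8 val;

	mutex_lock(&piix4_mutex_sb800);
	outb_p(index, SB800_PIIX4_SMB_IDX);	/* select register */
	val = inb_p(SB800_PIIX4_SMB_IDX + 1);	/* read its value  */
	mutex_unlock(&piix4_mutex_sb800);

	return val;
}

Moving to a static mutex is also what lets piix4_add_adapters_sb800() and piix4_adap_remove() shed the heap-allocated mutex and its kfree() bookkeeping in the hunks below.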
@@ -625,6 +627,7 @@ static struct i2c_adapter *piix4_main_adapters[PIIX4_MAX_ADAPTERS];
 static struct i2c_adapter *piix4_aux_adapter;
 
 static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
+                            bool sb800_main, unsigned short port,
                             const char *name, struct i2c_adapter **padap)
 {
        struct i2c_adapter *adap;
@@ -639,7 +642,8 @@ static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
 
        adap->owner = THIS_MODULE;
        adap->class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
-       adap->algo = &smbus_algorithm;
+       adap->algo = sb800_main ? &piix4_smbus_algorithm_sb800
+                               : &smbus_algorithm;
 
        adapdata = kzalloc(sizeof(*adapdata), GFP_KERNEL);
        if (adapdata == NULL) {
@@ -649,12 +653,14 @@ static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
        }
 
        adapdata->smba = smba;
+       adapdata->sb800_main = sb800_main;
+       adapdata->port = port;
 
        /* set up the sysfs linkage to our parent device */
        adap->dev.parent = &dev->dev;
 
        snprintf(adap->name, sizeof(adap->name),
-               "SMBus PIIX4 adapter %s at %04x", name, smba);
+               "SMBus PIIX4 adapter%s at %04x", name, smba);
 
        i2c_set_adapdata(adap, adapdata);
 
@@ -673,30 +679,16 @@ static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
 
 static int piix4_add_adapters_sb800(struct pci_dev *dev, unsigned short smba)
 {
-       struct mutex *mutex;
        struct i2c_piix4_adapdata *adapdata;
        int port;
        int retval;
 
-       mutex = kzalloc(sizeof(*mutex), GFP_KERNEL);
-       if (mutex == NULL)
-               return -ENOMEM;
-
-       mutex_init(mutex);
-
        for (port = 0; port < PIIX4_MAX_ADAPTERS; port++) {
-               retval = piix4_add_adapter(dev, smba,
+               retval = piix4_add_adapter(dev, smba, true, port,
                                           piix4_main_port_names_sb800[port],
                                           &piix4_main_adapters[port]);
                if (retval < 0)
                        goto error;
-
-               piix4_main_adapters[port]->algo = &piix4_smbus_algorithm_sb800;
-
-               adapdata = i2c_get_adapdata(piix4_main_adapters[port]);
-               adapdata->sb800_main = true;
-               adapdata->port = port;
-               adapdata->mutex = mutex;
        }
 
        return retval;
@@ -714,19 +706,20 @@ error:
                }
        }
 
-       kfree(mutex);
-
        return retval;
 }
 
 static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id)
 {
        int retval;
+       bool is_sb800 = false;
 
        if ((dev->vendor == PCI_VENDOR_ID_ATI &&
             dev->device == PCI_DEVICE_ID_ATI_SBX00_SMBUS &&
             dev->revision >= 0x40) ||
            dev->vendor == PCI_VENDOR_ID_AMD) {
+               is_sb800 = true;
+
                if (!request_region(SB800_PIIX4_SMB_IDX, 2, "smba_idx")) {
                        dev_err(&dev->dev,
                        "SMBus base address index region 0x%x already in use!\n",
@@ -756,7 +749,7 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id)
                        return retval;
 
                /* Try to register main SMBus adapter, give up if we can't */
-               retval = piix4_add_adapter(dev, retval, "main",
+               retval = piix4_add_adapter(dev, retval, false, 0, "",
                                           &piix4_main_adapters[0]);
                if (retval < 0)
                        return retval;
@@ -783,7 +776,8 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id)
        if (retval > 0) {
                /* Try to add the aux adapter if it exists,
                 * piix4_add_adapter will clean up if this fails */
-               piix4_add_adapter(dev, retval, piix4_aux_port_name_sb800,
+               piix4_add_adapter(dev, retval, false, 0,
+                                 is_sb800 ? piix4_aux_port_name_sb800 : "",
                                  &piix4_aux_adapter);
        }
 
@@ -798,10 +792,8 @@ static void piix4_adap_remove(struct i2c_adapter *adap)
                i2c_del_adapter(adap);
                if (adapdata->port == 0) {
                        release_region(adapdata->smba, SMBIOSIZE);
-                       if (adapdata->sb800_main) {
-                               kfree(adapdata->mutex);
+                       if (adapdata->sb800_main)
                                release_region(SB800_PIIX4_SMB_IDX, 2);
-                       }
                }
                kfree(adapdata);
                kfree(adap);
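
The hunks above replace the kmalloc'd mutex that the SB800 port adapters shared by hand with a single file-scope piix4_mutex_sb800, so the index/data register pair behind SB800_PIIX4_SMB_IDX is serialized without any allocation or ownership bookkeeping. A minimal sketch of the pattern, assuming illustrative port and helper names that are not taken from the driver:

#include <linux/mutex.h>
#include <linux/io.h>

/* One static mutex guards the shared index/data register pair. */
static DEFINE_MUTEX(example_idx_mutex);

/* Hypothetical helper: read a banked register through an index port. */
static u8 example_indexed_read(unsigned long idx_port, u8 reg)
{
        u8 val;

        mutex_lock(&example_idx_mutex);
        outb_p(reg, idx_port);          /* select the register... */
        val = inb_p(idx_port + 1);      /* ...then read it back   */
        mutex_unlock(&example_idx_mutex);
        return val;
}

Because a statically defined mutex lives as long as the module, the error and remove paths no longer need the kfree(adapdata->mutex) bookkeeping the old code carried.
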
index cb32b59..36607d5 100644 (file)
@@ -43,7 +43,7 @@ int adis_update_scan_mode(struct iio_dev *indio_dev,
                return -ENOMEM;
 
        rx = adis->buffer;
-       tx = rx + indio_dev->scan_bytes;
+       tx = rx + scan_count;
 
        spi_message_init(&adis->msg);
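
The one-line fix above corrects a pointer-arithmetic scaling bug: rx points at 16-bit elements, so adding indio_dev->scan_bytes (a byte count) advanced it twice as far as intended, and tx landed past the rx area. Adding scan_count (an element count) is right because C pointer arithmetic already multiplies by the element size. A standalone illustration with hypothetical names:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint16_t buf[8];            /* pretend: rx area then tx area */
        uint16_t *rx = buf;
        size_t scan_count = 4;      /* elements per area             */
        size_t scan_bytes = scan_count * sizeof(uint16_t);

        uint16_t *tx_ok  = rx + scan_count;  /* buf + 4 elements: correct     */
        uint16_t *tx_bad = rx + scan_bytes;  /* buf + 8 elements: off the end */

        printf("ok offset=%td bad offset=%td\n", tx_ok - buf, tx_bad - buf);
        return 0;
}
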
 
index 539b0de..e5e2239 100644 (file)
@@ -2049,7 +2049,7 @@ static void do_attach(struct iommu_dev_data *dev_data,
        /* Update device table */
        set_dte_entry(dev_data->devid, domain, ats);
        if (alias != dev_data->devid)
-               set_dte_entry(dev_data->devid, domain, ats);
+               set_dte_entry(alias, domain, ats);
 
        device_flush_dte(dev_data);
 }
index ac73876..986a53e 100644 (file)
@@ -1489,7 +1489,7 @@ static void iommu_disable_dev_iotlb(struct device_domain_info *info)
 {
        struct pci_dev *pdev;
 
-       if (dev_is_pci(info->dev))
+       if (!dev_is_pci(info->dev))
                return;
 
        pdev = to_pci_dev(info->dev);
index 8bbcbfe..381ca5a 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/sizes.h>
 #include <linux/slab.h>
 #include <linux/types.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/barrier.h>
 
index 715923d..fb50911 100644 (file)
@@ -159,6 +159,7 @@ config TB10X_IRQC
 config TS4800_IRQ
        tristate "TS-4800 IRQ controller"
        select IRQ_DOMAIN
+       depends on HAS_IOMEM
        help
          Support for the TS-4800 FPGA IRQ controller
 
index b12a5d5..37199b9 100644 (file)
@@ -86,7 +86,7 @@ int aic_common_set_priority(int priority, unsigned *val)
            priority > AT91_AIC_IRQ_MAX_PRIORITY)
                return -EINVAL;
 
-       *val &= AT91_AIC_PRIOR;
+       *val &= ~AT91_AIC_PRIOR;
        *val |= priority;
 
        return 0;
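
The fix inverts the mask: *val &= AT91_AIC_PRIOR kept only the old priority bits and wiped everything else, whereas a read-modify-write of a register field must clear the field with the complement of its mask before OR-ing in the new value. The general shape, as a sketch:

#include <stdint.h>

/* Generic field update: clear the field, then insert the new value. */
static inline uint32_t field_update(uint32_t reg, uint32_t mask, uint32_t val)
{
        return (reg & ~mask) | (val & mask);
}
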
index e23d1d1..3447549 100644 (file)
@@ -875,6 +875,7 @@ static int its_alloc_tables(const char *node_name, struct its_node *its)
                }
 
                alloc_size = (1 << order) * PAGE_SIZE;
+retry_alloc_baser:
                alloc_pages = (alloc_size / psz);
                if (alloc_pages > GITS_BASER_PAGES_MAX) {
                        alloc_pages = GITS_BASER_PAGES_MAX;
@@ -938,13 +939,16 @@ retry_baser:
                         * size and retry. If we reach 4K, then
                         * something is horribly wrong...
                         */
+                       free_pages((unsigned long)base, order);
+                       its->tables[i] = NULL;
+
                        switch (psz) {
                        case SZ_16K:
                                psz = SZ_4K;
-                               goto retry_baser;
+                               goto retry_alloc_baser;
                        case SZ_64K:
                                psz = SZ_16K;
-                               goto retry_baser;
+                               goto retry_alloc_baser;
                        }
                }
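
Before this change, shrinking the page size jumped back to retry_baser and re-programmed the same, now wrongly sized, allocation; the new retry_alloc_baser label frees the old pages and reruns the allocation itself. A sketch of the retry-with-smaller-granularity shape, with a hypothetical hardware-acceptance check standing in for the BASER read-back:

/* Sketch only: retry an allocation at progressively smaller page sizes,
 * freeing the previous attempt before each retry. */
static void *alloc_with_fallback(unsigned int order, size_t *psz)
{
        void *base;

retry:
        base = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
        if (!base)
                return NULL;

        if (!hw_accepts_page_size(base, *psz)) {   /* hypothetical check */
                free_pages((unsigned long)base, order);
                if (*psz == SZ_4K)
                        return NULL;    /* nothing smaller to fall back to */
                *psz = (*psz == SZ_64K) ? SZ_16K : SZ_4K;
                goto retry;
        }
        return base;
}
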
 
index c22e2d4..efe5084 100644 (file)
@@ -241,6 +241,7 @@ static int __init asm9260_of_init(struct device_node *np,
                writel(0, icoll_priv.intr + i);
 
        icoll_add_domain(np, ASM9260_NUM_IRQS);
+       set_handle_irq(icoll_handle_irq);
 
        return 0;
 }
index c71914e..5dc5a76 100644 (file)
@@ -605,7 +605,7 @@ err:
        return ERR_PTR(ret);
 }
 
-static struct s3c_irq_data init_eint[32] = {
+static struct s3c_irq_data __maybe_unused init_eint[32] = {
        { .type = S3C_IRQTYPE_NONE, }, /* reserved */
        { .type = S3C_IRQTYPE_NONE, }, /* reserved */
        { .type = S3C_IRQTYPE_NONE, }, /* reserved */
index 5d62373..2ed30f0 100644 (file)
@@ -1,6 +1,10 @@
+config NVME_CORE
+       tristate
+
 config BLK_DEV_NVME
        tristate "NVM Express block device"
        depends on PCI && BLOCK
+       select NVME_CORE
        ---help---
          The NVM Express driver is for solid state drives directly
          connected to the PCI or PCI Express bus.  If you know you
@@ -11,7 +15,7 @@ config BLK_DEV_NVME
 
 config BLK_DEV_NVME_SCSI
        bool "SCSI emulation for NVMe device nodes"
-       depends on BLK_DEV_NVME
+       depends on NVME_CORE
        ---help---
          This adds support for the SG_IO ioctl on the NVMe character
          and block device nodes, as well as a translation for a small
index 51bf908..9a3ca89 100644 (file)
@@ -1,6 +1,8 @@
+obj-$(CONFIG_NVME_CORE)                        += nvme-core.o
+obj-$(CONFIG_BLK_DEV_NVME)             += nvme.o
 
-obj-$(CONFIG_BLK_DEV_NVME)     += nvme.o
+nvme-core-y                            := core.o
+nvme-core-$(CONFIG_BLK_DEV_NVME_SCSI)  += scsi.o
+nvme-core-$(CONFIG_NVM)                        += lightnvm.o
 
-lightnvm-$(CONFIG_NVM)                 := lightnvm.o
-nvme-y                                 += core.o pci.o $(lightnvm-y)
-nvme-$(CONFIG_BLK_DEV_NVME_SCSI)        += scsi.o
+nvme-y                                 += pci.o
index c5bf001..f08dcce 100644 (file)
 
 #define NVME_MINORS            (1U << MINORBITS)
 
+unsigned char admin_timeout = 60;
+module_param(admin_timeout, byte, 0644);
+MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
+EXPORT_SYMBOL_GPL(admin_timeout);
+
+unsigned char nvme_io_timeout = 30;
+module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
+MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
+EXPORT_SYMBOL_GPL(nvme_io_timeout);
+
+unsigned char shutdown_timeout = 5;
+module_param(shutdown_timeout, byte, 0644);
+MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
+
 static int nvme_major;
 module_param(nvme_major, int, 0);
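
The timeout knobs move from pci.c into the core module and are exported so other transports can reference them; module_param keeps the existing admin_timeout=/io_timeout= names, and module_param_named decouples the parameter name from the C identifier. A minimal sketch of the pattern, with an illustrative variable and description:

#include <linux/module.h>

unsigned char example_timeout = 60;             /* seconds */
module_param_named(timeout, example_timeout, byte, 0644);
MODULE_PARM_DESC(timeout, "timeout in seconds (sketch)");
EXPORT_SYMBOL_GPL(example_timeout);             /* visible to dependent modules */
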
 
@@ -40,7 +54,7 @@ static int nvme_char_major;
 module_param(nvme_char_major, int, 0);
 
 static LIST_HEAD(nvme_ctrl_list);
-DEFINE_SPINLOCK(dev_list_lock);
+static DEFINE_SPINLOCK(dev_list_lock);
 
 static struct class *nvme_class;
 
@@ -71,11 +85,21 @@ static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk)
 
        spin_lock(&dev_list_lock);
        ns = disk->private_data;
-       if (ns && !kref_get_unless_zero(&ns->kref))
-               ns = NULL;
+       if (ns) {
+               if (!kref_get_unless_zero(&ns->kref))
+                       goto fail;
+               if (!try_module_get(ns->ctrl->ops->module))
+                       goto fail_put_ns;
+       }
        spin_unlock(&dev_list_lock);
 
        return ns;
+
+fail_put_ns:
+       kref_put(&ns->kref, nvme_free_ns);
+fail:
+       spin_unlock(&dev_list_lock);
+       return NULL;
 }
 
 void nvme_requeue_req(struct request *req)
@@ -88,6 +112,7 @@ void nvme_requeue_req(struct request *req)
                blk_mq_kick_requeue_list(req->q);
        spin_unlock_irqrestore(req->q->queue_lock, flags);
 }
+EXPORT_SYMBOL_GPL(nvme_requeue_req);
 
 struct request *nvme_alloc_request(struct request_queue *q,
                struct nvme_command *cmd, unsigned int flags)
@@ -107,17 +132,18 @@ struct request *nvme_alloc_request(struct request_queue *q,
 
        req->cmd = (unsigned char *)cmd;
        req->cmd_len = sizeof(struct nvme_command);
-       req->special = (void *)0;
 
        return req;
 }
+EXPORT_SYMBOL_GPL(nvme_alloc_request);
 
 /*
  * Returns 0 on success.  If the result is negative, it's a Linux error code;
  * if the result is positive, it's an NVM Express status code
  */
 int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
-               void *buffer, unsigned bufflen, u32 *result, unsigned timeout)
+               struct nvme_completion *cqe, void *buffer, unsigned bufflen,
+               unsigned timeout)
 {
        struct request *req;
        int ret;
@@ -127,6 +153,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
                return PTR_ERR(req);
 
        req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
+       req->special = cqe;
 
        if (buffer && bufflen) {
                ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
@@ -135,8 +162,6 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
        }
 
        blk_execute_rq(req->q, NULL, req, 0);
-       if (result)
-               *result = (u32)(uintptr_t)req->special;
        ret = req->errors;
  out:
        blk_mq_free_request(req);
@@ -146,8 +171,9 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
                void *buffer, unsigned bufflen)
 {
-       return __nvme_submit_sync_cmd(q, cmd, buffer, bufflen, NULL, 0);
+       return __nvme_submit_sync_cmd(q, cmd, NULL, buffer, bufflen, 0);
 }
+EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd);
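
Instead of smuggling a 32-bit result through req->special as a casted pointer, the submission helpers now let the caller pass an on-stack struct nvme_completion; the completion path memcpys the whole CQE into it, and callers such as nvme_get_features() read le32_to_cpu(cqe.result) once the request finishes. The caller-side convention, roughly (q, cmd and result come from the surrounding function):

/* Sketch of the caller side: synchronous command with the full CQE back. */
struct nvme_completion cqe;
int ret;

ret = __nvme_submit_sync_cmd(q, &cmd, &cqe, NULL, 0, 0);
if (ret >= 0)
        result = le32_to_cpu(cqe.result);       /* CQE outlives the request */
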
 
 int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
                void __user *ubuffer, unsigned bufflen,
@@ -155,6 +181,7 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
                u32 *result, unsigned timeout)
 {
        bool write = cmd->common.opcode & 1;
+       struct nvme_completion cqe;
        struct nvme_ns *ns = q->queuedata;
        struct gendisk *disk = ns ? ns->disk : NULL;
        struct request *req;
@@ -167,6 +194,7 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
                return PTR_ERR(req);
 
        req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
+       req->special = &cqe;
 
        if (ubuffer && bufflen) {
                ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
@@ -221,7 +249,7 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
        blk_execute_rq(req->q, disk, req, 0);
        ret = req->errors;
        if (result)
-               *result = (u32)(uintptr_t)req->special;
+               *result = le32_to_cpu(cqe.result);
        if (meta && !ret && !write) {
                if (copy_to_user(meta_buffer, meta, meta_len))
                        ret = -EFAULT;
@@ -302,6 +330,8 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
                                        dma_addr_t dma_addr, u32 *result)
 {
        struct nvme_command c;
+       struct nvme_completion cqe;
+       int ret;
 
        memset(&c, 0, sizeof(c));
        c.features.opcode = nvme_admin_get_features;
@@ -309,13 +339,18 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
        c.features.prp1 = cpu_to_le64(dma_addr);
        c.features.fid = cpu_to_le32(fid);
 
-       return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0);
+       ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, NULL, 0, 0);
+       if (ret >= 0)
+               *result = le32_to_cpu(cqe.result);
+       return ret;
 }
 
 int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
                                        dma_addr_t dma_addr, u32 *result)
 {
        struct nvme_command c;
+       struct nvme_completion cqe;
+       int ret;
 
        memset(&c, 0, sizeof(c));
        c.features.opcode = nvme_admin_set_features;
@@ -323,7 +358,10 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
        c.features.fid = cpu_to_le32(fid);
        c.features.dword11 = cpu_to_le32(dword11);
 
-       return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0);
+       ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, NULL, 0, 0);
+       if (ret >= 0)
+               *result = le32_to_cpu(cqe.result);
+       return ret;
 }
 
 int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
@@ -363,6 +401,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
        *count = min(*count, nr_io_queues);
        return 0;
 }
+EXPORT_SYMBOL_GPL(nvme_set_queue_count);
 
 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 {
@@ -499,7 +538,10 @@ static int nvme_open(struct block_device *bdev, fmode_t mode)
 
 static void nvme_release(struct gendisk *disk, fmode_t mode)
 {
-       nvme_put_ns(disk->private_data);
+       struct nvme_ns *ns = disk->private_data;
+
+       module_put(ns->ctrl->ops->module);
+       nvme_put_ns(ns);
 }
 
 static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
@@ -557,8 +599,8 @@ static int nvme_revalidate_disk(struct gendisk *disk)
        unsigned short bs;
 
        if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) {
-               dev_warn(ns->ctrl->dev, "%s: Identify failure nvme%dn%d\n",
-                               __func__, ns->ctrl->instance, ns->ns_id);
+               dev_warn(disk_to_dev(ns->disk), "%s: Identify failure\n",
+                               __func__);
                return -ENODEV;
        }
        if (id->ncap == 0) {
@@ -568,7 +610,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
 
        if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) {
                if (nvme_nvm_register(ns->queue, disk->disk_name)) {
-                       dev_warn(ns->ctrl->dev,
+                       dev_warn(disk_to_dev(ns->disk),
                                "%s: LightNVM init failure\n", __func__);
                        kfree(id);
                        return -ENODEV;
@@ -741,7 +783,7 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
                if (fatal_signal_pending(current))
                        return -EINTR;
                if (time_after(jiffies, timeout)) {
-                       dev_err(ctrl->dev,
+                       dev_err(ctrl->device,
                                "Device not ready; aborting %s\n", enabled ?
                                                "initialisation" : "reset");
                        return -ENODEV;
@@ -769,6 +811,7 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
                return ret;
        return nvme_wait_ready(ctrl, cap, false);
 }
+EXPORT_SYMBOL_GPL(nvme_disable_ctrl);
 
 int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
 {
@@ -781,7 +824,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
        int ret;
 
        if (page_shift < dev_page_min) {
-               dev_err(ctrl->dev,
+               dev_err(ctrl->device,
                        "Minimum device page size %u too large for host (%u)\n",
                        1 << dev_page_min, 1 << page_shift);
                return -ENODEV;
@@ -800,6 +843,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
                return ret;
        return nvme_wait_ready(ctrl, cap, true);
 }
+EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
 
 int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
 {
@@ -822,7 +866,7 @@ int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
                if (fatal_signal_pending(current))
                        return -EINTR;
                if (time_after(jiffies, timeout)) {
-                       dev_err(ctrl->dev,
+                       dev_err(ctrl->device,
                                "Device shutdown incomplete; abort shutdown\n");
                        return -ENODEV;
                }
@@ -830,6 +874,7 @@ int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
 
        return ret;
 }
+EXPORT_SYMBOL_GPL(nvme_shutdown_ctrl);
 
 /*
  * Initialize the cached copies of the Identify data and various controller
@@ -844,13 +889,13 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 
        ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
        if (ret) {
-               dev_err(ctrl->dev, "Reading VS failed (%d)\n", ret);
+               dev_err(ctrl->device, "Reading VS failed (%d)\n", ret);
                return ret;
        }
 
        ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &cap);
        if (ret) {
-               dev_err(ctrl->dev, "Reading CAP failed (%d)\n", ret);
+               dev_err(ctrl->device, "Reading CAP failed (%d)\n", ret);
                return ret;
        }
        page_shift = NVME_CAP_MPSMIN(cap) + 12;
@@ -860,13 +905,14 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 
        ret = nvme_identify_ctrl(ctrl, &id);
        if (ret) {
-               dev_err(ctrl->dev, "Identify Controller failed (%d)\n", ret);
+               dev_err(ctrl->device, "Identify Controller failed (%d)\n", ret);
                return -EIO;
        }
 
        ctrl->oncs = le16_to_cpup(&id->oncs);
        atomic_set(&ctrl->abort_limit, id->acl + 1);
        ctrl->vwc = id->vwc;
+       ctrl->cntlid = le16_to_cpup(&id->cntlid);
        memcpy(ctrl->serial, id->sn, sizeof(id->sn));
        memcpy(ctrl->model, id->mn, sizeof(id->mn));
        memcpy(ctrl->firmware_rev, id->fr, sizeof(id->fr));
@@ -891,6 +937,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
        kfree(id);
        return 0;
 }
+EXPORT_SYMBOL_GPL(nvme_init_identify);
 
 static int nvme_dev_open(struct inode *inode, struct file *file)
 {
@@ -937,13 +984,13 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
 
        ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
        if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
-               dev_warn(ctrl->dev,
+               dev_warn(ctrl->device,
                        "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
                ret = -EINVAL;
                goto out_unlock;
        }
 
-       dev_warn(ctrl->dev,
+       dev_warn(ctrl->device,
                "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
        kref_get(&ns->kref);
        mutex_unlock(&ctrl->namespaces_mutex);
@@ -969,7 +1016,7 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
        case NVME_IOCTL_IO_CMD:
                return nvme_dev_user_cmd(ctrl, argp);
        case NVME_IOCTL_RESET:
-               dev_warn(ctrl->dev, "resetting controller\n");
+               dev_warn(ctrl->device, "resetting controller\n");
                return ctrl->ops->reset_ctrl(ctrl);
        case NVME_IOCTL_SUBSYS_RESET:
                return nvme_reset_subsystem(ctrl);
@@ -1053,7 +1100,7 @@ static const struct attribute_group nvme_ns_attr_group = {
        .is_visible     = nvme_attrs_are_visible,
 };
 
-#define nvme_show_function(field)                                              \
+#define nvme_show_str_function(field)                                          \
 static ssize_t  field##_show(struct device *dev,                               \
                            struct device_attribute *attr, char *buf)           \
 {                                                                              \
@@ -1062,15 +1109,26 @@ static ssize_t  field##_show(struct device *dev,                                \
 }                                                                              \
 static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
 
-nvme_show_function(model);
-nvme_show_function(serial);
-nvme_show_function(firmware_rev);
+#define nvme_show_int_function(field)                                          \
+static ssize_t  field##_show(struct device *dev,                               \
+                           struct device_attribute *attr, char *buf)           \
+{                                                                              \
+        struct nvme_ctrl *ctrl = dev_get_drvdata(dev);                         \
+        return sprintf(buf, "%d\n", ctrl->field);      \
+}                                                                              \
+static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
+
+nvme_show_str_function(model);
+nvme_show_str_function(serial);
+nvme_show_str_function(firmware_rev);
+nvme_show_int_function(cntlid);
 
 static struct attribute *nvme_dev_attrs[] = {
        &dev_attr_reset_controller.attr,
        &dev_attr_model.attr,
        &dev_attr_serial.attr,
        &dev_attr_firmware_rev.attr,
+       &dev_attr_cntlid.attr,
        NULL
 };
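
Splitting nvme_show_function into nvme_show_str_function and nvme_show_int_function lets one macro family stamp out read-only sysfs attributes for both string and integer controller fields. A reduced sketch of the integer variant, with a placeholder struct and field:

#include <linux/device.h>
#include <linux/sysfs.h>

struct example_ctrl { int cntlid; };            /* placeholder type */

/* Sketch: macro-generated read-only sysfs attribute for an int field. */
#define example_show_int(field)                                         \
static ssize_t field##_show(struct device *dev,                         \
                            struct device_attribute *attr, char *buf)   \
{                                                                       \
        struct example_ctrl *ctrl = dev_get_drvdata(dev);               \
        return sprintf(buf, "%d\n", ctrl->field);                       \
}                                                                       \
static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL)

example_show_int(cntlid);  /* expands to cntlid_show() + dev_attr_cntlid */
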
 
@@ -1296,6 +1354,7 @@ void nvme_scan_namespaces(struct nvme_ctrl *ctrl)
        mutex_unlock(&ctrl->namespaces_mutex);
        kfree(id);
 }
+EXPORT_SYMBOL_GPL(nvme_scan_namespaces);
 
 void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
 {
@@ -1306,6 +1365,7 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
                nvme_ns_remove(ns);
        mutex_unlock(&ctrl->namespaces_mutex);
 }
+EXPORT_SYMBOL_GPL(nvme_remove_namespaces);
 
 static DEFINE_IDA(nvme_instance_ida);
 
@@ -1337,13 +1397,14 @@ static void nvme_release_instance(struct nvme_ctrl *ctrl)
 }
 
 void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
- {
+{
        device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance));
 
        spin_lock(&dev_list_lock);
        list_del(&ctrl->node);
        spin_unlock(&dev_list_lock);
 }
+EXPORT_SYMBOL_GPL(nvme_uninit_ctrl);
 
 static void nvme_free_ctrl(struct kref *kref)
 {
@@ -1359,6 +1420,7 @@ void nvme_put_ctrl(struct nvme_ctrl *ctrl)
 {
        kref_put(&ctrl->kref, nvme_free_ctrl);
 }
+EXPORT_SYMBOL_GPL(nvme_put_ctrl);
 
 /*
  * Initialize a NVMe controller structures.  This needs to be called during
@@ -1383,14 +1445,13 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 
        ctrl->device = device_create_with_groups(nvme_class, ctrl->dev,
                                MKDEV(nvme_char_major, ctrl->instance),
-                               dev, nvme_dev_attr_groups,
+                               ctrl, nvme_dev_attr_groups,
                                "nvme%d", ctrl->instance);
        if (IS_ERR(ctrl->device)) {
                ret = PTR_ERR(ctrl->device);
                goto out_release_instance;
        }
        get_device(ctrl->device);
-       dev_set_drvdata(ctrl->device, ctrl);
 
        spin_lock(&dev_list_lock);
        list_add_tail(&ctrl->node, &nvme_ctrl_list);
@@ -1402,6 +1463,7 @@ out_release_instance:
 out:
        return ret;
 }
+EXPORT_SYMBOL_GPL(nvme_init_ctrl);
 
 void nvme_stop_queues(struct nvme_ctrl *ctrl)
 {
@@ -1418,6 +1480,7 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl)
        }
        mutex_unlock(&ctrl->namespaces_mutex);
 }
+EXPORT_SYMBOL_GPL(nvme_stop_queues);
 
 void nvme_start_queues(struct nvme_ctrl *ctrl)
 {
@@ -1431,6 +1494,7 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
        }
        mutex_unlock(&ctrl->namespaces_mutex);
 }
+EXPORT_SYMBOL_GPL(nvme_start_queues);
 
 int __init nvme_core_init(void)
 {
@@ -1470,3 +1534,8 @@ void nvme_core_exit(void)
        class_destroy(nvme_class);
        __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
 }
+
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.0");
+module_init(nvme_core_init);
+module_exit(nvme_core_exit);
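
With nvme_core_init()/nvme_core_exit() now wired up via module_init/module_exit, the core registers its chrdev and class on its own load instead of being called from the PCI driver's init path. The skeleton being adopted, in outline:

/* Sketch: minimal standalone module skeleton of the kind core.o becomes. */
#include <linux/module.h>

static int __init example_core_init(void)
{
        return 0;       /* would register chrdev + class here */
}

static void __exit example_core_exit(void)
{
        /* would destroy class + unregister chrdev here */
}

module_init(example_core_init);
module_exit(example_core_exit);
MODULE_LICENSE("GPL");
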
index 4fb5bb7..9b71fa8 100644 (file)
@@ -77,6 +77,7 @@ struct nvme_ctrl {
        char serial[20];
        char model[40];
        char firmware_rev[8];
+       int cntlid;
 
        u32 ctrl_config;
 
@@ -117,6 +118,7 @@ struct nvme_ns {
 };
 
 struct nvme_ctrl_ops {
+       struct module *module;
        int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
        int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
        int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
@@ -247,7 +249,8 @@ void nvme_requeue_req(struct request *req);
 int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
                void *buf, unsigned bufflen);
 int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
-               void *buffer, unsigned bufflen,  u32 *result, unsigned timeout);
+               struct nvme_completion *cqe, void *buffer, unsigned bufflen,
+               unsigned timeout);
 int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
                void __user *ubuffer, unsigned bufflen, u32 *result,
                unsigned timeout);
@@ -265,8 +268,6 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
                        dma_addr_t dma_addr, u32 *result);
 int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
 
-extern spinlock_t dev_list_lock;
-
 struct sg_io_hdr;
 
 int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr);
index 72ef832..d47b087 100644 (file)
@@ -27,7 +27,6 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/kdev_t.h>
-#include <linux/kthread.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -39,6 +38,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/t10-pi.h>
+#include <linux/timer.h>
 #include <linux/types.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <asm/unaligned.h>
 #define NVME_NR_AEN_COMMANDS   1
 #define NVME_AQ_BLKMQ_DEPTH    (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS)
 
-unsigned char admin_timeout = 60;
-module_param(admin_timeout, byte, 0644);
-MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
-
-unsigned char nvme_io_timeout = 30;
-module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
-MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
-
-unsigned char shutdown_timeout = 5;
-module_param(shutdown_timeout, byte, 0644);
-MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
-
 static int use_threaded_interrupts;
 module_param(use_threaded_interrupts, int, 0);
 
@@ -76,10 +64,7 @@ static bool use_cmb_sqes = true;
 module_param(use_cmb_sqes, bool, 0644);
 MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");
 
-static LIST_HEAD(dev_list);
-static struct task_struct *nvme_thread;
 static struct workqueue_struct *nvme_workq;
-static wait_queue_head_t nvme_kthread_wait;
 
 struct nvme_dev;
 struct nvme_queue;
@@ -93,7 +78,6 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
  * Represents an NVM Express device.  Each nvme_dev is a PCI function.
  */
 struct nvme_dev {
-       struct list_head node;
        struct nvme_queue **queues;
        struct blk_mq_tag_set tagset;
        struct blk_mq_tag_set admin_tagset;
@@ -111,6 +95,8 @@ struct nvme_dev {
        struct work_struct reset_work;
        struct work_struct scan_work;
        struct work_struct remove_work;
+       struct work_struct async_work;
+       struct timer_list watchdog_timer;
        struct mutex shutdown_lock;
        bool subsystem;
        void __iomem *cmb;
@@ -292,17 +278,20 @@ static void nvme_complete_async_event(struct nvme_dev *dev,
        u16 status = le16_to_cpu(cqe->status) >> 1;
        u32 result = le32_to_cpu(cqe->result);
 
-       if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ)
+       if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ) {
                ++dev->ctrl.event_limit;
+               queue_work(nvme_workq, &dev->async_work);
+       }
+
        if (status != NVME_SC_SUCCESS)
                return;
 
        switch (result & 0xff07) {
        case NVME_AER_NOTICE_NS_CHANGED:
-               dev_info(dev->dev, "rescanning\n");
+               dev_info(dev->ctrl.device, "rescanning\n");
                queue_work(nvme_workq, &dev->scan_work);
        default:
-               dev_warn(dev->dev, "async event result %08x\n", result);
+               dev_warn(dev->ctrl.device, "async event result %08x\n", result);
        }
 }
 
@@ -708,7 +697,7 @@ static void nvme_complete_rq(struct request *req)
        }
 
        if (unlikely(iod->aborted)) {
-               dev_warn(dev->dev,
+               dev_warn(dev->ctrl.device,
                        "completing aborted command with status: %04x\n",
                        req->errors);
        }
@@ -740,7 +729,7 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
                        *tag = -1;
 
                if (unlikely(cqe.command_id >= nvmeq->q_depth)) {
-                       dev_warn(nvmeq->q_dmadev,
+                       dev_warn(nvmeq->dev->ctrl.device,
                                "invalid id %d completed on queue %d\n",
                                cqe.command_id, le16_to_cpu(cqe.sq_id));
                        continue;
@@ -759,10 +748,8 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
                }
 
                req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
-               if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
-                       u32 result = le32_to_cpu(cqe.result);
-                       req->special = (void *)(uintptr_t)result;
-               }
+               if (req->cmd_type == REQ_TYPE_DRV_PRIV && req->special)
+                       memcpy(req->special, &cqe, sizeof(cqe));
                blk_mq_complete_request(req, status >> 1);
 
        }
@@ -827,15 +814,22 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
        return 0;
 }
 
-static void nvme_submit_async_event(struct nvme_dev *dev)
+static void nvme_async_event_work(struct work_struct *work)
 {
+       struct nvme_dev *dev = container_of(work, struct nvme_dev, async_work);
+       struct nvme_queue *nvmeq = dev->queues[0];
        struct nvme_command c;
 
        memset(&c, 0, sizeof(c));
        c.common.opcode = nvme_admin_async_event;
-       c.common.command_id = NVME_AQ_BLKMQ_DEPTH + --dev->ctrl.event_limit;
 
-       __nvme_submit_cmd(dev->queues[0], &c);
+       spin_lock_irq(&nvmeq->q_lock);
+       while (dev->ctrl.event_limit > 0) {
+               c.common.command_id = NVME_AQ_BLKMQ_DEPTH +
+                       --dev->ctrl.event_limit;
+               __nvme_submit_cmd(nvmeq, &c);
+       }
+       spin_unlock_irq(&nvmeq->q_lock);
 }
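
Async-event submission moves off the polling kthread and into a work item: the completion handler bumps event_limit and queues async_work, and the handler drains the whole budget under q_lock so command IDs above NVME_AQ_BLKMQ_DEPTH stay unique. The container_of plumbing that makes this work, in miniature and with stand-in names:

#include <linux/workqueue.h>
#include <linux/atomic.h>

struct example_dev {
        struct work_struct async_work;
        atomic_t event_budget;
};

/* Sketch: a work item recovering its device with container_of(). */
static void example_async_work(struct work_struct *work)
{
        struct example_dev *dev =
                container_of(work, struct example_dev, async_work);

        /* drain however many events the budget allows */
        while (atomic_dec_if_positive(&dev->event_budget) >= 0)
                example_submit_async_event(dev);   /* hypothetical helper */
}

/* setup, e.g. in probe:     INIT_WORK(&dev->async_work, example_async_work);
 * kick, e.g. on completion: queue_work(example_wq, &dev->async_work);       */
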
 
 static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
@@ -905,12 +899,10 @@ static void abort_endio(struct request *req, int error)
 {
        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
        struct nvme_queue *nvmeq = iod->nvmeq;
-       u32 result = (u32)(uintptr_t)req->special;
        u16 status = req->errors;
 
-       dev_warn(nvmeq->q_dmadev, "Abort status:%x result:%x", status, result);
+       dev_warn(nvmeq->dev->ctrl.device, "Abort status: 0x%x", status);
        atomic_inc(&nvmeq->dev->ctrl.abort_limit);
-
        blk_mq_free_request(req);
 }
 
@@ -929,7 +921,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
         * shutdown, so we return BLK_EH_HANDLED.
         */
        if (test_bit(NVME_CTRL_RESETTING, &dev->flags)) {
-               dev_warn(dev->dev,
+               dev_warn(dev->ctrl.device,
                         "I/O %d QID %d timeout, disable controller\n",
                         req->tag, nvmeq->qid);
                nvme_dev_disable(dev, false);
@@ -943,7 +935,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
         * returned to the driver, or if this is the admin queue.
         */
        if (!nvmeq->qid || iod->aborted) {
-               dev_warn(dev->dev,
+               dev_warn(dev->ctrl.device,
                         "I/O %d QID %d timeout, reset controller\n",
                         req->tag, nvmeq->qid);
                nvme_dev_disable(dev, false);
@@ -969,8 +961,9 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
        cmd.abort.cid = req->tag;
        cmd.abort.sqid = cpu_to_le16(nvmeq->qid);
 
-       dev_warn(nvmeq->q_dmadev, "I/O %d QID %d timeout, aborting\n",
-                                req->tag, nvmeq->qid);
+       dev_warn(nvmeq->dev->ctrl.device,
+               "I/O %d QID %d timeout, aborting\n",
+                req->tag, nvmeq->qid);
 
        abort_req = nvme_alloc_request(dev->ctrl.admin_q, &cmd,
                        BLK_MQ_REQ_NOWAIT);
@@ -999,7 +992,7 @@ static void nvme_cancel_queue_ios(struct request *req, void *data, bool reserved
        if (!blk_mq_request_started(req))
                return;
 
-       dev_warn(nvmeq->q_dmadev,
+       dev_warn(nvmeq->dev->ctrl.device,
                 "Cancelling I/O %d QID %d\n", req->tag, nvmeq->qid);
 
        status = NVME_SC_ABORT_REQ;
@@ -1154,9 +1147,6 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
        nvmeq->qid = qid;
        nvmeq->cq_vector = -1;
        dev->queues[qid] = nvmeq;
-
-       /* make sure queue descriptor is set before queue count, for kthread */
-       mb();
        dev->queue_count++;
 
        return nvmeq;
@@ -1335,53 +1325,31 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
        return result;
 }
 
-static int nvme_kthread(void *data)
-{
-       struct nvme_dev *dev, *next;
-
-       while (!kthread_should_stop()) {
-               set_current_state(TASK_INTERRUPTIBLE);
-               spin_lock(&dev_list_lock);
-               list_for_each_entry_safe(dev, next, &dev_list, node) {
-                       int i;
-                       u32 csts = readl(dev->bar + NVME_REG_CSTS);
-
-                       /*
-                        * Skip controllers currently under reset.
-                        */
-                       if (work_pending(&dev->reset_work) || work_busy(&dev->reset_work))
-                               continue;
-
-                       if ((dev->subsystem && (csts & NVME_CSTS_NSSRO)) ||
-                                                       csts & NVME_CSTS_CFS) {
-                               if (queue_work(nvme_workq, &dev->reset_work)) {
-                                       dev_warn(dev->dev,
-                                               "Failed status: %x, reset controller\n",
-                                               readl(dev->bar + NVME_REG_CSTS));
-                               }
-                               continue;
-                       }
-                       for (i = 0; i < dev->queue_count; i++) {
-                               struct nvme_queue *nvmeq = dev->queues[i];
-                               if (!nvmeq)
-                                       continue;
-                               spin_lock_irq(&nvmeq->q_lock);
-                               nvme_process_cq(nvmeq);
-
-                               while (i == 0 && dev->ctrl.event_limit > 0)
-                                       nvme_submit_async_event(dev);
-                               spin_unlock_irq(&nvmeq->q_lock);
-                       }
+static void nvme_watchdog_timer(unsigned long data)
+{
+       struct nvme_dev *dev = (struct nvme_dev *)data;
+       u32 csts = readl(dev->bar + NVME_REG_CSTS);
+
+       /*
+        * Skip controllers currently under reset.
+        */
+       if (!work_pending(&dev->reset_work) && !work_busy(&dev->reset_work) &&
+           ((csts & NVME_CSTS_CFS) ||
+            (dev->subsystem && (csts & NVME_CSTS_NSSRO)))) {
+               if (queue_work(nvme_workq, &dev->reset_work)) {
+                       dev_warn(dev->dev,
+                               "Failed status: 0x%x, reset controller.\n",
+                               csts);
                }
-               spin_unlock(&dev_list_lock);
-               schedule_timeout(round_jiffies_relative(HZ));
+               return;
        }
-       return 0;
+
+       mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
 }
 
 static int nvme_create_io_queues(struct nvme_dev *dev)
 {
-       unsigned i;
+       unsigned i, max;
        int ret = 0;
 
        for (i = dev->queue_count; i <= dev->max_qid; i++) {
@@ -1391,7 +1359,8 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
                }
        }
 
-       for (i = dev->online_queues; i <= dev->queue_count - 1; i++) {
+       max = min(dev->max_qid, dev->queue_count - 1);
+       for (i = dev->online_queues; i <= max; i++) {
                ret = nvme_create_queue(dev->queues[i], i);
                if (ret) {
                        nvme_free_queues(dev, i);
@@ -1482,7 +1451,8 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
         * access to the admin queue, as that might be only way to fix them up.
         */
        if (result > 0) {
-               dev_err(dev->dev, "Could not set queue count (%d)\n", result);
+               dev_err(dev->ctrl.device,
+                       "Could not set queue count (%d)\n", result);
                nr_io_queues = 0;
                result = 0;
        }
@@ -1548,9 +1518,6 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
                adminq->cq_vector = -1;
                goto free_queues;
        }
-
-       /* Free previously allocated queues that are no longer usable */
-       nvme_free_queues(dev, nr_io_queues + 1);
        return nvme_create_io_queues(dev);
 
  free_queues:
@@ -1684,7 +1651,13 @@ static int nvme_dev_add(struct nvme_dev *dev)
                if (blk_mq_alloc_tag_set(&dev->tagset))
                        return 0;
                dev->ctrl.tagset = &dev->tagset;
+       } else {
+               blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1);
+
+               /* Free previously allocated queues that are no longer usable */
+               nvme_free_queues(dev, dev->online_queues);
        }
+
        queue_work(nvme_workq, &dev->scan_work);
        return 0;
 }
@@ -1785,56 +1758,12 @@ static void nvme_dev_unmap(struct nvme_dev *dev)
        }
 }
 
-static int nvme_dev_list_add(struct nvme_dev *dev)
-{
-       bool start_thread = false;
-
-       spin_lock(&dev_list_lock);
-       if (list_empty(&dev_list) && IS_ERR_OR_NULL(nvme_thread)) {
-               start_thread = true;
-               nvme_thread = NULL;
-       }
-       list_add(&dev->node, &dev_list);
-       spin_unlock(&dev_list_lock);
-
-       if (start_thread) {
-               nvme_thread = kthread_run(nvme_kthread, NULL, "nvme");
-               wake_up_all(&nvme_kthread_wait);
-       } else
-               wait_event_killable(nvme_kthread_wait, nvme_thread);
-
-       if (IS_ERR_OR_NULL(nvme_thread))
-               return nvme_thread ? PTR_ERR(nvme_thread) : -EINTR;
-
-       return 0;
-}
-
-/*
-* Remove the node from the device list and check
-* for whether or not we need to stop the nvme_thread.
-*/
-static void nvme_dev_list_remove(struct nvme_dev *dev)
-{
-       struct task_struct *tmp = NULL;
-
-       spin_lock(&dev_list_lock);
-       list_del_init(&dev->node);
-       if (list_empty(&dev_list) && !IS_ERR_OR_NULL(nvme_thread)) {
-               tmp = nvme_thread;
-               nvme_thread = NULL;
-       }
-       spin_unlock(&dev_list_lock);
-
-       if (tmp)
-               kthread_stop(tmp);
-}
-
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 {
        int i;
        u32 csts = -1;
 
-       nvme_dev_list_remove(dev);
+       del_timer_sync(&dev->watchdog_timer);
 
        mutex_lock(&dev->shutdown_lock);
        if (dev->bar) {
@@ -1933,17 +1862,16 @@ static void nvme_reset_work(struct work_struct *work)
                goto free_tags;
 
        dev->ctrl.event_limit = NVME_NR_AEN_COMMANDS;
+       queue_work(nvme_workq, &dev->async_work);
 
-       result = nvme_dev_list_add(dev);
-       if (result)
-               goto remove;
+       mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
 
        /*
         * Keep the controller around but remove all namespaces if we don't have
         * any working I/O queue.
         */
        if (dev->online_queues < 2) {
-               dev_warn(dev->dev, "IO queues not created\n");
+               dev_warn(dev->ctrl.device, "IO queues not created\n");
                nvme_remove_namespaces(&dev->ctrl);
        } else {
                nvme_start_queues(&dev->ctrl);
@@ -1953,8 +1881,6 @@ static void nvme_reset_work(struct work_struct *work)
        clear_bit(NVME_CTRL_RESETTING, &dev->flags);
        return;
 
- remove:
-       nvme_dev_list_remove(dev);
  free_tags:
        nvme_dev_remove_admin(dev);
        blk_put_queue(dev->ctrl.admin_q);
@@ -1980,7 +1906,7 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work)
 
 static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
 {
-       dev_warn(dev->dev, "Removing after probe failure\n");
+       dev_warn(dev->ctrl.device, "Removing after probe failure\n");
        kref_get(&dev->ctrl.kref);
        if (!schedule_work(&dev->remove_work))
                nvme_put_ctrl(&dev->ctrl);
@@ -2029,6 +1955,7 @@ static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl)
 }
 
 static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
+       .module                 = THIS_MODULE,
        .reg_read32             = nvme_pci_reg_read32,
        .reg_write32            = nvme_pci_reg_write32,
        .reg_read64             = nvme_pci_reg_read64,
@@ -2061,10 +1988,12 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        dev->dev = get_device(&pdev->dev);
        pci_set_drvdata(pdev, dev);
 
-       INIT_LIST_HEAD(&dev->node);
        INIT_WORK(&dev->scan_work, nvme_dev_scan);
        INIT_WORK(&dev->reset_work, nvme_reset_work);
        INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
+       INIT_WORK(&dev->async_work, nvme_async_event_work);
+       setup_timer(&dev->watchdog_timer, nvme_watchdog_timer,
+               (unsigned long)dev);
        mutex_init(&dev->shutdown_lock);
        init_completion(&dev->ioq_wait);
 
@@ -2077,6 +2006,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        if (result)
                goto release_pools;
 
+       dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
+
        queue_work(nvme_workq, &dev->reset_work);
        return 0;
 
@@ -2111,11 +2042,10 @@ static void nvme_remove(struct pci_dev *pdev)
 {
        struct nvme_dev *dev = pci_get_drvdata(pdev);
 
-       spin_lock(&dev_list_lock);
-       list_del_init(&dev->node);
-       spin_unlock(&dev_list_lock);
+       del_timer_sync(&dev->watchdog_timer);
 
        pci_set_drvdata(pdev, NULL);
+       flush_work(&dev->async_work);
        flush_work(&dev->reset_work);
        flush_work(&dev->scan_work);
        nvme_remove_namespaces(&dev->ctrl);
@@ -2160,7 +2090,7 @@ static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
         * shutdown the controller to quiesce. The controller will be restarted
         * after the slot reset through driver's slot_reset callback.
         */
-       dev_warn(&pdev->dev, "error detected: state:%d\n", state);
+       dev_warn(dev->ctrl.device, "error detected: state:%d\n", state);
        switch (state) {
        case pci_channel_io_normal:
                return PCI_ERS_RESULT_CAN_RECOVER;
@@ -2177,7 +2107,7 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
 {
        struct nvme_dev *dev = pci_get_drvdata(pdev);
 
-       dev_info(&pdev->dev, "restart after slot reset\n");
+       dev_info(dev->ctrl.device, "restart after slot reset\n");
        pci_restore_state(pdev);
        queue_work(nvme_workq, &dev->reset_work);
        return PCI_ERS_RESULT_RECOVERED;
@@ -2225,34 +2155,20 @@ static int __init nvme_init(void)
 {
        int result;
 
-       init_waitqueue_head(&nvme_kthread_wait);
-
        nvme_workq = alloc_workqueue("nvme", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
        if (!nvme_workq)
                return -ENOMEM;
 
-       result = nvme_core_init();
-       if (result < 0)
-               goto kill_workq;
-
        result = pci_register_driver(&nvme_driver);
        if (result)
-               goto core_exit;
-       return 0;
-
- core_exit:
-       nvme_core_exit();
- kill_workq:
-       destroy_workqueue(nvme_workq);
+               destroy_workqueue(nvme_workq);
        return result;
 }
 
 static void __exit nvme_exit(void)
 {
        pci_unregister_driver(&nvme_driver);
-       nvme_core_exit();
        destroy_workqueue(nvme_workq);
-       BUG_ON(nvme_thread && !IS_ERR(nvme_thread));
        _nvme_check_size();
 }
 
index 706e3ff..7ee21ae 100644 (file)
@@ -679,18 +679,6 @@ u32 of_msi_map_rid(struct device *dev, struct device_node *msi_np, u32 rid_in)
        return __of_msi_map_rid(dev, &msi_np, rid_in);
 }
 
-static struct irq_domain *__of_get_msi_domain(struct device_node *np,
-                                             enum irq_domain_bus_token token)
-{
-       struct irq_domain *d;
-
-       d = irq_find_matching_host(np, token);
-       if (!d)
-               d = irq_find_host(np);
-
-       return d;
-}
-
 /**
  * of_msi_map_get_device_domain - Use msi-map to find the relevant MSI domain
  * @dev: device for which the mapping is to be done.
@@ -706,7 +694,7 @@ struct irq_domain *of_msi_map_get_device_domain(struct device *dev, u32 rid)
        struct device_node *np = NULL;
 
        __of_msi_map_rid(dev, &np, rid);
-       return __of_get_msi_domain(np, DOMAIN_BUS_PCI_MSI);
+       return irq_find_matching_host(np, DOMAIN_BUS_PCI_MSI);
 }
 
 /**
@@ -730,7 +718,7 @@ struct irq_domain *of_msi_get_domain(struct device *dev,
        /* Check for a single msi-parent property */
        msi_np = of_parse_phandle(np, "msi-parent", 0);
        if (msi_np && !of_property_read_bool(msi_np, "#msi-cells")) {
-               d = __of_get_msi_domain(msi_np, token);
+               d = irq_find_matching_host(msi_np, token);
                if (!d)
                        of_node_put(msi_np);
                return d;
@@ -744,7 +732,7 @@ struct irq_domain *of_msi_get_domain(struct device *dev,
                while (!of_parse_phandle_with_args(np, "msi-parent",
                                                   "#msi-cells",
                                                   index, &args)) {
-                       d = __of_get_msi_domain(args.np, token);
+                       d = irq_find_matching_host(args.np, token);
                        if (d)
                                return d;
 
index 5f2fda1..fa49f91 100644 (file)
@@ -953,8 +953,10 @@ int acpiphp_enable_slot(struct acpiphp_slot *slot)
 {
        pci_lock_rescan_remove();
 
-       if (slot->flags & SLOT_IS_GOING_AWAY)
+       if (slot->flags & SLOT_IS_GOING_AWAY) {
+               pci_unlock_rescan_remove();
                return -ENODEV;
+       }
 
        /* configure all functions */
        if (!(slot->flags & SLOT_ENABLED))
index c692dfe..50597f9 100644 (file)
@@ -139,11 +139,11 @@ static ssize_t chp_measurement_chars_read(struct file *filp,
 
        device = container_of(kobj, struct device, kobj);
        chp = to_channelpath(device);
-       if (!chp->cmg_chars)
+       if (chp->cmg == -1)
                return 0;
 
-       return memory_read_from_buffer(buf, count, &off,
-                               chp->cmg_chars, sizeof(struct cmg_chars));
+       return memory_read_from_buffer(buf, count, &off, &chp->cmg_chars,
+                                      sizeof(chp->cmg_chars));
 }
 
 static struct bin_attribute chp_measurement_chars_attr = {
@@ -416,7 +416,8 @@ static void chp_release(struct device *dev)
  * chp_update_desc - update channel-path description
  * @chp - channel-path
  *
- * Update the channel-path description of the specified channel-path.
+ * Update the channel-path description of the specified channel-path
+ * including channel measurement related information.
  * Return zero on success, non-zero otherwise.
  */
 int chp_update_desc(struct channel_path *chp)
@@ -428,8 +429,10 @@ int chp_update_desc(struct channel_path *chp)
                return rc;
 
        rc = chsc_determine_fmt1_channel_path_desc(chp->chpid, &chp->desc_fmt1);
+       if (rc)
+               return rc;
 
-       return rc;
+       return chsc_get_channel_measurement_chars(chp);
 }
 
 /**
@@ -466,14 +469,6 @@ int chp_new(struct chp_id chpid)
                ret = -ENODEV;
                goto out_free;
        }
-       /* Get channel-measurement characteristics. */
-       if (css_chsc_characteristics.scmc && css_chsc_characteristics.secm) {
-               ret = chsc_get_channel_measurement_chars(chp);
-               if (ret)
-                       goto out_free;
-       } else {
-               chp->cmg = -1;
-       }
        dev_set_name(&chp->dev, "chp%x.%02x", chpid.cssid, chpid.id);
 
        /* make it known to the system */
index 4efd5b8..af02322 100644 (file)
@@ -48,7 +48,7 @@ struct channel_path {
        /* Channel-measurement related stuff: */
        int cmg;
        int shared;
-       void *cmg_chars;
+       struct cmg_chars cmg_chars;
 };
 
 /* Return channel_path struct for given chpid. */
index a831d18..c424c0c 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/device.h>
+#include <linux/mutex.h>
 #include <linux/pci.h>
 
 #include <asm/cio.h>
@@ -224,8 +225,9 @@ out_unreg:
 
 void chsc_chp_offline(struct chp_id chpid)
 {
-       char dbf_txt[15];
+       struct channel_path *chp = chpid_to_chp(chpid);
        struct chp_link link;
+       char dbf_txt[15];
 
        sprintf(dbf_txt, "chpr%x.%02x", chpid.cssid, chpid.id);
        CIO_TRACE_EVENT(2, dbf_txt);
@@ -236,6 +238,11 @@ void chsc_chp_offline(struct chp_id chpid)
        link.chpid = chpid;
        /* Wait until previous actions have settled. */
        css_wait_for_slow_path();
+
+       mutex_lock(&chp->lock);
+       chp_update_desc(chp);
+       mutex_unlock(&chp->lock);
+
        for_each_subchannel_staged(s390_subchannel_remove_chpid, NULL, &link);
 }
 
@@ -690,8 +697,9 @@ static void chsc_process_crw(struct crw *crw0, struct crw *crw1, int overflow)
 
 void chsc_chp_online(struct chp_id chpid)
 {
-       char dbf_txt[15];
+       struct channel_path *chp = chpid_to_chp(chpid);
        struct chp_link link;
+       char dbf_txt[15];
 
        sprintf(dbf_txt, "cadd%x.%02x", chpid.cssid, chpid.id);
        CIO_TRACE_EVENT(2, dbf_txt);
@@ -701,6 +709,11 @@ void chsc_chp_online(struct chp_id chpid)
                link.chpid = chpid;
                /* Wait until previous actions have settled. */
                css_wait_for_slow_path();
+
+               mutex_lock(&chp->lock);
+               chp_update_desc(chp);
+               mutex_unlock(&chp->lock);
+
                for_each_subchannel_staged(__s390_process_res_acc, NULL,
                                           &link);
                css_schedule_reprobe();
@@ -967,22 +980,19 @@ static void
 chsc_initialize_cmg_chars(struct channel_path *chp, u8 cmcv,
                          struct cmg_chars *chars)
 {
-       struct cmg_chars *cmg_chars;
        int i, mask;
 
-       cmg_chars = chp->cmg_chars;
        for (i = 0; i < NR_MEASUREMENT_CHARS; i++) {
                mask = 0x80 >> (i + 3);
                if (cmcv & mask)
-                       cmg_chars->values[i] = chars->values[i];
+                       chp->cmg_chars.values[i] = chars->values[i];
                else
-                       cmg_chars->values[i] = 0;
+                       chp->cmg_chars.values[i] = 0;
        }
 }
 
 int chsc_get_channel_measurement_chars(struct channel_path *chp)
 {
-       struct cmg_chars *cmg_chars;
        int ccode, ret;
 
        struct {
@@ -1006,10 +1016,11 @@ int chsc_get_channel_measurement_chars(struct channel_path *chp)
                u32 data[NR_MEASUREMENT_CHARS];
        } __attribute__ ((packed)) *scmc_area;
 
-       chp->cmg_chars = NULL;
-       cmg_chars = kmalloc(sizeof(*cmg_chars), GFP_KERNEL);
-       if (!cmg_chars)
-               return -ENOMEM;
+       chp->shared = -1;
+       chp->cmg = -1;
+
+       if (!css_chsc_characteristics.scmc || !css_chsc_characteristics.secm)
+               return 0;
 
        spin_lock_irq(&chsc_page_lock);
        memset(chsc_page, 0, PAGE_SIZE);
@@ -1031,25 +1042,19 @@ int chsc_get_channel_measurement_chars(struct channel_path *chp)
                              scmc_area->response.code);
                goto out;
        }
-       if (scmc_area->not_valid) {
-               chp->cmg = -1;
-               chp->shared = -1;
+       if (scmc_area->not_valid)
                goto out;
-       }
+
        chp->cmg = scmc_area->cmg;
        chp->shared = scmc_area->shared;
        if (chp->cmg != 2 && chp->cmg != 3) {
                /* No cmg-dependent data. */
                goto out;
        }
-       chp->cmg_chars = cmg_chars;
        chsc_initialize_cmg_chars(chp, scmc_area->cmcv,
                                  (struct cmg_chars *) &scmc_area->data);
 out:
        spin_unlock_irq(&chsc_page_lock);
-       if (!chp->cmg_chars)
-               kfree(cmg_chars);
-
        return ret;
 }
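
Turning chp->cmg_chars from a kmalloc'd pointer into an embedded struct cmg_chars removes an allocation whose lifetime had to be tracked by hand, and lets validity be signalled by chp->cmg == -1 instead of a NULL check. The before/after shape, schematically (the struct stub is illustrative):

#define NR_MEASUREMENT_CHARS 5                  /* stub for the sketch */
struct cmg_chars { unsigned int values[NR_MEASUREMENT_CHARS]; };

/* Before: pointer member, separate allocation and ownership rules. */
struct chan_old { int cmg; struct cmg_chars *cmg_chars; };

/* After: fixed-size data embedded; its lifetime equals the containing
 * object's, and cmg == -1 marks "no measurement data" instead of NULL. */
struct chan_new { int cmg; struct cmg_chars cmg_chars; };
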
 
index 7b23f43..de1b6c1 100644 (file)
@@ -112,9 +112,10 @@ static inline int convert_error(struct zcrypt_device *zdev,
                atomic_set(&zcrypt_rescan_req, 1);
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-                       zdev->ap_dev->qid, zdev->online, ehdr->reply_code);
+                       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
+                       ehdr->reply_code);
                return -EAGAIN;
        case REP82_ERROR_TRANSPORT_FAIL:
        case REP82_ERROR_MACHINE_FAILURE:
@@ -123,16 +124,18 @@ static inline int convert_error(struct zcrypt_device *zdev,
                atomic_set(&zcrypt_rescan_req, 1);
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-                       zdev->ap_dev->qid, zdev->online, ehdr->reply_code);
+                       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
+                       ehdr->reply_code);
                return -EAGAIN;
        default:
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-                       zdev->ap_dev->qid, zdev->online, ehdr->reply_code);
+                       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
+                       ehdr->reply_code);
                return -EAGAIN; /* repeat the request on a different device. */
        }
 }
index 74edf29..eedfaa2 100644 (file)
@@ -336,9 +336,10 @@ static int convert_type80(struct zcrypt_device *zdev,
                /* The result is too short, the CEX2A card may not do that.. */
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-                              zdev->ap_dev->qid, zdev->online, t80h->code);
+                              AP_QID_DEVICE(zdev->ap_dev->qid),
+                              zdev->online, t80h->code);
 
                return -EAGAIN; /* repeat the request on a different device. */
        }
@@ -368,9 +369,9 @@ static int convert_response(struct zcrypt_device *zdev,
        default: /* Unknown response type, this should NEVER EVER happen */
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-                              zdev->ap_dev->qid, zdev->online);
+                              AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
                return -EAGAIN; /* repeat the request on a different device. */
        }
 }
index 9a2dd47..2195971 100644 (file)
@@ -572,9 +572,9 @@ static int convert_type86_ica(struct zcrypt_device *zdev,
                        return -EINVAL;
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-                              zdev->ap_dev->qid, zdev->online,
+                              AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
                               msg->hdr.reply_code);
                return -EAGAIN; /* repeat the request on a different device. */
        }
@@ -715,9 +715,9 @@ static int convert_response_ica(struct zcrypt_device *zdev,
        default: /* Unknown response type, this should NEVER EVER happen */
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-                              zdev->ap_dev->qid, zdev->online);
+                              AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
                return -EAGAIN; /* repeat the request on a different device. */
        }
 }
@@ -747,9 +747,9 @@ static int convert_response_xcrb(struct zcrypt_device *zdev,
                xcRB->status = 0x0008044DL; /* HDD_InvalidParm */
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-                              zdev->ap_dev->qid, zdev->online);
+                              AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
                return -EAGAIN; /* repeat the request on a different device. */
        }
 }
@@ -773,9 +773,9 @@ static int convert_response_ep11_xcrb(struct zcrypt_device *zdev,
        default: /* Unknown response type, this should NEVER EVER happen */
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-                              zdev->ap_dev->qid, zdev->online);
+                              AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
                return -EAGAIN; /* repeat the request on a different device. */
        }
 }
@@ -800,9 +800,9 @@ static int convert_response_rng(struct zcrypt_device *zdev,
        default: /* Unknown response type, this should NEVER EVER happen */
                zdev->online = 0;
                pr_err("Cryptographic device %x failed and was set offline\n",
-                      zdev->ap_dev->qid);
+                      AP_QID_DEVICE(zdev->ap_dev->qid));
                ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-                              zdev->ap_dev->qid, zdev->online);
+                              AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
                return -EAGAIN; /* repeat the request on a different device. */
        }
 }
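
All of the zcrypt hunks above make the same change: the error messages used to print the raw AP queue id, which packs both a device number and a domain (queue) index, so the printed value did not match the device id shown elsewhere; printing AP_QID_DEVICE(qid) extracts just the device part. A sketch of how such a packed id is typically split (the SKETCH_* names and field widths are illustrative, not the real ap_bus.h definitions):

    /* Illustrative only: pack/unpack a device number and a queue
     * (domain) index into a single AP queue id. */
    #define SKETCH_AP_MKQID(dev, queue)   ((((dev) & 0xff) << 8) | ((queue) & 0xff))
    #define SKETCH_AP_QID_DEVICE(qid)     (((qid) >> 8) & 0xff)
    #define SKETCH_AP_QID_QUEUE(qid)      ((qid) & 0xff)
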
index 37a0c71..b676618 100644 (file)
@@ -1,5 +1,7 @@
 config SCSI_HISI_SAS
        tristate "HiSilicon SAS"
+       depends on HAS_DMA
+       depends on ARM64 || COMPILE_TEST
        select SCSI_SAS_LIBSAS
        select BLK_DEV_INTEGRITY
        help
index 4e08d1c..bb669d3 100644 (file)
@@ -2893,7 +2893,7 @@ static int sd_revalidate_disk(struct gendisk *disk)
            sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS &&
            sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_CACHE_SIZE)
                rw_max = q->limits.io_opt =
-                       logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
+                       sdkp->opt_xfer_blocks * sdp->sector_size;
        else
                rw_max = BLK_DEF_MAX_SECTORS;
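
The conversion above matters because queue_limits.io_opt is specified in bytes, while the removed logical_to_sectors() helper produced 512-byte sectors; the device reports OPTIMAL TRANSFER LENGTH in logical blocks. A minimal standalone sketch of the two unit domains (function names hypothetical):

    #include <stdint.h>

    /* OPTIMAL TRANSFER LENGTH is reported in logical blocks. */
    static inline uint64_t opt_xfer_bytes(uint64_t blocks, uint32_t lb_size)
    {
            return blocks * lb_size;            /* what io_opt expects */
    }

    static inline uint64_t opt_xfer_sectors(uint64_t blocks, uint32_t lb_size)
    {
            return (blocks * lb_size) >> 9;     /* 512-byte sectors */
    }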
 
@@ -3268,8 +3268,8 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors)
        struct scsi_disk *sdkp = dev_get_drvdata(dev);
        int ret = 0;
 
-       if (!sdkp)
-               return 0;       /* this can happen */
+       if (!sdkp)      /* E.g.: runtime suspend following sd_remove() */
+               return 0;
 
        if (sdkp->WCE && sdkp->media_present) {
                sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
@@ -3308,6 +3308,9 @@ static int sd_resume(struct device *dev)
 {
        struct scsi_disk *sdkp = dev_get_drvdata(dev);
 
+       if (!sdkp)      /* E.g.: runtime resume at the start of sd_probe() */
+               return 0;
+
        if (!sdkp->device->manage_start_stop)
                return 0;
 
index 8bd54a6..64c8674 100644 (file)
@@ -144,6 +144,9 @@ static int sr_runtime_suspend(struct device *dev)
 {
        struct scsi_cd *cd = dev_get_drvdata(dev);
 
+       if (!cd)        /* E.g.: runtime suspend following sr_remove() */
+               return 0;
+
        if (cd->media_present)
                return -EBUSY;
        else
@@ -985,6 +988,7 @@ static int sr_remove(struct device *dev)
        scsi_autopm_get_device(cd->device);
 
        del_gendisk(cd->disk);
+       dev_set_drvdata(dev, NULL);
 
        mutex_lock(&sr_ref_mutex);
        kref_put(&cd->kref, sr_kref_release);
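
The sd and sr hunks above implement one pattern: runtime-PM callbacks can fire after ->remove() has torn the device down, so remove() clears drvdata and every PM callback bails out on NULL instead of dereferencing freed memory. A minimal sketch of the pattern, with hypothetical foo_* names:

    #include <linux/device.h>

    struct foo { int dummy; };

    static int foo_runtime_suspend(struct device *dev)
    {
            struct foo *priv = dev_get_drvdata(dev);

            if (!priv)      /* remove() already ran; nothing to quiesce */
                    return 0;
            /* ... quiesce hardware ... */
            return 0;
    }

    static void foo_remove(struct device *dev)
    {
            /* Clear drvdata before dropping the last reference so a late
             * PM callback sees NULL rather than stale memory. */
            dev_set_drvdata(dev, NULL);
    }
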
index 79ac192..70b8f4f 100644 (file)
@@ -825,8 +825,7 @@ static void lcd_write_cmd_s(int cmd)
        lcd_send_serial(0x1F);  /* R/W=W, RS=0 */
        lcd_send_serial(cmd & 0x0F);
        lcd_send_serial((cmd >> 4) & 0x0F);
-       /* the shortest command takes at least 40 us */
-       usleep_range(40, 100);
+       udelay(40);             /* the shortest command takes at least 40 us */
        spin_unlock_irq(&pprt_lock);
 }
 
@@ -837,8 +836,7 @@ static void lcd_write_data_s(int data)
        lcd_send_serial(0x5F);  /* R/W=W, RS=1 */
        lcd_send_serial(data & 0x0F);
        lcd_send_serial((data >> 4) & 0x0F);
-       /* the shortest data takes at least 40 us */
-       usleep_range(40, 100);
+       udelay(40);             /* the shortest data takes at least 40 us */
        spin_unlock_irq(&pprt_lock);
 }
 
@@ -848,20 +846,19 @@ static void lcd_write_cmd_p8(int cmd)
        spin_lock_irq(&pprt_lock);
        /* present the data to the data port */
        w_dtr(pprt, cmd);
-       /* maintain the data during 20 us before the strobe */
-       usleep_range(20, 100);
+       udelay(20);     /* maintain the data during 20 us before the strobe */
 
        bits.e = BIT_SET;
        bits.rs = BIT_CLR;
        bits.rw = BIT_CLR;
        set_ctrl_bits();
 
-       usleep_range(40, 100);  /* maintain the strobe during 40 us */
+       udelay(40);     /* maintain the strobe during 40 us */
 
        bits.e = BIT_CLR;
        set_ctrl_bits();
 
-       usleep_range(120, 500); /* the shortest command takes at least 120 us */
+       udelay(120);    /* the shortest command takes at least 120 us */
        spin_unlock_irq(&pprt_lock);
 }
 
@@ -871,20 +868,19 @@ static void lcd_write_data_p8(int data)
        spin_lock_irq(&pprt_lock);
        /* present the data to the data port */
        w_dtr(pprt, data);
-       /* maintain the data during 20 us before the strobe */
-       usleep_range(20, 100);
+       udelay(20);     /* maintain the data during 20 us before the strobe */
 
        bits.e = BIT_SET;
        bits.rs = BIT_SET;
        bits.rw = BIT_CLR;
        set_ctrl_bits();
 
-       usleep_range(40, 100);  /* maintain the strobe during 40 us */
+       udelay(40);     /* maintain the strobe during 40 us */
 
        bits.e = BIT_CLR;
        set_ctrl_bits();
 
-       usleep_range(45, 100);  /* the shortest data takes at least 45 us */
+       udelay(45);     /* the shortest data takes at least 45 us */
        spin_unlock_irq(&pprt_lock);
 }
 
@@ -894,7 +890,7 @@ static void lcd_write_cmd_tilcd(int cmd)
        spin_lock_irq(&pprt_lock);
        /* present the data to the control port */
        w_ctr(pprt, cmd);
-       usleep_range(60, 120);
+       udelay(60);
        spin_unlock_irq(&pprt_lock);
 }
 
@@ -904,7 +900,7 @@ static void lcd_write_data_tilcd(int data)
        spin_lock_irq(&pprt_lock);
        /* present the data to the data port */
        w_dtr(pprt, data);
-       usleep_range(60, 120);
+       udelay(60);
        spin_unlock_irq(&pprt_lock);
 }
 
@@ -947,7 +943,7 @@ static void lcd_clear_fast_s(void)
                lcd_send_serial(0x5F);  /* R/W=W, RS=1 */
                lcd_send_serial(' ' & 0x0F);
                lcd_send_serial((' ' >> 4) & 0x0F);
-               usleep_range(40, 100);  /* the shortest data takes at least 40 us */
+               udelay(40);     /* the shortest data takes at least 40 us */
        }
        spin_unlock_irq(&pprt_lock);
 
@@ -971,7 +967,7 @@ static void lcd_clear_fast_p8(void)
                w_dtr(pprt, ' ');
 
                /* maintain the data during 20 us before the strobe */
-               usleep_range(20, 100);
+               udelay(20);
 
                bits.e = BIT_SET;
                bits.rs = BIT_SET;
@@ -979,13 +975,13 @@ static void lcd_clear_fast_p8(void)
                set_ctrl_bits();
 
                /* maintain the strobe during 40 us */
-               usleep_range(40, 100);
+               udelay(40);
 
                bits.e = BIT_CLR;
                set_ctrl_bits();
 
                /* the shortest data takes at least 45 us */
-               usleep_range(45, 100);
+               udelay(45);
        }
        spin_unlock_irq(&pprt_lock);
 
@@ -1007,7 +1003,7 @@ static void lcd_clear_fast_tilcd(void)
        for (pos = 0; pos < lcd.height * lcd.hwidth; pos++) {
                /* present the data to the data port */
                w_dtr(pprt, ' ');
-               usleep_range(60, 120);
+               udelay(60);
        }
 
        spin_unlock_irq(&pprt_lock);
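
The usleep_range() -> udelay() conversions above are not a style choice: every one of these delays sits inside a spin_lock_irq() region, where sleeping is forbidden, and usleep_range() sleeps. udelay() busy-waits instead, which is the only legal way to delay in atomic context. The rule, compressed into a sketch:

    #include <linux/delay.h>
    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(demo_lock);

    static void strobe(void)
    {
            spin_lock_irq(&demo_lock);      /* atomic context from here on */
            /* usleep_range(40, 100) here would be a might-sleep bug */
            udelay(40);                     /* busy-wait: safe with IRQs off */
            spin_unlock_irq(&demo_lock);
    }
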
index 63c59bc..30cf973 100644 (file)
@@ -264,8 +264,9 @@ static struct notifier_block vt_notifier_block = {
        .notifier_call = vt_notifier_call,
 };
 
-static unsigned char get_attributes(u16 *pos)
+static unsigned char get_attributes(struct vc_data *vc, u16 *pos)
 {
+       pos = screen_pos(vc, pos - (u16 *)vc->vc_origin, 1);
        return (u_char) (scr_readw(pos) >> 8);
 }
 
@@ -275,7 +276,7 @@ static void speakup_date(struct vc_data *vc)
        spk_y = spk_cy = vc->vc_y;
        spk_pos = spk_cp = vc->vc_pos;
        spk_old_attr = spk_attr;
-       spk_attr = get_attributes((u_short *) spk_pos);
+       spk_attr = get_attributes(vc, (u_short *)spk_pos);
 }
 
 static void bleep(u_short val)
@@ -469,8 +470,12 @@ static u16 get_char(struct vc_data *vc, u16 *pos, u_char *attribs)
        u16 ch = ' ';
 
        if (vc && pos) {
-               u16 w = scr_readw(pos);
-               u16 c = w & 0xff;
+               u16 w;
+               u16 c;
+
+               pos = screen_pos(vc, pos - (u16 *)vc->vc_origin, 1);
+               w = scr_readw(pos);
+               c = w & 0xff;
 
                if (w & vc->vc_hi_font_mask)
                        c |= 0x100;
@@ -746,7 +751,7 @@ static int get_line(struct vc_data *vc)
        u_char tmp2;
 
        spk_old_attr = spk_attr;
-       spk_attr = get_attributes((u_short *) spk_pos);
+       spk_attr = get_attributes(vc, (u_short *)spk_pos);
        for (i = 0; i < vc->vc_cols; i++) {
                buf[i] = (u_char) get_char(vc, (u_short *) tmp, &tmp2);
                tmp += 2;
@@ -811,7 +816,7 @@ static int say_from_to(struct vc_data *vc, u_long from, u_long to,
        u_short saved_punc_mask = spk_punc_mask;
 
        spk_old_attr = spk_attr;
-       spk_attr = get_attributes((u_short *) from);
+       spk_attr = get_attributes(vc, (u_short *)from);
        while (from < to) {
                buf[i++] = (char)get_char(vc, (u_short *) from, &tmp);
                from += 2;
@@ -886,7 +891,7 @@ static int get_sentence_buf(struct vc_data *vc, int read_punc)
        sentmarks[bn][0] = &sentbuf[bn][0];
        i = 0;
        spk_old_attr = spk_attr;
-       spk_attr = get_attributes((u_short *) start);
+       spk_attr = get_attributes(vc, (u_short *)start);
 
        while (start < end) {
                sentbuf[bn][i] = (char)get_char(vc, (u_short *) start, &tmp);
@@ -1585,7 +1590,7 @@ static int count_highlight_color(struct vc_data *vc)
                u16 *ptr;
 
                for (ptr = start; ptr < end; ptr++) {
-                       ch = get_attributes(ptr);
+                       ch = get_attributes(vc, ptr);
                        bg = (ch & 0x70) >> 4;
                        speakup_console[vc_num]->ht.bgcount[bg]++;
                }
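
Each speakup hunk above threads the vc_data pointer through so a raw pointer into screen memory can be remapped with screen_pos() (exported in the vt.c hunk further down) before it is read; the subtraction works because the screen buffer stores one u16 per character cell. A sketch of the helper pattern, assuming the usual console declarations are in scope:

    /* Convert a raw buffer pointer into the cell the console layer
     * currently considers to be at that position, then read it. */
    static u16 read_cell(struct vc_data *vc, u16 *raw)
    {
            int offset = raw - (u16 *)vc->vc_origin; /* cells, not bytes */
            u16 *pos = screen_pos(vc, offset, 1);    /* 1 == "viewed" */

            return scr_readw(pos);
    }
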
index aa5ab6c..41ef099 100644 (file)
@@ -142,7 +142,9 @@ static void __speakup_paste_selection(struct work_struct *work)
        struct tty_ldisc *ld;
        DECLARE_WAITQUEUE(wait, current);
 
-       ld = tty_ldisc_ref_wait(tty);
+       ld = tty_ldisc_ref(tty);
+       if (!ld)
+               goto tty_unref;
        tty_buffer_lock_exclusive(&vc->port);
 
        add_wait_queue(&vc->paste_wait, &wait);
@@ -162,6 +164,7 @@ static void __speakup_paste_selection(struct work_struct *work)
 
        tty_buffer_unlock_exclusive(&vc->port);
        tty_ldisc_deref(ld);
+tty_unref:
        tty_kref_put(tty);
 }
 
index 3b5835b..a5bbb33 100644 (file)
@@ -6,6 +6,11 @@
 #include "spk_priv.h"
 #include "serialio.h"
 
+#include <linux/serial_core.h>
+/* WARNING: Do not change this to <linux/serial.h> without testing that
+ * SERIAL_PORT_DFNS does get defined to the appropriate value. */
+#include <asm/serial.h>
+
 #ifndef SERIAL_PORT_DFNS
 #define SERIAL_PORT_DFNS
 #endif
@@ -23,9 +28,15 @@ const struct old_serial_port *spk_serial_init(int index)
        int baud = 9600, quot = 0;
        unsigned int cval = 0;
        int cflag = CREAD | HUPCL | CLOCAL | B9600 | CS8;
-       const struct old_serial_port *ser = rs_table + index;
+       const struct old_serial_port *ser;
        int err;
 
+       if (index >= ARRAY_SIZE(rs_table)) {
+               pr_info("no port info for ttyS%d\n", index);
+               return NULL;
+       }
+       ser = rs_table + index;
+
        /*      Divisor, bytesize and parity */
        quot = ser->baud_base / baud;
        cval = cflag & (CSIZE | CSTOPB);
index d9a5fc2..b280aba 100644 (file)
@@ -269,16 +269,13 @@ static void n_tty_check_throttle(struct tty_struct *tty)
 
 static void n_tty_check_unthrottle(struct tty_struct *tty)
 {
-       if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
-           tty->link->ldisc->ops->write_wakeup == n_tty_write_wakeup) {
+       if (tty->driver->type == TTY_DRIVER_TYPE_PTY) {
                if (chars_in_buffer(tty) > TTY_THRESHOLD_UNTHROTTLE)
                        return;
                if (!tty->count)
                        return;
                n_tty_kick_worker(tty);
-               n_tty_write_wakeup(tty->link);
-               if (waitqueue_active(&tty->link->write_wait))
-                       wake_up_interruptible_poll(&tty->link->write_wait, POLLOUT);
+               tty_wakeup(tty->link);
                return;
        }
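
tty_wakeup() is the canonical replacement for the removed open-coded sequence: it invokes the line discipline's write_wakeup callback (if any) and then wakes write_wait pollers. A simplified sketch of what the upstream helper does, reconstructed from memory and so possibly differing in detail:

    #include <linux/tty.h>

    static void tty_wakeup_sketch(struct tty_struct *tty)
    {
            struct tty_ldisc *ld;

            if (test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) {
                    ld = tty_ldisc_ref(tty);
                    if (ld) {
                            if (ld->ops->write_wakeup)
                                    ld->ops->write_wakeup(tty);
                            tty_ldisc_deref(ld);
                    }
            }
            wake_up_interruptible_poll(&tty->write_wait, POLLOUT);
    }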
 
index 4097f3f..e71ec78 100644 (file)
@@ -1379,6 +1379,9 @@ ce4100_serial_setup(struct serial_private *priv,
 #define PCI_DEVICE_ID_INTEL_BSW_UART1  0x228a
 #define PCI_DEVICE_ID_INTEL_BSW_UART2  0x228c
 
+#define PCI_DEVICE_ID_INTEL_BDW_UART1  0x9ce3
+#define PCI_DEVICE_ID_INTEL_BDW_UART2  0x9ce4
+
 #define BYT_PRV_CLK                    0x800
 #define BYT_PRV_CLK_EN                 (1 << 0)
 #define BYT_PRV_CLK_M_VAL_SHIFT                1
@@ -1461,11 +1464,13 @@ byt_serial_setup(struct serial_private *priv,
        switch (pdev->device) {
        case PCI_DEVICE_ID_INTEL_BYT_UART1:
        case PCI_DEVICE_ID_INTEL_BSW_UART1:
+       case PCI_DEVICE_ID_INTEL_BDW_UART1:
                rx_param->src_id = 3;
                tx_param->dst_id = 2;
                break;
        case PCI_DEVICE_ID_INTEL_BYT_UART2:
        case PCI_DEVICE_ID_INTEL_BSW_UART2:
+       case PCI_DEVICE_ID_INTEL_BDW_UART2:
                rx_param->src_id = 5;
                tx_param->dst_id = 4;
                break;
@@ -2062,6 +2067,20 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = {
                .subdevice      = PCI_ANY_ID,
                .setup          = byt_serial_setup,
        },
+       {
+               .vendor         = PCI_VENDOR_ID_INTEL,
+               .device         = PCI_DEVICE_ID_INTEL_BDW_UART1,
+               .subvendor      = PCI_ANY_ID,
+               .subdevice      = PCI_ANY_ID,
+               .setup          = byt_serial_setup,
+       },
+       {
+               .vendor         = PCI_VENDOR_ID_INTEL,
+               .device         = PCI_DEVICE_ID_INTEL_BDW_UART2,
+               .subvendor      = PCI_ANY_ID,
+               .subdevice      = PCI_ANY_ID,
+               .setup          = byt_serial_setup,
+       },
        /*
         * ITE
         */
@@ -5506,6 +5525,16 @@ static struct pci_device_id serial_pci_tbl[] = {
                PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
                pbn_byt },
 
+       /* Intel Broadwell */
+       {       PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_UART1,
+               PCI_ANY_ID,  PCI_ANY_ID,
+               PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
+               pbn_byt },
+       {       PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_UART2,
+               PCI_ANY_ID,  PCI_ANY_ID,
+               PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
+               pbn_byt },
+
        /*
         * Intel Quark x1000
         */
index 892c923..5cec01c 100644 (file)
@@ -1463,13 +1463,13 @@ static int tty_reopen(struct tty_struct *tty)
 {
        struct tty_driver *driver = tty->driver;
 
-       if (!tty->count)
-               return -EIO;
-
        if (driver->type == TTY_DRIVER_TYPE_PTY &&
            driver->subtype == PTY_TYPE_MASTER)
                return -EIO;
 
+       if (!tty->count)
+               return -EAGAIN;
+
        if (test_bit(TTY_EXCLUSIVE, &tty->flags) && !capable(CAP_SYS_ADMIN))
                return -EBUSY;
 
@@ -2065,7 +2065,12 @@ retry_open:
 
                if (tty) {
                        mutex_unlock(&tty_mutex);
-                       tty_lock(tty);
+                       retval = tty_lock_interruptible(tty);
+                       if (retval) {
+                               if (retval == -EINTR)
+                                       retval = -ERESTARTSYS;
+                               goto err_unref;
+                       }
                        /* safe to drop the kref from tty_driver_lookup_tty() */
                        tty_kref_put(tty);
                        retval = tty_reopen(tty);
@@ -2083,7 +2088,11 @@ retry_open:
 
        if (IS_ERR(tty)) {
                retval = PTR_ERR(tty);
-               goto err_file;
+               if (retval != -EAGAIN || signal_pending(current))
+                       goto err_file;
+               tty_free_file(filp);
+               schedule();
+               goto retry_open;
        }
 
        tty_add_file(tty, filp);
@@ -2152,6 +2161,7 @@ retry_open:
        return 0;
 err_unlock:
        mutex_unlock(&tty_mutex);
+err_unref:
        /* after locks to avoid deadlock */
        if (!IS_ERR_OR_NULL(driver))
                tty_driver_kref_put(driver);
@@ -2648,6 +2658,28 @@ static int tiocsetd(struct tty_struct *tty, int __user *p)
        return ret;
 }
 
+/**
+ *     tiocgetd        -       get line discipline
+ *     @tty: tty device
+ *     @p: pointer to user data
+ *
+ *     Retrieves the line discipline id directly from the ldisc.
+ *
+ *     Locking: waits for ldisc reference (in case the line discipline
+ *             is changing or the tty is being hung up)
+ */
+
+static int tiocgetd(struct tty_struct *tty, int __user *p)
+{
+       struct tty_ldisc *ld;
+       int ret;
+
+       ld = tty_ldisc_ref_wait(tty);
+       ret = put_user(ld->ops->num, p);
+       tty_ldisc_deref(ld);
+       return ret;
+}
+
 /**
 *     send_break      -       perform a timed break
  *     @tty: device to break on
@@ -2874,7 +2906,7 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        case TIOCGSID:
                return tiocgsid(tty, real_tty, p);
        case TIOCGETD:
-               return put_user(tty->ldisc->ops->num, (int __user *)p);
+               return tiocgetd(tty, p);
        case TIOCSETD:
                return tiocsetd(tty, p);
        case TIOCVHANGUP:
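
The new tiocgetd() closes a race: the old one-liner dereferenced tty->ldisc directly, which can change or go away during a hangup or an ldisc switch, while tty_ldisc_ref_wait() pins a valid reference first. From userspace the ioctl is unchanged; a minimal caller:

    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    int main(void)
    {
            int ldisc;

            /* stdin must be a tty; N_TTY is reported as 0 */
            if (ioctl(STDIN_FILENO, TIOCGETD, &ldisc) == 0)
                    printf("line discipline: %d\n", ldisc);
            return 0;
    }
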
index 77703a3..d2f3c4c 100644 (file)
@@ -19,6 +19,14 @@ void __lockfunc tty_lock(struct tty_struct *tty)
 }
 EXPORT_SYMBOL(tty_lock);
 
+int tty_lock_interruptible(struct tty_struct *tty)
+{
+       if (WARN(tty->magic != TTY_MAGIC, "L Bad %p\n", tty))
+               return -EIO;
+       tty_kref_get(tty);
+       return mutex_lock_interruptible(&tty->legacy_mutex);
+}
+
 void __lockfunc tty_unlock(struct tty_struct *tty)
 {
        if (WARN(tty->magic != TTY_MAGIC, "U Bad %p\n", tty))
index e7cbc44..bd51bdd 100644 (file)
@@ -4250,6 +4250,7 @@ unsigned short *screen_pos(struct vc_data *vc, int w_offset, int viewed)
 {
        return screenpos(vc, 2 * w_offset, viewed);
 }
+EXPORT_SYMBOL_GPL(screen_pos);
 
 void getconsxy(struct vc_data *vc, unsigned char *p)
 {
index 26ca4f9..fa4e239 100644 (file)
@@ -428,7 +428,8 @@ static void acm_read_bulk_callback(struct urb *urb)
                set_bit(rb->index, &acm->read_urbs_free);
                dev_dbg(&acm->data->dev, "%s - non-zero urb status: %d\n",
                                                        __func__, status);
-               return;
+               if ((status != -ENOENT) || (urb->actual_length == 0))
+                       return;
        }
 
        usb_mark_last_busy(acm->dev);
@@ -1404,6 +1405,8 @@ made_compressed_probe:
                                usb_sndbulkpipe(usb_dev, epwrite->bEndpointAddress),
                                NULL, acm->writesize, acm_write_bulk, snd);
                snd->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+               if (quirks & SEND_ZERO_PACKET)
+                       snd->urb->transfer_flags |= URB_ZERO_PACKET;
                snd->instance = acm;
        }
 
@@ -1838,6 +1841,11 @@ static const struct usb_device_id acm_ids[] = {
        },
 #endif
 
+       /* Samsung phone in firmware update mode */
+       { USB_DEVICE(0x04e8, 0x685d),
+       .driver_info = IGNORE_DEVICE,
+       },
+
        /* Exclude Infineon Flash Loader utility */
        { USB_DEVICE(0x058b, 0x0041),
        .driver_info = IGNORE_DEVICE,
@@ -1861,6 +1869,10 @@ static const struct usb_device_id acm_ids[] = {
        { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
                USB_CDC_ACM_PROTO_AT_CDMA) },
 
+       { USB_DEVICE(0x1519, 0x0452), /* Intel 7260 modem */
+       .driver_info = SEND_ZERO_PACKET,
+       },
+
        { }
 };
 
index dd9af38..ccfaba9 100644 (file)
@@ -134,3 +134,4 @@ struct acm {
 #define IGNORE_DEVICE                  BIT(5)
 #define QUIRK_CONTROL_LINE_STATE       BIT(6)
 #define CLEAR_HALT_CONDITIONS          BIT(7)
+#define SEND_ZERO_PACKET               BIT(8)
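
The new SEND_ZERO_PACKET quirk exists because a bulk OUT transfer whose length is an exact multiple of the endpoint's wMaxPacketSize is only recognized as complete by some devices (here the Intel 7260 modem) when a zero-length packet follows; URB_ZERO_PACKET tells the host controller driver to append one automatically. The opt-in, in isolation (wrapper name hypothetical):

    #include <linux/usb.h>

    static int submit_out(struct urb *urb, bool needs_zlp)
    {
            /* Ask the HCD to append a zero-length packet whenever the
             * payload ends on a wMaxPacketSize boundary. */
            if (needs_zlp)
                    urb->transfer_flags |= URB_ZERO_PACKET;
            return usb_submit_urb(urb, GFP_KERNEL);
    }
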
index 51b4369..350dcd9 100644 (file)
@@ -5401,7 +5401,6 @@ static int usb_reset_and_verify_device(struct usb_device *udev)
        }
 
        bos = udev->bos;
-       udev->bos = NULL;
 
        for (i = 0; i < SET_CONFIG_TRIES; ++i) {
 
@@ -5494,8 +5493,11 @@ done:
        usb_set_usb2_hardware_lpm(udev, 1);
        usb_unlocked_enable_lpm(udev);
        usb_enable_ltm(udev);
-       usb_release_bos_descriptor(udev);
-       udev->bos = bos;
+       /* release the new BOS descriptor allocated by hub_port_init() */
+       if (udev->bos != bos) {
+               usb_release_bos_descriptor(udev);
+               udev->bos = bos;
+       }
        return 0;
 
 re_enumerate:
index 9b90ad7..987813b 100644 (file)
@@ -99,6 +99,7 @@ static const struct usb_device_id id_table[] = {
        { USB_DEVICE(0x10C4, 0x81AC) }, /* MSD Dash Hawk */
        { USB_DEVICE(0x10C4, 0x81AD) }, /* INSYS USB Modem */
        { USB_DEVICE(0x10C4, 0x81C8) }, /* Lipowsky Industrie Elektronik GmbH, Baby-JTAG */
+       { USB_DEVICE(0x10C4, 0x81D7) }, /* IAI Corp. RCB-CV-USB USB to RS485 Adaptor */
        { USB_DEVICE(0x10C4, 0x81E2) }, /* Lipowsky Industrie Elektronik GmbH, Baby-LIN */
        { USB_DEVICE(0x10C4, 0x81E7) }, /* Aerocomm Radio */
        { USB_DEVICE(0x10C4, 0x81E8) }, /* Zephyr Bioharness */
index a5a0376..8c660ae 100644 (file)
@@ -824,6 +824,7 @@ static const struct usb_device_id id_table_combined[] = {
        { USB_DEVICE(FTDI_VID, FTDI_TURTELIZER_PID),
                .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
        { USB_DEVICE(RATOC_VENDOR_ID, RATOC_PRODUCT_ID_USB60F) },
+       { USB_DEVICE(RATOC_VENDOR_ID, RATOC_PRODUCT_ID_SCU18) },
        { USB_DEVICE(FTDI_VID, FTDI_REU_TINY_PID) },
 
        /* Papouch devices based on FTDI chip */
index 67c6d44..a84df25 100644 (file)
  */
 #define RATOC_VENDOR_ID                0x0584
 #define RATOC_PRODUCT_ID_USB60F        0xb020
+#define RATOC_PRODUCT_ID_SCU18 0xb03a
 
 /*
  * Infineon Technologies
index e3c3f57..6196073 100644 (file)
@@ -368,6 +368,16 @@ static int mxu1_port_probe(struct usb_serial_port *port)
        return 0;
 }
 
+static int mxu1_port_remove(struct usb_serial_port *port)
+{
+       struct mxu1_port *mxport;
+
+       mxport = usb_get_serial_port_data(port);
+       kfree(mxport);
+
+       return 0;
+}
+
 static int mxu1_startup(struct usb_serial *serial)
 {
        struct mxu1_device *mxdev;
@@ -427,6 +437,14 @@ err_free_mxdev:
        return err;
 }
 
+static void mxu1_release(struct usb_serial *serial)
+{
+       struct mxu1_device *mxdev;
+
+       mxdev = usb_get_serial_data(serial);
+       kfree(mxdev);
+}
+
 static int mxu1_write_byte(struct usb_serial_port *port, u32 addr,
                           u8 mask, u8 byte)
 {
@@ -957,7 +975,9 @@ static struct usb_serial_driver mxu11x0_device = {
        .id_table               = mxu1_idtable,
        .num_ports              = 1,
        .port_probe             = mxu1_port_probe,
+       .port_remove            = mxu1_port_remove,
        .attach                 = mxu1_startup,
+       .release                = mxu1_release,
        .open                   = mxu1_open,
        .close                  = mxu1_close,
        .ioctl                  = mxu1_ioctl,
index f228060..db86e51 100644 (file)
@@ -268,6 +268,8 @@ static void option_instat_callback(struct urb *urb);
 #define TELIT_PRODUCT_CC864_SINGLE             0x1006
 #define TELIT_PRODUCT_DE910_DUAL               0x1010
 #define TELIT_PRODUCT_UE910_V2                 0x1012
+#define TELIT_PRODUCT_LE922_USBCFG0            0x1042
+#define TELIT_PRODUCT_LE922_USBCFG3            0x1043
 #define TELIT_PRODUCT_LE920                    0x1200
 #define TELIT_PRODUCT_LE910                    0x1201
 
@@ -615,6 +617,16 @@ static const struct option_blacklist_info telit_le920_blacklist = {
        .reserved = BIT(1) | BIT(5),
 };
 
+static const struct option_blacklist_info telit_le922_blacklist_usbcfg0 = {
+       .sendsetup = BIT(2),
+       .reserved = BIT(0) | BIT(1) | BIT(3),
+};
+
+static const struct option_blacklist_info telit_le922_blacklist_usbcfg3 = {
+       .sendsetup = BIT(0),
+       .reserved = BIT(1) | BIT(2) | BIT(3),
+};
+
 static const struct usb_device_id option_ids[] = {
        { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) },
        { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) },
@@ -1160,6 +1172,10 @@ static const struct usb_device_id option_ids[] = {
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_CC864_SINGLE) },
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_DE910_DUAL) },
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_UE910_V2) },
+       { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG0),
+               .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 },
+       { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG3),
+               .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 },
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910),
                .driver_info = (kernel_ulong_t)&telit_le910_blacklist },
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920),
@@ -1679,7 +1695,7 @@ static const struct usb_device_id option_ids[] = {
        { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_EU3_P) },
        { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_PH8),
                .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
-       { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX) },
+       { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX, 0xff) },
        { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_PLXX),
                .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
        { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_HC28_MDM) }, 
index 60afb39..337a0be 100644 (file)
@@ -544,6 +544,11 @@ static int treo_attach(struct usb_serial *serial)
                (serial->num_interrupt_in == 0))
                return 0;
 
+       if (serial->num_bulk_in < 2 || serial->num_interrupt_in < 2) {
+               dev_err(&serial->interface->dev, "missing endpoints\n");
+               return -ENODEV;
+       }
+
        /*
        * It appears that Treos and Kyoceras want to use the
        * 1st bulk in endpoint to communicate with the 2nd bulk out endpoint,
@@ -597,8 +602,10 @@ static int clie_5_attach(struct usb_serial *serial)
         */
 
        /* some sanity check */
-       if (serial->num_ports < 2)
-               return -1;
+       if (serial->num_bulk_out < 2) {
+               dev_err(&serial->interface->dev, "missing bulk out endpoints\n");
+               return -ENODEV;
+       }
 
        /* port 0 now uses the modified endpoint Address */
        port = serial->port[0];
index 82f25cc..ecca316 100644 (file)
@@ -123,8 +123,8 @@ struct iommu_group *vfio_iommu_group_get(struct device *dev)
        /*
         * With noiommu enabled, an IOMMU group will be created for a device
         * that doesn't already have one and doesn't have an iommu_ops on their
-        * bus.  We use iommu_present() again in the main code to detect these
-        * fake groups.
+        * bus.  We set iommudata simply to be able to identify these groups
+        * as special-use groups and to reclaim them later.
         */
        if (group || !noiommu || iommu_present(dev->bus))
                return group;
@@ -134,6 +134,7 @@ struct iommu_group *vfio_iommu_group_get(struct device *dev)
                return NULL;
 
        iommu_group_set_name(group, "vfio-noiommu");
+       iommu_group_set_iommudata(group, &noiommu, NULL);
        ret = iommu_group_add_device(group, dev);
        iommu_group_put(group);
        if (ret)
@@ -158,7 +159,7 @@ EXPORT_SYMBOL_GPL(vfio_iommu_group_get);
 void vfio_iommu_group_put(struct iommu_group *group, struct device *dev)
 {
 #ifdef CONFIG_VFIO_NOIOMMU
-       if (!iommu_present(dev->bus))
+       if (iommu_group_get_iommudata(group) == &noiommu)
                iommu_group_remove_device(dev);
 #endif
 
@@ -190,16 +191,10 @@ static long vfio_noiommu_ioctl(void *iommu_data,
        return -ENOTTY;
 }
 
-static int vfio_iommu_present(struct device *dev, void *unused)
-{
-       return iommu_present(dev->bus) ? 1 : 0;
-}
-
 static int vfio_noiommu_attach_group(void *iommu_data,
                                     struct iommu_group *iommu_group)
 {
-       return iommu_group_for_each_dev(iommu_group, NULL,
-                                       vfio_iommu_present) ? -EINVAL : 0;
+       return iommu_group_get_iommudata(iommu_group) == &noiommu ? 0 : -EINVAL;
 }
 
 static void vfio_noiommu_detach_group(void *iommu_data,
@@ -323,8 +318,7 @@ static void vfio_group_unlock_and_free(struct vfio_group *group)
 /**
  * Group objects - create, release, get, put, search
  */
-static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
-                                           bool iommu_present)
+static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
 {
        struct vfio_group *group, *tmp;
        struct device *dev;
@@ -342,7 +336,9 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
        atomic_set(&group->container_users, 0);
        atomic_set(&group->opened, 0);
        group->iommu_group = iommu_group;
-       group->noiommu = !iommu_present;
+#ifdef CONFIG_VFIO_NOIOMMU
+       group->noiommu = (iommu_group_get_iommudata(iommu_group) == &noiommu);
+#endif
 
        group->nb.notifier_call = vfio_iommu_group_notifier;
 
@@ -767,7 +763,7 @@ int vfio_add_group_dev(struct device *dev,
 
        group = vfio_group_get_from_iommu(iommu_group);
        if (!group) {
-               group = vfio_create_group(iommu_group, iommu_present(dev->bus));
+               group = vfio_create_group(iommu_group);
                if (IS_ERR(group)) {
                        iommu_group_put(iommu_group);
                        return PTR_ERR(group);
index 36205c2..f6bed86 100644 (file)
@@ -545,6 +545,7 @@ err_enable_device:
 static void virtio_pci_remove(struct pci_dev *pci_dev)
 {
        struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+       struct device *dev = get_device(&vp_dev->vdev.dev);
 
        unregister_virtio_device(&vp_dev->vdev);
 
@@ -554,6 +555,7 @@ static void virtio_pci_remove(struct pci_dev *pci_dev)
                virtio_pci_modern_remove(vp_dev);
 
        pci_disable_device(pci_dev);
+       put_device(dev);
 }
 
 static struct pci_driver virtio_pci_driver = {
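
The get_device()/put_device() pair added to virtio_pci_remove() pins the embedded struct device across teardown: unregister_virtio_device() may drop what was the last reference, after which the remaining cleanup calls would touch freed memory. The shape of the pattern, with hypothetical foo_* names:

    #include <linux/device.h>

    struct foo_device {
            struct device dev;
    };

    void unregister_foo_device(struct foo_device *fd);      /* hypothetical */
    void foo_teardown(struct foo_device *fd);               /* hypothetical */

    static void foo_remove(struct foo_device *fd)
    {
            struct device *dev = get_device(&fd->dev);      /* extra ref */

            unregister_foo_device(fd);      /* may drop the last ref */
            foo_teardown(fd);               /* fd still valid: we hold dev */
            put_device(dev);                /* ->release() may run now */
    }
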
index 945fc43..4ac2ca8 100644 (file)
@@ -242,7 +242,7 @@ static int tmem_cleancache_init_shared_fs(char *uuid, size_t pagesize)
        return xen_tmem_new_pool(shared_uuid, TMEM_POOL_SHARED, pagesize);
 }
 
-static struct cleancache_ops tmem_cleancache_ops = {
+static const struct cleancache_ops tmem_cleancache_ops = {
        .put_page = tmem_cleancache_put_page,
        .get_page = tmem_cleancache_get_page,
        .invalidate_page = tmem_cleancache_flush_page,
index 88d9af3..5fb60ea 100644 (file)
@@ -328,8 +328,8 @@ static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq,
                list_add_tail(&work->ordered_list, &wq->ordered_list);
                spin_unlock_irqrestore(&wq->list_lock, flags);
        }
-       queue_work(wq->normal_wq, &work->normal_work);
        trace_btrfs_work_queued(work);
+       queue_work(wq->normal_wq, &work->normal_work);
 }
 
 void btrfs_queue_work(struct btrfs_workqueue *wq,
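
Reordering the tracepoint before queue_work() matters because a btrfs work item can run and free itself the moment it is queued; any access to the work item after the call is a potential use-after-free. The general rule, sketched with a hypothetical tracepoint:

    #include <linux/workqueue.h>

    void trace_item_queued(struct work_struct *w);  /* hypothetical */

    static void publish(struct workqueue_struct *wq, struct work_struct *w)
    {
            trace_item_queued(w);   /* safe: 'w' not visible to workers yet */
            queue_work(wq, w);      /* from here on, 'w' may already be freed */
    }
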
index dd08e29..4545e2e 100644 (file)
@@ -182,6 +182,7 @@ static struct btrfs_lockdep_keyset {
        { .id = BTRFS_TREE_RELOC_OBJECTID,      .name_stem = "treloc"   },
        { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc"   },
        { .id = BTRFS_UUID_TREE_OBJECTID,       .name_stem = "uuid"     },
+       { .id = BTRFS_FREE_SPACE_TREE_OBJECTID, .name_stem = "free-space" },
        { .id = 0,                              .name_stem = "tree"     },
 };
 
@@ -1787,7 +1788,6 @@ static int cleaner_kthread(void *arg)
        int again;
        struct btrfs_trans_handle *trans;
 
-       set_freezable();
        do {
                again = 0;
 
index 393e36b..53dbeaf 100644 (file)
@@ -153,6 +153,20 @@ static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
 
 static unsigned long *alloc_bitmap(u32 bitmap_size)
 {
+       void *mem;
+
+       /*
+        * The allocation size varies; observed numbers were < 4K up to 16K.
+        * Using vmalloc unconditionally would be too heavy, so we try
+        * contiguous allocations first.
+        */
+       if (bitmap_size <= PAGE_SIZE)
+               return kzalloc(bitmap_size, GFP_NOFS);
+
+       mem = kzalloc(bitmap_size, GFP_NOFS | __GFP_NOWARN);
+       if (mem)
+               return mem;
+
        return __vmalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO,
                         PAGE_KERNEL);
 }
@@ -289,7 +303,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
 
        ret = 0;
 out:
-       vfree(bitmap);
+       kvfree(bitmap);
        if (ret)
                btrfs_abort_transaction(trans, root, ret);
        return ret;
@@ -438,7 +452,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
 
        ret = 0;
 out:
-       vfree(bitmap);
+       kvfree(bitmap);
        if (ret)
                btrfs_abort_transaction(trans, root, ret);
        return ret;
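
Because alloc_bitmap() can now return either a kmalloc'd or a vmalloc'd buffer, the free side switches to kvfree(), which dispatches on the address so callers need not track which allocator succeeded. Roughly:

    #include <linux/mm.h>
    #include <linux/slab.h>
    #include <linux/vmalloc.h>

    /* Approximately what kvfree() does (sketch). */
    static void kvfree_sketch(const void *addr)
    {
            if (is_vmalloc_addr(addr))
                    vfree(addr);
            else
                    kfree(addr);
    }
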
index e28f3d4..5f06eb1 100644 (file)
@@ -7116,21 +7116,41 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
        if (ret)
                return ERR_PTR(ret);
 
-       em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
-                             ins.offset, ins.offset, ins.offset, 0);
-       if (IS_ERR(em)) {
-               btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
-               return em;
-       }
-
+       /*
+        * Create the ordered extent before the extent map. This is to avoid
+        * races with the fast fsync path that would lead to it logging file
+        * extent items that point to disk extents that were not yet written to.
+        * The fast fsync path collects ordered extents into a local list and
+        * then collects all the new extent maps, so we must create the ordered
+        * extent first and make sure the fast fsync path collects any new
+        * ordered extents after collecting new extent maps as well.
+        * The fsync path simply can not rely on inode_dio_wait() because it
+        * The fsync path simply cannot rely on inode_dio_wait() because it
+        * causes a deadlock with AIO.
        ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
                                           ins.offset, ins.offset, 0);
        if (ret) {
                btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
-               free_extent_map(em);
                return ERR_PTR(ret);
        }
 
+       em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
+                             ins.offset, ins.offset, ins.offset, 0);
+       if (IS_ERR(em)) {
+               struct btrfs_ordered_extent *oe;
+
+               btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
+               oe = btrfs_lookup_ordered_extent(inode, start);
+               ASSERT(oe);
+               if (WARN_ON(!oe))
+                       return em;
+               set_bit(BTRFS_ORDERED_IOERR, &oe->flags);
+               set_bit(BTRFS_ORDERED_IO_DONE, &oe->flags);
+               btrfs_remove_ordered_extent(inode, oe);
+               /* Once for our lookup and once for the ordered extents tree. */
+               btrfs_put_ordered_extent(oe);
+               btrfs_put_ordered_extent(oe);
+       }
        return em;
 }
 
index fd1c4d9..2bd0011 100644 (file)
@@ -575,7 +575,8 @@ static int is_cowonly_root(u64 root_objectid)
            root_objectid == BTRFS_TREE_LOG_OBJECTID ||
            root_objectid == BTRFS_CSUM_TREE_OBJECTID ||
            root_objectid == BTRFS_UUID_TREE_OBJECTID ||
-           root_objectid == BTRFS_QUOTA_TREE_OBJECTID)
+           root_objectid == BTRFS_QUOTA_TREE_OBJECTID ||
+           root_objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
                return 1;
        return 0;
 }
index e0ac859..539e7b5 100644 (file)
@@ -202,6 +202,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF);
 BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56);
 BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA);
 BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
+BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
 
 static struct attribute *btrfs_supported_feature_attrs[] = {
        BTRFS_FEAT_ATTR_PTR(mixed_backref),
@@ -213,6 +214,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
        BTRFS_FEAT_ATTR_PTR(raid56),
        BTRFS_FEAT_ATTR_PTR(skinny_metadata),
        BTRFS_FEAT_ATTR_PTR(no_holes),
+       BTRFS_FEAT_ATTR_PTR(free_space_tree),
        NULL
 };
 
@@ -780,6 +782,39 @@ failure:
        return error;
 }
 
+
+/*
+ * Change per-fs features in /sys/fs/btrfs/UUID/features to match current
+ * values in superblock. Call after any changes to incompat/compat_ro flags
+ */
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+               u64 bit, enum btrfs_feature_set set)
+{
+       struct btrfs_fs_devices *fs_devs;
+       struct kobject *fsid_kobj;
+       u64 features;
+       int ret;
+
+       if (!fs_info)
+               return;
+
+       features = get_features(fs_info, set);
+       ASSERT(bit & supported_feature_masks[set]);
+
+       fs_devs = fs_info->fs_devices;
+       fsid_kobj = &fs_devs->fsid_kobj;
+
+       if (!fsid_kobj->state_initialized)
+               return;
+
+       /*
+        * FIXME: this is too heavy to update just one value; ideally we'd like
+        * to use sysfs_update_group() but some refactoring is needed first.
+        */
+       sysfs_remove_group(fsid_kobj, &btrfs_feature_attr_group);
+       ret = sysfs_create_group(fsid_kobj, &btrfs_feature_attr_group);
+}
+
 static int btrfs_init_debugfs(void)
 {
 #ifdef CONFIG_DEBUG_FS
index 9c09522..d7da1a4 100644 (file)
@@ -56,7 +56,7 @@ static struct btrfs_feature_attr btrfs_attr_##_name = {                            \
 #define BTRFS_FEAT_ATTR_COMPAT(name, feature) \
        BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature)
 #define BTRFS_FEAT_ATTR_COMPAT_RO(name, feature) \
-       BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT, feature)
+       BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT_RO, feature)
 #define BTRFS_FEAT_ATTR_INCOMPAT(name, feature) \
        BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature)
 
@@ -90,4 +90,7 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
                                struct kobject *parent);
 int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs);
 void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs);
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+               u64 bit, enum btrfs_feature_set set);
+
 #endif /* _BTRFS_SYSFS_H_ */
index b1d920b..0e1e61a 100644 (file)
@@ -82,18 +82,18 @@ void btrfs_destroy_test_fs(void)
 struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void)
 {
        struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info),
-                                               GFP_NOFS);
+                                               GFP_KERNEL);
 
        if (!fs_info)
                return fs_info;
        fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices),
-                                     GFP_NOFS);
+                                     GFP_KERNEL);
        if (!fs_info->fs_devices) {
                kfree(fs_info);
                return NULL;
        }
        fs_info->super_copy = kzalloc(sizeof(struct btrfs_super_block),
-                                     GFP_NOFS);
+                                     GFP_KERNEL);
        if (!fs_info->super_copy) {
                kfree(fs_info->fs_devices);
                kfree(fs_info);
@@ -180,11 +180,11 @@ btrfs_alloc_dummy_block_group(unsigned long length)
 {
        struct btrfs_block_group_cache *cache;
 
-       cache = kzalloc(sizeof(*cache), GFP_NOFS);
+       cache = kzalloc(sizeof(*cache), GFP_KERNEL);
        if (!cache)
                return NULL;
        cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
-                                       GFP_NOFS);
+                                       GFP_KERNEL);
        if (!cache->free_space_ctl) {
                kfree(cache);
                return NULL;
index e29fa29..669b582 100644 (file)
@@ -94,7 +94,7 @@ static int test_find_delalloc(void)
         * test.
         */
        for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) {
-               page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+               page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
                if (!page) {
                        test_msg("Failed to allocate test page\n");
                        ret = -ENOMEM;
@@ -113,7 +113,7 @@ static int test_find_delalloc(void)
         * |--- delalloc ---|
         * |---  search  ---|
         */
-       set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_NOFS);
+       set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_KERNEL);
        start = 0;
        end = 0;
        found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -144,7 +144,7 @@ static int test_find_delalloc(void)
                test_msg("Couldn't find the locked page\n");
                goto out_bits;
        }
-       set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_NOFS);
+       set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_KERNEL);
        start = test_start;
        end = 0;
        found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -199,7 +199,7 @@ static int test_find_delalloc(void)
         *
         * We are re-using our test_start from above since it works out well.
         */
-       set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_NOFS);
+       set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_KERNEL);
        start = test_start;
        end = 0;
        found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -262,7 +262,7 @@ static int test_find_delalloc(void)
        }
        ret = 0;
 out_bits:
-       clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_NOFS);
+       clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_KERNEL);
 out:
        if (locked_page)
                page_cache_release(locked_page);
@@ -360,7 +360,7 @@ static int test_eb_bitmaps(void)
 
        test_msg("Running extent buffer bitmap tests\n");
 
-       bitmap = kmalloc(len, GFP_NOFS);
+       bitmap = kmalloc(len, GFP_KERNEL);
        if (!bitmap) {
                test_msg("Couldn't allocate test bitmap\n");
                return -ENOMEM;
index 5de55fd..e2d3da0 100644 (file)
@@ -974,7 +974,7 @@ static int test_extent_accounting(void)
                               (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
                               EXTENT_DELALLOC | EXTENT_DIRTY |
                               EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
-                              NULL, GFP_NOFS);
+                              NULL, GFP_KERNEL);
        if (ret) {
                test_msg("clear_extent_bit returned %d\n", ret);
                goto out;
@@ -1045,7 +1045,7 @@ static int test_extent_accounting(void)
                               BTRFS_MAX_EXTENT_SIZE+8191,
                               EXTENT_DIRTY | EXTENT_DELALLOC |
                               EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-                              NULL, GFP_NOFS);
+                              NULL, GFP_KERNEL);
        if (ret) {
                test_msg("clear_extent_bit returned %d\n", ret);
                goto out;
@@ -1079,7 +1079,7 @@ static int test_extent_accounting(void)
        ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
                               EXTENT_DIRTY | EXTENT_DELALLOC |
                               EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-                              NULL, GFP_NOFS);
+                              NULL, GFP_KERNEL);
        if (ret) {
                test_msg("clear_extent_bit returned %d\n", ret);
                goto out;
@@ -1096,7 +1096,7 @@ out:
                clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
                                 EXTENT_DIRTY | EXTENT_DELALLOC |
                                 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-                                NULL, GFP_NOFS);
+                                NULL, GFP_KERNEL);
        iput(inode);
        btrfs_free_dummy_root(root);
        return ret;
index 323e12c..978c3a8 100644 (file)
@@ -4127,7 +4127,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
                                     struct inode *inode,
                                     struct btrfs_path *path,
                                     struct list_head *logged_list,
-                                    struct btrfs_log_ctx *ctx)
+                                    struct btrfs_log_ctx *ctx,
+                                    const u64 start,
+                                    const u64 end)
 {
        struct extent_map *em, *n;
        struct list_head extents;
@@ -4166,7 +4168,13 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
        }
 
        list_sort(NULL, &extents, extent_cmp);
-
+       /*
+        * Collect any new ordered extents within the range. This is to
+        * prevent logging file extent items before the disk locations
+        * they point to have been written. We do this only to deal
+        * with races against concurrent lockless direct IO writes.
+        */
+       btrfs_get_logged_extents(inode, logged_list, start, end);
 process:
        while (!list_empty(&extents)) {
                em = list_entry(extents.next, struct extent_map, list);
@@ -4701,7 +4709,7 @@ log_extents:
                        goto out_unlock;
                }
                ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
-                                               &logged_list, ctx);
+                                               &logged_list, ctx, start, end);
                if (ret) {
                        err = ret;
                        goto out_unlock;
index b94fa6c..053818d 100644 (file)
@@ -153,7 +153,7 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
        if (isalarm(ctx))
                remaining = alarm_expires_remaining(&ctx->t.alarm);
        else
-               remaining = hrtimer_expires_remaining(&ctx->t.tmr);
+               remaining = hrtimer_expires_remaining_adjusted(&ctx->t.tmr);
 
        return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
 }
index 89d008d..fe5efad 100644 (file)
@@ -42,6 +42,10 @@ int drm_atomic_helper_commit(struct drm_device *dev,
                             struct drm_atomic_state *state,
                             bool async);
 
+bool drm_atomic_helper_framebuffer_changed(struct drm_device *dev,
+                                          struct drm_atomic_state *old_state,
+                                          struct drm_crtc *crtc);
+
 void drm_atomic_helper_wait_for_vblanks(struct drm_device *dev,
                                        struct drm_atomic_state *old_state);
 
index 7fc9296..15a73d4 100644 (file)
@@ -244,6 +244,8 @@ void blk_mq_freeze_queue(struct request_queue *q);
 void blk_mq_unfreeze_queue(struct request_queue *q);
 void blk_mq_freeze_queue_start(struct request_queue *q);
 
+void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
+
 /*
  * Driver command data is immediately after the request. So subtract request
  * size to get back to the original request, add request size to get the PDU.
index bda5ec0..fccf7f4 100644 (file)
@@ -37,7 +37,7 @@ struct cleancache_ops {
        void (*invalidate_fs)(int);
 };
 
-extern int cleancache_register_ops(struct cleancache_ops *ops);
+extern int cleancache_register_ops(const struct cleancache_ops *ops);
 extern void __cleancache_init_fs(struct super_block *);
 extern void __cleancache_init_shared_fs(struct super_block *);
 extern int  __cleancache_get_page(struct page *);
@@ -48,14 +48,14 @@ extern void __cleancache_invalidate_fs(struct super_block *);
 
 #ifdef CONFIG_CLEANCACHE
 #define cleancache_enabled (1)
-static inline bool cleancache_fs_enabled(struct page *page)
-{
-       return page->mapping->host->i_sb->cleancache_poolid >= 0;
-}
 static inline bool cleancache_fs_enabled_mapping(struct address_space *mapping)
 {
        return mapping->host->i_sb->cleancache_poolid >= 0;
 }
+static inline bool cleancache_fs_enabled(struct page *page)
+{
+       return cleancache_fs_enabled_mapping(page->mapping);
+}
 #else
 #define cleancache_enabled (0)
 #define cleancache_fs_enabled(_page) (0)
@@ -89,11 +89,9 @@ static inline void cleancache_init_shared_fs(struct super_block *sb)
 
 static inline int cleancache_get_page(struct page *page)
 {
-       int ret = -1;
-
        if (cleancache_enabled && cleancache_fs_enabled(page))
-               ret = __cleancache_get_page(page);
-       return ret;
+               return __cleancache_get_page(page);
+       return -1;
 }
 
 static inline void cleancache_put_page(struct page *page)
index 0639dcc..81de712 100644 (file)
@@ -165,7 +165,6 @@ struct ftrace_ops {
        ftrace_func_t                   saved_func;
        int __percpu                    *disabled;
 #ifdef CONFIG_DYNAMIC_FTRACE
-       int                             nr_trampolines;
        struct ftrace_ops_hash          local_hash;
        struct ftrace_ops_hash          *func_hash;
        struct ftrace_ops_hash          old_hash;
index 76dd4f0..2ead22d 100644 (file)
@@ -87,7 +87,8 @@ enum hrtimer_restart {
  * @function:  timer expiry callback function
  * @base:      pointer to the timer base (per cpu and per clock)
  * @state:     state information (See bit values above)
- * @start_pid: timer statistics field to store the pid of the task which
+ * @is_rel:    Set if the timer was armed relative
+ * @start_pid: timer statistics field to store the pid of the task which
  *             started the timer
  * @start_site:        timer statistics field to store the site where the timer
  *             was started
@@ -101,7 +102,8 @@ struct hrtimer {
        ktime_t                         _softexpires;
        enum hrtimer_restart            (*function)(struct hrtimer *);
        struct hrtimer_clock_base       *base;
-       unsigned long                   state;
+       u8                              state;
+       u8                              is_rel;
 #ifdef CONFIG_TIMER_STATS
        int                             start_pid;
        void                            *start_site;
@@ -321,6 +323,27 @@ static inline void clock_was_set_delayed(void) { }
 
 #endif
 
+static inline ktime_t
+__hrtimer_expires_remaining_adjusted(const struct hrtimer *timer, ktime_t now)
+{
+       ktime_t rem = ktime_sub(timer->node.expires, now);
+
+       /*
+        * Adjust relative timers for the extra we added in
+        * hrtimer_start_range_ns() to prevent short timeouts.
+        */
+       if (IS_ENABLED(CONFIG_TIME_LOW_RES) && timer->is_rel)
+               rem.tv64 -= hrtimer_resolution;
+       return rem;
+}
+
+static inline ktime_t
+hrtimer_expires_remaining_adjusted(const struct hrtimer *timer)
+{
+       return __hrtimer_expires_remaining_adjusted(timer,
+                                                   timer->base->get_time());
+}
+
 extern void clock_was_set(void);
 #ifdef CONFIG_TIMERFD
 extern void timerfd_clock_was_set(void);
@@ -390,7 +413,12 @@ static inline void hrtimer_restart(struct hrtimer *timer)
 }
 
 /* Query timers: */
-extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer);
+extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust);
+
+static inline ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
+{
+       return __hrtimer_get_remaining(timer, false);
+}
 
 extern u64 hrtimer_get_next_event(void);
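
The is_rel flag and the *_adjusted() helpers exist because, with CONFIG_TIME_LOW_RES, hrtimer_start_range_ns() pads relative timers by one clock resolution to guarantee a minimum wait; reporting the remaining time must subtract that padding again, or callers such as timerfd (hunk above) would see more time left than they asked for. The bookkeeping in isolation, mirroring the inline helper added above:

    #include <linux/types.h>

    /* Undo at read time what was added at arm time (relative timers only). */
    static s64 adjusted_remaining_ns(s64 expires, s64 now, bool is_rel,
                                     s64 resolution)
    {
            s64 rem = expires - now;

            if (is_rel)
                    rem -= resolution;      /* strip the arm-time padding */
            return rem;
    }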
 
index f28dff3..a5c539f 100644 (file)
@@ -133,8 +133,9 @@ struct iommu_dm_region {
 
 /**
  * struct iommu_ops - iommu ops and capabilities
- * @domain_init: init iommu domain
- * @domain_destroy: destroy iommu domain
+ * @capable: check capability
+ * @domain_alloc: allocate iommu domain
+ * @domain_free: free iommu domain
  * @attach_dev: attach device to an iommu domain
  * @detach_dev: detach device from an iommu domain
  * @map: map a physically contiguous memory region to an iommu domain
@@ -144,8 +145,15 @@ struct iommu_dm_region {
  * @iova_to_phys: translate iova to physical address
  * @add_device: add device to iommu grouping
  * @remove_device: remove device from iommu grouping
+ * @device_group: find iommu group for a particular device
  * @domain_get_attr: Query domain attributes
  * @domain_set_attr: Change domain attributes
+ * @get_dm_regions: Request list of direct mapping requirements for a device
+ * @put_dm_regions: Free list of direct mapping requirements for a device
+ * @domain_window_enable: Configure and enable a particular window for a domain
+ * @domain_window_disable: Disable a particular window for a domain
+ * @domain_set_windows: Set the number of windows for a domain
+ * @domain_get_windows: Return the number of windows for a domain
  * @of_xlate: add OF master IDs to iommu grouping
  * @pgsize_bitmap: bitmap of supported page sizes
  * @priv: per-instance data private to the iommu driver
@@ -182,9 +190,9 @@ struct iommu_ops {
        int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr,
                                    phys_addr_t paddr, u64 size, int prot);
        void (*domain_window_disable)(struct iommu_domain *domain, u32 wnd_nr);
-       /* Set the numer of window per domain */
+       /* Set the number of windows per domain */
        int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count);
-       /* Get the numer of window per domain */
+       /* Get the number of windows per domain */
        u32 (*domain_get_windows)(struct iommu_domain *domain);
 
 #ifdef CONFIG_OF_IOMMU
index f64622a..04579d9 100644 (file)
@@ -70,6 +70,7 @@ struct irq_fwspec {
  */
 enum irq_domain_bus_token {
        DOMAIN_BUS_ANY          = 0,
+       DOMAIN_BUS_WIRED,
        DOMAIN_BUS_PCI_MSI,
        DOMAIN_BUS_PLATFORM_MSI,
        DOMAIN_BUS_NEXUS,
index f9828a4..b35a61a 100644 (file)
@@ -634,9 +634,6 @@ struct perf_event_context {
        int                             nr_cgroups;      /* cgroup evts */
        void                            *task_ctx_data; /* pmu specific data */
        struct rcu_head                 rcu_head;
-
-       struct delayed_work             orphans_remove;
-       bool                            orphans_remove_sched;
 };
 
 /*
@@ -729,7 +726,7 @@ extern int perf_event_init_task(struct task_struct *child);
 extern void perf_event_exit_task(struct task_struct *child);
 extern void perf_event_free_task(struct task_struct *task);
 extern void perf_event_delayed_put(struct task_struct *task);
-extern struct perf_event *perf_event_get(unsigned int fd);
+extern struct file *perf_event_get(unsigned int fd);
 extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event);
 extern void perf_event_print_debug(void);
 extern void perf_pmu_disable(struct pmu *pmu);
@@ -1044,7 +1041,7 @@ extern void perf_swevent_put_recursion_context(int rctx);
 extern u64 perf_swevent_set_period(struct perf_event *event);
 extern void perf_event_enable(struct perf_event *event);
 extern void perf_event_disable(struct perf_event *event);
-extern int __perf_event_disable(void *info);
+extern void perf_event_disable_local(struct perf_event *event);
 extern void perf_event_task_tick(void);
 #else /* !CONFIG_PERF_EVENTS: */
 static inline void *
@@ -1070,7 +1067,7 @@ static inline int perf_event_init_task(struct task_struct *child) { return 0; }
 static inline void perf_event_exit_task(struct task_struct *child)     { }
 static inline void perf_event_free_task(struct task_struct *task)      { }
 static inline void perf_event_delayed_put(struct task_struct *task)    { }
-static inline struct perf_event *perf_event_get(unsigned int fd)       { return ERR_PTR(-EINVAL); }
+static inline struct file *perf_event_get(unsigned int fd)     { return ERR_PTR(-EINVAL); }
 static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
 {
        return ERR_PTR(-EINVAL);
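The switch from returning a struct perf_event * to a struct file * changes the lifetime rule for in-kernel users: the caller now pins the event through a file reference and releases it with fput() instead of perf_event_release_kernel(), reaching the event itself via file->private_data. A sketch of the new contract (grab_event() is a hypothetical helper, not part of the patch):

    static struct perf_event *grab_event(unsigned int fd, struct file **filep)
    {
            struct file *file = perf_event_get(fd); /* takes a file ref */

            if (IS_ERR(file))
                    return ERR_CAST(file);

            *filep = file;
            /* Borrowed pointer: only valid while *filep is held;
             * drop the pin with fput(*filep) when done. */
            return file->private_data;
    }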
index e7a018e..017fced 100644 (file)
@@ -1,10 +1,13 @@
 #ifndef __LINUX_SWIOTLB_H
 #define __LINUX_SWIOTLB_H
 
+#include <linux/dma-direction.h>
+#include <linux/init.h>
 #include <linux/types.h>
 
 struct device;
 struct dma_attrs;
+struct page;
 struct scatterlist;
 
 extern int swiotlb_force;
index 2fd8708..d9fb4b0 100644 (file)
@@ -649,6 +649,7 @@ extern long vt_compat_ioctl(struct tty_struct *tty,
 /* tty_mutex.c */
 /* functions for preparation of BKL removal */
 extern void __lockfunc tty_lock(struct tty_struct *tty);
+extern int  tty_lock_interruptible(struct tty_struct *tty);
 extern void __lockfunc tty_unlock(struct tty_struct *tty);
 extern void __lockfunc tty_lock_slave(struct tty_struct *tty);
 extern void __lockfunc tty_unlock_slave(struct tty_struct *tty);
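tty_lock_interruptible() presumably mirrors mutex_lock_interruptible(): it returns 0 with the lock held, or a negative errno if a signal arrived while sleeping. A hedged usage sketch (the caller and the exact return-value convention are assumptions; the hunk only shows the declaration):

    static int touch_tty_state(struct tty_struct *tty)
    {
            int ret = tty_lock_interruptible(tty);

            if (ret)
                    return ret;     /* interrupted; let the caller restart */

            /* ... modify tty state under the lock ... */
            tty_unlock(tty);
            return 0;
    }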
index 98feb1b..d6dfa05 100644 (file)
@@ -17,7 +17,7 @@ TRACE_EVENT(fence_annotate_wait_on,
 
        TP_STRUCT__entry(
                __string(driver, fence->ops->get_driver_name(fence))
-               __string(timeline, fence->ops->get_driver_name(fence))
+               __string(timeline, fence->ops->get_timeline_name(fence))
                __field(unsigned int, context)
                __field(unsigned int, seqno)
 
index 4cc989a..f95e1c4 100644 (file)
@@ -48,6 +48,8 @@ struct drm_etnaviv_timespec {
 #define ETNAVIV_PARAM_GPU_FEATURES_2                0x05
 #define ETNAVIV_PARAM_GPU_FEATURES_3                0x06
 #define ETNAVIV_PARAM_GPU_FEATURES_4                0x07
+#define ETNAVIV_PARAM_GPU_FEATURES_5                0x08
+#define ETNAVIV_PARAM_GPU_FEATURES_6                0x09
 
 #define ETNAVIV_PARAM_GPU_STREAM_COUNT              0x10
 #define ETNAVIV_PARAM_GPU_REGISTER_MAX              0x11
@@ -59,6 +61,7 @@ struct drm_etnaviv_timespec {
 #define ETNAVIV_PARAM_GPU_BUFFER_SIZE               0x17
 #define ETNAVIV_PARAM_GPU_INSTRUCTION_COUNT         0x18
 #define ETNAVIV_PARAM_GPU_NUM_CONSTANTS             0x19
+#define ETNAVIV_PARAM_GPU_NUM_VARYINGS              0x1a
 
 #define ETNA_MAX_PIPES 4
 
index b0799bc..89ebbc4 100644 (file)
@@ -291,10 +291,13 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
 {
        struct perf_event *event;
        const struct perf_event_attr *attr;
+       struct file *file;
 
-       event = perf_event_get(fd);
-       if (IS_ERR(event))
-               return event;
+       file = perf_event_get(fd);
+       if (IS_ERR(file))
+               return file;
+
+       event = file->private_data;
 
        attr = perf_event_attrs(event);
        if (IS_ERR(attr))
@@ -304,24 +307,22 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
                goto err;
 
        if (attr->type == PERF_TYPE_RAW)
-               return event;
+               return file;
 
        if (attr->type == PERF_TYPE_HARDWARE)
-               return event;
+               return file;
 
        if (attr->type == PERF_TYPE_SOFTWARE &&
            attr->config == PERF_COUNT_SW_BPF_OUTPUT)
-               return event;
+               return file;
 err:
-       perf_event_release_kernel(event);
+       fput(file);
        return ERR_PTR(-EINVAL);
 }
 
 static void perf_event_fd_array_put_ptr(void *ptr)
 {
-       struct perf_event *event = ptr;
-
-       perf_event_release_kernel(event);
+       fput((struct file *)ptr);
 }
 
 static const struct bpf_map_ops perf_event_array_ops = {
index 06ae52e..5946460 100644 (file)
@@ -49,8 +49,6 @@
 
 #include <asm/irq_regs.h>
 
-static struct workqueue_struct *perf_wq;
-
 typedef int (*remote_function_f)(void *);
 
 struct remote_function_call {
@@ -126,44 +124,181 @@ static int cpu_function_call(int cpu, remote_function_f func, void *info)
        return data.ret;
 }
 
-static void event_function_call(struct perf_event *event,
-                               int (*active)(void *),
-                               void (*inactive)(void *),
-                               void *data)
+static inline struct perf_cpu_context *
+__get_cpu_context(struct perf_event_context *ctx)
+{
+       return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
+}
+
+static void perf_ctx_lock(struct perf_cpu_context *cpuctx,
+                         struct perf_event_context *ctx)
 {
+       raw_spin_lock(&cpuctx->ctx.lock);
+       if (ctx)
+               raw_spin_lock(&ctx->lock);
+}
+
+static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
+                           struct perf_event_context *ctx)
+{
+       if (ctx)
+               raw_spin_unlock(&ctx->lock);
+       raw_spin_unlock(&cpuctx->ctx.lock);
+}
+
+#define TASK_TOMBSTONE ((void *)-1L)
+
+static bool is_kernel_event(struct perf_event *event)
+{
+       return READ_ONCE(event->owner) == TASK_TOMBSTONE;
+}
+
+/*
+ * On task ctx scheduling...
+ *
+ * When !ctx->nr_events a task context will not be scheduled. This means
+ * we can disable the scheduler hooks (for performance) without leaving
+ * pending task ctx state.
+ *
+ * This however results in two special cases:
+ *
+ *  - removing the last event from a task ctx; this is relatively
+ *    straightforward and is done in __perf_remove_from_context.
+ *
+ *  - adding the first event to a task ctx; this is tricky because we cannot
+ *    rely on ctx->is_active and therefore cannot use event_function_call().
+ *    See perf_install_in_context().
+ *
+ * This is because we need a ctx->lock serialized variable (ctx->is_active)
+ * to reliably determine if a particular task/context is scheduled in. The
+ * task_curr() use in task_function_call() is racy in that a remote context
+ * switch is not a single atomic operation.
+ *
+ * As is, the situation is 'safe' because we set rq->curr before we do the
+ * actual context switch. This means that task_curr() will fail early, but
+ * we'll continue spinning on ctx->is_active until we've passed
+ * perf_event_task_sched_out().
+ *
+ * Without this ctx->lock serialized variable we could have a race where we
+ * find the task (and hence the context) inactive while in fact they are.
+ *
+ * If ctx->nr_events, then ctx->is_active and cpuctx->task_ctx are set.
+ */
+
+typedef void (*event_f)(struct perf_event *, struct perf_cpu_context *,
+                       struct perf_event_context *, void *);
+
+struct event_function_struct {
+       struct perf_event *event;
+       event_f func;
+       void *data;
+};
+
+static int event_function(void *info)
+{
+       struct event_function_struct *efs = info;
+       struct perf_event *event = efs->event;
        struct perf_event_context *ctx = event->ctx;
-       struct task_struct *task = ctx->task;
+       struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+       struct perf_event_context *task_ctx = cpuctx->task_ctx;
+       int ret = 0;
+
+       WARN_ON_ONCE(!irqs_disabled());
+
+       perf_ctx_lock(cpuctx, task_ctx);
+       /*
+        * Since we do the IPI call without holding ctx->lock, things can
+        * have changed; double check that we hit the task we set out to hit.
+        */
+       if (ctx->task) {
+               if (ctx->task != current) {
+                       ret = -EAGAIN;
+                       goto unlock;
+               }
+
+               /*
+                * We only use event_function_call() on established contexts,
+                * and event_function() is only ever called when active (or
+                * rather, we'll have bailed in task_function_call() or the
+                * above ctx->task != current test), therefore we must have
+                * ctx->is_active here.
+                */
+               WARN_ON_ONCE(!ctx->is_active);
+               /*
+                * And since we have ctx->is_active, cpuctx->task_ctx must
+                * match.
+                */
+               WARN_ON_ONCE(task_ctx != ctx);
+       } else {
+               WARN_ON_ONCE(&cpuctx->ctx != ctx);
+       }
+
+       efs->func(event, cpuctx, ctx, efs->data);
+unlock:
+       perf_ctx_unlock(cpuctx, task_ctx);
+
+       return ret;
+}
+
+static void event_function_local(struct perf_event *event, event_f func, void *data)
+{
+       struct event_function_struct efs = {
+               .event = event,
+               .func = func,
+               .data = data,
+       };
+
+       int ret = event_function(&efs);
+       WARN_ON_ONCE(ret);
+}
+
+static void event_function_call(struct perf_event *event, event_f func, void *data)
+{
+       struct perf_event_context *ctx = event->ctx;
+       struct task_struct *task = READ_ONCE(ctx->task); /* verified in event_function */
+       struct event_function_struct efs = {
+               .event = event,
+               .func = func,
+               .data = data,
+       };
+
+       if (!event->parent) {
+               /*
+                * If this is a !child event, we must hold ctx::mutex to
+                * stabilize the event->ctx relation. See
+                * perf_event_ctx_lock().
+                */
+               lockdep_assert_held(&ctx->mutex);
+       }
 
        if (!task) {
-               cpu_function_call(event->cpu, active, data);
+               cpu_function_call(event->cpu, event_function, &efs);
                return;
        }
 
 again:
-       if (!task_function_call(task, active, data))
+       if (task == TASK_TOMBSTONE)
+               return;
+
+       if (!task_function_call(task, event_function, &efs))
                return;
 
        raw_spin_lock_irq(&ctx->lock);
-       if (ctx->is_active) {
-               /*
-                * Reload the task pointer, it might have been changed by
-                * a concurrent perf_event_context_sched_out().
-                */
-               task = ctx->task;
-               raw_spin_unlock_irq(&ctx->lock);
-               goto again;
+       /*
+        * Reload the task pointer, it might have been changed by
+        * a concurrent perf_event_context_sched_out().
+        */
+       task = ctx->task;
+       if (task != TASK_TOMBSTONE) {
+               if (ctx->is_active) {
+                       raw_spin_unlock_irq(&ctx->lock);
+                       goto again;
+               }
+               func(event, NULL, ctx, data);
        }
-       inactive(data);
        raw_spin_unlock_irq(&ctx->lock);
 }
 
-#define EVENT_OWNER_KERNEL ((void *) -1)
-
-static bool is_kernel_event(struct perf_event *event)
-{
-       return event->owner == EVENT_OWNER_KERNEL;
-}
-
 #define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
                       PERF_FLAG_FD_OUTPUT  |\
                       PERF_FLAG_PID_CGROUP |\
@@ -368,28 +503,6 @@ static inline u64 perf_event_clock(struct perf_event *event)
        return event->clock();
 }
 
-static inline struct perf_cpu_context *
-__get_cpu_context(struct perf_event_context *ctx)
-{
-       return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
-}
-
-static void perf_ctx_lock(struct perf_cpu_context *cpuctx,
-                         struct perf_event_context *ctx)
-{
-       raw_spin_lock(&cpuctx->ctx.lock);
-       if (ctx)
-               raw_spin_lock(&ctx->lock);
-}
-
-static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
-                           struct perf_event_context *ctx)
-{
-       if (ctx)
-               raw_spin_unlock(&ctx->lock);
-       raw_spin_unlock(&cpuctx->ctx.lock);
-}
-
 #ifdef CONFIG_CGROUP_PERF
 
 static inline bool
@@ -579,13 +692,7 @@ static inline void perf_cgroup_sched_out(struct task_struct *task,
         * we are holding the rcu lock
         */
        cgrp1 = perf_cgroup_from_task(task, NULL);
-
-       /*
-        * next is NULL when called from perf_event_enable_on_exec()
-        * that will systematically cause a cgroup_switch()
-        */
-       if (next)
-               cgrp2 = perf_cgroup_from_task(next, NULL);
+       cgrp2 = perf_cgroup_from_task(next, NULL);
 
        /*
         * only schedule out current cgroup events if we know
@@ -611,8 +718,6 @@ static inline void perf_cgroup_sched_in(struct task_struct *prev,
         * we are holding the rcu lock
         */
        cgrp1 = perf_cgroup_from_task(task, NULL);
-
-       /* prev can never be NULL */
        cgrp2 = perf_cgroup_from_task(prev, NULL);
 
        /*
@@ -917,7 +1022,7 @@ static void put_ctx(struct perf_event_context *ctx)
        if (atomic_dec_and_test(&ctx->refcount)) {
                if (ctx->parent_ctx)
                        put_ctx(ctx->parent_ctx);
-               if (ctx->task)
+               if (ctx->task && ctx->task != TASK_TOMBSTONE)
                        put_task_struct(ctx->task);
                call_rcu(&ctx->rcu_head, free_ctx);
        }
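All remote modifications now funnel through event_function_call(): the caller supplies an event_f callback that runs with ctx->lock held, either via IPI on the CPU where the context is active or locally once the context is provably inactive, retrying as long as the racy task_function_call() misses. Writing a new modifier then reduces to one callback pair; a hypothetical sketch (perf_event_frob() is not part of this series):

    static void __perf_event_frob(struct perf_event *event,
                                  struct perf_cpu_context *cpuctx,
                                  struct perf_event_context *ctx,
                                  void *info)
    {
            u64 arg = *(u64 *)info;

            /* ctx->lock is held and the context has been revalidated,
             * so event state can be modified safely here. */
            event->hw.sample_period = arg;  /* illustrative only */
    }

    static void perf_event_frob(struct perf_event *event, u64 arg)
    {
            /* event_function_call() is synchronous, so the on-stack
             * payload is safe: the callback finishes before we return. */
            event_function_call(event, __perf_event_frob, &arg);
    }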
@@ -934,9 +1039,8 @@ static void put_ctx(struct perf_event_context *ctx)
  * perf_event_context::mutex nests and those are:
  *
  *  - perf_event_exit_task_context()   [ child , 0 ]
- *      __perf_event_exit_task()
- *        sync_child_event()
- *          put_event()                        [ parent, 1 ]
+ *      perf_event_exit_event()
+ *        put_event()                  [ parent, 1 ]
  *
  *  - perf_event_init_context()                [ parent, 0 ]
  *      inherit_task_group()
@@ -979,8 +1083,8 @@ static void put_ctx(struct perf_event_context *ctx)
  * Lock order:
  *     task_struct::perf_event_mutex
  *       perf_event_context::mutex
- *         perf_event_context::lock
  *         perf_event::child_mutex;
+ *           perf_event_context::lock
  *         perf_event::mmap_mutex
  *         mmap_sem
  */
@@ -1078,6 +1182,7 @@ static u64 primary_event_id(struct perf_event *event)
 
 /*
  * Get the perf_event_context for a task and lock it.
+ *
  * This has to cope with the fact that until it is locked,
  * the context could get moved to another task.
  */
@@ -1118,9 +1223,12 @@ retry:
                        goto retry;
                }
 
-               if (!atomic_inc_not_zero(&ctx->refcount)) {
+               if (ctx->task == TASK_TOMBSTONE ||
+                   !atomic_inc_not_zero(&ctx->refcount)) {
                        raw_spin_unlock(&ctx->lock);
                        ctx = NULL;
+               } else {
+                       WARN_ON_ONCE(ctx->task != task);
                }
        }
        rcu_read_unlock();
@@ -1246,6 +1354,8 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
 static void
 list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 {
+       lockdep_assert_held(&ctx->lock);
+
        WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
        event->attach_state |= PERF_ATTACH_CONTEXT;
 
@@ -1448,11 +1558,14 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 
        if (is_cgroup_event(event)) {
                ctx->nr_cgroups--;
+               /*
+                * Because cgroup events are always per-cpu events, this will
+                * always be called from the right CPU.
+                */
                cpuctx = __get_cpu_context(ctx);
                /*
-                * if there are no more cgroup events
-                * then cler cgrp to avoid stale pointer
-                * in update_cgrp_time_from_cpuctx()
+                * If there are no more cgroup events then clear cgrp to avoid
+                * stale pointer in update_cgrp_time_from_cpuctx().
                 */
                if (!ctx->nr_cgroups)
                        cpuctx->cgrp = NULL;
@@ -1530,45 +1643,11 @@ out:
                perf_event__header_size(tmp);
 }
 
-/*
- * User event without the task.
- */
 static bool is_orphaned_event(struct perf_event *event)
 {
-       return event && !is_kernel_event(event) && !event->owner;
+       return event->state == PERF_EVENT_STATE_EXIT;
 }
 
-/*
- * Event has a parent but parent's task finished and it's
- * alive only because of children holding refference.
- */
-static bool is_orphaned_child(struct perf_event *event)
-{
-       return is_orphaned_event(event->parent);
-}
-
-static void orphans_remove_work(struct work_struct *work);
-
-static void schedule_orphans_remove(struct perf_event_context *ctx)
-{
-       if (!ctx->task || ctx->orphans_remove_sched || !perf_wq)
-               return;
-
-       if (queue_delayed_work(perf_wq, &ctx->orphans_remove, 1)) {
-               get_ctx(ctx);
-               ctx->orphans_remove_sched = true;
-       }
-}
-
-static int __init perf_workqueue_init(void)
-{
-       perf_wq = create_singlethread_workqueue("perf");
-       WARN(!perf_wq, "failed to create perf workqueue\n");
-       return perf_wq ? 0 : -1;
-}
-
-core_initcall(perf_workqueue_init);
-
 static inline int pmu_filter_match(struct perf_event *event)
 {
        struct pmu *pmu = event->pmu;
@@ -1629,9 +1708,6 @@ event_sched_out(struct perf_event *event,
        if (event->attr.exclusive || !cpuctx->active_oncpu)
                cpuctx->exclusive = 0;
 
-       if (is_orphaned_child(event))
-               schedule_orphans_remove(ctx);
-
        perf_pmu_enable(event->pmu);
 }
 
@@ -1655,21 +1731,8 @@ group_sched_out(struct perf_event *group_event,
                cpuctx->exclusive = 0;
 }
 
-struct remove_event {
-       struct perf_event *event;
-       bool detach_group;
-};
-
-static void ___perf_remove_from_context(void *info)
-{
-       struct remove_event *re = info;
-       struct perf_event *event = re->event;
-       struct perf_event_context *ctx = event->ctx;
-
-       if (re->detach_group)
-               perf_group_detach(event);
-       list_del_event(event, ctx);
-}
+#define DETACH_GROUP   0x01UL
+#define DETACH_STATE   0x02UL
 
 /*
  * Cross CPU call to remove a performance event
@@ -1677,33 +1740,33 @@ static void ___perf_remove_from_context(void *info)
  * We disable the event on the hardware level first. After that we
  * remove it from the context list.
  */
-static int __perf_remove_from_context(void *info)
+static void
+__perf_remove_from_context(struct perf_event *event,
+                          struct perf_cpu_context *cpuctx,
+                          struct perf_event_context *ctx,
+                          void *info)
 {
-       struct remove_event *re = info;
-       struct perf_event *event = re->event;
-       struct perf_event_context *ctx = event->ctx;
-       struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+       unsigned long flags = (unsigned long)info;
 
-       raw_spin_lock(&ctx->lock);
        event_sched_out(event, cpuctx, ctx);
-       if (re->detach_group)
+       if (flags & DETACH_GROUP)
                perf_group_detach(event);
        list_del_event(event, ctx);
-       if (!ctx->nr_events && cpuctx->task_ctx == ctx) {
+       if (flags & DETACH_STATE)
+               event->state = PERF_EVENT_STATE_EXIT;
+
+       if (!ctx->nr_events && ctx->is_active) {
                ctx->is_active = 0;
-               cpuctx->task_ctx = NULL;
+               if (ctx->task) {
+                       WARN_ON_ONCE(cpuctx->task_ctx != ctx);
+                       cpuctx->task_ctx = NULL;
+               }
        }
-       raw_spin_unlock(&ctx->lock);
-
-       return 0;
 }
 
 /*
  * Remove the event from a task's (or a CPU's) list of events.
  *
- * CPU events are removed with a smp call. For task events we only
- * call when the task is on a CPU.
- *
  * If event->ctx is a cloned context, callers must make sure that
  * every task struct that event->ctx->task could possibly point to
  * remains valid.  This is OK when called from perf_release since
@@ -1711,73 +1774,32 @@ static int __perf_remove_from_context(void *info)
  * When called from perf_event_exit_task, it's OK because the
  * context has been detached from its task.
  */
-static void perf_remove_from_context(struct perf_event *event, bool detach_group)
+static void perf_remove_from_context(struct perf_event *event, unsigned long flags)
 {
-       struct perf_event_context *ctx = event->ctx;
-       struct remove_event re = {
-               .event = event,
-               .detach_group = detach_group,
-       };
+       lockdep_assert_held(&event->ctx->mutex);
 
-       lockdep_assert_held(&ctx->mutex);
-
-       event_function_call(event, __perf_remove_from_context,
-                           ___perf_remove_from_context, &re);
+       event_function_call(event, __perf_remove_from_context, (void *)flags);
 }
 
 /*
  * Cross CPU call to disable a performance event
  */
-int __perf_event_disable(void *info)
-{
-       struct perf_event *event = info;
-       struct perf_event_context *ctx = event->ctx;
-       struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
-
-       /*
-        * If this is a per-task event, need to check whether this
-        * event's task is the current task on this cpu.
-        *
-        * Can trigger due to concurrent perf_event_context_sched_out()
-        * flipping contexts around.
-        */
-       if (ctx->task && cpuctx->task_ctx != ctx)
-               return -EINVAL;
-
-       raw_spin_lock(&ctx->lock);
-
-       /*
-        * If the event is on, turn it off.
-        * If it is in error state, leave it in error state.
-        */
-       if (event->state >= PERF_EVENT_STATE_INACTIVE) {
-               update_context_time(ctx);
-               update_cgrp_time_from_event(event);
-               update_group_times(event);
-               if (event == event->group_leader)
-                       group_sched_out(event, cpuctx, ctx);
-               else
-                       event_sched_out(event, cpuctx, ctx);
-               event->state = PERF_EVENT_STATE_OFF;
-       }
-
-       raw_spin_unlock(&ctx->lock);
-
-       return 0;
-}
-
-void ___perf_event_disable(void *info)
+static void __perf_event_disable(struct perf_event *event,
+                                struct perf_cpu_context *cpuctx,
+                                struct perf_event_context *ctx,
+                                void *info)
 {
-       struct perf_event *event = info;
+       if (event->state < PERF_EVENT_STATE_INACTIVE)
+               return;
 
-       /*
-        * Since we have the lock this context can't be scheduled
-        * in, so we can change the state safely.
-        */
-       if (event->state == PERF_EVENT_STATE_INACTIVE) {
-               update_group_times(event);
-               event->state = PERF_EVENT_STATE_OFF;
-       }
+       update_context_time(ctx);
+       update_cgrp_time_from_event(event);
+       update_group_times(event);
+       if (event == event->group_leader)
+               group_sched_out(event, cpuctx, ctx);
+       else
+               event_sched_out(event, cpuctx, ctx);
+       event->state = PERF_EVENT_STATE_OFF;
 }
 
 /*
@@ -1788,7 +1810,8 @@ void ___perf_event_disable(void *info)
  * remains valid.  This condition is satisfied when called through
  * perf_event_for_each_child or perf_event_for_each because they
  * hold the top-level event's child_mutex, so any descendant that
- * goes to exit will block in sync_child_event.
+ * goes to exit will block in perf_event_exit_event().
+ *
  * When called from perf_pending_event it's OK because event->ctx
  * is the current context on this CPU and preemption is disabled,
  * hence we can't get into perf_event_task_sched_out for this context.
@@ -1804,8 +1827,12 @@ static void _perf_event_disable(struct perf_event *event)
        }
        raw_spin_unlock_irq(&ctx->lock);
 
-       event_function_call(event, __perf_event_disable,
-                           ___perf_event_disable, event);
+       event_function_call(event, __perf_event_disable, NULL);
+}
+
+void perf_event_disable_local(struct perf_event *event)
+{
+       event_function_local(event, __perf_event_disable, NULL);
 }
 
 /*
@@ -1918,9 +1945,6 @@ event_sched_in(struct perf_event *event,
        if (event->attr.exclusive)
                cpuctx->exclusive = 1;
 
-       if (is_orphaned_child(event))
-               schedule_orphans_remove(ctx);
-
 out:
        perf_pmu_enable(event->pmu);
 
@@ -2039,7 +2063,8 @@ static void add_event_to_ctx(struct perf_event *event,
        event->tstamp_stopped = tstamp;
 }
 
-static void task_ctx_sched_out(struct perf_event_context *ctx);
+static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
+                              struct perf_event_context *ctx);
 static void
 ctx_sched_in(struct perf_event_context *ctx,
             struct perf_cpu_context *cpuctx,
@@ -2058,16 +2083,15 @@ static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
                ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
 }
 
-static void ___perf_install_in_context(void *info)
+static void ctx_resched(struct perf_cpu_context *cpuctx,
+                       struct perf_event_context *task_ctx)
 {
-       struct perf_event *event = info;
-       struct perf_event_context *ctx = event->ctx;
-
-       /*
-        * Since the task isn't running, its safe to add the event, us holding
-        * the ctx->lock ensures the task won't get scheduled in.
-        */
-       add_event_to_ctx(event, ctx);
+       perf_pmu_disable(cpuctx->ctx.pmu);
+       if (task_ctx)
+               task_ctx_sched_out(cpuctx, task_ctx);
+       cpu_ctx_sched_out(cpuctx, EVENT_ALL);
+       perf_event_sched_in(cpuctx, task_ctx, current);
+       perf_pmu_enable(cpuctx->ctx.pmu);
 }
 
 /*
@@ -2077,55 +2101,31 @@ static void ___perf_install_in_context(void *info)
  */
 static int  __perf_install_in_context(void *info)
 {
-       struct perf_event *event = info;
-       struct perf_event_context *ctx = event->ctx;
+       struct perf_event_context *ctx = info;
        struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
        struct perf_event_context *task_ctx = cpuctx->task_ctx;
-       struct task_struct *task = current;
-
-       perf_ctx_lock(cpuctx, task_ctx);
-       perf_pmu_disable(cpuctx->ctx.pmu);
 
-       /*
-        * If there was an active task_ctx schedule it out.
-        */
-       if (task_ctx)
-               task_ctx_sched_out(task_ctx);
-
-       /*
-        * If the context we're installing events in is not the
-        * active task_ctx, flip them.
-        */
-       if (ctx->task && task_ctx != ctx) {
-               if (task_ctx)
-                       raw_spin_unlock(&task_ctx->lock);
+       raw_spin_lock(&cpuctx->ctx.lock);
+       if (ctx->task) {
                raw_spin_lock(&ctx->lock);
+               /*
+                * If we hit the 'wrong' task, we've since scheduled and
+                * everything should be sorted, nothing to do!
+                */
                task_ctx = ctx;
-       }
+               if (ctx->task != current)
+                       goto unlock;
 
-       if (task_ctx) {
-               cpuctx->task_ctx = task_ctx;
-               task = task_ctx->task;
+               /*
+                * If task_ctx is set, it had better be to us.
+                */
+               WARN_ON_ONCE(cpuctx->task_ctx != ctx && cpuctx->task_ctx);
+       } else if (task_ctx) {
+               raw_spin_lock(&task_ctx->lock);
        }
 
-       cpu_ctx_sched_out(cpuctx, EVENT_ALL);
-
-       update_context_time(ctx);
-       /*
-        * update cgrp time only if current cgrp
-        * matches event->cgrp. Must be done before
-        * calling add_event_to_ctx()
-        */
-       update_cgrp_time_from_event(event);
-
-       add_event_to_ctx(event, ctx);
-
-       /*
-        * Schedule everything back in
-        */
-       perf_event_sched_in(cpuctx, task_ctx, task);
-
-       perf_pmu_enable(cpuctx->ctx.pmu);
+       ctx_resched(cpuctx, task_ctx);
+unlock:
        perf_ctx_unlock(cpuctx, task_ctx);
 
        return 0;
@@ -2133,27 +2133,54 @@ static int  __perf_install_in_context(void *info)
 
 /*
  * Attach a performance event to a context
- *
- * First we add the event to the list with the hardware enable bit
- * in event->hw_config cleared.
- *
- * If the event is attached to a task which is on a CPU we use a smp
- * call to enable it in the task context. The task might have been
- * scheduled away, but we check this in the smp call again.
  */
 static void
 perf_install_in_context(struct perf_event_context *ctx,
                        struct perf_event *event,
                        int cpu)
 {
+       struct task_struct *task = NULL;
+
        lockdep_assert_held(&ctx->mutex);
 
        event->ctx = ctx;
        if (event->cpu != -1)
                event->cpu = cpu;
 
-       event_function_call(event, __perf_install_in_context,
-                           ___perf_install_in_context, event);
+       /*
+        * Installing events is tricky because we cannot rely on ctx->is_active
+        * to be set in case this is the nr_events 0 -> 1 transition.
+        *
+        * So what we do is we add the event to the list here, which will allow
+        * a future context switch to DTRT and then send a racy IPI. If the IPI
+        * fails to hit the right task, this means a context switch must have
+        * happened and that will have taken care of business.
+        */
+       raw_spin_lock_irq(&ctx->lock);
+       task = ctx->task;
+       /*
+        * Worse, we cannot even rely on the ctx actually existing anymore. If
+        * between find_get_context() and perf_install_in_context() the task
+        * went through perf_event_exit_task(), it is dead and we should not be
+        * adding new events.
+        */
+       if (task == TASK_TOMBSTONE) {
+               raw_spin_unlock_irq(&ctx->lock);
+               return;
+       }
+       update_context_time(ctx);
+       /*
+        * Update cgrp time only if current cgrp matches event->cgrp.
+        * Must be done before calling add_event_to_ctx().
+        */
+       update_cgrp_time_from_event(event);
+       add_event_to_ctx(event, ctx);
+       raw_spin_unlock_irq(&ctx->lock);
+
+       if (task)
+               task_function_call(task, __perf_install_in_context, ctx);
+       else
+               cpu_function_call(cpu, __perf_install_in_context, ctx);
 }
 
 /*
@@ -2180,43 +2207,30 @@ static void __perf_event_mark_enabled(struct perf_event *event)
 /*
  * Cross CPU call to enable a performance event
  */
-static int __perf_event_enable(void *info)
+static void __perf_event_enable(struct perf_event *event,
+                               struct perf_cpu_context *cpuctx,
+                               struct perf_event_context *ctx,
+                               void *info)
 {
-       struct perf_event *event = info;
-       struct perf_event_context *ctx = event->ctx;
        struct perf_event *leader = event->group_leader;
-       struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
-       int err;
+       struct perf_event_context *task_ctx;
 
-       /*
-        * There's a time window between 'ctx->is_active' check
-        * in perf_event_enable function and this place having:
-        *   - IRQs on
-        *   - ctx->lock unlocked
-        *
-        * where the task could be killed and 'ctx' deactivated
-        * by perf_event_exit_task.
-        */
-       if (!ctx->is_active)
-               return -EINVAL;
+       if (event->state >= PERF_EVENT_STATE_INACTIVE ||
+           event->state <= PERF_EVENT_STATE_ERROR)
+               return;
 
-       raw_spin_lock(&ctx->lock);
        update_context_time(ctx);
-
-       if (event->state >= PERF_EVENT_STATE_INACTIVE)
-               goto unlock;
-
-       /*
-        * set current task's cgroup time reference point
-        */
-       perf_cgroup_set_timestamp(current, ctx);
-
        __perf_event_mark_enabled(event);
 
+       if (!ctx->is_active)
+               return;
+
        if (!event_filter_match(event)) {
-               if (is_cgroup_event(event))
+               if (is_cgroup_event(event)) {
+                       perf_cgroup_set_timestamp(current, ctx); // XXX ?
                        perf_cgroup_defer_enabled(event);
-               goto unlock;
+               }
+               return;
        }
 
        /*
@@ -2224,41 +2238,13 @@ static int __perf_event_enable(void *info)
         * then don't put it on unless the group is on.
         */
        if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE)
-               goto unlock;
-
-       if (!group_can_go_on(event, cpuctx, 1)) {
-               err = -EEXIST;
-       } else {
-               if (event == leader)
-                       err = group_sched_in(event, cpuctx, ctx);
-               else
-                       err = event_sched_in(event, cpuctx, ctx);
-       }
-
-       if (err) {
-               /*
-                * If this event can't go on and it's part of a
-                * group, then the whole group has to come off.
-                */
-               if (leader != event) {
-                       group_sched_out(leader, cpuctx, ctx);
-                       perf_mux_hrtimer_restart(cpuctx);
-               }
-               if (leader->attr.pinned) {
-                       update_group_times(leader);
-                       leader->state = PERF_EVENT_STATE_ERROR;
-               }
-       }
-
-unlock:
-       raw_spin_unlock(&ctx->lock);
+               return;
 
-       return 0;
-}
+       task_ctx = cpuctx->task_ctx;
+       if (ctx->task)
+               WARN_ON_ONCE(task_ctx != ctx);
 
-void ___perf_event_enable(void *info)
-{
-       __perf_event_mark_enabled((struct perf_event *)info);
+       ctx_resched(cpuctx, task_ctx);
 }
 
 /*
@@ -2275,7 +2261,8 @@ static void _perf_event_enable(struct perf_event *event)
        struct perf_event_context *ctx = event->ctx;
 
        raw_spin_lock_irq(&ctx->lock);
-       if (event->state >= PERF_EVENT_STATE_INACTIVE) {
+       if (event->state >= PERF_EVENT_STATE_INACTIVE ||
+           event->state <  PERF_EVENT_STATE_ERROR) {
                raw_spin_unlock_irq(&ctx->lock);
                return;
        }
@@ -2291,8 +2278,7 @@ static void _perf_event_enable(struct perf_event *event)
                event->state = PERF_EVENT_STATE_OFF;
        raw_spin_unlock_irq(&ctx->lock);
 
-       event_function_call(event, __perf_event_enable,
-                           ___perf_event_enable, event);
+       event_function_call(event, __perf_event_enable, NULL);
 }
 
 /*
@@ -2342,12 +2328,27 @@ static void ctx_sched_out(struct perf_event_context *ctx,
                          struct perf_cpu_context *cpuctx,
                          enum event_type_t event_type)
 {
-       struct perf_event *event;
        int is_active = ctx->is_active;
+       struct perf_event *event;
 
-       ctx->is_active &= ~event_type;
-       if (likely(!ctx->nr_events))
+       lockdep_assert_held(&ctx->lock);
+
+       if (likely(!ctx->nr_events)) {
+               /*
+                * See __perf_remove_from_context().
+                */
+               WARN_ON_ONCE(ctx->is_active);
+               if (ctx->task)
+                       WARN_ON_ONCE(cpuctx->task_ctx);
                return;
+       }
+
+       ctx->is_active &= ~event_type;
+       if (ctx->task) {
+               WARN_ON_ONCE(cpuctx->task_ctx != ctx);
+               if (!ctx->is_active)
+                       cpuctx->task_ctx = NULL;
+       }
 
        update_context_time(ctx);
        update_cgrp_time_from_cpuctx(cpuctx);
@@ -2518,17 +2519,21 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
                raw_spin_lock(&ctx->lock);
                raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
                if (context_equiv(ctx, next_ctx)) {
-                       /*
-                        * XXX do we need a memory barrier of sorts
-                        * wrt to rcu_dereference() of perf_event_ctxp
-                        */
-                       task->perf_event_ctxp[ctxn] = next_ctx;
-                       next->perf_event_ctxp[ctxn] = ctx;
-                       ctx->task = next;
-                       next_ctx->task = task;
+                       WRITE_ONCE(ctx->task, next);
+                       WRITE_ONCE(next_ctx->task, task);
 
                        swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
 
+                       /*
+                        * RCU_INIT_POINTER here is safe because we've not
+                        * modified the ctx and the above modification of
+                        * ctx->task and ctx->task_ctx_data are immaterial
+                        * since those values are always verified under
+                        * ctx->lock which we're now holding.
+                        */
+                       RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], next_ctx);
+                       RCU_INIT_POINTER(next->perf_event_ctxp[ctxn], ctx);
+
                        do_switch = 0;
 
                        perf_event_sync_stat(ctx, next_ctx);
@@ -2541,8 +2546,7 @@ unlock:
 
        if (do_switch) {
                raw_spin_lock(&ctx->lock);
-               ctx_sched_out(ctx, cpuctx, EVENT_ALL);
-               cpuctx->task_ctx = NULL;
+               task_ctx_sched_out(cpuctx, ctx);
                raw_spin_unlock(&ctx->lock);
        }
 }
@@ -2637,10 +2641,9 @@ void __perf_event_task_sched_out(struct task_struct *task,
                perf_cgroup_sched_out(task, next);
 }
 
-static void task_ctx_sched_out(struct perf_event_context *ctx)
+static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
+                              struct perf_event_context *ctx)
 {
-       struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
-
        if (!cpuctx->task_ctx)
                return;
 
@@ -2648,7 +2651,6 @@ static void task_ctx_sched_out(struct perf_event_context *ctx)
                return;
 
        ctx_sched_out(ctx, cpuctx, EVENT_ALL);
-       cpuctx->task_ctx = NULL;
 }
 
 /*
@@ -2725,13 +2727,22 @@ ctx_sched_in(struct perf_event_context *ctx,
             enum event_type_t event_type,
             struct task_struct *task)
 {
-       u64 now;
        int is_active = ctx->is_active;
+       u64 now;
+
+       lockdep_assert_held(&ctx->lock);
 
-       ctx->is_active |= event_type;
        if (likely(!ctx->nr_events))
                return;
 
+       ctx->is_active |= event_type;
+       if (ctx->task) {
+               if (!is_active)
+                       cpuctx->task_ctx = ctx;
+               else
+                       WARN_ON_ONCE(cpuctx->task_ctx != ctx);
+       }
+
        now = perf_clock();
        ctx->timestamp = now;
        perf_cgroup_set_timestamp(task, ctx);
@@ -2773,12 +2784,7 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
         * cpu flexible, task flexible.
         */
        cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
-
-       if (ctx->nr_events)
-               cpuctx->task_ctx = ctx;
-
-       perf_event_sched_in(cpuctx, cpuctx->task_ctx, task);
-
+       perf_event_sched_in(cpuctx, ctx, task);
        perf_pmu_enable(ctx->pmu);
        perf_ctx_unlock(cpuctx, ctx);
 }
@@ -2800,6 +2806,16 @@ void __perf_event_task_sched_in(struct task_struct *prev,
        struct perf_event_context *ctx;
        int ctxn;
 
+       /*
+        * If cgroup events exist on this CPU, then we need to check if we have
+        * to switch in PMU state; cgroup event are system-wide mode only.
+        *
+        * Since cgroup events are CPU events, we must schedule these in before
+        * we schedule in the task events.
+        */
+       if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
+               perf_cgroup_sched_in(prev, task);
+
        for_each_task_context_nr(ctxn) {
                ctx = task->perf_event_ctxp[ctxn];
                if (likely(!ctx))
@@ -2807,13 +2823,6 @@ void __perf_event_task_sched_in(struct task_struct *prev,
 
                perf_event_context_sched_in(ctx, task);
        }
-       /*
-        * if cgroup events exist on this CPU, then we need
-        * to check if we have to switch in PMU state.
-        * cgroup event are system-wide mode only
-        */
-       if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
-               perf_cgroup_sched_in(prev, task);
 
        if (atomic_read(&nr_switch_events))
                perf_event_switch(task, prev, true);
@@ -3099,46 +3108,30 @@ static int event_enable_on_exec(struct perf_event *event,
 static void perf_event_enable_on_exec(int ctxn)
 {
        struct perf_event_context *ctx, *clone_ctx = NULL;
+       struct perf_cpu_context *cpuctx;
        struct perf_event *event;
        unsigned long flags;
        int enabled = 0;
-       int ret;
 
        local_irq_save(flags);
        ctx = current->perf_event_ctxp[ctxn];
        if (!ctx || !ctx->nr_events)
                goto out;
 
-       /*
-        * We must ctxsw out cgroup events to avoid conflict
-        * when invoking perf_task_event_sched_in() later on
-        * in this function. Otherwise we end up trying to
-        * ctxswin cgroup events which are already scheduled
-        * in.
-        */
-       perf_cgroup_sched_out(current, NULL);
-
-       raw_spin_lock(&ctx->lock);
-       task_ctx_sched_out(ctx);
-
-       list_for_each_entry(event, &ctx->event_list, event_entry) {
-               ret = event_enable_on_exec(event, ctx);
-               if (ret)
-                       enabled = 1;
-       }
+       cpuctx = __get_cpu_context(ctx);
+       perf_ctx_lock(cpuctx, ctx);
+       list_for_each_entry(event, &ctx->event_list, event_entry)
+               enabled |= event_enable_on_exec(event, ctx);
 
        /*
-        * Unclone this context if we enabled any event.
+        * Unclone and reschedule this context if we enabled any event.
         */
-       if (enabled)
+       if (enabled) {
                clone_ctx = unclone_ctx(ctx);
+               ctx_resched(cpuctx, ctx);
+       }
+       perf_ctx_unlock(cpuctx, ctx);
 
-       raw_spin_unlock(&ctx->lock);
-
-       /*
-        * Also calls ctxswin for cgroup events, if any:
-        */
-       perf_event_context_sched_in(ctx, ctx->task);
 out:
        local_irq_restore(flags);
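Rather than hand-rolling the cgroup sched-out, task_ctx_sched_out() and perf_event_context_sched_in() sequence, enable-on-exec now mutates the events and lets ctx_resched() swap the whole context out and back in. The idiom, as a sketch (frob_ctx() is hypothetical; like the caller above, it assumes interrupts are already disabled):

    static void frob_ctx(struct perf_event_context *ctx)
    {
            struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);

            perf_ctx_lock(cpuctx, ctx);
            /* ... change event state here ... */
            ctx_resched(cpuctx, ctx); /* out: task + cpu; in: cpu + task */
            perf_ctx_unlock(cpuctx, ctx);
    }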
 
@@ -3334,7 +3327,6 @@ static void __perf_event_init_context(struct perf_event_context *ctx)
        INIT_LIST_HEAD(&ctx->flexible_groups);
        INIT_LIST_HEAD(&ctx->event_list);
        atomic_set(&ctx->refcount, 1);
-       INIT_DELAYED_WORK(&ctx->orphans_remove, orphans_remove_work);
 }
 
 static struct perf_event_context *
@@ -3521,11 +3513,13 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu)
 
 static void unaccount_event(struct perf_event *event)
 {
+       bool dec = false;
+
        if (event->parent)
                return;
 
        if (event->attach_state & PERF_ATTACH_TASK)
-               static_key_slow_dec_deferred(&perf_sched_events);
+               dec = true;
        if (event->attr.mmap || event->attr.mmap_data)
                atomic_dec(&nr_mmap_events);
        if (event->attr.comm)
@@ -3535,12 +3529,15 @@ static void unaccount_event(struct perf_event *event)
        if (event->attr.freq)
                atomic_dec(&nr_freq_events);
        if (event->attr.context_switch) {
-               static_key_slow_dec_deferred(&perf_sched_events);
+               dec = true;
                atomic_dec(&nr_switch_events);
        }
        if (is_cgroup_event(event))
-               static_key_slow_dec_deferred(&perf_sched_events);
+               dec = true;
        if (has_branch_stack(event))
+               dec = true;
+
+       if (dec)
                static_key_slow_dec_deferred(&perf_sched_events);
 
        unaccount_event_cpu(event, event->cpu);
@@ -3556,7 +3553,7 @@ static void unaccount_event(struct perf_event *event)
  *  3) two matching events on the same context.
  *
  * The former two cases are handled in the allocation path (perf_event_alloc(),
- * __free_event()), the latter -- before the first perf_install_in_context().
+ * _free_event()), the latter -- before the first perf_install_in_context().
  */
 static int exclusive_event_init(struct perf_event *event)
 {
@@ -3631,29 +3628,6 @@ static bool exclusive_event_installable(struct perf_event *event,
        return true;
 }
 
-static void __free_event(struct perf_event *event)
-{
-       if (!event->parent) {
-               if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
-                       put_callchain_buffers();
-       }
-
-       perf_event_free_bpf_prog(event);
-
-       if (event->destroy)
-               event->destroy(event);
-
-       if (event->ctx)
-               put_ctx(event->ctx);
-
-       if (event->pmu) {
-               exclusive_event_destroy(event);
-               module_put(event->pmu->module);
-       }
-
-       call_rcu(&event->rcu_head, free_event_rcu);
-}
-
 static void _free_event(struct perf_event *event)
 {
        irq_work_sync(&event->pending);
@@ -3675,7 +3649,25 @@ static void _free_event(struct perf_event *event)
        if (is_cgroup_event(event))
                perf_detach_cgroup(event);
 
-       __free_event(event);
+       if (!event->parent) {
+               if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+                       put_callchain_buffers();
+       }
+
+       perf_event_free_bpf_prog(event);
+
+       if (event->destroy)
+               event->destroy(event);
+
+       if (event->ctx)
+               put_ctx(event->ctx);
+
+       if (event->pmu) {
+               exclusive_event_destroy(event);
+               module_put(event->pmu->module);
+       }
+
+       call_rcu(&event->rcu_head, free_event_rcu);
 }
 
 /*
@@ -3702,14 +3694,13 @@ static void perf_remove_from_owner(struct perf_event *event)
        struct task_struct *owner;
 
        rcu_read_lock();
-       owner = ACCESS_ONCE(event->owner);
        /*
-        * Matches the smp_wmb() in perf_event_exit_task(). If we observe
-        * !owner it means the list deletion is complete and we can indeed
-        * free this event, otherwise we need to serialize on
+        * Matches the smp_store_release() in perf_event_exit_task(). If we
+        * observe !owner it means the list deletion is complete and we can
+        * indeed free this event, otherwise we need to serialize on
         * owner->perf_event_mutex.
         */
-       smp_read_barrier_depends();
+       owner = lockless_dereference(event->owner);
        if (owner) {
                /*
                 * Since delayed_put_task_struct() also drops the last
@@ -3737,8 +3728,10 @@ static void perf_remove_from_owner(struct perf_event *event)
                 * ensured they're done, and we can proceed with freeing the
                 * event.
                 */
-               if (event->owner)
+               if (event->owner) {
                        list_del_init(&event->owner_entry);
+                       smp_store_release(&event->owner, NULL);
+               }
                mutex_unlock(&owner->perf_event_mutex);
                put_task_struct(owner);
        }
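The owner handoff is now an explicit release/consume pair rather than smp_wmb() plus smp_read_barrier_depends(): the writer publishes the completed list deletion before clearing event->owner, and the teardown side's dependency-ordered load pairs with that store, so observing NULL implies the owner entry is fully gone. Schematically, with the relevant lines lifted out of context as a sketch:

    /* Writer (owner cleanup above / perf_event_exit_task()): */
            list_del_init(&event->owner_entry);
            smp_store_release(&event->owner, NULL); /* publish deletion */

    /* Reader (perf_remove_from_owner()): pairs with the release. */
            owner = lockless_dereference(event->owner);
            if (owner)
                    mutex_lock(&owner->perf_event_mutex);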
@@ -3746,36 +3739,98 @@ static void perf_remove_from_owner(struct perf_event *event)
 
 static void put_event(struct perf_event *event)
 {
-       struct perf_event_context *ctx;
-
        if (!atomic_long_dec_and_test(&event->refcount))
                return;
 
+       _free_event(event);
+}
+
+/*
+ * Kill an event dead; while event:refcount will preserve the event
+ * object, it will not preserve its functionality. Once the last 'user'
+ * gives up the object, we'll destroy the thing.
+ */
+int perf_event_release_kernel(struct perf_event *event)
+{
+       struct perf_event_context *ctx;
+       struct perf_event *child, *tmp;
+
        if (!is_kernel_event(event))
                perf_remove_from_owner(event);
 
+       ctx = perf_event_ctx_lock(event);
+       WARN_ON_ONCE(ctx->parent_ctx);
+       perf_remove_from_context(event, DETACH_GROUP | DETACH_STATE);
+       perf_event_ctx_unlock(event, ctx);
+
        /*
-        * There are two ways this annotation is useful:
+        * At this point we must have event->state == PERF_EVENT_STATE_EXIT,
+        * either from the above perf_remove_from_context() or through
+        * perf_event_exit_event().
         *
-        *  1) there is a lock recursion from perf_event_exit_task
-        *     see the comment there.
+        * Therefore, anybody acquiring event->child_mutex after the below
+        * loop _must_ also see this, most importantly inherit_event() which
+        * will avoid placing more children on the list.
         *
-        *  2) there is a lock-inversion with mmap_sem through
-        *     perf_read_group(), which takes faults while
-        *     holding ctx->mutex, however this is called after
-        *     the last filedesc died, so there is no possibility
-        *     to trigger the AB-BA case.
+        * Thus this guarantees that we will in fact observe and kill _ALL_
+        * child events.
         */
-       ctx = perf_event_ctx_lock_nested(event, SINGLE_DEPTH_NESTING);
-       WARN_ON_ONCE(ctx->parent_ctx);
-       perf_remove_from_context(event, true);
-       perf_event_ctx_unlock(event, ctx);
+       WARN_ON_ONCE(event->state != PERF_EVENT_STATE_EXIT);
 
-       _free_event(event);
-}
+again:
+       mutex_lock(&event->child_mutex);
+       list_for_each_entry(child, &event->child_list, child_list) {
 
-int perf_event_release_kernel(struct perf_event *event)
-{
+               /*
+                * Cannot change: child events are not migrated; see the
+                * comment with perf_event_ctx_lock_nested().
+                */
+               ctx = lockless_dereference(child->ctx);
+               /*
+                * Since child_mutex nests inside ctx::mutex, we must jump
+                * through hoops. We start by grabbing a reference on the ctx.
+                *
+                * Since the event cannot get freed while we hold the
+                * child_mutex, the context must also exist and have a !0
+                * reference count.
+                */
+               get_ctx(ctx);
+
+               /*
+                * Now that we have a ctx ref, we can drop child_mutex, and
+                * acquire ctx::mutex without fear of it going away. Then we
+                * can re-acquire child_mutex.
+                */
+               mutex_unlock(&event->child_mutex);
+               mutex_lock(&ctx->mutex);
+               mutex_lock(&event->child_mutex);
+
+               /*
+                * Now that we hold ctx::mutex and child_mutex, revalidate our
+                * state; if child is still the first entry, it didn't get
+                * freed while the locks were dropped and we can go ahead
+                * and tear it down.
+                */
+               tmp = list_first_entry_or_null(&event->child_list,
+                                              struct perf_event, child_list);
+               if (tmp == child) {
+                       perf_remove_from_context(child, DETACH_GROUP);
+                       list_del(&child->child_list);
+                       free_event(child);
+                       /*
+                        * This matches the refcount bump in inherit_event();
+                        * this can't be the last reference.
+                        */
+                       put_event(event);
+               }
+
+               mutex_unlock(&event->child_mutex);
+               mutex_unlock(&ctx->mutex);
+               put_ctx(ctx);
+               goto again;
+       }
+       mutex_unlock(&event->child_mutex);
+
+       /* Must be the last reference */
        put_event(event);
        return 0;
 }
@@ -3786,46 +3841,10 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
  */
 static int perf_release(struct inode *inode, struct file *file)
 {
-       put_event(file->private_data);
+       perf_event_release_kernel(file->private_data);
        return 0;
 }
 
-/*
- * Remove all orphanes events from the context.
- */
-static void orphans_remove_work(struct work_struct *work)
-{
-       struct perf_event_context *ctx;
-       struct perf_event *event, *tmp;
-
-       ctx = container_of(work, struct perf_event_context,
-                          orphans_remove.work);
-
-       mutex_lock(&ctx->mutex);
-       list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) {
-               struct perf_event *parent_event = event->parent;
-
-               if (!is_orphaned_child(event))
-                       continue;
-
-               perf_remove_from_context(event, true);
-
-               mutex_lock(&parent_event->child_mutex);
-               list_del_init(&event->child_list);
-               mutex_unlock(&parent_event->child_mutex);
-
-               free_event(event);
-               put_event(parent_event);
-       }
-
-       raw_spin_lock_irq(&ctx->lock);
-       ctx->orphans_remove_sched = false;
-       raw_spin_unlock_irq(&ctx->lock);
-       mutex_unlock(&ctx->mutex);
-
-       put_ctx(ctx);
-}
-
 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 {
        struct perf_event *child;
@@ -4054,7 +4073,7 @@ static void _perf_event_reset(struct perf_event *event)
 /*
  * Holding the top-level event's child_mutex means that any
  * descendant process that has inherited this event will block
- * in sync_child_event if it goes to exit, thus satisfying the
+ * in perf_event_exit_event() if it goes to exit, thus satisfying the
  * task existence requirements of perf_event_enable/disable.
  */
 static void perf_event_for_each_child(struct perf_event *event,
@@ -4086,36 +4105,14 @@ static void perf_event_for_each(struct perf_event *event,
                perf_event_for_each_child(sibling, func);
 }
 
-struct period_event {
-       struct perf_event *event;
-       u64 value;
-};
-
-static void ___perf_event_period(void *info)
-{
-       struct period_event *pe = info;
-       struct perf_event *event = pe->event;
-       u64 value = pe->value;
-
-       if (event->attr.freq) {
-               event->attr.sample_freq = value;
-       } else {
-               event->attr.sample_period = value;
-               event->hw.sample_period = value;
-       }
-
-       local64_set(&event->hw.period_left, 0);
-}
-
-static int __perf_event_period(void *info)
+static void __perf_event_period(struct perf_event *event,
+                               struct perf_cpu_context *cpuctx,
+                               struct perf_event_context *ctx,
+                               void *info)
 {
-       struct period_event *pe = info;
-       struct perf_event *event = pe->event;
-       struct perf_event_context *ctx = event->ctx;
-       u64 value = pe->value;
+       u64 value = *((u64 *)info);
        bool active;
 
-       raw_spin_lock(&ctx->lock);
        if (event->attr.freq) {
                event->attr.sample_freq = value;
        } else {
@@ -4135,14 +4132,10 @@ static int __perf_event_period(void *info)
                event->pmu->start(event, PERF_EF_RELOAD);
                perf_pmu_enable(ctx->pmu);
        }
-       raw_spin_unlock(&ctx->lock);
-
-       return 0;
 }
 
 static int perf_event_period(struct perf_event *event, u64 __user *arg)
 {
-       struct period_event pe = { .event = event, };
        u64 value;
 
        if (!is_sampling_event(event))
@@ -4157,10 +4150,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
        if (event->attr.freq && value > sysctl_perf_event_sample_rate)
                return -EINVAL;
 
-       pe.value = value;
-
-       event_function_call(event, __perf_event_period,
-                           ___perf_event_period, &pe);
+       event_function_call(event, __perf_event_period, &value);
 
        return 0;
 }
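The rewrite folds the two old callbacks into a single function with the common event-function signature; event_function_call() then takes care of running it on the event's CPU with ctx->lock held, which is why the raw_spin_lock()/unlock() pair disappears from the body. A condensed userspace sketch of that calling convention (hypothetical names; a pthread mutex stands in for ctx->lock and the cross-CPU call):

#include <pthread.h>

struct ctx { pthread_mutex_t lock; unsigned long sample_period; };

/* an "event function": runs with ctx->lock already held by the dispatcher */
typedef void (*ctx_func_t)(struct ctx *ctx, void *info);

static void ctx_function_call(struct ctx *ctx, ctx_func_t func, void *info)
{
	pthread_mutex_lock(&ctx->lock);    /* kernel: IPI to the event's CPU */
	func(ctx, info);                   /* callback does no locking itself */
	pthread_mutex_unlock(&ctx->lock);
}

static void set_period(struct ctx *ctx, void *info)
{
	ctx->sample_period = *(unsigned long *)info;
}

/* usage: unsigned long v = 4096; ctx_function_call(&ctx, set_period, &v); */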
@@ -4932,7 +4922,7 @@ static void perf_pending_event(struct irq_work *entry)
 
        if (event->pending_disable) {
                event->pending_disable = 0;
-               __perf_event_disable(event);
+               perf_event_disable_local(event);
        }
 
        if (event->pending_wakeup) {
@@ -7753,11 +7743,13 @@ static void account_event_cpu(struct perf_event *event, int cpu)
 
 static void account_event(struct perf_event *event)
 {
+       bool inc = false;
+
        if (event->parent)
                return;
 
        if (event->attach_state & PERF_ATTACH_TASK)
-               static_key_slow_inc(&perf_sched_events.key);
+               inc = true;
        if (event->attr.mmap || event->attr.mmap_data)
                atomic_inc(&nr_mmap_events);
        if (event->attr.comm)
@@ -7770,11 +7762,14 @@ static void account_event(struct perf_event *event)
        }
        if (event->attr.context_switch) {
                atomic_inc(&nr_switch_events);
-               static_key_slow_inc(&perf_sched_events.key);
+               inc = true;
        }
        if (has_branch_stack(event))
-               static_key_slow_inc(&perf_sched_events.key);
+               inc = true;
        if (is_cgroup_event(event))
+               inc = true;
+
+       if (inc)
                static_key_slow_inc(&perf_sched_events.key);
 
        account_event_cpu(event, event->cpu);
@@ -8422,11 +8417,11 @@ SYSCALL_DEFINE5(perf_event_open,
                 * See perf_event_ctx_lock() for comments on the details
                 * of swizzling perf_event::ctx.
                 */
-               perf_remove_from_context(group_leader, false);
+               perf_remove_from_context(group_leader, 0);
 
                list_for_each_entry(sibling, &group_leader->sibling_list,
                                    group_entry) {
-                       perf_remove_from_context(sibling, false);
+                       perf_remove_from_context(sibling, 0);
                        put_ctx(gctx);
                }
 
@@ -8479,6 +8474,8 @@ SYSCALL_DEFINE5(perf_event_open,
        perf_event__header_size(event);
        perf_event__id_header_size(event);
 
+       event->owner = current;
+
        perf_install_in_context(ctx, event, event->cpu);
        perf_unpin_context(ctx);
 
@@ -8488,8 +8485,6 @@ SYSCALL_DEFINE5(perf_event_open,
 
        put_online_cpus();
 
-       event->owner = current;
-
        mutex_lock(&current->perf_event_mutex);
        list_add_tail(&event->owner_entry, &current->perf_event_list);
        mutex_unlock(&current->perf_event_mutex);
@@ -8556,7 +8551,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
        }
 
        /* Mark owner so we could distinguish it from user events. */
-       event->owner = EVENT_OWNER_KERNEL;
+       event->owner = TASK_TOMBSTONE;
 
        account_event(event);
 
@@ -8606,7 +8601,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
        mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex);
        list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
                                 event_entry) {
-               perf_remove_from_context(event, false);
+               perf_remove_from_context(event, 0);
                unaccount_event_cpu(event, src_cpu);
                put_ctx(src_ctx);
                list_add(&event->migrate_entry, &events);
@@ -8673,33 +8668,15 @@ static void sync_child_event(struct perf_event *child_event,
                     &parent_event->child_total_time_enabled);
        atomic64_add(child_event->total_time_running,
                     &parent_event->child_total_time_running);
-
-       /*
-        * Remove this event from the parent's list
-        */
-       WARN_ON_ONCE(parent_event->ctx->parent_ctx);
-       mutex_lock(&parent_event->child_mutex);
-       list_del_init(&child_event->child_list);
-       mutex_unlock(&parent_event->child_mutex);
-
-       /*
-        * Make sure user/parent get notified, that we just
-        * lost one event.
-        */
-       perf_event_wakeup(parent_event);
-
-       /*
-        * Release the parent event, if this was the last
-        * reference to it.
-        */
-       put_event(parent_event);
 }
 
 static void
-__perf_event_exit_task(struct perf_event *child_event,
-                        struct perf_event_context *child_ctx,
-                        struct task_struct *child)
+perf_event_exit_event(struct perf_event *child_event,
+                     struct perf_event_context *child_ctx,
+                     struct task_struct *child)
 {
+       struct perf_event *parent_event = child_event->parent;
+
        /*
         * Do not destroy the 'original' grouping; because of the context
         * switch optimization the original events could've ended up in a
@@ -8712,57 +8689,86 @@ __perf_event_exit_task(struct perf_event *child_event,
         * Do destroy all inherited groups, we don't care about those
         * and being thorough is better.
         */
-       perf_remove_from_context(child_event, !!child_event->parent);
+       raw_spin_lock_irq(&child_ctx->lock);
+       WARN_ON_ONCE(child_ctx->is_active);
+
+       if (parent_event)
+               perf_group_detach(child_event);
+       list_del_event(child_event, child_ctx);
+       child_event->state = PERF_EVENT_STATE_EXIT; /* see perf_event_release_kernel() */
+       raw_spin_unlock_irq(&child_ctx->lock);
 
        /*
-        * It can happen that the parent exits first, and has events
-        * that are still around due to the child reference. These
-        * events need to be zapped.
+        * Parent events are governed by their filedesc; retain them.
         */
-       if (child_event->parent) {
-               sync_child_event(child_event, child);
-               free_event(child_event);
-       } else {
-               child_event->state = PERF_EVENT_STATE_EXIT;
+       if (!parent_event) {
                perf_event_wakeup(child_event);
+               return;
        }
+       /*
+        * Child events can be cleaned up.
+        */
+
+       sync_child_event(child_event, child);
+
+       /*
+        * Remove this event from the parent's list
+        */
+       WARN_ON_ONCE(parent_event->ctx->parent_ctx);
+       mutex_lock(&parent_event->child_mutex);
+       list_del_init(&child_event->child_list);
+       mutex_unlock(&parent_event->child_mutex);
+
+       /*
+        * Kick perf_poll() for is_event_hup().
+        */
+       perf_event_wakeup(parent_event);
+       free_event(child_event);
+       put_event(parent_event);
 }
 
 static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 {
-       struct perf_event *child_event, *next;
        struct perf_event_context *child_ctx, *clone_ctx = NULL;
-       unsigned long flags;
+       struct perf_event *child_event, *next;
+
+       WARN_ON_ONCE(child != current);
 
-       if (likely(!child->perf_event_ctxp[ctxn]))
+       child_ctx = perf_pin_task_context(child, ctxn);
+       if (!child_ctx)
                return;
 
-       local_irq_save(flags);
        /*
-        * We can't reschedule here because interrupts are disabled,
-        * and either child is current or it is a task that can't be
-        * scheduled, so we are now safe from rescheduling changing
-        * our context.
+        * In order to reduce the amount of trickiness in ctx tear-down, we hold
+        * ctx::mutex over the entire thing. This serializes against almost
+        * everything that wants to access the ctx.
+        *
+        * The exception is sys_perf_event_open() /
+        * perf_event_create_kernel_counter() which does find_get_context()
+        * without ctx::mutex (it cannot because of the move_group double mutex
+        * lock thing). See the comments in perf_install_in_context().
         */
-       child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]);
+       mutex_lock(&child_ctx->mutex);
 
        /*
-        * Take the context lock here so that if find_get_context is
-        * reading child->perf_event_ctxp, we wait until it has
-        * incremented the context's refcount before we do put_ctx below.
+        * In a single ctx::lock section, de-schedule the events and detach the
+        * context from the task such that we cannot ever get it scheduled back
+        * in.
         */
-       raw_spin_lock(&child_ctx->lock);
-       task_ctx_sched_out(child_ctx);
-       child->perf_event_ctxp[ctxn] = NULL;
+       raw_spin_lock_irq(&child_ctx->lock);
+       task_ctx_sched_out(__get_cpu_context(child_ctx), child_ctx);
 
        /*
-        * If this context is a clone; unclone it so it can't get
-        * swapped to another process while we're removing all
-        * the events from it.
+        * Now that the context is inactive, destroy the task <-> ctx relation
+        * and mark the context dead.
         */
+       RCU_INIT_POINTER(child->perf_event_ctxp[ctxn], NULL);
+       put_ctx(child_ctx); /* cannot be last */
+       WRITE_ONCE(child_ctx->task, TASK_TOMBSTONE);
+       put_task_struct(current); /* cannot be last */
+
        clone_ctx = unclone_ctx(child_ctx);
-       update_context_time(child_ctx);
-       raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
+       raw_spin_unlock_irq(&child_ctx->lock);
 
        if (clone_ctx)
                put_ctx(clone_ctx);
@@ -8774,20 +8780,8 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
         */
        perf_event_task(child, child_ctx, 0);
 
-       /*
-        * We can recurse on the same lock type through:
-        *
-        *   __perf_event_exit_task()
-        *     sync_child_event()
-        *       put_event()
-        *         mutex_lock(&ctx->mutex)
-        *
-        * But since its the parent context it won't be the same instance.
-        */
-       mutex_lock(&child_ctx->mutex);
-
        list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
-               __perf_event_exit_task(child_event, child_ctx, child);
+               perf_event_exit_event(child_event, child_ctx, child);
 
        mutex_unlock(&child_ctx->mutex);
 
@@ -8812,8 +8806,7 @@ void perf_event_exit_task(struct task_struct *child)
                 * the owner, closes a race against perf_release() where
                 * we need to serialize on the owner->perf_event_mutex.
                 */
-               smp_wmb();
-               event->owner = NULL;
+               smp_store_release(&event->owner, NULL);
        }
        mutex_unlock(&child->perf_event_mutex);
 
@@ -8896,21 +8889,20 @@ void perf_event_delayed_put(struct task_struct *task)
                WARN_ON_ONCE(task->perf_event_ctxp[ctxn]);
 }
 
-struct perf_event *perf_event_get(unsigned int fd)
+struct file *perf_event_get(unsigned int fd)
 {
-       int err;
-       struct fd f;
-       struct perf_event *event;
+       struct file *file;
 
-       err = perf_fget_light(fd, &f);
-       if (err)
-               return ERR_PTR(err);
+       file = fget_raw(fd);
+       if (!file)
+               return ERR_PTR(-EBADF);
 
-       event = f.file->private_data;
-       atomic_long_inc(&event->refcount);
-       fdput(f);
+       if (file->f_op != &perf_fops) {
+               fput(file);
+               return ERR_PTR(-EBADF);
+       }
 
-       return event;
+       return file;
 }
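Callers now receive a reference-counted struct file rather than a bare event, and the f_op comparison above is what guarantees that private_data really is a perf event. A sketch of the resulting calling pattern, modelled on the bpf callers converted later in this pull:

	struct file *file = perf_event_get(fd);

	if (IS_ERR(file))
		return PTR_ERR(file);

	event = file->private_data;   /* safe: f_op == &perf_fops was checked */
	/* ... use event ... */
	fput(file);                   /* drops the reference fget_raw() took */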
 
 const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
@@ -8953,8 +8945,16 @@ inherit_event(struct perf_event *parent_event,
        if (IS_ERR(child_event))
                return child_event;
 
+       /*
+        * is_orphaned_event() and list_add_tail(&parent_event->child_list)
+        * must be under the same lock in order to serialize against
+        * perf_event_release_kernel(), such that either we must observe
+        * is_orphaned_event() or they will observe us on the child_list.
+        */
+       mutex_lock(&parent_event->child_mutex);
        if (is_orphaned_event(parent_event) ||
            !atomic_long_inc_not_zero(&parent_event->refcount)) {
+               mutex_unlock(&parent_event->child_mutex);
                free_event(child_event);
                return NULL;
        }
@@ -9002,8 +9002,6 @@ inherit_event(struct perf_event *parent_event,
        /*
         * Link this into the parent event's child list
         */
-       WARN_ON_ONCE(parent_event->ctx->parent_ctx);
-       mutex_lock(&parent_event->child_mutex);
        list_add_tail(&child_event->child_list, &parent_event->child_list);
        mutex_unlock(&parent_event->child_mutex);
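Both sides now manipulate the orphan state and the child list under the same parent_event->child_mutex, so either inherit_event() observes the parent already orphaned, or the release path observes the new child on the list; there is no window in between. A userspace sketch of the inherit-side handshake (hypothetical names; C11 atomics stand in for the kernel refcount):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

struct parent {
	pthread_mutex_t child_mutex;
	atomic_long refcount;
	bool orphaned;                 /* analogue of is_orphaned_event() */
};

/* analogue of atomic_long_inc_not_zero() */
static bool get_unless_zero(atomic_long *ref)
{
	long v = atomic_load(ref);

	while (v != 0)
		if (atomic_compare_exchange_weak(ref, &v, v + 1))
			return true;
	return false;
}

static bool try_inherit(struct parent *p)
{
	bool ok;

	pthread_mutex_lock(&p->child_mutex);
	ok = !p->orphaned && get_unless_zero(&p->refcount);
	/* on success the child is linked onto p's list here, still locked */
	pthread_mutex_unlock(&p->child_mutex);
	return ok;
}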
 
@@ -9221,13 +9219,14 @@ static void perf_event_init_cpu(int cpu)
 #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
 static void __perf_event_exit_context(void *__info)
 {
-       struct remove_event re = { .detach_group = true };
        struct perf_event_context *ctx = __info;
+       struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+       struct perf_event *event;
 
-       rcu_read_lock();
-       list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry)
-               __perf_remove_from_context(&re);
-       rcu_read_unlock();
+       raw_spin_lock(&ctx->lock);
+       list_for_each_entry(event, &ctx->event_list, event_entry)
+               __perf_remove_from_context(event, cpuctx, ctx, (void *)DETACH_GROUP);
+       raw_spin_unlock(&ctx->lock);
 }
 
 static void perf_event_exit_cpu_context(int cpu)
index 92ce5f4..3f8cb1e 100644
@@ -444,7 +444,7 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att
         * current task.
         */
        if (irqs_disabled() && bp->ctx && bp->ctx->task == current)
-               __perf_event_disable(bp);
+               perf_event_disable_local(bp);
        else
                perf_event_disable(bp);
 
index adfdc05..1faad2c 100644
@@ -459,6 +459,25 @@ static void rb_free_aux_page(struct ring_buffer *rb, int idx)
        __free_page(page);
 }
 
+static void __rb_free_aux(struct ring_buffer *rb)
+{
+       int pg;
+
+       if (rb->aux_priv) {
+               rb->free_aux(rb->aux_priv);
+               rb->free_aux = NULL;
+               rb->aux_priv = NULL;
+       }
+
+       if (rb->aux_nr_pages) {
+               for (pg = 0; pg < rb->aux_nr_pages; pg++)
+                       rb_free_aux_page(rb, pg);
+
+               kfree(rb->aux_pages);
+               rb->aux_nr_pages = 0;
+       }
+}
+
 int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
                 pgoff_t pgoff, int nr_pages, long watermark, int flags)
 {
@@ -547,30 +566,11 @@ out:
        if (!ret)
                rb->aux_pgoff = pgoff;
        else
-               rb_free_aux(rb);
+               __rb_free_aux(rb);
 
        return ret;
 }
 
-static void __rb_free_aux(struct ring_buffer *rb)
-{
-       int pg;
-
-       if (rb->aux_priv) {
-               rb->free_aux(rb->aux_priv);
-               rb->free_aux = NULL;
-               rb->aux_priv = NULL;
-       }
-
-       if (rb->aux_nr_pages) {
-               for (pg = 0; pg < rb->aux_nr_pages; pg++)
-                       rb_free_aux_page(rb, pg);
-
-               kfree(rb->aux_pages);
-               rb->aux_nr_pages = 0;
-       }
-}
-
 void rb_free_aux(struct ring_buffer *rb)
 {
        if (atomic_dec_and_test(&rb->aux_refcount))
index 0773f2b..5d6ce64 100644
@@ -1191,7 +1191,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
        if (pi_state->owner != current)
                return -EINVAL;
 
-       raw_spin_lock(&pi_state->pi_mutex.wait_lock);
+       raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
        new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
 
        /*
@@ -1217,22 +1217,22 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
        else if (curval != uval)
                ret = -EINVAL;
        if (ret) {
-               raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
+               raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
                return ret;
        }
 
-       raw_spin_lock_irq(&pi_state->owner->pi_lock);
+       raw_spin_lock(&pi_state->owner->pi_lock);
        WARN_ON(list_empty(&pi_state->list));
        list_del_init(&pi_state->list);
-       raw_spin_unlock_irq(&pi_state->owner->pi_lock);
+       raw_spin_unlock(&pi_state->owner->pi_lock);
 
-       raw_spin_lock_irq(&new_owner->pi_lock);
+       raw_spin_lock(&new_owner->pi_lock);
        WARN_ON(!list_empty(&pi_state->list));
        list_add(&pi_state->list, &new_owner->pi_state_list);
        pi_state->owner = new_owner;
-       raw_spin_unlock_irq(&new_owner->pi_lock);
+       raw_spin_unlock(&new_owner->pi_lock);
 
-       raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
+       raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
 
        deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
 
@@ -2127,11 +2127,11 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
                 * we returned due to timeout or signal without taking the
                 * rt_mutex. Too late.
                 */
-               raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
+               raw_spin_lock_irq(&q->pi_state->pi_mutex.wait_lock);
                owner = rt_mutex_owner(&q->pi_state->pi_mutex);
                if (!owner)
                        owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
-               raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
+               raw_spin_unlock_irq(&q->pi_state->pi_mutex.wait_lock);
                ret = fixup_pi_state_owner(uaddr, q, owner);
                goto out;
        }
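The conversions in this file all follow one nesting rule: pi_mutex.wait_lock becomes the outermost lock and disables interrupts itself, so the pi_lock acquisitions nested inside it lose their _irq suffixes. Condensed from the hunks above:

	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);   /* irqs off here    */
	raw_spin_lock(&pi_state->owner->pi_lock);           /* already irq-safe */
	/* ... */
	raw_spin_unlock(&pi_state->owner->pi_lock);
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); /* irqs on again    */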
index a302cf9..57bff78 100644
@@ -138,7 +138,8 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
        unsigned int flags = 0, irq = desc->irq_data.irq;
        struct irqaction *action = desc->action;
 
-       do {
+       /* action might have become NULL since we dropped the lock */
+       while (action) {
                irqreturn_t res;
 
                trace_irq_handler_entry(irq, action);
@@ -173,7 +174,7 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
 
                retval |= res;
                action = action->next;
-       } while (action);
+       }
 
        add_interrupt_randomness(irq, flags);
 
index 6e655f7..3e56d2f 100644
@@ -575,10 +575,15 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
        unsigned int type = IRQ_TYPE_NONE;
        int virq;
 
-       if (fwspec->fwnode)
-               domain = irq_find_matching_fwnode(fwspec->fwnode, DOMAIN_BUS_ANY);
-       else
+       if (fwspec->fwnode) {
+               domain = irq_find_matching_fwnode(fwspec->fwnode,
+                                                 DOMAIN_BUS_WIRED);
+               if (!domain)
+                       domain = irq_find_matching_fwnode(fwspec->fwnode,
+                                                         DOMAIN_BUS_ANY);
+       } else {
                domain = irq_default_domain;
+       }
 
        if (!domain) {
                pr_warn("no irq domain found for %s !\n",
index 8251e75..3e74660 100644
@@ -99,13 +99,14 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
  * 2) Drop lock->wait_lock
  * 3) Try to unlock the lock with cmpxchg
  */
-static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
+static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
+                                       unsigned long flags)
        __releases(lock->wait_lock)
 {
        struct task_struct *owner = rt_mutex_owner(lock);
 
        clear_rt_mutex_waiters(lock);
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
        /*
         * If a new waiter comes in between the unlock and the cmpxchg
         * we have two situations:
@@ -147,11 +148,12 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
 /*
  * Simple slow path only version: lock->owner is protected by lock->wait_lock.
  */
-static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
+static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
+                                       unsigned long flags)
        __releases(lock->wait_lock)
 {
        lock->owner = NULL;
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
        return true;
 }
 #endif
@@ -433,7 +435,6 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
        int ret = 0, depth = 0;
        struct rt_mutex *lock;
        bool detect_deadlock;
-       unsigned long flags;
        bool requeue = true;
 
        detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);
@@ -476,7 +477,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
        /*
         * [1] Task cannot go away as we did a get_task() before !
         */
-       raw_spin_lock_irqsave(&task->pi_lock, flags);
+       raw_spin_lock_irq(&task->pi_lock);
 
        /*
         * [2] Get the waiter on which @task is blocked on.
@@ -560,7 +561,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
         * operations.
         */
        if (!raw_spin_trylock(&lock->wait_lock)) {
-               raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+               raw_spin_unlock_irq(&task->pi_lock);
                cpu_relax();
                goto retry;
        }
@@ -591,7 +592,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
                /*
                 * No requeue[7] here. Just release @task [8]
                 */
-               raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+               raw_spin_unlock(&task->pi_lock);
                put_task_struct(task);
 
                /*
@@ -599,14 +600,14 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
                 * If there is no owner of the lock, end of chain.
                 */
                if (!rt_mutex_owner(lock)) {
-                       raw_spin_unlock(&lock->wait_lock);
+                       raw_spin_unlock_irq(&lock->wait_lock);
                        return 0;
                }
 
                /* [10] Grab the next task, i.e. owner of @lock */
                task = rt_mutex_owner(lock);
                get_task_struct(task);
-               raw_spin_lock_irqsave(&task->pi_lock, flags);
+               raw_spin_lock(&task->pi_lock);
 
                /*
                 * No requeue [11] here. We just do deadlock detection.
@@ -621,8 +622,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
                top_waiter = rt_mutex_top_waiter(lock);
 
                /* [13] Drop locks */
-               raw_spin_unlock_irqrestore(&task->pi_lock, flags);
-               raw_spin_unlock(&lock->wait_lock);
+               raw_spin_unlock(&task->pi_lock);
+               raw_spin_unlock_irq(&lock->wait_lock);
 
                /* If owner is not blocked, end of chain. */
                if (!next_lock)
@@ -643,7 +644,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
        rt_mutex_enqueue(lock, waiter);
 
        /* [8] Release the task */
-       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+       raw_spin_unlock(&task->pi_lock);
        put_task_struct(task);
 
        /*
@@ -661,14 +662,14 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
                 */
                if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
                        wake_up_process(rt_mutex_top_waiter(lock)->task);
-               raw_spin_unlock(&lock->wait_lock);
+               raw_spin_unlock_irq(&lock->wait_lock);
                return 0;
        }
 
        /* [10] Grab the next task, i.e. the owner of @lock */
        task = rt_mutex_owner(lock);
        get_task_struct(task);
-       raw_spin_lock_irqsave(&task->pi_lock, flags);
+       raw_spin_lock(&task->pi_lock);
 
        /* [11] requeue the pi waiters if necessary */
        if (waiter == rt_mutex_top_waiter(lock)) {
@@ -722,8 +723,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
        top_waiter = rt_mutex_top_waiter(lock);
 
        /* [13] Drop the locks */
-       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock(&task->pi_lock);
+       raw_spin_unlock_irq(&lock->wait_lock);
 
        /*
         * Make the actual exit decisions [12], based on the stored
@@ -746,7 +747,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
        goto again;
 
  out_unlock_pi:
-       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+       raw_spin_unlock_irq(&task->pi_lock);
  out_put_task:
        put_task_struct(task);
 
@@ -756,7 +757,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 /*
  * Try to take an rt-mutex
  *
- * Must be called with lock->wait_lock held.
+ * Must be called with lock->wait_lock held and interrupts disabled
  *
  * @lock:   The lock to be acquired.
  * @task:   The task which wants to acquire the lock
@@ -766,8 +767,6 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
                                struct rt_mutex_waiter *waiter)
 {
-       unsigned long flags;
-
        /*
         * Before testing whether we can acquire @lock, we set the
         * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
@@ -852,7 +851,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
         * case, but conditionals are more expensive than a redundant
         * store.
         */
-       raw_spin_lock_irqsave(&task->pi_lock, flags);
+       raw_spin_lock(&task->pi_lock);
        task->pi_blocked_on = NULL;
        /*
         * Finish the lock acquisition. @task is the new owner. If
@@ -861,7 +860,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
         */
        if (rt_mutex_has_waiters(lock))
                rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
-       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+       raw_spin_unlock(&task->pi_lock);
 
 takeit:
        /* We got the lock. */
@@ -883,7 +882,7 @@ takeit:
  *
  * Prepare waiter and propagate pi chain
  *
- * This must be called with lock->wait_lock held.
+ * This must be called with lock->wait_lock held and interrupts disabled
  */
 static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
                                   struct rt_mutex_waiter *waiter,
@@ -894,7 +893,6 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
        struct rt_mutex_waiter *top_waiter = waiter;
        struct rt_mutex *next_lock;
        int chain_walk = 0, res;
-       unsigned long flags;
 
        /*
         * Early deadlock detection. We really don't want the task to
@@ -908,7 +906,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
        if (owner == task)
                return -EDEADLK;
 
-       raw_spin_lock_irqsave(&task->pi_lock, flags);
+       raw_spin_lock(&task->pi_lock);
        __rt_mutex_adjust_prio(task);
        waiter->task = task;
        waiter->lock = lock;
@@ -921,12 +919,12 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 
        task->pi_blocked_on = waiter;
 
-       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+       raw_spin_unlock(&task->pi_lock);
 
        if (!owner)
                return 0;
 
-       raw_spin_lock_irqsave(&owner->pi_lock, flags);
+       raw_spin_lock(&owner->pi_lock);
        if (waiter == rt_mutex_top_waiter(lock)) {
                rt_mutex_dequeue_pi(owner, top_waiter);
                rt_mutex_enqueue_pi(owner, waiter);
@@ -941,7 +939,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
        /* Store the lock on which owner is blocked or NULL */
        next_lock = task_blocked_on_lock(owner);
 
-       raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
+       raw_spin_unlock(&owner->pi_lock);
        /*
         * Even if full deadlock detection is on, if the owner is not
         * blocked itself, we can avoid finding this out in the chain
@@ -957,12 +955,12 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
         */
        get_task_struct(owner);
 
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irq(&lock->wait_lock);
 
        res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
                                         next_lock, waiter, task);
 
-       raw_spin_lock(&lock->wait_lock);
+       raw_spin_lock_irq(&lock->wait_lock);
 
        return res;
 }
@@ -971,15 +969,14 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
  * Remove the top waiter from the current tasks pi waiter tree and
  * queue it up.
  *
- * Called with lock->wait_lock held.
+ * Called with lock->wait_lock held and interrupts disabled.
  */
 static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
                                    struct rt_mutex *lock)
 {
        struct rt_mutex_waiter *waiter;
-       unsigned long flags;
 
-       raw_spin_lock_irqsave(&current->pi_lock, flags);
+       raw_spin_lock(&current->pi_lock);
 
        waiter = rt_mutex_top_waiter(lock);
 
@@ -1001,7 +998,7 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
         */
        lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
 
-       raw_spin_unlock_irqrestore(&current->pi_lock, flags);
+       raw_spin_unlock(&current->pi_lock);
 
        wake_q_add(wake_q, waiter->task);
 }
@@ -1009,7 +1006,7 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
 /*
  * Remove a waiter from a lock and give up
  *
- * Must be called with lock->wait_lock held and
+ * Must be called with lock->wait_lock held and interrupts disabled. The caller must
  * have just failed to try_to_take_rt_mutex().
  */
 static void remove_waiter(struct rt_mutex *lock,
@@ -1018,12 +1015,11 @@ static void remove_waiter(struct rt_mutex *lock,
        bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
        struct task_struct *owner = rt_mutex_owner(lock);
        struct rt_mutex *next_lock;
-       unsigned long flags;
 
-       raw_spin_lock_irqsave(&current->pi_lock, flags);
+       raw_spin_lock(&current->pi_lock);
        rt_mutex_dequeue(lock, waiter);
        current->pi_blocked_on = NULL;
-       raw_spin_unlock_irqrestore(&current->pi_lock, flags);
+       raw_spin_unlock(&current->pi_lock);
 
        /*
         * Only update priority if the waiter was the highest priority
@@ -1032,7 +1028,7 @@ static void remove_waiter(struct rt_mutex *lock,
        if (!owner || !is_top_waiter)
                return;
 
-       raw_spin_lock_irqsave(&owner->pi_lock, flags);
+       raw_spin_lock(&owner->pi_lock);
 
        rt_mutex_dequeue_pi(owner, waiter);
 
@@ -1044,7 +1040,7 @@ static void remove_waiter(struct rt_mutex *lock,
        /* Store the lock on which owner is blocked or NULL */
        next_lock = task_blocked_on_lock(owner);
 
-       raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
+       raw_spin_unlock(&owner->pi_lock);
 
        /*
         * Don't walk the chain, if the owner task is not blocked
@@ -1056,12 +1052,12 @@ static void remove_waiter(struct rt_mutex *lock,
        /* gets dropped in rt_mutex_adjust_prio_chain()! */
        get_task_struct(owner);
 
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irq(&lock->wait_lock);
 
        rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
                                   next_lock, NULL, current);
 
-       raw_spin_lock(&lock->wait_lock);
+       raw_spin_lock_irq(&lock->wait_lock);
 }
 
 /*
@@ -1097,11 +1093,11 @@ void rt_mutex_adjust_pi(struct task_struct *task)
  * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
  * @lock:               the rt_mutex to take
  * @state:              the state the task should block in (TASK_INTERRUPTIBLE
- *                      or TASK_UNINTERRUPTIBLE)
+ *                      or TASK_UNINTERRUPTIBLE)
  * @timeout:            the pre-initialized and started timer, or NULL for none
  * @waiter:             the pre-initialized rt_mutex_waiter
  *
- * lock->wait_lock must be held by the caller.
+ * Must be called with lock->wait_lock held and interrupts disabled
  */
 static int __sched
 __rt_mutex_slowlock(struct rt_mutex *lock, int state,
@@ -1129,13 +1125,13 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
                                break;
                }
 
-               raw_spin_unlock(&lock->wait_lock);
+               raw_spin_unlock_irq(&lock->wait_lock);
 
                debug_rt_mutex_print_deadlock(waiter);
 
                schedule();
 
-               raw_spin_lock(&lock->wait_lock);
+               raw_spin_lock_irq(&lock->wait_lock);
                set_current_state(state);
        }
 
@@ -1172,17 +1168,26 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
                  enum rtmutex_chainwalk chwalk)
 {
        struct rt_mutex_waiter waiter;
+       unsigned long flags;
        int ret = 0;
 
        debug_rt_mutex_init_waiter(&waiter);
        RB_CLEAR_NODE(&waiter.pi_tree_entry);
        RB_CLEAR_NODE(&waiter.tree_entry);
 
-       raw_spin_lock(&lock->wait_lock);
+       /*
+        * Technically we could use raw_spin_[un]lock_irq() here, but this can
+        * be called in early boot if the cmpxchg() fast path is disabled
+        * (debug, no architecture support). In this case we will acquire the
+        * rtmutex with lock->wait_lock held. But we cannot unconditionally
+        * enable interrupts in that early boot case. So we need to use the
+        * irqsave/restore variants.
+        */
+       raw_spin_lock_irqsave(&lock->wait_lock, flags);
 
        /* Try to acquire the lock again: */
        if (try_to_take_rt_mutex(lock, current, NULL)) {
-               raw_spin_unlock(&lock->wait_lock);
+               raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
                return 0;
        }
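The distinction the comment draws matters because the plain _irq variants unconditionally re-enable interrupts on unlock, which would be wrong for an early-boot caller that entered with interrupts already disabled. The irqsave form records the previous interrupt state and restores it exactly:

	unsigned long flags;

	raw_spin_lock_irqsave(&lock->wait_lock, flags);      /* save + disable */
	/* ... critical section ... */
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags); /* restore state  */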
 
@@ -1211,7 +1216,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
         */
        fixup_rt_mutex_waiters(lock);
 
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
        /* Remove pending timer: */
        if (unlikely(timeout))
@@ -1227,6 +1232,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
  */
 static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
 {
+       unsigned long flags;
        int ret;
 
        /*
@@ -1238,10 +1244,10 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
                return 0;
 
        /*
-        * The mutex has currently no owner. Lock the wait lock and
-        * try to acquire the lock.
+        * The mutex has currently no owner. Lock the wait lock and try to
+        * acquire the lock. We use irqsave here to support early boot calls.
         */
-       raw_spin_lock(&lock->wait_lock);
+       raw_spin_lock_irqsave(&lock->wait_lock, flags);
 
        ret = try_to_take_rt_mutex(lock, current, NULL);
 
@@ -1251,7 +1257,7 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
         */
        fixup_rt_mutex_waiters(lock);
 
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
        return ret;
 }
@@ -1263,7 +1269,10 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
 static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
                                        struct wake_q_head *wake_q)
 {
-       raw_spin_lock(&lock->wait_lock);
+       unsigned long flags;
+
+       /* irqsave required to support early boot calls */
+       raw_spin_lock_irqsave(&lock->wait_lock, flags);
 
        debug_rt_mutex_unlock(lock);
 
@@ -1302,10 +1311,10 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
         */
        while (!rt_mutex_has_waiters(lock)) {
                /* Drops lock->wait_lock ! */
-               if (unlock_rt_mutex_safe(lock) == true)
+               if (unlock_rt_mutex_safe(lock, flags) == true)
                        return false;
                /* Relock the rtmutex and try again */
-               raw_spin_lock(&lock->wait_lock);
+               raw_spin_lock_irqsave(&lock->wait_lock, flags);
        }
 
        /*
@@ -1316,7 +1325,7 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
         */
        mark_wakeup_next_waiter(wake_q, lock);
 
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
        /* check PI boosting */
        return true;
@@ -1596,10 +1605,10 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
 {
        int ret;
 
-       raw_spin_lock(&lock->wait_lock);
+       raw_spin_lock_irq(&lock->wait_lock);
 
        if (try_to_take_rt_mutex(lock, task, NULL)) {
-               raw_spin_unlock(&lock->wait_lock);
+               raw_spin_unlock_irq(&lock->wait_lock);
                return 1;
        }
 
@@ -1620,7 +1629,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
        if (unlikely(ret))
                remove_waiter(lock, waiter);
 
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irq(&lock->wait_lock);
 
        debug_rt_mutex_print_deadlock(waiter);
 
@@ -1668,7 +1677,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
 {
        int ret;
 
-       raw_spin_lock(&lock->wait_lock);
+       raw_spin_lock_irq(&lock->wait_lock);
 
        set_current_state(TASK_INTERRUPTIBLE);
 
@@ -1684,7 +1693,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
         */
        fixup_rt_mutex_waiters(lock);
 
-       raw_spin_unlock(&lock->wait_lock);
+       raw_spin_unlock_irq(&lock->wait_lock);
 
        return ret;
 }
index f4ad91b..4d73a83 100644
@@ -588,7 +588,7 @@ void __init pidhash_init(void)
 
 void __init pidmap_init(void)
 {
-       /* Veryify no one has done anything silly */
+       /* Verify no one has done anything silly: */
        BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_HASH_ADDING);
 
        /* bump default and minimum pid_max based on number of cpus */
index 02e8dfa..68d3ebc 100644
@@ -235,7 +235,7 @@ config PM_TRACE_RTC
 
 config APM_EMULATION
        tristate "Advanced Power Management Emulation"
-       depends on PM && SYS_SUPPORTS_APM_EMULATION
+       depends on SYS_SUPPORTS_APM_EMULATION
        help
          APM is a BIOS specification for saving power using several different
          techniques. This is mostly useful for battery powered laptops with
index 63d3a24..9503d59 100644
@@ -6840,7 +6840,7 @@ static void sched_init_numa(void)
 
                        sched_domains_numa_masks[i][j] = mask;
 
-                       for (k = 0; k < nr_node_ids; k++) {
+                       for_each_node(k) {
                                if (node_distance(j, k) > sched_domains_numa_distance[i])
                                        continue;
 
index 1926606..56b7d4b 100644
@@ -1220,8 +1220,6 @@ static void task_numa_assign(struct task_numa_env *env,
 {
        if (env->best_task)
                put_task_struct(env->best_task);
-       if (p)
-               get_task_struct(p);
 
        env->best_task = p;
        env->best_imp = imp;
@@ -1289,20 +1287,30 @@ static void task_numa_compare(struct task_numa_env *env,
        long imp = env->p->numa_group ? groupimp : taskimp;
        long moveimp = imp;
        int dist = env->dist;
+       bool assigned = false;
 
        rcu_read_lock();
 
        raw_spin_lock_irq(&dst_rq->lock);
        cur = dst_rq->curr;
        /*
-        * No need to move the exiting task, and this ensures that ->curr
-        * wasn't reaped and thus get_task_struct() in task_numa_assign()
-        * is safe under RCU read lock.
-        * Note that rcu_read_lock() itself can't protect from the final
-        * put_task_struct() after the last schedule().
+        * No need to move the exiting task or idle task.
         */
        if ((cur->flags & PF_EXITING) || is_idle_task(cur))
                cur = NULL;
+       else {
+               /*
+                * The task_struct must be pinned here to protect the
+                * p->numa_faults access in task_weight(), since the
+                * numa_faults could already be freed via the following path:
+                * finish_task_switch()
+                *     --> put_task_struct()
+                *         --> __put_task_struct()
+                *             --> task_numa_free()
+                */
+               get_task_struct(cur);
+       }
+
        raw_spin_unlock_irq(&dst_rq->lock);
 
        /*
@@ -1386,6 +1394,7 @@ balance:
                 */
                if (!load_too_imbalanced(src_load, dst_load, env)) {
                        imp = moveimp - 1;
+                       put_task_struct(cur);
                        cur = NULL;
                        goto assign;
                }
@@ -1411,9 +1420,16 @@ balance:
                env->dst_cpu = select_idle_sibling(env->p, env->dst_cpu);
 
 assign:
+       assigned = true;
        task_numa_assign(env, cur, imp);
 unlock:
        rcu_read_unlock();
+       /*
+        * If dst_rq->curr was not assigned as the best task, the pin taken
+        * above is no longer needed; drop the reference.
+        */
+       if (cur && !assigned)
+               put_task_struct(cur);
 }
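Taken in isolation, the pinning added above looks like this (a condensed sketch of the hunk): RCU and dst_rq->lock keep the task_struct itself from disappearing inside the locked section, but numa_faults hangs off the task and is freed by __put_task_struct(), so an explicit reference is required for reads done after the lock is dropped:

	rcu_read_lock();
	raw_spin_lock_irq(&rq->lock);
	cur = rq->curr;
	get_task_struct(cur);            /* pin beyond the rq->lock section */
	raw_spin_unlock_irq(&rq->lock);

	/* ... cur->numa_faults may be read safely here ... */

	rcu_read_unlock();
	put_task_struct(cur);            /* drop the pin once cur is done with */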
 
 static void task_numa_find_cpu(struct task_numa_env *env,
index de0e786..544a713 100644
@@ -162,7 +162,7 @@ static void cpuidle_idle_call(void)
         */
        if (idle_should_freeze()) {
                entered_state = cpuidle_enter_freeze(drv, dev);
-               if (entered_state >= 0) {
+               if (entered_state > 0) {
                        local_irq_enable();
                        goto exit_idle;
                }
index 580ac2d..15a1795 100644
@@ -316,24 +316,24 @@ static inline void seccomp_sync_threads(void)
                put_seccomp_filter(thread);
                smp_store_release(&thread->seccomp.filter,
                                  caller->seccomp.filter);
+
+               /*
+                * Don't let an unprivileged task work around
+                * the no_new_privs restriction by creating
+                * a thread that sets it up, enters seccomp,
+                * then dies.
+                */
+               if (task_no_new_privs(caller))
+                       task_set_no_new_privs(thread);
+
                /*
                 * Opt the other thread into seccomp if needed.
                 * As threads are considered to be trust-realm
                 * equivalent (see ptrace_may_access), it is safe to
                 * allow one thread to transition the other.
                 */
-               if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) {
-                       /*
-                        * Don't let an unprivileged task work around
-                        * the no_new_privs restriction by creating
-                        * a thread that sets it up, enters seccomp,
-                        * then dies.
-                        */
-                       if (task_no_new_privs(caller))
-                               task_set_no_new_privs(thread);
-
+               if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
                        seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
-               }
        }
 }
 
index 435b885..fa909f9 100644
@@ -897,10 +897,10 @@ static int enqueue_hrtimer(struct hrtimer *timer,
  */
 static void __remove_hrtimer(struct hrtimer *timer,
                             struct hrtimer_clock_base *base,
-                            unsigned long newstate, int reprogram)
+                            u8 newstate, int reprogram)
 {
        struct hrtimer_cpu_base *cpu_base = base->cpu_base;
-       unsigned int state = timer->state;
+       u8 state = timer->state;
 
        timer->state = newstate;
        if (!(state & HRTIMER_STATE_ENQUEUED))
@@ -930,7 +930,7 @@ static inline int
 remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart)
 {
        if (hrtimer_is_queued(timer)) {
-               unsigned long state = timer->state;
+               u8 state = timer->state;
                int reprogram;
 
                /*
@@ -954,6 +954,22 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool rest
        return 0;
 }
 
+static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim,
+                                           const enum hrtimer_mode mode)
+{
+#ifdef CONFIG_TIME_LOW_RES
+       /*
+        * CONFIG_TIME_LOW_RES indicates that the system has no way to return
+        * granular time values. For relative timers we add hrtimer_resolution
+        * (i.e. one jiffy) to prevent short timeouts.
+        */
+       timer->is_rel = mode & HRTIMER_MODE_REL;
+       if (timer->is_rel)
+               tim = ktime_add_safe(tim, ktime_set(0, hrtimer_resolution));
+#endif
+       return tim;
+}
+
 /**
  * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
  * @timer:     the timer to be added
@@ -974,19 +990,10 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
        /* Remove an active timer from the queue: */
        remove_hrtimer(timer, base, true);
 
-       if (mode & HRTIMER_MODE_REL) {
+       if (mode & HRTIMER_MODE_REL)
                tim = ktime_add_safe(tim, base->get_time());
-               /*
-                * CONFIG_TIME_LOW_RES is a temporary way for architectures
-                * to signal that they simply return xtime in
-                * do_gettimeoffset(). In this case we want to round up by
-                * resolution when starting a relative timer, to avoid short
-                * timeouts. This will go away with the GTOD framework.
-                */
-#ifdef CONFIG_TIME_LOW_RES
-               tim = ktime_add_safe(tim, ktime_set(0, hrtimer_resolution));
-#endif
-       }
+
+       tim = hrtimer_update_lowres(timer, tim, mode);
 
        hrtimer_set_expires_range_ns(timer, tim, delta_ns);
 
@@ -1074,19 +1081,23 @@ EXPORT_SYMBOL_GPL(hrtimer_cancel);
 /**
  * hrtimer_get_remaining - get remaining time for the timer
  * @timer:     the timer to read
+ * @adjust:    adjust relative timers when CONFIG_TIME_LOW_RES=y
  */
-ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
+ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust)
 {
        unsigned long flags;
        ktime_t rem;
 
        lock_hrtimer_base(timer, &flags);
-       rem = hrtimer_expires_remaining(timer);
+       if (IS_ENABLED(CONFIG_TIME_LOW_RES) && adjust)
+               rem = hrtimer_expires_remaining_adjusted(timer);
+       else
+               rem = hrtimer_expires_remaining(timer);
        unlock_hrtimer_base(timer, &flags);
 
        return rem;
 }
-EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
+EXPORT_SYMBOL_GPL(__hrtimer_get_remaining);
 
 #ifdef CONFIG_NO_HZ_COMMON
 /**
@@ -1219,6 +1230,14 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
        timer_stats_account_hrtimer(timer);
        fn = timer->function;
 
+       /*
+        * Clear the 'is relative' flag for the TIME_LOW_RES case. If the
+        * timer is restarted with a period then it becomes an absolute
+        * timer. If it's not restarted it does not matter.
+        */
+       if (IS_ENABLED(CONFIG_TIME_LOW_RES))
+               timer->is_rel = false;
+
        /*
         * Because we run timers from hardirq context, there is no chance
         * they get migrated to another cpu, therefore its safe to unlock
index 8d262b4..1d5c720 100644
@@ -26,7 +26,7 @@
  */
 static struct timeval itimer_get_remtime(struct hrtimer *timer)
 {
-       ktime_t rem = hrtimer_get_remaining(timer);
+       ktime_t rem = __hrtimer_get_remaining(timer, true);
 
        /*
         * Racy but safe: if the itimer expires after the above
index 36f2ca0..6df8927 100644
@@ -685,8 +685,18 @@ int ntp_validate_timex(struct timex *txc)
                if (!capable(CAP_SYS_TIME))
                        return -EPERM;
 
-               if (!timeval_inject_offset_valid(&txc->time))
-                       return -EINVAL;
+               if (txc->modes & ADJ_NANO) {
+                       struct timespec ts;
+
+                       ts.tv_sec = txc->time.tv_sec;
+                       ts.tv_nsec = txc->time.tv_usec;
+                       if (!timespec_inject_offset_valid(&ts))
+                               return -EINVAL;
+
+               } else {
+                       if (!timeval_inject_offset_valid(&txc->time))
+                               return -EINVAL;
+               }
        }
 
        /*
index 31d11ac..f2826c3 100644
@@ -760,7 +760,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
            (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE))
                timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv);
 
-       remaining = ktime_sub(hrtimer_get_expires(timer), now);
+       remaining = __hrtimer_expires_remaining_adjusted(timer, now);
        /* Return 0 only, when the timer is expired and not pending */
        if (remaining.tv64 <= 0) {
                /*
index 9d7a053..0b17424 100644
  */
 static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
 
-/*
- * The time, when the last jiffy update happened. Protected by jiffies_lock.
- */
-static ktime_t last_jiffies_update;
-
 struct tick_sched *tick_get_tick_sched(int cpu)
 {
        return &per_cpu(tick_cpu_sched, cpu);
 }
 
+#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
+/*
+ * The time, when the last jiffy update happened. Protected by jiffies_lock.
+ */
+static ktime_t last_jiffies_update;
+
 /*
  * Must be called with interrupts disabled !
  */
@@ -151,6 +152,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
        update_process_times(user_mode(regs));
        profile_tick(CPU_PROFILING);
 }
+#endif
 
 #ifdef CONFIG_NO_HZ_FULL
 cpumask_var_t tick_nohz_full_mask;
@@ -993,9 +995,9 @@ static void tick_nohz_switch_to_nohz(void)
        /* Get the next period */
        next = tick_init_jiffy_update();
 
-       hrtimer_forward_now(&ts->sched_timer, tick_period);
        hrtimer_set_expires(&ts->sched_timer, next);
-       tick_program_event(next, 1);
+       hrtimer_forward_now(&ts->sched_timer, tick_period);
+       tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
        tick_nohz_activate(ts, NOHZ_MODE_LOWRES);
 }
 
index f75e35b..ba7d8b2 100644
@@ -69,7 +69,7 @@ print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer,
        print_name_offset(m, taddr);
        SEQ_printf(m, ", ");
        print_name_offset(m, timer->function);
-       SEQ_printf(m, ", S:%02lx", timer->state);
+       SEQ_printf(m, ", S:%02x", timer->state);
 #ifdef CONFIG_TIMER_STATS
        SEQ_printf(m, ", ");
        print_name_offset(m, timer->start_site);
index 45dd798..326a75e 100644
@@ -191,14 +191,17 @@ static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5)
        struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        struct perf_event *event;
+       struct file *file;
 
        if (unlikely(index >= array->map.max_entries))
                return -E2BIG;
 
-       event = (struct perf_event *)array->ptrs[index];
-       if (!event)
+       file = (struct file *)array->ptrs[index];
+       if (unlikely(!file))
                return -ENOENT;
 
+       event = file->private_data;
+
        /* make sure event is local and doesn't have pmu::count */
        if (event->oncpu != smp_processor_id() ||
            event->pmu->count)
@@ -228,6 +231,7 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size)
        void *data = (void *) (long) r4;
        struct perf_sample_data sample_data;
        struct perf_event *event;
+       struct file *file;
        struct perf_raw_record raw = {
                .size = size,
                .data = data,
@@ -236,10 +240,12 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size)
        if (unlikely(index >= array->map.max_entries))
                return -E2BIG;
 
-       event = (struct perf_event *)array->ptrs[index];
-       if (unlikely(!event))
+       file = (struct file *)array->ptrs[index];
+       if (unlikely(!file))
                return -ENOENT;
 
+       event = file->private_data;
+
        if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
                     event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
                return -EINVAL;
index 87fb980..d929340 100644
@@ -1751,7 +1751,7 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr,
 {
        __buffer_unlock_commit(buffer, event);
 
-       ftrace_trace_stack(tr, buffer, flags, 6, pc, regs);
+       ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
        ftrace_trace_userstack(buffer, flags, pc);
 }
 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
index 547f7f9..519b5a1 100644
@@ -21,7 +21,7 @@
 #define ODEBUG_HASH_BITS       14
 #define ODEBUG_HASH_SIZE       (1 << ODEBUG_HASH_BITS)
 
-#define ODEBUG_POOL_SIZE       512
+#define ODEBUG_POOL_SIZE       1024
 #define ODEBUG_POOL_MIN_LEVEL  256
 
 #define ODEBUG_CHUNK_SHIFT     PAGE_SHIFT
index 8fc5081..ba5d8f3 100644
@@ -22,7 +22,7 @@
  * cleancache_ops is set by cleancache_register_ops to contain the pointers
  * to the cleancache "backend" implementation functions.
  */
-static struct cleancache_ops *cleancache_ops __read_mostly;
+static const struct cleancache_ops *cleancache_ops __read_mostly;
 
 /*
  * Counters available via /sys/kernel/debug/cleancache (if debugfs is
@@ -49,7 +49,7 @@ static void cleancache_register_ops_sb(struct super_block *sb, void *unused)
 /*
  * Register operations for cleancache. Returns 0 on success.
  */
-int cleancache_register_ops(struct cleancache_ops *ops)
+int cleancache_register_ops(const struct cleancache_ops *ops)
 {
        if (cmpxchg(&cleancache_ops, NULL, ops))
                return -EBUSY;
index e080746..48958d3 100644
@@ -594,7 +594,8 @@ static int ignore_undef_symbol(struct elf_info *info, const char *symname)
                if (strncmp(symname, "_restgpr0_", sizeof("_restgpr0_") - 1) == 0 ||
                    strncmp(symname, "_savegpr0_", sizeof("_savegpr0_") - 1) == 0 ||
                    strncmp(symname, "_restvr_", sizeof("_restvr_") - 1) == 0 ||
-                   strncmp(symname, "_savevr_", sizeof("_savevr_") - 1) == 0)
+                   strncmp(symname, "_savevr_", sizeof("_savevr_") - 1) == 0 ||
+                   strcmp(symname, ".TOC.") == 0)
                        return 1;
        /* Do not ignore this symbol */
        return 0;
index 07a8731..09ef276 100644 (file)
@@ -430,7 +430,8 @@ static int __key_instantiate_and_link(struct key *key,
 
                        /* and link it into the destination keyring */
                        if (keyring) {
-                               set_bit(KEY_FLAG_KEEP, &key->flags);
+                               if (test_bit(KEY_FLAG_KEEP, &keyring->flags))
+                                       set_bit(KEY_FLAG_KEEP, &key->flags);
 
                                __key_link(key, _edit);
                        }
index e3e9491..a2a1e24 100644 (file)
@@ -97,11 +97,11 @@ config SND_PCM_TIMER
        bool "PCM timer interface" if EXPERT
        default y
        help
-         If you disable this option, pcm timer will be inavailable, so
-         those stubs used pcm timer (e.g. dmix, dsnoop & co) may work
+         If you disable this option, pcm timer will be unavailable, so
+         those stubs that use pcm timer (e.g. dmix, dsnoop & co) may work
          incorrectly.

 
-         For some embedded device, we may disable it to reduce memory
+         For some embedded devices, we may disable it to reduce memory
          footprint, about 20KB on x86_64 platform.
 
 config SND_SEQUENCER_OSS
index 18b8dc4..7fac3ca 100644 (file)
 #include <sound/compress_offload.h>
 #include <sound/compress_driver.h>
 
+/* struct snd_compr_codec_caps overflows the ioctl bit size for some
+ * architectures, so we need to disable the relevant ioctls.
+ */
+#if _IOC_SIZEBITS < 14
+#define COMPR_CODEC_CAPS_OVERFLOW
+#endif
+
 /* TODO:
  * - add substream support for multiple devices in case of
  *     SND_DYNAMIC_MINORS is not used
@@ -440,6 +447,7 @@ out:
        return retval;
 }
 
+#ifndef COMPR_CODEC_CAPS_OVERFLOW
 static int
 snd_compr_get_codec_caps(struct snd_compr_stream *stream, unsigned long arg)
 {
@@ -463,6 +471,7 @@ out:
        kfree(caps);
        return retval;
 }
+#endif /* !COMPR_CODEC_CAPS_OVERFLOW */
 
 /* revisit this with snd_pcm_preallocate_xxx */
 static int snd_compr_allocate_buffer(struct snd_compr_stream *stream,
@@ -801,9 +810,11 @@ static long snd_compr_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
        case _IOC_NR(SNDRV_COMPRESS_GET_CAPS):
                retval = snd_compr_get_caps(stream, arg);
                break;
+#ifndef COMPR_CODEC_CAPS_OVERFLOW
        case _IOC_NR(SNDRV_COMPRESS_GET_CODEC_CAPS):
                retval = snd_compr_get_codec_caps(stream, arg);
                break;
+#endif
        case _IOC_NR(SNDRV_COMPRESS_SET_PARAMS):
                retval = snd_compr_set_params(stream, arg);
                break;
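
The new guard works because an ioctl command number encodes its argument size in a field _IOC_SIZEBITS bits wide; struct snd_compr_codec_caps only fits when that field is at least 14 bits (the usual Linux value), hence the `< 14` test, and on narrower architectures the GET_CODEC_CAPS number cannot be formed at all. A small userspace illustration of the limit, assuming the standard Linux _IOC layout:

    #include <stdio.h>
    #include <linux/ioctl.h>        /* _IOC_SIZEBITS, usually 14 on Linux */

    int main(void)
    {
            /* an ioctl number stores the argument size in _IOC_SIZEBITS
             * bits, so anything larger cannot be encoded at all */
            unsigned long max_arg = (1UL << _IOC_SIZEBITS) - 1;

            printf("_IOC_SIZEBITS = %d, max ioctl argument: %lu bytes\n",
                   _IOC_SIZEBITS, max_arg);
            return 0;
    }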
index b1221b2..6779e82 100644 (file)
@@ -202,7 +202,7 @@ snd_seq_oss_open(struct file *file, int level)
 
        dp->index = i;
        if (i >= SNDRV_SEQ_OSS_MAX_CLIENTS) {
-               pr_err("ALSA: seq_oss: too many applications\n");
+               pr_debug("ALSA: seq_oss: too many applications\n");
                rc = -ENOMEM;
                goto _error;
        }
index 0f3b381..b16dbef 100644 (file)
@@ -308,7 +308,7 @@ snd_seq_oss_synth_cleanup(struct seq_oss_devinfo *dp)
        struct seq_oss_synth *rec;
        struct seq_oss_synthinfo *info;
 
-       if (snd_BUG_ON(dp->max_synthdev >= SNDRV_SEQ_OSS_MAX_SYNTH_DEVS))
+       if (snd_BUG_ON(dp->max_synthdev > SNDRV_SEQ_OSS_MAX_SYNTH_DEVS))
                return;
        for (i = 0; i < dp->max_synthdev; i++) {
                info = &dp->synths[i];
index 75b7485..bde3330 100644 (file)
@@ -87,7 +87,7 @@ MODULE_PARM_DESC(pcm_substreams, "PCM substreams # (1-128) for dummy driver.");
 module_param(fake_buffer, bool, 0444);
 MODULE_PARM_DESC(fake_buffer, "Fake buffer allocations.");
 #ifdef CONFIG_HIGH_RES_TIMERS
-module_param(hrtimer, bool, 0644);
+module_param(hrtimer, bool, 0444);
 MODULE_PARM_DESC(hrtimer, "Use hrtimer as the timer source.");
 #endif
 
index 926e5dc..5022c9b 100644 (file)
@@ -47,14 +47,16 @@ static const unsigned int bridgeco_freq_table[] = {
        [6] = 0x07,
 };
 
-static unsigned int
-get_formation_index(unsigned int rate)
+static int
+get_formation_index(unsigned int rate, unsigned int *index)
 {
        unsigned int i;
 
        for (i = 0; i < ARRAY_SIZE(snd_bebob_rate_table); i++) {
-               if (snd_bebob_rate_table[i] == rate)
-                       return i;
+               if (snd_bebob_rate_table[i] == rate) {
+                       *index = i;
+                       return 0;
+               }
        }
        return -EINVAL;
 }
@@ -425,7 +427,9 @@ make_both_connections(struct snd_bebob *bebob, unsigned int rate)
                goto end;
 
        /* confirm params for both streams */
-       index = get_formation_index(rate);
+       err = get_formation_index(rate, &index);
+       if (err < 0)
+               goto end;
        pcm_channels = bebob->tx_stream_formations[index].pcm;
        midi_channels = bebob->tx_stream_formations[index].midi;
        err = amdtp_am824_set_parameters(&bebob->tx_stream, rate,
index 0216475..37adcc6 100644 (file)
@@ -3,6 +3,7 @@
 config SND_WSS_LIB
         tristate
         select SND_PCM
+       select SND_TIMER
 
 config SND_SB_COMMON
         tristate
@@ -42,6 +43,7 @@ config SND_AD1816A
        select SND_OPL3_LIB
        select SND_MPU401_UART
        select SND_PCM
+       select SND_TIMER
        help
          Say Y here to include support for Analog Devices SoundPort
          AD1816A or compatible sound chips.
@@ -209,6 +211,7 @@ config SND_GUSCLASSIC
        tristate "Gravis UltraSound Classic"
        select SND_RAWMIDI
        select SND_PCM
+       select SND_TIMER
        help
          Say Y here to include support for Gravis UltraSound Classic
          soundcards.
@@ -221,6 +224,7 @@ config SND_GUSEXTREME
        select SND_OPL3_LIB
        select SND_MPU401_UART
        select SND_PCM
+       select SND_TIMER
        help
          Say Y here to include support for Gravis UltraSound Extreme
          soundcards.
index 656ce39..8f6594a 100644 (file)
@@ -155,6 +155,7 @@ config SND_AZT3328
        select SND_PCM
        select SND_RAWMIDI
        select SND_AC97_CODEC
+       select SND_TIMER
        depends on ZONE_DMA
        help
          Say Y here to include support for Aztech AZF3328 (PCI168)
@@ -463,6 +464,7 @@ config SND_EMU10K1
        select SND_HWDEP
        select SND_RAWMIDI
        select SND_AC97_CODEC
+       select SND_TIMER
        depends on ZONE_DMA
        help
          Say Y to include support for Sound Blaster PCI 512, Live!,
@@ -889,6 +891,7 @@ config SND_YMFPCI
        select SND_OPL3_LIB
        select SND_MPU401_UART
        select SND_AC97_CODEC
+       select SND_TIMER
        help
          Say Y here to include support for Yamaha PCI audio chips -
          YMF724, YMF724F, YMF740, YMF740C, YMF744, YMF754.
index 256e6cd..4045dca 100644 (file)
@@ -90,6 +90,8 @@ enum {
 #define NVIDIA_HDA_ENABLE_COHBIT      0x01
 
 /* Defines for Intel SCH HDA snoop control */
+#define INTEL_HDA_CGCTL         0x48
+#define INTEL_HDA_CGCTL_MISCBDCGE        (0x1 << 6)
 #define INTEL_SCH_HDA_DEVC      0x78
 #define INTEL_SCH_HDA_DEVC_NOSNOOP       (0x1<<11)
 
@@ -534,10 +536,21 @@ static void hda_intel_init_chip(struct azx *chip, bool full_reset)
 {
        struct hdac_bus *bus = azx_bus(chip);
        struct pci_dev *pci = chip->pci;
+       u32 val;
 
        if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL)
                snd_hdac_set_codec_wakeup(bus, true);
+       if (IS_BROXTON(pci)) {
+               pci_read_config_dword(pci, INTEL_HDA_CGCTL, &val);
+               val = val & ~INTEL_HDA_CGCTL_MISCBDCGE;
+               pci_write_config_dword(pci, INTEL_HDA_CGCTL, val);
+       }
        azx_init_chip(chip, full_reset);
+       if (IS_BROXTON(pci)) {
+               pci_read_config_dword(pci, INTEL_HDA_CGCTL, &val);
+               val = val | INTEL_HDA_CGCTL_MISCBDCGE;
+               pci_write_config_dword(pci, INTEL_HDA_CGCTL, val);
+       }
        if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL)
                snd_hdac_set_codec_wakeup(bus, false);
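
The Broxton-specific additions are a plain read-modify-write of a config-space register: clear the MISCBDCGE bit so dynamic clock gating cannot interfere while azx_init_chip() resets the controller, then set it again afterwards. The same pattern as a helper (a sketch; the patch keeps the logic inline, and my_set_miscbdcge is a hypothetical name):

    /* Sketch: toggle one bit of a PCI config dword (needs <linux/pci.h>). */
    static void my_set_miscbdcge(struct pci_dev *pci, bool enable)
    {
            u32 val;

            pci_read_config_dword(pci, INTEL_HDA_CGCTL, &val);
            if (enable)
                    val |= INTEL_HDA_CGCTL_MISCBDCGE;
            else
                    val &= ~INTEL_HDA_CGCTL_MISCBDCGE;
            pci_write_config_dword(pci, INTEL_HDA_CGCTL, val);
    }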
 
index 426a29a..1f52b55 100644 (file)
@@ -3653,6 +3653,7 @@ HDA_CODEC_ENTRY(0x10de0070, "GPU 70 HDMI/DP",     patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de0071, "GPU 71 HDMI/DP",  patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de0072, "GPU 72 HDMI/DP",  patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de007d, "GPU 7d HDMI/DP",  patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de0083, "GPU 83 HDMI/DP",  patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de8001, "MCP73 HDMI",      patch_nvhdmi_2ch),
 HDA_CODEC_ENTRY(0x11069f80, "VX900 HDMI/DP",   patch_via_hdmi),
 HDA_CODEC_ENTRY(0x11069f81, "VX900 HDMI/DP",   patch_via_hdmi),
index d75deba..dfcd386 100644 (file)
@@ -22,6 +22,7 @@ config SND_SUN_AMD7930
 config SND_SUN_CS4231
        tristate "Sun CS4231"
        select SND_PCM
+       select SND_TIMER
        help
          Say Y here to include support for CS4231 sound device on Sun.
 
index 23ea6d8..a75d9ce 100644 (file)
@@ -1205,8 +1205,12 @@ void snd_usb_set_interface_quirk(struct usb_device *dev)
         * "Playback Design" products need a 50ms delay after setting the
         * USB interface.
         */
-       if (le16_to_cpu(dev->descriptor.idVendor) == 0x23ba)
+       switch (le16_to_cpu(dev->descriptor.idVendor)) {
+       case 0x23ba: /* Playback Design */
+       case 0x0644: /* TEAC Corp. */
                mdelay(50);
+               break;
+       }
 }
 
 void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe,
@@ -1221,6 +1225,14 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe,
            (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)
                mdelay(20);
 
+       /*
+        * "TEAC Corp." products need a 20ms delay after each
+        * class compliant request
+        */
+       if ((le16_to_cpu(dev->descriptor.idVendor) == 0x0644) &&
+           (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)
+               mdelay(20);
+
        /* Marantz/Denon devices with USB DAC functionality need a delay
         * after each class compliant request
         */
index 0a22407..5d34815 100644 (file)
@@ -77,6 +77,9 @@ include config/utilities.mak
 # Define NO_AUXTRACE if you do not want AUX area tracing support
 #
 # Define NO_LIBBPF if you do not want BPF support
+#
+# Define FEATURES_DUMP to provide features detection dump file
+# and bypass the feature detection
 
 # As per kernel Makefile, avoid funny character set dependencies
 unexport LC_ALL
@@ -166,6 +169,15 @@ ifeq ($(config),1)
 include config/Makefile
 endif
 
+# FEATURE_DUMP_EXPORT holds the location of the actual
+# FEATURE_DUMP file to be used to bypass feature detection
+# (for bpf or any other subproject)
+ifeq ($(FEATURES_DUMP),)
+FEATURE_DUMP_EXPORT := $(realpath $(OUTPUT)FEATURE-DUMP)
+else
+FEATURE_DUMP_EXPORT := $(FEATURES_DUMP)
+endif
+
 export prefix bindir sharedir sysconfdir DESTDIR
 
 # sparse is architecture-neutral, which means that we need to tell it
@@ -436,7 +448,7 @@ $(LIBAPI)-clean:
        $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
 
 $(LIBBPF): fixdep FORCE
-       $(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(realpath $(OUTPUT)FEATURE-DUMP)
+       $(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT)
 
 $(LIBBPF)-clean:
        $(call QUIET_CLEAN, libbpf)
@@ -610,6 +622,17 @@ clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean
        $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
        $(python-clean)
 
+#
+# Copy the FEATURE-DUMP file into $(FEATURE_DUMP_COPY)
+# if defined, with no further action.
+feature-dump:
+ifdef FEATURE_DUMP_COPY
+       @cp $(OUTPUT)FEATURE-DUMP $(FEATURE_DUMP_COPY)
+       @echo "FEATURE-DUMP file copied into $(FEATURE_DUMP_COPY)"
+else
+       @echo "FEATURE-DUMP file available in $(OUTPUT)FEATURE-DUMP"
+endif
+
 #
 # Trick: if ../../.git does not exist - we are building out of tree for example,
 # then force version regeneration:
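
Taken together with the config/Makefile hunk further below, this lets one feature-detection pass be reused across builds. A first build can publish its detection results with something like

    make feature-dump FEATURE_DUMP_COPY=/tmp/FEATURE-DUMP

and later builds can skip detection entirely with

    make FEATURES_DUMP=/tmp/FEATURE-DUMP

(the path is illustrative). When FEATURES_DUMP is unset, FEATURE_DUMP_EXPORT falls back to the FEATURE-DUMP file generated in $(OUTPUT), so the libbpf sub-make behaves as before.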
index 3e89ba8..7f064eb 100644 (file)
@@ -17,7 +17,7 @@ static pid_t spawn(void)
        if (pid)
                return pid;
 
-       while(1);
+       while(1)
                sleep(5);
        return 0;
 }
index e5959c1..511141b 100644 (file)
@@ -181,7 +181,11 @@ LDFLAGS += -Wl,-z,noexecstack
 
 EXTLIBS = -lpthread -lrt -lm -ldl
 
+ifeq ($(FEATURES_DUMP),)
 include $(srctree)/tools/build/Makefile.feature
+else
+include $(FEATURES_DUMP)
+endif
 
 ifeq ($(feature-stackprotector-all), 1)
   CFLAGS += -fstack-protector-all
index df38dec..f918015 100644 (file)
@@ -5,7 +5,7 @@ ifeq ($(MAKECMDGOALS),)
 # no target specified, trigger the whole suite
 all:
        @echo "Testing Makefile";      $(MAKE) -sf tests/make MK=Makefile
-       @echo "Testing Makefile.perf"; $(MAKE) -sf tests/make MK=Makefile.perf
+       @echo "Testing Makefile.perf"; $(MAKE) -sf tests/make MK=Makefile.perf SET_PARALLEL=1 SET_O=1
 else
 # run only specific test over 'Makefile'
 %:
@@ -13,6 +13,26 @@ else
 endif
 else
 PERF := .
+PERF_O := $(PERF)
+O_OPT :=
+
+ifneq ($(O),)
+  FULL_O := $(shell readlink -f $(O) || echo $(O))
+  PERF_O := $(FULL_O)
+  ifeq ($(SET_O),1)
+    O_OPT := 'O=$(FULL_O)'
+  endif
+  K_O_OPT := 'O=$(FULL_O)'
+endif
+
+PARALLEL_OPT=
+ifeq ($(SET_PARALLEL),1)
+  cores := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null)
+  ifeq ($(cores),0)
+    cores := 1
+  endif
+  PARALLEL_OPT="-j$(cores)"
+endif
 
 # As per kernel Makefile, avoid funny character set dependencies
 unexport LC_ALL
@@ -156,11 +176,11 @@ test_make_doc    := $(test_ok)
 test_make_help_O := $(test_ok)
 test_make_doc_O  := $(test_ok)
 
-test_make_python_perf_so := test -f $(PERF)/python/perf.so
+test_make_python_perf_so := test -f $(PERF_O)/python/perf.so
 
-test_make_perf_o           := test -f $(PERF)/perf.o
-test_make_util_map_o       := test -f $(PERF)/util/map.o
-test_make_util_pmu_bison_o := test -f $(PERF)/util/pmu-bison.o
+test_make_perf_o           := test -f $(PERF_O)/perf.o
+test_make_util_map_o       := test -f $(PERF_O)/util/map.o
+test_make_util_pmu_bison_o := test -f $(PERF_O)/util/pmu-bison.o
 
 define test_dest_files
   for file in $(1); do                         \
@@ -227,7 +247,7 @@ test_make_perf_o_O            := test -f $$TMP_O/perf.o
 test_make_util_map_o_O        := test -f $$TMP_O/util/map.o
 test_make_util_pmu_bison_o_O := test -f $$TMP_O/util/pmu-bison.o
 
-test_default = test -x $(PERF)/perf
+test_default = test -x $(PERF_O)/perf
 test = $(if $(test_$1),$(test_$1),$(test_default))
 
 test_default_O = test -x $$TMP_O/perf
@@ -247,12 +267,12 @@ endif
 
 MAKEFLAGS := --no-print-directory
 
-clean := @(cd $(PERF); make -s -f $(MK) clean >/dev/null)
+clean := @(cd $(PERF); make -s -f $(MK) $(O_OPT) clean >/dev/null)
 
 $(run):
        $(call clean)
        @TMP_DEST=$$(mktemp -d); \
-       cmd="cd $(PERF) && make -f $(MK) DESTDIR=$$TMP_DEST $($@)"; \
+       cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST $($@)"; \
        echo "- $@: $$cmd" && echo $$cmd > $@ && \
        ( eval $$cmd ) >> $@ 2>&1; \
        echo "  test: $(call test,$@)" >> $@ 2>&1; \
@@ -263,7 +283,7 @@ $(run_O):
        $(call clean)
        @TMP_O=$$(mktemp -d); \
        TMP_DEST=$$(mktemp -d); \
-       cmd="cd $(PERF) && make -f $(MK) O=$$TMP_O DESTDIR=$$TMP_DEST $($(patsubst %_O,%,$@))"; \
+       cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) O=$$TMP_O DESTDIR=$$TMP_DEST $($(patsubst %_O,%,$@))"; \
        echo "- $@: $$cmd" && echo $$cmd > $@ && \
        ( eval $$cmd ) >> $@ 2>&1 && \
        echo "  test: $(call test_O,$@)" >> $@ 2>&1; \
@@ -276,17 +296,22 @@ tarpkg:
        ( eval $$cmd ) >> $@ 2>&1 && \
        rm -f $@
 
+KERNEL_O := ../..
+ifneq ($(O),)
+  KERNEL_O := $(O)
+endif
+
 make_kernelsrc:
-       @echo "- make -C <kernelsrc> tools/perf"
+       @echo "- make -C <kernelsrc> $(PARALLEL_OPT) $(K_O_OPT) tools/perf"
        $(call clean); \
-       (make -C ../.. tools/perf) > $@ 2>&1 && \
-       test -x perf && rm -f $@ || (cat $@ ; false)
+       (make -C ../.. $(PARALLEL_OPT) $(K_O_OPT) tools/perf) > $@ 2>&1 && \
+       test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false)
 
 make_kernelsrc_tools:
-       @echo "- make -C <kernelsrc>/tools perf"
+       @echo "- make -C <kernelsrc>/tools $(PARALLEL_OPT) $(K_O_OPT) perf"
        $(call clean); \
-       (make -C ../../tools perf) > $@ 2>&1 && \
-       test -x perf && rm -f $@ || (cat $@ ; false)
+       (make -C ../../tools $(PARALLEL_OPT) $(K_O_OPT) perf) > $@ 2>&1 && \
+       test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false)
 
 all: $(run) $(run_O) tarpkg make_kernelsrc make_kernelsrc_tools
        @echo OK
index d4d7cc2..718bd46 100644 (file)
@@ -755,11 +755,11 @@ static int annotate_browser__run(struct annotate_browser *browser,
                                nd = browser->curr_hot;
                        break;
                case K_UNTAB:
-                       if (nd != NULL)
+                       if (nd != NULL) {
                                nd = rb_next(nd);
                                if (nd == NULL)
                                        nd = rb_first(&browser->entries);
-                       else
+                       } else
                                nd = browser->curr_hot;
                        break;
                case K_F1:
index c226303..68a7612 100644 (file)
@@ -131,6 +131,8 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
                        symlen = unresolved_col_width + 4 + 2;
                        hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL,
                                           symlen);
+                       hists__new_col_len(hists, HISTC_MEM_DCACHELINE,
+                                          symlen);
                }
 
                if (h->mem_info->iaddr.sym) {
index d5636ba..40b7a0d 100644 (file)
@@ -1149,7 +1149,7 @@ static struct machine *machines__find_for_cpumode(struct machines *machines,
 
                machine = machines__find(machines, pid);
                if (!machine)
-                       machine = machines__find(machines, DEFAULT_GUEST_KERNEL_ID);
+                       machine = machines__findnew(machines, DEFAULT_GUEST_KERNEL_ID);
                return machine;
        }
 
index 2f901d1..2b58edc 100644 (file)
@@ -310,7 +310,6 @@ int perf_stat_process_counter(struct perf_stat_config *config,
        int i, ret;
 
        aggr->val = aggr->ena = aggr->run = 0;
-       init_stats(ps->res_stats);
 
        if (counter->per_pkg)
                zero_per_pkg(counter);
index 3b2de6e..ab02209 100644 (file)
@@ -1466,7 +1466,7 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
         * Read the build id if possible. This is required for
         * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work
         */
-       if (filename__read_build_id(dso->name, build_id, BUILD_ID_SIZE) > 0)
+       if (filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0)
                dso__set_build_id(dso, build_id);
 
        /*
index e86d937..60fe3c5 100644 (file)
@@ -45,7 +45,17 @@ static inline int ksft_exit_fail(void)
 }
 #endif
 
-#define NSEC_PER_SEC 1000000000L
+#define NSEC_PER_SEC 1000000000LL
+#define USEC_PER_SEC 1000000LL
+
+#define ADJ_SETOFFSET 0x0100
+
+#include <sys/syscall.h>
+static int clock_adjtime(clockid_t id, struct timex *tx)
+{
+       return syscall(__NR_clock_adjtime, id, tx);
+}
+
 
 /* clear NTP time_status & time_state */
 int clear_time_state(void)
@@ -193,10 +203,137 @@ out:
 }
 
 
+int set_offset(long long offset, int use_nano)
+{
+       struct timex tmx = {};
+       int ret;
+
+       tmx.modes = ADJ_SETOFFSET;
+       if (use_nano) {
+               tmx.modes |= ADJ_NANO;
+
+               tmx.time.tv_sec = offset / NSEC_PER_SEC;
+               tmx.time.tv_usec = offset % NSEC_PER_SEC;
+
+               if (offset < 0 && tmx.time.tv_usec) {
+                       tmx.time.tv_sec -= 1;
+                       tmx.time.tv_usec += NSEC_PER_SEC;
+               }
+       } else {
+               tmx.time.tv_sec = offset / USEC_PER_SEC;
+               tmx.time.tv_usec = offset % USEC_PER_SEC;
+
+               if (offset < 0 && tmx.time.tv_usec) {
+                       tmx.time.tv_sec -= 1;
+                       tmx.time.tv_usec += USEC_PER_SEC;
+               }
+       }
+
+       ret = clock_adjtime(CLOCK_REALTIME, &tmx);
+       if (ret < 0) {
+               printf("(sec: %ld  usec: %ld) ", tmx.time.tv_sec, tmx.time.tv_usec);
+               printf("[FAIL]\n");
+               return -1;
+       }
+       return 0;
+}
+
+int set_bad_offset(long sec, long usec, int use_nano)
+{
+       struct timex tmx = {};
+       int ret;
+
+       tmx.modes = ADJ_SETOFFSET;
+       if (use_nano)
+               tmx.modes |= ADJ_NANO;
+
+       tmx.time.tv_sec = sec;
+       tmx.time.tv_usec = usec;
+       ret = clock_adjtime(CLOCK_REALTIME, &tmx);
+       if (ret >= 0) {
+               printf("Invalid (sec: %ld  usec: %ld) did not fail! ", tmx.time.tv_sec, tmx.time.tv_usec);
+               printf("[FAIL]\n");
+               return -1;
+       }
+       return 0;
+}
+
+int validate_set_offset(void)
+{
+       printf("Testing ADJ_SETOFFSET... ");
+
+       /* Test valid values */
+       if (set_offset(NSEC_PER_SEC - 1, 1))
+               return -1;
+
+       if (set_offset(-NSEC_PER_SEC + 1, 1))
+               return -1;
+
+       if (set_offset(-NSEC_PER_SEC - 1, 1))
+               return -1;
+
+       if (set_offset(5 * NSEC_PER_SEC, 1))
+               return -1;
+
+       if (set_offset(-5 * NSEC_PER_SEC, 1))
+               return -1;
+
+       if (set_offset(5 * NSEC_PER_SEC + NSEC_PER_SEC / 2, 1))
+               return -1;
+
+       if (set_offset(-5 * NSEC_PER_SEC - NSEC_PER_SEC / 2, 1))
+               return -1;
+
+       if (set_offset(USEC_PER_SEC - 1, 0))
+               return -1;
+
+       if (set_offset(-USEC_PER_SEC + 1, 0))
+               return -1;
+
+       if (set_offset(-USEC_PER_SEC - 1, 0))
+               return -1;
+
+       if (set_offset(5 * USEC_PER_SEC, 0))
+               return -1;
+
+       if (set_offset(-5 * USEC_PER_SEC, 0))
+               return -1;
+
+       if (set_offset(5 * USEC_PER_SEC + USEC_PER_SEC / 2, 0))
+               return -1;
+
+       if (set_offset(-5 * USEC_PER_SEC - USEC_PER_SEC / 2, 0))
+               return -1;
+
+       /* Test invalid values */
+       if (set_bad_offset(0, -1, 1))
+               return -1;
+       if (set_bad_offset(0, -1, 0))
+               return -1;
+       if (set_bad_offset(0, 2 * NSEC_PER_SEC, 1))
+               return -1;
+       if (set_bad_offset(0, 2 * USEC_PER_SEC, 0))
+               return -1;
+       if (set_bad_offset(0, NSEC_PER_SEC, 1))
+               return -1;
+       if (set_bad_offset(0, USEC_PER_SEC, 0))
+               return -1;
+       if (set_bad_offset(0, -NSEC_PER_SEC, 1))
+               return -1;
+       if (set_bad_offset(0, -USEC_PER_SEC, 0))
+               return -1;
+
+       printf("[OK]\n");
+       return 0;
+}
+
 int main(int argc, char **argv)
 {
        if (validate_freq())
                return ksft_exit_fail();
 
+       if (validate_set_offset())
+               return ksft_exit_fail();
+
        return ksft_exit_pass();
 }
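
The sign handling in set_offset() reflects the timex contract: adjtimex() requires 0 <= time.tv_usec < USEC_PER_SEC (or < NSEC_PER_SEC with ADJ_NANO) even for negative offsets, which must be expressed by borrowing from tv_sec. Worked example in nanosecond mode: offset = -1500000000 yields tv_sec = -1 and tv_usec = -500000000 from the truncating division, and after the borrow tv_sec = -2 and tv_usec = 500000000, still -1.5 s in total. The set_bad_offset() cases then verify that the kernel rejects anything outside that range.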
index 26b7926..ba34f9e 100644 (file)
@@ -1,15 +1,19 @@
 #if defined(__i386__) || defined(__x86_64__)
 #define barrier() asm volatile("" ::: "memory")
-#define mb() __sync_synchronize()
-
-#define smp_mb()       mb()
-# define dma_rmb()     barrier()
-# define dma_wmb()     barrier()
-# define smp_rmb()     barrier()
-# define smp_wmb()     barrier()
+#define virt_mb() __sync_synchronize()
+#define virt_rmb() barrier()
+#define virt_wmb() barrier()
+/* Atomic store should be enough, but gcc generates worse code in that case. */
+#define virt_store_mb(var, value)  do { \
+       typeof(var) virt_store_mb_value = (value); \
+       __atomic_exchange(&(var), &virt_store_mb_value, &virt_store_mb_value, \
+                         __ATOMIC_SEQ_CST); \
+       barrier(); \
+} while (0)
 /* Weak barriers should be used. If not - it's a bug */
-# define rmb() abort()
-# define wmb() abort()
+# define mb() abort()
+# define rmb() abort()
+# define wmb() abort()
 #else
 #error Please fill in barrier macros
 #endif
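
One detail worth spelling out: virt_store_mb() is wrapped in do { ... } while (0) so the multi-statement macro behaves as a single statement, which only works when the macro body carries no trailing semicolon. A minimal illustration of what a stray semicolon would break (ready, flag and do_other() are hypothetical):

    /* With "while (0);" in the macro, the expansion ends in two statements,
     * so the else below no longer attaches to the if and compilation fails. */
    if (ready)
            virt_store_mb(flag, 1);
    else
            do_other();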
diff --git a/tools/virtio/linux/compiler.h b/tools/virtio/linux/compiler.h
new file mode 100644 (file)
index 0000000..845960e
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef LINUX_COMPILER_H
+#define LINUX_COMPILER_H
+
+#define WRITE_ONCE(var, val) \
+       (*((volatile typeof(val) *)(&(var))) = (val))
+
+#define READ_ONCE(var) (*((volatile typeof(var) *)(&(var))))
+
+#endif
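
Casting through a volatile pointer forces the compiler to emit exactly one load or store per use, with no caching, tearing or re-reading, which is what ring code needs for fields shared with another thread. A usage sketch under that assumption (shared_flag is a hypothetical variable):

    /* Sketch: poll a flag written by another thread. Without READ_ONCE the
     * compiler may hoist the load out of the loop and spin forever. */
    static int shared_flag;

    static void wait_for_flag(void)
    {
            while (!READ_ONCE(shared_flag))
                    ;       /* spin until the other side stores 1 */
    }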
index 4db7d56..0338499 100644 (file)
@@ -8,6 +8,7 @@
 #include <assert.h>
 #include <stdarg.h>
 
+#include <linux/compiler.h>
 #include <linux/types.h>
 #include <linux/printk.h>
 #include <linux/bug.h>
diff --git a/tools/virtio/ringtest/Makefile b/tools/virtio/ringtest/Makefile
new file mode 100644 (file)
index 0000000..feaa64a
--- /dev/null
@@ -0,0 +1,22 @@
+all:
+
+all: ring virtio_ring_0_9 virtio_ring_poll
+
+CFLAGS += -Wall
+CFLAGS += -pthread -O2 -ggdb
+LDFLAGS += -pthread -O2 -ggdb
+
+main.o: main.c main.h
+ring.o: ring.c main.h
+virtio_ring_0_9.o: virtio_ring_0_9.c main.h
+virtio_ring_poll.o: virtio_ring_poll.c virtio_ring_0_9.c main.h
+ring: ring.o main.o
+virtio_ring_0_9: virtio_ring_0_9.o main.o
+virtio_ring_poll: virtio_ring_poll.o main.o
+clean:
+       -rm main.o
+       -rm ring.o ring
+       -rm virtio_ring_0_9.o virtio_ring_0_9
+       -rm virtio_ring_poll.o virtio_ring_poll
+
+.PHONY: all clean
diff --git a/tools/virtio/ringtest/README b/tools/virtio/ringtest/README
new file mode 100644 (file)
index 0000000..34e94c4
--- /dev/null
@@ -0,0 +1,2 @@
+Partial implementation of various ring layouts, useful to tune virtio design.
+Uses shared memory heavily.
diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c
new file mode 100644 (file)
index 0000000..3a5ff43
--- /dev/null
@@ -0,0 +1,366 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Command line processing and common functions for ring benchmarking.
+ */
+#define _GNU_SOURCE
+#include <getopt.h>
+#include <pthread.h>
+#include <assert.h>
+#include <sched.h>
+#include "main.h"
+#include <sys/eventfd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <limits.h>
+
+int runcycles = 10000000;
+int max_outstanding = INT_MAX;
+int batch = 1;
+
+bool do_sleep = false;
+bool do_relax = false;
+bool do_exit = true;
+
+unsigned ring_size = 256;
+
+static int kickfd = -1;
+static int callfd = -1;
+
+void notify(int fd)
+{
+       unsigned long long v = 1;
+       int r;
+
+       vmexit();
+       r = write(fd, &v, sizeof v);
+       assert(r == sizeof v);
+       vmentry();
+}
+
+void wait_for_notify(int fd)
+{
+       unsigned long long v = 1;
+       int r;
+
+       vmexit();
+       r = read(fd, &v, sizeof v);
+       assert(r == sizeof v);
+       vmentry();
+}
+
+void kick(void)
+{
+       notify(kickfd);
+}
+
+void wait_for_kick(void)
+{
+       wait_for_notify(kickfd);
+}
+
+void call(void)
+{
+       notify(callfd);
+}
+
+void wait_for_call(void)
+{
+       wait_for_notify(callfd);
+}
+
+void set_affinity(const char *arg)
+{
+       cpu_set_t cpuset;
+       int ret;
+       pthread_t self;
+       long int cpu;
+       char *endptr;
+
+       if (!arg)
+               return;
+
+       cpu = strtol(arg, &endptr, 0);
+       assert(!*endptr);
+
+       assert(cpu >= 0 && cpu < CPU_SETSIZE);
+
+       self = pthread_self();
+       CPU_ZERO(&cpuset);
+       CPU_SET(cpu, &cpuset);
+
+       ret = pthread_setaffinity_np(self, sizeof(cpu_set_t), &cpuset);
+       assert(!ret);
+}
+
+static void run_guest(void)
+{
+       int completed_before;
+       int completed = 0;
+       int started = 0;
+       int bufs = runcycles;
+       int spurious = 0;
+       int r;
+       unsigned len;
+       void *buf;
+       int tokick = batch;
+
+       for (;;) {
+               if (do_sleep)
+                       disable_call();
+               completed_before = completed;
+               do {
+                       if (started < bufs &&
+                           started - completed < max_outstanding) {
+                               r = add_inbuf(0, NULL, "Hello, world!");
+                               if (__builtin_expect(r == 0, true)) {
+                                       ++started;
+                                       if (!--tokick) {
+                                               tokick = batch;
+                                               if (do_sleep)
+                                                       kick_available();
+                                       }
+
+                               }
+                       } else
+                               r = -1;
+
+                       /* Flush out completed bufs if any */
+                       if (get_buf(&len, &buf)) {
+                               ++completed;
+                               if (__builtin_expect(completed == bufs, false))
+                                       return;
+                               r = 0;
+                       }
+               } while (r == 0);
+               if (completed == completed_before)
+                       ++spurious;
+               assert(completed <= bufs);
+               assert(started <= bufs);
+               if (do_sleep) {
+                       if (enable_call())
+                               wait_for_call();
+               } else {
+                       poll_used();
+               }
+       }
+}
+
+static void run_host(void)
+{
+       int completed_before;
+       int completed = 0;
+       int spurious = 0;
+       int bufs = runcycles;
+       unsigned len;
+       void *buf;
+
+       for (;;) {
+               if (do_sleep) {
+                       if (enable_kick())
+                               wait_for_kick();
+               } else {
+                       poll_avail();
+               }
+               if (do_sleep)
+                       disable_kick();
+               completed_before = completed;
+               while (__builtin_expect(use_buf(&len, &buf), true)) {
+                       if (do_sleep)
+                               call_used();
+                       ++completed;
+                       if (__builtin_expect(completed == bufs, false))
+                               return;
+               }
+               if (completed == completed_before)
+                       ++spurious;
+               assert(completed <= bufs);
+               if (completed == bufs)
+                       break;
+       }
+}
+
+void *start_guest(void *arg)
+{
+       set_affinity(arg);
+       run_guest();
+       pthread_exit(NULL);
+}
+
+void *start_host(void *arg)
+{
+       set_affinity(arg);
+       run_host();
+       pthread_exit(NULL);
+}
+
+static const char optstring[] = "";
+static const struct option longopts[] = {
+       {
+               .name = "help",
+               .has_arg = no_argument,
+               .val = 'h',
+       },
+       {
+               .name = "host-affinity",
+               .has_arg = required_argument,
+               .val = 'H',
+       },
+       {
+               .name = "guest-affinity",
+               .has_arg = required_argument,
+               .val = 'G',
+       },
+       {
+               .name = "ring-size",
+               .has_arg = required_argument,
+               .val = 'R',
+       },
+       {
+               .name = "run-cycles",
+               .has_arg = required_argument,
+               .val = 'C',
+       },
+       {
+               .name = "outstanding",
+               .has_arg = required_argument,
+               .val = 'o',
+       },
+       {
+               .name = "batch",
+               .has_arg = required_argument,
+               .val = 'b',
+       },
+       {
+               .name = "sleep",
+               .has_arg = no_argument,
+               .val = 's',
+       },
+       {
+               .name = "relax",
+               .has_arg = no_argument,
+               .val = 'x',
+       },
+       {
+               .name = "exit",
+               .has_arg = no_argument,
+               .val = 'e',
+       },
+       {
+       }
+};
+
+static void help(void)
+{
+       fprintf(stderr, "Usage: <test> [--help]"
+               " [--host-affinity H]"
+               " [--guest-affinity G]"
+               " [--ring-size R (default: %d)]"
+               " [--run-cycles C (default: %d)]"
+               " [--batch b]"
+               " [--outstanding o]"
+               " [--sleep]"
+               " [--relax]"
+               " [--exit]"
+               "\n",
+               ring_size,
+               runcycles);
+}
+
+int main(int argc, char **argv)
+{
+       int ret;
+       pthread_t host, guest;
+       void *tret;
+       char *host_arg = NULL;
+       char *guest_arg = NULL;
+       char *endptr;
+       long int c;
+
+       kickfd = eventfd(0, 0);
+       assert(kickfd >= 0);
+       callfd = eventfd(0, 0);
+       assert(callfd >= 0);
+
+       for (;;) {
+               int o = getopt_long(argc, argv, optstring, longopts, NULL);
+               switch (o) {
+               case -1:
+                       goto done;
+               case '?':
+                       help();
+                       exit(2);
+               case 'H':
+                       host_arg = optarg;
+                       break;
+               case 'G':
+                       guest_arg = optarg;
+                       break;
+               case 'R':
+                       ring_size = strtol(optarg, &endptr, 0);
+                       assert(ring_size && !(ring_size & (ring_size - 1)));
+                       assert(!*endptr);
+                       break;
+               case 'C':
+                       c = strtol(optarg, &endptr, 0);
+                       assert(!*endptr);
+                       assert(c > 0 && c < INT_MAX);
+                       runcycles = c;
+                       break;
+               case 'o':
+                       c = strtol(optarg, &endptr, 0);
+                       assert(!*endptr);
+                       assert(c > 0 && c < INT_MAX);
+                       max_outstanding = c;
+                       break;
+               case 'b':
+                       c = strtol(optarg, &endptr, 0);
+                       assert(!*endptr);
+                       assert(c > 0 && c < INT_MAX);
+                       batch = c;
+                       break;
+               case 's':
+                       do_sleep = true;
+                       break;
+               case 'x':
+                       do_relax = true;
+                       break;
+               case 'e':
+                       do_exit = true;
+                       break;
+               default:
+                       help();
+                       exit(4);
+                       break;
+               }
+       }
+
+       /* does nothing here, used to make sure all smp APIs compile */
+       smp_acquire();
+       smp_release();
+       smp_mb();
+done:
+
+       if (batch > max_outstanding)
+               batch = max_outstanding;
+
+       if (optind < argc) {
+               help();
+               exit(4);
+       }
+       alloc_ring();
+
+       ret = pthread_create(&host, NULL, start_host, host_arg);
+       assert(!ret);
+       ret = pthread_create(&guest, NULL, start_guest, guest_arg);
+       assert(!ret);
+
+       ret = pthread_join(guest, &tret);
+       assert(!ret);
+       ret = pthread_join(host, &tret);
+       assert(!ret);
+       return 0;
+}
diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h
new file mode 100644 (file)
index 0000000..16917ac
--- /dev/null
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Common macros and functions for ring benchmarking.
+ */
+#ifndef MAIN_H
+#define MAIN_H
+
+#include <stdbool.h>
+
+extern bool do_exit;
+
+#if defined(__x86_64__) || defined(__i386__)
+#include "x86intrin.h"
+
+static inline void wait_cycles(unsigned long long cycles)
+{
+       unsigned long long t;
+
+       t = __rdtsc();
+       while (__rdtsc() - t < cycles) {}
+}
+
+#define VMEXIT_CYCLES 500
+#define VMENTRY_CYCLES 500
+
+#else
+static inline void wait_cycles(unsigned long long cycles)
+{
+       _Exit(5);
+}
+#define VMEXIT_CYCLES 0
+#define VMENTRY_CYCLES 0
+#endif
+
+static inline void vmexit(void)
+{
+       if (!do_exit)
+               return;
+       
+       wait_cycles(VMEXIT_CYCLES);
+}
+static inline void vmentry(void)
+{
+       if (!do_exit)
+               return;
+       
+       wait_cycles(VMENTRY_CYCLES);
+}
+
+/* implemented by ring */
+void alloc_ring(void);
+/* guest side */
+int add_inbuf(unsigned, void *, void *);
+void *get_buf(unsigned *, void **);
+void disable_call();
+bool enable_call();
+void kick_available();
+void poll_used();
+/* host side */
+void disable_kick();
+bool enable_kick();
+bool use_buf(unsigned *, void **);
+void call_used();
+void poll_avail();
+
+/* implemented by main */
+extern bool do_sleep;
+void kick(void);
+void wait_for_kick(void);
+void call(void);
+void wait_for_call(void);
+
+extern unsigned ring_size;
+
+/* Compiler barrier - similar to what Linux uses */
+#define barrier() asm volatile("" ::: "memory")
+
+/* Is there a portable way to do this? */
+#if defined(__x86_64__) || defined(__i386__)
+#define cpu_relax() asm ("rep; nop" ::: "memory")
+#else
+#define cpu_relax() assert(0)
+#endif
+
+extern bool do_relax;
+
+static inline void busy_wait(void)
+{
+       if (do_relax)
+               cpu_relax();
+       else
+               /* prevent compiler from removing busy loops */
+               barrier();
+} 
+
+/*
+ * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
+ * with other __ATOMIC_SEQ_CST calls.
+ */
+#define smp_mb() __sync_synchronize()
+
+/*
+ * This abuses the atomic builtins for thread fences, and
+ * adds a compiler barrier.
+ */
+#define smp_release() do { \
+    barrier(); \
+    __atomic_thread_fence(__ATOMIC_RELEASE); \
+} while (0)
+
+#define smp_acquire() do { \
+    __atomic_thread_fence(__ATOMIC_ACQUIRE); \
+    barrier(); \
+} while (0)
+
+#endif
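
The release/acquire pair is how the ring implementations below publish data: the producer writes the payload, issues smp_release(), then sets the flag; the consumer tests the flag, issues smp_acquire(), then reads the payload, so the payload reads cannot be reordered before the flag test. A miniature pairing sketch, assuming the macros above plus the READ_ONCE/WRITE_ONCE helpers from tools/virtio/linux/compiler.h (producer/consumer names are hypothetical):

    #include <assert.h>

    static unsigned long payload;
    static int flag;

    static void producer(void)
    {
            WRITE_ONCE(payload, 42UL);
            smp_release();          /* order payload write before flag write */
            WRITE_ONCE(flag, 1);
    }

    static void consumer(void)
    {
            while (!READ_ONCE(flag))
                    ;               /* spin until the producer sets the flag */
            smp_acquire();          /* order flag read before payload read */
            assert(READ_ONCE(payload) == 42UL);
    }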
diff --git a/tools/virtio/ringtest/ring.c b/tools/virtio/ringtest/ring.c
new file mode 100644 (file)
index 0000000..c25c8d2
--- /dev/null
@@ -0,0 +1,272 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
+ * signalling, unconditionally.
+ */
+#define _GNU_SOURCE
+#include "main.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Next - Where next entry will be written.
+ * Prev - "Next" value when event triggered previously.
+ * Event - Peer requested event after writing this entry.
+ */
+static inline bool need_event(unsigned short event,
+                             unsigned short next,
+                             unsigned short prev)
+{
+       return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
+}
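+
+/* Worked example (illustrative, not from the patch): with prev = 5 and
+ * next = 8, entries 5..7 were written since the last notification. For
+ * event = 6: (unsigned short)(8 - 6 - 1) = 1 < (unsigned short)(8 - 5) = 3,
+ * so the peer must be notified. For event = 8 (nothing written past the
+ * requested point): 8 - 8 - 1 wraps to 65535, which is not < 3.
+ */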
+
+/* Design:
+ * Guest adds descriptors with unique index values and DESC_HW in flags.
+ * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
+ * Flags are always set last.
+ */
+#define DESC_HW 0x1
+
+struct desc {
+       unsigned short flags;
+       unsigned short index;
+       unsigned len;
+       unsigned long long addr;
+};
+
+/* how much padding is needed to avoid false cache sharing */
+#define HOST_GUEST_PADDING 0x80
+
+/* Mostly read */
+struct event {
+       unsigned short kick_index;
+       unsigned char reserved0[HOST_GUEST_PADDING - 2];
+       unsigned short call_index;
+       unsigned char reserved1[HOST_GUEST_PADDING - 2];
+};
+
+struct data {
+       void *buf; /* descriptor is writeable, we can't get buf from there */
+       void *data;
+} *data;
+
+struct desc *ring;
+struct event *event;
+
+struct guest {
+       unsigned avail_idx;
+       unsigned last_used_idx;
+       unsigned num_free;
+       unsigned kicked_avail_idx;
+       unsigned char reserved[HOST_GUEST_PADDING - 12];
+} guest;
+
+struct host {
+       /* we do not need to track last avail index
+        * unless we have more than one in flight.
+        */
+       unsigned used_idx;
+       unsigned called_used_idx;
+       unsigned char reserved[HOST_GUEST_PADDING - 4];
+} host;
+
+/* implemented by ring */
+void alloc_ring(void)
+{
+       int ret;
+       int i;
+
+       ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
+       if (ret) {
+               perror("Unable to allocate ring buffer.\n");
+               exit(3);
+       }
+       event = malloc(sizeof *event);
+       if (!event) {
+               perror("Unable to allocate event buffer.\n");
+               exit(3);
+       }
+       memset(event, 0, sizeof *event);
+       guest.avail_idx = 0;
+       guest.kicked_avail_idx = -1;
+       guest.last_used_idx = 0;
+       host.used_idx = 0;
+       host.called_used_idx = -1;
+       for (i = 0; i < ring_size; ++i) {
+               struct desc desc = {
+                       .index = i,
+               };
+               ring[i] = desc;
+       }
+       guest.num_free = ring_size;
+       data = malloc(ring_size * sizeof *data);
+       if (!data) {
+               perror("Unable to allocate data buffer.\n");
+               exit(3);
+       }
+       memset(data, 0, ring_size * sizeof *data);
+}
+
+/* guest side */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+       unsigned head, index;
+
+       if (!guest.num_free)
+               return -1;
+
+       guest.num_free--;
+       head = (ring_size - 1) & (guest.avail_idx++);
+
+       /* Start with a write. On MESI architectures this helps
+        * avoid a shared state with consumer that is polling this descriptor.
+        */
+       ring[head].addr = (unsigned long)(void*)buf;
+       ring[head].len = len;
+       /* read below might bypass write above. That is OK because it's just an
+        * optimization. If this happens, we will get the cache line in a
+        * shared state which is unfortunate, but probably not worth it to
+        * add an explicit full barrier to avoid this.
+        */
+       barrier();
+       index = ring[head].index;
+       data[index].buf = buf;
+       data[index].data = datap;
+       /* Barrier A (for pairing) */
+       smp_release();
+       ring[head].flags = DESC_HW;
+
+       return 0;
+}
+
+void *get_buf(unsigned *lenp, void **bufp)
+{
+       unsigned head = (ring_size - 1) & guest.last_used_idx;
+       unsigned index;
+       void *datap;
+
+       if (ring[head].flags & DESC_HW)
+               return NULL;
+       /* Barrier B (for pairing) */
+       smp_acquire();
+       *lenp = ring[head].len;
+       index = ring[head].index & (ring_size - 1);
+       datap = data[index].data;
+       *bufp = data[index].buf;
+       data[index].buf = NULL;
+       data[index].data = NULL;
+       guest.num_free++;
+       guest.last_used_idx++;
+       return datap;
+}
+
+void poll_used(void)
+{
+       unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+       while (ring[head].flags & DESC_HW)
+               busy_wait();
+}
+
+void disable_call()
+{
+       /* Doing nothing to disable calls might cause
+        * extra interrupts, but reduces the number of cache misses.
+        */
+}
+
+bool enable_call()
+{
+       unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+       event->call_index = guest.last_used_idx;
+       /* Flush call index write */
+       /* Barrier D (for pairing) */
+       smp_mb();
+       return ring[head].flags & DESC_HW;
+}
+
+void kick_available(void)
+{
+       /* Flush in previous flags write */
+       /* Barrier C (for pairing) */
+       smp_mb();
+       if (!need_event(event->kick_index,
+                       guest.avail_idx,
+                       guest.kicked_avail_idx))
+               return;
+
+       guest.kicked_avail_idx = guest.avail_idx;
+       kick();
+}
+
+/* host side */
+void disable_kick()
+{
+       /* Doing nothing to disable kicks might cause
+        * extra interrupts, but reduces the number of cache misses.
+        */
+}
+
+bool enable_kick()
+{
+       unsigned head = (ring_size - 1) & host.used_idx;
+
+       event->kick_index = host.used_idx;
+       /* Barrier C (for pairing) */
+       smp_mb();
+       return !(ring[head].flags & DESC_HW);
+}
+
+void poll_avail(void)
+{
+       unsigned head = (ring_size - 1) & host.used_idx;
+
+       while (!(ring[head].flags & DESC_HW))
+               busy_wait();
+}
+
+bool use_buf(unsigned *lenp, void **bufp)
+{
+       unsigned head = (ring_size - 1) & host.used_idx;
+
+       if (!(ring[head].flags & DESC_HW))
+               return false;
+
+       /* make sure length read below is not speculated */
+       /* Barrier A (for pairing) */
+       smp_acquire();
+
+       /* simple in-order completion: we don't need
+        * to touch index at all. This also means we
+        * can just modify the descriptor in-place.
+        */
+       ring[head].len--;
+       /* Make sure len is valid before flags.
+        * Note: alternative is to write len and flags in one access -
+        * possible on 64 bit architectures but wmb is free on Intel anyway
+        * so I have no way to test whether it's a gain.
+        */
+       /* Barrier B (for pairing) */
+       smp_release();
+       ring[head].flags = 0;
+       host.used_idx++;
+       return true;
+}
+
+void call_used(void)
+{
+       /* Flush in previous flags write */
+       /* Barrier D (for pairing) */
+       smp_mb();
+       if (!need_event(event->call_index,
+                       host.used_idx,
+                       host.called_used_idx))
+               return;
+
+       host.called_used_idx = host.used_idx;
+       call();
+}
diff --git a/tools/virtio/ringtest/run-on-all.sh b/tools/virtio/ringtest/run-on-all.sh
new file mode 100755 (executable)
index 0000000..52b0f71
--- /dev/null
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+#use last CPU for host. Why not the first?
+#many devices tend to use cpu0 by default so
+#it tends to be busier
+HOST_AFFINITY=$(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n|tail -1)
+
+#run command on all cpus
+for cpu in $(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n);
+do
+       #Don't run guest and host on same CPU
+       #It actually works ok if using signalling
+       if
+               (echo "$@" | grep -e "--sleep" > /dev/null) || \
+                       test $HOST_AFFINITY '!=' $cpu
+       then
+               echo "GUEST AFFINITY $cpu"
+               "$@" --host-affinity $HOST_AFFINITY --guest-affinity $cpu
+       fi
+done
+echo "NO GUEST AFFINITY"
+"$@" --host-affinity $HOST_AFFINITY
+echo "NO AFFINITY"
+"$@"
diff --git a/tools/virtio/ringtest/virtio_ring_0_9.c b/tools/virtio/ringtest/virtio_ring_0_9.c
new file mode 100644 (file)
index 0000000..47c9a1a
--- /dev/null
@@ -0,0 +1,316 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Partial implementation of virtio 0.9. event index is used for signalling,
+ * unconditionally. Design roughly follows linux kernel implementation in order
+ * to be able to judge its performance.
+ */
+#define _GNU_SOURCE
+#include "main.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <linux/virtio_ring.h>
+
+struct data {
+       void *data;
+} *data;
+
+struct vring ring;
+
+/* enabling the below activates experimental ring polling code
+ * (which skips index reads on consumer in favor of looking at
+ * high bits of ring id ^ 0x8000).
+ */
+/* #define RING_POLL */
+
+/* how much padding is needed to avoid false cache sharing */
+#define HOST_GUEST_PADDING 0x80
+
+struct guest {
+       unsigned short avail_idx;
+       unsigned short last_used_idx;
+       unsigned short num_free;
+       unsigned short kicked_avail_idx;
+       unsigned short free_head;
+       unsigned char reserved[HOST_GUEST_PADDING - 10];
+} guest;
+
+struct host {
+       /* we do not need to track last avail index
+        * unless we have more than one in flight.
+        */
+       unsigned short used_idx;
+       unsigned short called_used_idx;
+       unsigned char reserved[HOST_GUEST_PADDING - 4];
+} host;
+
+/* implemented by ring */
+void alloc_ring(void)
+{
+       int ret;
+       int i;
+       void *p;
+
+       ret = posix_memalign(&p, 0x1000, vring_size(ring_size, 0x1000));
+       if (ret) {
+               perror("Unable to allocate ring buffer.\n");
+               exit(3);
+       }
+       memset(p, 0, vring_size(ring_size, 0x1000));
+       vring_init(&ring, ring_size, p, 0x1000);
+
+       guest.avail_idx = 0;
+       guest.kicked_avail_idx = -1;
+       guest.last_used_idx = 0;
+       /* Put everything in free lists. */
+       guest.free_head = 0;
+       for (i = 0; i < ring_size - 1; i++)
+               ring.desc[i].next = i + 1;
+       host.used_idx = 0;
+       host.called_used_idx = -1;
+       guest.num_free = ring_size;
+       data = malloc(ring_size * sizeof *data);
+       if (!data) {
+               perror("Unable to allocate data buffer.\n");
+               exit(3);
+       }
+       memset(data, 0, ring_size * sizeof *data);
+}
+
+/* guest side */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+       unsigned head, avail;
+       struct vring_desc *desc;
+
+       if (!guest.num_free)
+               return -1;
+
+       head = guest.free_head;
+       guest.num_free--;
+
+       desc = ring.desc;
+       desc[head].flags = VRING_DESC_F_NEXT;
+       desc[head].addr = (unsigned long)(void *)buf;
+       desc[head].len = len;
+       /* We do it like this to simulate the way
+        * we'd have to flip it if we had multiple
+        * descriptors.
+        */
+       desc[head].flags &= ~VRING_DESC_F_NEXT;
+       guest.free_head = desc[head].next;
+
+       data[head].data = datap;
+
+#ifdef RING_POLL
+       /* Barrier A (for pairing) */
+       smp_release();
+       avail = guest.avail_idx++;
+       ring.avail->ring[avail & (ring_size - 1)] =
+               (head | (avail & ~(ring_size - 1))) ^ 0x8000;
+#else
+       avail = (ring_size - 1) & (guest.avail_idx++);
+       ring.avail->ring[avail] = head;
+       /* Barrier A (for pairing) */
+       smp_release();
+#endif
+       ring.avail->idx = guest.avail_idx;
+       return 0;
+}
+
+void *get_buf(unsigned *lenp, void **bufp)
+{
+       unsigned head;
+       unsigned index;
+       void *datap;
+
+#ifdef RING_POLL
+       head = (ring_size - 1) & guest.last_used_idx;
+       index = ring.used->ring[head].id;
+       if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1))
+               return NULL;
+       /* Barrier B (for pairing) */
+       smp_acquire();
+       index &= ring_size - 1;
+#else
+       if (ring.used->idx == guest.last_used_idx)
+               return NULL;
+       /* Barrier B (for pairing) */
+       smp_acquire();
+       head = (ring_size - 1) & guest.last_used_idx;
+       index = ring.used->ring[head].id;
+#endif
+       *lenp = ring.used->ring[head].len;
+       datap = data[index].data;
+       *bufp = (void*)(unsigned long)ring.desc[index].addr;
+       data[index].data = NULL;
+       ring.desc[index].next = guest.free_head;
+       guest.free_head = index;
+       guest.num_free++;
+       guest.last_used_idx++;
+       return datap;
+}
+
+void poll_used(void)
+{
+#ifdef RING_POLL
+       unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+       for (;;) {
+               unsigned index = ring.used->ring[head].id;
+
+               if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1))
+                       busy_wait();
+               else
+                       break;
+       }
+#else
+       unsigned head = guest.last_used_idx;
+
+       while (ring.used->idx == head)
+               busy_wait();
+#endif
+}
+
+void disable_call()
+{
+       /* Doing nothing to disable calls might cause
+        * extra interrupts, but reduces the number of cache misses.
+        */
+}
+
+bool enable_call()
+{
+       unsigned short last_used_idx;
+
+       vring_used_event(&ring) = (last_used_idx = guest.last_used_idx);
+       /* Flush call index write */
+       /* Barrier D (for pairing) */
+       smp_mb();
+#ifdef RING_POLL
+       {
+               unsigned short head = last_used_idx & (ring_size - 1);
+               unsigned index = ring.used->ring[head].id;
+
+               return (index ^ last_used_idx ^ 0x8000) & ~(ring_size - 1);
+       }
+#else
+       return ring.used->idx == last_used_idx;
+#endif
+}
+
+void kick_available(void)
+{
+       /* Flush in previous flags write */
+       /* Barrier C (for pairing) */
+       smp_mb();
+       if (!vring_need_event(vring_avail_event(&ring),
+                             guest.avail_idx,
+                             guest.kicked_avail_idx))
+               return;
+
+       guest.kicked_avail_idx = guest.avail_idx;
+       kick();
+}
+
+/* host side */
+void disable_kick()
+{
+       /* Doing nothing to disable kicks might cause
+        * extra interrupts, but reduces the number of cache misses.
+        */
+}
+
+bool enable_kick()
+{
+       unsigned head = host.used_idx;
+
+       vring_avail_event(&ring) = head;
+       /* Barrier C (for pairing) */
+       smp_mb();
+#ifdef RING_POLL
+       {
+               unsigned index = ring.avail->ring[head & (ring_size - 1)];
+
+               return (index ^ head ^ 0x8000) & ~(ring_size - 1);
+       }
+#else
+       return head == ring.avail->idx;
+#endif
+}
+
+void poll_avail(void)
+{
+       unsigned head = host.used_idx;
+#ifdef RING_POLL
+       for (;;) {
+               unsigned index = ring.avail->ring[head & (ring_size - 1)];
+               if ((index ^ head ^ 0x8000) & ~(ring_size - 1))
+                       busy_wait();
+               else
+                       break;
+       }
+#else
+       while (ring.avail->idx == head)
+               busy_wait();
+#endif
+}
+
+bool use_buf(unsigned *lenp, void **bufp)
+{
+       unsigned used_idx = host.used_idx;
+       struct vring_desc *desc;
+       unsigned head;
+
+#ifdef RING_POLL
+       head = ring.avail->ring[used_idx & (ring_size - 1)];
+       if ((used_idx ^ head ^ 0x8000) & ~(ring_size - 1))
+               return false;
+       /* Barrier A (for pairing) */
+       smp_acquire();
+
+       used_idx &= ring_size - 1;
+       desc = &ring.desc[head & (ring_size - 1)];
+#else
+       if (used_idx == ring.avail->idx)
+               return false;
+
+       /* Barrier A (for pairing) */
+       smp_acquire();
+
+       used_idx &= ring_size - 1;
+       head = ring.avail->ring[used_idx];
+       desc = &ring.desc[head];
+#endif
+
+       *lenp = desc->len;
+       *bufp = (void *)(unsigned long)desc->addr;
+
+       /* now update used ring */
+       ring.used->ring[used_idx].id = head;
+       ring.used->ring[used_idx].len = desc->len - 1;
+       /* Barrier B (for pairing) */
+       smp_release();
+       host.used_idx++;
+       ring.used->idx = host.used_idx;
+       
+       return true;
+}
+
+void call_used(void)
+{
+       /* Flush in previous flags write */
+       /* Barrier D (for pairing) */
+       smp_mb();
+       if (!vring_need_event(vring_used_event(&ring),
+                             host.used_idx,
+                             host.called_used_idx))
+               return;
+
+       host.called_used_idx = host.used_idx;
+       call();
+}
diff --git a/tools/virtio/ringtest/virtio_ring_poll.c b/tools/virtio/ringtest/virtio_ring_poll.c
new file mode 100644 (file)
index 0000000..84fc2c5
--- /dev/null
@@ -0,0 +1,2 @@
+#define RING_POLL 1
+#include "virtio_ring_0_9.c"