Merge branch 'x86/asm' into x86/mm, to resolve conflicts
authorIngo Molnar <mingo@kernel.org>
Fri, 15 Jul 2016 08:26:04 +0000 (10:26 +0200)
committerIngo Molnar <mingo@kernel.org>
Fri, 15 Jul 2016 08:26:04 +0000 (10:26 +0200)
 Conflicts:
tools/testing/selftests/x86/Makefile

Signed-off-by: Ingo Molnar <mingo@kernel.org>
162 files changed:
Documentation/x86/intel_mpx.txt
Documentation/x86/tlb.txt
Documentation/x86/x86_64/machinecheck
arch/arm64/include/asm/cputype.h
arch/arm64/include/asm/ptrace.h
arch/arm64/kernel/asm-offsets.c
arch/arm64/kernel/cpu_errata.c
arch/arm64/kernel/entry.S
arch/arm64/mm/fault.c
arch/x86/Kconfig
arch/x86/boot/bitops.h
arch/x86/boot/boot.h
arch/x86/boot/string.c
arch/x86/entry/common.c
arch/x86/entry/thunk_64.S
arch/x86/entry/vdso/Makefile
arch/x86/entry/vdso/vdso32/sigreturn.S
arch/x86/entry/vdso/vdso32/system_call.S
arch/x86/events/core.c
arch/x86/events/intel/Makefile
arch/x86/events/intel/core.c
arch/x86/include/asm/apm.h
arch/x86/include/asm/arch_hweight.h
arch/x86/include/asm/archrandom.h
arch/x86/include/asm/asm.h
arch/x86/include/asm/atomic.h
arch/x86/include/asm/atomic64_64.h
arch/x86/include/asm/bitops.h
arch/x86/include/asm/compat.h
arch/x86/include/asm/local.h
arch/x86/include/asm/percpu.h
arch/x86/include/asm/preempt.h
arch/x86/include/asm/rmwcc.h
arch/x86/include/asm/rwsem.h
arch/x86/include/asm/signal.h
arch/x86/include/asm/sync_bitops.h
arch/x86/kernel/amd_nb.c
arch/x86/kernel/cpu/rdrand.c
arch/x86/kernel/i386_ksyms_32.c
arch/x86/kernel/signal_compat.c
arch/x86/kernel/vm86_32.c
arch/x86/kernel/x8664_ksyms_64.c
arch/x86/lib/Makefile
arch/x86/lib/hweight.S [new file with mode: 0644]
arch/x86/pci/acpi.c
arch/x86/power/hibernate_64.c
arch/x86/power/hibernate_asm_64.S
arch/x86/xen/enlighten.c
block/ioprio.c
drivers/acpi/acpi_dbg.c
drivers/acpi/acpica/nsload.c
drivers/acpi/acpica/nsparse.c
drivers/acpi/pci_link.c
drivers/block/xen-blkfront.c
drivers/cpuidle/cpuidle.c
drivers/gpio/Kconfig
drivers/gpio/gpio-sch.c
drivers/gpio/gpiolib-legacy.c
drivers/gpio/gpiolib.c
drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c
drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h
drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c
drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
drivers/gpu/drm/sun4i/sun4i_crtc.c
drivers/gpu/drm/sun4i/sun4i_drv.c
drivers/iommu/amd_iommu_init.c
drivers/iommu/intel-iommu.c
drivers/irqchip/irq-mips-gic.c
drivers/net/bonding/bond_3ad.c
drivers/net/bonding/bond_alb.c
drivers/net/bonding/bond_main.c
drivers/net/ethernet/broadcom/bcmsysport.c
drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
drivers/net/ethernet/intel/e1000e/netdev.c
drivers/net/ethernet/intel/ixgbevf/mbx.c
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/mellanox/mlx5/core/cmd.c
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
drivers/net/ethernet/mellanox/mlx5/core/health.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
drivers/net/ethernet/mellanox/mlx5/core/vport.c
drivers/net/ethernet/microchip/enc28j60.c
drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/geneve.c
drivers/net/macsec.c
drivers/net/phy/dp83867.c
drivers/net/usb/cdc_ncm.c
drivers/net/usb/r8152.c
drivers/net/usb/usbnet.c
drivers/platform/chrome/cros_ec_dev.c
drivers/s390/net/qeth_l2_main.c
drivers/s390/net/qeth_l3_main.c
drivers/scsi/ipr.c
drivers/scsi/qla2xxx/qla_isr.c
drivers/scsi/scsi_devinfo.c
drivers/xen/xen-acpi-processor.c
drivers/xen/xenbus/xenbus_dev_frontend.c
drivers/xen/xenbus/xenbus_xs.c
fs/configfs/file.c
fs/ecryptfs/crypto.c
fs/ecryptfs/file.c
fs/ecryptfs/kthread.c
fs/ecryptfs/main.c
fs/fs-writeback.c
include/acpi/acpi_drivers.h
include/linux/context_tracking.h
include/linux/mlx5/driver.h
include/linux/random.h
include/linux/skbuff.h
include/net/bonding.h
include/net/ip.h
init/Kconfig
kernel/events/core.c
kernel/sched/fair.c
lib/Makefile
lib/hweight.c
net/bridge/br_netfilter_hooks.c
net/core/flow_dissector.c
net/core/skbuff.c
net/decnet/dn_fib.c
net/ipv4/ip_output.c
net/ipv6/ip6_fib.c
net/packet/af_packet.c
net/rds/tcp.c
net/sched/act_mirred.c
net/tipc/netlink_compat.c
security/apparmor/lsm.c
sound/core/timer.c
sound/pci/au88x0/au88x0_core.c
sound/pci/echoaudio/echoaudio.c
sound/pci/hda/hda_generic.c
sound/pci/hda/hda_intel.c
sound/pci/hda/patch_realtek.c
sound/soc/codecs/Kconfig
sound/soc/codecs/ak4613.c
sound/soc/codecs/cx20442.c
sound/soc/codecs/hdac_hdmi.c
sound/soc/codecs/rt5645.c
sound/soc/codecs/rt5670.c
sound/soc/codecs/wm5102.c
sound/soc/codecs/wm5110.c
sound/soc/codecs/wm8940.c
sound/soc/davinci/davinci-mcasp.c
sound/soc/davinci/davinci-mcasp.h
sound/soc/fsl/fsl_ssi.c
sound/soc/intel/atom/sst-mfld-platform-compress.c
sound/soc/intel/skylake/bxt-sst.c
sound/soc/sh/rcar/adg.c
tools/testing/selftests/x86/Makefile
tools/testing/selftests/x86/mpx-debug.h [new file with mode: 0644]
tools/testing/selftests/x86/mpx-dig.c [new file with mode: 0644]
tools/testing/selftests/x86/mpx-hw.h [new file with mode: 0644]
tools/testing/selftests/x86/mpx-mini-test.c [new file with mode: 0644]
tools/testing/selftests/x86/mpx-mm.h [new file with mode: 0644]

index 1a5a121..85d0549 100644 (file)
@@ -45,7 +45,7 @@ is how we expect the compiler, application and kernel to work together.
    MPX-instrumented.
 3) The kernel detects that the CPU has MPX, allows the new prctl() to
    succeed, and notes the location of the bounds directory. Userspace is
-   expected to keep the bounds directory at that locationWe note it
+   expected to keep the bounds directory at that locationWe note it
    instead of reading it each time because the 'xsave' operation needed
    to access the bounds directory register is an expensive operation.
 4) If the application needs to spill bounds out of the 4 registers, it
@@ -167,7 +167,7 @@ If a #BR is generated due to a bounds violation caused by MPX.
 We need to decode MPX instructions to get violation address and
 set this address into extended struct siginfo.
 
-The _sigfault feild of struct siginfo is extended as follow:
+The _sigfault field of struct siginfo is extended as follow:
 
 87             /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
 88             struct {
@@ -240,5 +240,5 @@ them at the same bounds table.
 This is allowed architecturally.  See more information "Intel(R) Architecture
 Instruction Set Extensions Programming Reference" (9.3.4).
 
-However, if users did this, the kernel might be fooled in to unmaping an
+However, if users did this, the kernel might be fooled in to unmapping an
 in-use bounds table since it does not recognize sharing.
index 39d1723..6a0607b 100644 (file)
@@ -5,7 +5,7 @@ memory, it has two choices:
     from areas other than the one we are trying to flush will be
     destroyed and must be refilled later, at some cost.
  2. Use the invlpg instruction to invalidate a single page at a
-    time.  This could potentialy cost many more instructions, but
+    time.  This could potentially cost many more instructions, but
     it is a much more precise operation, causing no collateral
     damage to other TLB entries.
 
@@ -19,7 +19,7 @@ Which method to do depends on a few things:
     work.
  3. The size of the TLB.  The larger the TLB, the more collateral
     damage we do with a full flush.  So, the larger the TLB, the
-    more attrative an individual flush looks.  Data and
+    more attractive an individual flush looks.  Data and
     instructions have separate TLBs, as do different page sizes.
  4. The microarchitecture.  The TLB has become a multi-level
     cache on modern CPUs, and the global flushes have become more
index b1fb302..d0648a7 100644 (file)
@@ -36,7 +36,7 @@ between all CPUs.
 
 check_interval
        How often to poll for corrected machine check errors, in seconds
-       (Note output is hexademical). Default 5 minutes.  When the poller
+       (Note output is hexadecimal). Default 5 minutes.  When the poller
        finds MCEs it triggers an exponential speedup (poll more often) on
        the polling interval.  When the poller stops finding MCEs, it
        triggers an exponential backoff (poll less often) on the polling
index 87e1985..9d9fd4b 100644 (file)
 #define APM_CPU_PART_POTENZA           0x000
 
 #define CAVIUM_CPU_PART_THUNDERX       0x0A1
+#define CAVIUM_CPU_PART_THUNDERX_81XX  0x0A2
 
 #define BRCM_CPU_PART_VULCAN           0x516
 
 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
 #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
 #define MIDR_THUNDERX  MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
+#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
 
 #ifndef __ASSEMBLY__
 
index a307eb6..7f94755 100644 (file)
@@ -117,6 +117,8 @@ struct pt_regs {
        };
        u64 orig_x0;
        u64 syscallno;
+       u64 orig_addr_limit;
+       u64 unused;     // maintain 16 byte alignment
 };
 
 #define arch_has_single_step() (1)
index f8e5d47..2f4ba77 100644 (file)
@@ -60,6 +60,7 @@ int main(void)
   DEFINE(S_PC,                 offsetof(struct pt_regs, pc));
   DEFINE(S_ORIG_X0,            offsetof(struct pt_regs, orig_x0));
   DEFINE(S_SYSCALLNO,          offsetof(struct pt_regs, syscallno));
+  DEFINE(S_ORIG_ADDR_LIMIT,    offsetof(struct pt_regs, orig_addr_limit));
   DEFINE(S_FRAME_SIZE,         sizeof(struct pt_regs));
   BLANK();
   DEFINE(MM_CONTEXT_ID,                offsetof(struct mm_struct, context.id.counter));
index d427894..af716b6 100644 (file)
@@ -98,6 +98,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
                MIDR_RANGE(MIDR_THUNDERX, 0x00,
                           (1 << MIDR_VARIANT_SHIFT) | 1),
        },
+       {
+       /* Cavium ThunderX, T81 pass 1.0 */
+               .desc = "Cavium erratum 27456",
+               .capability = ARM64_WORKAROUND_CAVIUM_27456,
+               MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00),
+       },
 #endif
        {
        }
index 12e8d2b..6c3b734 100644 (file)
@@ -28,6 +28,7 @@
 #include <asm/errno.h>
 #include <asm/esr.h>
 #include <asm/irq.h>
+#include <asm/memory.h>
 #include <asm/thread_info.h>
 #include <asm/unistd.h>
 
        mov     x29, xzr                        // fp pointed to user-space
        .else
        add     x21, sp, #S_FRAME_SIZE
-       .endif
+       get_thread_info tsk
+       /* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */
+       ldr     x20, [tsk, #TI_ADDR_LIMIT]
+       str     x20, [sp, #S_ORIG_ADDR_LIMIT]
+       mov     x20, #TASK_SIZE_64
+       str     x20, [tsk, #TI_ADDR_LIMIT]
+       ALTERNATIVE(nop, SET_PSTATE_UAO(0), ARM64_HAS_UAO, CONFIG_ARM64_UAO)
+       .endif /* \el == 0 */
        mrs     x22, elr_el1
        mrs     x23, spsr_el1
        stp     lr, x21, [sp, #S_LR]
        .endm
 
        .macro  kernel_exit, el
+       .if     \el != 0
+       /* Restore the task's original addr_limit. */
+       ldr     x20, [sp, #S_ORIG_ADDR_LIMIT]
+       str     x20, [tsk, #TI_ADDR_LIMIT]
+
+       /* No need to restore UAO, it will be restored from SPSR_EL1 */
+       .endif
+
        ldp     x21, x22, [sp, #S_PC]           // load ELR, SPSR
        .if     \el == 0
        ct_user_enter
@@ -406,7 +422,6 @@ el1_irq:
        bl      trace_hardirqs_off
 #endif
 
-       get_thread_info tsk
        irq_handler
 
 #ifdef CONFIG_PREEMPT
index 013e2cb..b1166d1 100644 (file)
@@ -280,7 +280,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
        }
 
        if (permission_fault(esr) && (addr < USER_DS)) {
-               if (get_fs() == KERNEL_DS)
+               /* regs->orig_addr_limit may be 0 if we entered from EL0 */
+               if (regs->orig_addr_limit == KERNEL_DS)
                        die("Accessing user space memory with fs=KERNEL_DS", regs, esr);
 
                if (!search_exception_tables(regs->pc))
index d9a94da..df884a5 100644 (file)
@@ -294,11 +294,6 @@ config X86_32_LAZY_GS
        def_bool y
        depends on X86_32 && !CC_STACKPROTECTOR
 
-config ARCH_HWEIGHT_CFLAGS
-       string
-       default "-fcall-saved-ecx -fcall-saved-edx" if X86_32
-       default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64
-
 config ARCH_SUPPORTS_UPROBES
        def_bool y
 
index 878e4b9..0d41d68 100644 (file)
 #define BOOT_BITOPS_H
 #define _LINUX_BITOPS_H                /* Inhibit inclusion of <linux/bitops.h> */
 
-static inline int constant_test_bit(int nr, const void *addr)
+#include <linux/types.h>
+
+static inline bool constant_test_bit(int nr, const void *addr)
 {
        const u32 *p = (const u32 *)addr;
        return ((1UL << (nr & 31)) & (p[nr >> 5])) != 0;
 }
-static inline int variable_test_bit(int nr, const void *addr)
+static inline bool variable_test_bit(int nr, const void *addr)
 {
-       u8 v;
+       bool v;
        const u32 *p = (const u32 *)addr;
 
        asm("btl %2,%1; setc %0" : "=qm" (v) : "m" (*p), "Ir" (nr));
index a5ce666..e5612f3 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/types.h>
 #include <linux/edd.h>
 #include <asm/setup.h>
+#include <asm/asm.h>
 #include "bitops.h"
 #include "ctype.h"
 #include "cpuflags.h"
@@ -176,18 +177,18 @@ static inline void wrgs32(u32 v, addr_t addr)
 }
 
 /* Note: these only return true/false, not a signed return value! */
-static inline int memcmp_fs(const void *s1, addr_t s2, size_t len)
+static inline bool memcmp_fs(const void *s1, addr_t s2, size_t len)
 {
-       u8 diff;
-       asm volatile("fs; repe; cmpsb; setnz %0"
-                    : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
+       bool diff;
+       asm volatile("fs; repe; cmpsb" CC_SET(nz)
+                    : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len));
        return diff;
 }
-static inline int memcmp_gs(const void *s1, addr_t s2, size_t len)
+static inline bool memcmp_gs(const void *s1, addr_t s2, size_t len)
 {
-       u8 diff;
-       asm volatile("gs; repe; cmpsb; setnz %0"
-                    : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
+       bool diff;
+       asm volatile("gs; repe; cmpsb" CC_SET(nz)
+                    : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len));
        return diff;
 }
 
index 318b846..cc3bd58 100644 (file)
@@ -17,7 +17,7 @@
 
 int memcmp(const void *s1, const void *s2, size_t len)
 {
-       u8 diff;
+       bool diff;
        asm("repe; cmpsb; setnz %0"
            : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
        return diff;
index ec138e5..9e1e27d 100644 (file)
@@ -40,10 +40,10 @@ static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
 
 #ifdef CONFIG_CONTEXT_TRACKING
 /* Called on entry from user mode with IRQs off. */
-__visible void enter_from_user_mode(void)
+__visible inline void enter_from_user_mode(void)
 {
        CT_WARN_ON(ct_state() != CONTEXT_USER);
-       user_exit();
+       user_exit_irqoff();
 }
 #else
 static inline void enter_from_user_mode(void) {}
@@ -274,7 +274,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
        ti->status &= ~TS_COMPAT;
 #endif
 
-       user_enter();
+       user_enter_irqoff();
 }
 
 #define SYSCALL_EXIT_WORK_FLAGS                                \
index 027aec4..627ecbc 100644 (file)
@@ -33,7 +33,7 @@
        .endif
 
        call \func
-       jmp  restore
+       jmp  .L_restore
        _ASM_NOKPROBE(\name)
        .endm
 
@@ -54,7 +54,7 @@
 #if defined(CONFIG_TRACE_IRQFLAGS) \
  || defined(CONFIG_DEBUG_LOCK_ALLOC) \
  || defined(CONFIG_PREEMPT)
-restore:
+.L_restore:
        popq %r11
        popq %r10
        popq %r9
@@ -66,5 +66,5 @@ restore:
        popq %rdi
        popq %rbp
        ret
-       _ASM_NOKPROBE(restore)
+       _ASM_NOKPROBE(.L_restore)
 #endif
index 253b72e..68b63fd 100644 (file)
@@ -134,7 +134,7 @@ VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1
 override obj-dirs = $(dir $(obj)) $(obj)/vdso32/
 
 targets += vdso32/vdso32.lds
-targets += vdso32/note.o vdso32/vclock_gettime.o vdso32/system_call.o
+targets += vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o
 targets += vdso32/vclock_gettime.o
 
 KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -DBUILD_VDSO
@@ -156,7 +156,8 @@ $(obj)/vdso32.so.dbg: FORCE \
                      $(obj)/vdso32/vdso32.lds \
                      $(obj)/vdso32/vclock_gettime.o \
                      $(obj)/vdso32/note.o \
-                     $(obj)/vdso32/system_call.o
+                     $(obj)/vdso32/system_call.o \
+                     $(obj)/vdso32/sigreturn.o
        $(call if_changed,vdso)
 
 #
index d7ec4e2..20633e0 100644 (file)
@@ -1,11 +1,3 @@
-/*
- * Common code for the sigreturn entry points in vDSO images.
- * So far this code is the same for both int80 and sysenter versions.
- * This file is #include'd by int80.S et al to define them first thing.
- * The kernel assumes that the addresses of these routines are constant
- * for all vDSO implementations.
- */
-
 #include <linux/linkage.h>
 #include <asm/unistd_32.h>
 #include <asm/asm-offsets.h>
index 0109ac6..ed4bc97 100644 (file)
@@ -2,16 +2,11 @@
  * AT_SYSINFO entry point
 */
 
+#include <linux/linkage.h>
 #include <asm/dwarf2.h>
 #include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 
-/*
- * First get the common code for the sigreturn entry points.
- * This must come first.
- */
-#include "sigreturn.S"
-
        .text
        .globl __kernel_vsyscall
        .type __kernel_vsyscall,@function
index 33787ee..26ced53 100644 (file)
@@ -2319,7 +2319,7 @@ void
 perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
 {
        struct stack_frame frame;
-       const void __user *fp;
+       const unsigned long __user *fp;
 
        if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
                /* TODO: We don't support guest os callchain now */
@@ -2332,7 +2332,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
        if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM))
                return;
 
-       fp = (void __user *)regs->bp;
+       fp = (unsigned long __user *)regs->bp;
 
        perf_callchain_store(entry, regs->ip);
 
@@ -2345,16 +2345,17 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
        pagefault_disable();
        while (entry->nr < entry->max_stack) {
                unsigned long bytes;
+
                frame.next_frame             = NULL;
                frame.return_address = 0;
 
-               if (!access_ok(VERIFY_READ, fp, 16))
+               if (!access_ok(VERIFY_READ, fp, sizeof(*fp) * 2))
                        break;
 
-               bytes = __copy_from_user_nmi(&frame.next_frame, fp, 8);
+               bytes = __copy_from_user_nmi(&frame.next_frame, fp, sizeof(*fp));
                if (bytes != 0)
                        break;
-               bytes = __copy_from_user_nmi(&frame.return_address, fp+8, 8);
+               bytes = __copy_from_user_nmi(&frame.return_address, fp + 1, sizeof(*fp));
                if (bytes != 0)
                        break;
 
index 3660b2c..06c2baa 100644 (file)
@@ -1,8 +1,8 @@
 obj-$(CONFIG_CPU_SUP_INTEL)            += core.o bts.o cqm.o
 obj-$(CONFIG_CPU_SUP_INTEL)            += ds.o knc.o
 obj-$(CONFIG_CPU_SUP_INTEL)            += lbr.o p4.o p6.o pt.o
-obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL)   += intel-rapl.o
-intel-rapl-objs                                := rapl.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL)   += intel-rapl-perf.o
+intel-rapl-perf-objs                   := rapl.o
 obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel-uncore.o
 intel-uncore-objs                      := uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o
 obj-$(CONFIG_PERF_EVENTS_INTEL_CSTATE) += intel-cstate.o
index 7c66695..9b4f9d3 100644 (file)
@@ -115,6 +115,10 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
        INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
        INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
 
+       /*
+        * When HT is off these events can only run on the bottom 4 counters
+        * When HT is on, they are impacted by the HT bug and require EXCL access
+        */
        INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
        INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
        INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -139,6 +143,10 @@ static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
        INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
        INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
 
+       /*
+        * When HT is off these events can only run on the bottom 4 counters
+        * When HT is on, they are impacted by the HT bug and require EXCL access
+        */
        INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
        INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
        INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -182,6 +190,16 @@ struct event_constraint intel_skl_event_constraints[] = {
        FIXED_EVENT_CONSTRAINT(0x003c, 1),      /* CPU_CLK_UNHALTED.CORE */
        FIXED_EVENT_CONSTRAINT(0x0300, 2),      /* CPU_CLK_UNHALTED.REF */
        INTEL_UEVENT_CONSTRAINT(0x1c0, 0x2),    /* INST_RETIRED.PREC_DIST */
+
+       /*
+        * when HT is off, these can only run on the bottom 4 counters
+        */
+       INTEL_EVENT_CONSTRAINT(0xd0, 0xf),      /* MEM_INST_RETIRED.* */
+       INTEL_EVENT_CONSTRAINT(0xd1, 0xf),      /* MEM_LOAD_RETIRED.* */
+       INTEL_EVENT_CONSTRAINT(0xd2, 0xf),      /* MEM_LOAD_L3_HIT_RETIRED.* */
+       INTEL_EVENT_CONSTRAINT(0xcd, 0xf),      /* MEM_TRANS_RETIRED.* */
+       INTEL_EVENT_CONSTRAINT(0xc6, 0xf),      /* FRONTEND_RETIRED.* */
+
        EVENT_CONSTRAINT_END
 };
 
@@ -250,6 +268,10 @@ static struct event_constraint intel_hsw_event_constraints[] = {
        /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
        INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf),
 
+       /*
+        * When HT is off these events can only run on the bottom 4 counters
+        * When HT is on, they are impacted by the HT bug and require EXCL access
+        */
        INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
        INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
        INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -264,6 +286,13 @@ struct event_constraint intel_bdw_event_constraints[] = {
        FIXED_EVENT_CONSTRAINT(0x0300, 2),      /* CPU_CLK_UNHALTED.REF */
        INTEL_UEVENT_CONSTRAINT(0x148, 0x4),    /* L1D_PEND_MISS.PENDING */
        INTEL_UBIT_EVENT_CONSTRAINT(0x8a3, 0x4),        /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */
+       /*
+        * when HT is off, these can only run on the bottom 4 counters
+        */
+       INTEL_EVENT_CONSTRAINT(0xd0, 0xf),      /* MEM_INST_RETIRED.* */
+       INTEL_EVENT_CONSTRAINT(0xd1, 0xf),      /* MEM_LOAD_RETIRED.* */
+       INTEL_EVENT_CONSTRAINT(0xd2, 0xf),      /* MEM_LOAD_L3_HIT_RETIRED.* */
+       INTEL_EVENT_CONSTRAINT(0xcd, 0xf),      /* MEM_TRANS_RETIRED.* */
        EVENT_CONSTRAINT_END
 };
 
index 20370c6..93eebc6 100644 (file)
@@ -45,11 +45,11 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
                : "memory", "cc");
 }
 
-static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in,
-                                               u32 ecx_in, u32 *eax)
+static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
+                                           u32 ecx_in, u32 *eax)
 {
        int     cx, dx, si;
-       u8      error;
+       bool    error;
 
        /*
         * N.B. We do NOT need a cld after the BIOS call
index 02e799f..e7cd631 100644 (file)
@@ -4,8 +4,8 @@
 #include <asm/cpufeatures.h>
 
 #ifdef CONFIG_64BIT
-/* popcnt %edi, %eax -- redundant REX prefix for alignment */
-#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7"
+/* popcnt %edi, %eax */
+#define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc7"
 /* popcnt %rdi, %rax */
 #define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7"
 #define REG_IN "D"
 #define REG_OUT "a"
 #endif
 
-/*
- * __sw_hweightXX are called from within the alternatives below
- * and callee-clobbered registers need to be taken care of. See
- * ARCH_HWEIGHT_CFLAGS in <arch/x86/Kconfig> for the respective
- * compiler switches.
- */
+#define __HAVE_ARCH_SW_HWEIGHT
+
 static __always_inline unsigned int __arch_hweight32(unsigned int w)
 {
-       unsigned int res = 0;
+       unsigned int res;
 
        asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT)
-                    : "="REG_OUT (res)
-                    : REG_IN (w));
+                        : "="REG_OUT (res)
+                        : REG_IN (w));
 
        return res;
 }
@@ -53,11 +49,11 @@ static inline unsigned long __arch_hweight64(__u64 w)
 #else
 static __always_inline unsigned long __arch_hweight64(__u64 w)
 {
-       unsigned long res = 0;
+       unsigned long res;
 
        asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT)
-                    : "="REG_OUT (res)
-                    : REG_IN (w));
+                        : "="REG_OUT (res)
+                        : REG_IN (w));
 
        return res;
 }
index 69f1366..5b0579a 100644 (file)
@@ -25,8 +25,6 @@
 
 #include <asm/processor.h>
 #include <asm/cpufeature.h>
-#include <asm/alternative.h>
-#include <asm/nops.h>
 
 #define RDRAND_RETRY_LOOPS     10
 
 # define RDSEED_LONG   RDSEED_INT
 #endif
 
-#ifdef CONFIG_ARCH_RANDOM
+/* Unconditional execution of RDRAND and RDSEED */
 
-/* Instead of arch_get_random_long() when alternatives haven't run. */
-static inline int rdrand_long(unsigned long *v)
+static inline bool rdrand_long(unsigned long *v)
 {
-       int ok;
-       asm volatile("1: " RDRAND_LONG "\n\t"
-                    "jc 2f\n\t"
-                    "decl %0\n\t"
-                    "jnz 1b\n\t"
-                    "2:"
-                    : "=r" (ok), "=a" (*v)
-                    : "0" (RDRAND_RETRY_LOOPS));
-       return ok;
+       bool ok;
+       unsigned int retry = RDRAND_RETRY_LOOPS;
+       do {
+               asm volatile(RDRAND_LONG "\n\t"
+                            CC_SET(c)
+                            : CC_OUT(c) (ok), "=a" (*v));
+               if (ok)
+                       return true;
+       } while (--retry);
+       return false;
+}
+
+static inline bool rdrand_int(unsigned int *v)
+{
+       bool ok;
+       unsigned int retry = RDRAND_RETRY_LOOPS;
+       do {
+               asm volatile(RDRAND_INT "\n\t"
+                            CC_SET(c)
+                            : CC_OUT(c) (ok), "=a" (*v));
+               if (ok)
+                       return true;
+       } while (--retry);
+       return false;
 }
 
-/* A single attempt at RDSEED */
 static inline bool rdseed_long(unsigned long *v)
 {
-       unsigned char ok;
+       bool ok;
        asm volatile(RDSEED_LONG "\n\t"
-                    "setc %0"
-                    : "=qm" (ok), "=a" (*v));
+                    CC_SET(c)
+                    : CC_OUT(c) (ok), "=a" (*v));
        return ok;
 }
 
-#define GET_RANDOM(name, type, rdrand, nop)                    \
-static inline int name(type *v)                                        \
-{                                                              \
-       int ok;                                                 \
-       alternative_io("movl $0, %0\n\t"                        \
-                      nop,                                     \
-                      "\n1: " rdrand "\n\t"                    \
-                      "jc 2f\n\t"                              \
-                      "decl %0\n\t"                            \
-                      "jnz 1b\n\t"                             \
-                      "2:",                                    \
-                      X86_FEATURE_RDRAND,                      \
-                      ASM_OUTPUT2("=r" (ok), "=a" (*v)),       \
-                      "0" (RDRAND_RETRY_LOOPS));               \
-       return ok;                                              \
-}
-
-#define GET_SEED(name, type, rdseed, nop)                      \
-static inline int name(type *v)                                        \
-{                                                              \
-       unsigned char ok;                                       \
-       alternative_io("movb $0, %0\n\t"                        \
-                      nop,                                     \
-                      rdseed "\n\t"                            \
-                      "setc %0",                               \
-                      X86_FEATURE_RDSEED,                      \
-                      ASM_OUTPUT2("=q" (ok), "=a" (*v)));      \
-       return ok;                                              \
+static inline bool rdseed_int(unsigned int *v)
+{
+       bool ok;
+       asm volatile(RDSEED_INT "\n\t"
+                    CC_SET(c)
+                    : CC_OUT(c) (ok), "=a" (*v));
+       return ok;
 }
 
-#ifdef CONFIG_X86_64
-
-GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP5);
-GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP4);
-
-GET_SEED(arch_get_random_seed_long, unsigned long, RDSEED_LONG, ASM_NOP5);
-GET_SEED(arch_get_random_seed_int, unsigned int, RDSEED_INT, ASM_NOP4);
-
-#else
-
-GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP3);
-GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3);
-
-GET_SEED(arch_get_random_seed_long, unsigned long, RDSEED_LONG, ASM_NOP4);
-GET_SEED(arch_get_random_seed_int, unsigned int, RDSEED_INT, ASM_NOP4);
-
-#endif /* CONFIG_X86_64 */
-
+/* Conditional execution based on CPU type */
 #define arch_has_random()      static_cpu_has(X86_FEATURE_RDRAND)
 #define arch_has_random_seed() static_cpu_has(X86_FEATURE_RDSEED)
 
-#else
+/*
+ * These are the generic interfaces; they must not be declared if the
+ * stubs in <linux/random.h> are to be invoked,
+ * i.e. CONFIG_ARCH_RANDOM is not defined.
+ */
+#ifdef CONFIG_ARCH_RANDOM
 
-static inline int rdrand_long(unsigned long *v)
+static inline bool arch_get_random_long(unsigned long *v)
 {
-       return 0;
+       return arch_has_random() ? rdrand_long(v) : false;
 }
 
-static inline bool rdseed_long(unsigned long *v)
+static inline bool arch_get_random_int(unsigned int *v)
 {
-       return 0;
+       return arch_has_random() ? rdrand_int(v) : false;
 }
 
-#endif  /* CONFIG_ARCH_RANDOM */
+static inline bool arch_get_random_seed_long(unsigned long *v)
+{
+       return arch_has_random_seed() ? rdseed_long(v) : false;
+}
+
+static inline bool arch_get_random_seed_int(unsigned int *v)
+{
+       return arch_has_random_seed() ? rdseed_int(v) : false;
+}
 
 extern void x86_init_rdrand(struct cpuinfo_x86 *c);
 
+#else  /* !CONFIG_ARCH_RANDOM */
+
+static inline void x86_init_rdrand(struct cpuinfo_x86 *c) { }
+
+#endif  /* !CONFIG_ARCH_RANDOM */
+
 #endif /* ASM_X86_ARCHRANDOM_H */
index f5063b6..7acb51c 100644 (file)
 #define _ASM_SI                __ASM_REG(si)
 #define _ASM_DI                __ASM_REG(di)
 
+/*
+ * Macros to generate condition code outputs from inline assembly,
+ * The output operand must be type "bool".
+ */
+#ifdef __GCC_ASM_FLAG_OUTPUTS__
+# define CC_SET(c) "\n\t/* output condition code " #c "*/\n"
+# define CC_OUT(c) "=@cc" #c
+#else
+# define CC_SET(c) "\n\tset" #c " %[_cc_" #c "]\n"
+# define CC_OUT(c) [_cc_ ## c] "=qm"
+#endif
+
 /* Exception table entry */
 #ifdef __ASSEMBLY__
 # define _ASM_EXTABLE_HANDLE(from, to, handler)                        \
index 3e86742..7322c15 100644 (file)
@@ -75,9 +75,9 @@ static __always_inline void atomic_sub(int i, atomic_t *v)
  * true if the result is zero, or false for all
  * other cases.
  */
-static __always_inline int atomic_sub_and_test(int i, atomic_t *v)
+static __always_inline bool atomic_sub_and_test(int i, atomic_t *v)
 {
-       GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", "e");
+       GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e);
 }
 
 /**
@@ -112,9 +112,9 @@ static __always_inline void atomic_dec(atomic_t *v)
  * returns true if the result is 0, or false for all other
  * cases.
  */
-static __always_inline int atomic_dec_and_test(atomic_t *v)
+static __always_inline bool atomic_dec_and_test(atomic_t *v)
 {
-       GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e");
+       GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", e);
 }
 
 /**
@@ -125,9 +125,9 @@ static __always_inline int atomic_dec_and_test(atomic_t *v)
  * and returns true if the result is zero, or false for all
  * other cases.
  */
-static __always_inline int atomic_inc_and_test(atomic_t *v)
+static __always_inline bool atomic_inc_and_test(atomic_t *v)
 {
-       GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e");
+       GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", e);
 }
 
 /**
@@ -139,9 +139,9 @@ static __always_inline int atomic_inc_and_test(atomic_t *v)
  * if the result is negative, or false when
  * result is greater than or equal to zero.
  */
-static __always_inline int atomic_add_negative(int i, atomic_t *v)
+static __always_inline bool atomic_add_negative(int i, atomic_t *v)
 {
-       GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", "s");
+       GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s);
 }
 
 /**
index 0373510..57bf925 100644 (file)
@@ -70,9 +70,9 @@ static inline void atomic64_sub(long i, atomic64_t *v)
  * true if the result is zero, or false for all
  * other cases.
  */
-static inline int atomic64_sub_and_test(long i, atomic64_t *v)
+static inline bool atomic64_sub_and_test(long i, atomic64_t *v)
 {
-       GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", "e");
+       GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e);
 }
 
 /**
@@ -109,9 +109,9 @@ static __always_inline void atomic64_dec(atomic64_t *v)
  * returns true if the result is 0, or false for all other
  * cases.
  */
-static inline int atomic64_dec_and_test(atomic64_t *v)
+static inline bool atomic64_dec_and_test(atomic64_t *v)
 {
-       GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", "e");
+       GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", e);
 }
 
 /**
@@ -122,9 +122,9 @@ static inline int atomic64_dec_and_test(atomic64_t *v)
  * and returns true if the result is zero, or false for all
  * other cases.
  */
-static inline int atomic64_inc_and_test(atomic64_t *v)
+static inline bool atomic64_inc_and_test(atomic64_t *v)
 {
-       GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", "e");
+       GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", e);
 }
 
 /**
@@ -136,9 +136,9 @@ static inline int atomic64_inc_and_test(atomic64_t *v)
  * if the result is negative, or false when
  * result is greater than or equal to zero.
  */
-static inline int atomic64_add_negative(long i, atomic64_t *v)
+static inline bool atomic64_add_negative(long i, atomic64_t *v)
 {
-       GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", "s");
+       GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s);
 }
 
 /**
@@ -180,7 +180,7 @@ static inline long atomic64_xchg(atomic64_t *v, long new)
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns the old value of @v.
  */
-static inline int atomic64_add_unless(atomic64_t *v, long a, long u)
+static inline bool atomic64_add_unless(atomic64_t *v, long a, long u)
 {
        long c, old;
        c = atomic64_read(v);
index 7766d1c..68557f5 100644 (file)
@@ -201,9 +201,9 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
  */
-static __always_inline int test_and_set_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
 {
-       GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c");
+       GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", c);
 }
 
 /**
@@ -213,7 +213,7 @@ static __always_inline int test_and_set_bit(long nr, volatile unsigned long *add
  *
  * This is the same as test_and_set_bit on x86.
  */
-static __always_inline int
+static __always_inline bool
 test_and_set_bit_lock(long nr, volatile unsigned long *addr)
 {
        return test_and_set_bit(nr, addr);
@@ -228,13 +228,13 @@ test_and_set_bit_lock(long nr, volatile unsigned long *addr)
  * If two examples of this operation race, one can appear to succeed
  * but actually fail.  You must protect multiple accesses with a lock.
  */
-static __always_inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *addr)
 {
-       int oldbit;
+       bool oldbit;
 
        asm("bts %2,%1\n\t"
-           "sbb %0,%0"
-           : "=r" (oldbit), ADDR
+           CC_SET(c)
+           : CC_OUT(c) (oldbit), ADDR
            : "Ir" (nr));
        return oldbit;
 }
@@ -247,9 +247,9 @@ static __always_inline int __test_and_set_bit(long nr, volatile unsigned long *a
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
  */
-static __always_inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
-       GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c");
+       GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", c);
 }
 
 /**
@@ -268,25 +268,25 @@ static __always_inline int test_and_clear_bit(long nr, volatile unsigned long *a
  * accessed from a hypervisor on the same CPU if running in a VM: don't change
  * this without also updating arch/x86/kernel/kvm.c
  */
-static __always_inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
-       int oldbit;
+       bool oldbit;
 
        asm volatile("btr %2,%1\n\t"
-                    "sbb %0,%0"
-                    : "=r" (oldbit), ADDR
+                    CC_SET(c)
+                    : CC_OUT(c) (oldbit), ADDR
                     : "Ir" (nr));
        return oldbit;
 }
 
 /* WARNING: non atomic and it can be reordered! */
-static __always_inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool __test_and_change_bit(long nr, volatile unsigned long *addr)
 {
-       int oldbit;
+       bool oldbit;
 
        asm volatile("btc %2,%1\n\t"
-                    "sbb %0,%0"
-                    : "=r" (oldbit), ADDR
+                    CC_SET(c)
+                    : CC_OUT(c) (oldbit), ADDR
                     : "Ir" (nr) : "memory");
 
        return oldbit;
@@ -300,24 +300,24 @@ static __always_inline int __test_and_change_bit(long nr, volatile unsigned long
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
  */
-static __always_inline int test_and_change_bit(long nr, volatile unsigned long *addr)
+static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr)
 {
-       GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c");
+       GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", c);
 }
 
-static __always_inline int constant_test_bit(long nr, const volatile unsigned long *addr)
+static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr)
 {
        return ((1UL << (nr & (BITS_PER_LONG-1))) &
                (addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
 }
 
-static __always_inline int variable_test_bit(long nr, volatile const unsigned long *addr)
+static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr)
 {
-       int oldbit;
+       bool oldbit;
 
        asm volatile("bt %2,%1\n\t"
-                    "sbb %0,%0"
-                    : "=r" (oldbit)
+                    CC_SET(c)
+                    : CC_OUT(c) (oldbit)
                     : "m" (*(unsigned long *)addr), "Ir" (nr));
 
        return oldbit;
@@ -329,7 +329,7 @@ static __always_inline int variable_test_bit(long nr, volatile const unsigned lo
  * @nr: bit number to test
  * @addr: Address to start counting from
  */
-static int test_bit(int nr, const volatile unsigned long *addr);
+static bool test_bit(int nr, const volatile unsigned long *addr);
 #endif
 
 #define test_bit(nr, addr)                     \
index 5a3b2c1..a188061 100644 (file)
@@ -40,6 +40,7 @@ typedef s32           compat_long_t;
 typedef s64 __attribute__((aligned(4))) compat_s64;
 typedef u32            compat_uint_t;
 typedef u32            compat_ulong_t;
+typedef u32            compat_u32;
 typedef u64 __attribute__((aligned(4))) compat_u64;
 typedef u32            compat_uptr_t;
 
@@ -181,6 +182,16 @@ typedef struct compat_siginfo {
                /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
                struct {
                        unsigned int _addr;     /* faulting insn/memory ref. */
+                       short int _addr_lsb;    /* Valid LSB of the reported address. */
+                       union {
+                               /* used when si_code=SEGV_BNDERR */
+                               struct {
+                                       compat_uptr_t _lower;
+                                       compat_uptr_t _upper;
+                               } _addr_bnd;
+                               /* used when si_code=SEGV_PKUERR */
+                               compat_u32 _pkey;
+                       };
                } _sigfault;
 
                /* SIGPOLL */
index 4ad6560..7511978 100644 (file)
@@ -50,9 +50,9 @@ static inline void local_sub(long i, local_t *l)
  * true if the result is zero, or false for all
  * other cases.
  */
-static inline int local_sub_and_test(long i, local_t *l)
+static inline bool local_sub_and_test(long i, local_t *l)
 {
-       GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", "e");
+       GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", e);
 }
 
 /**
@@ -63,9 +63,9 @@ static inline int local_sub_and_test(long i, local_t *l)
  * returns true if the result is 0, or false for all other
  * cases.
  */
-static inline int local_dec_and_test(local_t *l)
+static inline bool local_dec_and_test(local_t *l)
 {
-       GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", "e");
+       GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", e);
 }
 
 /**
@@ -76,9 +76,9 @@ static inline int local_dec_and_test(local_t *l)
  * and returns true if the result is zero, or false for all
  * other cases.
  */
-static inline int local_inc_and_test(local_t *l)
+static inline bool local_inc_and_test(local_t *l)
 {
-       GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", "e");
+       GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", e);
 }
 
 /**
@@ -90,9 +90,9 @@ static inline int local_inc_and_test(local_t *l)
  * if the result is negative, or false when
  * result is greater than or equal to zero.
  */
-static inline int local_add_negative(long i, local_t *l)
+static inline bool local_add_negative(long i, local_t *l)
 {
-       GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", "s");
+       GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", s);
 }
 
 /**
index e0ba66c..e02e3f8 100644 (file)
@@ -510,14 +510,15 @@ do {                                                                      \
 /* This is not atomic against other CPUs -- CPU preemption needs to be off */
 #define x86_test_and_clear_bit_percpu(bit, var)                                \
 ({                                                                     \
-       int old__;                                                      \
-       asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0"           \
-                    : "=r" (old__), "+m" (var)                         \
+       bool old__;                                                     \
+       asm volatile("btr %2,"__percpu_arg(1)"\n\t"                     \
+                    CC_SET(c)                                          \
+                    : CC_OUT(c) (old__), "+m" (var)                    \
                     : "dIr" (bit));                                    \
        old__;                                                          \
 })
 
-static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr,
+static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr,
                         const unsigned long __percpu *addr)
 {
        unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG;
@@ -529,14 +530,14 @@ static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr,
 #endif
 }
 
-static inline int x86_this_cpu_variable_test_bit(int nr,
+static inline bool x86_this_cpu_variable_test_bit(int nr,
                         const unsigned long __percpu *addr)
 {
-       int oldbit;
+       bool oldbit;
 
        asm volatile("bt "__percpu_arg(2)",%1\n\t"
-                       "sbb %0,%0"
-                       : "=r" (oldbit)
+                       CC_SET(c)
+                       : CC_OUT(c) (oldbit)
                        : "m" (*(unsigned long *)addr), "Ir" (nr));
 
        return oldbit;
index d397deb..17f2186 100644 (file)
@@ -81,7 +81,7 @@ static __always_inline void __preempt_count_sub(int val)
  */
 static __always_inline bool __preempt_count_dec_and_test(void)
 {
-       GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e");
+       GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), e);
 }
 
 /*
index 8f7866a..661dd30 100644 (file)
@@ -1,11 +1,13 @@
 #ifndef _ASM_X86_RMWcc
 #define _ASM_X86_RMWcc
 
-#ifdef CC_HAVE_ASM_GOTO
+#if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO)
+
+/* Use asm goto */
 
 #define __GEN_RMWcc(fullop, var, cc, ...)                              \
 do {                                                                   \
-       asm_volatile_goto (fullop "; j" cc " %l[cc_label]"              \
+       asm_volatile_goto (fullop "; j" #cc " %l[cc_label]"             \
                        : : "m" (var), ## __VA_ARGS__                   \
                        : "memory" : cc_label);                         \
        return 0;                                                       \
@@ -19,15 +21,17 @@ cc_label:                                                           \
 #define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc)                 \
        __GEN_RMWcc(op " %1, " arg0, var, cc, vcon (val))
 
-#else /* !CC_HAVE_ASM_GOTO */
+#else /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */
+
+/* Use flags output or a set instruction */
 
 #define __GEN_RMWcc(fullop, var, cc, ...)                              \
 do {                                                                   \
-       char c;                                                         \
-       asm volatile (fullop "; set" cc " %1"                           \
-                       : "+m" (var), "=qm" (c)                         \
+       bool c;                                                         \
+       asm volatile (fullop ";" CC_SET(cc)                             \
+                       : "+m" (var), CC_OUT(cc) (c)                    \
                        : __VA_ARGS__ : "memory");                      \
-       return c != 0;                                                  \
+       return c;                                                       \
 } while (0)
 
 #define GEN_UNARY_RMWcc(op, var, arg0, cc)                             \
@@ -36,6 +40,6 @@ do {                                                                  \
 #define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc)                 \
        __GEN_RMWcc(op " %2, " arg0, var, cc, vcon (val))
 
-#endif /* CC_HAVE_ASM_GOTO */
+#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */
 
 #endif /* _ASM_X86_RMWcc */
index 453744c..1e8be26 100644 (file)
@@ -77,7 +77,7 @@ static inline void __down_read(struct rw_semaphore *sem)
 /*
  * trylock for reading -- returns 1 if successful, 0 if contention
  */
-static inline int __down_read_trylock(struct rw_semaphore *sem)
+static inline bool __down_read_trylock(struct rw_semaphore *sem)
 {
        long result, tmp;
        asm volatile("# beginning __down_read_trylock\n\t"
@@ -93,7 +93,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
                     : "+m" (sem->count), "=&a" (result), "=&r" (tmp)
                     : "i" (RWSEM_ACTIVE_READ_BIAS)
                     : "memory", "cc");
-       return result >= 0 ? 1 : 0;
+       return result >= 0;
 }
 
 /*
@@ -134,9 +134,10 @@ static inline int __down_write_killable(struct rw_semaphore *sem)
 /*
  * trylock for writing -- returns 1 if successful, 0 if contention
  */
-static inline int __down_write_trylock(struct rw_semaphore *sem)
+static inline bool __down_write_trylock(struct rw_semaphore *sem)
 {
-       long result, tmp;
+       bool result;
+       long tmp0, tmp1;
        asm volatile("# beginning __down_write_trylock\n\t"
                     "  mov          %0,%1\n\t"
                     "1:\n\t"
@@ -144,14 +145,14 @@ static inline int __down_write_trylock(struct rw_semaphore *sem)
                     /* was the active mask 0 before? */
                     "  jnz          2f\n\t"
                     "  mov          %1,%2\n\t"
-                    "  add          %3,%2\n\t"
+                    "  add          %4,%2\n\t"
                     LOCK_PREFIX "  cmpxchg  %2,%0\n\t"
                     "  jnz          1b\n\t"
                     "2:\n\t"
-                    "  sete         %b1\n\t"
-                    "  movzbl       %b1, %k1\n\t"
+                    CC_SET(e)
                     "# ending __down_write_trylock\n\t"
-                    : "+m" (sem->count), "=&a" (result), "=&r" (tmp)
+                    : "+m" (sem->count), "=&a" (tmp0), "=&r" (tmp1),
+                      CC_OUT(e) (result)
                     : "er" (RWSEM_ACTIVE_WRITE_BIAS)
                     : "memory", "cc");
        return result;
index 2138c9a..dd1e7d6 100644 (file)
@@ -81,9 +81,9 @@ static inline int __const_sigismember(sigset_t *set, int _sig)
 
 static inline int __gen_sigismember(sigset_t *set, int _sig)
 {
-       int ret;
-       asm("btl %2,%1\n\tsbbl %0,%0"
-           : "=r"(ret) : "m"(*set), "Ir"(_sig-1) : "cc");
+       unsigned char ret;
+       asm("btl %2,%1\n\tsetc %0"
+           : "=qm"(ret) : "m"(*set), "Ir"(_sig-1) : "cc");
        return ret;
 }
 
index f28a24b..cbf8847 100644 (file)
@@ -79,10 +79,10 @@ static inline void sync_change_bit(long nr, volatile unsigned long *addr)
  */
 static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr)
 {
-       int oldbit;
+       unsigned char oldbit;
 
-       asm volatile("lock; bts %2,%1\n\tsbbl %0,%0"
-                    : "=r" (oldbit), "+m" (ADDR)
+       asm volatile("lock; bts %2,%1\n\tsetc %0"
+                    : "=qm" (oldbit), "+m" (ADDR)
                     : "Ir" (nr) : "memory");
        return oldbit;
 }
@@ -97,10 +97,10 @@ static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr)
  */
 static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
-       int oldbit;
+       unsigned char oldbit;
 
-       asm volatile("lock; btr %2,%1\n\tsbbl %0,%0"
-                    : "=r" (oldbit), "+m" (ADDR)
+       asm volatile("lock; btr %2,%1\n\tsetc %0"
+                    : "=qm" (oldbit), "+m" (ADDR)
                     : "Ir" (nr) : "memory");
        return oldbit;
 }
@@ -115,10 +115,10 @@ static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr)
  */
 static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr)
 {
-       int oldbit;
+       unsigned char oldbit;
 
-       asm volatile("lock; btc %2,%1\n\tsbbl %0,%0"
-                    : "=r" (oldbit), "+m" (ADDR)
+       asm volatile("lock; btc %2,%1\n\tsetc %0"
+                    : "=qm" (oldbit), "+m" (ADDR)
                     : "Ir" (nr) : "memory");
        return oldbit;
 }
index a147e67..e991d5c 100644 (file)
@@ -71,8 +71,8 @@ int amd_cache_northbridges(void)
        while ((misc = next_northbridge(misc, amd_nb_misc_ids)) != NULL)
                i++;
 
-       if (i == 0)
-               return 0;
+       if (!i)
+               return -ENODEV;
 
        nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL);
        if (!nb)
index f6f50c4..cfa97ff 100644 (file)
@@ -39,9 +39,9 @@ __setup("nordrand", x86_rdrand_setup);
  */
 #define SANITY_CHECK_LOOPS 8
 
+#ifdef CONFIG_ARCH_RANDOM
 void x86_init_rdrand(struct cpuinfo_x86 *c)
 {
-#ifdef CONFIG_ARCH_RANDOM
        unsigned long tmp;
        int i;
 
@@ -55,5 +55,5 @@ void x86_init_rdrand(struct cpuinfo_x86 *c)
                        return;
                }
        }
-#endif
 }
+#endif
index 64341aa..d40ee8a 100644 (file)
@@ -42,3 +42,5 @@ EXPORT_SYMBOL(empty_zero_page);
 EXPORT_SYMBOL(___preempt_schedule);
 EXPORT_SYMBOL(___preempt_schedule_notrace);
 #endif
+
+EXPORT_SYMBOL(__sw_hweight32);
index dc3c0b1..b44564b 100644 (file)
 #include <linux/compat.h>
 #include <linux/uaccess.h>
 
+/*
+ * The compat_siginfo_t structure and handing code is very easy
+ * to break in several ways.  It must always be updated when new
+ * updates are made to the main siginfo_t, and
+ * copy_siginfo_to_user32() must be updated when the
+ * (arch-independent) copy_siginfo_to_user() is updated.
+ *
+ * It is also easy to put a new member in the compat_siginfo_t
+ * which has implicit alignment which can move internal structure
+ * alignment around breaking the ABI.  This can happen if you,
+ * for instance, put a plain 64-bit value in there.
+ */
+static inline void signal_compat_build_tests(void)
+{
+       int _sifields_offset = offsetof(compat_siginfo_t, _sifields);
+
+       /*
+        * If adding a new si_code, there is probably new data in
+        * the siginfo.  Make sure folks bumping the si_code
+        * limits also have to look at this code.  Make sure any
+        * new fields are handled in copy_siginfo_to_user32()!
+        */
+       BUILD_BUG_ON(NSIGILL  != 8);
+       BUILD_BUG_ON(NSIGFPE  != 8);
+       BUILD_BUG_ON(NSIGSEGV != 4);
+       BUILD_BUG_ON(NSIGBUS  != 5);
+       BUILD_BUG_ON(NSIGTRAP != 4);
+       BUILD_BUG_ON(NSIGCHLD != 6);
+       BUILD_BUG_ON(NSIGSYS  != 1);
+
+       /* This is part of the ABI and can never change in size: */
+       BUILD_BUG_ON(sizeof(compat_siginfo_t) != 128);
+       /*
+        * The offsets of all the (unioned) si_fields are fixed
+        * in the ABI, of course.  Make sure none of them ever
+        * move and are always at the beginning:
+        */
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields) != 3 * sizeof(int));
+#define CHECK_CSI_OFFSET(name)   BUILD_BUG_ON(_sifields_offset != offsetof(compat_siginfo_t, _sifields.name))
+
+        /*
+        * Ensure that the size of each si_field never changes.
+        * If it does, it is a sign that the
+        * copy_siginfo_to_user32() code below needs to updated
+        * along with the size in the CHECK_SI_SIZE().
+        *
+        * We repeat this check for both the generic and compat
+        * siginfos.
+        *
+        * Note: it is OK for these to grow as long as the whole
+        * structure stays within the padding size (checked
+        * above).
+        */
+#define CHECK_CSI_SIZE(name, size) BUILD_BUG_ON(size != sizeof(((compat_siginfo_t *)0)->_sifields.name))
+#define CHECK_SI_SIZE(name, size) BUILD_BUG_ON(size != sizeof(((siginfo_t *)0)->_sifields.name))
+
+       CHECK_CSI_OFFSET(_kill);
+       CHECK_CSI_SIZE  (_kill, 2*sizeof(int));
+       CHECK_SI_SIZE   (_kill, 2*sizeof(int));
+
+       CHECK_CSI_OFFSET(_timer);
+       CHECK_CSI_SIZE  (_timer, 5*sizeof(int));
+       CHECK_SI_SIZE   (_timer, 6*sizeof(int));
+
+       CHECK_CSI_OFFSET(_rt);
+       CHECK_CSI_SIZE  (_rt, 3*sizeof(int));
+       CHECK_SI_SIZE   (_rt, 4*sizeof(int));
+
+       CHECK_CSI_OFFSET(_sigchld);
+       CHECK_CSI_SIZE  (_sigchld, 5*sizeof(int));
+       CHECK_SI_SIZE   (_sigchld, 8*sizeof(int));
+
+       CHECK_CSI_OFFSET(_sigchld_x32);
+       CHECK_CSI_SIZE  (_sigchld_x32, 7*sizeof(int));
+       /* no _sigchld_x32 in the generic siginfo_t */
+
+       CHECK_CSI_OFFSET(_sigfault);
+       CHECK_CSI_SIZE  (_sigfault, 4*sizeof(int));
+       CHECK_SI_SIZE   (_sigfault, 8*sizeof(int));
+
+       CHECK_CSI_OFFSET(_sigpoll);
+       CHECK_CSI_SIZE  (_sigpoll, 2*sizeof(int));
+       CHECK_SI_SIZE   (_sigpoll, 4*sizeof(int));
+
+       CHECK_CSI_OFFSET(_sigsys);
+       CHECK_CSI_SIZE  (_sigsys, 3*sizeof(int));
+       CHECK_SI_SIZE   (_sigsys, 4*sizeof(int));
+
+       /* any new si_fields should be added here */
+}
+
 int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 {
        int err = 0;
        bool ia32 = test_thread_flag(TIF_IA32);
 
+       signal_compat_build_tests();
+
        if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
                return -EFAULT;
 
@@ -32,6 +125,21 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
                                          &to->_sifields._pad[0]);
                        switch (from->si_code >> 16) {
                        case __SI_FAULT >> 16:
+                               if (from->si_signo == SIGBUS &&
+                                   (from->si_code == BUS_MCEERR_AR ||
+                                    from->si_code == BUS_MCEERR_AO))
+                                       put_user_ex(from->si_addr_lsb, &to->si_addr_lsb);
+
+                               if (from->si_signo == SIGSEGV) {
+                                       if (from->si_code == SEGV_BNDERR) {
+                                               compat_uptr_t lower = (unsigned long)&to->si_lower;
+                                               compat_uptr_t upper = (unsigned long)&to->si_upper;
+                                               put_user_ex(lower, &to->si_lower);
+                                               put_user_ex(upper, &to->si_upper);
+                                       }
+                                       if (from->si_code == SEGV_PKUERR)
+                                               put_user_ex(from->si_pkey, &to->si_pkey);
+                               }
                                break;
                        case __SI_SYS >> 16:
                                put_user_ex(from->si_syscall, &to->si_syscall);
index 3dce1ca..01f30e5 100644 (file)
@@ -440,10 +440,7 @@ static inline unsigned long get_vflags(struct kernel_vm86_regs *regs)
 
 static inline int is_revectored(int nr, struct revectored_struct *bitmap)
 {
-       __asm__ __volatile__("btl %2,%1\n\tsbbl %0,%0"
-               :"=r" (nr)
-               :"m" (*bitmap), "r" (nr));
-       return nr;
+       return test_bit(nr, bitmap->__map);
 }
 
 #define val_byte(val, n) (((__u8 *)&val)[n])
index cd05942..f1aebfb 100644 (file)
@@ -44,6 +44,9 @@ EXPORT_SYMBOL(clear_page);
 
 EXPORT_SYMBOL(csum_partial);
 
+EXPORT_SYMBOL(__sw_hweight32);
+EXPORT_SYMBOL(__sw_hweight64);
+
 /*
  * Export string functions. We normally rely on gcc builtin for most of these,
  * but gcc sometimes decides not to inline them.
index 72a5767..ec969cc 100644 (file)
@@ -25,7 +25,7 @@ lib-y += memcpy_$(BITS).o
 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
 lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
 
-obj-y += msr.o msr-reg.o msr-reg-export.o
+obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
 
 ifeq ($(CONFIG_X86_32),y)
         obj-y += atomic64_32.o
diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S
new file mode 100644 (file)
index 0000000..02de3d7
--- /dev/null
@@ -0,0 +1,77 @@
+#include <linux/linkage.h>
+
+#include <asm/asm.h>
+
+/*
+ * unsigned int __sw_hweight32(unsigned int w)
+ * %rdi: w
+ */
+ENTRY(__sw_hweight32)
+
+#ifdef CONFIG_X86_64
+       movl %edi, %eax                         # w
+#endif
+       __ASM_SIZE(push,) %__ASM_REG(dx)
+       movl %eax, %edx                         # w -> t
+       shrl %edx                               # t >>= 1
+       andl $0x55555555, %edx                  # t &= 0x55555555
+       subl %edx, %eax                         # w -= t
+
+       movl %eax, %edx                         # w -> t
+       shrl $2, %eax                           # w_tmp >>= 2
+       andl $0x33333333, %edx                  # t     &= 0x33333333
+       andl $0x33333333, %eax                  # w_tmp &= 0x33333333
+       addl %edx, %eax                         # w = w_tmp + t
+
+       movl %eax, %edx                         # w -> t
+       shrl $4, %edx                           # t >>= 4
+       addl %edx, %eax                         # w_tmp += t
+       andl  $0x0f0f0f0f, %eax                 # w_tmp &= 0x0f0f0f0f
+       imull $0x01010101, %eax, %eax           # w_tmp *= 0x01010101
+       shrl $24, %eax                          # w = w_tmp >> 24
+       __ASM_SIZE(pop,) %__ASM_REG(dx)
+       ret
+ENDPROC(__sw_hweight32)
+
+ENTRY(__sw_hweight64)
+#ifdef CONFIG_X86_64
+       pushq   %rdx
+
+       movq    %rdi, %rdx                      # w -> t
+       movabsq $0x5555555555555555, %rax
+       shrq    %rdx                            # t >>= 1
+       andq    %rdx, %rax                      # t &= 0x5555555555555555
+       movabsq $0x3333333333333333, %rdx
+       subq    %rax, %rdi                      # w -= t
+
+       movq    %rdi, %rax                      # w -> t
+       shrq    $2, %rdi                        # w_tmp >>= 2
+       andq    %rdx, %rax                      # t     &= 0x3333333333333333
+       andq    %rdi, %rdx                      # w_tmp &= 0x3333333333333333
+       addq    %rdx, %rax                      # w = w_tmp + t
+
+       movq    %rax, %rdx                      # w -> t
+       shrq    $4, %rdx                        # t >>= 4
+       addq    %rdx, %rax                      # w_tmp += t
+       movabsq $0x0f0f0f0f0f0f0f0f, %rdx
+       andq    %rdx, %rax                      # w_tmp &= 0x0f0f0f0f0f0f0f0f
+       movabsq $0x0101010101010101, %rdx
+       imulq   %rdx, %rax                      # w_tmp *= 0x0101010101010101
+       shrq    $56, %rax                       # w = w_tmp >> 56
+
+       popq    %rdx
+       ret
+#else /* CONFIG_X86_32 */
+       /* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */
+       pushl   %ecx
+
+       call    __sw_hweight32
+       movl    %eax, %ecx                      # stash away result
+       movl    %edx, %eax                      # second part of input
+       call    __sw_hweight32
+       addl    %ecx, %eax                      # result
+
+       popl    %ecx
+       ret
+#endif
+ENDPROC(__sw_hweight64)
index b2a4e2a..3cd6983 100644 (file)
@@ -396,6 +396,7 @@ int __init pci_acpi_init(void)
                return -ENODEV;
 
        printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n");
+       acpi_irq_penalty_init();
        pcibios_enable_irq = acpi_pci_irq_enable;
        pcibios_disable_irq = acpi_pci_irq_disable;
        x86_init.pci.init_irq = x86_init_noop;
index 009947d..f2b5e6a 100644 (file)
@@ -19,6 +19,7 @@
 #include <asm/mtrr.h>
 #include <asm/sections.h>
 #include <asm/suspend.h>
+#include <asm/tlbflush.h>
 
 /* Defined in hibernate_asm_64.S */
 extern asmlinkage __visible int restore_image(void);
@@ -28,6 +29,7 @@ extern asmlinkage __visible int restore_image(void);
  * kernel's text (this value is passed in the image header).
  */
 unsigned long restore_jump_address __visible;
+unsigned long jump_address_phys;
 
 /*
  * Value of the cr3 register from before the hibernation (this value is passed
@@ -37,7 +39,43 @@ unsigned long restore_cr3 __visible;
 
 pgd_t *temp_level4_pgt __visible;
 
-void *relocated_restore_code __visible;
+unsigned long relocated_restore_code __visible;
+
+static int set_up_temporary_text_mapping(void)
+{
+       pmd_t *pmd;
+       pud_t *pud;
+
+       /*
+        * The new mapping only has to cover the page containing the image
+        * kernel's entry point (jump_address_phys), because the switch over to
+        * it is carried out by relocated code running from a page allocated
+        * specifically for this purpose and covered by the identity mapping, so
+        * the temporary kernel text mapping is only needed for the final jump.
+        * Moreover, in that mapping the virtual address of the image kernel's
+        * entry point must be the same as its virtual address in the image
+        * kernel (restore_jump_address), so the image kernel's
+        * restore_registers() code doesn't find itself in a different area of
+        * the virtual address space after switching over to the original page
+        * tables used by the image kernel.
+        */
+       pud = (pud_t *)get_safe_page(GFP_ATOMIC);
+       if (!pud)
+               return -ENOMEM;
+
+       pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
+       if (!pmd)
+               return -ENOMEM;
+
+       set_pmd(pmd + pmd_index(restore_jump_address),
+               __pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC));
+       set_pud(pud + pud_index(restore_jump_address),
+               __pud(__pa(pmd) | _KERNPG_TABLE));
+       set_pgd(temp_level4_pgt + pgd_index(restore_jump_address),
+               __pgd(__pa(pud) | _KERNPG_TABLE));
+
+       return 0;
+}
 
 static void *alloc_pgt_page(void *context)
 {
@@ -59,9 +97,10 @@ static int set_up_temporary_mappings(void)
        if (!temp_level4_pgt)
                return -ENOMEM;
 
-       /* It is safe to reuse the original kernel mapping */
-       set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
-               init_level4_pgt[pgd_index(__START_KERNEL_map)]);
+       /* Prepare a temporary mapping for the kernel text */
+       result = set_up_temporary_text_mapping();
+       if (result)
+               return result;
 
        /* Set up the direct mapping from scratch */
        for (i = 0; i < nr_pfn_mapped; i++) {
@@ -78,19 +117,50 @@ static int set_up_temporary_mappings(void)
        return 0;
 }
 
+static int relocate_restore_code(void)
+{
+       pgd_t *pgd;
+       pud_t *pud;
+
+       relocated_restore_code = get_safe_page(GFP_ATOMIC);
+       if (!relocated_restore_code)
+               return -ENOMEM;
+
+       memcpy((void *)relocated_restore_code, &core_restore_code, PAGE_SIZE);
+
+       /* Make the page containing the relocated code executable */
+       pgd = (pgd_t *)__va(read_cr3()) + pgd_index(relocated_restore_code);
+       pud = pud_offset(pgd, relocated_restore_code);
+       if (pud_large(*pud)) {
+               set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX));
+       } else {
+               pmd_t *pmd = pmd_offset(pud, relocated_restore_code);
+
+               if (pmd_large(*pmd)) {
+                       set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX));
+               } else {
+                       pte_t *pte = pte_offset_kernel(pmd, relocated_restore_code);
+
+                       set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX));
+               }
+       }
+       __flush_tlb_all();
+
+       return 0;
+}
+
 int swsusp_arch_resume(void)
 {
        int error;
 
        /* We have got enough memory and from now on we cannot recover */
-       if ((error = set_up_temporary_mappings()))
+       error = set_up_temporary_mappings();
+       if (error)
                return error;
 
-       relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC);
-       if (!relocated_restore_code)
-               return -ENOMEM;
-       memcpy(relocated_restore_code, &core_restore_code,
-              &restore_registers - &core_restore_code);
+       error = relocate_restore_code();
+       if (error)
+               return error;
 
        restore_image();
        return 0;
@@ -109,11 +179,12 @@ int pfn_is_nosave(unsigned long pfn)
 
 struct restore_data_record {
        unsigned long jump_address;
+       unsigned long jump_address_phys;
        unsigned long cr3;
        unsigned long magic;
 };
 
-#define RESTORE_MAGIC  0x0123456789ABCDEFUL
+#define RESTORE_MAGIC  0x123456789ABCDEF0UL
 
 /**
  *     arch_hibernation_header_save - populate the architecture specific part
@@ -126,7 +197,8 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size)
 
        if (max_size < sizeof(struct restore_data_record))
                return -EOVERFLOW;
-       rdr->jump_address = restore_jump_address;
+       rdr->jump_address = (unsigned long)&restore_registers;
+       rdr->jump_address_phys = __pa_symbol(&restore_registers);
        rdr->cr3 = restore_cr3;
        rdr->magic = RESTORE_MAGIC;
        return 0;
@@ -142,6 +214,7 @@ int arch_hibernation_header_restore(void *addr)
        struct restore_data_record *rdr = addr;
 
        restore_jump_address = rdr->jump_address;
+       jump_address_phys = rdr->jump_address_phys;
        restore_cr3 = rdr->cr3;
        return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL;
 }
index 4400a43..3177c2b 100644 (file)
@@ -44,9 +44,6 @@ ENTRY(swsusp_arch_suspend)
        pushfq
        popq    pt_regs_flags(%rax)
 
-       /* save the address of restore_registers */
-       movq    $restore_registers, %rax
-       movq    %rax, restore_jump_address(%rip)
        /* save cr3 */
        movq    %cr3, %rax
        movq    %rax, restore_cr3(%rip)
@@ -57,31 +54,34 @@ ENTRY(swsusp_arch_suspend)
 ENDPROC(swsusp_arch_suspend)
 
 ENTRY(restore_image)
-       /* switch to temporary page tables */
-       movq    $__PAGE_OFFSET, %rdx
-       movq    temp_level4_pgt(%rip), %rax
-       subq    %rdx, %rax
-       movq    %rax, %cr3
-       /* Flush TLB */
-       movq    mmu_cr4_features(%rip), %rax
-       movq    %rax, %rdx
-       andq    $~(X86_CR4_PGE), %rdx
-       movq    %rdx, %cr4;  # turn off PGE
-       movq    %cr3, %rcx;  # flush TLB
-       movq    %rcx, %cr3;
-       movq    %rax, %cr4;  # turn PGE back on
-
        /* prepare to jump to the image kernel */
-       movq    restore_jump_address(%rip), %rax
-       movq    restore_cr3(%rip), %rbx
+       movq    restore_jump_address(%rip), %r8
+       movq    restore_cr3(%rip), %r9
+
+       /* prepare to switch to temporary page tables */
+       movq    temp_level4_pgt(%rip), %rax
+       movq    mmu_cr4_features(%rip), %rbx
 
        /* prepare to copy image data to their original locations */
        movq    restore_pblist(%rip), %rdx
+
+       /* jump to relocated restore code */
        movq    relocated_restore_code(%rip), %rcx
        jmpq    *%rcx
 
        /* code below has been relocated to a safe page */
 ENTRY(core_restore_code)
+       /* switch to temporary page tables */
+       movq    $__PAGE_OFFSET, %rcx
+       subq    %rcx, %rax
+       movq    %rax, %cr3
+       /* flush TLB */
+       movq    %rbx, %rcx
+       andq    $~(X86_CR4_PGE), %rcx
+       movq    %rcx, %cr4;  # turn off PGE
+       movq    %cr3, %rcx;  # flush TLB
+       movq    %rcx, %cr3;
+       movq    %rbx, %cr4;  # turn PGE back on
 .Lloop:
        testq   %rdx, %rdx
        jz      .Ldone
@@ -96,24 +96,17 @@ ENTRY(core_restore_code)
        /* progress to the next pbe */
        movq    pbe_next(%rdx), %rdx
        jmp     .Lloop
+
 .Ldone:
        /* jump to the restore_registers address from the image header */
-       jmpq    *%rax
-       /*
-        * NOTE: This assumes that the boot kernel's text mapping covers the
-        * image kernel's page containing restore_registers and the address of
-        * this page is the same as in the image kernel's text mapping (it
-        * should always be true, because the text mapping is linear, starting
-        * from 0, and is supposed to cover the entire kernel text for every
-        * kernel).
-        *
-        * code below belongs to the image kernel
-        */
+       jmpq    *%r8
 
+        /* code below belongs to the image kernel */
+       .align PAGE_SIZE
 ENTRY(restore_registers)
        FRAME_BEGIN
        /* go back to the original page tables */
-       movq    %rbx, %cr3
+       movq    %r9, %cr3
 
        /* Flush TLB, including "global" things (vmalloc) */
        movq    mmu_cr4_features(%rip), %rax
index 760789a..0f87db2 100644 (file)
@@ -521,9 +521,7 @@ static void set_aliased_prot(void *v, pgprot_t prot)
 
        preempt_disable();
 
-       pagefault_disable();    /* Avoid warnings due to being atomic. */
-       __get_user(dummy, (unsigned char __user __force *)v);
-       pagefault_enable();
+       probe_kernel_read(&dummy, v, 1);
 
        if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
                BUG();
index cc7800e..01b8116 100644 (file)
@@ -150,8 +150,10 @@ static int get_task_ioprio(struct task_struct *p)
        if (ret)
                goto out;
        ret = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, IOPRIO_NORM);
+       task_lock(p);
        if (p->io_context)
                ret = p->io_context->ioprio;
+       task_unlock(p);
 out:
        return ret;
 }
index 1f41284..dee8692 100644 (file)
@@ -602,7 +602,7 @@ static int acpi_aml_read_user(char __user *buf, int len)
        crc->tail = (crc->tail + n) & (ACPI_AML_BUF_SIZE - 1);
        ret = n;
 out:
-       acpi_aml_unlock_fifo(ACPI_AML_OUT_USER, !ret);
+       acpi_aml_unlock_fifo(ACPI_AML_OUT_USER, ret >= 0);
        return ret;
 }
 
@@ -672,7 +672,7 @@ static int acpi_aml_write_user(const char __user *buf, int len)
        crc->head = (crc->head + n) & (ACPI_AML_BUF_SIZE - 1);
        ret = n;
 out:
-       acpi_aml_unlock_fifo(ACPI_AML_IN_USER, !ret);
+       acpi_aml_unlock_fifo(ACPI_AML_IN_USER, ret >= 0);
        return n;
 }
 
index b5e2b0a..297f6aa 100644 (file)
@@ -46,6 +46,7 @@
 #include "acnamesp.h"
 #include "acdispat.h"
 #include "actables.h"
+#include "acinterp.h"
 
 #define _COMPONENT          ACPI_NAMESPACE
 ACPI_MODULE_NAME("nsload")
@@ -78,6 +79,8 @@ acpi_ns_load_table(u32 table_index, struct acpi_namespace_node *node)
 
        ACPI_FUNCTION_TRACE(ns_load_table);
 
+       acpi_ex_enter_interpreter();
+
        /*
         * Parse the table and load the namespace with all named
         * objects found within. Control methods are NOT parsed
@@ -89,7 +92,7 @@ acpi_ns_load_table(u32 table_index, struct acpi_namespace_node *node)
         */
        status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE);
        if (ACPI_FAILURE(status)) {
-               return_ACPI_STATUS(status);
+               goto unlock_interp;
        }
 
        /* If table already loaded into namespace, just return */
@@ -130,6 +133,8 @@ acpi_ns_load_table(u32 table_index, struct acpi_namespace_node *node)
 
 unlock:
        (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
+unlock_interp:
+       (void)acpi_ex_exit_interpreter();
 
        if (ACPI_FAILURE(status)) {
                return_ACPI_STATUS(status);
index 1783cd7..f631a47 100644 (file)
@@ -47,7 +47,6 @@
 #include "acparser.h"
 #include "acdispat.h"
 #include "actables.h"
-#include "acinterp.h"
 
 #define _COMPONENT          ACPI_NAMESPACE
 ACPI_MODULE_NAME("nsparse")
@@ -171,8 +170,6 @@ acpi_ns_parse_table(u32 table_index, struct acpi_namespace_node *start_node)
 
        ACPI_FUNCTION_TRACE(ns_parse_table);
 
-       acpi_ex_enter_interpreter();
-
        /*
         * AML Parse, pass 1
         *
@@ -188,7 +185,7 @@ acpi_ns_parse_table(u32 table_index, struct acpi_namespace_node *start_node)
        status = acpi_ns_one_complete_parse(ACPI_IMODE_LOAD_PASS1,
                                            table_index, start_node);
        if (ACPI_FAILURE(status)) {
-               goto error_exit;
+               return_ACPI_STATUS(status);
        }
 
        /*
@@ -204,10 +201,8 @@ acpi_ns_parse_table(u32 table_index, struct acpi_namespace_node *start_node)
        status = acpi_ns_one_complete_parse(ACPI_IMODE_LOAD_PASS2,
                                            table_index, start_node);
        if (ACPI_FAILURE(status)) {
-               goto error_exit;
+               return_ACPI_STATUS(status);
        }
 
-error_exit:
-       acpi_ex_exit_interpreter();
        return_ACPI_STATUS(status);
 }
index 4ed4061..c983bf7 100644 (file)
@@ -470,6 +470,7 @@ static int acpi_irq_pci_sharing_penalty(int irq)
 {
        struct acpi_pci_link *link;
        int penalty = 0;
+       int i;
 
        list_for_each_entry(link, &acpi_link_list, list) {
                /*
@@ -478,18 +479,14 @@ static int acpi_irq_pci_sharing_penalty(int irq)
                 */
                if (link->irq.active && link->irq.active == irq)
                        penalty += PIRQ_PENALTY_PCI_USING;
-               else {
-                       int i;
-
-                       /*
-                        * If a link is inactive, penalize the IRQs it
-                        * might use, but not as severely.
-                        */
-                       for (i = 0; i < link->irq.possible_count; i++)
-                               if (link->irq.possible[i] == irq)
-                                       penalty += PIRQ_PENALTY_PCI_POSSIBLE /
-                                               link->irq.possible_count;
-               }
+
+               /*
+                * penalize the IRQs PCI might use, but not as severely.
+                */
+               for (i = 0; i < link->irq.possible_count; i++)
+                       if (link->irq.possible[i] == irq)
+                               penalty += PIRQ_PENALTY_PCI_POSSIBLE /
+                                       link->irq.possible_count;
        }
 
        return penalty;
@@ -499,9 +496,6 @@ static int acpi_irq_get_penalty(int irq)
 {
        int penalty = 0;
 
-       if (irq < ACPI_MAX_ISA_IRQS)
-               penalty += acpi_isa_irq_penalty[irq];
-
        /*
        * Penalize IRQ used by ACPI SCI. If ACPI SCI pin attributes conflict
        * with PCI IRQ attributes, mark ACPI SCI as ISA_ALWAYS so it won't be
@@ -516,10 +510,49 @@ static int acpi_irq_get_penalty(int irq)
                        penalty += PIRQ_PENALTY_PCI_USING;
        }
 
+       if (irq < ACPI_MAX_ISA_IRQS)
+               return penalty + acpi_isa_irq_penalty[irq];
+
        penalty += acpi_irq_pci_sharing_penalty(irq);
        return penalty;
 }
 
+int __init acpi_irq_penalty_init(void)
+{
+       struct acpi_pci_link *link;
+       int i;
+
+       /*
+        * Update penalties to facilitate IRQ balancing.
+        */
+       list_for_each_entry(link, &acpi_link_list, list) {
+
+               /*
+                * reflect the possible and active irqs in the penalty table --
+                * useful for breaking ties.
+                */
+               if (link->irq.possible_count) {
+                       int penalty =
+                           PIRQ_PENALTY_PCI_POSSIBLE /
+                           link->irq.possible_count;
+
+                       for (i = 0; i < link->irq.possible_count; i++) {
+                               if (link->irq.possible[i] < ACPI_MAX_ISA_IRQS)
+                                       acpi_isa_irq_penalty[link->irq.
+                                                        possible[i]] +=
+                                           penalty;
+                       }
+
+               } else if (link->irq.active &&
+                               (link->irq.active < ACPI_MAX_ISA_IRQS)) {
+                       acpi_isa_irq_penalty[link->irq.active] +=
+                           PIRQ_PENALTY_PCI_POSSIBLE;
+               }
+       }
+
+       return 0;
+}
+
 static int acpi_irq_balance = -1;      /* 0: static, 1: balance */
 
 static int acpi_pci_link_allocate(struct acpi_pci_link *link)
index 2e6d1e9..fcc5b4e 100644 (file)
@@ -207,6 +207,9 @@ struct blkfront_info
        struct blk_mq_tag_set tag_set;
        struct blkfront_ring_info *rinfo;
        unsigned int nr_rings;
+       /* Save uncomplete reqs and bios for migration. */
+       struct list_head requests;
+       struct bio_list bio_list;
 };
 
 static unsigned int nr_minors;
@@ -2002,69 +2005,22 @@ static int blkif_recover(struct blkfront_info *info)
 {
        unsigned int i, r_index;
        struct request *req, *n;
-       struct blk_shadow *copy;
        int rc;
        struct bio *bio, *cloned_bio;
-       struct bio_list bio_list, merge_bio;
        unsigned int segs, offset;
        int pending, size;
        struct split_bio *split_bio;
-       struct list_head requests;
 
        blkfront_gather_backend_features(info);
        segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
        blk_queue_max_segments(info->rq, segs);
-       bio_list_init(&bio_list);
-       INIT_LIST_HEAD(&requests);
 
        for (r_index = 0; r_index < info->nr_rings; r_index++) {
-               struct blkfront_ring_info *rinfo;
-
-               rinfo = &info->rinfo[r_index];
-               /* Stage 1: Make a safe copy of the shadow state. */
-               copy = kmemdup(rinfo->shadow, sizeof(rinfo->shadow),
-                              GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
-               if (!copy)
-                       return -ENOMEM;
-
-               /* Stage 2: Set up free list. */
-               memset(&rinfo->shadow, 0, sizeof(rinfo->shadow));
-               for (i = 0; i < BLK_RING_SIZE(info); i++)
-                       rinfo->shadow[i].req.u.rw.id = i+1;
-               rinfo->shadow_free = rinfo->ring.req_prod_pvt;
-               rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
+               struct blkfront_ring_info *rinfo = &info->rinfo[r_index];
 
                rc = blkfront_setup_indirect(rinfo);
-               if (rc) {
-                       kfree(copy);
+               if (rc)
                        return rc;
-               }
-
-               for (i = 0; i < BLK_RING_SIZE(info); i++) {
-                       /* Not in use? */
-                       if (!copy[i].request)
-                               continue;
-
-                       /*
-                        * Get the bios in the request so we can re-queue them.
-                        */
-                       if (copy[i].request->cmd_flags &
-                           (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
-                               /*
-                                * Flush operations don't contain bios, so
-                                * we need to requeue the whole request
-                                */
-                               list_add(&copy[i].request->queuelist, &requests);
-                               continue;
-                       }
-                       merge_bio.head = copy[i].request->bio;
-                       merge_bio.tail = copy[i].request->biotail;
-                       bio_list_merge(&bio_list, &merge_bio);
-                       copy[i].request->bio = NULL;
-                       blk_end_request_all(copy[i].request, 0);
-               }
-
-               kfree(copy);
        }
        xenbus_switch_state(info->xbdev, XenbusStateConnected);
 
@@ -2079,7 +2035,7 @@ static int blkif_recover(struct blkfront_info *info)
                kick_pending_request_queues(rinfo);
        }
 
-       list_for_each_entry_safe(req, n, &requests, queuelist) {
+       list_for_each_entry_safe(req, n, &info->requests, queuelist) {
                /* Requeue pending requests (flush or discard) */
                list_del_init(&req->queuelist);
                BUG_ON(req->nr_phys_segments > segs);
@@ -2087,7 +2043,7 @@ static int blkif_recover(struct blkfront_info *info)
        }
        blk_mq_kick_requeue_list(info->rq);
 
-       while ((bio = bio_list_pop(&bio_list)) != NULL) {
+       while ((bio = bio_list_pop(&info->bio_list)) != NULL) {
                /* Traverse the list of pending bios and re-queue them */
                if (bio_segments(bio) > segs) {
                        /*
@@ -2133,9 +2089,42 @@ static int blkfront_resume(struct xenbus_device *dev)
 {
        struct blkfront_info *info = dev_get_drvdata(&dev->dev);
        int err = 0;
+       unsigned int i, j;
 
        dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename);
 
+       bio_list_init(&info->bio_list);
+       INIT_LIST_HEAD(&info->requests);
+       for (i = 0; i < info->nr_rings; i++) {
+               struct blkfront_ring_info *rinfo = &info->rinfo[i];
+               struct bio_list merge_bio;
+               struct blk_shadow *shadow = rinfo->shadow;
+
+               for (j = 0; j < BLK_RING_SIZE(info); j++) {
+                       /* Not in use? */
+                       if (!shadow[j].request)
+                               continue;
+
+                       /*
+                        * Get the bios in the request so we can re-queue them.
+                        */
+                       if (shadow[j].request->cmd_flags &
+                                       (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
+                               /*
+                                * Flush operations don't contain bios, so
+                                * we need to requeue the whole request
+                                */
+                               list_add(&shadow[j].request->queuelist, &info->requests);
+                               continue;
+                       }
+                       merge_bio.head = shadow[j].request->bio;
+                       merge_bio.tail = shadow[j].request->biotail;
+                       bio_list_merge(&info->bio_list, &merge_bio);
+                       shadow[j].request->bio = NULL;
+                       blk_mq_end_request(shadow[j].request, 0);
+               }
+       }
+
        blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
 
        err = negotiate_mq(info);
index a4d0059..c73207a 100644 (file)
@@ -173,7 +173,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 
        struct cpuidle_state *target_state = &drv->states[index];
        bool broadcast = !!(target_state->flags & CPUIDLE_FLAG_TIMER_STOP);
-       u64 time_start, time_end;
+       ktime_t time_start, time_end;
        s64 diff;
 
        /*
@@ -195,13 +195,13 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
        sched_idle_set_state(target_state);
 
        trace_cpu_idle_rcuidle(index, dev->cpu);
-       time_start = local_clock();
+       time_start = ns_to_ktime(local_clock());
 
        stop_critical_timings();
        entered_state = target_state->enter(dev, drv, index);
        start_critical_timings();
 
-       time_end = local_clock();
+       time_end = ns_to_ktime(local_clock());
        trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
 
        /* The cpu is no longer idle or about to enter idle. */
@@ -217,11 +217,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
        if (!cpuidle_state_is_coupled(drv, index))
                local_irq_enable();
 
-       /*
-        * local_clock() returns the time in nanosecond, let's shift
-        * by 10 (divide by 1024) to have microsecond based time.
-        */
-       diff = (time_end - time_start) >> 10;
+       diff = ktime_us_delta(time_end, time_start);
        if (diff > INT_MAX)
                diff = INT_MAX;
 
index cebcb40..536112f 100644 (file)
@@ -49,7 +49,7 @@ config GPIO_DEVRES
 
 config OF_GPIO
        def_bool y
-       depends on OF || COMPILE_TEST
+       depends on OF
 
 config GPIO_ACPI
        def_bool y
index e85e753..eb43ae4 100644 (file)
@@ -61,9 +61,8 @@ static unsigned sch_gpio_bit(struct sch_gpio *sch, unsigned gpio)
        return gpio % 8;
 }
 
-static int sch_gpio_reg_get(struct gpio_chip *gc, unsigned gpio, unsigned reg)
+static int sch_gpio_reg_get(struct sch_gpio *sch, unsigned gpio, unsigned reg)
 {
-       struct sch_gpio *sch = gpiochip_get_data(gc);
        unsigned short offset, bit;
        u8 reg_val;
 
@@ -75,10 +74,9 @@ static int sch_gpio_reg_get(struct gpio_chip *gc, unsigned gpio, unsigned reg)
        return reg_val;
 }
 
-static void sch_gpio_reg_set(struct gpio_chip *gc, unsigned gpio, unsigned reg,
+static void sch_gpio_reg_set(struct sch_gpio *sch, unsigned gpio, unsigned reg,
                             int val)
 {
-       struct sch_gpio *sch = gpiochip_get_data(gc);
        unsigned short offset, bit;
        u8 reg_val;
 
@@ -98,14 +96,15 @@ static int sch_gpio_direction_in(struct gpio_chip *gc, unsigned gpio_num)
        struct sch_gpio *sch = gpiochip_get_data(gc);
 
        spin_lock(&sch->lock);
-       sch_gpio_reg_set(gc, gpio_num, GIO, 1);
+       sch_gpio_reg_set(sch, gpio_num, GIO, 1);
        spin_unlock(&sch->lock);
        return 0;
 }
 
 static int sch_gpio_get(struct gpio_chip *gc, unsigned gpio_num)
 {
-       return sch_gpio_reg_get(gc, gpio_num, GLV);
+       struct sch_gpio *sch = gpiochip_get_data(gc);
+       return sch_gpio_reg_get(sch, gpio_num, GLV);
 }
 
 static void sch_gpio_set(struct gpio_chip *gc, unsigned gpio_num, int val)
@@ -113,7 +112,7 @@ static void sch_gpio_set(struct gpio_chip *gc, unsigned gpio_num, int val)
        struct sch_gpio *sch = gpiochip_get_data(gc);
 
        spin_lock(&sch->lock);
-       sch_gpio_reg_set(gc, gpio_num, GLV, val);
+       sch_gpio_reg_set(sch, gpio_num, GLV, val);
        spin_unlock(&sch->lock);
 }
 
@@ -123,7 +122,7 @@ static int sch_gpio_direction_out(struct gpio_chip *gc, unsigned gpio_num,
        struct sch_gpio *sch = gpiochip_get_data(gc);
 
        spin_lock(&sch->lock);
-       sch_gpio_reg_set(gc, gpio_num, GIO, 0);
+       sch_gpio_reg_set(sch, gpio_num, GIO, 0);
        spin_unlock(&sch->lock);
 
        /*
@@ -182,13 +181,13 @@ static int sch_gpio_probe(struct platform_device *pdev)
                 * GPIO7 is configured by the CMC as SLPIOVR
                 * Enable GPIO[9:8] core powered gpios explicitly
                 */
-               sch_gpio_reg_set(&sch->chip, 8, GEN, 1);
-               sch_gpio_reg_set(&sch->chip, 9, GEN, 1);
+               sch_gpio_reg_set(sch, 8, GEN, 1);
+               sch_gpio_reg_set(sch, 9, GEN, 1);
                /*
                 * SUS_GPIO[2:0] enabled by default
                 * Enable SUS_GPIO3 resume powered gpio explicitly
                 */
-               sch_gpio_reg_set(&sch->chip, 13, GEN, 1);
+               sch_gpio_reg_set(sch, 13, GEN, 1);
                break;
 
        case PCI_DEVICE_ID_INTEL_ITC_LPC:
index 3a5c701..8b83099 100644 (file)
@@ -28,6 +28,10 @@ int gpio_request_one(unsigned gpio, unsigned long flags, const char *label)
        if (!desc && gpio_is_valid(gpio))
                return -EPROBE_DEFER;
 
+       err = gpiod_request(desc, label);
+       if (err)
+               return err;
+
        if (flags & GPIOF_OPEN_DRAIN)
                set_bit(FLAG_OPEN_DRAIN, &desc->flags);
 
@@ -37,10 +41,6 @@ int gpio_request_one(unsigned gpio, unsigned long flags, const char *label)
        if (flags & GPIOF_ACTIVE_LOW)
                set_bit(FLAG_ACTIVE_LOW, &desc->flags);
 
-       err = gpiod_request(desc, label);
-       if (err)
-               return err;
-
        if (flags & GPIOF_DIR_IN)
                err = gpiod_direction_input(desc);
        else
index 570771e..be74bd3 100644 (file)
@@ -1352,14 +1352,6 @@ static int __gpiod_request(struct gpio_desc *desc, const char *label)
                spin_lock_irqsave(&gpio_lock, flags);
        }
 done:
-       if (status < 0) {
-               /* Clear flags that might have been set by the caller before
-                * requesting the GPIO.
-                */
-               clear_bit(FLAG_ACTIVE_LOW, &desc->flags);
-               clear_bit(FLAG_OPEN_DRAIN, &desc->flags);
-               clear_bit(FLAG_OPEN_SOURCE, &desc->flags);
-       }
        spin_unlock_irqrestore(&gpio_lock, flags);
        return status;
 }
@@ -2587,28 +2579,13 @@ struct gpio_desc *__must_check gpiod_get_optional(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(gpiod_get_optional);
 
-/**
- * gpiod_parse_flags - helper function to parse GPIO lookup flags
- * @desc:      gpio to be setup
- * @lflags:    gpio_lookup_flags - returned from of_find_gpio() or
- *             of_get_gpio_hog()
- *
- * Set the GPIO descriptor flags based on the given GPIO lookup flags.
- */
-static void gpiod_parse_flags(struct gpio_desc *desc, unsigned long lflags)
-{
-       if (lflags & GPIO_ACTIVE_LOW)
-               set_bit(FLAG_ACTIVE_LOW, &desc->flags);
-       if (lflags & GPIO_OPEN_DRAIN)
-               set_bit(FLAG_OPEN_DRAIN, &desc->flags);
-       if (lflags & GPIO_OPEN_SOURCE)
-               set_bit(FLAG_OPEN_SOURCE, &desc->flags);
-}
 
 /**
  * gpiod_configure_flags - helper function to configure a given GPIO
  * @desc:      gpio whose value will be assigned
  * @con_id:    function within the GPIO consumer
+ * @lflags:    gpio_lookup_flags - returned from of_find_gpio() or
+ *             of_get_gpio_hog()
  * @dflags:    gpiod_flags - optional GPIO initialization flags
  *
  * Return 0 on success, -ENOENT if no GPIO has been assigned to the
@@ -2616,10 +2593,17 @@ static void gpiod_parse_flags(struct gpio_desc *desc, unsigned long lflags)
  * occurred while trying to acquire the GPIO.
  */
 static int gpiod_configure_flags(struct gpio_desc *desc, const char *con_id,
-                                enum gpiod_flags dflags)
+               unsigned long lflags, enum gpiod_flags dflags)
 {
        int status;
 
+       if (lflags & GPIO_ACTIVE_LOW)
+               set_bit(FLAG_ACTIVE_LOW, &desc->flags);
+       if (lflags & GPIO_OPEN_DRAIN)
+               set_bit(FLAG_OPEN_DRAIN, &desc->flags);
+       if (lflags & GPIO_OPEN_SOURCE)
+               set_bit(FLAG_OPEN_SOURCE, &desc->flags);
+
        /* No particular flag request, return here... */
        if (!(dflags & GPIOD_FLAGS_BIT_DIR_SET)) {
                pr_debug("no flags found for %s\n", con_id);
@@ -2686,13 +2670,11 @@ struct gpio_desc *__must_check gpiod_get_index(struct device *dev,
                return desc;
        }
 
-       gpiod_parse_flags(desc, lookupflags);
-
        status = gpiod_request(desc, con_id);
        if (status < 0)
                return ERR_PTR(status);
 
-       status = gpiod_configure_flags(desc, con_id, flags);
+       status = gpiod_configure_flags(desc, con_id, lookupflags, flags);
        if (status < 0) {
                dev_dbg(dev, "setup of GPIO %s failed\n", con_id);
                gpiod_put(desc);
@@ -2748,6 +2730,10 @@ struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode,
        if (IS_ERR(desc))
                return desc;
 
+       ret = gpiod_request(desc, NULL);
+       if (ret)
+               return ERR_PTR(ret);
+
        if (active_low)
                set_bit(FLAG_ACTIVE_LOW, &desc->flags);
 
@@ -2758,10 +2744,6 @@ struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode,
                        set_bit(FLAG_OPEN_SOURCE, &desc->flags);
        }
 
-       ret = gpiod_request(desc, NULL);
-       if (ret)
-               return ERR_PTR(ret);
-
        return desc;
 }
 EXPORT_SYMBOL_GPL(fwnode_get_named_gpiod);
@@ -2814,8 +2796,6 @@ int gpiod_hog(struct gpio_desc *desc, const char *name,
        chip = gpiod_to_chip(desc);
        hwnum = gpio_chip_hwgpio(desc);
 
-       gpiod_parse_flags(desc, lflags);
-
        local_desc = gpiochip_request_own_desc(chip, hwnum, name);
        if (IS_ERR(local_desc)) {
                status = PTR_ERR(local_desc);
@@ -2824,7 +2804,7 @@ int gpiod_hog(struct gpio_desc *desc, const char *name,
                return status;
        }
 
-       status = gpiod_configure_flags(desc, name, dflags);
+       status = gpiod_configure_flags(desc, name, lflags, dflags);
        if (status < 0) {
                pr_err("setup of hog GPIO %s (chip %s, offset %d) failed, %d\n",
                       name, chip->label, hwnum, status);
index ec2a7ad..91e25f9 100644 (file)
@@ -98,7 +98,6 @@
 #define PCIE_BUS_CLK                10000
 #define TCLK                        (PCIE_BUS_CLK / 10)
 
-#define CEILING_UCHAR(double) ((double-(uint8_t)(double)) > 0 ? (uint8_t)(double+1) : (uint8_t)(double))
 
 static const uint16_t polaris10_clock_stretcher_lookup_table[2][4] =
 { {600, 1050, 3, 0}, {600, 1050, 6, 1} };
@@ -733,7 +732,7 @@ static int polaris10_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr,
                        table->Smio[level] |=
                                data->mvdd_voltage_table.entries[level].smio_low;
                }
-               table->SmioMask2 = data->vddci_voltage_table.mask_low;
+               table->SmioMask2 = data->mvdd_voltage_table.mask_low;
 
                table->MvddLevelCount = (uint32_t) PP_HOST_TO_SMC_UL(count);
        }
@@ -1807,27 +1806,25 @@ static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr)
 
        ro = efuse * (max -min)/255 + min;
 
-       /* Populate Sclk_CKS_masterEn0_7 and Sclk_voltageOffset
-        * there is a little difference in calculating
-        * volt_with_cks with windows */
+       /* Populate Sclk_CKS_masterEn0_7 and Sclk_voltageOffset */
        for (i = 0; i < sclk_table->count; i++) {
                data->smc_state_table.Sclk_CKS_masterEn0_7 |=
                                sclk_table->entries[i].cks_enable << i;
                if (hwmgr->chip_id == CHIP_POLARIS10) {
-                       volt_without_cks = (uint32_t)((2753594000 + (sclk_table->entries[i].clk/100) * 136418 -(ro - 70) * 1000000) / \
+                       volt_without_cks = (uint32_t)((2753594000U + (sclk_table->entries[i].clk/100) * 136418 -(ro - 70) * 1000000) / \
                                                (2424180 - (sclk_table->entries[i].clk/100) * 1132925/1000));
-                       volt_with_cks = (uint32_t)((279720200 + sclk_table->entries[i].clk * 3232 - (ro - 65) * 100000000) / \
-                                       (252248000 - sclk_table->entries[i].clk/100 * 115764));
+                       volt_with_cks = (uint32_t)((2797202000U + sclk_table->entries[i].clk/100 * 3232 - (ro - 65) * 1000000) / \
+                                       (2522480 - sclk_table->entries[i].clk/100 * 115764/100));
                } else {
-                       volt_without_cks = (uint32_t)((2416794800 + (sclk_table->entries[i].clk/100) * 1476925/10 -(ro - 50) * 1000000) / \
-                                               (2625416 - (sclk_table->entries[i].clk/100) * 12586807/10000));
-                       volt_with_cks = (uint32_t)((2999656000 + sclk_table->entries[i].clk * 392803/100 - (ro - 44) * 1000000) / \
-                                       (3422454 - sclk_table->entries[i].clk/100 * 18886376/10000));
+                       volt_without_cks = (uint32_t)((2416794800U + (sclk_table->entries[i].clk/100) * 1476925/10 -(ro - 50) * 1000000) / \
+                                               (2625416 - (sclk_table->entries[i].clk/100) * (12586807/10000)));
+                       volt_with_cks = (uint32_t)((2999656000U - sclk_table->entries[i].clk/100 * 392803 - (ro - 44) * 1000000) / \
+                                       (3422454 - sclk_table->entries[i].clk/100 * (18886376/10000)));
                }
 
                if (volt_without_cks >= volt_with_cks)
-                       volt_offset = (uint8_t)CEILING_UCHAR((volt_without_cks - volt_with_cks +
-                                       sclk_table->entries[i].cks_voffset) * 100 / 625);
+                       volt_offset = (uint8_t)(((volt_without_cks - volt_with_cks +
+                                       sclk_table->entries[i].cks_voffset) * 100 + 624) / 625);
 
                data->smc_state_table.Sclk_voltageOffset[i] = volt_offset;
        }
@@ -2685,7 +2682,7 @@ static int polaris10_get_evv_voltages(struct pp_hwmgr *hwmgr)
 {
        struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
        uint16_t vv_id;
-       uint16_t vddc = 0;
+       uint32_t vddc = 0;
        uint16_t i, j;
        uint32_t sclk = 0;
        struct phm_ppt_v1_information *table_info =
@@ -2716,8 +2713,9 @@ static int polaris10_get_evv_voltages(struct pp_hwmgr *hwmgr)
                                                continue);
 
 
-                       /* need to make sure vddc is less than 2v or else, it could burn the ASIC. */
-                       PP_ASSERT_WITH_CODE((vddc < 2000 && vddc != 0),
+                       /* need to make sure vddc is less than 2v or else, it could burn the ASIC.
+                        * real voltage level in unit of 0.01mv */
+                       PP_ASSERT_WITH_CODE((vddc < 200000 && vddc != 0),
                                        "Invalid VDDC value", result = -EINVAL;);
 
                        /* the voltage should not be zero nor equal to leakage ID */
index bf4e18f..90b35c5 100644 (file)
@@ -1256,7 +1256,7 @@ int atomctrl_set_ac_timing_ai(struct pp_hwmgr *hwmgr, uint32_t memory_clock,
 }
 
 int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_type,
-                               uint32_t sclk, uint16_t virtual_voltage_Id, uint16_t *voltage)
+                               uint32_t sclk, uint16_t virtual_voltage_Id, uint32_t *voltage)
 {
 
        int result;
@@ -1274,7 +1274,7 @@ int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_
        if (0 != result)
                return result;
 
-       *voltage = get_voltage_info_param_space.usVoltageLevel;
+       *voltage = ((GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_3 *)(&get_voltage_info_param_space))->ulVoltageLevel;
 
        return result;
 }
index 248c5db..1e35a96 100644 (file)
@@ -305,7 +305,7 @@ extern int atomctrl_get_engine_pll_dividers_ai(struct pp_hwmgr *hwmgr, uint32_t
 extern int atomctrl_set_ac_timing_ai(struct pp_hwmgr *hwmgr, uint32_t memory_clock,
                                                                uint8_t level);
 extern int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_type,
-                               uint32_t sclk, uint16_t virtual_voltage_Id, uint16_t *voltage);
+                               uint32_t sclk, uint16_t virtual_voltage_Id, uint32_t *voltage);
 extern int atomctrl_get_smc_sclk_range_table(struct pp_hwmgr *hwmgr, struct pp_atom_ctrl_sclk_range_table *table);
 
 extern int atomctrl_get_avfs_information(struct pp_hwmgr *hwmgr, struct pp_atom_ctrl__avfs_parameters *param);
index 233eb7f..5d0f655 100644 (file)
@@ -1302,7 +1302,7 @@ static int tonga_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr,
                        table->Smio[count] |=
                                data->mvdd_voltage_table.entries[count].smio_low;
                }
-               table->SmioMask2 = data->vddci_voltage_table.mask_low;
+               table->SmioMask2 = data->mvdd_voltage_table.mask_low;
 
                CONVERT_FROM_HOST_TO_SMC_UL(table->MvddLevelCount);
        }
index 671fdb4..dccc859 100644 (file)
@@ -302,7 +302,7 @@ static int init_dpm_2_parameters(
                        (((unsigned long)powerplay_table) + le16_to_cpu(powerplay_table->usPPMTableOffset));
 
                if (0 != powerplay_table->usPPMTableOffset) {
-                       if (1 == get_platform_power_management_table(hwmgr, atom_ppm_table)) {
+                       if (get_platform_power_management_table(hwmgr, atom_ppm_table) == 0) {
                                phm_cap_set(hwmgr->platform_descriptor.platformCaps,
                                        PHM_PlatformCaps_EnablePlatformPowerManagement);
                        }
index 22706c0..49bd5da 100644 (file)
@@ -40,7 +40,8 @@ static int
 gf119_sor_dp_pattern(struct nvkm_output_dp *outp, int pattern)
 {
        struct nvkm_device *device = outp->base.disp->engine.subdev.device;
-       nvkm_mask(device, 0x61c110, 0x0f0f0f0f, 0x01010101 * pattern);
+       const u32 soff = gf119_sor_soff(outp);
+       nvkm_mask(device, 0x61c110 + soff, 0x0f0f0f0f, 0x01010101 * pattern);
        return 0;
 }
 
index 4182a21..41cacec 100644 (file)
@@ -65,6 +65,14 @@ static void sun4i_crtc_disable(struct drm_crtc *crtc)
        DRM_DEBUG_DRIVER("Disabling the CRTC\n");
 
        sun4i_tcon_disable(drv->tcon);
+
+       if (crtc->state->event && !crtc->state->active) {
+               spin_lock_irq(&crtc->dev->event_lock);
+               drm_crtc_send_vblank_event(crtc, crtc->state->event);
+               spin_unlock_irq(&crtc->dev->event_lock);
+
+               crtc->state->event = NULL;
+       }
 }
 
 static void sun4i_crtc_enable(struct drm_crtc *crtc)
index 257d2b4..937394c 100644 (file)
@@ -92,7 +92,7 @@ static struct drm_driver sun4i_drv_driver = {
        /* Frame Buffer Operations */
 
        /* VBlank Operations */
-       .get_vblank_counter     = drm_vblank_count,
+       .get_vblank_counter     = drm_vblank_no_hw_counter,
        .enable_vblank          = sun4i_drv_enable_vblank,
        .disable_vblank         = sun4i_drv_disable_vblank,
 };
@@ -310,6 +310,7 @@ static int sun4i_drv_probe(struct platform_device *pdev)
 
                count += sun4i_drv_add_endpoints(&pdev->dev, &match,
                                                pipeline);
+               of_node_put(pipeline);
 
                DRM_DEBUG_DRIVER("Queued %d outputs on pipeline %d\n",
                                 count, i);
index d091def..59741ea 100644 (file)
@@ -1568,13 +1568,23 @@ static int __init amd_iommu_init_pci(void)
                        break;
        }
 
+       /*
+        * Order is important here to make sure any unity map requirements are
+        * fulfilled. The unity mappings are created and written to the device
+        * table during the amd_iommu_init_api() call.
+        *
+        * After that we call init_device_table_dma() to make sure any
+        * uninitialized DTE will block DMA, and in the end we flush the caches
+        * of all IOMMUs to make sure the changes to the device table are
+        * active.
+        */
+       ret = amd_iommu_init_api();
+
        init_device_table_dma();
 
        for_each_iommu(iommu)
                iommu_flush_all_caches(iommu);
 
-       ret = amd_iommu_init_api();
-
        if (!ret)
                print_iommu_info();
 
index cfe410e..323dac9 100644 (file)
@@ -4602,13 +4602,13 @@ static void free_all_cpu_cached_iovas(unsigned int cpu)
        for (i = 0; i < g_num_of_iommus; i++) {
                struct intel_iommu *iommu = g_iommus[i];
                struct dmar_domain *domain;
-               u16 did;
+               int did;
 
                if (!iommu)
                        continue;
 
                for (did = 0; did < cap_ndoms(iommu->cap); did++) {
-                       domain = get_iommu_domain(iommu, did);
+                       domain = get_iommu_domain(iommu, (u16)did);
 
                        if (!domain)
                                continue;
index 8a4adbe..70ed1d0 100644 (file)
@@ -718,7 +718,7 @@ static int gic_shared_irq_domain_map(struct irq_domain *d, unsigned int virq,
 
        spin_lock_irqsave(&gic_lock, flags);
        gic_map_to_pin(intr, gic_cpu_pin);
-       gic_map_to_vpe(intr, vpe);
+       gic_map_to_vpe(intr, mips_cm_vp_id(vpe));
        for (i = 0; i < min(gic_vpes, NR_CPUS); i++)
                clear_bit(intr, pcpu_masks[i].pcpu_mask);
        set_bit(intr, pcpu_masks[vpe].pcpu_mask);
@@ -959,7 +959,7 @@ int gic_ipi_domain_match(struct irq_domain *d, struct device_node *node,
        switch (bus_token) {
        case DOMAIN_BUS_IPI:
                is_ipi = d->bus_token == bus_token;
-               return to_of_node(d->fwnode) == node && is_ipi;
+               return (!node || to_of_node(d->fwnode) == node) && is_ipi;
                break;
        default:
                return 0;
index ca81f46..edc70ff 100644 (file)
@@ -101,11 +101,14 @@ enum ad_link_speed_type {
 #define MAC_ADDRESS_EQUAL(A, B)        \
        ether_addr_equal_64bits((const u8 *)A, (const u8 *)B)
 
-static struct mac_addr null_mac_addr = { { 0, 0, 0, 0, 0, 0 } };
+static const u8 null_mac_addr[ETH_ALEN + 2] __long_aligned = {
+       0, 0, 0, 0, 0, 0
+};
 static u16 ad_ticks_per_sec;
 static const int ad_delta_in_ticks = (AD_TIMER_INTERVAL * HZ) / 1000;
 
-static const u8 lacpdu_mcast_addr[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
+static const u8 lacpdu_mcast_addr[ETH_ALEN + 2] __long_aligned =
+       MULTICAST_LACPDU_ADDR;
 
 /* ================= main 802.3ad protocol functions ================== */
 static int ad_lacpdu_send(struct port *port);
@@ -1739,7 +1742,7 @@ static void ad_clear_agg(struct aggregator *aggregator)
                aggregator->is_individual = false;
                aggregator->actor_admin_aggregator_key = 0;
                aggregator->actor_oper_aggregator_key = 0;
-               aggregator->partner_system = null_mac_addr;
+               eth_zero_addr(aggregator->partner_system.mac_addr_value);
                aggregator->partner_system_priority = 0;
                aggregator->partner_oper_aggregator_key = 0;
                aggregator->receive_state = 0;
@@ -1761,7 +1764,7 @@ static void ad_initialize_agg(struct aggregator *aggregator)
        if (aggregator) {
                ad_clear_agg(aggregator);
 
-               aggregator->aggregator_mac_address = null_mac_addr;
+               eth_zero_addr(aggregator->aggregator_mac_address.mac_addr_value);
                aggregator->aggregator_identifier = 0;
                aggregator->slave = NULL;
        }
index c5ac160..551f0f8 100644 (file)
 
 
 
-#ifndef __long_aligned
-#define __long_aligned __attribute__((aligned((sizeof(long)))))
-#endif
-static const u8 mac_bcast[ETH_ALEN] __long_aligned = {
+static const u8 mac_bcast[ETH_ALEN + 2] __long_aligned = {
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 };
-static const u8 mac_v6_allmcast[ETH_ALEN] __long_aligned = {
+static const u8 mac_v6_allmcast[ETH_ALEN + 2] __long_aligned = {
        0x33, 0x33, 0x00, 0x00, 0x00, 0x01
 };
 static const int alb_delta_in_ticks = HZ / ALB_TIMER_TICKS_PER_SEC;
index 941ec99..a2afa3b 100644 (file)
@@ -1584,6 +1584,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
        }
 
        /* check for initial state */
+       new_slave->link = BOND_LINK_NOCHANGE;
        if (bond->params.miimon) {
                if (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS) {
                        if (bond->params.updelay) {
index 543bf38..bfa26a2 100644 (file)
@@ -392,7 +392,7 @@ static void bcm_sysport_get_stats(struct net_device *dev,
                else
                        p = (char *)priv;
                p += s->stat_offset;
-               data[i] = *(u32 *)p;
+               data[i] = *(unsigned long *)p;
        }
 }
 
index c4b262c..2accab3 100644 (file)
@@ -36,8 +36,8 @@
 #define __T4FW_VERSION_H__
 
 #define T4FW_VERSION_MAJOR 0x01
-#define T4FW_VERSION_MINOR 0x0E
-#define T4FW_VERSION_MICRO 0x04
+#define T4FW_VERSION_MINOR 0x0F
+#define T4FW_VERSION_MICRO 0x25
 #define T4FW_VERSION_BUILD 0x00
 
 #define T4FW_MIN_VERSION_MAJOR 0x01
@@ -45,8 +45,8 @@
 #define T4FW_MIN_VERSION_MICRO 0x00
 
 #define T5FW_VERSION_MAJOR 0x01
-#define T5FW_VERSION_MINOR 0x0E
-#define T5FW_VERSION_MICRO 0x04
+#define T5FW_VERSION_MINOR 0x0F
+#define T5FW_VERSION_MICRO 0x25
 #define T5FW_VERSION_BUILD 0x00
 
 #define T5FW_MIN_VERSION_MAJOR 0x00
@@ -54,8 +54,8 @@
 #define T5FW_MIN_VERSION_MICRO 0x00
 
 #define T6FW_VERSION_MAJOR 0x01
-#define T6FW_VERSION_MINOR 0x0E
-#define T6FW_VERSION_MICRO 0x04
+#define T6FW_VERSION_MINOR 0x0F
+#define T6FW_VERSION_MICRO 0x25
 #define T6FW_VERSION_BUILD 0x00
 
 #define T6FW_MIN_VERSION_MAJOR 0x00
index 73f7452..2b2e2f8 100644 (file)
@@ -154,16 +154,6 @@ void __ew32(struct e1000_hw *hw, unsigned long reg, u32 val)
        writel(val, hw->hw_addr + reg);
 }
 
-static bool e1000e_vlan_used(struct e1000_adapter *adapter)
-{
-       u16 vid;
-
-       for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
-               return true;
-
-       return false;
-}
-
 /**
  * e1000_regdump - register printout routine
  * @hw: pointer to the HW structure
@@ -3453,8 +3443,7 @@ static void e1000e_set_rx_mode(struct net_device *netdev)
 
        ew32(RCTL, rctl);
 
-       if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX ||
-           e1000e_vlan_used(adapter))
+       if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
                e1000e_vlan_strip_enable(adapter);
        else
                e1000e_vlan_strip_disable(adapter);
@@ -6926,6 +6915,14 @@ static netdev_features_t e1000_fix_features(struct net_device *netdev,
        if ((hw->mac.type >= e1000_pch2lan) && (netdev->mtu > ETH_DATA_LEN))
                features &= ~NETIF_F_RXFCS;
 
+       /* Since there is no support for separate Rx/Tx vlan accel
+        * enable/disable make sure Tx flag is always in same state as Rx.
+        */
+       if (features & NETIF_F_HW_VLAN_CTAG_RX)
+               features |= NETIF_F_HW_VLAN_CTAG_TX;
+       else
+               features &= ~NETIF_F_HW_VLAN_CTAG_TX;
+
        return features;
 }
 
index 61a80da..2819abc 100644 (file)
@@ -85,7 +85,7 @@ static s32 ixgbevf_poll_for_ack(struct ixgbe_hw *hw)
 static s32 ixgbevf_read_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size)
 {
        struct ixgbe_mbx_info *mbx = &hw->mbx;
-       s32 ret_val = -IXGBE_ERR_MBX;
+       s32 ret_val = IXGBE_ERR_MBX;
 
        if (!mbx->ops.read)
                goto out;
@@ -111,7 +111,7 @@ out:
 static s32 ixgbevf_write_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size)
 {
        struct ixgbe_mbx_info *mbx = &hw->mbx;
-       s32 ret_val = -IXGBE_ERR_MBX;
+       s32 ret_val = IXGBE_ERR_MBX;
 
        /* exit if either we can't write or there isn't a defined timeout */
        if (!mbx->ops.write || !mbx->timeout)
index a6d26d3..d5d263b 100644 (file)
@@ -3458,6 +3458,8 @@ static int mvneta_open(struct net_device *dev)
        return 0;
 
 err_free_irq:
+       unregister_cpu_notifier(&pp->cpu_notifier);
+       on_each_cpu(mvneta_percpu_disable, pp, true);
        free_percpu_irq(pp->dev->irq, pp->ports);
 err_cleanup_txqs:
        mvneta_cleanup_txqs(pp);
index 0b49862..d6e2a1c 100644 (file)
@@ -295,6 +295,12 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_DESTROY_FLOW_GROUP:
        case MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY:
        case MLX5_CMD_OP_DEALLOC_FLOW_COUNTER:
+       case MLX5_CMD_OP_2ERR_QP:
+       case MLX5_CMD_OP_2RST_QP:
+       case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT:
+       case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
+       case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+       case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT:
                return MLX5_CMD_STAT_OK;
 
        case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -321,8 +327,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_RTR2RTS_QP:
        case MLX5_CMD_OP_RTS2RTS_QP:
        case MLX5_CMD_OP_SQERR2RTS_QP:
-       case MLX5_CMD_OP_2ERR_QP:
-       case MLX5_CMD_OP_2RST_QP:
        case MLX5_CMD_OP_QUERY_QP:
        case MLX5_CMD_OP_SQD_RTS_QP:
        case MLX5_CMD_OP_INIT2INIT_QP:
@@ -342,7 +346,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
        case MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT:
        case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT:
-       case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT:
        case MLX5_CMD_OP_QUERY_ROCE_ADDRESS:
        case MLX5_CMD_OP_SET_ROCE_ADDRESS:
        case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
@@ -390,11 +393,12 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_CREATE_RQT:
        case MLX5_CMD_OP_MODIFY_RQT:
        case MLX5_CMD_OP_QUERY_RQT:
+
        case MLX5_CMD_OP_CREATE_FLOW_TABLE:
        case MLX5_CMD_OP_QUERY_FLOW_TABLE:
        case MLX5_CMD_OP_CREATE_FLOW_GROUP:
        case MLX5_CMD_OP_QUERY_FLOW_GROUP:
-       case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+
        case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
        case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
        case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
@@ -602,11 +606,36 @@ static void dump_command(struct mlx5_core_dev *dev,
                pr_debug("\n");
 }
 
+static u16 msg_to_opcode(struct mlx5_cmd_msg *in)
+{
+       struct mlx5_inbox_hdr *hdr = (struct mlx5_inbox_hdr *)(in->first.data);
+
+       return be16_to_cpu(hdr->opcode);
+}
+
+static void cb_timeout_handler(struct work_struct *work)
+{
+       struct delayed_work *dwork = container_of(work, struct delayed_work,
+                                                 work);
+       struct mlx5_cmd_work_ent *ent = container_of(dwork,
+                                                    struct mlx5_cmd_work_ent,
+                                                    cb_timeout_work);
+       struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev,
+                                                cmd);
+
+       ent->ret = -ETIMEDOUT;
+       mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
+                      mlx5_command_str(msg_to_opcode(ent->in)),
+                      msg_to_opcode(ent->in));
+       mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
+}
+
 static void cmd_work_handler(struct work_struct *work)
 {
        struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
        struct mlx5_cmd *cmd = ent->cmd;
        struct mlx5_core_dev *dev = container_of(cmd, struct mlx5_core_dev, cmd);
+       unsigned long cb_timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
        struct mlx5_cmd_layout *lay;
        struct semaphore *sem;
        unsigned long flags;
@@ -647,6 +676,9 @@ static void cmd_work_handler(struct work_struct *work)
        dump_command(dev, ent, 1);
        ent->ts1 = ktime_get_ns();
 
+       if (ent->callback)
+               schedule_delayed_work(&ent->cb_timeout_work, cb_timeout);
+
        /* ring doorbell after the descriptor is valid */
        mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx);
        wmb();
@@ -691,13 +723,6 @@ static const char *deliv_status_to_str(u8 status)
        }
 }
 
-static u16 msg_to_opcode(struct mlx5_cmd_msg *in)
-{
-       struct mlx5_inbox_hdr *hdr = (struct mlx5_inbox_hdr *)(in->first.data);
-
-       return be16_to_cpu(hdr->opcode);
-}
-
 static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
 {
        unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
@@ -706,13 +731,13 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
 
        if (cmd->mode == CMD_MODE_POLLING) {
                wait_for_completion(&ent->done);
-               err = ent->ret;
-       } else {
-               if (!wait_for_completion_timeout(&ent->done, timeout))
-                       err = -ETIMEDOUT;
-               else
-                       err = 0;
+       } else if (!wait_for_completion_timeout(&ent->done, timeout)) {
+               ent->ret = -ETIMEDOUT;
+               mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
        }
+
+       err = ent->ret;
+
        if (err == -ETIMEDOUT) {
                mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
                               mlx5_command_str(msg_to_opcode(ent->in)),
@@ -761,6 +786,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
        if (!callback)
                init_completion(&ent->done);
 
+       INIT_DELAYED_WORK(&ent->cb_timeout_work, cb_timeout_handler);
        INIT_WORK(&ent->work, cmd_work_handler);
        if (page_queue) {
                cmd_work_handler(&ent->work);
@@ -770,28 +796,26 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
                goto out_free;
        }
 
-       if (!callback) {
-               err = wait_func(dev, ent);
-               if (err == -ETIMEDOUT)
-                       goto out;
-
-               ds = ent->ts2 - ent->ts1;
-               op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode);
-               if (op < ARRAY_SIZE(cmd->stats)) {
-                       stats = &cmd->stats[op];
-                       spin_lock_irq(&stats->lock);
-                       stats->sum += ds;
-                       ++stats->n;
-                       spin_unlock_irq(&stats->lock);
-               }
-               mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME,
-                                  "fw exec time for %s is %lld nsec\n",
-                                  mlx5_command_str(op), ds);
-               *status = ent->status;
-               free_cmd(ent);
-       }
+       if (callback)
+               goto out;
 
-       return err;
+       err = wait_func(dev, ent);
+       if (err == -ETIMEDOUT)
+               goto out_free;
+
+       ds = ent->ts2 - ent->ts1;
+       op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode);
+       if (op < ARRAY_SIZE(cmd->stats)) {
+               stats = &cmd->stats[op];
+               spin_lock_irq(&stats->lock);
+               stats->sum += ds;
+               ++stats->n;
+               spin_unlock_irq(&stats->lock);
+       }
+       mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME,
+                          "fw exec time for %s is %lld nsec\n",
+                          mlx5_command_str(op), ds);
+       *status = ent->status;
 
 out_free:
        free_cmd(ent);
@@ -1181,41 +1205,30 @@ err_dbg:
        return err;
 }
 
-void mlx5_cmd_use_events(struct mlx5_core_dev *dev)
+static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode)
 {
        struct mlx5_cmd *cmd = &dev->cmd;
        int i;
 
        for (i = 0; i < cmd->max_reg_cmds; i++)
                down(&cmd->sem);
-
        down(&cmd->pages_sem);
 
-       flush_workqueue(cmd->wq);
-
-       cmd->mode = CMD_MODE_EVENTS;
+       cmd->mode = mode;
 
        up(&cmd->pages_sem);
        for (i = 0; i < cmd->max_reg_cmds; i++)
                up(&cmd->sem);
 }
 
-void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
+void mlx5_cmd_use_events(struct mlx5_core_dev *dev)
 {
-       struct mlx5_cmd *cmd = &dev->cmd;
-       int i;
-
-       for (i = 0; i < cmd->max_reg_cmds; i++)
-               down(&cmd->sem);
-
-       down(&cmd->pages_sem);
-
-       flush_workqueue(cmd->wq);
-       cmd->mode = CMD_MODE_POLLING;
+       mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS);
+}
 
-       up(&cmd->pages_sem);
-       for (i = 0; i < cmd->max_reg_cmds; i++)
-               up(&cmd->sem);
+void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
+{
+       mlx5_cmd_change_mod(dev, CMD_MODE_POLLING);
 }
 
 static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
@@ -1251,6 +1264,8 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec)
                        struct semaphore *sem;
 
                        ent = cmd->ent_arr[i];
+                       if (ent->callback)
+                               cancel_delayed_work(&ent->cb_timeout_work);
                        if (ent->page_queue)
                                sem = &cmd->pages_sem;
                        else
index baa991a..943b1bd 100644 (file)
@@ -145,7 +145,6 @@ struct mlx5e_umr_wqe {
 
 #ifdef CONFIG_MLX5_CORE_EN_DCB
 #define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */
-#define MLX5E_MIN_BW_ALLOC 1   /* Min percentage of BW allocation */
 #endif
 
 struct mlx5e_params {
@@ -191,6 +190,7 @@ struct mlx5e_tstamp {
 enum {
        MLX5E_RQ_STATE_POST_WQES_ENABLE,
        MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS,
+       MLX5E_RQ_STATE_FLUSH_TIMEOUT,
 };
 
 struct mlx5e_cq {
@@ -220,6 +220,8 @@ typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq *rq,
 typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe,
                                  u16 ix);
 
+typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq *rq, u16 ix);
+
 struct mlx5e_dma_info {
        struct page     *page;
        dma_addr_t      addr;
@@ -241,6 +243,7 @@ struct mlx5e_rq {
        struct mlx5e_cq        cq;
        mlx5e_fp_handle_rx_cqe handle_rx_cqe;
        mlx5e_fp_alloc_wqe     alloc_wqe;
+       mlx5e_fp_dealloc_wqe   dealloc_wqe;
 
        unsigned long          state;
        int                    ix;
@@ -305,6 +308,7 @@ struct mlx5e_sq_dma {
 enum {
        MLX5E_SQ_STATE_WAKE_TXQ_ENABLE,
        MLX5E_SQ_STATE_BF_ENABLE,
+       MLX5E_SQ_STATE_TX_TIMEOUT,
 };
 
 struct mlx5e_ico_wqe_info {
@@ -538,6 +542,7 @@ struct mlx5e_priv {
        struct workqueue_struct    *wq;
        struct work_struct         update_carrier_work;
        struct work_struct         set_rx_mode_work;
+       struct work_struct         tx_timeout_work;
        struct delayed_work        update_stats_work;
 
        struct mlx5_core_dev      *mdev;
@@ -589,12 +594,16 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event);
 int mlx5e_napi_poll(struct napi_struct *napi, int budget);
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
+void mlx5e_free_tx_descs(struct mlx5e_sq *sq);
+void mlx5e_free_rx_descs(struct mlx5e_rq *rq);
 
 void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq);
 int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix);
 int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix);
+void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix);
+void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
 void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq);
 void mlx5e_complete_rx_linear_mpwqe(struct mlx5e_rq *rq,
                                    struct mlx5_cqe64 *cqe,
index b2db180..c585349 100644 (file)
@@ -96,7 +96,7 @@ static void mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 *tc_tx_bw,
                        tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC;
                        break;
                case IEEE_8021QAZ_TSA_ETS:
-                       tc_tx_bw[i] = ets->tc_tx_bw[i] ?: MLX5E_MIN_BW_ALLOC;
+                       tc_tx_bw[i] = ets->tc_tx_bw[i];
                        break;
                }
        }
@@ -140,8 +140,12 @@ static int mlx5e_dbcnl_validate_ets(struct ieee_ets *ets)
 
        /* Validate Bandwidth Sum */
        for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
-               if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS)
+               if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
+                       if (!ets->tc_tx_bw[i])
+                               return -EINVAL;
+
                        bw_sum += ets->tc_tx_bw[i];
+               }
        }
 
        if (bw_sum != 0 && bw_sum != 100)
index cb6defd..7a0dca2 100644 (file)
 #include "eswitch.h"
 #include "vxlan.h"
 
+enum {
+       MLX5_EN_QP_FLUSH_TIMEOUT_MS     = 5000,
+       MLX5_EN_QP_FLUSH_MSLEEP_QUANT   = 20,
+       MLX5_EN_QP_FLUSH_MAX_ITER       = MLX5_EN_QP_FLUSH_TIMEOUT_MS /
+                                         MLX5_EN_QP_FLUSH_MSLEEP_QUANT,
+};
+
 struct mlx5e_rq_param {
        u32                        rqc[MLX5_ST_SZ_DW(rqc)];
        struct mlx5_wq_param       wq;
@@ -74,10 +81,13 @@ static void mlx5e_update_carrier(struct mlx5e_priv *priv)
        port_state = mlx5_query_vport_state(mdev,
                MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0);
 
-       if (port_state == VPORT_STATE_UP)
+       if (port_state == VPORT_STATE_UP) {
+               netdev_info(priv->netdev, "Link up\n");
                netif_carrier_on(priv->netdev);
-       else
+       } else {
+               netdev_info(priv->netdev, "Link down\n");
                netif_carrier_off(priv->netdev);
+       }
 }
 
 static void mlx5e_update_carrier_work(struct work_struct *work)
@@ -91,6 +101,26 @@ static void mlx5e_update_carrier_work(struct work_struct *work)
        mutex_unlock(&priv->state_lock);
 }
 
+static void mlx5e_tx_timeout_work(struct work_struct *work)
+{
+       struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+                                              tx_timeout_work);
+       int err;
+
+       rtnl_lock();
+       mutex_lock(&priv->state_lock);
+       if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+               goto unlock;
+       mlx5e_close_locked(priv->netdev);
+       err = mlx5e_open_locked(priv->netdev);
+       if (err)
+               netdev_err(priv->netdev, "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n",
+                          err);
+unlock:
+       mutex_unlock(&priv->state_lock);
+       rtnl_unlock();
+}
+
 static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
 {
        struct mlx5e_sw_stats *s = &priv->stats.sw;
@@ -305,6 +335,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
                }
                rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq;
                rq->alloc_wqe = mlx5e_alloc_rx_mpwqe;
+               rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
 
                rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz);
                rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides);
@@ -320,6 +351,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
                }
                rq->handle_rx_cqe = mlx5e_handle_rx_cqe;
                rq->alloc_wqe = mlx5e_alloc_rx_wqe;
+               rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
 
                rq->wqe_sz = (priv->params.lro_en) ?
                                priv->params.lro_wqe_sz :
@@ -525,17 +557,25 @@ err_destroy_rq:
 
 static void mlx5e_close_rq(struct mlx5e_rq *rq)
 {
+       int tout = 0;
+       int err;
+
        clear_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state);
        napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */
 
-       mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
-       while (!mlx5_wq_ll_is_empty(&rq->wq))
-               msleep(20);
+       err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
+       while (!mlx5_wq_ll_is_empty(&rq->wq) && !err &&
+              tout++ < MLX5_EN_QP_FLUSH_MAX_ITER)
+               msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT);
+
+       if (err || tout == MLX5_EN_QP_FLUSH_MAX_ITER)
+               set_bit(MLX5E_RQ_STATE_FLUSH_TIMEOUT, &rq->state);
 
        /* avoid destroying rq before mlx5e_poll_rx_cq() is done with it */
        napi_synchronize(&rq->channel->napi);
 
        mlx5e_disable_rq(rq);
+       mlx5e_free_rx_descs(rq);
        mlx5e_destroy_rq(rq);
 }
 
@@ -782,6 +822,9 @@ static inline void netif_tx_disable_queue(struct netdev_queue *txq)
 
 static void mlx5e_close_sq(struct mlx5e_sq *sq)
 {
+       int tout = 0;
+       int err;
+
        if (sq->txq) {
                clear_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state);
                /* prevent netif_tx_wake_queue */
@@ -792,15 +835,24 @@ static void mlx5e_close_sq(struct mlx5e_sq *sq)
                if (mlx5e_sq_has_room_for(sq, 1))
                        mlx5e_send_nop(sq, true);
 
-               mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
+               err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY,
+                                     MLX5_SQC_STATE_ERR);
+               if (err)
+                       set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
        }
 
-       while (sq->cc != sq->pc) /* wait till sq is empty */
-               msleep(20);
+       /* wait till sq is empty, unless a TX timeout occurred on this SQ */
+       while (sq->cc != sq->pc &&
+              !test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state)) {
+               msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT);
+               if (tout++ > MLX5_EN_QP_FLUSH_MAX_ITER)
+                       set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
+       }
 
        /* avoid destroying sq before mlx5e_poll_tx_cq() is done with it */
        napi_synchronize(&sq->channel->napi);
 
+       mlx5e_free_tx_descs(sq);
        mlx5e_disable_sq(sq);
        mlx5e_destroy_sq(sq);
 }
@@ -1658,8 +1710,11 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev)
 
        netdev_set_num_tc(netdev, ntc);
 
+       /* Map netdev TCs to offset 0
+        * We have our own UP to TXQ mapping for QoS
+        */
        for (tc = 0; tc < ntc; tc++)
-               netdev_set_tc_queue(netdev, tc, nch, tc * nch);
+               netdev_set_tc_queue(netdev, tc, nch, 0);
 }
 
 int mlx5e_open_locked(struct net_device *netdev)
@@ -2590,6 +2645,29 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb,
        return features;
 }
 
+static void mlx5e_tx_timeout(struct net_device *dev)
+{
+       struct mlx5e_priv *priv = netdev_priv(dev);
+       bool sched_work = false;
+       int i;
+
+       netdev_err(dev, "TX timeout detected\n");
+
+       for (i = 0; i < priv->params.num_channels * priv->params.num_tc; i++) {
+               struct mlx5e_sq *sq = priv->txq_to_sq_map[i];
+
+               if (!netif_tx_queue_stopped(netdev_get_tx_queue(dev, i)))
+                       continue;
+               sched_work = true;
+               set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
+               netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x\n",
+                          i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc);
+       }
+
+       if (sched_work && test_bit(MLX5E_STATE_OPENED, &priv->state))
+               schedule_work(&priv->tx_timeout_work);
+}
+
 static const struct net_device_ops mlx5e_netdev_ops_basic = {
        .ndo_open                = mlx5e_open,
        .ndo_stop                = mlx5e_close,
@@ -2607,6 +2685,7 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = {
 #ifdef CONFIG_RFS_ACCEL
        .ndo_rx_flow_steer       = mlx5e_rx_flow_steer,
 #endif
+       .ndo_tx_timeout          = mlx5e_tx_timeout,
 };
 
 static const struct net_device_ops mlx5e_netdev_ops_sriov = {
@@ -2636,6 +2715,7 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = {
        .ndo_get_vf_config       = mlx5e_get_vf_config,
        .ndo_set_vf_link_state   = mlx5e_set_vf_link_state,
        .ndo_get_vf_stats        = mlx5e_get_vf_stats,
+       .ndo_tx_timeout          = mlx5e_tx_timeout,
 };
 
 static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
@@ -2838,6 +2918,7 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
 
        INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
        INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
+       INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work);
        INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
 }
 
index 022acc2..9f2a16a 100644 (file)
@@ -212,6 +212,20 @@ err_free_skb:
        return -ENOMEM;
 }
 
+void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
+{
+       struct sk_buff *skb = rq->skb[ix];
+
+       if (skb) {
+               rq->skb[ix] = NULL;
+               dma_unmap_single(rq->pdev,
+                                *((dma_addr_t *)skb->cb),
+                                rq->wqe_sz,
+                                DMA_FROM_DEVICE);
+               dev_kfree_skb(skb);
+       }
+}
+
 static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq)
 {
        return rq->mpwqe_num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER;
@@ -574,6 +588,30 @@ int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
        return 0;
 }
 
+void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
+{
+       struct mlx5e_mpw_info *wi = &rq->wqe_info[ix];
+
+       wi->free_wqe(rq, wi);
+}
+
+void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
+{
+       struct mlx5_wq_ll *wq = &rq->wq;
+       struct mlx5e_rx_wqe *wqe;
+       __be16 wqe_ix_be;
+       u16 wqe_ix;
+
+       while (!mlx5_wq_ll_is_empty(wq)) {
+               wqe_ix_be = *wq->tail_next;
+               wqe_ix    = be16_to_cpu(wqe_ix_be);
+               wqe       = mlx5_wq_ll_get_wqe(&rq->wq, wqe_ix);
+               rq->dealloc_wqe(rq, wqe_ix);
+               mlx5_wq_ll_pop(&rq->wq, wqe_ix_be,
+                              &wqe->next.next_wqe_index);
+       }
+}
+
 #define RQ_CANNOT_POST(rq) \
                (!test_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state) || \
                 test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state))
@@ -878,6 +916,9 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
        struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
        int work_done = 0;
 
+       if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH_TIMEOUT, &rq->state)))
+               return 0;
+
        if (cq->decmprs_left)
                work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget);
 
index 5a750b9..5740b46 100644 (file)
@@ -110,8 +110,20 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
        int channel_ix = fallback(dev, skb);
-       int up = (netdev_get_num_tc(dev) && skb_vlan_tag_present(skb)) ?
-                skb->vlan_tci >> VLAN_PRIO_SHIFT : 0;
+       int up = 0;
+
+       if (!netdev_get_num_tc(dev))
+               return channel_ix;
+
+       if (skb_vlan_tag_present(skb))
+               up = skb->vlan_tci >> VLAN_PRIO_SHIFT;
+
+       /* channel_ix can be larger than num_channels since
+        * dev->num_real_tx_queues = num_channels * num_tc
+        */
+       if (channel_ix >= priv->params.num_channels)
+               channel_ix = reciprocal_scale(channel_ix,
+                                             priv->params.num_channels);
 
        return priv->channeltc_to_txq_map[channel_ix][up];
 }
@@ -123,7 +135,7 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq,
         * headers and occur before the data gather.
         * Therefore these headers must be copied into the WQE
         */
-#define MLX5E_MIN_INLINE ETH_HLEN
+#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
 
        if (bf) {
                u16 ihs = skb_headlen(skb);
@@ -135,7 +147,7 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq,
                        return skb_headlen(skb);
        }
 
-       return MLX5E_MIN_INLINE;
+       return max(skb_network_offset(skb), MLX5E_MIN_INLINE);
 }
 
 static inline void mlx5e_tx_skb_pull_inline(unsigned char **skb_data,
@@ -341,6 +353,35 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
        return mlx5e_sq_xmit(sq, skb);
 }
 
+void mlx5e_free_tx_descs(struct mlx5e_sq *sq)
+{
+       struct mlx5e_tx_wqe_info *wi;
+       struct sk_buff *skb;
+       u16 ci;
+       int i;
+
+       while (sq->cc != sq->pc) {
+               ci = sq->cc & sq->wq.sz_m1;
+               skb = sq->skb[ci];
+               wi = &sq->wqe_info[ci];
+
+               if (!skb) { /* nop */
+                       sq->cc++;
+                       continue;
+               }
+
+               for (i = 0; i < wi->num_dma; i++) {
+                       struct mlx5e_sq_dma *dma =
+                               mlx5e_dma_get(sq, sq->dma_fifo_cc++);
+
+                       mlx5e_tx_dma_unmap(sq->pdev, dma);
+               }
+
+               dev_kfree_skb_any(skb);
+               sq->cc += wi->num_wqebbs;
+       }
+}
+
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 {
        struct mlx5e_sq *sq;
@@ -352,6 +393,9 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 
        sq = container_of(cq, struct mlx5e_sq, cq);
 
+       if (unlikely(test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state)))
+               return false;
+
        npkts = 0;
        nbytes = 0;
 
index 42d16b9..96a5946 100644 (file)
@@ -108,15 +108,21 @@ static int in_fatal(struct mlx5_core_dev *dev)
 
 void mlx5_enter_error_state(struct mlx5_core_dev *dev)
 {
+       mutex_lock(&dev->intf_state_mutex);
        if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
-               return;
+               goto unlock;
 
        mlx5_core_err(dev, "start\n");
-       if (pci_channel_offline(dev->pdev) || in_fatal(dev))
+       if (pci_channel_offline(dev->pdev) || in_fatal(dev)) {
                dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+               trigger_cmd_completions(dev);
+       }
 
        mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 0);
        mlx5_core_err(dev, "end\n");
+
+unlock:
+       mutex_unlock(&dev->intf_state_mutex);
 }
 
 static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
@@ -245,7 +251,6 @@ static void poll_health(unsigned long data)
        u32 count;
 
        if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
-               trigger_cmd_completions(dev);
                mod_timer(&health->timer, get_next_poll_jiffies());
                return;
        }
index c65f4a1..6695893 100644 (file)
@@ -1422,46 +1422,31 @@ void mlx5_disable_device(struct mlx5_core_dev *dev)
        mlx5_pci_err_detected(dev->pdev, 0);
 }
 
-/* wait for the device to show vital signs. For now we check
- * that we can read the device ID and that the health buffer
- * shows a non zero value which is different than 0xffffffff
+/* wait for the device to show vital signs by waiting
+ * for the health counter to start counting.
  */
-static void wait_vital(struct pci_dev *pdev)
+static int wait_vital(struct pci_dev *pdev)
 {
        struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
        struct mlx5_core_health *health = &dev->priv.health;
        const int niter = 100;
+       u32 last_count = 0;
        u32 count;
-       u16 did;
        int i;
 
-       /* Wait for firmware to be ready after reset */
-       msleep(1000);
-       for (i = 0; i < niter; i++) {
-               if (pci_read_config_word(pdev, 2, &did)) {
-                       dev_warn(&pdev->dev, "failed reading config word\n");
-                       break;
-               }
-               if (did == pdev->device) {
-                       dev_info(&pdev->dev, "device ID correctly read after %d iterations\n", i);
-                       break;
-               }
-               msleep(50);
-       }
-       if (i == niter)
-               dev_warn(&pdev->dev, "%s-%d: could not read device ID\n", __func__, __LINE__);
-
        for (i = 0; i < niter; i++) {
                count = ioread32be(health->health_counter);
                if (count && count != 0xffffffff) {
-                       dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i);
-                       break;
+                       if (last_count && last_count != count) {
+                               dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i);
+                               return 0;
+                       }
+                       last_count = count;
                }
                msleep(50);
        }
 
-       if (i == niter)
-               dev_warn(&pdev->dev, "%s-%d: could not read device ID\n", __func__, __LINE__);
+       return -ETIMEDOUT;
 }
 
 static void mlx5_pci_resume(struct pci_dev *pdev)
@@ -1473,7 +1458,11 @@ static void mlx5_pci_resume(struct pci_dev *pdev)
        dev_info(&pdev->dev, "%s was called\n", __func__);
 
        pci_save_state(pdev);
-       wait_vital(pdev);
+       err = wait_vital(pdev);
+       if (err) {
+               dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__);
+               return;
+       }
 
        err = mlx5_load_one(dev, priv);
        if (err)
index 9eeee05..32dea35 100644 (file)
@@ -345,7 +345,6 @@ retry:
                               func_id, npages, err);
                goto out_4k;
        }
-       dev->priv.fw_pages += npages;
 
        err = mlx5_cmd_status_to_err(&out.hdr);
        if (err) {
@@ -373,6 +372,33 @@ out_free:
        return err;
 }
 
+static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
+                            struct mlx5_manage_pages_inbox *in, int in_size,
+                            struct mlx5_manage_pages_outbox *out, int out_size)
+{
+       struct fw_page *fwp;
+       struct rb_node *p;
+       u32 npages;
+       u32 i = 0;
+
+       if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
+               return mlx5_cmd_exec_check_status(dev, (u32 *)in, in_size,
+                                                 (u32 *)out, out_size);
+
+       npages = be32_to_cpu(in->num_entries);
+
+       p = rb_first(&dev->priv.page_root);
+       while (p && i < npages) {
+               fwp = rb_entry(p, struct fw_page, rb_node);
+               out->pas[i] = cpu_to_be64(fwp->addr);
+               p = rb_next(p);
+               i++;
+       }
+
+       out->num_entries = cpu_to_be32(i);
+       return 0;
+}
+
 static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
                         int *nclaimed)
 {
@@ -398,15 +424,9 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
        in.func_id = cpu_to_be16(func_id);
        in.num_entries = cpu_to_be32(npages);
        mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
-       err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
+       err = reclaim_pages_cmd(dev, &in, sizeof(in), out, outlen);
        if (err) {
-               mlx5_core_err(dev, "failed reclaiming pages\n");
-               goto out_free;
-       }
-       dev->priv.fw_pages -= npages;
-
-       if (out->hdr.status) {
-               err = mlx5_cmd_status_to_err(&out->hdr);
+               mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err);
                goto out_free;
        }
 
@@ -417,13 +437,15 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
                err = -EINVAL;
                goto out_free;
        }
-       if (nclaimed)
-               *nclaimed = num_claimed;
 
        for (i = 0; i < num_claimed; i++) {
                addr = be64_to_cpu(out->pas[i]);
                free_4k(dev, addr);
        }
+
+       if (nclaimed)
+               *nclaimed = num_claimed;
+
        dev->priv.fw_pages -= num_claimed;
        if (func_id)
                dev->priv.vfs_pages -= num_claimed;
@@ -514,14 +536,10 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
                p = rb_first(&dev->priv.page_root);
                if (p) {
                        fwp = rb_entry(p, struct fw_page, rb_node);
-                       if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
-                               free_4k(dev, fwp->addr);
-                               nclaimed = 1;
-                       } else {
-                               err = reclaim_pages(dev, fwp->func_id,
-                                                   optimal_reclaimed_pages(),
-                                                   &nclaimed);
-                       }
+                       err = reclaim_pages(dev, fwp->func_id,
+                                           optimal_reclaimed_pages(),
+                                           &nclaimed);
+
                        if (err) {
                                mlx5_core_warn(dev, "failed reclaiming pages (%d)\n",
                                               err);
@@ -536,6 +554,13 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
                }
        } while (p);
 
+       WARN(dev->priv.fw_pages,
+            "FW pages counter is %d after reclaiming all pages\n",
+            dev->priv.fw_pages);
+       WARN(dev->priv.vfs_pages,
+            "VFs FW pages counter is %d after reclaiming all pages\n",
+            dev->priv.vfs_pages);
+
        return 0;
 }
 
index daf44cd..91846df 100644 (file)
@@ -513,7 +513,6 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
 {
        int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
        void *nic_vport_context;
-       u8 *guid;
        void *in;
        int err;
 
@@ -535,8 +534,6 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
 
        nic_vport_context = MLX5_ADDR_OF(modify_nic_vport_context_in,
                                         in, nic_vport_context);
-       guid = MLX5_ADDR_OF(nic_vport_context, nic_vport_context,
-                           node_guid);
        MLX5_SET64(nic_vport_context, nic_vport_context, node_guid, node_guid);
 
        err = mlx5_modify_nic_vport_context(mdev, in, inlen);
index 7066954..0a26b11 100644 (file)
@@ -1151,7 +1151,8 @@ static void enc28j60_irq_work_handler(struct work_struct *work)
                        enc28j60_phy_read(priv, PHIR);
                }
                /* TX complete handler */
-               if ((intflags & EIR_TXIF) != 0) {
+               if (((intflags & EIR_TXIF) != 0) &&
+                   ((intflags & EIR_TXERIF) == 0)) {
                        bool err = false;
                        loop++;
                        if (netif_msg_intr(priv))
@@ -1203,7 +1204,7 @@ static void enc28j60_irq_work_handler(struct work_struct *work)
                                        enc28j60_tx_clear(ndev, true);
                        } else
                                enc28j60_tx_clear(ndev, true);
-                       locked_reg_bfclr(priv, EIR, EIR_TXERIF);
+                       locked_reg_bfclr(priv, EIR, EIR_TXERIF | EIR_TXIF);
                }
                /* RX Error handler */
                if ((intflags & EIR_RXERIF) != 0) {
@@ -1238,6 +1239,8 @@ static void enc28j60_irq_work_handler(struct work_struct *work)
  */
 static void enc28j60_hw_tx(struct enc28j60_net *priv)
 {
+       BUG_ON(!priv->tx_skb);
+
        if (netif_msg_tx_queued(priv))
                printk(KERN_DEBUG DRV_NAME
                        ": Tx Packet Len:%d\n", priv->tx_skb->len);
index 607bb7d..87c642d 100644 (file)
@@ -772,6 +772,8 @@ netdev_tx_t qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
        tx_ring->tx_stats.tx_bytes += skb->len;
        tx_ring->tx_stats.xmit_called++;
 
+       /* Ensure writes are complete before HW fetches Tx descriptors */
+       wmb();
        qlcnic_update_cmd_producer(tx_ring);
 
        return NETDEV_TX_OK;
index a473c18..e407126 100644 (file)
@@ -2804,7 +2804,7 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
                                priv->tx_path_in_lpi_mode = true;
                        if (status & CORE_IRQ_TX_PATH_EXIT_LPI_MODE)
                                priv->tx_path_in_lpi_mode = false;
-                       if (status & CORE_IRQ_MTL_RX_OVERFLOW)
+                       if (status & CORE_IRQ_MTL_RX_OVERFLOW && priv->hw->dma->set_rx_tail_ptr)
                                priv->hw->dma->set_rx_tail_ptr(priv->ioaddr,
                                                        priv->rx_tail_addr,
                                                        STMMAC_CHAN0);
index cc39cef..9b3dc3c 100644 (file)
@@ -1072,12 +1072,17 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
 
 static int __geneve_change_mtu(struct net_device *dev, int new_mtu, bool strict)
 {
+       struct geneve_dev *geneve = netdev_priv(dev);
        /* The max_mtu calculation does not take account of GENEVE
         * options, to avoid excluding potentially valid
         * configurations.
         */
-       int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - sizeof(struct iphdr)
-               - dev->hard_header_len;
+       int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len;
+
+       if (geneve->remote.sa.sa_family == AF_INET6)
+               max_mtu -= sizeof(struct ipv6hdr);
+       else
+               max_mtu -= sizeof(struct iphdr);
 
        if (new_mtu < 68)
                return -EINVAL;
index 0e7eff7..8bcd78f 100644 (file)
@@ -2640,6 +2640,7 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb,
                u64_stats_update_begin(&secy_stats->syncp);
                secy_stats->stats.OutPktsUntagged++;
                u64_stats_update_end(&secy_stats->syncp);
+               skb->dev = macsec->real_dev;
                len = skb->len;
                ret = dev_queue_xmit(skb);
                count_tx(dev, ret, len);
index 2afa61b..91177a4 100644 (file)
@@ -57,6 +57,7 @@
 
 /* PHY CTRL bits */
 #define DP83867_PHYCR_FIFO_DEPTH_SHIFT         14
+#define DP83867_PHYCR_FIFO_DEPTH_MASK          (3 << 14)
 
 /* RGMIIDCTL bits */
 #define DP83867_RGMII_TX_CLK_DELAY_SHIFT       4
@@ -133,8 +134,8 @@ static int dp83867_of_init(struct phy_device *phydev)
 static int dp83867_config_init(struct phy_device *phydev)
 {
        struct dp83867_private *dp83867;
-       int ret;
-       u16 val, delay;
+       int ret, val;
+       u16 delay;
 
        if (!phydev->priv) {
                dp83867 = devm_kzalloc(&phydev->mdio.dev, sizeof(*dp83867),
@@ -151,8 +152,12 @@ static int dp83867_config_init(struct phy_device *phydev)
        }
 
        if (phy_interface_is_rgmii(phydev)) {
-               ret = phy_write(phydev, MII_DP83867_PHYCTRL,
-                       (dp83867->fifo_depth << DP83867_PHYCR_FIFO_DEPTH_SHIFT));
+               val = phy_read(phydev, MII_DP83867_PHYCTRL);
+               if (val < 0)
+                       return val;
+               val &= ~DP83867_PHYCR_FIFO_DEPTH_MASK;
+               val |= (dp83867->fifo_depth << DP83867_PHYCR_FIFO_DEPTH_SHIFT);
+               ret = phy_write(phydev, MII_DP83867_PHYCTRL, val);
                if (ret)
                        return ret;
        }
index 53759c3..877c951 100644 (file)
@@ -854,6 +854,13 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_
        if (cdc_ncm_init(dev))
                goto error2;
 
+       /* Some firmwares need a pause here or they will silently fail
+        * to set up the interface properly.  This value was decided
+        * empirically on a Sierra Wireless MC7455 running 02.08.02.00
+        * firmware.
+        */
+       usleep_range(10000, 20000);
+
        /* configure data interface */
        temp = usb_set_interface(dev->udev, iface_no, data_altsetting);
        if (temp) {
index 4e257b8..0da72d3 100644 (file)
@@ -31,7 +31,7 @@
 #define NETNEXT_VERSION                "08"
 
 /* Information for net */
-#define NET_VERSION            "4"
+#define NET_VERSION            "5"
 
 #define DRIVER_VERSION         "v1." NETNEXT_VERSION "." NET_VERSION
 #define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>"
@@ -624,6 +624,7 @@ struct r8152 {
                int (*eee_get)(struct r8152 *, struct ethtool_eee *);
                int (*eee_set)(struct r8152 *, struct ethtool_eee *);
                bool (*in_nway)(struct r8152 *);
+               void (*autosuspend_en)(struct r8152 *tp, bool enable);
        } rtl_ops;
 
        int intr_interval;
@@ -2408,9 +2409,6 @@ static void rtl_runtime_suspend_enable(struct r8152 *tp, bool enable)
        if (enable) {
                u32 ocp_data;
 
-               r8153_u1u2en(tp, false);
-               r8153_u2p3en(tp, false);
-
                __rtl_set_wol(tp, WAKE_ANY);
 
                ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_CONFIG);
@@ -2421,7 +2419,28 @@ static void rtl_runtime_suspend_enable(struct r8152 *tp, bool enable)
 
                ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_NORAML);
        } else {
+               u32 ocp_data;
+
                __rtl_set_wol(tp, tp->saved_wolopts);
+
+               ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_CONFIG);
+
+               ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CONFIG34);
+               ocp_data &= ~LINK_OFF_WAKE_EN;
+               ocp_write_word(tp, MCU_TYPE_PLA, PLA_CONFIG34, ocp_data);
+
+               ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_NORAML);
+       }
+}
+
+static void rtl8153_runtime_enable(struct r8152 *tp, bool enable)
+{
+       rtl_runtime_suspend_enable(tp, enable);
+
+       if (enable) {
+               r8153_u1u2en(tp, false);
+               r8153_u2p3en(tp, false);
+       } else {
                r8153_u2p3en(tp, true);
                r8153_u1u2en(tp, true);
        }
@@ -3512,7 +3531,7 @@ static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message)
                napi_disable(&tp->napi);
                if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
                        rtl_stop_rx(tp);
-                       rtl_runtime_suspend_enable(tp, true);
+                       tp->rtl_ops.autosuspend_en(tp, true);
                } else {
                        cancel_delayed_work_sync(&tp->schedule);
                        tp->rtl_ops.down(tp);
@@ -3538,7 +3557,7 @@ static int rtl8152_resume(struct usb_interface *intf)
 
        if (netif_running(tp->netdev) && tp->netdev->flags & IFF_UP) {
                if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
-                       rtl_runtime_suspend_enable(tp, false);
+                       tp->rtl_ops.autosuspend_en(tp, false);
                        clear_bit(SELECTIVE_SUSPEND, &tp->flags);
                        napi_disable(&tp->napi);
                        set_bit(WORK_ENABLE, &tp->flags);
@@ -3557,7 +3576,7 @@ static int rtl8152_resume(struct usb_interface *intf)
                usb_submit_urb(tp->intr_urb, GFP_KERNEL);
        } else if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
                if (tp->netdev->flags & IFF_UP)
-                       rtl_runtime_suspend_enable(tp, false);
+                       tp->rtl_ops.autosuspend_en(tp, false);
                clear_bit(SELECTIVE_SUSPEND, &tp->flags);
        }
 
@@ -4137,6 +4156,7 @@ static int rtl_ops_init(struct r8152 *tp)
                ops->eee_get            = r8152_get_eee;
                ops->eee_set            = r8152_set_eee;
                ops->in_nway            = rtl8152_in_nway;
+               ops->autosuspend_en     = rtl_runtime_suspend_enable;
                break;
 
        case RTL_VER_03:
@@ -4152,6 +4172,7 @@ static int rtl_ops_init(struct r8152 *tp)
                ops->eee_get            = r8153_get_eee;
                ops->eee_set            = r8153_set_eee;
                ops->in_nway            = rtl8153_in_nway;
+               ops->autosuspend_en     = rtl8153_runtime_enable;
                break;
 
        default:
index 61ba464..6086a01 100644 (file)
@@ -395,8 +395,11 @@ int usbnet_change_mtu (struct net_device *net, int new_mtu)
        dev->hard_mtu = net->mtu + net->hard_header_len;
        if (dev->rx_urb_size == old_hard_mtu) {
                dev->rx_urb_size = dev->hard_mtu;
-               if (dev->rx_urb_size > old_rx_urb_size)
+               if (dev->rx_urb_size > old_rx_urb_size) {
+                       usbnet_pause_rx(dev);
                        usbnet_unlink_rx_urbs(dev);
+                       usbnet_resume_rx(dev);
+               }
        }
 
        /* max qlen depend on hard_mtu and rx_urb_size */
@@ -1508,8 +1511,9 @@ static void usbnet_bh (unsigned long param)
        } else if (netif_running (dev->net) &&
                   netif_device_present (dev->net) &&
                   netif_carrier_ok(dev->net) &&
-                  !timer_pending (&dev->delay) &&
-                  !test_bit (EVENT_RX_HALT, &dev->flags)) {
+                  !timer_pending(&dev->delay) &&
+                  !test_bit(EVENT_RX_PAUSED, &dev->flags) &&
+                  !test_bit(EVENT_RX_HALT, &dev->flags)) {
                int     temp = dev->rxq.qlen;
 
                if (temp < RX_QLEN(dev)) {
index 6d8ee3b..8abd80d 100644 (file)
@@ -151,13 +151,19 @@ static long ec_device_ioctl_xcmd(struct cros_ec_dev *ec, void __user *arg)
                goto exit;
        }
 
+       if (u_cmd.outsize != s_cmd->outsize ||
+           u_cmd.insize != s_cmd->insize) {
+               ret = -EINVAL;
+               goto exit;
+       }
+
        s_cmd->command += ec->cmd_offset;
        ret = cros_ec_cmd_xfer(ec->ec_dev, s_cmd);
        /* Only copy data to userland if data was received. */
        if (ret < 0)
                goto exit;
 
-       if (copy_to_user(arg, s_cmd, sizeof(*s_cmd) + u_cmd.insize))
+       if (copy_to_user(arg, s_cmd, sizeof(*s_cmd) + s_cmd->insize))
                ret = -EFAULT;
 exit:
        kfree(s_cmd);
index 80b1979..df036b8 100644 (file)
@@ -1051,6 +1051,7 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev)
                qeth_l2_set_offline(cgdev);
 
        if (card->dev) {
+               netif_napi_del(&card->napi);
                unregister_netdev(card->dev);
                card->dev = NULL;
        }
index ac54433..709b523 100644 (file)
@@ -3226,6 +3226,7 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev)
                qeth_l3_set_offline(cgdev);
 
        if (card->dev) {
+               netif_napi_del(&card->napi);
                unregister_netdev(card->dev);
                card->dev = NULL;
        }
index d6a691e..d6803a9 100644 (file)
@@ -10093,6 +10093,7 @@ static int ipr_probe_ioa(struct pci_dev *pdev,
                ioa_cfg->intr_flag = IPR_USE_MSI;
        else {
                ioa_cfg->intr_flag = IPR_USE_LSI;
+               ioa_cfg->clear_isr = 1;
                ioa_cfg->nvectors = 1;
                dev_info(&pdev->dev, "Cannot enable MSI.\n");
        }
index 5649c20..a92a62d 100644 (file)
@@ -2548,7 +2548,7 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha,
        if (!vha->flags.online)
                return;
 
-       if (rsp->msix->cpuid != smp_processor_id()) {
+       if (rsp->msix && rsp->msix->cpuid != smp_processor_id()) {
                /* if kernel does not notify qla of IRQ's CPU change,
                 * then set it here.
                 */
index ff41c31..eaccd65 100644 (file)
@@ -429,7 +429,7 @@ static struct scsi_dev_info_list *scsi_dev_info_list_find(const char *vendor,
         * here, and we don't know what device it is
         * trying to work with, leave it as-is.
         */
-       vmax = 8;       /* max length of vendor */
+       vmax = sizeof(devinfo->vendor);
        vskip = vendor;
        while (vmax > 0 && *vskip == ' ') {
                vmax--;
@@ -439,7 +439,7 @@ static struct scsi_dev_info_list *scsi_dev_info_list_find(const char *vendor,
        while (vmax > 0 && vskip[vmax - 1] == ' ')
                --vmax;
 
-       mmax = 16;      /* max length of model */
+       mmax = sizeof(devinfo->model);
        mskip = model;
        while (mmax > 0 && *mskip == ' ') {
                mmax--;
@@ -455,10 +455,12 @@ static struct scsi_dev_info_list *scsi_dev_info_list_find(const char *vendor,
                         * Behave like the older version of get_device_flags.
                         */
                        if (memcmp(devinfo->vendor, vskip, vmax) ||
-                                       devinfo->vendor[vmax])
+                                       (vmax < sizeof(devinfo->vendor) &&
+                                               devinfo->vendor[vmax]))
                                continue;
                        if (memcmp(devinfo->model, mskip, mmax) ||
-                                       devinfo->model[mmax])
+                                       (mmax < sizeof(devinfo->model) &&
+                                               devinfo->model[mmax]))
                                continue;
                        return devinfo;
                } else {
index 076970a..4ce10bc 100644 (file)
@@ -423,36 +423,7 @@ upload:
 
        return 0;
 }
-static int __init check_prereq(void)
-{
-       struct cpuinfo_x86 *c = &cpu_data(0);
-
-       if (!xen_initial_domain())
-               return -ENODEV;
-
-       if (!acpi_gbl_FADT.smi_command)
-               return -ENODEV;
-
-       if (c->x86_vendor == X86_VENDOR_INTEL) {
-               if (!cpu_has(c, X86_FEATURE_EST))
-                       return -ENODEV;
 
-               return 0;
-       }
-       if (c->x86_vendor == X86_VENDOR_AMD) {
-               /* Copied from powernow-k8.h, can't include ../cpufreq/powernow
-                * as we get compile warnings for the static functions.
-                */
-#define CPUID_FREQ_VOLT_CAPABILITIES    0x80000007
-#define USE_HW_PSTATE                   0x00000080
-               u32 eax, ebx, ecx, edx;
-               cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
-               if ((edx & USE_HW_PSTATE) != USE_HW_PSTATE)
-                       return -ENODEV;
-               return 0;
-       }
-       return -ENODEV;
-}
 /* acpi_perf_data is a pointer to percpu data. */
 static struct acpi_processor_performance __percpu *acpi_perf_data;
 
@@ -509,10 +480,10 @@ struct notifier_block xen_acpi_processor_resume_nb = {
 static int __init xen_acpi_processor_init(void)
 {
        unsigned int i;
-       int rc = check_prereq();
+       int rc;
 
-       if (rc)
-               return rc;
+       if (!xen_initial_domain())
+               return -ENODEV;
 
        nr_acpi_bits = get_max_acpi_id() + 1;
        acpi_ids_done = kcalloc(BITS_TO_LONGS(nr_acpi_bits), sizeof(unsigned long), GFP_KERNEL);
index cacf30d..7487971 100644 (file)
@@ -316,11 +316,18 @@ static int xenbus_write_transaction(unsigned msg_type,
                        rc = -ENOMEM;
                        goto out;
                }
+       } else {
+               list_for_each_entry(trans, &u->transactions, list)
+                       if (trans->handle.id == u->u.msg.tx_id)
+                               break;
+               if (&trans->list == &u->transactions)
+                       return -ESRCH;
        }
 
        reply = xenbus_dev_request_and_reply(&u->u.msg);
        if (IS_ERR(reply)) {
-               kfree(trans);
+               if (msg_type == XS_TRANSACTION_START)
+                       kfree(trans);
                rc = PTR_ERR(reply);
                goto out;
        }
@@ -333,12 +340,7 @@ static int xenbus_write_transaction(unsigned msg_type,
                        list_add(&trans->list, &u->transactions);
                }
        } else if (u->u.msg.type == XS_TRANSACTION_END) {
-               list_for_each_entry(trans, &u->transactions, list)
-                       if (trans->handle.id == u->u.msg.tx_id)
-                               break;
-               BUG_ON(&trans->list == &u->transactions);
                list_del(&trans->list);
-
                kfree(trans);
        }
 
index 374b12a..22f7cd7 100644 (file)
@@ -232,10 +232,10 @@ static void transaction_resume(void)
 void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg)
 {
        void *ret;
-       struct xsd_sockmsg req_msg = *msg;
+       enum xsd_sockmsg_type type = msg->type;
        int err;
 
-       if (req_msg.type == XS_TRANSACTION_START)
+       if (type == XS_TRANSACTION_START)
                transaction_start();
 
        mutex_lock(&xs_state.request_mutex);
@@ -249,12 +249,8 @@ void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg)
 
        mutex_unlock(&xs_state.request_mutex);
 
-       if (IS_ERR(ret))
-               return ret;
-
        if ((msg->type == XS_TRANSACTION_END) ||
-           ((req_msg.type == XS_TRANSACTION_START) &&
-            (msg->type == XS_ERROR)))
+           ((type == XS_TRANSACTION_START) && (msg->type == XS_ERROR)))
                transaction_end();
 
        return ret;
index 33b7ee3..bbc1252 100644 (file)
@@ -357,8 +357,6 @@ configfs_write_bin_file(struct file *file, const char __user *buf,
 
        len = simple_write_to_buffer(buffer->bin_buffer,
                        buffer->bin_buffer_size, ppos, buf, count);
-       if (len > 0)
-               *ppos += len;
 out:
        mutex_unlock(&buffer->mutex);
        return len;
index 0d8eb34..e5e29f8 100644 (file)
@@ -45,7 +45,7 @@
  * ecryptfs_to_hex
  * @dst: Buffer to take hex character representation of contents of
  *       src; must be at least of size (src_size * 2)
- * @src: Buffer to be converted to a hex string respresentation
+ * @src: Buffer to be converted to a hex string representation
  * @src_size: number of bytes to convert
  */
 void ecryptfs_to_hex(char *dst, char *src, size_t src_size)
@@ -60,7 +60,7 @@ void ecryptfs_to_hex(char *dst, char *src, size_t src_size)
  * ecryptfs_from_hex
  * @dst: Buffer to take the bytes from src hex; must be at least of
  *       size (src_size / 2)
- * @src: Buffer to be converted from a hex string respresentation to raw value
+ * @src: Buffer to be converted from a hex string representation to raw value
  * @dst_size: size of dst buffer, or number of hex characters pairs to convert
  */
 void ecryptfs_from_hex(char *dst, char *src, int dst_size)
@@ -953,7 +953,7 @@ struct ecryptfs_cipher_code_str_map_elem {
 };
 
 /* Add support for additional ciphers by adding elements here. The
- * cipher_code is whatever OpenPGP applicatoins use to identify the
+ * cipher_code is whatever OpenPGP applications use to identify the
  * ciphers. List in order of probability. */
 static struct ecryptfs_cipher_code_str_map_elem
 ecryptfs_cipher_code_str_map[] = {
@@ -1410,7 +1410,7 @@ int ecryptfs_read_and_validate_xattr_region(struct dentry *dentry,
  *
  * Common entry point for reading file metadata. From here, we could
  * retrieve the header information from the header region of the file,
- * the xattr region of the file, or some other repostory that is
+ * the xattr region of the file, or some other repository that is
  * stored separately from the file itself. The current implementation
  * supports retrieving the metadata information from the file contents
  * and from the xattr region.
index 7000b96..ca4e837 100644 (file)
@@ -169,9 +169,22 @@ out:
        return rc;
 }
 
+static int ecryptfs_mmap(struct file *file, struct vm_area_struct *vma)
+{
+       struct file *lower_file = ecryptfs_file_to_lower(file);
+       /*
+        * Don't allow mmap on top of file systems that don't support it
+        * natively.  If FILESYSTEM_MAX_STACK_DEPTH > 2 or ecryptfs
+        * allows recursive mounting, this will need to be extended.
+        */
+       if (!lower_file->f_op->mmap)
+               return -ENODEV;
+       return generic_file_mmap(file, vma);
+}
+
 /**
  * ecryptfs_open
- * @inode: inode speciying file to open
+ * @inode: inode specifying file to open
  * @file: Structure to return filled in
  *
  * Opens the file specified by inode.
@@ -240,7 +253,7 @@ out:
 
 /**
  * ecryptfs_dir_open
- * @inode: inode speciying file to open
+ * @inode: inode specifying file to open
  * @file: Structure to return filled in
  *
  * Opens the file specified by inode.
@@ -403,7 +416,7 @@ const struct file_operations ecryptfs_main_fops = {
 #ifdef CONFIG_COMPAT
        .compat_ioctl = ecryptfs_compat_ioctl,
 #endif
-       .mmap = generic_file_mmap,
+       .mmap = ecryptfs_mmap,
        .open = ecryptfs_open,
        .flush = ecryptfs_flush,
        .release = ecryptfs_release,
index e818f5a..866bb18 100644 (file)
@@ -25,7 +25,6 @@
 #include <linux/slab.h>
 #include <linux/wait.h>
 #include <linux/mount.h>
-#include <linux/file.h>
 #include "ecryptfs_kernel.h"
 
 struct ecryptfs_open_req {
@@ -148,7 +147,7 @@ int ecryptfs_privileged_open(struct file **lower_file,
        flags |= IS_RDONLY(d_inode(lower_dentry)) ? O_RDONLY : O_RDWR;
        (*lower_file) = dentry_open(&req.path, flags, cred);
        if (!IS_ERR(*lower_file))
-               goto have_file;
+               goto out;
        if ((flags & O_ACCMODE) == O_RDONLY) {
                rc = PTR_ERR((*lower_file));
                goto out;
@@ -166,16 +165,8 @@ int ecryptfs_privileged_open(struct file **lower_file,
        mutex_unlock(&ecryptfs_kthread_ctl.mux);
        wake_up(&ecryptfs_kthread_ctl.wait);
        wait_for_completion(&req.done);
-       if (IS_ERR(*lower_file)) {
+       if (IS_ERR(*lower_file))
                rc = PTR_ERR(*lower_file);
-               goto out;
-       }
-have_file:
-       if ((*lower_file)->f_op->mmap == NULL) {
-               fput(*lower_file);
-               *lower_file = NULL;
-               rc = -EMEDIUMTYPE;
-       }
 out:
        return rc;
 }
index 1698132..6120044 100644 (file)
@@ -738,8 +738,7 @@ static void ecryptfs_free_kmem_caches(void)
                struct ecryptfs_cache_info *info;
 
                info = &ecryptfs_cache_infos[i];
-               if (*(info->cache))
-                       kmem_cache_destroy(*(info->cache));
+               kmem_cache_destroy(*(info->cache));
        }
 }
 
index 989a2ce..fe7e83a 100644 (file)
@@ -483,9 +483,9 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
                goto out_free;
        }
        inode->i_state |= I_WB_SWITCH;
+       __iget(inode);
        spin_unlock(&inode->i_lock);
 
-       ihold(inode);
        isw->inode = inode;
 
        atomic_inc(&isw_nr_in_flight);
index 797ae2e..29c6912 100644 (file)
@@ -78,6 +78,7 @@
 
 /* ACPI PCI Interrupt Link (pci_link.c) */
 
+int acpi_irq_penalty_init(void);
 int acpi_pci_link_allocate_irq(acpi_handle handle, int index, int *triggering,
                               int *polarity, char **name);
 int acpi_pci_link_free_irq(acpi_handle handle);
index d259274..d9aef2a 100644 (file)
@@ -31,6 +31,19 @@ static inline void user_exit(void)
                context_tracking_exit(CONTEXT_USER);
 }
 
+/* Called with interrupts disabled.  */
+static inline void user_enter_irqoff(void)
+{
+       if (context_tracking_is_enabled())
+               __context_tracking_enter(CONTEXT_USER);
+
+}
+static inline void user_exit_irqoff(void)
+{
+       if (context_tracking_is_enabled())
+               __context_tracking_exit(CONTEXT_USER);
+}
+
 static inline enum ctx_state exception_enter(void)
 {
        enum ctx_state prev_ctx;
@@ -69,6 +82,8 @@ static inline enum ctx_state ct_state(void)
 #else
 static inline void user_enter(void) { }
 static inline void user_exit(void) { }
+static inline void user_enter_irqoff(void) { }
+static inline void user_exit_irqoff(void) { }
 static inline enum ctx_state exception_enter(void) { return 0; }
 static inline void exception_exit(enum ctx_state prev_ctx) { }
 static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; }
index 80776d0..fd72ecf 100644 (file)
@@ -629,6 +629,7 @@ struct mlx5_cmd_work_ent {
        void                   *uout;
        int                     uout_size;
        mlx5_cmd_cbk_t          callback;
+       struct delayed_work     cb_timeout_work;
        void                   *context;
        int                     idx;
        struct completion       done;
index e47e533..3d6e981 100644 (file)
@@ -95,27 +95,27 @@ static inline void prandom_seed_state(struct rnd_state *state, u64 seed)
 #ifdef CONFIG_ARCH_RANDOM
 # include <asm/archrandom.h>
 #else
-static inline int arch_get_random_long(unsigned long *v)
+static inline bool arch_get_random_long(unsigned long *v)
 {
        return 0;
 }
-static inline int arch_get_random_int(unsigned int *v)
+static inline bool arch_get_random_int(unsigned int *v)
 {
        return 0;
 }
-static inline int arch_has_random(void)
+static inline bool arch_has_random(void)
 {
        return 0;
 }
-static inline int arch_get_random_seed_long(unsigned long *v)
+static inline bool arch_get_random_seed_long(unsigned long *v)
 {
        return 0;
 }
-static inline int arch_get_random_seed_int(unsigned int *v)
+static inline bool arch_get_random_seed_int(unsigned int *v)
 {
        return 0;
 }
-static inline int arch_has_random_seed(void)
+static inline bool arch_has_random_seed(void)
 {
        return 0;
 }
index ee38a41..f39b371 100644 (file)
@@ -1062,6 +1062,7 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4)
 }
 
 void __skb_get_hash(struct sk_buff *skb);
+u32 __skb_get_hash_symmetric(struct sk_buff *skb);
 u32 skb_get_poff(const struct sk_buff *skb);
 u32 __skb_get_poff(const struct sk_buff *skb, void *data,
                   const struct flow_keys *keys, int hlen);
@@ -2869,6 +2870,25 @@ static inline void skb_postpush_rcsum(struct sk_buff *skb,
                skb->csum = csum_partial(start, len, skb->csum);
 }
 
+/**
+ *     skb_push_rcsum - push skb and update receive checksum
+ *     @skb: buffer to update
+ *     @len: length of data pulled
+ *
+ *     This function performs an skb_push on the packet and updates
+ *     the CHECKSUM_COMPLETE checksum.  It should be used on
+ *     receive path processing instead of skb_push unless you know
+ *     that the checksum difference is zero (e.g., a valid IP header)
+ *     or you are setting ip_summed to CHECKSUM_NONE.
+ */
+static inline unsigned char *skb_push_rcsum(struct sk_buff *skb,
+                                           unsigned int len)
+{
+       skb_push(skb, len);
+       skb_postpush_rcsum(skb, skb->data, len);
+       return skb->data;
+}
+
 /**
  *     pskb_trim_rcsum - trim received skb and update checksum
  *     @skb: buffer to trim
index 791800d..6360c25 100644 (file)
@@ -34,6 +34,9 @@
 
 #define BOND_DEFAULT_MIIMON    100
 
+#ifndef __long_aligned
+#define __long_aligned __attribute__((aligned((sizeof(long)))))
+#endif
 /*
  * Less bad way to call ioctl from within the kernel; this needs to be
  * done some other way to get the call out of interrupt context.
@@ -138,7 +141,9 @@ struct bond_params {
        struct reciprocal_value reciprocal_packets_per_slave;
        u16 ad_actor_sys_prio;
        u16 ad_user_port_key;
-       u8 ad_actor_system[ETH_ALEN];
+
+       /* 2 bytes of padding : see ether_addr_equal_64bits() */
+       u8 ad_actor_system[ETH_ALEN + 2];
 };
 
 struct bond_parm_tbl {
index 37165fb..08f36cd 100644 (file)
@@ -313,10 +313,9 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
        return min(dst->dev->mtu, IP_MAX_MTU);
 }
 
-static inline unsigned int ip_skb_dst_mtu(const struct sk_buff *skb)
+static inline unsigned int ip_skb_dst_mtu(struct sock *sk,
+                                         const struct sk_buff *skb)
 {
-       struct sock *sk = skb->sk;
-
        if (!sk || !sk_fullsock(sk) || ip_sk_use_pmtu(sk)) {
                bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED;
 
index f755a60..c02d897 100644 (file)
@@ -1458,6 +1458,7 @@ config KALLSYMS_ALL
 
 config KALLSYMS_ABSOLUTE_PERCPU
        bool
+       depends on KALLSYMS
        default X86_64 && SMP
 
 config KALLSYMS_BASE_RELATIVE
index 85cd418..43d43a2 100644 (file)
@@ -1678,12 +1678,33 @@ static bool is_orphaned_event(struct perf_event *event)
        return event->state == PERF_EVENT_STATE_DEAD;
 }
 
-static inline int pmu_filter_match(struct perf_event *event)
+static inline int __pmu_filter_match(struct perf_event *event)
 {
        struct pmu *pmu = event->pmu;
        return pmu->filter_match ? pmu->filter_match(event) : 1;
 }
 
+/*
+ * Check whether we should attempt to schedule an event group based on
+ * PMU-specific filtering. An event group can consist of HW and SW events,
+ * potentially with a SW leader, so we must check all the filters, to
+ * determine whether a group is schedulable:
+ */
+static inline int pmu_filter_match(struct perf_event *event)
+{
+       struct perf_event *child;
+
+       if (!__pmu_filter_match(event))
+               return 0;
+
+       list_for_each_entry(child, &event->sibling_list, group_entry) {
+               if (!__pmu_filter_match(child))
+                       return 0;
+       }
+
+       return 1;
+}
+
 static inline int
 event_filter_match(struct perf_event *event)
 {
index bdcbeea..c8c5d2d 100644 (file)
@@ -735,8 +735,6 @@ void post_init_entity_util_avg(struct sched_entity *se)
        }
 }
 
-static inline unsigned long cfs_rq_runnable_load_avg(struct cfs_rq *cfs_rq);
-static inline unsigned long cfs_rq_load_avg(struct cfs_rq *cfs_rq);
 #else
 void init_entity_runnable_average(struct sched_entity *se)
 {
@@ -2499,28 +2497,22 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 # ifdef CONFIG_SMP
-static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq)
+static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
 {
-       long tg_weight;
+       long tg_weight, load, shares;
 
        /*
-        * Use this CPU's real-time load instead of the last load contribution
-        * as the updating of the contribution is delayed, and we will use the
-        * the real-time load to calc the share. See update_tg_load_avg().
+        * This really should be: cfs_rq->avg.load_avg, but instead we use
+        * cfs_rq->load.weight, which is its upper bound. This helps ramp up
+        * the shares for small weight interactive tasks.
         */
-       tg_weight = atomic_long_read(&tg->load_avg);
-       tg_weight -= cfs_rq->tg_load_avg_contrib;
-       tg_weight += cfs_rq->load.weight;
+       load = scale_load_down(cfs_rq->load.weight);
 
-       return tg_weight;
-}
-
-static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
-{
-       long tg_weight, load, shares;
+       tg_weight = atomic_long_read(&tg->load_avg);
 
-       tg_weight = calc_tg_weight(tg, cfs_rq);
-       load = cfs_rq->load.weight;
+       /* Ensure tg_weight >= load */
+       tg_weight -= cfs_rq->tg_load_avg_contrib;
+       tg_weight += load;
 
        shares = (tg->shares * load);
        if (tg_weight)
@@ -2539,6 +2531,7 @@ static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
        return tg->shares;
 }
 # endif /* CONFIG_SMP */
+
 static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
                            unsigned long weight)
 {
@@ -4946,19 +4939,24 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
                return wl;
 
        for_each_sched_entity(se) {
-               long w, W;
+               struct cfs_rq *cfs_rq = se->my_q;
+               long W, w = cfs_rq_load_avg(cfs_rq);
 
-               tg = se->my_q->tg;
+               tg = cfs_rq->tg;
 
                /*
                 * W = @wg + \Sum rw_j
                 */
-               W = wg + calc_tg_weight(tg, se->my_q);
+               W = wg + atomic_long_read(&tg->load_avg);
+
+               /* Ensure \Sum rw_j >= rw_i */
+               W -= cfs_rq->tg_load_avg_contrib;
+               W += w;
 
                /*
                 * w = rw_i + @wl
                 */
-               w = cfs_rq_load_avg(se->my_q) + wl;
+               w += wl;
 
                /*
                 * wl = S * s'_i; see (2)
index ff6a7a6..07d06a8 100644 (file)
@@ -15,9 +15,6 @@ KCOV_INSTRUMENT_rbtree.o := n
 KCOV_INSTRUMENT_list_debug.o := n
 KCOV_INSTRUMENT_debugobjects.o := n
 KCOV_INSTRUMENT_dynamic_debug.o := n
-# Kernel does not boot if we instrument this file as it uses custom calling
-# convention (see CONFIG_ARCH_HWEIGHT_CFLAGS).
-KCOV_INSTRUMENT_hweight.o := n
 
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
         rbtree.o radix-tree.o dump_stack.o timerqueue.o\
@@ -74,8 +71,6 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
 obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o
 obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
 
-GCOV_PROFILE_hweight.o := n
-CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
 
 obj-$(CONFIG_BTREE) += btree.o
index 9a5c1f2..43273a7 100644 (file)
@@ -9,6 +9,7 @@
  * The Hamming Weight of a number is the total number of bits set in it.
  */
 
+#ifndef __HAVE_ARCH_SW_HWEIGHT
 unsigned int __sw_hweight32(unsigned int w)
 {
 #ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER
@@ -25,6 +26,7 @@ unsigned int __sw_hweight32(unsigned int w)
 #endif
 }
 EXPORT_SYMBOL(__sw_hweight32);
+#endif
 
 unsigned int __sw_hweight16(unsigned int w)
 {
@@ -43,6 +45,7 @@ unsigned int __sw_hweight8(unsigned int w)
 }
 EXPORT_SYMBOL(__sw_hweight8);
 
+#ifndef __HAVE_ARCH_SW_HWEIGHT
 unsigned long __sw_hweight64(__u64 w)
 {
 #if BITS_PER_LONG == 32
@@ -65,3 +68,4 @@ unsigned long __sw_hweight64(__u64 w)
 #endif
 }
 EXPORT_SYMBOL(__sw_hweight64);
+#endif
index 2d25979..77e7f69 100644 (file)
@@ -700,7 +700,7 @@ static int
 br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
                  int (*output)(struct net *, struct sock *, struct sk_buff *))
 {
-       unsigned int mtu = ip_skb_dst_mtu(skb);
+       unsigned int mtu = ip_skb_dst_mtu(sk, skb);
        struct iphdr *iph = ip_hdr(skb);
 
        if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
index a669dea..61ad43f 100644 (file)
@@ -651,6 +651,23 @@ void make_flow_keys_digest(struct flow_keys_digest *digest,
 }
 EXPORT_SYMBOL(make_flow_keys_digest);
 
+static struct flow_dissector flow_keys_dissector_symmetric __read_mostly;
+
+u32 __skb_get_hash_symmetric(struct sk_buff *skb)
+{
+       struct flow_keys keys;
+
+       __flow_hash_secret_init();
+
+       memset(&keys, 0, sizeof(keys));
+       __skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys,
+                          NULL, 0, 0, 0,
+                          FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+
+       return __flow_hash_from_keys(&keys, hashrnd);
+}
+EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric);
+
 /**
  * __skb_get_hash: calculate a flow hash
  * @skb: sk_buff to calculate flow hash from
@@ -868,6 +885,29 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = {
        },
 };
 
+static const struct flow_dissector_key flow_keys_dissector_symmetric_keys[] = {
+       {
+               .key_id = FLOW_DISSECTOR_KEY_CONTROL,
+               .offset = offsetof(struct flow_keys, control),
+       },
+       {
+               .key_id = FLOW_DISSECTOR_KEY_BASIC,
+               .offset = offsetof(struct flow_keys, basic),
+       },
+       {
+               .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+               .offset = offsetof(struct flow_keys, addrs.v4addrs),
+       },
+       {
+               .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+               .offset = offsetof(struct flow_keys, addrs.v6addrs),
+       },
+       {
+               .key_id = FLOW_DISSECTOR_KEY_PORTS,
+               .offset = offsetof(struct flow_keys, ports),
+       },
+};
+
 static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = {
        {
                .key_id = FLOW_DISSECTOR_KEY_CONTROL,
@@ -889,6 +929,9 @@ static int __init init_default_flow_dissectors(void)
        skb_flow_dissector_init(&flow_keys_dissector,
                                flow_keys_dissector_keys,
                                ARRAY_SIZE(flow_keys_dissector_keys));
+       skb_flow_dissector_init(&flow_keys_dissector_symmetric,
+                               flow_keys_dissector_symmetric_keys,
+                               ARRAY_SIZE(flow_keys_dissector_symmetric_keys));
        skb_flow_dissector_init(&flow_keys_buf_dissector,
                                flow_keys_buf_dissector_keys,
                                ARRAY_SIZE(flow_keys_buf_dissector_keys));
index f2b77e5..eb12d21 100644 (file)
@@ -3015,24 +3015,6 @@ int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
 }
 EXPORT_SYMBOL_GPL(skb_append_pagefrags);
 
-/**
- *     skb_push_rcsum - push skb and update receive checksum
- *     @skb: buffer to update
- *     @len: length of data pulled
- *
- *     This function performs an skb_push on the packet and updates
- *     the CHECKSUM_COMPLETE checksum.  It should be used on
- *     receive path processing instead of skb_push unless you know
- *     that the checksum difference is zero (e.g., a valid IP header)
- *     or you are setting ip_summed to CHECKSUM_NONE.
- */
-static unsigned char *skb_push_rcsum(struct sk_buff *skb, unsigned len)
-{
-       skb_push(skb, len);
-       skb_postpush_rcsum(skb, skb->data, len);
-       return skb->data;
-}
-
 /**
  *     skb_pull_rcsum - pull skb and update receive checksum
  *     @skb: buffer to update
index df48034..a796fc7 100644 (file)
@@ -41,6 +41,7 @@
 #include <net/dn_fib.h>
 #include <net/dn_neigh.h>
 #include <net/dn_dev.h>
+#include <net/nexthop.h>
 
 #define RT_MIN_TABLE 1
 
@@ -150,14 +151,13 @@ static int dn_fib_count_nhs(const struct nlattr *attr)
        struct rtnexthop *nhp = nla_data(attr);
        int nhs = 0, nhlen = nla_len(attr);
 
-       while(nhlen >= (int)sizeof(struct rtnexthop)) {
-               if ((nhlen -= nhp->rtnh_len) < 0)
-                       return 0;
+       while (rtnh_ok(nhp, nhlen)) {
                nhs++;
-               nhp = RTNH_NEXT(nhp);
+               nhp = rtnh_next(nhp, &nhlen);
        }
 
-       return nhs;
+       /* leftover implies invalid nexthop configuration, discard it */
+       return nhlen > 0 ? 0 : nhs;
 }
 
 static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr,
@@ -167,21 +167,24 @@ static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr,
        int nhlen = nla_len(attr);
 
        change_nexthops(fi) {
-               int attrlen = nhlen - sizeof(struct rtnexthop);
-               if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
+               int attrlen;
+
+               if (!rtnh_ok(nhp, nhlen))
                        return -EINVAL;
 
                nh->nh_flags  = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
                nh->nh_oif    = nhp->rtnh_ifindex;
                nh->nh_weight = nhp->rtnh_hops + 1;
 
-               if (attrlen) {
+               attrlen = rtnh_attrlen(nhp);
+               if (attrlen > 0) {
                        struct nlattr *gw_attr;
 
                        gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY);
                        nh->nh_gw = gw_attr ? nla_get_le16(gw_attr) : 0;
                }
-               nhp = RTNH_NEXT(nhp);
+
+               nhp = rtnh_next(nhp, &nhlen);
        } endfor_nexthops(fi);
 
        return 0;
index 124bf0a..4bd4921 100644 (file)
@@ -271,7 +271,7 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk
                return dst_output(net, sk, skb);
        }
 #endif
-       mtu = ip_skb_dst_mtu(skb);
+       mtu = ip_skb_dst_mtu(sk, skb);
        if (skb_is_gso(skb))
                return ip_finish_output_gso(net, sk, skb, mtu);
 
@@ -541,7 +541,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 
        iph = ip_hdr(skb);
 
-       mtu = ip_skb_dst_mtu(skb);
+       mtu = ip_skb_dst_mtu(sk, skb);
        if (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size < mtu)
                mtu = IPCB(skb)->frag_max_size;
 
index 1bcef23..771be1f 100644 (file)
@@ -177,6 +177,7 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
                }
        }
 
+       free_percpu(non_pcpu_rt->rt6i_pcpu);
        non_pcpu_rt->rt6i_pcpu = NULL;
 }
 
index 9bff6ef..9f0983f 100644 (file)
@@ -1341,7 +1341,7 @@ static unsigned int fanout_demux_hash(struct packet_fanout *f,
                                      struct sk_buff *skb,
                                      unsigned int num)
 {
-       return reciprocal_scale(skb_get_hash(skb), num);
+       return reciprocal_scale(__skb_get_hash_symmetric(skb), num);
 }
 
 static unsigned int fanout_demux_lb(struct packet_fanout *f,
index 74ee126..c8a7b4c 100644 (file)
@@ -616,7 +616,7 @@ static int rds_tcp_init(void)
 
        ret = rds_tcp_recv_init();
        if (ret)
-               goto out_slab;
+               goto out_pernet;
 
        ret = rds_trans_register(&rds_tcp_transport);
        if (ret)
@@ -628,8 +628,9 @@ static int rds_tcp_init(void)
 
 out_recv:
        rds_tcp_recv_exit();
-out_slab:
+out_pernet:
        unregister_pernet_subsys(&rds_tcp_net_ops);
+out_slab:
        kmem_cache_destroy(rds_tcp_conn_slab);
 out:
        return ret;
index 128942b..1f5bd6c 100644 (file)
@@ -181,7 +181,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
 
        if (!(at & AT_EGRESS)) {
                if (m->tcfm_ok_push)
-                       skb_push(skb2, skb->mac_len);
+                       skb_push_rcsum(skb2, skb->mac_len);
        }
 
        /* mirror is always swallowed */
index 3ad9fab..1fd4647 100644 (file)
@@ -604,7 +604,7 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg,
 
        link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]);
        link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP]));
-       nla_strlcpy(link_info.str, nla_data(link[TIPC_NLA_LINK_NAME]),
+       nla_strlcpy(link_info.str, link[TIPC_NLA_LINK_NAME],
                    TIPC_MAX_LINK_NAME);
 
        return tipc_add_tlv(msg->rep, TIPC_TLV_LINK_INFO,
index 2660fbc..7798e16 100644 (file)
@@ -500,34 +500,34 @@ static int apparmor_setprocattr(struct task_struct *task, char *name,
 {
        struct common_audit_data sa;
        struct apparmor_audit_data aad = {0,};
-       char *command, *args = value;
+       char *command, *largs = NULL, *args = value;
        size_t arg_size;
        int error;
 
        if (size == 0)
                return -EINVAL;
-       /* args points to a PAGE_SIZE buffer, AppArmor requires that
-        * the buffer must be null terminated or have size <= PAGE_SIZE -1
-        * so that AppArmor can null terminate them
-        */
-       if (args[size - 1] != '\0') {
-               if (size == PAGE_SIZE)
-                       return -EINVAL;
-               args[size] = '\0';
-       }
-
        /* task can only write its own attributes */
        if (current != task)
                return -EACCES;
 
-       args = value;
+       /* AppArmor requires that the buffer must be null terminated atm */
+       if (args[size - 1] != '\0') {
+               /* null terminate */
+               largs = args = kmalloc(size + 1, GFP_KERNEL);
+               if (!args)
+                       return -ENOMEM;
+               memcpy(args, value, size);
+               args[size] = '\0';
+       }
+
+       error = -EINVAL;
        args = strim(args);
        command = strsep(&args, " ");
        if (!args)
-               return -EINVAL;
+               goto out;
        args = skip_spaces(args);
        if (!*args)
-               return -EINVAL;
+               goto out;
 
        arg_size = size - (args - (char *) value);
        if (strcmp(name, "current") == 0) {
@@ -553,10 +553,12 @@ static int apparmor_setprocattr(struct task_struct *task, char *name,
                        goto fail;
        } else
                /* only support the "current" and "exec" process attributes */
-               return -EINVAL;
+               goto fail;
 
        if (!error)
                error = size;
+out:
+       kfree(largs);
        return error;
 
 fail:
@@ -565,9 +567,9 @@ fail:
        aad.profile = aa_current_profile();
        aad.op = OP_SETPROCATTR;
        aad.info = name;
-       aad.error = -EINVAL;
+       aad.error = error = -EINVAL;
        aa_audit_msg(AUDIT_APPARMOR_DENIED, &sa, NULL);
-       return -EINVAL;
+       goto out;
 }
 
 static int apparmor_task_setrlimit(struct task_struct *task,
index e722022..9a6157e 100644 (file)
@@ -1955,6 +1955,7 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer,
 
                qhead = tu->qhead++;
                tu->qhead %= tu->queue_size;
+               tu->qused--;
                spin_unlock_irq(&tu->qlock);
 
                if (tu->tread) {
@@ -1968,7 +1969,6 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer,
                }
 
                spin_lock_irq(&tu->qlock);
-               tu->qused--;
                if (err < 0)
                        goto _error;
                result += unit;
index 4a054d7..d3125c1 100644 (file)
@@ -1444,9 +1444,8 @@ static int vortex_wtdma_bufshift(vortex_t * vortex, int wtdma)
        int page, p, pp, delta, i;
 
        page =
-           (hwread(vortex->mmio, VORTEX_WTDMA_STAT + (wtdma << 2)) &
-            WT_SUBBUF_MASK)
-           >> WT_SUBBUF_SHIFT;
+           (hwread(vortex->mmio, VORTEX_WTDMA_STAT + (wtdma << 2))
+            >> WT_SUBBUF_SHIFT) & WT_SUBBUF_MASK;
        if (dma->nr_periods >= 4)
                delta = (page - dma->period_real) & 3;
        else {
index 1cb85ae..286f5e3 100644 (file)
@@ -2200,11 +2200,11 @@ static int snd_echo_resume(struct device *dev)
        u32 pipe_alloc_mask;
        int err;
 
-       commpage_bak = kmalloc(sizeof(struct echoaudio), GFP_KERNEL);
+       commpage_bak = kmalloc(sizeof(*commpage), GFP_KERNEL);
        if (commpage_bak == NULL)
                return -ENOMEM;
        commpage = chip->comm_page;
-       memcpy(commpage_bak, commpage, sizeof(struct comm_page));
+       memcpy(commpage_bak, commpage, sizeof(*commpage));
 
        err = init_hw(chip, chip->pci->device, chip->pci->subsystem_device);
        if (err < 0) {
index 320445f..79c7b34 100644 (file)
@@ -3977,6 +3977,8 @@ static hda_nid_t set_path_power(struct hda_codec *codec, hda_nid_t nid,
 
        for (n = 0; n < spec->paths.used; n++) {
                path = snd_array_elem(&spec->paths, n);
+               if (!path->depth)
+                       continue;
                if (path->path[0] == nid ||
                    path->path[path->depth - 1] == nid) {
                        bool pin_old = path->pin_enabled;
index 94089fc..e320c44 100644 (file)
@@ -367,9 +367,10 @@ enum {
 #define IS_SKL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d70)
 #define IS_KBL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa171)
 #define IS_KBL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d71)
+#define IS_KBL_H(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa2f0)
 #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98)
 #define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci)) || \
-                       IS_KBL(pci) || IS_KBL_LP(pci)
+                       IS_KBL(pci) || IS_KBL_LP(pci) || IS_KBL_H(pci)
 
 static char *driver_short_names[] = {
        [AZX_DRIVER_ICH] = "HDA Intel",
@@ -2190,6 +2191,9 @@ static const struct pci_device_id azx_ids[] = {
        /* Kabylake-LP */
        { PCI_DEVICE(0x8086, 0x9d71),
          .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE },
+       /* Kabylake-H */
+       { PCI_DEVICE(0x8086, 0xa2f0),
+         .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE },
        /* Broxton-P(Apollolake) */
        { PCI_DEVICE(0x8086, 0x5a98),
          .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON },
index 900bfbc..5fac786 100644 (file)
@@ -5651,6 +5651,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x17aa, 0x504a, "ThinkPad X260", ALC292_FIXUP_TPT440_DOCK),
        SND_PCI_QUIRK(0x17aa, 0x504b, "Thinkpad", ALC293_FIXUP_LENOVO_SPK_NOISE),
        SND_PCI_QUIRK(0x17aa, 0x5050, "Thinkpad T560p", ALC292_FIXUP_TPT460),
+       SND_PCI_QUIRK(0x17aa, 0x5051, "Thinkpad L460", ALC292_FIXUP_TPT460),
        SND_PCI_QUIRK(0x17aa, 0x5053, "Thinkpad T460", ALC292_FIXUP_TPT460),
        SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
        SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K),
index 4d82a58..f3fb98f 100644 (file)
@@ -483,9 +483,10 @@ config SND_SOC_DMIC
        tristate
 
 config SND_SOC_HDMI_CODEC
-       tristate
-       select SND_PCM_ELD
-       select SND_PCM_IEC958
+       tristate
+       select SND_PCM_ELD
+       select SND_PCM_IEC958
+       select HDMI
 
 config SND_SOC_ES8328
        tristate "Everest Semi ES8328 CODEC"
index 647f69d..5013d2b 100644 (file)
@@ -146,6 +146,7 @@ static const struct regmap_config ak4613_regmap_cfg = {
        .max_register           = 0x16,
        .reg_defaults           = ak4613_reg,
        .num_reg_defaults       = ARRAY_SIZE(ak4613_reg),
+       .cache_type             = REGCACHE_RBTREE,
 };
 
 static const struct of_device_id ak4613_of_match[] = {
@@ -530,7 +531,6 @@ static int ak4613_i2c_remove(struct i2c_client *client)
 static struct i2c_driver ak4613_i2c_driver = {
        .driver = {
                .name = "ak4613-codec",
-               .owner = THIS_MODULE,
                .of_match_table = ak4613_of_match,
        },
        .probe          = ak4613_i2c_probe,
index d6f4abb..fb3885f 100644 (file)
@@ -226,6 +226,7 @@ static int v253_open(struct tty_struct *tty)
        if (!tty->disc_data)
                return -ENODEV;
 
+       tty->receive_room = 16;
        if (tty->ops->write(tty, v253_init, len) != len) {
                ret = -EIO;
                goto err;
index 181cd3b..2abb742 100644 (file)
@@ -1474,6 +1474,11 @@ static int hdmi_codec_probe(struct snd_soc_codec *codec)
         * exit, we call pm_runtime_suspend() so that will do for us
         */
        hlink = snd_hdac_ext_bus_get_link(edev->ebus, dev_name(&edev->hdac.dev));
+       if (!hlink) {
+               dev_err(&edev->hdac.dev, "hdac link not found\n");
+               return -EIO;
+       }
+
        snd_hdac_ext_bus_link_get(edev->ebus, hlink);
 
        ret = create_fill_widget_route_map(dapm);
@@ -1634,6 +1639,11 @@ static int hdac_hdmi_dev_probe(struct hdac_ext_device *edev)
 
        /* hold the ref while we probe */
        hlink = snd_hdac_ext_bus_get_link(edev->ebus, dev_name(&edev->hdac.dev));
+       if (!hlink) {
+               dev_err(&edev->hdac.dev, "hdac link not found\n");
+               return -EIO;
+       }
+
        snd_hdac_ext_bus_link_get(edev->ebus, hlink);
 
        hdmi_priv = devm_kzalloc(&codec->dev, sizeof(*hdmi_priv), GFP_KERNEL);
@@ -1744,6 +1754,11 @@ static int hdac_hdmi_runtime_suspend(struct device *dev)
        }
 
        hlink = snd_hdac_ext_bus_get_link(ebus, dev_name(dev));
+       if (!hlink) {
+               dev_err(dev, "hdac link not found\n");
+               return -EIO;
+       }
+
        snd_hdac_ext_bus_link_put(ebus, hlink);
 
        return 0;
@@ -1765,6 +1780,11 @@ static int hdac_hdmi_runtime_resume(struct device *dev)
                return 0;
 
        hlink = snd_hdac_ext_bus_get_link(ebus, dev_name(dev));
+       if (!hlink) {
+               dev_err(dev, "hdac link not found\n");
+               return -EIO;
+       }
+
        snd_hdac_ext_bus_link_get(ebus, hlink);
 
        err = snd_hdac_display_power(bus, true);
index 3c6594d..d70847c 100644 (file)
@@ -253,7 +253,7 @@ static const struct reg_default rt5650_reg[] = {
        { 0x2b, 0x5454 },
        { 0x2c, 0xaaa0 },
        { 0x2d, 0x0000 },
-       { 0x2f, 0x1002 },
+       { 0x2f, 0x5002 },
        { 0x31, 0x5000 },
        { 0x32, 0x0000 },
        { 0x33, 0x0000 },
index 49a9e70..0af5ddb 100644 (file)
@@ -619,7 +619,7 @@ static const struct snd_kcontrol_new rt5670_snd_controls[] = {
                RT5670_L_MUTE_SFT, RT5670_R_MUTE_SFT, 1, 1),
        SOC_DOUBLE_TLV("HP Playback Volume", RT5670_HP_VOL,
                RT5670_L_VOL_SFT, RT5670_R_VOL_SFT,
-               39, 0, out_vol_tlv),
+               39, 1, out_vol_tlv),
        /* OUTPUT Control */
        SOC_DOUBLE("OUT Channel Switch", RT5670_LOUT1,
                RT5670_VOL_L_SFT, RT5670_VOL_R_SFT, 1, 1),
index da60e3f..e7fe6b7 100644 (file)
@@ -1872,7 +1872,7 @@ static struct snd_soc_dai_driver wm5102_dai[] = {
                .capture = {
                        .stream_name = "Audio Trace CPU",
                        .channels_min = 1,
-                       .channels_max = 6,
+                       .channels_max = 4,
                        .rates = WM5102_RATES,
                        .formats = WM5102_FORMATS,
                },
index b5820e4..d54f1b4 100644 (file)
@@ -1723,6 +1723,7 @@ static const struct snd_soc_dapm_route wm5110_dapm_routes[] = {
        { "OUT2L", NULL, "SYSCLK" },
        { "OUT2R", NULL, "SYSCLK" },
        { "OUT3L", NULL, "SYSCLK" },
+       { "OUT3R", NULL, "SYSCLK" },
        { "OUT4L", NULL, "SYSCLK" },
        { "OUT4R", NULL, "SYSCLK" },
        { "OUT5L", NULL, "SYSCLK" },
index f6f9395..1c60081 100644 (file)
@@ -743,6 +743,7 @@ static const struct regmap_config wm8940_regmap = {
        .max_register = WM8940_MONOMIX,
        .reg_defaults = wm8940_reg_defaults,
        .num_reg_defaults = ARRAY_SIZE(wm8940_reg_defaults),
+       .cache_type = REGCACHE_RBTREE,
 
        .readable_reg = wm8940_readable_register,
        .volatile_reg = wm8940_volatile_register,
index 0f66fda..237dc67 100644 (file)
@@ -1513,8 +1513,9 @@ static struct davinci_mcasp_pdata am33xx_mcasp_pdata = {
 };
 
 static struct davinci_mcasp_pdata dra7_mcasp_pdata = {
-       .tx_dma_offset = 0x200,
-       .rx_dma_offset = 0x284,
+       /* The CFG port offset will be calculated if it is needed */
+       .tx_dma_offset = 0,
+       .rx_dma_offset = 0,
        .version = MCASP_VERSION_4,
 };
 
@@ -1734,6 +1735,52 @@ static int davinci_mcasp_get_dma_type(struct davinci_mcasp *mcasp)
        return PCM_EDMA;
 }
 
+static u32 davinci_mcasp_txdma_offset(struct davinci_mcasp_pdata *pdata)
+{
+       int i;
+       u32 offset = 0;
+
+       if (pdata->version != MCASP_VERSION_4)
+               return pdata->tx_dma_offset;
+
+       for (i = 0; i < pdata->num_serializer; i++) {
+               if (pdata->serial_dir[i] == TX_MODE) {
+                       if (!offset) {
+                               offset = DAVINCI_MCASP_TXBUF_REG(i);
+                       } else {
+                               pr_err("%s: Only one serializer allowed!\n",
+                                      __func__);
+                               break;
+                       }
+               }
+       }
+
+       return offset;
+}
+
+static u32 davinci_mcasp_rxdma_offset(struct davinci_mcasp_pdata *pdata)
+{
+       int i;
+       u32 offset = 0;
+
+       if (pdata->version != MCASP_VERSION_4)
+               return pdata->rx_dma_offset;
+
+       for (i = 0; i < pdata->num_serializer; i++) {
+               if (pdata->serial_dir[i] == RX_MODE) {
+                       if (!offset) {
+                               offset = DAVINCI_MCASP_RXBUF_REG(i);
+                       } else {
+                               pr_err("%s: Only one serializer allowed!\n",
+                                      __func__);
+                               break;
+                       }
+               }
+       }
+
+       return offset;
+}
+
 static int davinci_mcasp_probe(struct platform_device *pdev)
 {
        struct snd_dmaengine_dai_dma_data *dma_data;
@@ -1862,7 +1909,7 @@ static int davinci_mcasp_probe(struct platform_device *pdev)
        if (dat)
                dma_data->addr = dat->start;
        else
-               dma_data->addr = mem->start + pdata->tx_dma_offset;
+               dma_data->addr = mem->start + davinci_mcasp_txdma_offset(pdata);
 
        dma = &mcasp->dma_request[SNDRV_PCM_STREAM_PLAYBACK];
        res = platform_get_resource(pdev, IORESOURCE_DMA, 0);
@@ -1883,7 +1930,8 @@ static int davinci_mcasp_probe(struct platform_device *pdev)
                if (dat)
                        dma_data->addr = dat->start;
                else
-                       dma_data->addr = mem->start + pdata->rx_dma_offset;
+                       dma_data->addr =
+                               mem->start + davinci_mcasp_rxdma_offset(pdata);
 
                dma = &mcasp->dma_request[SNDRV_PCM_STREAM_CAPTURE];
                res = platform_get_resource(pdev, IORESOURCE_DMA, 1);
index 1e8787f..afddc80 100644 (file)
@@ -85,9 +85,9 @@
                                                (n << 2))
 
 /* Transmit Buffer for Serializer n */
-#define DAVINCI_MCASP_TXBUF_REG                0x200
+#define DAVINCI_MCASP_TXBUF_REG(n)     (0x200 + (n << 2))
 /* Receive Buffer for Serializer n */
-#define DAVINCI_MCASP_RXBUF_REG                0x280
+#define DAVINCI_MCASP_RXBUF_REG(n)     (0x280 + (n << 2))
 
 /* McASP FIFO Registers */
 #define DAVINCI_MCASP_V2_AFIFO_BASE    (0x1010)
index 632ecc0..bedec4a 100644 (file)
@@ -952,16 +952,16 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev,
        ssi_private->i2s_mode = CCSR_SSI_SCR_NET;
        switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
        case SND_SOC_DAIFMT_I2S:
+               regmap_update_bits(regs, CCSR_SSI_STCCR,
+                                  CCSR_SSI_SxCCR_DC_MASK,
+                                  CCSR_SSI_SxCCR_DC(2));
+               regmap_update_bits(regs, CCSR_SSI_SRCCR,
+                                  CCSR_SSI_SxCCR_DC_MASK,
+                                  CCSR_SSI_SxCCR_DC(2));
                switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
                case SND_SOC_DAIFMT_CBM_CFS:
                case SND_SOC_DAIFMT_CBS_CFS:
                        ssi_private->i2s_mode |= CCSR_SSI_SCR_I2S_MODE_MASTER;
-                       regmap_update_bits(regs, CCSR_SSI_STCCR,
-                                       CCSR_SSI_SxCCR_DC_MASK,
-                                       CCSR_SSI_SxCCR_DC(2));
-                       regmap_update_bits(regs, CCSR_SSI_SRCCR,
-                                       CCSR_SSI_SxCCR_DC_MASK,
-                                       CCSR_SSI_SxCCR_DC(2));
                        break;
                case SND_SOC_DAIFMT_CBM_CFM:
                        ssi_private->i2s_mode |= CCSR_SSI_SCR_I2S_MODE_SLAVE;
index 3951689..1bead81 100644 (file)
@@ -182,24 +182,29 @@ static int sst_platform_compr_trigger(struct snd_compr_stream *cstream, int cmd)
        case SNDRV_PCM_TRIGGER_START:
                if (stream->compr_ops->stream_start)
                        return stream->compr_ops->stream_start(sst->dev, stream->id);
+               break;
        case SNDRV_PCM_TRIGGER_STOP:
                if (stream->compr_ops->stream_drop)
                        return stream->compr_ops->stream_drop(sst->dev, stream->id);
+               break;
        case SND_COMPR_TRIGGER_DRAIN:
                if (stream->compr_ops->stream_drain)
                        return stream->compr_ops->stream_drain(sst->dev, stream->id);
+               break;
        case SND_COMPR_TRIGGER_PARTIAL_DRAIN:
                if (stream->compr_ops->stream_partial_drain)
                        return stream->compr_ops->stream_partial_drain(sst->dev, stream->id);
+               break;
        case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
                if (stream->compr_ops->stream_pause)
                        return stream->compr_ops->stream_pause(sst->dev, stream->id);
+               break;
        case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
                if (stream->compr_ops->stream_pause_release)
                        return stream->compr_ops->stream_pause_release(sst->dev, stream->id);
-       default:
-               return -EINVAL;
+               break;
        }
+       return -EINVAL;
 }
 
 static int sst_platform_compr_pointer(struct snd_compr_stream *cstream,
index 965ce40..8b95e09 100644 (file)
@@ -291,6 +291,7 @@ int bxt_sst_dsp_init(struct device *dev, void __iomem *mmio_base, int irq,
        sst_dsp_mailbox_init(sst, (BXT_ADSP_SRAM0_BASE + SKL_ADSP_W0_STAT_SZ),
                        SKL_ADSP_W0_UP_SZ, BXT_ADSP_SRAM1_BASE, SKL_ADSP_W1_SZ);
 
+       INIT_LIST_HEAD(&sst->module_list);
        ret = skl_ipc_init(dev, skl);
        if (ret)
                return ret;
index 49354d1..c4c51a4 100644 (file)
@@ -518,7 +518,7 @@ static void rsnd_adg_get_clkout(struct rsnd_priv *priv,
                }
        }
 
-       rsnd_mod_bset(adg_mod, SSICKR, 0x00FF0000, ckr);
+       rsnd_mod_bset(adg_mod, SSICKR, 0x80FF0000, ckr);
        rsnd_mod_write(adg_mod, BRRA,  rbga);
        rsnd_mod_write(adg_mod, BRRB,  rbgb);
 
index 543a6d0..4f747ee 100644 (file)
@@ -5,7 +5,7 @@ include ../lib.mk
 .PHONY: all all_32 all_64 warn_32bit_failure clean
 
 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
-                       check_initial_reg_state sigreturn ldt_gdt iopl
+                       check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test
 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
                        test_FCMOV test_FCOMI test_FISTTP \
                        vdso_restorer
diff --git a/tools/testing/selftests/x86/mpx-debug.h b/tools/testing/selftests/x86/mpx-debug.h
new file mode 100644 (file)
index 0000000..9230981
--- /dev/null
@@ -0,0 +1,14 @@
+#ifndef _MPX_DEBUG_H
+#define _MPX_DEBUG_H
+
+#ifndef DEBUG_LEVEL
+#define DEBUG_LEVEL 0
+#endif
+#define dprintf_level(level, args...) do { if(level <= DEBUG_LEVEL) printf(args); } while(0)
+#define dprintf1(args...) dprintf_level(1, args)
+#define dprintf2(args...) dprintf_level(2, args)
+#define dprintf3(args...) dprintf_level(3, args)
+#define dprintf4(args...) dprintf_level(4, args)
+#define dprintf5(args...) dprintf_level(5, args)
+
+#endif /* _MPX_DEBUG_H */
diff --git a/tools/testing/selftests/x86/mpx-dig.c b/tools/testing/selftests/x86/mpx-dig.c
new file mode 100644 (file)
index 0000000..ce85356
--- /dev/null
@@ -0,0 +1,498 @@
+/*
+ * Written by Dave Hansen <dave.hansen@intel.com>
+ */
+
+#include <stdlib.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <string.h>
+#include <fcntl.h>
+#include "mpx-debug.h"
+#include "mpx-mm.h"
+#include "mpx-hw.h"
+
+unsigned long bounds_dir_global;
+
+#define mpx_dig_abort()        __mpx_dig_abort(__FILE__, __func__, __LINE__)
+static void inline __mpx_dig_abort(const char *file, const char *func, int line)
+{
+       fprintf(stderr, "MPX dig abort @ %s::%d in %s()\n", file, line, func);
+       printf("MPX dig abort @ %s::%d in %s()\n", file, line, func);
+       abort();
+}
+
+/*
+ * run like this (BDIR finds the probably bounds directory):
+ *
+ *     BDIR="$(cat /proc/$pid/smaps | grep -B1 2097152 \
+ *             | head -1 | awk -F- '{print $1}')";
+ *     ./mpx-dig $pid 0x$BDIR
+ *
+ * NOTE:
+ *     assumes that the only 2097152-kb VMA is the bounds dir
+ */
+
+long nr_incore(void *ptr, unsigned long size_bytes)
+{
+       int i;
+       long ret = 0;
+       long vec_len = size_bytes / PAGE_SIZE;
+       unsigned char *vec = malloc(vec_len);
+       int incore_ret;
+
+       if (!vec)
+               mpx_dig_abort();
+
+       incore_ret = mincore(ptr, size_bytes, vec);
+       if (incore_ret) {
+               printf("mincore ret: %d\n", incore_ret);
+               perror("mincore");
+               mpx_dig_abort();
+       }
+       for (i = 0; i < vec_len; i++)
+               ret += vec[i];
+       free(vec);
+       return ret;
+}
+
+int open_proc(int pid, char *file)
+{
+       static char buf[100];
+       int fd;
+
+       snprintf(&buf[0], sizeof(buf), "/proc/%d/%s", pid, file);
+       fd = open(&buf[0], O_RDONLY);
+       if (fd < 0)
+               perror(buf);
+
+       return fd;
+}
+
+struct vaddr_range {
+       unsigned long start;
+       unsigned long end;
+};
+struct vaddr_range *ranges;
+int nr_ranges_allocated;
+int nr_ranges_populated;
+int last_range = -1;
+
+int __pid_load_vaddrs(int pid)
+{
+       int ret = 0;
+       int proc_maps_fd = open_proc(pid, "maps");
+       char linebuf[10000];
+       unsigned long start;
+       unsigned long end;
+       char rest[1000];
+       FILE *f = fdopen(proc_maps_fd, "r");
+
+       if (!f)
+               mpx_dig_abort();
+       nr_ranges_populated = 0;
+       while (!feof(f)) {
+               char *readret = fgets(linebuf, sizeof(linebuf), f);
+               int parsed;
+
+               if (readret == NULL) {
+                       if (feof(f))
+                               break;
+                       mpx_dig_abort();
+               }
+
+               parsed = sscanf(linebuf, "%lx-%lx%s", &start, &end, rest);
+               if (parsed != 3)
+                       mpx_dig_abort();
+
+               dprintf4("result[%d]: %lx-%lx<->%s\n", parsed, start, end, rest);
+               if (nr_ranges_populated >= nr_ranges_allocated) {
+                       ret = -E2BIG;
+                       break;
+               }
+               ranges[nr_ranges_populated].start = start;
+               ranges[nr_ranges_populated].end = end;
+               nr_ranges_populated++;
+       }
+       last_range = -1;
+       fclose(f);
+       close(proc_maps_fd);
+       return ret;
+}
+
+int pid_load_vaddrs(int pid)
+{
+       int ret;
+
+       dprintf2("%s(%d)\n", __func__, pid);
+       if (!ranges) {
+               nr_ranges_allocated = 4;
+               ranges = malloc(nr_ranges_allocated * sizeof(ranges[0]));
+               dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__, pid,
+                        nr_ranges_allocated, ranges);
+               assert(ranges != NULL);
+       }
+       do {
+               ret = __pid_load_vaddrs(pid);
+               if (!ret)
+                       break;
+               if (ret == -E2BIG) {
+                       dprintf2("%s(%d) need to realloc\n", __func__, pid);
+                       nr_ranges_allocated *= 2;
+                       ranges = realloc(ranges,
+                                       nr_ranges_allocated * sizeof(ranges[0]));
+                       dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__,
+                                       pid, nr_ranges_allocated, ranges);
+                       assert(ranges != NULL);
+                       dprintf1("reallocating to hold %d ranges\n", nr_ranges_allocated);
+               }
+       } while (1);
+
+       dprintf2("%s(%d) done\n", __func__, pid);
+
+       return ret;
+}
+
+static inline int vaddr_in_range(unsigned long vaddr, struct vaddr_range *r)
+{
+       if (vaddr < r->start)
+               return 0;
+       if (vaddr >= r->end)
+               return 0;
+       return 1;
+}
+
+static inline int vaddr_mapped_by_range(unsigned long vaddr)
+{
+       int i;
+
+       if (last_range > 0 && vaddr_in_range(vaddr, &ranges[last_range]))
+               return 1;
+
+       for (i = 0; i < nr_ranges_populated; i++) {
+               struct vaddr_range *r = &ranges[i];
+
+               if (vaddr_in_range(vaddr, r))
+                       continue;
+               last_range = i;
+               return 1;
+       }
+       return 0;
+}
+
+const int bt_entry_size_bytes = sizeof(unsigned long) * 4;
+
+void *read_bounds_table_into_buf(unsigned long table_vaddr)
+{
+#ifdef MPX_DIG_STANDALONE
+       static char bt_buf[MPX_BOUNDS_TABLE_SIZE_BYTES];
+       off_t seek_ret = lseek(fd, table_vaddr, SEEK_SET);
+       if (seek_ret != table_vaddr)
+               mpx_dig_abort();
+
+       int read_ret = read(fd, &bt_buf, sizeof(bt_buf));
+       if (read_ret != sizeof(bt_buf))
+               mpx_dig_abort();
+       return &bt_buf;
+#else
+       return (void *)table_vaddr;
+#endif
+}
+
+int dump_table(unsigned long table_vaddr, unsigned long base_controlled_vaddr,
+               unsigned long bde_vaddr)
+{
+       unsigned long offset_inside_bt;
+       int nr_entries = 0;
+       int do_abort = 0;
+       char *bt_buf;
+
+       dprintf3("%s() base_controlled_vaddr: 0x%012lx bde_vaddr: 0x%012lx\n",
+                       __func__, base_controlled_vaddr, bde_vaddr);
+
+       bt_buf = read_bounds_table_into_buf(table_vaddr);
+
+       dprintf4("%s() read done\n", __func__);
+
+       for (offset_inside_bt = 0;
+            offset_inside_bt < MPX_BOUNDS_TABLE_SIZE_BYTES;
+            offset_inside_bt += bt_entry_size_bytes) {
+               unsigned long bt_entry_index;
+               unsigned long bt_entry_controls;
+               unsigned long this_bt_entry_for_vaddr;
+               unsigned long *bt_entry_buf;
+               int i;
+
+               dprintf4("%s() offset_inside_bt: 0x%lx of 0x%llx\n", __func__,
+                       offset_inside_bt, MPX_BOUNDS_TABLE_SIZE_BYTES);
+               bt_entry_buf = (void *)&bt_buf[offset_inside_bt];
+               if (!bt_buf) {
+                       printf("null bt_buf\n");
+                       mpx_dig_abort();
+               }
+               if (!bt_entry_buf) {
+                       printf("null bt_entry_buf\n");
+                       mpx_dig_abort();
+               }
+               dprintf4("%s() reading *bt_entry_buf @ %p\n", __func__,
+                               bt_entry_buf);
+               if (!bt_entry_buf[0] &&
+                   !bt_entry_buf[1] &&
+                   !bt_entry_buf[2] &&
+                   !bt_entry_buf[3])
+                       continue;
+
+               nr_entries++;
+
+               bt_entry_index = offset_inside_bt/bt_entry_size_bytes;
+               bt_entry_controls = sizeof(void *);
+               this_bt_entry_for_vaddr =
+                       base_controlled_vaddr + bt_entry_index*bt_entry_controls;
+               /*
+                * We sign extend vaddr bits 48->63 which effectively
+                * creates a hole in the virtual address space.
+                * This calculation corrects for the hole.
+                */
+               if (this_bt_entry_for_vaddr > 0x00007fffffffffffUL)
+                       this_bt_entry_for_vaddr |= 0xffff800000000000;
+
+               if (!vaddr_mapped_by_range(this_bt_entry_for_vaddr)) {
+                       printf("bt_entry_buf: %p\n", bt_entry_buf);
+                       printf("there is a bte for %lx but no mapping\n",
+                                       this_bt_entry_for_vaddr);
+                       printf("          bde   vaddr: %016lx\n", bde_vaddr);
+                       printf("base_controlled_vaddr: %016lx\n", base_controlled_vaddr);
+                       printf("          table_vaddr: %016lx\n", table_vaddr);
+                       printf("          entry vaddr: %016lx @ offset %lx\n",
+                               table_vaddr + offset_inside_bt, offset_inside_bt);
+                       do_abort = 1;
+                       mpx_dig_abort();
+               }
+               if (DEBUG_LEVEL < 4)
+                       continue;
+
+               printf("table entry[%lx]: ", offset_inside_bt);
+               for (i = 0; i < bt_entry_size_bytes; i += sizeof(unsigned long))
+                       printf("0x%016lx ", bt_entry_buf[i]);
+               printf("\n");
+       }
+       if (do_abort)
+               mpx_dig_abort();
+       dprintf4("%s() done\n",  __func__);
+       return nr_entries;
+}
+
+int search_bd_buf(char *buf, int len_bytes, unsigned long bd_offset_bytes,
+               int *nr_populated_bdes)
+{
+       unsigned long i;
+       int total_entries = 0;
+
+       dprintf3("%s(%p, %x, %lx, ...) buf end: %p\n", __func__, buf,
+                       len_bytes, bd_offset_bytes, buf + len_bytes);
+
+       for (i = 0; i < len_bytes; i += sizeof(unsigned long)) {
+               unsigned long bd_index = (bd_offset_bytes + i) / sizeof(unsigned long);
+               unsigned long *bounds_dir_entry_ptr = (unsigned long *)&buf[i];
+               unsigned long bounds_dir_entry;
+               unsigned long bd_for_vaddr;
+               unsigned long bt_start;
+               unsigned long bt_tail;
+               int nr_entries;
+
+               dprintf4("%s() loop i: %ld bounds_dir_entry_ptr: %p\n", __func__, i,
+                               bounds_dir_entry_ptr);
+
+               bounds_dir_entry = *bounds_dir_entry_ptr;
+               if (!bounds_dir_entry) {
+                       dprintf4("no bounds dir at index 0x%lx / 0x%lx "
+                                "start at offset:%lx %lx\n", bd_index, bd_index,
+                                       bd_offset_bytes, i);
+                       continue;
+               }
+               dprintf3("found bounds_dir_entry: 0x%lx @ "
+                        "index 0x%lx buf ptr: %p\n", bounds_dir_entry, i,
+                                       &buf[i]);
+               /* mask off the enable bit: */
+               bounds_dir_entry &= ~0x1;
+               (*nr_populated_bdes)++;
+               dprintf4("nr_populated_bdes: %p\n", nr_populated_bdes);
+               dprintf4("*nr_populated_bdes: %d\n", *nr_populated_bdes);
+
+               bt_start = bounds_dir_entry;
+               bt_tail = bounds_dir_entry + MPX_BOUNDS_TABLE_SIZE_BYTES - 1;
+               if (!vaddr_mapped_by_range(bt_start)) {
+                       printf("bounds directory 0x%lx points to nowhere\n",
+                                       bounds_dir_entry);
+                       mpx_dig_abort();
+               }
+               if (!vaddr_mapped_by_range(bt_tail)) {
+                       printf("bounds directory end 0x%lx points to nowhere\n",
+                                       bt_tail);
+                       mpx_dig_abort();
+               }
+               /*
+                * Each bounds directory entry controls 1MB of virtual address
+                * space.  This variable is the virtual address in the process
+                * of the beginning of the area controlled by this bounds_dir.
+                */
+               bd_for_vaddr = bd_index * (1UL<<20);
+
+               nr_entries = dump_table(bounds_dir_entry, bd_for_vaddr,
+                               bounds_dir_global+bd_offset_bytes+i);
+               total_entries += nr_entries;
+               dprintf5("dir entry[%4ld @ %p]: 0x%lx %6d entries "
+                        "total this buf: %7d bd_for_vaddrs: 0x%lx -> 0x%lx\n",
+                               bd_index, buf+i,
+                               bounds_dir_entry, nr_entries, total_entries,
+                               bd_for_vaddr, bd_for_vaddr + (1UL<<20));
+       }
+       dprintf3("%s(%p, %x, %lx, ...) done\n", __func__, buf, len_bytes,
+                       bd_offset_bytes);
+       return total_entries;
+}
+
+int proc_pid_mem_fd = -1;
+
+void *fill_bounds_dir_buf_other(long byte_offset_inside_bounds_dir,
+                          long buffer_size_bytes, void *buffer)
+{
+       unsigned long seekto = bounds_dir_global + byte_offset_inside_bounds_dir;
+       int read_ret;
+       off_t seek_ret = lseek(proc_pid_mem_fd, seekto, SEEK_SET);
+
+       if (seek_ret != seekto)
+               mpx_dig_abort();
+
+       read_ret = read(proc_pid_mem_fd, buffer, buffer_size_bytes);
+       /* there shouldn't practically be short reads of /proc/$pid/mem */
+       if (read_ret != buffer_size_bytes)
+               mpx_dig_abort();
+
+       return buffer;
+}
+void *fill_bounds_dir_buf_self(long byte_offset_inside_bounds_dir,
+                          long buffer_size_bytes, void *buffer)
+
+{
+       unsigned char vec[buffer_size_bytes / PAGE_SIZE];
+       char *dig_bounds_dir_ptr =
+               (void *)(bounds_dir_global + byte_offset_inside_bounds_dir);
+       /*
+        * use mincore() to quickly find the areas of the bounds directory
+        * that have memory and thus will be worth scanning.
+        */
+       int incore_ret;
+
+       int incore = 0;
+       int i;
+
+       dprintf4("%s() dig_bounds_dir_ptr: %p\n", __func__, dig_bounds_dir_ptr);
+
+       incore_ret = mincore(dig_bounds_dir_ptr, buffer_size_bytes, &vec[0]);
+       if (incore_ret) {
+               printf("mincore ret: %d\n", incore_ret);
+               perror("mincore");
+               mpx_dig_abort();
+       }
+       for (i = 0; i < sizeof(vec); i++)
+               incore += vec[i];
+       dprintf4("%s() total incore: %d\n", __func__, incore);
+       if (!incore)
+               return NULL;
+       dprintf3("%s() total incore: %d\n", __func__, incore);
+       return dig_bounds_dir_ptr;
+}
+
+int inspect_pid(int pid)
+{
+       static int dig_nr;
+       long offset_inside_bounds_dir;
+       char bounds_dir_buf[sizeof(unsigned long) * (1UL << 15)];
+       char *dig_bounds_dir_ptr;
+       int total_entries = 0;
+       int nr_populated_bdes = 0;
+       int inspect_self;
+
+       if (getpid() == pid) {
+               dprintf4("inspecting self\n");
+               inspect_self = 1;
+       } else {
+               dprintf4("inspecting pid %d\n", pid);
+               mpx_dig_abort();
+       }
+
+       for (offset_inside_bounds_dir = 0;
+            offset_inside_bounds_dir < MPX_BOUNDS_TABLE_SIZE_BYTES;
+            offset_inside_bounds_dir += sizeof(bounds_dir_buf)) {
+               static int bufs_skipped;
+               int this_entries;
+
+               if (inspect_self) {
+                       dig_bounds_dir_ptr =
+                               fill_bounds_dir_buf_self(offset_inside_bounds_dir,
+                                                        sizeof(bounds_dir_buf),
+                                                        &bounds_dir_buf[0]);
+               } else {
+                       dig_bounds_dir_ptr =
+                               fill_bounds_dir_buf_other(offset_inside_bounds_dir,
+                                                         sizeof(bounds_dir_buf),
+                                                         &bounds_dir_buf[0]);
+               }
+               if (!dig_bounds_dir_ptr) {
+                       bufs_skipped++;
+                       continue;
+               }
+               this_entries = search_bd_buf(dig_bounds_dir_ptr,
+                                       sizeof(bounds_dir_buf),
+                                       offset_inside_bounds_dir,
+                                       &nr_populated_bdes);
+               total_entries += this_entries;
+       }
+       printf("mpx dig (%3d) complete, SUCCESS (%8d / %4d)\n", ++dig_nr,
+                       total_entries, nr_populated_bdes);
+       return total_entries + nr_populated_bdes;
+}
+
+#ifdef MPX_DIG_REMOTE
+int main(int argc, char **argv)
+{
+       int err;
+       char *c;
+       unsigned long bounds_dir_entry;
+       int pid;
+
+       printf("mpx-dig starting...\n");
+       err = sscanf(argv[1], "%d", &pid);
+       printf("parsing: '%s', err: %d\n", argv[1], err);
+       if (err != 1)
+               mpx_dig_abort();
+
+       err = sscanf(argv[2], "%lx", &bounds_dir_global);
+       printf("parsing: '%s': %d\n", argv[2], err);
+       if (err != 1)
+               mpx_dig_abort();
+
+       proc_pid_mem_fd = open_proc(pid, "mem");
+       if (proc_pid_mem_fd < 0)
+               mpx_dig_abort();
+
+       inspect_pid(pid);
+       return 0;
+}
+#endif
+
+long inspect_me(struct mpx_bounds_dir *bounds_dir)
+{
+       int pid = getpid();
+
+       pid_load_vaddrs(pid);
+       bounds_dir_global = (unsigned long)bounds_dir;
+       dprintf4("enter %s() bounds dir: %p\n", __func__, bounds_dir);
+       return inspect_pid(pid);
+}
diff --git a/tools/testing/selftests/x86/mpx-hw.h b/tools/testing/selftests/x86/mpx-hw.h
new file mode 100644 (file)
index 0000000..093c190
--- /dev/null
@@ -0,0 +1,123 @@
+#ifndef _MPX_HW_H
+#define _MPX_HW_H
+
+#include <assert.h>
+
+/* Describe the MPX Hardware Layout in here */
+
+#define NR_MPX_BOUNDS_REGISTERS 4
+
+#ifdef __i386__
+
+#define MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES      16 /* 4 * 32-bits */
+#define MPX_BOUNDS_TABLE_SIZE_BYTES            (1ULL << 14) /* 16k */
+#define MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES                4
+#define MPX_BOUNDS_DIR_SIZE_BYTES              (1ULL << 22) /* 4MB */
+
+#define MPX_BOUNDS_TABLE_BOTTOM_BIT            2
+#define MPX_BOUNDS_TABLE_TOP_BIT               11
+#define MPX_BOUNDS_DIR_BOTTOM_BIT              12
+#define MPX_BOUNDS_DIR_TOP_BIT                 31
+
+#else
+
+/*
+ * Linear Address of "pointer" (LAp)
+ *   0 ->  2: ignored
+ *   3 -> 19: index in to bounds table
+ *  20 -> 47: index in to bounds directory
+ *  48 -> 63: ignored
+ */
+
+#define MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES      32
+#define MPX_BOUNDS_TABLE_SIZE_BYTES            (1ULL << 22) /* 4MB */
+#define MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES                8
+#define MPX_BOUNDS_DIR_SIZE_BYTES              (1ULL << 31) /* 2GB */
+
+#define MPX_BOUNDS_TABLE_BOTTOM_BIT            3
+#define MPX_BOUNDS_TABLE_TOP_BIT               19
+#define MPX_BOUNDS_DIR_BOTTOM_BIT              20
+#define MPX_BOUNDS_DIR_TOP_BIT                 47
+
+#endif
+
+#define MPX_BOUNDS_DIR_NR_ENTRIES      \
+       (MPX_BOUNDS_DIR_SIZE_BYTES/MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES)
+#define MPX_BOUNDS_TABLE_NR_ENTRIES    \
+       (MPX_BOUNDS_TABLE_SIZE_BYTES/MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES)
+
+#define MPX_BOUNDS_TABLE_ENTRY_VALID_BIT       0x1
+
+struct mpx_bd_entry {
+       union {
+               char x[MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES];
+               void *contents[1];
+       };
+} __attribute__((packed));
+
+struct mpx_bt_entry {
+       union {
+               char x[MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES];
+               unsigned long contents[1];
+       };
+} __attribute__((packed));
+
+struct mpx_bounds_dir {
+       struct mpx_bd_entry entries[MPX_BOUNDS_DIR_NR_ENTRIES];
+} __attribute__((packed));
+
+struct mpx_bounds_table {
+       struct mpx_bt_entry entries[MPX_BOUNDS_TABLE_NR_ENTRIES];
+} __attribute__((packed));
+
+static inline unsigned long GET_BITS(unsigned long val, int bottombit, int topbit)
+{
+       int total_nr_bits = topbit - bottombit;
+       unsigned long mask = (1UL << total_nr_bits)-1;
+       return (val >> bottombit) & mask;
+}
+
+static inline unsigned long __vaddr_bounds_table_index(void *vaddr)
+{
+       return GET_BITS((unsigned long)vaddr, MPX_BOUNDS_TABLE_BOTTOM_BIT,
+                                             MPX_BOUNDS_TABLE_TOP_BIT);
+}
+
+static inline unsigned long __vaddr_bounds_directory_index(void *vaddr)
+{
+       return GET_BITS((unsigned long)vaddr, MPX_BOUNDS_DIR_BOTTOM_BIT,
+                                             MPX_BOUNDS_DIR_TOP_BIT);
+}
+
+static inline struct mpx_bd_entry *mpx_vaddr_to_bd_entry(void *vaddr,
+               struct mpx_bounds_dir *bounds_dir)
+{
+       unsigned long index = __vaddr_bounds_directory_index(vaddr);
+       return &bounds_dir->entries[index];
+}
+
+static inline int bd_entry_valid(struct mpx_bd_entry *bounds_dir_entry)
+{
+       unsigned long __bd_entry = (unsigned long)bounds_dir_entry->contents;
+       return (__bd_entry & MPX_BOUNDS_TABLE_ENTRY_VALID_BIT);
+}
+
+static inline struct mpx_bounds_table *
+__bd_entry_to_bounds_table(struct mpx_bd_entry *bounds_dir_entry)
+{
+       unsigned long __bd_entry = (unsigned long)bounds_dir_entry->contents;
+       assert(__bd_entry & MPX_BOUNDS_TABLE_ENTRY_VALID_BIT);
+       __bd_entry &= ~MPX_BOUNDS_TABLE_ENTRY_VALID_BIT;
+       return (struct mpx_bounds_table *)__bd_entry;
+}
+
+static inline struct mpx_bt_entry *
+mpx_vaddr_to_bt_entry(void *vaddr, struct mpx_bounds_dir *bounds_dir)
+{
+       struct mpx_bd_entry *bde = mpx_vaddr_to_bd_entry(vaddr, bounds_dir);
+       struct mpx_bounds_table *bt = __bd_entry_to_bounds_table(bde);
+       unsigned long index = __vaddr_bounds_table_index(vaddr);
+       return &bt->entries[index];
+}
+
+#endif /* _MPX_HW_H */
diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c
new file mode 100644 (file)
index 0000000..616ee96
--- /dev/null
@@ -0,0 +1,1585 @@
+/*
+ * mpx-mini-test.c: routines to test Intel MPX (Memory Protection eXtentions)
+ *
+ * Written by:
+ * "Ren, Qiaowei" <qiaowei.ren@intel.com>
+ * "Wei, Gang" <gang.wei@intel.com>
+ * "Hansen, Dave" <dave.hansen@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2.
+ */
+
+/*
+ * 2014-12-05: Dave Hansen: fixed all of the compiler warnings, and made sure
+ *            it works on 32-bit.
+ */
+
+int inspect_every_this_many_mallocs = 100;
+int zap_all_every_this_many_mallocs = 1000;
+
+#define _GNU_SOURCE
+#define _LARGEFILE64_SOURCE
+
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "mpx-hw.h"
+#include "mpx-debug.h"
+#include "mpx-mm.h"
+
+#ifndef __always_inline
+#define __always_inline inline __attribute__((always_inline)
+#endif
+
+#ifndef TEST_DURATION_SECS
+#define TEST_DURATION_SECS 3
+#endif
+
+void write_int_to(char *prefix, char *file, int int_to_write)
+{
+       char buf[100];
+       int fd = open(file, O_RDWR);
+       int len;
+       int ret;
+
+       assert(fd >= 0);
+       len = snprintf(buf, sizeof(buf), "%s%d", prefix, int_to_write);
+       assert(len >= 0);
+       assert(len < sizeof(buf));
+       ret = write(fd, buf, len);
+       assert(ret == len);
+       ret = close(fd);
+       assert(!ret);
+}
+
+void write_pid_to(char *prefix, char *file)
+{
+       write_int_to(prefix, file, getpid());
+}
+
+void trace_me(void)
+{
+/* tracing events dir */
+#define TED "/sys/kernel/debug/tracing/events/"
+/*
+       write_pid_to("common_pid=", TED "signal/filter");
+       write_pid_to("common_pid=", TED "exceptions/filter");
+       write_int_to("", TED "signal/enable", 1);
+       write_int_to("", TED "exceptions/enable", 1);
+*/
+       write_pid_to("", "/sys/kernel/debug/tracing/set_ftrace_pid");
+       write_int_to("", "/sys/kernel/debug/tracing/trace", 0);
+}
+
+#define test_failed() __test_failed(__FILE__, __LINE__)
+static void __test_failed(char *f, int l)
+{
+       fprintf(stderr, "abort @ %s::%d\n", f, l);
+       abort();
+}
+
+/* Error Printf */
+#define eprintf(args...)       fprintf(stderr, args)
+
+#ifdef __i386__
+
+/* i386 directory size is 4MB */
+#define REG_IP_IDX     REG_EIP
+#define REX_PREFIX
+
+#define XSAVE_OFFSET_IN_FPMEM  sizeof(struct _libc_fpstate)
+
+/*
+ * __cpuid() is from the Linux Kernel:
+ */
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+               unsigned int *ecx, unsigned int *edx)
+{
+       /* ecx is often an input as well as an output. */
+       asm volatile(
+               "push %%ebx;"
+               "cpuid;"
+               "mov %%ebx, %1;"
+               "pop %%ebx"
+               : "=a" (*eax),
+                 "=g" (*ebx),
+                 "=c" (*ecx),
+                 "=d" (*edx)
+               : "0" (*eax), "2" (*ecx));
+}
+
+#else /* __i386__ */
+
+#define REG_IP_IDX     REG_RIP
+#define REX_PREFIX "0x48, "
+
+#define XSAVE_OFFSET_IN_FPMEM  0
+
+/*
+ * __cpuid() is from the Linux Kernel:
+ */
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+               unsigned int *ecx, unsigned int *edx)
+{
+       /* ecx is often an input as well as an output. */
+       asm volatile(
+               "cpuid;"
+               : "=a" (*eax),
+                 "=b" (*ebx),
+                 "=c" (*ecx),
+                 "=d" (*edx)
+               : "0" (*eax), "2" (*ecx));
+}
+
+#endif /* !__i386__ */
+
+struct xsave_hdr_struct {
+       uint64_t xstate_bv;
+       uint64_t reserved1[2];
+       uint64_t reserved2[5];
+} __attribute__((packed));
+
+struct bndregs_struct {
+       uint64_t bndregs[8];
+} __attribute__((packed));
+
+struct bndcsr_struct {
+       uint64_t cfg_reg_u;
+       uint64_t status_reg;
+} __attribute__((packed));
+
+struct xsave_struct {
+       uint8_t fpu_sse[512];
+       struct xsave_hdr_struct xsave_hdr;
+       uint8_t ymm[256];
+       uint8_t lwp[128];
+       struct bndregs_struct bndregs;
+       struct bndcsr_struct bndcsr;
+} __attribute__((packed));
+
+uint8_t __attribute__((__aligned__(64))) buffer[4096];
+struct xsave_struct *xsave_buf = (struct xsave_struct *)buffer;
+
+uint8_t __attribute__((__aligned__(64))) test_buffer[4096];
+struct xsave_struct *xsave_test_buf = (struct xsave_struct *)test_buffer;
+
+uint64_t num_bnd_chk;
+
+static __always_inline void xrstor_state(struct xsave_struct *fx, uint64_t mask)
+{
+       uint32_t lmask = mask;
+       uint32_t hmask = mask >> 32;
+
+       asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
+                    : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+                    :   "memory");
+}
+
+static __always_inline void xsave_state_1(void *_fx, uint64_t mask)
+{
+       uint32_t lmask = mask;
+       uint32_t hmask = mask >> 32;
+       unsigned char *fx = _fx;
+
+       asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t"
+                    : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+                    :   "memory");
+}
+
+static inline uint64_t xgetbv(uint32_t index)
+{
+       uint32_t eax, edx;
+
+       asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
+                    : "=a" (eax), "=d" (edx)
+                    : "c" (index));
+       return eax + ((uint64_t)edx << 32);
+}
+
+static uint64_t read_mpx_status_sig(ucontext_t *uctxt)
+{
+       memset(buffer, 0, sizeof(buffer));
+       memcpy(buffer,
+               (uint8_t *)uctxt->uc_mcontext.fpregs + XSAVE_OFFSET_IN_FPMEM,
+               sizeof(struct xsave_struct));
+
+       return xsave_buf->bndcsr.status_reg;
+}
+
+#include <pthread.h>
+
+static uint8_t *get_next_inst_ip(uint8_t *addr)
+{
+       uint8_t *ip = addr;
+       uint8_t sib;
+       uint8_t rm;
+       uint8_t mod;
+       uint8_t base;
+       uint8_t modrm;
+
+       /* determine the prefix. */
+       switch(*ip) {
+       case 0xf2:
+       case 0xf3:
+       case 0x66:
+               ip++;
+               break;
+       }
+
+       /* look for rex prefix */
+       if ((*ip & 0x40) == 0x40)
+               ip++;
+
+       /* Make sure we have a MPX instruction. */
+       if (*ip++ != 0x0f)
+               return addr;
+
+       /* Skip the op code byte. */
+       ip++;
+
+       /* Get the modrm byte. */
+       modrm = *ip++;
+
+       /* Break it down into parts. */
+       rm = modrm & 7;
+       mod = (modrm >> 6);
+
+       /* Init the parts of the address mode. */
+       base = 8;
+
+       /* Is it a mem mode? */
+       if (mod != 3) {
+               /* look for scaled indexed addressing */
+               if (rm == 4) {
+                       /* SIB addressing */
+                       sib = *ip++;
+                       base = sib & 7;
+                       switch (mod) {
+                       case 0:
+                               if (base == 5)
+                                       ip += 4;
+                               break;
+
+                       case 1:
+                               ip++;
+                               break;
+
+                       case 2:
+                               ip += 4;
+                               break;
+                       }
+
+               } else {
+                       /* MODRM addressing */
+                       switch (mod) {
+                       case 0:
+                               /* DISP32 addressing, no base */
+                               if (rm == 5)
+                                       ip += 4;
+                               break;
+
+                       case 1:
+                               ip++;
+                               break;
+
+                       case 2:
+                               ip += 4;
+                               break;
+                       }
+               }
+       }
+       return ip;
+}
+
+#ifdef si_lower
+static inline void *__si_bounds_lower(siginfo_t *si)
+{
+       return si->si_lower;
+}
+
+static inline void *__si_bounds_upper(siginfo_t *si)
+{
+       return si->si_upper;
+}
+#else
+static inline void **__si_bounds_hack(siginfo_t *si)
+{
+       void *sigfault = &si->_sifields._sigfault;
+       void *end_sigfault = sigfault + sizeof(si->_sifields._sigfault);
+       void **__si_lower = end_sigfault;
+
+       return __si_lower;
+}
+
+static inline void *__si_bounds_lower(siginfo_t *si)
+{
+       return *__si_bounds_hack(si);
+}
+
+static inline void *__si_bounds_upper(siginfo_t *si)
+{
+       return (*__si_bounds_hack(si)) + sizeof(void *);
+}
+#endif
+
+static int br_count;
+static int expected_bnd_index = -1;
+uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */
+unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS];
+
+/*
+ * The kernel is supposed to provide some information about the bounds
+ * exception in the siginfo.  It should match what we have in the bounds
+ * registers that we are checking against.  Just check against the shadow copy
+ * since it is easily available, and we also check that *it* matches the real
+ * registers.
+ */
+void check_siginfo_vs_shadow(siginfo_t* si)
+{
+       int siginfo_ok = 1;
+       void *shadow_lower = (void *)(unsigned long)shadow_plb[expected_bnd_index][0];
+       void *shadow_upper = (void *)(unsigned long)shadow_plb[expected_bnd_index][1];
+
+       if ((expected_bnd_index < 0) ||
+           (expected_bnd_index >= NR_MPX_BOUNDS_REGISTERS)) {
+               fprintf(stderr, "ERROR: invalid expected_bnd_index: %d\n",
+                       expected_bnd_index);
+               exit(6);
+       }
+       if (__si_bounds_lower(si) != shadow_lower)
+               siginfo_ok = 0;
+       if (__si_bounds_upper(si) != shadow_upper)
+               siginfo_ok = 0;
+
+       if (!siginfo_ok) {
+               fprintf(stderr, "ERROR: siginfo bounds do not match "
+                       "shadow bounds for register %d\n", expected_bnd_index);
+               exit(7);
+       }
+}
+
+void handler(int signum, siginfo_t *si, void *vucontext)
+{
+       int i;
+       ucontext_t *uctxt = vucontext;
+       int trapno;
+       unsigned long ip;
+
+       dprintf1("entered signal handler\n");
+
+       trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
+       ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
+
+       if (trapno == 5) {
+               typeof(si->si_addr) *si_addr_ptr = &si->si_addr;
+               uint64_t status = read_mpx_status_sig(uctxt);
+               uint64_t br_reason =  status & 0x3;
+
+               br_count++;
+               dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count);
+
+#define __SI_FAULT      (3 << 16)
+#define SEGV_BNDERR     (__SI_FAULT|3)  /* failed address bound checks */
+
+               dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n",
+                               status, ip, br_reason);
+               dprintf2("si_signo: %d\n", si->si_signo);
+               dprintf2("  signum: %d\n", signum);
+               dprintf2("info->si_code == SEGV_BNDERR: %d\n",
+                               (si->si_code == SEGV_BNDERR));
+               dprintf2("info->si_code: %d\n", si->si_code);
+               dprintf2("info->si_lower: %p\n", __si_bounds_lower(si));
+               dprintf2("info->si_upper: %p\n", __si_bounds_upper(si));
+
+               check_siginfo_vs_shadow(si);
+
+               for (i = 0; i < 8; i++)
+                       dprintf3("[%d]: %p\n", i, si_addr_ptr[i]);
+               switch (br_reason) {
+               case 0: /* traditional BR */
+                       fprintf(stderr,
+                               "Undefined status with bound exception:%jx\n",
+                                status);
+                       exit(5);
+               case 1: /* #BR MPX bounds exception */
+                       /* these are normal and we expect to see them */
+                       dprintf1("bounds exception (normal): status 0x%jx at %p si_addr: %p\n",
+                               status, (void *)ip, si->si_addr);
+                       num_bnd_chk++;
+                       uctxt->uc_mcontext.gregs[REG_IP_IDX] =
+                               (greg_t)get_next_inst_ip((uint8_t *)ip);
+                       break;
+               case 2:
+                       fprintf(stderr, "#BR status == 2, missing bounds table,"
+                                       "kernel should have handled!!\n");
+                       exit(4);
+                       break;
+               default:
+                       fprintf(stderr, "bound check error: status 0x%jx at %p\n",
+                               status, (void *)ip);
+                       num_bnd_chk++;
+                       uctxt->uc_mcontext.gregs[REG_IP_IDX] =
+                               (greg_t)get_next_inst_ip((uint8_t *)ip);
+                       fprintf(stderr, "bound check error: si_addr %p\n", si->si_addr);
+                       exit(3);
+               }
+       } else if (trapno == 14) {
+               eprintf("ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n",
+                       trapno, ip);
+               eprintf("si_addr %p\n", si->si_addr);
+               eprintf("REG_ERR: %lx\n", (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
+               test_failed();
+       } else {
+               eprintf("unexpected trap %d! at 0x%lx\n", trapno, ip);
+               eprintf("si_addr %p\n", si->si_addr);
+               eprintf("REG_ERR: %lx\n", (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
+               test_failed();
+       }
+}
+
+static inline void cpuid_count(unsigned int op, int count,
+                              unsigned int *eax, unsigned int *ebx,
+                              unsigned int *ecx, unsigned int *edx)
+{
+       *eax = op;
+       *ecx = count;
+       __cpuid(eax, ebx, ecx, edx);
+}
+
+#define XSTATE_CPUID       0x0000000d
+
+/*
+ * List of XSAVE features Linux knows about:
+ */
+enum xfeature_bit {
+       XSTATE_BIT_FP,
+       XSTATE_BIT_SSE,
+       XSTATE_BIT_YMM,
+       XSTATE_BIT_BNDREGS,
+       XSTATE_BIT_BNDCSR,
+       XSTATE_BIT_OPMASK,
+       XSTATE_BIT_ZMM_Hi256,
+       XSTATE_BIT_Hi16_ZMM,
+
+       XFEATURES_NR_MAX,
+};
+
+#define XSTATE_FP             (1 << XSTATE_BIT_FP)
+#define XSTATE_SSE           (1 << XSTATE_BIT_SSE)
+#define XSTATE_YMM           (1 << XSTATE_BIT_YMM)
+#define XSTATE_BNDREGS   (1 << XSTATE_BIT_BNDREGS)
+#define XSTATE_BNDCSR     (1 << XSTATE_BIT_BNDCSR)
+#define XSTATE_OPMASK     (1 << XSTATE_BIT_OPMASK)
+#define XSTATE_ZMM_Hi256       (1 << XSTATE_BIT_ZMM_Hi256)
+#define XSTATE_Hi16_ZMM         (1 << XSTATE_BIT_Hi16_ZMM)
+
+#define MPX_XSTATES            (XSTATE_BNDREGS | XSTATE_BNDCSR) /* 0x18 */
+
+bool one_bit(unsigned int x, int bit)
+{
+       return !!(x & (1<<bit));
+}
+
+void print_state_component(int state_bit_nr, char *name)
+{
+       unsigned int eax, ebx, ecx, edx;
+       unsigned int state_component_size;
+       unsigned int state_component_supervisor;
+       unsigned int state_component_user;
+       unsigned int state_component_aligned;
+
+       /* See SDM Section 13.2 */
+       cpuid_count(XSTATE_CPUID, state_bit_nr, &eax, &ebx, &ecx, &edx);
+       assert(eax || ebx || ecx);
+       state_component_size = eax;
+       state_component_supervisor = ((!ebx) && one_bit(ecx, 0));
+       state_component_user = !one_bit(ecx, 0);
+       state_component_aligned = one_bit(ecx, 1);
+       printf("%8s: size: %d user: %d supervisor: %d aligned: %d\n",
+               name,
+               state_component_size,       state_component_user,
+               state_component_supervisor, state_component_aligned);
+
+}
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (ecx) */
+#define XSAVE_FEATURE_BIT       (26)  /* XSAVE/XRSTOR/XSETBV/XGETBV */
+#define OSXSAVE_FEATURE_BIT     (27) /* XSAVE enabled in the OS */
+
+bool check_mpx_support(void)
+{
+       unsigned int eax, ebx, ecx, edx;
+
+       cpuid_count(1, 0, &eax, &ebx, &ecx, &edx);
+
+       /* We can't do much without XSAVE, so just make these assert()'s */
+       if (!one_bit(ecx, XSAVE_FEATURE_BIT)) {
+               fprintf(stderr, "processor lacks XSAVE, can not run MPX tests\n");
+               exit(0);
+       }
+
+       if (!one_bit(ecx, OSXSAVE_FEATURE_BIT)) {
+               fprintf(stderr, "processor lacks OSXSAVE, can not run MPX tests\n");
+               exit(0);
+       }
+
+       /* CPUs not supporting the XSTATE CPUID leaf do not support MPX */
+       /* Is this redundant with the feature bit checks? */
+       cpuid_count(0, 0, &eax, &ebx, &ecx, &edx);
+       if (eax < XSTATE_CPUID) {
+               fprintf(stderr, "processor lacks XSTATE CPUID leaf,"
+                               " can not run MPX tests\n");
+               exit(0);
+       }
+
+       printf("XSAVE is supported by HW & OS\n");
+
+       cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+
+       printf("XSAVE processor supported state mask: 0x%x\n", eax);
+       printf("XSAVE OS supported state mask: 0x%jx\n", xgetbv(0));
+
+       /* Make sure that the MPX states are enabled in in XCR0 */
+       if ((eax & MPX_XSTATES) != MPX_XSTATES) {
+               fprintf(stderr, "processor lacks MPX XSTATE(s), can not run MPX tests\n");
+               exit(0);
+       }
+
+       /* Make sure the MPX states are supported by XSAVE* */
+       if ((xgetbv(0) & MPX_XSTATES) != MPX_XSTATES) {
+               fprintf(stderr, "MPX XSTATE(s) no enabled in XCR0, "
+                               "can not run MPX tests\n");
+               exit(0);
+       }
+
+       print_state_component(XSTATE_BIT_BNDREGS, "BNDREGS");
+       print_state_component(XSTATE_BIT_BNDCSR,  "BNDCSR");
+
+       return true;
+}
+
+void enable_mpx(void *l1base)
+{
+       /* enable point lookup */
+       memset(buffer, 0, sizeof(buffer));
+       xrstor_state(xsave_buf, 0x18);
+
+       xsave_buf->xsave_hdr.xstate_bv = 0x10;
+       xsave_buf->bndcsr.cfg_reg_u = (unsigned long)l1base | 1;
+       xsave_buf->bndcsr.status_reg = 0;
+
+       dprintf2("bf xrstor\n");
+       dprintf2("xsave cndcsr: status %jx, configu %jx\n",
+              xsave_buf->bndcsr.status_reg, xsave_buf->bndcsr.cfg_reg_u);
+       xrstor_state(xsave_buf, 0x18);
+       dprintf2("after xrstor\n");
+
+       xsave_state_1(xsave_buf, 0x18);
+
+       dprintf1("xsave bndcsr: status %jx, configu %jx\n",
+              xsave_buf->bndcsr.status_reg, xsave_buf->bndcsr.cfg_reg_u);
+}
+
+#include <sys/prctl.h>
+
+struct mpx_bounds_dir *bounds_dir_ptr;
+
+unsigned long __bd_incore(const char *func, int line)
+{
+       unsigned long ret = nr_incore(bounds_dir_ptr, MPX_BOUNDS_DIR_SIZE_BYTES);
+       return ret;
+}
+#define bd_incore() __bd_incore(__func__, __LINE__)
+
+void check_clear(void *ptr, unsigned long sz)
+{
+       unsigned long *i;
+
+       for (i = ptr; (void *)i < ptr + sz; i++) {
+               if (*i) {
+                       dprintf1("%p is NOT clear at %p\n", ptr, i);
+                       assert(0);
+               }
+       }
+       dprintf1("%p is clear for %lx\n", ptr, sz);
+}
+
+void check_clear_bd(void)
+{
+       check_clear(bounds_dir_ptr, 2UL << 30);
+}
+
+#define USE_MALLOC_FOR_BOUNDS_DIR 1
+bool process_specific_init(void)
+{
+       unsigned long size;
+       unsigned long *dir;
+       /* Guarantee we have the space to align it, add padding: */
+       unsigned long pad = getpagesize();
+
+       size = 2UL << 30; /* 2GB */
+       if (sizeof(unsigned long) == 4)
+               size = 4UL << 20; /* 4MB */
+       dprintf1("trying to allocate %ld MB bounds directory\n", (size >> 20));
+
+       if (USE_MALLOC_FOR_BOUNDS_DIR) {
+               unsigned long _dir;
+
+               dir = malloc(size + pad);
+               assert(dir);
+               _dir = (unsigned long)dir;
+               _dir += 0xfffUL;
+               _dir &= ~0xfffUL;
+               dir = (void *)_dir;
+       } else {
+               /*
+                * This makes debugging easier because the address
+                * calculations are simpler:
+                */
+               dir = mmap((void *)0x200000000000, size + pad,
+                               PROT_READ|PROT_WRITE,
+                               MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+               if (dir == (void *)-1) {
+                       perror("unable to allocate bounds directory");
+                       abort();
+               }
+               check_clear(dir, size);
+       }
+       bounds_dir_ptr = (void *)dir;
+       madvise(bounds_dir_ptr, size, MADV_NOHUGEPAGE);
+       bd_incore();
+       dprintf1("bounds directory: 0x%p -> 0x%p\n", bounds_dir_ptr,
+                       (char *)bounds_dir_ptr + size);
+       check_clear(dir, size);
+       enable_mpx(dir);
+       check_clear(dir, size);
+       if (prctl(43, 0, 0, 0, 0)) {
+               printf("no MPX support\n");
+               abort();
+               return false;
+       }
+       return true;
+}
+
+bool process_specific_finish(void)
+{
+       if (prctl(44)) {
+               printf("no MPX support\n");
+               return false;
+       }
+       return true;
+}
+
+void setup_handler()
+{
+       int r, rs;
+       struct sigaction newact;
+       struct sigaction oldact;
+
+       /* #BR is mapped to sigsegv */
+       int signum  = SIGSEGV;
+
+       newact.sa_handler = 0;   /* void(*)(int)*/
+       newact.sa_sigaction = handler; /* void (*)(int, siginfo_t*, void *) */
+
+       /*sigset_t - signals to block while in the handler */
+       /* get the old signal mask. */
+       rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask);
+       assert(rs == 0);
+
+       /* call sa_sigaction, not sa_handler*/
+       newact.sa_flags = SA_SIGINFO;
+
+       newact.sa_restorer = 0;  /* void(*)(), obsolete */
+       r = sigaction(signum, &newact, &oldact);
+       assert(r == 0);
+}
+
+void mpx_prepare(void)
+{
+       dprintf2("%s()\n", __func__);
+       setup_handler();
+       process_specific_init();
+}
+
+void mpx_cleanup(void)
+{
+       printf("%s(): %jd BRs. bye...\n", __func__, num_bnd_chk);
+       process_specific_finish();
+}
+
+/*-------------- the following is test case ---------------*/
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+
+uint64_t num_lower_brs;
+uint64_t num_upper_brs;
+
+#define MPX_CONFIG_OFFSET 1024
+#define MPX_BOUNDS_OFFSET 960
+#define MPX_HEADER_OFFSET 512
+#define MAX_ADDR_TESTED (1<<28)
+#define TEST_ROUNDS 100
+
+/*
+      0F 1A /r BNDLDX-Load
+      0F 1B /r BNDSTX-Store Extended Bounds Using Address Translation
+   66 0F 1A /r BNDMOV bnd1, bnd2/m128
+   66 0F 1B /r BNDMOV bnd1/m128, bnd2
+   F2 0F 1A /r BNDCU bnd, r/m64
+   F2 0F 1B /r BNDCN bnd, r/m64
+   F3 0F 1A /r BNDCL bnd, r/m64
+   F3 0F 1B /r BNDMK bnd, m64
+*/
+
+static __always_inline void xsave_state(void *_fx, uint64_t mask)
+{
+       uint32_t lmask = mask;
+       uint32_t hmask = mask >> 32;
+       unsigned char *fx = _fx;
+
+       asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t"
+                    : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+                    :   "memory");
+}
+
+static __always_inline void mpx_clear_bnd0(void)
+{
+       long size = 0;
+       void *ptr = NULL;
+       /* F3 0F 1B /r BNDMK bnd, m64                   */
+       /* f3 0f 1b 04 11    bndmk  (%rcx,%rdx,1),%bnd0 */
+       asm volatile(".byte 0xf3,0x0f,0x1b,0x04,0x11\n\t"
+                    : : "c" (ptr), "d" (size-1)
+                    :   "memory");
+}
+
+static __always_inline void mpx_make_bound_helper(unsigned long ptr,
+               unsigned long size)
+{
+       /* F3 0F 1B /r          BNDMK bnd, m64                  */
+       /* f3 0f 1b 04 11       bndmk  (%rcx,%rdx,1),%bnd0      */
+       asm volatile(".byte 0xf3,0x0f,0x1b,0x04,0x11\n\t"
+                    : : "c" (ptr), "d" (size-1)
+                    :   "memory");
+}
+
+static __always_inline void mpx_check_lowerbound_helper(unsigned long ptr)
+{
+       /* F3 0F 1A /r  NDCL bnd, r/m64                 */
+       /* f3 0f 1a 01  bndcl  (%rcx),%bnd0             */
+       asm volatile(".byte 0xf3,0x0f,0x1a,0x01\n\t"
+                    : : "c" (ptr)
+                    :   "memory");
+}
+
+static __always_inline void mpx_check_upperbound_helper(unsigned long ptr)
+{
+       /* F2 0F 1A /r  BNDCU bnd, r/m64        */
+       /* f2 0f 1a 01  bndcu  (%rcx),%bnd0     */
+       asm volatile(".byte 0xf2,0x0f,0x1a,0x01\n\t"
+                    : : "c" (ptr)
+                    :   "memory");
+}
+
+static __always_inline void mpx_movbndreg_helper()
+{
+       /* 66 0F 1B /r  BNDMOV bnd1/m128, bnd2  */
+       /* 66 0f 1b c2  bndmov %bnd0,%bnd2      */
+
+       asm volatile(".byte 0x66,0x0f,0x1b,0xc2\n\t");
+}
+
+static __always_inline void mpx_movbnd2mem_helper(uint8_t *mem)
+{
+       /* 66 0F 1B /r  BNDMOV bnd1/m128, bnd2  */
+       /* 66 0f 1b 01  bndmov %bnd0,(%rcx)     */
+       asm volatile(".byte 0x66,0x0f,0x1b,0x01\n\t"
+                    : : "c" (mem)
+                    :   "memory");
+}
+
+static __always_inline void mpx_movbnd_from_mem_helper(uint8_t *mem)
+{
+       /* 66 0F 1A /r  BNDMOV bnd1, bnd2/m128  */
+       /* 66 0f 1a 01  bndmov (%rcx),%bnd0     */
+       asm volatile(".byte 0x66,0x0f,0x1a,0x01\n\t"
+                    : : "c" (mem)
+                    :   "memory");
+}
+
+static __always_inline void mpx_store_dsc_helper(unsigned long ptr_addr,
+               unsigned long ptr_val)
+{
+       /* 0F 1B /r     BNDSTX-Store Extended Bounds Using Address Translation  */
+       /* 0f 1b 04 11  bndstx %bnd0,(%rcx,%rdx,1)                              */
+       asm volatile(".byte 0x0f,0x1b,0x04,0x11\n\t"
+                    : : "c" (ptr_addr), "d" (ptr_val)
+                    :   "memory");
+}
+
+static __always_inline void mpx_load_dsc_helper(unsigned long ptr_addr,
+               unsigned long ptr_val)
+{
+       /* 0F 1A /r     BNDLDX-Load                     */
+       /*/ 0f 1a 04 11 bndldx (%rcx,%rdx,1),%bnd0      */
+       asm volatile(".byte 0x0f,0x1a,0x04,0x11\n\t"
+                    : : "c" (ptr_addr), "d" (ptr_val)
+                    :   "memory");
+}
+
+void __print_context(void *__print_xsave_buffer, int line)
+{
+       uint64_t *bounds = (uint64_t *)(__print_xsave_buffer + MPX_BOUNDS_OFFSET);
+       uint64_t *cfg    = (uint64_t *)(__print_xsave_buffer + MPX_CONFIG_OFFSET);
+
+       int i;
+       eprintf("%s()::%d\n", "print_context", line);
+       for (i = 0; i < 4; i++) {
+               eprintf("bound[%d]: 0x%016lx 0x%016lx(0x%016lx)\n", i,
+                      (unsigned long)bounds[i*2],
+                      ~(unsigned long)bounds[i*2+1],
+                       (unsigned long)bounds[i*2+1]);
+       }
+
+       eprintf("cpcfg: %jx  cpstatus: %jx\n", cfg[0], cfg[1]);
+}
+#define print_context(x) __print_context(x, __LINE__)
+#ifdef DEBUG
+#define dprint_context(x) print_context(x)
+#else
+#define dprint_context(x) do{}while(0)
+#endif
+
+void init()
+{
+       int i;
+
+       srand((unsigned int)time(NULL));
+
+       for (i = 0; i < 4; i++) {
+               shadow_plb[i][0] = 0;
+               shadow_plb[i][1] = ~(unsigned long)0;
+       }
+}
+
+long int __mpx_random(int line)
+{
+#ifdef NOT_SO_RANDOM
+       static long fake = 722122311;
+       fake += 563792075;
+       return fakse;
+#else
+       return random();
+#endif
+}
+#define mpx_random() __mpx_random(__LINE__)
+
+uint8_t *get_random_addr()
+{
+       uint8_t*addr = (uint8_t *)(unsigned long)(rand() % MAX_ADDR_TESTED);
+       return (addr - (unsigned long)addr % sizeof(uint8_t *));
+}
+
+static inline bool compare_context(void *__xsave_buffer)
+{
+       uint64_t *bounds = (uint64_t *)(__xsave_buffer + MPX_BOUNDS_OFFSET);
+
+       int i;
+       for (i = 0; i < 4; i++) {
+               dprintf3("shadow[%d]{%016lx/%016lx}\nbounds[%d]{%016lx/%016lx}\n",
+                      i, (unsigned long)shadow_plb[i][0], (unsigned long)shadow_plb[i][1],
+                      i, (unsigned long)bounds[i*2],     ~(unsigned long)bounds[i*2+1]);
+               if ((shadow_plb[i][0] != bounds[i*2]) ||
+                   (shadow_plb[i][1] != ~(unsigned long)bounds[i*2+1])) {
+                       eprintf("ERROR comparing shadow to real bound register %d\n", i);
+                       eprintf("shadow{0x%016lx/0x%016lx}\nbounds{0x%016lx/0x%016lx}\n",
+                              (unsigned long)shadow_plb[i][0], (unsigned long)shadow_plb[i][1],
+                              (unsigned long)bounds[i*2], (unsigned long)bounds[i*2+1]);
+                       return false;
+               }
+       }
+
+       return true;
+}
+
+void mkbnd_shadow(uint8_t *ptr, int index, long offset)
+{
+       uint64_t *lower = (uint64_t *)&(shadow_plb[index][0]);
+       uint64_t *upper = (uint64_t *)&(shadow_plb[index][1]);
+       *lower = (unsigned long)ptr;
+       *upper = (unsigned long)ptr + offset - 1;
+}
+
+void check_lowerbound_shadow(uint8_t *ptr, int index)
+{
+       uint64_t *lower = (uint64_t *)&(shadow_plb[index][0]);
+       if (*lower > (uint64_t)(unsigned long)ptr)
+               num_lower_brs++;
+       else
+               dprintf1("LowerBoundChk passed:%p\n", ptr);
+}
+
+void check_upperbound_shadow(uint8_t *ptr, int index)
+{
+       uint64_t upper = *(uint64_t *)&(shadow_plb[index][1]);
+       if (upper < (uint64_t)(unsigned long)ptr)
+               num_upper_brs++;
+       else
+               dprintf1("UpperBoundChk passed:%p\n", ptr);
+}
+
+__always_inline void movbndreg_shadow(int src, int dest)
+{
+       shadow_plb[dest][0] = shadow_plb[src][0];
+       shadow_plb[dest][1] = shadow_plb[src][1];
+}
+
+__always_inline void movbnd2mem_shadow(int src, unsigned long *dest)
+{
+       unsigned long *lower = (unsigned long *)&(shadow_plb[src][0]);
+       unsigned long *upper = (unsigned long *)&(shadow_plb[src][1]);
+       *dest = *lower;
+       *(dest+1) = *upper;
+}
+
+__always_inline void movbnd_from_mem_shadow(unsigned long *src, int dest)
+{
+       unsigned long *lower = (unsigned long *)&(shadow_plb[dest][0]);
+       unsigned long *upper = (unsigned long *)&(shadow_plb[dest][1]);
+       *lower = *src;
+       *upper = *(src+1);
+}
+
+__always_inline void stdsc_shadow(int index, uint8_t *ptr, uint8_t *ptr_val)
+{
+       shadow_map[0] = (unsigned long)shadow_plb[index][0];
+       shadow_map[1] = (unsigned long)shadow_plb[index][1];
+       shadow_map[2] = (unsigned long)ptr_val;
+       dprintf3("%s(%d, %p, %p) set shadow map[2]: %p\n", __func__,
+                       index, ptr, ptr_val, ptr_val);
+       /*ptr ignored */
+}
+
+void lddsc_shadow(int index, uint8_t *ptr, uint8_t *ptr_val)
+{
+       uint64_t lower = shadow_map[0];
+       uint64_t upper = shadow_map[1];
+       uint8_t *value = (uint8_t *)shadow_map[2];
+
+       if (value != ptr_val) {
+               dprintf2("%s(%d, %p, %p) init shadow bounds[%d] "
+                        "because %p != %p\n", __func__, index, ptr,
+                        ptr_val, index, value, ptr_val);
+               shadow_plb[index][0] = 0;
+               shadow_plb[index][1] = ~(unsigned long)0;
+       } else {
+               shadow_plb[index][0] = lower;
+               shadow_plb[index][1] = upper;
+       }
+       /* ptr ignored */
+}
+
+static __always_inline void mpx_test_helper0(uint8_t *buf, uint8_t *ptr)
+{
+       mpx_make_bound_helper((unsigned long)ptr, 0x1800);
+}
+
+static __always_inline void mpx_test_helper0_shadow(uint8_t *buf, uint8_t *ptr)
+{
+       mkbnd_shadow(ptr, 0, 0x1800);
+}
+
+static __always_inline void mpx_test_helper1(uint8_t *buf, uint8_t *ptr)
+{
+       /* these are hard-coded to check bnd0 */
+       expected_bnd_index = 0;
+       mpx_check_lowerbound_helper((unsigned long)(ptr-1));
+       mpx_check_upperbound_helper((unsigned long)(ptr+0x1800));
+       /* reset this since we do not expect any more bounds exceptions */
+       expected_bnd_index = -1;
+}
+
+static __always_inline void mpx_test_helper1_shadow(uint8_t *buf, uint8_t *ptr)
+{
+       check_lowerbound_shadow(ptr-1, 0);
+       check_upperbound_shadow(ptr+0x1800, 0);
+}
+
+static __always_inline void mpx_test_helper2(uint8_t *buf, uint8_t *ptr)
+{
+       mpx_make_bound_helper((unsigned long)ptr, 0x1800);
+       mpx_movbndreg_helper();
+       mpx_movbnd2mem_helper(buf);
+       mpx_make_bound_helper((unsigned long)(ptr+0x12), 0x1800);
+}
+
+static __always_inline void mpx_test_helper2_shadow(uint8_t *buf, uint8_t *ptr)
+{
+       mkbnd_shadow(ptr, 0, 0x1800);
+       movbndreg_shadow(0, 2);
+       movbnd2mem_shadow(0, (unsigned long *)buf);
+       mkbnd_shadow(ptr+0x12, 0, 0x1800);
+}
+
+static __always_inline void mpx_test_helper3(uint8_t *buf, uint8_t *ptr)
+{
+       mpx_movbnd_from_mem_helper(buf);
+}
+
+static __always_inline void mpx_test_helper3_shadow(uint8_t *buf, uint8_t *ptr)
+{
+       movbnd_from_mem_shadow((unsigned long *)buf, 0);
+}
+
+static __always_inline void mpx_test_helper4(uint8_t *buf, uint8_t *ptr)
+{
+       mpx_store_dsc_helper((unsigned long)buf, (unsigned long)ptr);
+       mpx_make_bound_helper((unsigned long)(ptr+0x12), 0x1800);
+}
+
+static __always_inline void mpx_test_helper4_shadow(uint8_t *buf, uint8_t *ptr)
+{
+       stdsc_shadow(0, buf, ptr);
+       mkbnd_shadow(ptr+0x12, 0, 0x1800);
+}
+
+static __always_inline void mpx_test_helper5(uint8_t *buf, uint8_t *ptr)
+{
+       mpx_load_dsc_helper((unsigned long)buf, (unsigned long)ptr);
+}
+
+static __always_inline void mpx_test_helper5_shadow(uint8_t *buf, uint8_t *ptr)
+{
+       lddsc_shadow(0, buf, ptr);
+}
+
+#define NR_MPX_TEST_FUNCTIONS 6
+
+/*
+ * For compatibility reasons, MPX will clear the bounds registers
+ * when you make function calls (among other things).  We have to
+ * preserve the registers in between calls to the "helpers" since
+ * they build on each other.
+ *
+ * Be very careful not to make any function calls inside the
+ * helpers, or anywhere else beween the xrstor and xsave.
+ */
+#define run_helper(helper_nr, buf, buf_shadow, ptr)    do {    \
+       xrstor_state(xsave_test_buf, flags);                    \
+       mpx_test_helper##helper_nr(buf, ptr);                   \
+       xsave_state(xsave_test_buf, flags);                     \
+       mpx_test_helper##helper_nr##_shadow(buf_shadow, ptr);   \
+} while (0)
+
+static void run_helpers(int nr, uint8_t *buf, uint8_t *buf_shadow, uint8_t *ptr)
+{
+       uint64_t flags = 0x18;
+
+       dprint_context(xsave_test_buf);
+       switch (nr) {
+       case 0:
+               run_helper(0, buf, buf_shadow, ptr);
+               break;
+       case 1:
+               run_helper(1, buf, buf_shadow, ptr);
+               break;
+       case 2:
+               run_helper(2, buf, buf_shadow, ptr);
+               break;
+       case 3:
+               run_helper(3, buf, buf_shadow, ptr);
+               break;
+       case 4:
+               run_helper(4, buf, buf_shadow, ptr);
+               break;
+       case 5:
+               run_helper(5, buf, buf_shadow, ptr);
+               break;
+       default:
+               test_failed();
+               break;
+       }
+       dprint_context(xsave_test_buf);
+}
+
+unsigned long buf_shadow[1024]; /* used to check load / store descriptors */
+extern long inspect_me(struct mpx_bounds_dir *bounds_dir);
+
+long cover_buf_with_bt_entries(void *buf, long buf_len)
+{
+       int i;
+       long nr_to_fill;
+       int ratio = 1000;
+       unsigned long buf_len_in_ptrs;
+
+       /* Fill about 1/100 of the space with bt entries */
+       nr_to_fill = buf_len / (sizeof(unsigned long) * ratio);
+
+       if (!nr_to_fill)
+               dprintf3("%s() nr_to_fill: %ld\n", __func__, nr_to_fill);
+
+       /* Align the buffer to pointer size */
+       while (((unsigned long)buf) % sizeof(void *)) {
+               buf++;
+               buf_len--;
+       }
+       /* We are storing pointers, so make */
+       buf_len_in_ptrs = buf_len / sizeof(void *);
+
+       for (i = 0; i < nr_to_fill; i++) {
+               long index = (mpx_random() % buf_len_in_ptrs);
+               void *ptr = buf + index * sizeof(unsigned long);
+               unsigned long ptr_addr = (unsigned long)ptr;
+
+               /* ptr and size can be anything */
+               mpx_make_bound_helper((unsigned long)ptr, 8);
+
+               /*
+                * take bnd0 and put it in to bounds tables "buf + index" is an
+                * address inside the buffer where we are pretending that we
+                * are going to put a pointer We do not, though because we will
+                * never load entries from the table, so it doesn't matter.
+                */
+               mpx_store_dsc_helper(ptr_addr, (unsigned long)ptr);
+               dprintf4("storing bound table entry for %lx (buf start @ %p)\n",
+                               ptr_addr, buf);
+       }
+       return nr_to_fill;
+}
+
+unsigned long align_down(unsigned long alignme, unsigned long align_to)
+{
+       return alignme & ~(align_to-1);
+}
+
+unsigned long align_up(unsigned long alignme, unsigned long align_to)
+{
+       return (alignme + align_to - 1) & ~(align_to-1);
+}
+
+/*
+ * Using 1MB alignment guarantees that each no allocation
+ * will overlap with another's bounds tables.
+ *
+ * We have to cook our own allocator here.  malloc() can
+ * mix other allocation with ours which means that even
+ * if we free all of our allocations, there might still
+ * be bounds tables for the *areas* since there is other
+ * valid memory there.
+ *
+ * We also can't use malloc() because a free() of an area
+ * might not free it back to the kernel.  We want it
+ * completely unmapped an malloc() does not guarantee
+ * that.
+ */
+#ifdef __i386__
+long alignment = 4096;
+long sz_alignment = 4096;
+#else
+long alignment = 1 * MB;
+long sz_alignment = 1 * MB;
+#endif
+void *mpx_mini_alloc(unsigned long sz)
+{
+       unsigned long long tries = 0;
+       static void *last;
+       void *ptr;
+       void *try_at;
+
+       sz = align_up(sz, sz_alignment);
+
+       try_at = last + alignment;
+       while (1) {
+               ptr = mmap(try_at, sz, PROT_READ|PROT_WRITE,
+                               MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+               if (ptr == (void *)-1)
+                       return NULL;
+               if (ptr == try_at)
+                       break;
+
+               munmap(ptr, sz);
+               try_at += alignment;
+#ifdef __i386__
+               /*
+                * This isn't quite correct for 32-bit binaries
+                * on 64-bit kernels since they can use the
+                * entire 32-bit address space, but it's close
+                * enough.
+                */
+               if (try_at > (void *)0xC0000000)
+#else
+               if (try_at > (void *)0x0000800000000000)
+#endif
+                       try_at = (void *)0x0;
+               if (!(++tries % 10000))
+                       dprintf1("stuck in %s(), tries: %lld\n", __func__, tries);
+               continue;
+       }
+       last = ptr;
+       dprintf3("mpx_mini_alloc(0x%lx) returning: %p\n", sz, ptr);
+       return ptr;
+}
+void mpx_mini_free(void *ptr, long sz)
+{
+       dprintf2("%s() ptr: %p\n", __func__, ptr);
+       if ((unsigned long)ptr > 0x100000000000) {
+               dprintf1("uh oh !!!!!!!!!!!!!!! pointer too high: %p\n", ptr);
+               test_failed();
+       }
+       sz = align_up(sz, sz_alignment);
+       dprintf3("%s() ptr: %p before munmap\n", __func__, ptr);
+       munmap(ptr, sz);
+       dprintf3("%s() ptr: %p DONE\n", __func__, ptr);
+}
+
+#define NR_MALLOCS 100
+struct one_malloc {
+       char *ptr;
+       int nr_filled_btes;
+       unsigned long size;
+};
+struct one_malloc mallocs[NR_MALLOCS];
+
+void free_one_malloc(int index)
+{
+       unsigned long free_ptr;
+       unsigned long mask;
+
+       if (!mallocs[index].ptr)
+               return;
+
+       mpx_mini_free(mallocs[index].ptr, mallocs[index].size);
+       dprintf4("freed[%d]:  %p\n", index, mallocs[index].ptr);
+
+       free_ptr = (unsigned long)mallocs[index].ptr;
+       mask = alignment-1;
+       dprintf4("lowerbits: %lx / %lx mask: %lx\n", free_ptr,
+                       (free_ptr & mask), mask);
+       assert((free_ptr & mask) == 0);
+
+       mallocs[index].ptr = NULL;
+}
+
+#ifdef __i386__
+#define MPX_BOUNDS_TABLE_COVERS 4096
+#else
+#define MPX_BOUNDS_TABLE_COVERS (1 * MB)
+#endif
+void zap_everything(void)
+{
+       long after_zap;
+       long before_zap;
+       int i;
+
+       before_zap = inspect_me(bounds_dir_ptr);
+       dprintf1("zapping everything start: %ld\n", before_zap);
+       for (i = 0; i < NR_MALLOCS; i++)
+               free_one_malloc(i);
+
+       after_zap = inspect_me(bounds_dir_ptr);
+       dprintf1("zapping everything done: %ld\n", after_zap);
+       /*
+        * We only guarantee to empty the thing out if our allocations are
+        * exactly aligned on the boundaries of a boudns table.
+        */
+       if ((alignment >= MPX_BOUNDS_TABLE_COVERS) &&
+           (sz_alignment >= MPX_BOUNDS_TABLE_COVERS)) {
+               if (after_zap != 0)
+                       test_failed();
+
+               assert(after_zap == 0);
+       }
+}
+
+void do_one_malloc(void)
+{
+       static int malloc_counter;
+       long sz;
+       int rand_index = (mpx_random() % NR_MALLOCS);
+       void *ptr = mallocs[rand_index].ptr;
+
+       dprintf3("%s() enter\n", __func__);
+
+       if (ptr) {
+               dprintf3("freeing one malloc at index: %d\n", rand_index);
+               free_one_malloc(rand_index);
+               if (mpx_random() % (NR_MALLOCS*3) == 3) {
+                       int i;
+                       dprintf3("zapping some more\n");
+                       for (i = rand_index; i < NR_MALLOCS; i++)
+                               free_one_malloc(i);
+               }
+               if ((mpx_random() % zap_all_every_this_many_mallocs) == 4)
+                       zap_everything();
+       }
+
+       /* 1->~1M */
+       sz = (1 + mpx_random() % 1000) * 1000;
+       ptr = mpx_mini_alloc(sz);
+       if (!ptr) {
+               /*
+                * If we are failing allocations, just assume we
+                * are out of memory and zap everything.
+                */
+               dprintf3("zapping everything because out of memory\n");
+               zap_everything();
+               goto out;
+       }
+
+       dprintf3("malloc: %p size: 0x%lx\n", ptr, sz);
+       mallocs[rand_index].nr_filled_btes = cover_buf_with_bt_entries(ptr, sz);
+       mallocs[rand_index].ptr = ptr;
+       mallocs[rand_index].size = sz;
+out:
+       if ((++malloc_counter) % inspect_every_this_many_mallocs == 0)
+               inspect_me(bounds_dir_ptr);
+}
+
+void run_timed_test(void (*test_func)(void))
+{
+       int done = 0;
+       long iteration = 0;
+       static time_t last_print;
+       time_t now;
+       time_t start;
+
+       time(&start);
+       while (!done) {
+               time(&now);
+               if ((now - start) > TEST_DURATION_SECS)
+                       done = 1;
+
+               test_func();
+               iteration++;
+
+               if ((now - last_print > 1) || done) {
+                       printf("iteration %ld complete, OK so far\n", iteration);
+                       last_print = now;
+               }
+       }
+}
+
+void check_bounds_table_frees(void)
+{
+       printf("executing unmaptest\n");
+       inspect_me(bounds_dir_ptr);
+       run_timed_test(&do_one_malloc);
+       printf("done with malloc() fun\n");
+}
+
+void insn_test_failed(int test_nr, int test_round, void *buf,
+               void *buf_shadow, void *ptr)
+{
+       print_context(xsave_test_buf);
+       eprintf("ERROR: test %d round %d failed\n", test_nr, test_round);
+       while (test_nr == 5) {
+               struct mpx_bt_entry *bte;
+               struct mpx_bounds_dir *bd = (void *)bounds_dir_ptr;
+               struct mpx_bd_entry *bde = mpx_vaddr_to_bd_entry(buf, bd);
+
+               printf("  bd: %p\n", bd);
+               printf("&bde: %p\n", bde);
+               printf("*bde: %lx\n", *(unsigned long *)bde);
+               if (!bd_entry_valid(bde))
+                       break;
+
+               bte = mpx_vaddr_to_bt_entry(buf, bd);
+               printf(" te: %p\n", bte);
+               printf("bte[0]: %lx\n", bte->contents[0]);
+               printf("bte[1]: %lx\n", bte->contents[1]);
+               printf("bte[2]: %lx\n", bte->contents[2]);
+               printf("bte[3]: %lx\n", bte->contents[3]);
+               break;
+       }
+       test_failed();
+}
+
+void check_mpx_insns_and_tables(void)
+{
+       int successes = 0;
+       int failures  = 0;
+       int buf_size = (1024*1024);
+       unsigned long *buf = malloc(buf_size);
+       const int total_nr_tests = NR_MPX_TEST_FUNCTIONS * TEST_ROUNDS;
+       int i, j;
+
+       memset(buf, 0, buf_size);
+       memset(buf_shadow, 0, sizeof(buf_shadow));
+
+       for (i = 0; i < TEST_ROUNDS; i++) {
+               uint8_t *ptr = get_random_addr() + 8;
+
+               for (j = 0; j < NR_MPX_TEST_FUNCTIONS; j++) {
+                       if (0 && j != 5) {
+                               successes++;
+                               continue;
+                       }
+                       dprintf2("starting test %d round %d\n", j, i);
+                       dprint_context(xsave_test_buf);
+                       /*
+                        * test5 loads an address from the bounds tables.
+                        * The load will only complete if 'ptr' matches
+                        * the load and the store, so with random addrs,
+                        * the odds of this are very small.  Make it
+                        * higher by only moving 'ptr' 1/10 times.
+                        */
+                       if (random() % 10 <= 0)
+                               ptr = get_random_addr() + 8;
+                       dprintf3("random ptr{%p}\n", ptr);
+                       dprint_context(xsave_test_buf);
+                       run_helpers(j, (void *)buf, (void *)buf_shadow, ptr);
+                       dprint_context(xsave_test_buf);
+                       if (!compare_context(xsave_test_buf)) {
+                               insn_test_failed(j, i, buf, buf_shadow, ptr);
+                               failures++;
+                               goto exit;
+                       }
+                       successes++;
+                       dprint_context(xsave_test_buf);
+                       dprintf2("finished test %d round %d\n", j, i);
+                       dprintf3("\n");
+                       dprint_context(xsave_test_buf);
+               }
+       }
+
+exit:
+       dprintf2("\nabout to free:\n");
+       free(buf);
+       dprintf1("successes: %d\n", successes);
+       dprintf1(" failures: %d\n", failures);
+       dprintf1("    tests: %d\n", total_nr_tests);
+       dprintf1(" expected: %jd #BRs\n", num_upper_brs + num_lower_brs);
+       dprintf1("      saw: %d #BRs\n", br_count);
+       if (failures) {
+               eprintf("ERROR: non-zero number of failures\n");
+               exit(20);
+       }
+       if (successes != total_nr_tests) {
+               eprintf("ERROR: succeded fewer than number of tries (%d != %d)\n",
+                               successes, total_nr_tests);
+               exit(21);
+       }
+       if (num_upper_brs + num_lower_brs != br_count) {
+               eprintf("ERROR: unexpected number of #BRs: %jd %jd %d\n",
+                               num_upper_brs, num_lower_brs, br_count);
+               eprintf("successes: %d\n", successes);
+               eprintf(" failures: %d\n", failures);
+               eprintf("    tests: %d\n", total_nr_tests);
+               eprintf(" expected: %jd #BRs\n", num_upper_brs + num_lower_brs);
+               eprintf("      saw: %d #BRs\n", br_count);
+               exit(22);
+       }
+}
+
+/*
+ * This is supposed to SIGSEGV nicely once the kernel
+ * can no longer allocate vaddr space.
+ */
+void exhaust_vaddr_space(void)
+{
+       unsigned long ptr;
+       /* Try to make sure there is no room for a bounds table anywhere */
+       unsigned long skip = MPX_BOUNDS_TABLE_SIZE_BYTES - PAGE_SIZE;
+#ifdef __i386__
+       unsigned long max_vaddr = 0xf7788000UL;
+#else
+       unsigned long max_vaddr = 0x800000000000UL;
+#endif
+
+       dprintf1("%s() start\n", __func__);
+       /* do not start at 0, we aren't allowed to map there */
+       for (ptr = PAGE_SIZE; ptr < max_vaddr; ptr += skip) {
+               void *ptr_ret;
+               int ret = madvise((void *)ptr, PAGE_SIZE, MADV_NORMAL);
+
+               if (!ret) {
+                       dprintf1("madvise() %lx ret: %d\n", ptr, ret);
+                       continue;
+               }
+               ptr_ret = mmap((void *)ptr, PAGE_SIZE, PROT_READ|PROT_WRITE,
+                               MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+               if (ptr_ret != (void *)ptr) {
+                       perror("mmap");
+                       dprintf1("mmap(%lx) ret: %p\n", ptr, ptr_ret);
+                       break;
+               }
+               if (!(ptr & 0xffffff))
+                       dprintf1("mmap(%lx) ret: %p\n", ptr, ptr_ret);
+       }
+       for (ptr = PAGE_SIZE; ptr < max_vaddr; ptr += skip) {
+               dprintf2("covering 0x%lx with bounds table entries\n", ptr);
+               cover_buf_with_bt_entries((void *)ptr, PAGE_SIZE);
+       }
+       dprintf1("%s() end\n", __func__);
+       printf("done with vaddr space fun\n");
+}
+
+void mpx_table_test(void)
+{
+       printf("starting mpx bounds table test\n");
+       run_timed_test(check_mpx_insns_and_tables);
+       printf("done with mpx bounds table test\n");
+}
+
+int main(int argc, char **argv)
+{
+       int unmaptest = 0;
+       int vaddrexhaust = 0;
+       int tabletest = 0;
+       int i;
+
+       check_mpx_support();
+       mpx_prepare();
+       srandom(11179);
+
+       bd_incore();
+       init();
+       bd_incore();
+
+       trace_me();
+
+       xsave_state((void *)xsave_test_buf, 0x1f);
+       if (!compare_context(xsave_test_buf))
+               printf("Init failed\n");
+
+       for (i = 1; i < argc; i++) {
+               if (!strcmp(argv[i], "unmaptest"))
+                       unmaptest = 1;
+               if (!strcmp(argv[i], "vaddrexhaust"))
+                       vaddrexhaust = 1;
+               if (!strcmp(argv[i], "tabletest"))
+                       tabletest = 1;
+       }
+       if (!(unmaptest || vaddrexhaust || tabletest)) {
+               unmaptest = 1;
+               /* vaddrexhaust = 1; */
+               tabletest = 1;
+       }
+       if (unmaptest)
+               check_bounds_table_frees();
+       if (tabletest)
+               mpx_table_test();
+       if (vaddrexhaust)
+               exhaust_vaddr_space();
+       printf("%s completed successfully\n", argv[0]);
+       exit(0);
+}
+
+#include "mpx-dig.c"
diff --git a/tools/testing/selftests/x86/mpx-mm.h b/tools/testing/selftests/x86/mpx-mm.h
new file mode 100644 (file)
index 0000000..af706a5
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef _MPX_MM_H
+#define _MPX_MM_H
+
+#define PAGE_SIZE 4096
+#define MB (1UL<<20)
+
+extern long nr_incore(void *ptr, unsigned long size_bytes);
+
+#endif /* _MPX_MM_H */