Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 6 Jan 2012 21:59:14 +0000 (13:59 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 6 Jan 2012 21:59:14 +0000 (13:59 -0800)
* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits)
  x86: Fix atomic64_xxx_cx8() functions
  x86: Fix and improve cmpxchg_double{,_local}()
  x86_64, asm: Optimise fls(), ffs() and fls64()
  x86, bitops: Move fls64.h inside __KERNEL__
  x86: Fix and improve percpu_cmpxchg{8,16}b_double()
  x86: Report cpb and eff_freq_ro flags correctly
  x86/i386: Use less assembly in strlen(), speed things up a bit
  x86: Use the same node_distance for 32 and 64-bit
  x86: Fix rflags in FAKE_STACK_FRAME
  x86: Clean up and extend do_int3()
  x86: Call do_notify_resume() with interrupts enabled
  x86/div64: Add a micro-optimization shortcut if base is power of two
  x86-64: Cleanup some assembly entry points
  x86-64: Slightly shorten system call entry and exit paths
  x86-64: Reduce amount of redundant code generated for invalidate_interruptNN
  x86-64: Slightly shorten int_ret_from_sys_call
  x86, efi: Convert efi_phys_get_time() args to physical addresses
  x86: Default to vsyscall=emulate
  x86-64: Set siginfo and context on vsyscall emulation faults
  x86: consolidate xchg and xadd macros
  ...

26 files changed:
Documentation/kernel-parameters.txt
arch/x86/ia32/ia32entry.S
arch/x86/include/asm/alternative-asm.h
arch/x86/include/asm/bitops.h
arch/x86/include/asm/cmpxchg.h
arch/x86/include/asm/cmpxchg_32.h
arch/x86/include/asm/cmpxchg_64.h
arch/x86/include/asm/div64.h
arch/x86/include/asm/percpu.h
arch/x86/include/asm/processor-flags.h
arch/x86/include/asm/spinlock.h
arch/x86/include/asm/thread_info.h
arch/x86/include/asm/topology.h
arch/x86/include/asm/uaccess.h
arch/x86/kernel/cpu/powerflags.c
arch/x86/kernel/entry_32.S
arch/x86/kernel/entry_64.S
arch/x86/kernel/process.c
arch/x86/kernel/traps.c
arch/x86/kernel/vsyscall_64.c
arch/x86/lib/string_32.c
arch/x86/mm/extable.c
arch/x86/mm/fault.c
arch/x86/platform/efi/efi.c
drivers/lguest/x86/core.c
mm/slub.c

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 0293fc8..e229769 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2755,11 +2755,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        functions are at fixed addresses, they make nice
                        targets for exploits that can control RIP.
 
-                       emulate     Vsyscalls turn into traps and are emulated
-                                   reasonably safely.
+                       emulate     [default] Vsyscalls turn into traps and are
+                                   emulated reasonably safely.
 
-                       native      [default] Vsyscalls are native syscall
-                                   instructions.
+                       native      Vsyscalls are native syscall instructions.
                                    This is a little bit faster than trapping
                                    and makes a few dynamic recompilers work
                                    better than they would in emulation mode.
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index a6253ec..3e27456 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -134,7 +134,7 @@ ENTRY(ia32_sysenter_target)
        CFI_REL_OFFSET rsp,0
        pushfq_cfi
        /*CFI_REL_OFFSET rflags,0*/
-       movl    8*3-THREAD_SIZE+TI_sysenter_return(%rsp), %r10d
+       movl    TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d
        CFI_REGISTER rip,r10
        pushq_cfi $__USER32_CS
        /*CFI_REL_OFFSET cs,0*/
@@ -150,9 +150,8 @@ ENTRY(ia32_sysenter_target)
        .section __ex_table,"a"
        .quad 1b,ia32_badarg
        .previous       
-       GET_THREAD_INFO(%r10)
-       orl    $TS_COMPAT,TI_status(%r10)
-       testl  $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
+       orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+       testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
        CFI_REMEMBER_STATE
        jnz  sysenter_tracesys
        cmpq    $(IA32_NR_syscalls-1),%rax
@@ -162,13 +161,12 @@ sysenter_do_call:
 sysenter_dispatch:
        call    *ia32_sys_call_table(,%rax,8)
        movq    %rax,RAX-ARGOFFSET(%rsp)
-       GET_THREAD_INFO(%r10)
        DISABLE_INTERRUPTS(CLBR_NONE)
        TRACE_IRQS_OFF
-       testl   $_TIF_ALLWORK_MASK,TI_flags(%r10)
+       testl   $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
        jnz     sysexit_audit
 sysexit_from_sys_call:
-       andl    $~TS_COMPAT,TI_status(%r10)
+       andl    $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
        /* clear IF so that popfq doesn't enable interrupts early */
        andl  $~0x200,EFLAGS-R11(%rsp) 
        movl    RIP-R11(%rsp),%edx              /* User %eip */
@@ -205,7 +203,7 @@ sysexit_from_sys_call:
        .endm
 
        .macro auditsys_exit exit
-       testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
+       testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
        jnz ia32_ret_from_sys_call
        TRACE_IRQS_ON
        sti
@@ -215,12 +213,11 @@ sysexit_from_sys_call:
        movzbl %al,%edi         /* zero-extend that into %edi */
        inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
        call audit_syscall_exit
-       GET_THREAD_INFO(%r10)
        movl RAX-ARGOFFSET(%rsp),%eax   /* reload syscall return value */
        movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
        cli
        TRACE_IRQS_OFF
-       testl %edi,TI_flags(%r10)
+       testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
        jz \exit
        CLEAR_RREGS -ARGOFFSET
        jmp int_with_check
@@ -238,7 +235,7 @@ sysexit_audit:
 
 sysenter_tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-       testl   $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
+       testl   $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
        jz      sysenter_auditsys
 #endif
        SAVE_REST
@@ -309,9 +306,8 @@ ENTRY(ia32_cstar_target)
        .section __ex_table,"a"
        .quad 1b,ia32_badarg
        .previous       
-       GET_THREAD_INFO(%r10)
-       orl   $TS_COMPAT,TI_status(%r10)
-       testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
+       orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+       testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
        CFI_REMEMBER_STATE
        jnz   cstar_tracesys
        cmpq $IA32_NR_syscalls-1,%rax
@@ -321,13 +317,12 @@ cstar_do_call:
 cstar_dispatch:
        call *ia32_sys_call_table(,%rax,8)
        movq %rax,RAX-ARGOFFSET(%rsp)
-       GET_THREAD_INFO(%r10)
        DISABLE_INTERRUPTS(CLBR_NONE)
        TRACE_IRQS_OFF
-       testl $_TIF_ALLWORK_MASK,TI_flags(%r10)
+       testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
        jnz sysretl_audit
 sysretl_from_sys_call:
-       andl $~TS_COMPAT,TI_status(%r10)
+       andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
        RESTORE_ARGS 0,-ARG_SKIP,0,0,0
        movl RIP-ARGOFFSET(%rsp),%ecx
        CFI_REGISTER rip,rcx
@@ -355,7 +350,7 @@ sysretl_audit:
 
 cstar_tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-       testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
+       testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
        jz cstar_auditsys
 #endif
        xchgl %r9d,%ebp
@@ -420,9 +415,8 @@ ENTRY(ia32_syscall)
        /* note the registers are not zero extended to the sf.
           this could be a problem. */
        SAVE_ARGS 0,1,0
-       GET_THREAD_INFO(%r10)
-       orl   $TS_COMPAT,TI_status(%r10)
-       testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
+       orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+       testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
        jnz ia32_tracesys
        cmpq $(IA32_NR_syscalls-1),%rax
        ja ia32_badsys
@@ -459,8 +453,8 @@ quiet_ni_syscall:
        CFI_ENDPROC
        
        .macro PTREGSCALL label, func, arg
-       .globl \label
-\label:
+       ALIGN
+GLOBAL(\label)
        leaq \func(%rip),%rax
        leaq -ARGOFFSET+8(%rsp),\arg    /* 8 for return address */
        jmp  ia32_ptregs_common 
@@ -477,7 +471,8 @@ quiet_ni_syscall:
        PTREGSCALL stub32_vfork, sys_vfork, %rdi
        PTREGSCALL stub32_iopl, sys_iopl, %rsi
 
-ENTRY(ia32_ptregs_common)
+       ALIGN
+ia32_ptregs_common:
        popq %r11
        CFI_ENDPROC
        CFI_STARTPROC32 simple
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 091508b..952bd01 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -4,10 +4,10 @@
 
 #ifdef CONFIG_SMP
        .macro LOCK_PREFIX
-1:     lock
+672:   lock
        .section .smp_locks,"a"
        .balign 4
-       .long 1b - .
+       .long 672b - .
        .previous
        .endm
 #else
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 1775d6e..b97596e 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -380,6 +380,8 @@ static inline unsigned long __fls(unsigned long word)
        return word;
 }
 
+#undef ADDR
+
 #ifdef __KERNEL__
 /**
  * ffs - find first set bit in word
@@ -395,10 +397,25 @@ static inline unsigned long __fls(unsigned long word)
 static inline int ffs(int x)
 {
        int r;
-#ifdef CONFIG_X86_CMOV
+
+#ifdef CONFIG_X86_64
+       /*
+        * AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says the
+        * dest reg is undefined if x==0, but Intel's CPU architects say the
+        * value is in fact written back unchanged, except that the top 32
+        * bits will be cleared.
+        *
+        * We cannot do this on 32 bits because at the very least some
+        * 486 CPUs did not behave this way.
+        */
+       long tmp = -1;
+       asm("bsfl %1,%0"
+           : "=r" (r)
+           : "rm" (x), "0" (tmp));
+#elif defined(CONFIG_X86_CMOV)
        asm("bsfl %1,%0\n\t"
            "cmovzl %2,%0"
-           : "=r" (r) : "rm" (x), "r" (-1));
+           : "=&r" (r) : "rm" (x), "r" (-1));
 #else
        asm("bsfl %1,%0\n\t"
            "jnz 1f\n\t"
@@ -422,7 +439,22 @@ static inline int ffs(int x)
 static inline int fls(int x)
 {
        int r;
-#ifdef CONFIG_X86_CMOV
+
+#ifdef CONFIG_X86_64
+       /*
+        * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the
+        * dest reg is undefined if x==0, but Intel's CPU architects say the
+        * value is in fact written back unchanged, except that the top 32
+        * bits will be cleared.
+        *
+        * We cannot do this on 32 bits because at the very least some
+        * 486 CPUs did not behave this way.
+        */
+       long tmp = -1;
+       asm("bsrl %1,%0"
+           : "=r" (r)
+           : "rm" (x), "0" (tmp));
+#elif defined(CONFIG_X86_CMOV)
        asm("bsrl %1,%0\n\t"
            "cmovzl %2,%0"
            : "=&r" (r) : "rm" (x), "rm" (-1));
@@ -434,11 +466,35 @@ static inline int fls(int x)
 #endif
        return r + 1;
 }
-#endif /* __KERNEL__ */
-
-#undef ADDR
 
-#ifdef __KERNEL__
+/**
+ * fls64 - find last set bit in a 64-bit word
+ * @x: the word to search
+ *
+ * This is defined in a similar way as the libc and compiler builtin
+ * ffsll, but returns the position of the most significant set bit.
+ *
+ * fls64(value) returns 0 if value is 0 or the position of the last
+ * set bit if value is nonzero. The last (most significant) bit is
+ * at position 64.
+ */
+#ifdef CONFIG_X86_64
+static __always_inline int fls64(__u64 x)
+{
+       long bitpos = -1;
+       /*
+        * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
+        * dest reg is undefined if x==0, but Intel's CPU architects say the
+        * value is in fact written back unchanged.
+        */
+       asm("bsrq %1,%0"
+           : "+r" (bitpos)
+           : "rm" (x));
+       return bitpos + 1;
+}
+#else
+#include <asm-generic/bitops/fls64.h>
+#endif
 
 #include <asm-generic/bitops/find.h>
 
@@ -450,12 +506,6 @@ static inline int fls(int x)
 
 #include <asm-generic/bitops/const_hweight.h>
 
-#endif /* __KERNEL__ */
-
-#include <asm-generic/bitops/fls64.h>
-
-#ifdef __KERNEL__
-
 #include <asm-generic/bitops/le.h>
 
 #include <asm-generic/bitops/ext2-atomic-setbit.h>
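
The BSR trick the comments above describe is easy to poke at from
userspace. A minimal sketch (not part of the patch; function and
variable names are ours, x86-64 only) mirrors the new fls64() and
checks it against a portable loop:

/* gcc -O2 fls64_demo.c && ./a.out */
#include <stdio.h>

static inline int fls64_bsr(unsigned long long x)
{
        long bitpos = -1;
        /* bsrq leaves the destination untouched when x == 0 on current
         * AMD and Intel parts, so the preloaded -1 survives and the
         * result becomes 0 */
        asm("bsrq %1,%0" : "+r" (bitpos) : "rm" (x));
        return bitpos + 1;
}

/* portable reference with the same contract: 0 for 0, else the
 * 1-based index of the most significant set bit */
static int fls64_ref(unsigned long long x)
{
        int r = 0;
        for (; x; x >>= 1)
                r++;
        return r;
}

int main(void)
{
        unsigned long long v[] = { 0, 1, 0x80000000ULL,
                                   0x8000000000000000ULL };
        unsigned i;
        for (i = 0; i < sizeof(v) / sizeof(v[0]); i++)
                printf("fls64(%#llx) = %d (ref %d)\n",
                       v[i], fls64_bsr(v[i]), fls64_ref(v[i]));
        return 0;
}
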
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 5d3acdf..0c9fa27 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -14,6 +14,8 @@ extern void __cmpxchg_wrong_size(void)
        __compiletime_error("Bad argument size for cmpxchg");
 extern void __xadd_wrong_size(void)
        __compiletime_error("Bad argument size for xadd");
+extern void __add_wrong_size(void)
+       __compiletime_error("Bad argument size for add");
 
 /*
  * Constants for operation sizes. On 32-bit, the 64-bit size is set to
@@ -31,60 +33,47 @@ extern void __xadd_wrong_size(void)
 #define        __X86_CASE_Q    -1              /* sizeof will never return -1 */
 #endif
 
+/* 
+ * An exchange-type operation, which takes a value and a pointer, and
+ * returns the old value.
+ */
+#define __xchg_op(ptr, arg, op, lock)                                  \
+       ({                                                              \
+               __typeof__ (*(ptr)) __ret = (arg);                      \
+               switch (sizeof(*(ptr))) {                               \
+               case __X86_CASE_B:                                      \
+                       asm volatile (lock #op "b %b0, %1\n"            \
+                                     : "+r" (__ret), "+m" (*(ptr))     \
+                                     : : "memory", "cc");              \
+                       break;                                          \
+               case __X86_CASE_W:                                      \
+                       asm volatile (lock #op "w %w0, %1\n"            \
+                                     : "+r" (__ret), "+m" (*(ptr))     \
+                                     : : "memory", "cc");              \
+                       break;                                          \
+               case __X86_CASE_L:                                      \
+                       asm volatile (lock #op "l %0, %1\n"             \
+                                     : "+r" (__ret), "+m" (*(ptr))     \
+                                     : : "memory", "cc");              \
+                       break;                                          \
+               case __X86_CASE_Q:                                      \
+                       asm volatile (lock #op "q %q0, %1\n"            \
+                                     : "+r" (__ret), "+m" (*(ptr))     \
+                                     : : "memory", "cc");              \
+                       break;                                          \
+               default:                                                \
+                       __ ## op ## _wrong_size();                      \
+               }                                                       \
+               __ret;                                                  \
+       })
+
 /*
  * Note: no "lock" prefix even on SMP: xchg always implies lock anyway.
  * Since this is generally used to protect other memory information, we
  * use "asm volatile" and "memory" clobbers to prevent gcc from moving
  * information around.
  */
-#define __xchg(x, ptr, size)                                           \
-({                                                                     \
-       __typeof(*(ptr)) __x = (x);                                     \
-       switch (size) {                                                 \
-       case __X86_CASE_B:                                              \
-       {                                                               \
-               volatile u8 *__ptr = (volatile u8 *)(ptr);              \
-               asm volatile("xchgb %0,%1"                              \
-                            : "=q" (__x), "+m" (*__ptr)                \
-                            : "0" (__x)                                \
-                            : "memory");                               \
-               break;                                                  \
-       }                                                               \
-       case __X86_CASE_W:                                              \
-       {                                                               \
-               volatile u16 *__ptr = (volatile u16 *)(ptr);            \
-               asm volatile("xchgw %0,%1"                              \
-                            : "=r" (__x), "+m" (*__ptr)                \
-                            : "0" (__x)                                \
-                            : "memory");                               \
-               break;                                                  \
-       }                                                               \
-       case __X86_CASE_L:                                              \
-       {                                                               \
-               volatile u32 *__ptr = (volatile u32 *)(ptr);            \
-               asm volatile("xchgl %0,%1"                              \
-                            : "=r" (__x), "+m" (*__ptr)                \
-                            : "0" (__x)                                \
-                            : "memory");                               \
-               break;                                                  \
-       }                                                               \
-       case __X86_CASE_Q:                                              \
-       {                                                               \
-               volatile u64 *__ptr = (volatile u64 *)(ptr);            \
-               asm volatile("xchgq %0,%1"                              \
-                            : "=r" (__x), "+m" (*__ptr)                \
-                            : "0" (__x)                                \
-                            : "memory");                               \
-               break;                                                  \
-       }                                                               \
-       default:                                                        \
-               __xchg_wrong_size();                                    \
-       }                                                               \
-       __x;                                                            \
-})
-
-#define xchg(ptr, v)                                                   \
-       __xchg((v), (ptr), sizeof(*ptr))
+#define xchg(ptr, v)   __xchg_op((ptr), (v), xchg, "")
 
 /*
  * Atomic compare and exchange.  Compare OLD with MEM, if identical,
@@ -165,46 +154,80 @@ extern void __xadd_wrong_size(void)
        __cmpxchg_local((ptr), (old), (new), sizeof(*ptr))
 #endif
 
-#define __xadd(ptr, inc, lock)                                         \
+/*
+ * xadd() adds "inc" to "*ptr" and atomically returns the previous
+ * value of "*ptr".
+ *
+ * xadd() is locked when multiple CPUs are online
+ * xadd_sync() is always locked
+ * xadd_local() is never locked
+ */
+#define __xadd(ptr, inc, lock) __xchg_op((ptr), (inc), xadd, lock)
+#define xadd(ptr, inc)         __xadd((ptr), (inc), LOCK_PREFIX)
+#define xadd_sync(ptr, inc)    __xadd((ptr), (inc), "lock; ")
+#define xadd_local(ptr, inc)   __xadd((ptr), (inc), "")
+
+#define __add(ptr, inc, lock)                                          \
        ({                                                              \
                __typeof__ (*(ptr)) __ret = (inc);                      \
                switch (sizeof(*(ptr))) {                               \
                case __X86_CASE_B:                                      \
-                       asm volatile (lock "xaddb %b0, %1\n"            \
-                                     : "+r" (__ret), "+m" (*(ptr))     \
-                                     : : "memory", "cc");              \
+                       asm volatile (lock "addb %b1, %0\n"             \
+                                     : "+m" (*(ptr)) : "ri" (inc)      \
+                                     : "memory", "cc");                \
                        break;                                          \
                case __X86_CASE_W:                                      \
-                       asm volatile (lock "xaddw %w0, %1\n"            \
-                                     : "+r" (__ret), "+m" (*(ptr))     \
-                                     : : "memory", "cc");              \
+                       asm volatile (lock "addw %w1, %0\n"             \
+                                     : "+m" (*(ptr)) : "ri" (inc)      \
+                                     : "memory", "cc");                \
                        break;                                          \
                case __X86_CASE_L:                                      \
-                       asm volatile (lock "xaddl %0, %1\n"             \
-                                     : "+r" (__ret), "+m" (*(ptr))     \
-                                     : : "memory", "cc");              \
+                       asm volatile (lock "addl %1, %0\n"              \
+                                     : "+m" (*(ptr)) : "ri" (inc)      \
+                                     : "memory", "cc");                \
                        break;                                          \
                case __X86_CASE_Q:                                      \
-                       asm volatile (lock "xaddq %q0, %1\n"            \
-                                     : "+r" (__ret), "+m" (*(ptr))     \
-                                     : : "memory", "cc");              \
+                       asm volatile (lock "addq %1, %0\n"              \
+                                     : "+m" (*(ptr)) : "ri" (inc)      \
+                                     : "memory", "cc");                \
                        break;                                          \
                default:                                                \
-                       __xadd_wrong_size();                            \
+                       __add_wrong_size();                             \
                }                                                       \
                __ret;                                                  \
        })
 
 /*
- * xadd() adds "inc" to "*ptr" and atomically returns the previous
- * value of "*ptr".
+ * add_*() adds "inc" to "*ptr"
  *
- * xadd() is locked when multiple CPUs are online
- * xadd_sync() is always locked
- * xadd_local() is never locked
+ * __add() takes a lock prefix
+ * add_smp() is locked when multiple CPUs are online
+ * add_sync() is always locked
  */
-#define xadd(ptr, inc)         __xadd((ptr), (inc), LOCK_PREFIX)
-#define xadd_sync(ptr, inc)    __xadd((ptr), (inc), "lock; ")
-#define xadd_local(ptr, inc)   __xadd((ptr), (inc), "")
+#define add_smp(ptr, inc)      __add((ptr), (inc), LOCK_PREFIX)
+#define add_sync(ptr, inc)     __add((ptr), (inc), "lock; ")
+
+#define __cmpxchg_double(pfx, p1, p2, o1, o2, n1, n2)                  \
+({                                                                     \
+       bool __ret;                                                     \
+       __typeof__(*(p1)) __old1 = (o1), __new1 = (n1);                 \
+       __typeof__(*(p2)) __old2 = (o2), __new2 = (n2);                 \
+       BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long));                    \
+       BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long));                    \
+       VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long)));            \
+       VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2));    \
+       asm volatile(pfx "cmpxchg%c4b %2; sete %0"                      \
+                    : "=a" (__ret), "+d" (__old2),                     \
+                      "+m" (*(p1)), "+m" (*(p2))                       \
+                    : "i" (2 * sizeof(long)), "a" (__old1),            \
+                      "b" (__new1), "c" (__new2));                     \
+       __ret;                                                          \
+})
+
+#define cmpxchg_double(p1, p2, o1, o2, n1, n2) \
+       __cmpxchg_double(LOCK_PREFIX, p1, p2, o1, o2, n1, n2)
+
+#define cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \
+       __cmpxchg_double(, p1, p2, o1, o2, n1, n2)
 
 #endif /* ASM_X86_CMPXCHG_H */
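
The consolidation above leans on xadd's contract: atomically add and
hand back the previous value. A self-contained userspace sketch of
just that contract (our code, not the kernel macro), 32-bit case:

#include <stdio.h>

static inline int xadd32(volatile int *ptr, int inc)
{
        int old = inc;
        /* lock xadd swaps the register with *ptr and stores the sum,
         * so the register ends up holding the previous value of *ptr */
        asm volatile("lock xaddl %0, %1"
                     : "+r" (old), "+m" (*ptr)
                     : : "memory", "cc");
        return old;
}

int main(void)
{
        volatile int counter = 40;
        int prev = xadd32(&counter, 2);
        printf("prev=%d now=%d\n", prev, counter);  /* prev=40 now=42 */
        return 0;
}
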
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index fbebb07..53f4b21 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -166,52 +166,6 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
 
 #endif
 
-#define cmpxchg8b(ptr, o1, o2, n1, n2)                         \
-({                                                             \
-       char __ret;                                             \
-       __typeof__(o2) __dummy;                                 \
-       __typeof__(*(ptr)) __old1 = (o1);                       \
-       __typeof__(o2) __old2 = (o2);                           \
-       __typeof__(*(ptr)) __new1 = (n1);                       \
-       __typeof__(o2) __new2 = (n2);                           \
-       asm volatile(LOCK_PREFIX "cmpxchg8b %2; setz %1"        \
-                      : "=d"(__dummy), "=a" (__ret), "+m" (*ptr)\
-                      : "a" (__old1), "d"(__old2),             \
-                        "b" (__new1), "c" (__new2)             \
-                      : "memory");                             \
-       __ret; })
-
-
-#define cmpxchg8b_local(ptr, o1, o2, n1, n2)                   \
-({                                                             \
-       char __ret;                                             \
-       __typeof__(o2) __dummy;                                 \
-       __typeof__(*(ptr)) __old1 = (o1);                       \
-       __typeof__(o2) __old2 = (o2);                           \
-       __typeof__(*(ptr)) __new1 = (n1);                       \
-       __typeof__(o2) __new2 = (n2);                           \
-       asm volatile("cmpxchg8b %2; setz %1"                    \
-                      : "=d"(__dummy), "=a"(__ret), "+m" (*ptr)\
-                      : "a" (__old), "d"(__old2),              \
-                        "b" (__new1), "c" (__new2),            \
-                      : "memory");                             \
-       __ret; })
-
-
-#define cmpxchg_double(ptr, o1, o2, n1, n2)                            \
-({                                                                     \
-       BUILD_BUG_ON(sizeof(*(ptr)) != 4);                              \
-       VM_BUG_ON((unsigned long)(ptr) % 8);                            \
-       cmpxchg8b((ptr), (o1), (o2), (n1), (n2));                       \
-})
-
-#define cmpxchg_double_local(ptr, o1, o2, n1, n2)                      \
-({                                                                     \
-       BUILD_BUG_ON(sizeof(*(ptr)) != 4);                              \
-       VM_BUG_ON((unsigned long)(ptr) % 8);                            \
-       cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2));                        \
-})
-
 #define system_has_cmpxchg_double() cpu_has_cx8
 
 #endif /* _ASM_X86_CMPXCHG_32_H */
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 285da02..614be87 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -20,49 +20,6 @@ static inline void set_64bit(volatile u64 *ptr, u64 val)
        cmpxchg_local((ptr), (o), (n));                                 \
 })
 
-#define cmpxchg16b(ptr, o1, o2, n1, n2)                                \
-({                                                             \
-       char __ret;                                             \
-       __typeof__(o2) __junk;                                  \
-       __typeof__(*(ptr)) __old1 = (o1);                       \
-       __typeof__(o2) __old2 = (o2);                           \
-       __typeof__(*(ptr)) __new1 = (n1);                       \
-       __typeof__(o2) __new2 = (n2);                           \
-       asm volatile(LOCK_PREFIX "cmpxchg16b %2;setz %1"        \
-                      : "=d"(__junk), "=a"(__ret), "+m" (*ptr) \
-                      : "b"(__new1), "c"(__new2),              \
-                        "a"(__old1), "d"(__old2));             \
-       __ret; })
-
-
-#define cmpxchg16b_local(ptr, o1, o2, n1, n2)                  \
-({                                                             \
-       char __ret;                                             \
-       __typeof__(o2) __junk;                                  \
-       __typeof__(*(ptr)) __old1 = (o1);                       \
-       __typeof__(o2) __old2 = (o2);                           \
-       __typeof__(*(ptr)) __new1 = (n1);                       \
-       __typeof__(o2) __new2 = (n2);                           \
-       asm volatile("cmpxchg16b %2;setz %1"                    \
-                      : "=d"(__junk), "=a"(__ret), "+m" (*ptr) \
-                      : "b"(__new1), "c"(__new2),              \
-                        "a"(__old1), "d"(__old2));             \
-       __ret; })
-
-#define cmpxchg_double(ptr, o1, o2, n1, n2)                            \
-({                                                                     \
-       BUILD_BUG_ON(sizeof(*(ptr)) != 8);                              \
-       VM_BUG_ON((unsigned long)(ptr) % 16);                           \
-       cmpxchg16b((ptr), (o1), (o2), (n1), (n2));                      \
-})
-
-#define cmpxchg_double_local(ptr, o1, o2, n1, n2)                      \
-({                                                                     \
-       BUILD_BUG_ON(sizeof(*(ptr)) != 8);                              \
-       VM_BUG_ON((unsigned long)(ptr) % 16);                           \
-       cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2));                \
-})
-
 #define system_has_cmpxchg_double() cpu_has_cx16
 
 #endif /* _ASM_X86_CMPXCHG_64_H */
diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h
index 9a2d644..ced283a 100644
--- a/arch/x86/include/asm/div64.h
+++ b/arch/x86/include/asm/div64.h
@@ -4,6 +4,7 @@
 #ifdef CONFIG_X86_32
 
 #include <linux/types.h>
+#include <linux/log2.h>
 
 /*
  * do_div() is NOT a C function. It wants to return
 ({                                                             \
        unsigned long __upper, __low, __high, __mod, __base;    \
        __base = (base);                                        \
-       asm("":"=a" (__low), "=d" (__high) : "A" (n));          \
-       __upper = __high;                                       \
-       if (__high) {                                           \
-               __upper = __high % (__base);                    \
-               __high = __high / (__base);                     \
+       if (__builtin_constant_p(__base) && is_power_of_2(__base)) { \
+               __mod = n & (__base - 1);                       \
+               n >>= ilog2(__base);                            \
+       } else {                                                \
+               asm("" : "=a" (__low), "=d" (__high) : "A" (n));\
+               __upper = __high;                               \
+               if (__high) {                                   \
+                       __upper = __high % (__base);            \
+                       __high = __high / (__base);             \
+               }                                               \
+               asm("divl %2" : "=a" (__low), "=d" (__mod)      \
+                       : "rm" (__base), "0" (__low), "1" (__upper));   \
+               asm("" : "=A" (n) : "a" (__low), "d" (__high)); \
        }                                                       \
-       asm("divl %2":"=a" (__low), "=d" (__mod)                \
-           : "rm" (__base), "0" (__low), "1" (__upper));       \
-       asm("":"=A" (n) : "a" (__low), "d" (__high));           \
        __mod;                                                  \
 })
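
The shortcut added above is plain arithmetic: dividing by a constant
power of two is a shift, and the remainder is a mask. A stand-alone
model (IS_POW2 and __builtin_ctz are our stand-ins for the kernel's
is_power_of_2() and ilog2()):

#include <stdio.h>

#define IS_POW2(b)      ((b) != 0 && ((b) & ((b) - 1)) == 0)

/* mirrors the fast path: __mod = n & (base - 1); n >>= ilog2(base) */
static unsigned div_pow2(unsigned long long *n, unsigned base)
{
        unsigned rem = *n & (base - 1);         /* n % base */
        *n >>= __builtin_ctz(base);             /* n / base */
        return rem;
}

int main(void)
{
        unsigned long long n = 1000003;
        if (IS_POW2(64U)) {
                unsigned rem = div_pow2(&n, 64U);
                printf("q=%llu r=%u\n", n, rem);        /* q=15625 r=3 */
        }
        return 0;
}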
 
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 3470c9d..529bf07 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -451,23 +451,20 @@ do {                                                                      \
 #endif /* !CONFIG_M386 */
 
 #ifdef CONFIG_X86_CMPXCHG64
-#define percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)                  \
+#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2)            \
 ({                                                                     \
-       char __ret;                                                     \
-       typeof(o1) __o1 = o1;                                           \
-       typeof(o1) __n1 = n1;                                           \
-       typeof(o2) __o2 = o2;                                           \
-       typeof(o2) __n2 = n2;                                           \
-       typeof(o2) __dummy = n2;                                        \
+       bool __ret;                                                     \
+       typeof(pcp1) __o1 = (o1), __n1 = (n1);                          \
+       typeof(pcp2) __o2 = (o2), __n2 = (n2);                          \
        asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t"       \
-                   : "=a"(__ret), "=m" (pcp1), "=d"(__dummy)           \
-                   :  "b"(__n1), "c"(__n2), "a"(__o1), "d"(__o2));     \
+                   : "=a" (__ret), "+m" (pcp1), "+m" (pcp2), "+d" (__o2) \
+                   :  "b" (__n1), "c" (__n2), "a" (__o1));             \
        __ret;                                                          \
 })
 
-#define __this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2)                percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
-#define this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2)          percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
-#define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2)       percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
+#define __this_cpu_cmpxchg_double_4    percpu_cmpxchg8b_double
+#define this_cpu_cmpxchg_double_4      percpu_cmpxchg8b_double
+#define irqsafe_cpu_cmpxchg_double_4   percpu_cmpxchg8b_double
 #endif /* CONFIG_X86_CMPXCHG64 */
 
 /*
@@ -508,31 +505,23 @@ do {                                                                      \
  * it in software.  The address used in the cmpxchg16 instruction must be
  * aligned to a 16 byte boundary.
  */
-#ifdef CONFIG_SMP
-#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP3
-#else
-#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP2
-#endif
-#define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)                 \
+#define percpu_cmpxchg16b_double(pcp1, pcp2, o1, o2, n1, n2)           \
 ({                                                                     \
-       char __ret;                                                     \
-       typeof(o1) __o1 = o1;                                           \
-       typeof(o1) __n1 = n1;                                           \
-       typeof(o2) __o2 = o2;                                           \
-       typeof(o2) __n2 = n2;                                           \
-       typeof(o2) __dummy;                                             \
-       alternative_io(CMPXCHG16B_EMU_CALL,                             \
-                      "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t",  \
+       bool __ret;                                                     \
+       typeof(pcp1) __o1 = (o1), __n1 = (n1);                          \
+       typeof(pcp2) __o2 = (o2), __n2 = (n2);                          \
+       alternative_io("leaq %P1,%%rsi\n\tcall this_cpu_cmpxchg16b_emu\n\t", \
+                      "cmpxchg16b " __percpu_arg(1) "\n\tsetz %0\n\t", \
                       X86_FEATURE_CX16,                                \
-                      ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)),         \
-                      "S" (&pcp1), "b"(__n1), "c"(__n2),               \
-                      "a"(__o1), "d"(__o2) : "memory");                \
+                      ASM_OUTPUT2("=a" (__ret), "+m" (pcp1),           \
+                                  "+m" (pcp2), "+d" (__o2)),           \
+                      "b" (__n1), "c" (__n2), "a" (__o1) : "rsi");     \
        __ret;                                                          \
 })
 
-#define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2)                percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
-#define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2)          percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
-#define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2)       percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
+#define __this_cpu_cmpxchg_double_8    percpu_cmpxchg16b_double
+#define this_cpu_cmpxchg_double_8      percpu_cmpxchg16b_double
+#define irqsafe_cpu_cmpxchg_double_8   percpu_cmpxchg16b_double
 
 #endif
 
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index 2dddb31..f8ab3ea 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -6,6 +6,7 @@
  * EFLAGS bits
  */
 #define X86_EFLAGS_CF  0x00000001 /* Carry Flag */
+#define X86_EFLAGS_BIT1        0x00000002 /* Bit 1 - always on */
 #define X86_EFLAGS_PF  0x00000004 /* Parity Flag */
 #define X86_EFLAGS_AF  0x00000010 /* Auxiliary carry Flag */
 #define X86_EFLAGS_ZF  0x00000040 /* Zero Flag */
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 972c260..a82c2bf 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -79,23 +79,10 @@ static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
        return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
 }
 
-#if (NR_CPUS < 256)
 static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
 {
-       asm volatile(UNLOCK_LOCK_PREFIX "incb %0"
-                    : "+m" (lock->head_tail)
-                    :
-                    : "memory", "cc");
+       __add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX);
 }
-#else
-static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
-{
-       asm volatile(UNLOCK_LOCK_PREFIX "incw %0"
-                    : "+m" (lock->head_tail)
-                    :
-                    : "memory", "cc");
-}
-#endif
 
 static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
 {
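
The __add()-based unlock works because releasing a ticket lock is
nothing more than bumping the head ticket. A compact model (GCC
built-in atomics stand in for the kernel's __add()/UNLOCK_LOCK_PREFIX;
the union mirrors arch_spinlock_t's layout for NR_CPUS < 256):

#include <stdio.h>

typedef union {
        unsigned short head_tail;
        struct { unsigned char head, tail; } tickets;
} ticketlock_t;

static void ticket_unlock(ticketlock_t *lock)
{
        /* the kernel now does __add(&lock->tickets.head, 1, ...);
         * a fetch-add on the head byte is the same release step */
        __atomic_fetch_add(&lock->tickets.head, 1, __ATOMIC_RELEASE);
}

int main(void)
{
        ticketlock_t l = { .tickets = { .head = 3, .tail = 4 } }; /* held */
        ticket_unlock(&l);
        printf("head=%u tail=%u\n", l.tickets.head, l.tickets.tail); /* 4 4 */
        return 0;
}
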
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index a1fe5c1..185b719 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -40,7 +40,8 @@ struct thread_info {
                                                */
        __u8                    supervisor_stack[0];
 #endif
-       int                     uaccess_err;
+       int                     sig_on_uaccess_error:1;
+       int                     uaccess_err:1;  /* uaccess failed */
 };
 
 #define INIT_THREAD_INFO(tsk)                  \
@@ -231,6 +232,12 @@ static inline struct thread_info *current_thread_info(void)
        movq PER_CPU_VAR(kernel_stack),reg ; \
        subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg
 
+/*
+ * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in
+ * a certain register (to be used in assembler memory operands).
+ */
+#define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg)
+
 #endif
 
 #endif /* !X86_32 */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index c006924..800f77c 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -130,10 +130,8 @@ extern void setup_node_to_cpumask_map(void);
        .balance_interval       = 1,                                    \
 }
 
-#ifdef CONFIG_X86_64
 extern int __node_distance(int, int);
 #define node_distance(a, b) __node_distance(a, b)
-#endif
 
 #else /* !CONFIG_NUMA */
 
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 36361bf..8be5f54 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -462,7 +462,7 @@ struct __large_struct { unsigned long buf[100]; };
        barrier();
 
 #define uaccess_catch(err)                                             \
-       (err) |= current_thread_info()->uaccess_err;                    \
+       (err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0);    \
        current_thread_info()->uaccess_err = prev_err;                  \
 } while (0)
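
Since uaccess_err shrinks to a one-bit flag in this series, the catch
side above can no longer OR the field in directly; it has to map the
flag to an errno. The same logic in miniature (plain C, our names; the
kernel declares the field as a signed int bitfield):

#include <stdio.h>

#define EFAULT 14

struct ti_model {
        unsigned int sig_on_uaccess_error:1;
        unsigned int uaccess_err:1;     /* set by fixup_exception() */
};

int main(void)
{
        struct ti_model t = { .uaccess_err = 1 };
        int err = 0;

        /* old: err |= t.uaccess_err;  -- only worked while the field
         * held a full -EFAULT; a 1-bit flag must be translated */
        err |= (t.uaccess_err ? -EFAULT : 0);
        printf("err=%d\n", err);        /* err=-14 */
        return 0;
}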
 
diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c
index 5abbea2..7b3fe56 100644
--- a/arch/x86/kernel/cpu/powerflags.c
+++ b/arch/x86/kernel/cpu/powerflags.c
@@ -16,5 +16,6 @@ const char *const x86_power_flags[32] = {
        "100mhzsteps",
        "hwpstate",
        "",     /* tsc invariant mapped to constant_tsc */
-               /* nothing */
+       "cpb",  /* core performance boost */
+       "eff_freq_ro", /* Readonly aperf/mperf */
 };
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index f3f6f53..22d0e21 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -625,6 +625,8 @@ work_notifysig:                             # deal with pending signals and
        movl %esp, %eax
        jne work_notifysig_v86          # returning to kernel-space or
                                        # vm86-space
+       TRACE_IRQS_ON
+       ENABLE_INTERRUPTS(CLBR_NONE)
        xorl %edx, %edx
        call do_notify_resume
        jmp resume_userspace_sig
@@ -638,6 +640,8 @@ work_notifysig_v86:
 #else
        movl %esp, %eax
 #endif
+       TRACE_IRQS_ON
+       ENABLE_INTERRUPTS(CLBR_NONE)
        xorl %edx, %edx
        call do_notify_resume
        jmp resume_userspace_sig
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index faf8d5e..a20e1cb 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -221,7 +221,7 @@ ENDPROC(native_usergs_sysret64)
        /*CFI_REL_OFFSET        ss,0*/
        pushq_cfi %rax /* rsp */
        CFI_REL_OFFSET  rsp,0
-       pushq_cfi $X86_EFLAGS_IF /* eflags - interrupts on */
+       pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */
        /*CFI_REL_OFFSET        rflags,0*/
        pushq_cfi $__KERNEL_CS /* cs */
        /*CFI_REL_OFFSET        cs,0*/
@@ -411,7 +411,7 @@ ENTRY(ret_from_fork)
        RESTORE_REST
 
        testl $3, CS-ARGOFFSET(%rsp)            # from kernel_thread?
-       je   int_ret_from_sys_call
+       jz   retint_restore_args
 
        testl $_TIF_IA32, TI_flags(%rcx)        # 32-bit compat task needs IRET
        jnz  int_ret_from_sys_call
@@ -465,7 +465,7 @@ ENTRY(system_call)
         * after the swapgs, so that it can do the swapgs
         * for the guest and jump here on syscall.
         */
-ENTRY(system_call_after_swapgs)
+GLOBAL(system_call_after_swapgs)
 
        movq    %rsp,PER_CPU_VAR(old_rsp)
        movq    PER_CPU_VAR(kernel_stack),%rsp
@@ -478,8 +478,7 @@ ENTRY(system_call_after_swapgs)
        movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
        movq  %rcx,RIP-ARGOFFSET(%rsp)
        CFI_REL_OFFSET rip,RIP-ARGOFFSET
-       GET_THREAD_INFO(%rcx)
-       testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
+       testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
        jnz tracesys
 system_call_fastpath:
        cmpq $__NR_syscall_max,%rax
@@ -496,10 +495,9 @@ ret_from_sys_call:
        /* edi: flagmask */
 sysret_check:
        LOCKDEP_SYS_EXIT
-       GET_THREAD_INFO(%rcx)
        DISABLE_INTERRUPTS(CLBR_NONE)
        TRACE_IRQS_OFF
-       movl TI_flags(%rcx),%edx
+       movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx
        andl %edi,%edx
        jnz  sysret_careful
        CFI_REMEMBER_STATE
@@ -583,7 +581,7 @@ sysret_audit:
        /* Do syscall tracing */
 tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-       testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
+       testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
        jz auditsys
 #endif
        SAVE_REST
@@ -612,8 +610,6 @@ tracesys:
 GLOBAL(int_ret_from_sys_call)
        DISABLE_INTERRUPTS(CLBR_NONE)
        TRACE_IRQS_OFF
-       testl $3,CS-ARGOFFSET(%rsp)
-       je retint_restore_args
        movl $_TIF_ALLWORK_MASK,%edi
        /* edi: mask to check */
 GLOBAL(int_with_check)
@@ -953,6 +949,7 @@ END(common_interrupt)
 ENTRY(\sym)
        INTR_FRAME
        pushq_cfi $~(\num)
+.Lcommon_\sym:
        interrupt \do_sym
        jmp ret_from_intr
        CFI_ENDPROC
@@ -976,13 +973,21 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
        x86_platform_ipi smp_x86_platform_ipi
 
 #ifdef CONFIG_SMP
-.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
+       ALIGN
+       INTR_FRAME
+.irp idx,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
        16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
 .if NUM_INVALIDATE_TLB_VECTORS > \idx
-apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \
-       invalidate_interrupt\idx smp_invalidate_interrupt
+ENTRY(invalidate_interrupt\idx)
+       pushq_cfi $~(INVALIDATE_TLB_VECTOR_START+\idx)
+       jmp .Lcommon_invalidate_interrupt0
+       CFI_ADJUST_CFA_OFFSET -8
+END(invalidate_interrupt\idx)
 .endif
 .endr
+       CFI_ENDPROC
+apicinterrupt INVALIDATE_TLB_VECTOR_START, \
+       invalidate_interrupt0, smp_invalidate_interrupt
 #endif
 
 apicinterrupt THRESHOLD_APIC_VECTOR \
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ee5d4fb..15763af 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -293,7 +293,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
        regs.orig_ax = -1;
        regs.ip = (unsigned long) kernel_thread_helper;
        regs.cs = __KERNEL_CS | get_kernel_rpl();
-       regs.flags = X86_EFLAGS_IF | 0x2;
+       regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
 
        /* Ok, create the new process.. */
        return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
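
X86_EFLAGS_BIT1 simply names the architecturally always-set reserved
bit that the bare 0x2 literal used to supply. A quick userspace check
(our throwaway code, x86-64):

#include <stdio.h>

int main(void)
{
        unsigned long flags;

        asm volatile("pushfq\n\tpopq %0" : "=r" (flags));
        printf("eflags=%#lx bit1=%lu\n", flags, (flags >> 1) & 1); /* 1 */
        return 0;
}
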
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a8e3eb8..fa1191f 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -306,15 +306,10 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
                        == NOTIFY_STOP)
                return;
 #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
-#ifdef CONFIG_KPROBES
+
        if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
                        == NOTIFY_STOP)
                return;
-#else
-       if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP)
-                       == NOTIFY_STOP)
-               return;
-#endif
 
        preempt_conditional_sti(regs);
        do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index e4d4a22..b07ba93 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -57,7 +57,7 @@ DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
        .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
 };
 
-static enum { EMULATE, NATIVE, NONE } vsyscall_mode = NATIVE;
+static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
 
 static int __init vsyscall_setup(char *str)
 {
@@ -140,11 +140,40 @@ static int addr_to_vsyscall_nr(unsigned long addr)
        return nr;
 }
 
+static bool write_ok_or_segv(unsigned long ptr, size_t size)
+{
+       /*
+        * XXX: if access_ok, get_user, and put_user handled
+        * sig_on_uaccess_error, this could go away.
+        */
+
+       if (!access_ok(VERIFY_WRITE, (void __user *)ptr, size)) {
+               siginfo_t info;
+               struct thread_struct *thread = &current->thread;
+
+               thread->error_code      = 6;  /* user fault, no page, write */
+               thread->cr2             = ptr;
+               thread->trap_no         = 14;
+
+               memset(&info, 0, sizeof(info));
+               info.si_signo           = SIGSEGV;
+               info.si_errno           = 0;
+               info.si_code            = SEGV_MAPERR;
+               info.si_addr            = (void __user *)ptr;
+
+               force_sig_info(SIGSEGV, &info, current);
+               return false;
+       } else {
+               return true;
+       }
+}
+
 bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 {
        struct task_struct *tsk;
        unsigned long caller;
        int vsyscall_nr;
+       int prev_sig_on_uaccess_error;
        long ret;
 
        /*
@@ -180,35 +209,65 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
        if (seccomp_mode(&tsk->seccomp))
                do_exit(SIGKILL);
 
+       /*
+        * With a real vsyscall, page faults cause SIGSEGV.  We want to
+        * preserve that behavior to make writing exploits harder.
+        */
+       prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error;
+       current_thread_info()->sig_on_uaccess_error = 1;
+
+       /*
+        * 0 is a valid user pointer (in the access_ok sense) on 32-bit and
+        * 64-bit, so we don't need to special-case it here.  For all the
+        * vsyscalls, 0 means "don't write anything" not "write it at
+        * address 0".
+        */
+       ret = -EFAULT;
        switch (vsyscall_nr) {
        case 0:
+               if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
+                   !write_ok_or_segv(regs->si, sizeof(struct timezone)))
+                       break;
+
                ret = sys_gettimeofday(
                        (struct timeval __user *)regs->di,
                        (struct timezone __user *)regs->si);
                break;
 
        case 1:
+               if (!write_ok_or_segv(regs->di, sizeof(time_t)))
+                       break;
+
                ret = sys_time((time_t __user *)regs->di);
                break;
 
        case 2:
+               if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
+                   !write_ok_or_segv(regs->si, sizeof(unsigned)))
+                       break;
+
                ret = sys_getcpu((unsigned __user *)regs->di,
                                 (unsigned __user *)regs->si,
                                 0);
                break;
        }
 
+       current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error;
+
        if (ret == -EFAULT) {
-               /*
-                * Bad news -- userspace fed a bad pointer to a vsyscall.
-                *
-                * With a real vsyscall, that would have caused SIGSEGV.
-                * To make writing reliable exploits using the emulated
-                * vsyscalls harder, generate SIGSEGV here as well.
-                */
+               /* Bad news -- userspace fed a bad pointer to a vsyscall. */
                warn_bad_vsyscall(KERN_INFO, regs,
                                  "vsyscall fault (exploit attempt?)");
-               goto sigsegv;
+
+               /*
+                * If we failed to generate a signal for any reason,
+                * generate one here.  (This should be impossible.)
+                */
+               if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) &&
+                                !sigismember(&tsk->pending.signal, SIGSEGV)))
+                       goto sigsegv;
+
+               return true;  /* Don't emulate the ret. */
        }
 
        regs->ax = ret;
diff --git a/arch/x86/lib/string_32.c b/arch/x86/lib/string_32.c
index 82004d2..bd59090 100644
--- a/arch/x86/lib/string_32.c
+++ b/arch/x86/lib/string_32.c
@@ -164,15 +164,13 @@ EXPORT_SYMBOL(strchr);
 size_t strlen(const char *s)
 {
        int d0;
-       int res;
+       size_t res;
        asm volatile("repne\n\t"
-               "scasb\n\t"
-               "notl %0\n\t"
-               "decl %0"
+               "scasb"
                : "=c" (res), "=&D" (d0)
                : "1" (s), "a" (0), "0" (0xffffffffu)
                : "memory");
-       return res;
+       return ~res - 1;
 }
 EXPORT_SYMBOL(strlen);
 #endif
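
The arithmetic behind the shortened asm: %ecx starts at 0xffffffff and
repne scasb decrements it once per byte examined, terminator included,
so ~%ecx is the number of bytes scanned and ~%ecx - 1 is the string
length. A plain-C model of the same bookkeeping (ours):

#include <stdio.h>

static size_t strlen_model(const char *s)
{
        unsigned ecx = 0xffffffffu;
        const char *p = s;

        do              /* one pass per scasb step */
                ecx--;
        while (*p++ != '\0');
        return ~ecx - 1;        /* bytes scanned minus the NUL */
}

int main(void)
{
        printf("%zu\n", strlen_model("kernel"));        /* 6 */
        return 0;
}
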
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index d0474ad..1fb85db 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -25,7 +25,7 @@ int fixup_exception(struct pt_regs *regs)
        if (fixup) {
                /* If fixup is less than 16, it means uaccess error */
                if (fixup->fixup < 16) {
-                       current_thread_info()->uaccess_err = -EFAULT;
+                       current_thread_info()->uaccess_err = 1;
                        regs->ip += fixup->fixup;
                        return 1;
                }
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 5db0490..9d74824 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -626,7 +626,7 @@ pgtable_bad(struct pt_regs *regs, unsigned long error_code,
 
 static noinline void
 no_context(struct pt_regs *regs, unsigned long error_code,
-          unsigned long address)
+          unsigned long address, int signal, int si_code)
 {
        struct task_struct *tsk = current;
        unsigned long *stackend;
@@ -634,8 +634,17 @@ no_context(struct pt_regs *regs, unsigned long error_code,
        int sig;
 
        /* Are we prepared to handle this kernel fault? */
-       if (fixup_exception(regs))
+       if (fixup_exception(regs)) {
+               if (current_thread_info()->sig_on_uaccess_error && signal) {
+                       tsk->thread.trap_no = 14;
+                       tsk->thread.error_code = error_code | PF_USER;
+                       tsk->thread.cr2 = address;
+
+                       /* XXX: hwpoison faults will set the wrong code. */
+                       force_sig_info_fault(signal, si_code, address, tsk, 0);
+               }
                return;
+       }
 
        /*
         * 32-bit:
@@ -755,7 +764,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
        if (is_f00f_bug(regs, address))
                return;
 
-       no_context(regs, error_code, address);
+       no_context(regs, error_code, address, SIGSEGV, si_code);
 }
 
 static noinline void
@@ -819,7 +828,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
 
        /* Kernel mode? Handle exceptions or die: */
        if (!(error_code & PF_USER)) {
-               no_context(regs, error_code, address);
+               no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
                return;
        }
 
@@ -854,7 +863,7 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
                if (!(fault & VM_FAULT_RETRY))
                        up_read(&current->mm->mmap_sem);
                if (!(error_code & PF_USER))
-                       no_context(regs, error_code, address);
+                       no_context(regs, error_code, address, 0, 0);
                return 1;
        }
        if (!(fault & VM_FAULT_ERROR))
@@ -864,7 +873,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
                /* Kernel mode? Handle exceptions or die: */
                if (!(error_code & PF_USER)) {
                        up_read(&current->mm->mmap_sem);
-                       no_context(regs, error_code, address);
+                       no_context(regs, error_code, address,
+                                  SIGSEGV, SEGV_MAPERR);
                        return 1;
                }
 
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 4a01967..4cf9bd0 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -238,7 +238,8 @@ static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
 
        spin_lock_irqsave(&rtc_lock, flags);
        efi_call_phys_prelog();
-       status = efi_call_phys2(efi_phys.get_time, tm, tc);
+       status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm),
+                               virt_to_phys(tc));
        efi_call_phys_epilog();
        spin_unlock_irqrestore(&rtc_lock, flags);
        return status;
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index 65af42f..3980903 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -697,7 +697,7 @@ void lguest_arch_setup_regs(struct lg_cpu *cpu, unsigned long start)
         * interrupts are enabled.  We always leave interrupts enabled while
         * running the Guest.
         */
-       regs->eflags = X86_EFLAGS_IF | 0x2;
+       regs->eflags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
 
        /*
         * The "Extended Instruction Pointer" register says where the Guest is
diff --git a/mm/slub.c b/mm/slub.c
index ed3334d..09ccee8 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -368,7 +368,7 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
        VM_BUG_ON(!irqs_disabled());
 #ifdef CONFIG_CMPXCHG_DOUBLE
        if (s->flags & __CMPXCHG_DOUBLE) {
-               if (cmpxchg_double(&page->freelist,
+               if (cmpxchg_double(&page->freelist, &page->counters,
                        freelist_old, counters_old,
                        freelist_new, counters_new))
                return 1;
@@ -402,7 +402,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
 {
 #ifdef CONFIG_CMPXCHG_DOUBLE
        if (s->flags & __CMPXCHG_DOUBLE) {
-               if (cmpxchg_double(&page->freelist,
+               if (cmpxchg_double(&page->freelist, &page->counters,
                        freelist_old, counters_old,
                        freelist_new, counters_new))
                return 1;
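
The slub hunks are the caller's side of the new cmpxchg_double()
signature: both words are now passed as pointers, and they must be
adjacent and double-word aligned. A rough userspace model of that
contract using a 16-byte atomic CAS (ours, not slub's code; struct and
field names are illustrative; link with -latomic):

#include <stdio.h>
#include <stdbool.h>

struct twoword {
        void *freelist;
        unsigned long counters;
} __attribute__((aligned(2 * sizeof(long))));

static bool cmpxchg_double_model(struct twoword *p,
                                 void *of, unsigned long oc,
                                 void *nf, unsigned long nc)
{
        struct twoword old = { of, oc }, new = { nf, nc };

        /* both words compared, both replaced, in one atomic step */
        return __atomic_compare_exchange(p, &old, &new, false,
                                         __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}

int main(void)
{
        struct twoword w = { NULL, 1 };
        bool ok = cmpxchg_double_model(&w, NULL, 1, &w, 2);

        printf("ok=%d counters=%lu\n", ok, w.counters); /* ok=1 counters=2 */
        return 0;
}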