x86/mm: Improve stack-overflow #PF handling
author Andy Lutomirski <luto@kernel.org>
Wed, 31 Aug 2016 00:27:57 +0000 (17:27 -0700)
committer Ingo Molnar <mingo@kernel.org>
Thu, 8 Sep 2016 06:47:20 +0000 (08:47 +0200)
If we get a page fault indicating kernel stack overflow, invoke
handle_stack_overflow().  To prevent us from overflowing the stack
again while handling the overflow (because we are likely to have
very little stack space left), call handle_stack_overflow() on the
double-fault stack.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/6d6cf96b3fb9b4c9aa303817e1dc4de0c7c36487.1472603235.git.luto@kernel.org
[ Minor edit. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/include/asm/traps.h
arch/x86/kernel/traps.c
arch/x86/mm/fault.c

index c349661..01fd0a7 100644 (file)
@@ -117,6 +117,12 @@ extern void ist_exit(struct pt_regs *regs);
 extern void ist_begin_non_atomic(struct pt_regs *regs);
 extern void ist_end_non_atomic(void);
 
+#ifdef CONFIG_VMAP_STACK
+void __noreturn handle_stack_overflow(const char *message,
+                                     struct pt_regs *regs,
+                                     unsigned long fault_address);
+#endif
+
 /* Interrupts/Exceptions */
 enum {
        X86_TRAP_DE = 0,        /*  0, Divide-by-zero */
index 907b4e4..bd4e3d4 100644 (file)
@@ -293,9 +293,9 @@ DO_ERROR(X86_TRAP_SS,     SIGBUS,  "stack segment",         stack_segment)
 DO_ERROR(X86_TRAP_AC,     SIGBUS,  "alignment check",          alignment_check)
 
 #ifdef CONFIG_VMAP_STACK
-static void __noreturn handle_stack_overflow(const char *message,
-                                            struct pt_regs *regs,
-                                            unsigned long fault_address)
+__visible void __noreturn handle_stack_overflow(const char *message,
+                                               struct pt_regs *regs,
+                                               unsigned long fault_address)
 {
        printk(KERN_EMERG "BUG: stack guard page was hit at %p (stack is %p..%p)\n",
                 (void *)fault_address, current->stack,
index dc80230..0b92fce 100644 (file)
@@ -753,6 +753,38 @@ no_context(struct pt_regs *regs, unsigned long error_code,
                return;
        }
 
+#ifdef CONFIG_VMAP_STACK
+       /*
+        * Stack overflow?  During boot, we can fault near the initial
+        * stack in the direct map, but that's not an overflow -- check
+        * that we're in vmalloc space to avoid this.
+        */
+       if (is_vmalloc_addr((void *)address) &&
+           (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
+            address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
+               register void *__sp asm("rsp");
+               unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *);
+               /*
+                * We're likely to be running with very little stack space
+                * left.  It's plausible that we'd hit this condition but
+                * double-fault even before we get this far, in which case
+                * we're fine: the double-fault handler will deal with it.
+                *
+                * We don't want to make it all the way into the oops code
+                * and then double-fault, though, because we're likely to
+                * break the console driver and lose most of the stack dump.
+                */
+               asm volatile ("movq %[stack], %%rsp\n\t"
+                             "call handle_stack_overflow\n\t"
+                             "1: jmp 1b"
+                             : "+r" (__sp)
+                             : "D" ("kernel stack overflow (page fault)"),
+                               "S" (regs), "d" (address),
+                               [stack] "rm" (stack));
+               unreachable();
+       }
+#endif
+
        /*
         * 32-bit:
         *