Merge branch 'x86-trace-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 14 Nov 2013 07:25:10 +0000 (16:25 +0900)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 14 Nov 2013 07:25:10 +0000 (16:25 +0900)
Pull x86/trace changes from Ingo Molnar:
 "This adds page fault tracepoints which have zero runtime cost in the
  disabled case via IDT trickery (no NOPs in the page fault hotpath)"

* 'x86-trace-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86, trace: Change user|kernel_page_fault to page_fault_user|kernel
  x86, trace: Add page fault tracepoints
  x86, trace: Delete __trace_alloc_intr_gate()
  x86, trace: Register exception handler to trace IDT
  x86, trace: Remove __alloc_intr_gate()

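The "IDT trickery" works by keeping a second, parallel IDT whose page-fault gate points at a tracing stub (trace_page_fault, which calls trace_do_page_fault). Registering either tracepoint switches the CPU over to the trace IDT; unregistering switches back. The disabled case therefore runs the ordinary page_fault gate completely untouched, with no NOPs or extra branches. A minimal sketch of the register/unregister side, with illustrative helper names rather than the exact kernel code:

    static int trace_pf_refcount;
    static DEFINE_MUTEX(trace_pf_mutex);

    void trace_pf_regfunc(void)
    {
            mutex_lock(&trace_pf_mutex);
            if (!trace_pf_refcount++)
                    load_idt(&trace_idt_descr);  /* PF gate -> trace_page_fault */
            mutex_unlock(&trace_pf_mutex);
    }

    void trace_pf_unregfunc(void)
    {
            mutex_lock(&trace_pf_mutex);
            if (!--trace_pf_refcount)
                    load_idt(&idt_descr);        /* PF gate -> page_fault */
            mutex_unlock(&trace_pf_mutex);
    }

In the real series the IDT switch has to reach every CPU, not just the current one, but the refcounted swap between two IDTs is the core of the zero-cost property.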
arch/x86/kernel/entry_32.S
arch/x86/kernel/entry_64.S
arch/x86/kernel/traps.c
arch/x86/mm/fault.c

diff --combined arch/x86/kernel/entry_32.S
@@@ -362,9 -362,12 +362,9 @@@ END(ret_from_exception
  #ifdef CONFIG_PREEMPT
  ENTRY(resume_kernel)
        DISABLE_INTERRUPTS(CLBR_ANY)
 -      cmpl $0,TI_preempt_count(%ebp)  # non-zero preempt_count ?
 -      jnz restore_all
  need_resched:
 -      movl TI_flags(%ebp), %ecx       # need_resched set ?
 -      testb $_TIF_NEED_RESCHED, %cl
 -      jz restore_all
 +      cmpl $0,PER_CPU_VAR(__preempt_count)
 +      jnz restore_all
        testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)    # interrupts off (exception path) ?
        jz restore_all
        call preempt_schedule_irq
@@@ -1244,6 -1247,16 +1244,16 @@@ return_to_handler
   */
        .pushsection .kprobes.text, "ax"
  
+ #ifdef CONFIG_TRACING
+ ENTRY(trace_page_fault)
+       RING0_EC_FRAME
+       ASM_CLAC
+       pushl_cfi $trace_do_page_fault
+       jmp error_code
+       CFI_ENDPROC
+ END(trace_page_fault)
+ #endif
+ 
  ENTRY(page_fault)
        RING0_EC_FRAME
        ASM_CLAC
diff --combined arch/x86/kernel/entry_64.S
@@@ -1103,8 -1103,10 +1103,8 @@@ retint_signal
        /* Returning to kernel space. Check if we need preemption */
        /* rcx:  threadinfo. interrupts off. */
  ENTRY(retint_kernel)
 -      cmpl $0,TI_preempt_count(%rcx)
 +      cmpl $0,PER_CPU_VAR(__preempt_count)
        jnz  retint_restore_args
 -      bt  $TIF_NEED_RESCHED,TI_flags(%rcx)
 -      jnc  retint_restore_args
        bt   $9,EFLAGS-ARGOFFSET(%rsp)  /* interrupts off? */
        jnc  retint_restore_args
        call preempt_schedule_irq
@@@ -1278,6 -1280,17 +1278,17 @@@ ENTRY(\sym
  END(\sym)
  .endm
  
+ #ifdef CONFIG_TRACING
+ .macro trace_errorentry sym do_sym
+ errorentry trace(\sym) trace(\do_sym)
+ errorentry \sym \do_sym
+ .endm
+ #else
+ .macro trace_errorentry sym do_sym
+ errorentry \sym \do_sym
+ .endm
+ #endif
        /* error code is on the stack already */
  .macro paranoiderrorentry sym do_sym
  ENTRY(\sym)
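With CONFIG_TRACING=y, trace_errorentry stamps out two stubs per exception: a trace_-prefixed one and the plain one (the trace() helper is presumably a preprocessor paste of the trace_ prefix onto the symbol name). The single line

    trace_errorentry page_fault do_page_fault

used further down therefore expands, in effect, to

    errorentry trace_page_fault trace_do_page_fault
    errorentry page_fault do_page_fault

so both entry points exist in the binary, and the IDT alone decides which C handler (do_page_fault or trace_do_page_fault) actually runs.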
@@@ -1340,7 -1353,7 +1351,7 @@@ bad_gs
        .previous
  
  /* Call softirq on interrupt stack. Interrupts are off. */
 -ENTRY(call_softirq)
 +ENTRY(do_softirq_own_stack)
        CFI_STARTPROC
        pushq_cfi %rbp
        CFI_REL_OFFSET rbp,0
        decl PER_CPU_VAR(irq_count)
        ret
        CFI_ENDPROC
 -END(call_softirq)
 +END(do_softirq_own_stack)
  
  #ifdef CONFIG_XEN
  zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
@@@ -1480,7 -1493,7 +1491,7 @@@ zeroentry xen_int3 do_int
  errorentry xen_stack_segment do_stack_segment
  #endif
  errorentry general_protection do_general_protection
- errorentry page_fault do_page_fault
+ trace_errorentry page_fault do_page_fault
  #ifdef CONFIG_KVM_GUEST
  errorentry async_page_fault do_async_page_fault
  #endif
diff --combined arch/x86/kernel/traps.c
@@@ -88,7 -88,7 +88,7 @@@ static inline void conditional_sti(stru
  
  static inline void preempt_conditional_sti(struct pt_regs *regs)
  {
 -      inc_preempt_count();
 +      preempt_count_inc();
        if (regs->flags & X86_EFLAGS_IF)
                local_irq_enable();
  }
@@@ -103,7 -103,7 +103,7 @@@ static inline void preempt_conditional_
  {
        if (regs->flags & X86_EFLAGS_IF)
                local_irq_disable();
 -      dec_preempt_count();
 +      preempt_count_dec();
  }
  
  static int __kprobes
@@@ -653,7 -653,7 +653,7 @@@ void math_state_restore(void
                return;
        }
  
 -      tsk->fpu_counter++;
 +      tsk->thread.fpu_counter++;
  }
  EXPORT_SYMBOL_GPL(math_state_restore);
  
@@@ -713,7 -713,7 +713,7 @@@ void __init early_trap_init(void
        /* int3 can be called from all */
        set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
  #ifdef CONFIG_X86_32
-       set_intr_gate(X86_TRAP_PF, &page_fault);
+       set_intr_gate(X86_TRAP_PF, page_fault);
  #endif
        load_idt(&idt_descr);
  }
  void __init early_trap_pf_init(void)
  {
  #ifdef CONFIG_X86_64
-       set_intr_gate(X86_TRAP_PF, &page_fault);
+       set_intr_gate(X86_TRAP_PF, page_fault);
  #endif
  }
  
@@@ -737,30 -737,30 +737,30 @@@ void __init trap_init(void
        early_iounmap(p, 4);
  #endif
  
-       set_intr_gate(X86_TRAP_DE, &divide_error);
+       set_intr_gate(X86_TRAP_DE, divide_error);
        set_intr_gate_ist(X86_TRAP_NMI, &nmi, NMI_STACK);
        /* int4 can be called from all */
        set_system_intr_gate(X86_TRAP_OF, &overflow);
-       set_intr_gate(X86_TRAP_BR, &bounds);
-       set_intr_gate(X86_TRAP_UD, &invalid_op);
-       set_intr_gate(X86_TRAP_NM, &device_not_available);
+       set_intr_gate(X86_TRAP_BR, bounds);
+       set_intr_gate(X86_TRAP_UD, invalid_op);
+       set_intr_gate(X86_TRAP_NM, device_not_available);
  #ifdef CONFIG_X86_32
        set_task_gate(X86_TRAP_DF, GDT_ENTRY_DOUBLEFAULT_TSS);
  #else
        set_intr_gate_ist(X86_TRAP_DF, &double_fault, DOUBLEFAULT_STACK);
  #endif
-       set_intr_gate(X86_TRAP_OLD_MF, &coprocessor_segment_overrun);
-       set_intr_gate(X86_TRAP_TS, &invalid_TSS);
-       set_intr_gate(X86_TRAP_NP, &segment_not_present);
+       set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun);
+       set_intr_gate(X86_TRAP_TS, invalid_TSS);
+       set_intr_gate(X86_TRAP_NP, segment_not_present);
        set_intr_gate_ist(X86_TRAP_SS, &stack_segment, STACKFAULT_STACK);
-       set_intr_gate(X86_TRAP_GP, &general_protection);
-       set_intr_gate(X86_TRAP_SPURIOUS, &spurious_interrupt_bug);
-       set_intr_gate(X86_TRAP_MF, &coprocessor_error);
-       set_intr_gate(X86_TRAP_AC, &alignment_check);
+       set_intr_gate(X86_TRAP_GP, general_protection);
+       set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug);
+       set_intr_gate(X86_TRAP_MF, coprocessor_error);
+       set_intr_gate(X86_TRAP_AC, alignment_check);
  #ifdef CONFIG_X86_MCE
        set_intr_gate_ist(X86_TRAP_MC, &machine_check, MCE_STACK);
  #endif
-       set_intr_gate(X86_TRAP_XF, &simd_coprocessor_error);
+       set_intr_gate(X86_TRAP_XF, simd_coprocessor_error);
  
        /* Reserve all the builtin and the syscall vector: */
        for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
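The reason the & is dropped from every handler in this file: with the trace IDT in the picture, set_intr_gate() becomes a macro that populates both IDTs, pasting a trace_ prefix onto the handler symbol for the trace copy. Token pasting needs a bare identifier; trace_&page_fault would not compile. Roughly, as a sketch assuming a _trace_set_gate() helper that fills the trace IDT, as in this series:

    #define set_intr_gate(n, addr)                                  \
            do {                                                    \
                    BUG_ON((unsigned)n > 0xFF);                     \
                    _set_gate(n, GATE_INTERRUPT, (void *)addr,      \
                              0, 0, __KERNEL_CS);                   \
                    _trace_set_gate(n, GATE_INTERRUPT,              \
                                    (void *)trace_##addr,           \
                                    0, 0, __KERNEL_CS);             \
            } while (0)

Handlers that have no real trace variant get a trivial alias (e.g. #define trace_divide_error divide_error) so the paste resolves for all of them.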
diff --combined arch/x86/mm/fault.c
@@@ -20,6 -20,9 +20,9 @@@
  #include <asm/kmemcheck.h>            /* kmemcheck_*(), ...           */
  #include <asm/fixmap.h>                       /* VSYSCALL_START               */
  
+ #define CREATE_TRACE_POINTS
+ #include <asm/trace/exceptions.h>
+ 
  /*
   * Page fault error code bits:
   *
@@@ -51,7 -54,7 +54,7 @@@ kmmio_fault(struct pt_regs *regs, unsig
        return 0;
  }
  
 -static inline int __kprobes notify_page_fault(struct pt_regs *regs)
 +static inline int __kprobes kprobes_fault(struct pt_regs *regs)
  {
        int ret = 0;
  
@@@ -1048,7 -1051,7 +1051,7 @@@ __do_page_fault(struct pt_regs *regs, u
                        return;
  
                /* kprobes don't want to hook the spurious faults: */
 -              if (notify_page_fault(regs))
 +              if (kprobes_fault(regs))
                        return;
                /*
                 * Don't take the mm semaphore here. If we fixup a prefetch
        }
  
        /* kprobes don't want to hook the spurious faults: */
 -      if (unlikely(notify_page_fault(regs)))
 +      if (unlikely(kprobes_fault(regs)))
                return;
 -      /*
 -       * It's safe to allow irq's after cr2 has been saved and the
 -       * vmalloc fault has been handled.
 -       *
 -       * User-mode registers count as a user access even for any
 -       * potential system fault or CPU buglet:
 -       */
 -      if (user_mode_vm(regs)) {
 -              local_irq_enable();
 -              error_code |= PF_USER;
 -              flags |= FAULT_FLAG_USER;
 -      } else {
 -              if (regs->flags & X86_EFLAGS_IF)
 -                      local_irq_enable();
 -      }
  
        if (unlikely(error_code & PF_RSVD))
                pgtable_bad(regs, error_code, address);
                }
        }
  
 -      perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 -
        /*
         * If we're in an interrupt, have no user context or are running
         * in an atomic region then we must not take the fault:
                return;
        }
  
 +      /*
 +       * It's safe to allow irq's after cr2 has been saved and the
 +       * vmalloc fault has been handled.
 +       *
 +       * User-mode registers count as a user access even for any
 +       * potential system fault or CPU buglet:
 +       */
 +      if (user_mode_vm(regs)) {
 +              local_irq_enable();
 +              error_code |= PF_USER;
 +              flags |= FAULT_FLAG_USER;
 +      } else {
 +              if (regs->flags & X86_EFLAGS_IF)
 +                      local_irq_enable();
 +      }
 +
 +      perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 +
        if (error_code & PF_WRITE)
                flags |= FAULT_FLAG_WRITE;
  
@@@ -1232,3 -1234,23 +1235,23 @@@ do_page_fault(struct pt_regs *regs, uns
        __do_page_fault(regs, error_code);
        exception_exit(prev_state);
  }
+ 
+ static void trace_page_fault_entries(struct pt_regs *regs,
+                                    unsigned long error_code)
+ {
+       if (user_mode(regs))
+               trace_page_fault_user(read_cr2(), regs, error_code);
+       else
+               trace_page_fault_kernel(read_cr2(), regs, error_code);
+ }
+ 
+ dotraplinkage void __kprobes
+ trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
+ {
+       enum ctx_state prev_state;
+ 
+       prev_state = exception_enter();
+       trace_page_fault_entries(regs, error_code);
+       __do_page_fault(regs, error_code);
+       exception_exit(prev_state);
+ }
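Once merged, these behave like any other trace event; assuming the usual tracefs layout they land under events/exceptions/ and can be exercised with, e.g.:

    echo 1 > /sys/kernel/debug/tracing/events/exceptions/page_fault_user/enable
    cat /sys/kernel/debug/tracing/trace_pipe

Enabling the event is what swaps in the trace IDT; with both events disabled, page faults take the original path through do_page_fault() with no tracing overhead at all.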