percpu, x86: Fix percpu_xchg_op()
authorEric Dumazet <eric.dumazet@gmail.com>
Tue, 25 Jan 2011 16:31:54 +0000 (17:31 +0100)
committerIngo Molnar <mingo@elte.hu>
Wed, 26 Jan 2011 07:10:49 +0000 (08:10 +0100)
These recent percpu commits:

  2485b6464cf8: x86,percpu: Move out of place 64 bit ops into X86_64 section
  8270137a0d50: cpuops: Use cmpxchg for xchg to avoid lock semantics

Caused this 'perf top' crash:

 Kernel panic - not syncing: Fatal exception in interrupt
 Pid: 0, comm: swapper Tainted: G     D
 2.6.38-rc2-00181-gef71723 #413 Call Trace: <IRQ> [<ffffffff810465b5>]
    ? panic
    ? kmsg_dump
    ? kmsg_dump
    ? oops_end
    ? no_context
    ? __bad_area_nosemaphore
    ? perf_output_begin
    ? bad_area_nosemaphore
    ? do_page_fault
    ? __task_pid_nr_ns
    ? perf_event_tid
    ? __perf_event_header__init_id
    ? validate_chain
    ? perf_output_sample
    ? trace_hardirqs_off
    ? page_fault
    ? irq_work_run
    ? update_process_times
    ? tick_sched_timer
    ? tick_sched_timer
    ? __run_hrtimer
    ? hrtimer_interrupt
    ? account_system_vtime
    ? smp_apic_timer_interrupt
    ? apic_timer_interrupt
 ...

Looking at assembly code, I found:

list = this_cpu_xchg(irq_work_list, NULL);

gives this wrong code : (gcc-4.1.2 cross compiler)

ffffffff810bc45e:
mov    %gs:0xead0,%rax
cmpxchg %rax,%gs:0xead0
jne    ffffffff810bc45e <irq_work_run+0x3e>
test   %rax,%rax
je     ffffffff810bc4aa <irq_work_run+0x8a>

Tell gcc we dirty eax/rax register in percpu_xchg_op()

Compiler must use another register to store pxo_new__

We also dont need to reload percpu value after a jump,
since a 'failed' cmpxchg already updated eax/rax

Wrong generated code was :
xor     %rax,%rax   /* load 0 into %rax */
1: mov     %gs:0xead0,%rax
cmpxchg %rax,%gs:0xead0
jne     1b
test    %rax,%rax

After patch :

xor     %rdx,%rdx   /* load 0 into %rdx */
mov     %gs:0xead0,%rax
1: cmpxchg %rdx,%gs:0xead0
jne     1b:
test    %rax,%rax

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Tejun Heo <tj@kernel.org>
LKML-Reference: <1295973114.3588.312.camel@edumazet-laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
arch/x86/include/asm/percpu.h

index 3788f46..7e17295 100644 (file)
@@ -273,34 +273,34 @@ do {                                                                      \
        typeof(var) pxo_new__ = (nval);                                 \
        switch (sizeof(var)) {                                          \
        case 1:                                                         \
-               asm("\n1:mov "__percpu_arg(1)",%%al"                    \
-                   "\n\tcmpxchgb %2, "__percpu_arg(1)                  \
+               asm("\n\tmov "__percpu_arg(1)",%%al"                    \
+                   "\n1:\tcmpxchgb %2, "__percpu_arg(1)                \
                    "\n\tjnz 1b"                                        \
-                           : "=a" (pxo_ret__), "+m" (var)              \
+                           : "=&a" (pxo_ret__), "+m" (var)             \
                            : "q" (pxo_new__)                           \
                            : "memory");                                \
                break;                                                  \
        case 2:                                                         \
-               asm("\n1:mov "__percpu_arg(1)",%%ax"                    \
-                   "\n\tcmpxchgw %2, "__percpu_arg(1)                  \
+               asm("\n\tmov "__percpu_arg(1)",%%ax"                    \
+                   "\n1:\tcmpxchgw %2, "__percpu_arg(1)                \
                    "\n\tjnz 1b"                                        \
-                           : "=a" (pxo_ret__), "+m" (var)              \
+                           : "=&a" (pxo_ret__), "+m" (var)             \
                            : "r" (pxo_new__)                           \
                            : "memory");                                \
                break;                                                  \
        case 4:                                                         \
-               asm("\n1:mov "__percpu_arg(1)",%%eax"                   \
-                   "\n\tcmpxchgl %2, "__percpu_arg(1)                  \
+               asm("\n\tmov "__percpu_arg(1)",%%eax"                   \
+                   "\n1:\tcmpxchgl %2, "__percpu_arg(1)                \
                    "\n\tjnz 1b"                                        \
-                           : "=a" (pxo_ret__), "+m" (var)              \
+                           : "=&a" (pxo_ret__), "+m" (var)             \
                            : "r" (pxo_new__)                           \
                            : "memory");                                \
                break;                                                  \
        case 8:                                                         \
-               asm("\n1:mov "__percpu_arg(1)",%%rax"                   \
-                   "\n\tcmpxchgq %2, "__percpu_arg(1)                  \
+               asm("\n\tmov "__percpu_arg(1)",%%rax"                   \
+                   "\n1:\tcmpxchgq %2, "__percpu_arg(1)                \
                    "\n\tjnz 1b"                                        \
-                           : "=a" (pxo_ret__), "+m" (var)              \
+                           : "=&a" (pxo_ret__), "+m" (var)             \
                            : "r" (pxo_new__)                           \
                            : "memory");                                \
                break;                                                  \