Merge branch 'x86-trampoline-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[cascardo/linux.git] / arch / x86 / kernel / smpboot.c
index bc2cc44..6af1185 100644 (file)
@@ -62,7 +62,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/mtrr.h>
-#include <asm/vmi.h>
+#include <asm/mwait.h>
 #include <asm/apic.h>
 #include <asm/setup.h>
 #include <asm/uv/uv.h>
@@ -299,23 +299,16 @@ notrace static void __cpuinit start_secondary(void *unused)
         * fragile that we want to limit the things done here to the
         * most necessary things.
         */
+       cpu_init();
+       preempt_disable();
+       smp_callin();
 
 #ifdef CONFIG_X86_32
-       /*
-        * Switch away from the trampoline page-table
-        *
-        * Do this before cpu_init() because it needs to access per-cpu
-        * data which may not be mapped in the trampoline page-table.
-        */
+       /* switch away from the initial page table */
        load_cr3(swapper_pg_dir);
        __flush_tlb_all();
 #endif
 
-       vmi_bringup();
-       cpu_init();
-       preempt_disable();
-       smp_callin();
-
        /* otherwise gcc will move up smp_processor_id before the cpu_init */
        barrier();
        /*
@@ -324,9 +317,9 @@ notrace static void __cpuinit start_secondary(void *unused)
        check_tsc_sync_target();
 
        if (nmi_watchdog == NMI_IO_APIC) {
-               legacy_pic->chip->mask(0);
+               legacy_pic->mask(0);
                enable_NMI_through_LVT0();
-               legacy_pic->chip->unmask(0);
+               legacy_pic->unmask(0);
        }
 
        /* This must be done before setting cpu_online_mask */
@@ -786,7 +779,6 @@ do_rest:
 #ifdef CONFIG_X86_32
        /* Stack for startup_32 can be just as for start_secondary onwards */
        irq_ctx_init(cpu);
-       initial_page_table = __pa(&trampoline_pg_dir);
 #else
        clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
        initial_gs = per_cpu_offset(cpu);
@@ -935,7 +927,6 @@ int __cpuinit native_cpu_up(unsigned int cpu)
        per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
 
        err = do_boot_cpu(apicid, cpu);
-
        if (err) {
                pr_debug("do_boot_cpu failed %d\n", err);
                return -EIO;
@@ -1121,8 +1112,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
        }
        set_cpu_sibling_map(0);
 
-       enable_IR_x2apic();
-       default_setup_apic_routing();
 
        if (smp_sanity_check(max_cpus) < 0) {
                printk(KERN_INFO "SMP disabled\n");
@@ -1130,6 +1119,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
                goto out;
        }
 
+       default_setup_apic_routing();
+
        preempt_disable();
        if (read_apic_id() != boot_cpu_physical_apicid) {
                panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
@@ -1395,11 +1386,88 @@ void play_dead_common(void)
        local_irq_disable();
 }
 
+/*
+ * Park the calling CPU in an endless MONITOR/MWAIT loop.
+ *
+ * We need to flush the caches before going to sleep, lest we have
+ * dirty data in our caches when we come back up.
+ *
+ * Returns (without sleeping) only when MWAIT cannot be used: the CPU
+ * lacks X86_FEATURE_MWAIT or X86_FEATURE_CLFLSH, or its cpuid_level is
+ * below CPUID_MWAIT_LEAF.  Once those checks pass it never returns.
+ */
+static inline void mwait_play_dead(void)
+{
+       unsigned int eax, ebx, ecx, edx;
+       unsigned int highest_cstate = 0;
+       unsigned int highest_subcstate = 0;
+       int i;
+       void *mwait_ptr;
+
+       if (!cpu_has(&current_cpu_data, X86_FEATURE_MWAIT))
+               return;
+       if (!cpu_has(&current_cpu_data, X86_FEATURE_CLFLSH))
+               return;
+       if (current_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
+               return;
+
+       eax = CPUID_MWAIT_LEAF;
+       ecx = 0;
+       native_cpuid(&eax, &ebx, &ecx, &edx);
+
+       /*
+        * eax will be 0 if EDX enumeration is not valid, i.e. when
+        * CPUID5_ECX_EXTENSIONS_SUPPORTED is clear in ecx.
+        * Initialized below to cstate, sub_cstate value when EDX is valid:
+        * the lowest field of edx is dropped first, then the loop keeps
+        * the index and value of the highest non-zero sub-state field.
+        *
+        * NOTE(review): if no field is non-zero, highest_subcstate stays 0
+        * and the unsigned "highest_subcstate - 1" wraps around; confirm
+        * that the resulting hint value is intended.
+        */
+       if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) {
+               eax = 0;
+       } else {
+               edx >>= MWAIT_SUBSTATE_SIZE;
+               for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
+                       if (edx & MWAIT_SUBSTATE_MASK) {
+                               highest_cstate = i;
+                               highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
+                       }
+               }
+               eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
+                       (highest_subcstate - 1);
+       }
+
+       /*
+        * This should be a memory location in a cache line which is
+        * unlikely to be touched by other processors.  The actual
+        * content is immaterial as it is not actually modified in any way.
+        * The flags word of the current thread_info is used for this.
+        */
+       mwait_ptr = &current_thread_info()->flags;
+
+       wbinvd();       /* one full cache flush before entering the loop */
+
+       while (1) {
+               /*
+                * The CLFLUSH is a workaround for erratum AAI65 for
+                * the Xeon 7400 series.  It's not clear it is actually
+                * needed, but it should be harmless in either case.
+                * The WBINVD is insufficient due to the spurious-wakeup
+                * case where we return around the loop.
+                *
+                * Each pass re-arms the monitor on mwait_ptr and then
+                * waits again with the hint computed above in eax.
+                */
+               clflush(mwait_ptr);
+               __monitor(mwait_ptr, 0, 0);
+               mb();
+               __mwait(eax, 0);
+       }
+}
+
+static inline void hlt_play_dead(void) /* HLT-based parking loop; never returns */
+{
+       if (current_cpu_data.x86 >= 4)  /* presumably x86 is the CPU family and WBINVD needs a 486+ - confirm */
+               wbinvd();
+
+       while (1) {     /* call native_halt() again every time it returns */
+               native_halt();
+       }
+}
+
 void native_play_dead(void)
 {
        play_dead_common();
        tboot_shutdown(TB_SHUTDOWN_WFS);
-       wbinvd_halt();
+
+       mwait_play_dead();      /* Returns only if MWAIT cannot be used on this CPU */
+       hlt_play_dead();        /* Fallback: loops on native_halt() and never returns */
 }
 
 #else /* ... !CONFIG_HOTPLUG_CPU */