Merge branch 'smp-hotplug-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[cascardo/linux.git] / arch / x86 / kernel / smpboot.c
index 2a6e84a..42a9362 100644 (file)
@@ -100,10 +100,11 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
 /* Logical package management. We might want to allocate that dynamically */
 static int *physical_to_logical_pkg __read_mostly;
 static unsigned long *physical_package_map __read_mostly;;
-static unsigned long *logical_package_map  __read_mostly;
 static unsigned int max_physical_pkg_id __read_mostly;
 unsigned int __max_logical_packages __read_mostly;
 EXPORT_SYMBOL(__max_logical_packages);
+static unsigned int logical_packages __read_mostly;
+static bool logical_packages_frozen __read_mostly;
 
 /* Maximum number of SMT threads on any online core */
 int __max_smt_threads __read_mostly;
@@ -277,14 +278,14 @@ int topology_update_package_map(unsigned int apicid, unsigned int cpu)
        if (test_and_set_bit(pkg, physical_package_map))
                goto found;
 
-       new = find_first_zero_bit(logical_package_map, __max_logical_packages);
-       if (new >= __max_logical_packages) {
+       if (logical_packages_frozen) {
                physical_to_logical_pkg[pkg] = -1;
-               pr_warn("APIC(%x) Package %u exceeds logical package map\n",
+               pr_warn("APIC(%x) Package %u exceeds logical package max\n",
                        apicid, pkg);
                return -ENOSPC;
        }
-       set_bit(new, logical_package_map);
+
+       new = logical_packages++;
        pr_info("APIC(%x) Converting physical %u to logical package %u\n",
                apicid, pkg, new);
        physical_to_logical_pkg[pkg] = new;
@@ -341,6 +342,7 @@ static void __init smp_init_package_map(void)
        }
 
        __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
+       logical_packages = 0;
 
        /*
         * Possibly larger than what we need as the number of apic ids per
@@ -352,10 +354,6 @@ static void __init smp_init_package_map(void)
        memset(physical_to_logical_pkg, 0xff, size);
        size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long);
        physical_package_map = kzalloc(size, GFP_KERNEL);
-       size = BITS_TO_LONGS(__max_logical_packages) * sizeof(unsigned long);
-       logical_package_map = kzalloc(size, GFP_KERNEL);
-
-       pr_info("Max logical packages: %u\n", __max_logical_packages);
 
        for_each_present_cpu(cpu) {
                unsigned int apicid = apic->cpu_present_to_apicid(cpu);
@@ -369,6 +367,15 @@ static void __init smp_init_package_map(void)
                set_cpu_possible(cpu, false);
                set_cpu_present(cpu, false);
        }
+
+       if (logical_packages > __max_logical_packages) {
+               pr_warn("Detected more packages (%u), then computed by BIOS data (%u).\n",
+                       logical_packages, __max_logical_packages);
+               logical_packages_frozen = true;
+               __max_logical_packages  = logical_packages;
+       }
+
+       pr_info("Max logical packages: %u\n", __max_logical_packages);
 }
 
 void __init smp_store_boot_cpu_info(void)
@@ -464,7 +471,7 @@ static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
        return false;
 }
 
-static struct sched_domain_topology_level numa_inside_package_topology[] = {
+static struct sched_domain_topology_level x86_numa_in_package_topology[] = {
 #ifdef CONFIG_SCHED_SMT
        { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
 #endif
@@ -473,22 +480,23 @@ static struct sched_domain_topology_level numa_inside_package_topology[] = {
 #endif
        { NULL, },
 };
+
+static struct sched_domain_topology_level x86_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+       { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+#ifdef CONFIG_SCHED_MC
+       { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+#endif
+       { cpu_cpu_mask, SD_INIT_NAME(DIE) },
+       { NULL, },
+};
+
 /*
- * set_sched_topology() sets the topology internal to a CPU.  The
- * NUMA topologies are layered on top of it to build the full
- * system topology.
- *
- * If NUMA nodes are observed to occur within a CPU package, this
- * function should be called.  It forces the sched domain code to
- * only use the SMT level for the CPU portion of the topology.
- * This essentially falls back to relying on NUMA information
- * from the SRAT table to describe the entire system topology
- * (except for hyperthreads).
+ * Set if a package/die has multiple NUMA nodes inside.
+ * AMD Magny-Cours and Intel Cluster-on-Die have this.
  */
-static void primarily_use_numa_for_topology(void)
-{
-       set_sched_topology(numa_inside_package_topology);
-}
+static bool x86_has_numa_in_package;
 
 void set_cpu_sibling_map(int cpu)
 {
@@ -551,7 +559,7 @@ void set_cpu_sibling_map(int cpu)
                                c->booted_cores = cpu_data(i).booted_cores;
                }
                if (match_die(c, o) && !topology_same_node(c, o))
-                       primarily_use_numa_for_topology();
+                       x86_has_numa_in_package = true;
        }
 
        threads = cpumask_weight(topology_sibling_cpumask(cpu));
@@ -683,7 +691,7 @@ wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip)
         * Give the other CPU some time to accept the IPI.
         */
        udelay(200);
-       if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
+       if (APIC_INTEGRATED(boot_cpu_apic_version)) {
                maxlvt = lapic_get_maxlvt();
                if (maxlvt > 3)                 /* Due to the Pentium erratum 3AP.  */
                        apic_write(APIC_ESR, 0);
@@ -710,7 +718,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
        /*
         * Be paranoid about clearing APIC errors.
         */
-       if (APIC_INTEGRATED(apic_version[phys_apicid])) {
+       if (APIC_INTEGRATED(boot_cpu_apic_version)) {
                if (maxlvt > 3)         /* Due to the Pentium erratum 3AP.  */
                        apic_write(APIC_ESR, 0);
                apic_read(APIC_ESR);
@@ -749,7 +757,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
         * Determine this based on the APIC version.
         * If we don't have an integrated APIC, don't send the STARTUP IPIs.
         */
-       if (APIC_INTEGRATED(apic_version[phys_apicid]))
+       if (APIC_INTEGRATED(boot_cpu_apic_version))
                num_starts = 2;
        else
                num_starts = 0;
@@ -935,7 +943,6 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
        per_cpu(cpu_current_top_of_stack, cpu) =
                (unsigned long)task_stack_page(idle) + THREAD_SIZE;
 #else
-       clear_tsk_thread_flag(idle, TIF_FORK);
        initial_gs = per_cpu_offset(cpu);
 #endif
 }
@@ -962,7 +969,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 
        early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
        initial_code = (unsigned long)start_secondary;
-       stack_start  = idle->thread.sp;
+       initial_stack  = idle->thread.sp;
 
        /*
         * Enable the espfix hack for this CPU
@@ -987,7 +994,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
                /*
                 * Be paranoid about clearing APIC errors.
                */
-               if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
+               if (APIC_INTEGRATED(boot_cpu_apic_version)) {
                        apic_write(APIC_ESR, 0);
                        apic_read(APIC_ESR);
                }
@@ -1108,17 +1115,8 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 
        common_cpu_up(cpu, tidle);
 
-       /*
-        * We have to walk the irq descriptors to setup the vector
-        * space for the cpu which comes online.  Prevent irq
-        * alloc/free across the bringup.
-        */
-       irq_lock_sparse();
-
        err = do_boot_cpu(apicid, cpu, tidle);
-
        if (err) {
-               irq_unlock_sparse();
                pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
                return -EIO;
        }
@@ -1136,8 +1134,6 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
                touch_nmi_watchdog();
        }
 
-       irq_unlock_sparse();
-
        return 0;
 }
 
@@ -1242,7 +1238,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
        /*
         * If we couldn't find a local APIC, then get out of here now!
         */
-       if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
+       if (APIC_INTEGRATED(boot_cpu_apic_version) &&
            !boot_cpu_has(X86_FEATURE_APIC)) {
                if (!disable_apic) {
                        pr_err("BIOS bug, local APIC #%d not detected!...\n",
@@ -1297,6 +1293,16 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
                zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
                zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
        }
+
+       /*
+        * Set 'default' x86 topology, this matches default_topology() in that
+        * it has NUMA nodes as a topology level. See also
+        * native_smp_cpus_done().
+        *
+        * Must be done before set_cpus_sibling_map() is ran.
+        */
+       set_sched_topology(x86_topology);
+
        set_cpu_sibling_map(0);
 
        switch (smp_sanity_check(max_cpus)) {
@@ -1316,14 +1322,13 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
                break;
        }
 
-       default_setup_apic_routing();
-
        if (read_apic_id() != boot_cpu_physical_apicid) {
                panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
                     read_apic_id(), boot_cpu_physical_apicid);
                /* Or can we switch back to PIC here? */
        }
 
+       default_setup_apic_routing();
        cpu0_logical_apicid = apic_bsp_setup(false);
 
        pr_info("CPU%d: ", 0);
@@ -1363,6 +1368,9 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
 {
        pr_debug("Boot done\n");
 
+       if (x86_has_numa_in_package)
+               set_sched_topology(x86_numa_in_package_topology);
+
        nmi_selftest();
        impress_friends();
        setup_ioapic_dest();