Merge branch 'x86-headers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 1 Aug 2016 18:23:42 +0000 (14:23 -0400)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 1 Aug 2016 18:23:42 +0000 (14:23 -0400)
Pull x86 header cleanups from Ingo Molnar:
 "This tree is a cleanup of the x86 tree reducing spurious uses of
  module.h - which should improve build performance a bit"

* 'x86-headers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86, crypto: Restore MODULE_LICENSE() to glue_helper.c so it loads
  x86/apic: Remove duplicated include from probe_64.c
  x86/ce4100: Remove duplicated include from ce4100.c
  x86/headers: Include spinlock_types.h in x8664_ksyms_64.c for missing spinlock_t
  x86/platform: Delete extraneous MODULE_* tags from ts5500
  x86: Audit and remove any remaining unnecessary uses of module.h
  x86/kvm: Audit and remove any unnecessary uses of module.h
  x86/xen: Audit and remove any unnecessary uses of module.h
  x86/platform: Audit and remove any unnecessary uses of module.h
  x86/lib: Audit and remove any unnecessary uses of module.h
  x86/kernel: Audit and remove any unnecessary uses of module.h
  x86/mm: Audit and remove any unnecessary uses of module.h
  x86: Don't use module.h just for AUTHOR / LICENSE tags
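
A minimal sketch of the pattern these commits apply (the file and symbol
names below are hypothetical, not taken from the series): a translation
unit that only needs the export macros can include <linux/export.h>
instead of dragging in all of <linux/module.h>.

    /* example.c -- hypothetical; only exports a symbol, so export.h
     * is sufficient and module.h is not needed. */
    #include <linux/export.h>

    int example_helper(void)
    {
            return 0;
    }
    EXPORT_SYMBOL_GPL(example_helper);

Code that really is a module keeps <linux/module.h> (see the
events/intel/uncore.c hunk below), and code that only needs
module_param() can include <linux/moduleparam.h> directly (see the
kvm/x86.c hunk).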

38 files changed:
arch/x86/events/amd/ibs.c
arch/x86/events/core.c
arch/x86/events/intel/uncore.c
arch/x86/events/intel/uncore.h
arch/x86/include/asm/topology.h
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/amd_nb.c
arch/x86/kernel/apic/apic.c
arch/x86/kernel/apic/apic_flat_64.c
arch/x86/kernel/apic/apic_noop.c
arch/x86/kernel/apic/io_apic.c
arch/x86/kernel/apic/probe_32.c
arch/x86/kernel/apic/x2apic_uv_x.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/intel.c
arch/x86/kernel/dumpstack_32.c
arch/x86/kernel/dumpstack_64.c
arch/x86/kernel/i386_ksyms_32.c
arch/x86/kernel/kvm.c
arch/x86/kernel/process.c
arch/x86/kernel/reboot.c
arch/x86/kernel/setup.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/tsc.c
arch/x86/kernel/x8664_ksyms_64.c
arch/x86/kernel/x86_init.c
arch/x86/kvm/cpuid.c
arch/x86/kvm/x86.c
arch/x86/lib/csum-wrappers_64.c
arch/x86/lib/usercopy_64.c
arch/x86/mm/dump_pagetables.c
arch/x86/mm/init_64.c
arch/x86/mm/numa.c
arch/x86/mm/pat.c
arch/x86/mm/pgtable_32.c
arch/x86/mm/srat.c
arch/x86/platform/efi/efi_64.c
arch/x86/xen/enlighten.c
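
Several of the perf hunks below also convert open-coded CPU notifiers to
the cpu-hotplug state machine. A minimal sketch of that shape, assuming a
hypothetical driver (the example_* names are illustrative; the state id
and name match the ones registered in the events/core.c hunk):

    #include <linux/cpuhotplug.h>
    #include <linux/init.h>

    static int example_starting_cpu(unsigned int cpu)
    {
            /* per-CPU setup formerly done from a CPU_STARTING notifier */
            return 0;
    }

    static int example_dying_cpu(unsigned int cpu)
    {
            /* per-CPU teardown formerly done from a CPU_DYING notifier */
            return 0;
    }

    static __init int example_init(void)
    {
            /*
             * The hotplug core invokes the startup callback on every CPU
             * that is already online, so no smp_call_function() or
             * cpu_notifier_register_begin/done() dance is needed.
             */
            return cpuhp_setup_state(CPUHP_AP_PERF_X86_STARTING,
                                     "AP_PERF_X86_STARTING",
                                     example_starting_cpu,
                                     example_dying_cpu);
    }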

@@@ -7,7 -7,8 +7,8 @@@
   */
  
  #include <linux/perf_event.h>
- #include <linux/module.h>
+ #include <linux/init.h>
+ #include <linux/export.h>
  #include <linux/pci.h>
  #include <linux/ptrace.h>
  #include <linux/syscore_ops.h>
@@@ -655,12 -656,8 +656,12 @@@ fail
        }
  
        if (event->attr.sample_type & PERF_SAMPLE_RAW) {
 -              raw.size = sizeof(u32) + ibs_data.size;
 -              raw.data = ibs_data.data;
 +              raw = (struct perf_raw_record){
 +                      .frag = {
 +                              .size = sizeof(u32) + ibs_data.size,
 +                              .data = ibs_data.data,
 +                      },
 +              };
                data.raw = &raw;
        }
  
@@@ -725,10 -722,13 +726,10 @@@ static __init int perf_ibs_pmu_init(str
        return ret;
  }
  
 -static __init int perf_event_ibs_init(void)
 +static __init void perf_event_ibs_init(void)
  {
        struct attribute **attr = ibs_op_format_attrs;
  
 -      if (!ibs_caps)
 -              return -ENODEV; /* ibs not supported by the cpu */
 -
        perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
  
        if (ibs_caps & IBS_CAPS_OPCNT) {
  
        register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
        pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps);
 -
 -      return 0;
  }
  
  #else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */
  
 -static __init int perf_event_ibs_init(void) { return 0; }
 +static __init void perf_event_ibs_init(void) { }
  
  #endif
  
@@@ -920,7 -922,7 +921,7 @@@ static inline int get_ibs_lvt_offset(vo
        return val & IBSCTL_LVT_OFFSET_MASK;
  }
  
 -static void setup_APIC_ibs(void *dummy)
 +static void setup_APIC_ibs(void)
  {
        int offset;
  
@@@ -935,7 -937,7 +936,7 @@@ failed
                smp_processor_id());
  }
  
 -static void clear_APIC_ibs(void *dummy)
 +static void clear_APIC_ibs(void)
  {
        int offset;
  
                setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
  }
  
 +static int x86_pmu_amd_ibs_starting_cpu(unsigned int cpu)
 +{
 +      setup_APIC_ibs();
 +      return 0;
 +}
 +
  #ifdef CONFIG_PM
  
  static int perf_ibs_suspend(void)
  {
 -      clear_APIC_ibs(NULL);
 +      clear_APIC_ibs();
        return 0;
  }
  
  static void perf_ibs_resume(void)
  {
        ibs_eilvt_setup();
 -      setup_APIC_ibs(NULL);
 +      setup_APIC_ibs();
  }
  
  static struct syscore_ops perf_ibs_syscore_ops = {
@@@ -980,15 -976,27 +981,15 @@@ static inline void perf_ibs_pm_init(voi
  
  #endif
  
 -static int
 -perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
 +static int x86_pmu_amd_ibs_dying_cpu(unsigned int cpu)
  {
 -      switch (action & ~CPU_TASKS_FROZEN) {
 -      case CPU_STARTING:
 -              setup_APIC_ibs(NULL);
 -              break;
 -      case CPU_DYING:
 -              clear_APIC_ibs(NULL);
 -              break;
 -      default:
 -              break;
 -      }
 -
 -      return NOTIFY_OK;
 +      clear_APIC_ibs();
 +      return 0;
  }
  
  static __init int amd_ibs_init(void)
  {
        u32 caps;
 -      int ret = -EINVAL;
  
        caps = __get_ibs_caps();
        if (!caps)
        ibs_eilvt_setup();
  
        if (!ibs_eilvt_valid())
 -              goto out;
 +              return -EINVAL;
  
        perf_ibs_pm_init();
 -      cpu_notifier_register_begin();
 +
        ibs_caps = caps;
        /* make ibs_caps visible to other cpus: */
        smp_mb();
 -      smp_call_function(setup_APIC_ibs, NULL, 1);
 -      __perf_cpu_notifier(perf_ibs_cpu_notifier);
 -      cpu_notifier_register_done();
 +      /*
 +       * x86_pmu_amd_ibs_starting_cpu will be called from core on
 +       * all online cpus.
 +       */
 +      cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_IBS_STARTING,
 +                        "AP_PERF_X86_AMD_IBS_STARTING",
 +                        x86_pmu_amd_ibs_starting_cpu,
 +                        x86_pmu_amd_ibs_dying_cpu);
  
 -      ret = perf_event_ibs_init();
 -out:
 -      if (ret)
 -              pr_err("Failed to setup IBS, %d\n", ret);
 -      return ret;
 +      perf_event_ibs_init();
 +
 +      return 0;
  }
  
  /* Since we need the pci subsystem to init ibs we can't do this earlier: */
diff --combined arch/x86/events/core.c
@@@ -17,7 -17,8 +17,8 @@@
  #include <linux/notifier.h>
  #include <linux/hardirq.h>
  #include <linux/kprobes.h>
- #include <linux/module.h>
+ #include <linux/export.h>
+ #include <linux/init.h>
  #include <linux/kdebug.h>
  #include <linux/sched.h>
  #include <linux/uaccess.h>
@@@ -263,7 -264,7 +264,7 @@@ static bool check_hw_exists(void
  
  msr_fail:
        pr_cont("Broken PMU hardware detected, using software events only.\n");
 -      pr_info("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
 +      printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
                boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR,
                reg, val_new);
  
@@@ -1477,49 -1478,49 +1478,49 @@@ NOKPROBE_SYMBOL(perf_event_nmi_handler)
  struct event_constraint emptyconstraint;
  struct event_constraint unconstrained;
  
 -static int
 -x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
 +static int x86_pmu_prepare_cpu(unsigned int cpu)
  {
 -      unsigned int cpu = (long)hcpu;
        struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 -      int i, ret = NOTIFY_OK;
 -
 -      switch (action & ~CPU_TASKS_FROZEN) {
 -      case CPU_UP_PREPARE:
 -              for (i = 0 ; i < X86_PERF_KFREE_MAX; i++)
 -                      cpuc->kfree_on_online[i] = NULL;
 -              if (x86_pmu.cpu_prepare)
 -                      ret = x86_pmu.cpu_prepare(cpu);
 -              break;
 -
 -      case CPU_STARTING:
 -              if (x86_pmu.cpu_starting)
 -                      x86_pmu.cpu_starting(cpu);
 -              break;
 +      int i;
  
 -      case CPU_ONLINE:
 -              for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) {
 -                      kfree(cpuc->kfree_on_online[i]);
 -                      cpuc->kfree_on_online[i] = NULL;
 -              }
 -              break;
 +      for (i = 0 ; i < X86_PERF_KFREE_MAX; i++)
 +              cpuc->kfree_on_online[i] = NULL;
 +      if (x86_pmu.cpu_prepare)
 +              return x86_pmu.cpu_prepare(cpu);
 +      return 0;
 +}
  
 -      case CPU_DYING:
 -              if (x86_pmu.cpu_dying)
 -                      x86_pmu.cpu_dying(cpu);
 -              break;
 +static int x86_pmu_dead_cpu(unsigned int cpu)
 +{
 +      if (x86_pmu.cpu_dead)
 +              x86_pmu.cpu_dead(cpu);
 +      return 0;
 +}
  
 -      case CPU_UP_CANCELED:
 -      case CPU_DEAD:
 -              if (x86_pmu.cpu_dead)
 -                      x86_pmu.cpu_dead(cpu);
 -              break;
 +static int x86_pmu_online_cpu(unsigned int cpu)
 +{
 +      struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 +      int i;
  
 -      default:
 -              break;
 +      for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) {
 +              kfree(cpuc->kfree_on_online[i]);
 +              cpuc->kfree_on_online[i] = NULL;
        }
 +      return 0;
 +}
  
 -      return ret;
 +static int x86_pmu_starting_cpu(unsigned int cpu)
 +{
 +      if (x86_pmu.cpu_starting)
 +              x86_pmu.cpu_starting(cpu);
 +      return 0;
 +}
 +
 +static int x86_pmu_dying_cpu(unsigned int cpu)
 +{
 +      if (x86_pmu.cpu_dying)
 +              x86_pmu.cpu_dying(cpu);
 +      return 0;
  }
  
  static void __init pmu_check_apic(void)
@@@ -1622,29 -1623,6 +1623,29 @@@ ssize_t events_sysfs_show(struct devic
  }
  EXPORT_SYMBOL_GPL(events_sysfs_show);
  
 +ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr,
 +                        char *page)
 +{
 +      struct perf_pmu_events_ht_attr *pmu_attr =
 +              container_of(attr, struct perf_pmu_events_ht_attr, attr);
 +
 +      /*
 +       * Report conditional events depending on Hyper-Threading.
 +       *
 +       * This is overly conservative as usually the HT special
 +       * handling is not needed if the other CPU thread is idle.
 +       *
 +       * Note this does not (and cannot) handle the case when thread
 +       * siblings are invisible, for example with virtualization
 +       * if they are owned by some other guest.  The user tool
 +       * has to re-read when a thread sibling gets onlined later.
 +       */
 +      return sprintf(page, "%s",
 +                      topology_max_smt_threads() > 1 ?
 +                      pmu_attr->event_str_ht :
 +                      pmu_attr->event_str_noht);
 +}
 +
  EVENT_ATTR(cpu-cycles,                        CPU_CYCLES              );
  EVENT_ATTR(instructions,              INSTRUCTIONS            );
  EVENT_ATTR(cache-references,          CACHE_REFERENCES        );
@@@ -1787,39 -1765,10 +1788,39 @@@ static int __init init_hw_perf_events(v
        pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
        pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);
  
 -      perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 -      perf_cpu_notifier(x86_pmu_notifier);
 +      /*
 +       * Install callbacks. Core will call them for each online
 +       * cpu.
 +       */
 +      err = cpuhp_setup_state(CPUHP_PERF_X86_PREPARE, "PERF_X86_PREPARE",
 +                              x86_pmu_prepare_cpu, x86_pmu_dead_cpu);
 +      if (err)
 +              return err;
 +
 +      err = cpuhp_setup_state(CPUHP_AP_PERF_X86_STARTING,
 +                              "AP_PERF_X86_STARTING", x86_pmu_starting_cpu,
 +                              x86_pmu_dying_cpu);
 +      if (err)
 +              goto out;
 +
 +      err = cpuhp_setup_state(CPUHP_AP_PERF_X86_ONLINE, "AP_PERF_X86_ONLINE",
 +                              x86_pmu_online_cpu, NULL);
 +      if (err)
 +              goto out1;
 +
 +      err = perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 +      if (err)
 +              goto out2;
  
        return 0;
 +
 +out2:
 +      cpuhp_remove_state(CPUHP_AP_PERF_X86_ONLINE);
 +out1:
 +      cpuhp_remove_state(CPUHP_AP_PERF_X86_STARTING);
 +out:
 +      cpuhp_remove_state(CPUHP_PERF_X86_PREPARE);
 +      return err;
  }
  early_initcall(init_hw_perf_events);
  
@@@ -1,5 -1,6 +1,7 @@@
+ #include <linux/module.h>
  #include <asm/cpu_device_id.h>
 +#include <asm/intel-family.h>
  #include "uncore.h"
  
  static struct intel_uncore_type *empty_uncore[] = { NULL, };
@@@ -883,7 -884,7 +885,7 @@@ uncore_types_init(struct intel_uncore_t
  static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
  {
        struct intel_uncore_type *type;
 -      struct intel_uncore_pmu *pmu;
 +      struct intel_uncore_pmu *pmu = NULL;
        struct intel_uncore_box *box;
        int phys_id, pkg, ret;
  
        }
  
        type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
 +
        /*
 -       * for performance monitoring unit with multiple boxes,
 -       * each box has a different function id.
 +       * Some platforms, e.g.  Knights Landing, use a common PCI device ID
 +       * for multiple instances of an uncore PMU device type. We should check
 +       * PCI slot and func to indicate the uncore box.
         */
 -      pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
 -      /* Knights Landing uses a common PCI device ID for multiple instances of
 -       * an uncore PMU device type. There is only one entry per device type in
 -       * the knl_uncore_pci_ids table inspite of multiple devices present for
 -       * some device types. Hence PCI device idx would be 0 for all devices.
 -       * So increment pmu pointer to point to an unused array element.
 -       */
 -      if (boot_cpu_data.x86_model == 87) {
 -              while (pmu->func_id >= 0)
 -                      pmu++;
 +      if (id->driver_data & ~0xffff) {
 +              struct pci_driver *pci_drv = pdev->driver;
 +              const struct pci_device_id *ids = pci_drv->id_table;
 +              unsigned int devfn;
 +
 +              while (ids && ids->vendor) {
 +                      if ((ids->vendor == pdev->vendor) &&
 +                          (ids->device == pdev->device)) {
 +                              devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
 +                                                UNCORE_PCI_DEV_FUNC(ids->driver_data));
 +                              if (devfn == pdev->devfn) {
 +                                      pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
 +                                      break;
 +                              }
 +                      }
 +                      ids++;
 +              }
 +              if (pmu == NULL)
 +                      return -ENODEV;
 +      } else {
 +              /*
 +               * for performance monitoring unit with multiple boxes,
 +               * each box has a different function id.
 +               */
 +              pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
        }
  
        if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL))
  
  static void uncore_pci_remove(struct pci_dev *pdev)
  {
 -      struct intel_uncore_box *box = pci_get_drvdata(pdev);
 +      struct intel_uncore_box *box;
        struct intel_uncore_pmu *pmu;
        int i, phys_id, pkg;
  
@@@ -1052,7 -1036,7 +1054,7 @@@ static void uncore_pci_exit(void
        }
  }
  
 -static void uncore_cpu_dying(int cpu)
 +static int uncore_cpu_dying(unsigned int cpu)
  {
        struct intel_uncore_type *type, **types = uncore_msr_uncores;
        struct intel_uncore_pmu *pmu;
                                uncore_box_exit(box);
                }
        }
 +      return 0;
  }
  
 -static void uncore_cpu_starting(int cpu, bool init)
 +static int first_init;
 +
 +static int uncore_cpu_starting(unsigned int cpu)
  {
        struct intel_uncore_type *type, **types = uncore_msr_uncores;
        struct intel_uncore_pmu *pmu;
        struct intel_uncore_box *box;
        int i, pkg, ncpus = 1;
  
 -      if (init) {
 +      if (first_init) {
                /*
                 * On init we get the number of online cpus in the package
                 * and set refcount for all of them.
                                uncore_box_init(box);
                }
        }
 +
 +      return 0;
  }
  
 -static int uncore_cpu_prepare(int cpu)
 +static int uncore_cpu_prepare(unsigned int cpu)
  {
        struct intel_uncore_type *type, **types = uncore_msr_uncores;
        struct intel_uncore_pmu *pmu;
@@@ -1169,13 -1148,13 +1171,13 @@@ static void uncore_change_context(struc
                uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
  }
  
 -static void uncore_event_exit_cpu(int cpu)
 +static int uncore_event_cpu_offline(unsigned int cpu)
  {
        int target;
  
        /* Check if exiting cpu is used for collecting uncore events */
        if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
 -              return;
 +              return 0;
  
        /* Find a new cpu to collect uncore events */
        target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
  
        uncore_change_context(uncore_msr_uncores, cpu, target);
        uncore_change_context(uncore_pci_uncores, cpu, target);
 +      return 0;
  }
  
 -static void uncore_event_init_cpu(int cpu)
 +static int uncore_event_cpu_online(unsigned int cpu)
  {
        int target;
  
         */
        target = cpumask_any_and(&uncore_cpu_mask, topology_core_cpumask(cpu));
        if (target < nr_cpu_ids)
 -              return;
 +              return 0;
  
        cpumask_set_cpu(cpu, &uncore_cpu_mask);
  
        uncore_change_context(uncore_msr_uncores, -1, cpu);
        uncore_change_context(uncore_pci_uncores, -1, cpu);
 +      return 0;
  }
  
 -static int uncore_cpu_notifier(struct notifier_block *self,
 -                             unsigned long action, void *hcpu)
 -{
 -      unsigned int cpu = (long)hcpu;
 -
 -      switch (action & ~CPU_TASKS_FROZEN) {
 -      case CPU_UP_PREPARE:
 -              return notifier_from_errno(uncore_cpu_prepare(cpu));
 -
 -      case CPU_STARTING:
 -              uncore_cpu_starting(cpu, false);
 -      case CPU_DOWN_FAILED:
 -              uncore_event_init_cpu(cpu);
 -              break;
 -
 -      case CPU_UP_CANCELED:
 -      case CPU_DYING:
 -              uncore_cpu_dying(cpu);
 -              break;
 -
 -      case CPU_DOWN_PREPARE:
 -              uncore_event_exit_cpu(cpu);
 -              break;
 -      }
 -      return NOTIFY_OK;
 -}
 -
 -static struct notifier_block uncore_cpu_nb = {
 -      .notifier_call  = uncore_cpu_notifier,
 -      /*
 -       * to migrate uncore events, our notifier should be executed
 -       * before perf core's notifier.
 -       */
 -      .priority       = CPU_PRI_PERF + 1,
 -};
 -
  static int __init type_pmu_register(struct intel_uncore_type *type)
  {
        int i, ret;
@@@ -1253,6 -1266,41 +1255,6 @@@ err
        return ret;
  }
  
 -static void __init uncore_cpu_setup(void *dummy)
 -{
 -      uncore_cpu_starting(smp_processor_id(), true);
 -}
 -
 -/* Lazy to avoid allocation of a few bytes for the normal case */
 -static __initdata DECLARE_BITMAP(packages, MAX_LOCAL_APIC);
 -
 -static int __init uncore_cpumask_init(bool msr)
 -{
 -      unsigned int cpu;
 -
 -      for_each_online_cpu(cpu) {
 -              unsigned int pkg = topology_logical_package_id(cpu);
 -              int ret;
 -
 -              if (test_and_set_bit(pkg, packages))
 -                      continue;
 -              /*
 -               * The first online cpu of each package allocates and takes
 -               * the refcounts for all other online cpus in that package.
 -               * If msrs are not enabled no allocation is required.
 -               */
 -              if (msr) {
 -                      ret = uncore_cpu_prepare(cpu);
 -                      if (ret)
 -                              return ret;
 -              }
 -              uncore_event_init_cpu(cpu);
 -              smp_call_function_single(cpu, uncore_cpu_setup, NULL, 1);
 -      }
 -      __register_cpu_notifier(&uncore_cpu_nb);
 -      return 0;
 -}
 -
  #define X86_UNCORE_MODEL_MATCH(model, init)   \
        { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init }
  
@@@ -1315,32 -1363,30 +1317,32 @@@ static const struct intel_uncore_init_f
  };
  
  static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
 +      .cpu_init = skl_uncore_cpu_init,
        .pci_init = skl_uncore_pci_init,
  };
  
  static const struct x86_cpu_id intel_uncore_match[] __initconst = {
 -      X86_UNCORE_MODEL_MATCH(26, nhm_uncore_init),    /* Nehalem */
 -      X86_UNCORE_MODEL_MATCH(30, nhm_uncore_init),
 -      X86_UNCORE_MODEL_MATCH(37, nhm_uncore_init),    /* Westmere */
 -      X86_UNCORE_MODEL_MATCH(44, nhm_uncore_init),
 -      X86_UNCORE_MODEL_MATCH(42, snb_uncore_init),    /* Sandy Bridge */
 -      X86_UNCORE_MODEL_MATCH(58, ivb_uncore_init),    /* Ivy Bridge */
 -      X86_UNCORE_MODEL_MATCH(60, hsw_uncore_init),    /* Haswell */
 -      X86_UNCORE_MODEL_MATCH(69, hsw_uncore_init),    /* Haswell Celeron */
 -      X86_UNCORE_MODEL_MATCH(70, hsw_uncore_init),    /* Haswell */
 -      X86_UNCORE_MODEL_MATCH(61, bdw_uncore_init),    /* Broadwell */
 -      X86_UNCORE_MODEL_MATCH(71, bdw_uncore_init),    /* Broadwell */
 -      X86_UNCORE_MODEL_MATCH(45, snbep_uncore_init),  /* Sandy Bridge-EP */
 -      X86_UNCORE_MODEL_MATCH(46, nhmex_uncore_init),  /* Nehalem-EX */
 -      X86_UNCORE_MODEL_MATCH(47, nhmex_uncore_init),  /* Westmere-EX aka. Xeon E7 */
 -      X86_UNCORE_MODEL_MATCH(62, ivbep_uncore_init),  /* Ivy Bridge-EP */
 -      X86_UNCORE_MODEL_MATCH(63, hswep_uncore_init),  /* Haswell-EP */
 -      X86_UNCORE_MODEL_MATCH(79, bdx_uncore_init),    /* BDX-EP */
 -      X86_UNCORE_MODEL_MATCH(86, bdx_uncore_init),    /* BDX-DE */
 -      X86_UNCORE_MODEL_MATCH(87, knl_uncore_init),    /* Knights Landing */
 -      X86_UNCORE_MODEL_MATCH(94, skl_uncore_init),    /* SkyLake */
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP,     nhm_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM,        nhm_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE,       nhm_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EP,    nhm_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE,    snb_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE,      ivb_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE,   hsw_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT,    hsw_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E,   hsw_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, bdw_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, bdw_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X,  snbep_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EX,     nhmex_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EX,    nhmex_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X,    ivbep_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_X,      hswep_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_X,    bdx_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, bdx_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL,   knl_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_uncore_init),
        {},
  };
  
@@@ -1376,33 -1422,11 +1378,33 @@@ static int __init intel_uncore_init(voi
        if (cret && pret)
                return -ENODEV;
  
 -      cpu_notifier_register_begin();
 -      ret = uncore_cpumask_init(!cret);
 -      if (ret)
 -              goto err;
 -      cpu_notifier_register_done();
 +      /*
 +       * Install callbacks. Core will call them for each online cpu.
 +       *
 +       * The first online cpu of each package allocates and takes
 +       * the refcounts for all other online cpus in that package.
 +       * If msrs are not enabled no allocation is required and
 +       * uncore_cpu_prepare() is not called for each online cpu.
 +       */
 +      if (!cret) {
 +             ret = cpuhp_setup_state(CPUHP_PERF_X86_UNCORE_PREP,
 +                                      "PERF_X86_UNCORE_PREP",
 +                                      uncore_cpu_prepare, NULL);
 +              if (ret)
 +                      goto err;
 +      } else {
 +              cpuhp_setup_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP,
 +                                        "PERF_X86_UNCORE_PREP",
 +                                        uncore_cpu_prepare, NULL);
 +      }
 +      first_init = 1;
 +      cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_STARTING,
 +                        "AP_PERF_X86_UNCORE_STARTING",
 +                        uncore_cpu_starting, uncore_cpu_dying);
 +      first_init = 0;
 +      cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
 +                        "AP_PERF_X86_UNCORE_ONLINE",
 +                        uncore_event_cpu_online, uncore_event_cpu_offline);
        return 0;
  
  err:
        on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1);
        uncore_types_exit(uncore_msr_uncores);
        uncore_pci_exit();
 -      cpu_notifier_register_done();
        return ret;
  }
  module_init(intel_uncore_init);
  
  static void __exit intel_uncore_exit(void)
  {
 -      cpu_notifier_register_begin();
 -      __unregister_cpu_notifier(&uncore_cpu_nb);
 +      cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
 +      cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_STARTING);
 +      cpuhp_remove_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP);
        uncore_types_exit(uncore_msr_uncores);
        uncore_pci_exit();
 -      cpu_notifier_register_done();
  }
  module_exit(intel_uncore_exit);
@@@ -1,4 -1,3 +1,3 @@@
- #include <linux/module.h>
  #include <linux/slab.h>
  #include <linux/pci.h>
  #include <asm/apicdef.h>
  #define UNCORE_PMC_IDX_FIXED          UNCORE_PMC_IDX_MAX_GENERIC
  #define UNCORE_PMC_IDX_MAX            (UNCORE_PMC_IDX_FIXED + 1)
  
 +#define UNCORE_PCI_DEV_FULL_DATA(dev, func, type, idx)        \
 +              ((dev << 24) | (func << 16) | (type << 8) | idx)
  #define UNCORE_PCI_DEV_DATA(type, idx)        ((type << 8) | idx)
 +#define UNCORE_PCI_DEV_DEV(data)      ((data >> 24) & 0xff)
 +#define UNCORE_PCI_DEV_FUNC(data)     ((data >> 16) & 0xff)
  #define UNCORE_PCI_DEV_TYPE(data)     ((data >> 8) & 0xff)
  #define UNCORE_PCI_DEV_IDX(data)      (data & 0xff)
  #define UNCORE_EXTRA_PCI_DEV          0xff
@@@ -364,7 -359,6 +363,7 @@@ int bdw_uncore_pci_init(void)
  int skl_uncore_pci_init(void);
  void snb_uncore_cpu_init(void);
  void nhm_uncore_cpu_init(void);
 +void skl_uncore_cpu_init(void);
  int snb_pci2phy_map_init(int devid);
  
  /* perf_event_intel_uncore_snbep.c */
@@@ -36,6 -36,7 +36,7 @@@
  #include <linux/cpumask.h>
  
  #include <asm/mpspec.h>
+ #include <asm/percpu.h>
  
  /* Mappings between logical cpu number and node number */
  DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
@@@ -119,14 -120,6 +120,14 @@@ extern const struct cpumask *cpu_coregr
  
  extern unsigned int __max_logical_packages;
  #define topology_max_packages()                       (__max_logical_packages)
 +
 +extern int __max_smt_threads;
 +
 +static inline int topology_max_smt_threads(void)
 +{
 +      return __max_smt_threads;
 +}
 +
  int topology_update_package_map(unsigned int apicid, unsigned int cpu);
  extern int topology_phys_to_logical_pkg(unsigned int pkg);
  #else
  static inline int
  topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
  static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
 +static inline int topology_max_smt_threads(void) { return 1; }
  #endif
  
  static inline void arch_fix_phys_package_id(int num, u32 slot)
@@@ -28,7 -28,7 +28,7 @@@
  #include <linux/acpi_pmtmr.h>
  #include <linux/efi.h>
  #include <linux/cpumask.h>
- #include <linux/module.h>
+ #include <linux/export.h>
  #include <linux/dmi.h>
  #include <linux/irq.h>
  #include <linux/slab.h>
@@@ -161,15 -161,13 +161,15 @@@ static int __init acpi_parse_madt(struc
  /**
   * acpi_register_lapic - register a local apic and generates a logic cpu number
   * @id: local apic id to register
 + * @acpiid: ACPI id to register
   * @enabled: this cpu is enabled or not
   *
   * Returns the logic cpu number which maps to the local apic
   */
 -static int acpi_register_lapic(int id, u8 enabled)
 +static int acpi_register_lapic(int id, u32 acpiid, u8 enabled)
  {
        unsigned int ver = 0;
 +      int cpu;
  
        if (id >= MAX_LOCAL_APIC) {
                printk(KERN_INFO PREFIX "skipped apicid that is too big\n");
        if (boot_cpu_physical_apicid != -1U)
                ver = apic_version[boot_cpu_physical_apicid];
  
 -      return generic_processor_info(id, ver);
 +      cpu = generic_processor_info(id, ver);
 +      if (cpu >= 0)
 +              early_per_cpu(x86_cpu_to_acpiid, cpu) = acpiid;
 +
 +      return cpu;
  }
  
  static int __init
@@@ -218,7 -212,7 +218,7 @@@ acpi_parse_x2apic(struct acpi_subtable_
        if (!apic->apic_id_valid(apic_id) && enabled)
                printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
        else
 -              acpi_register_lapic(apic_id, enabled);
 +              acpi_register_lapic(apic_id, processor->uid, enabled);
  #else
        printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
  #endif
@@@ -246,7 -240,6 +246,7 @@@ acpi_parse_lapic(struct acpi_subtable_h
         * when we use CPU hotplug.
         */
        acpi_register_lapic(processor->id,      /* APIC ID */
 +                          processor->processor_id, /* ACPI ID */
                            processor->lapic_flags & ACPI_MADT_ENABLED);
  
        return 0;
@@@ -265,7 -258,6 +265,7 @@@ acpi_parse_sapic(struct acpi_subtable_h
        acpi_table_print_madt_entry(header);
  
        acpi_register_lapic((processor->id << 8) | processor->eid,/* APIC ID */
 +                          processor->processor_id, /* ACPI ID */
                            processor->lapic_flags & ACPI_MADT_ENABLED);
  
        return 0;
@@@ -722,7 -714,7 +722,7 @@@ int acpi_map_cpu(acpi_handle handle, ph
  {
        int cpu;
  
 -      cpu = acpi_register_lapic(physid, ACPI_MADT_ENABLED);
 +      cpu = acpi_register_lapic(physid, U32_MAX, ACPI_MADT_ENABLED);
        if (cpu < 0) {
                pr_info(PREFIX "Unable to map lapic to logical cpu number\n");
                return cpu;
diff --combined arch/x86/kernel/amd_nb.c
@@@ -9,7 -9,7 +9,7 @@@
  #include <linux/slab.h>
  #include <linux/init.h>
  #include <linux/errno.h>
- #include <linux/module.h>
+ #include <linux/export.h>
  #include <linux/spinlock.h>
  #include <asm/amd_nb.h>
  
@@@ -219,22 -219,24 +219,22 @@@ int amd_set_subcaches(int cpu, unsigne
        return 0;
  }
  
 -static int amd_cache_gart(void)
 +static void amd_cache_gart(void)
  {
        u16 i;
  
 -       if (!amd_nb_has_feature(AMD_NB_GART))
 -               return 0;
 -
 -       flush_words = kmalloc(amd_nb_num() * sizeof(u32), GFP_KERNEL);
 -       if (!flush_words) {
 -               amd_northbridges.flags &= ~AMD_NB_GART;
 -               return -ENOMEM;
 -       }
 +      if (!amd_nb_has_feature(AMD_NB_GART))
 +              return;
  
 -       for (i = 0; i != amd_nb_num(); i++)
 -               pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c,
 -                                     &flush_words[i]);
 +      flush_words = kmalloc(amd_nb_num() * sizeof(u32), GFP_KERNEL);
 +      if (!flush_words) {
 +              amd_northbridges.flags &= ~AMD_NB_GART;
 +              pr_notice("Cannot initialize GART flush words, GART support disabled\n");
 +              return;
 +      }
  
 -       return 0;
 +      for (i = 0; i != amd_nb_num(); i++)
 +              pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c, &flush_words[i]);
  }
  
  void amd_flush_garts(void)
@@@ -276,10 -278,17 +276,10 @@@ EXPORT_SYMBOL_GPL(amd_flush_garts)
  
  static __init int init_amd_nbs(void)
  {
 -      int err = 0;
 +      amd_cache_northbridges();
 +      amd_cache_gart();
  
 -      err = amd_cache_northbridges();
 -
 -      if (err < 0)
 -              pr_notice("Cannot enumerate AMD northbridges\n");
 -
 -      if (amd_cache_gart() < 0)
 -              pr_notice("Cannot initialize GART flush words, GART support disabled\n");
 -
 -      return err;
 +      return 0;
  }
  
  /* This has to go after the PCI subsystem */
@@@ -23,7 -23,7 +23,7 @@@
  #include <linux/bootmem.h>
  #include <linux/ftrace.h>
  #include <linux/ioport.h>
- #include <linux/module.h>
+ #include <linux/export.h>
  #include <linux/syscore_ops.h>
  #include <linux/delay.h>
  #include <linux/timex.h>
@@@ -92,10 -92,8 +92,10 @@@ static int apic_extnmi = APIC_EXTNMI_BS
   */
  DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID);
  DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID);
 +DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, U32_MAX);
  EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
  EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
 +EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid);
  
  #ifdef CONFIG_X86_32
  
@@@ -2047,7 -2045,7 +2047,7 @@@ int generic_processor_info(int apicid, 
                int thiscpu = max + disabled_cpus - 1;
  
                pr_warning(
 -                      "ACPI: NR_CPUS/possible_cpus limit of %i almost"
 +                      "APIC: NR_CPUS/possible_cpus limit of %i almost"
                        " reached. Keeping one slot for boot cpu."
                        "  Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
  
                int thiscpu = max + disabled_cpus;
  
                pr_warning(
 -                      "ACPI: NR_CPUS/possible_cpus limit of %i reached."
 +                      "APIC: NR_CPUS/possible_cpus limit of %i reached."
                        "  Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
  
                disabled_cpus++;
        if (topology_update_package_map(apicid, cpu) < 0) {
                int thiscpu = max + disabled_cpus;
  
 -              pr_warning("ACPI: Package limit reached. Processor %d/0x%x ignored.\n",
 +              pr_warning("APIC: Package limit reached. Processor %d/0x%x ignored.\n",
                           thiscpu, apicid);
                disabled_cpus++;
                return -ENOSPC;
@@@ -15,7 -15,7 +15,7 @@@
  #include <linux/kernel.h>
  #include <linux/ctype.h>
  #include <linux/hardirq.h>
- #include <linux/module.h>
+ #include <linux/export.h>
  #include <asm/smp.h>
  #include <asm/apic.h>
  #include <asm/ipi.h>
@@@ -181,6 -181,7 +181,6 @@@ static struct apic apic_flat =  
  
        .get_apic_id                    = flat_get_apic_id,
        .set_apic_id                    = set_apic_id,
 -      .apic_id_mask                   = 0xFFu << 24,
  
        .cpu_mask_to_apicid_and         = flat_cpu_mask_to_apicid_and,
  
@@@ -277,6 -278,7 +277,6 @@@ static struct apic apic_physflat =  
  
        .get_apic_id                    = flat_get_apic_id,
        .set_apic_id                    = set_apic_id,
 -      .apic_id_mask                   = 0xFFu << 24,
  
        .cpu_mask_to_apicid_and         = default_cpu_mask_to_apicid_and,
  
@@@ -11,7 -11,6 +11,6 @@@
  
  #include <linux/threads.h>
  #include <linux/cpumask.h>
- #include <linux/module.h>
  #include <linux/string.h>
  #include <linux/kernel.h>
  #include <linux/ctype.h>
@@@ -141,6 -140,7 +140,6 @@@ struct apic apic_noop = 
  
        .get_apic_id                    = noop_get_apic_id,
        .set_apic_id                    = NULL,
 -      .apic_id_mask                   = 0x0F << 24,
  
        .cpu_mask_to_apicid_and         = flat_cpu_mask_to_apicid_and,
  
@@@ -39,7 -39,7 +39,7 @@@
  #include <linux/mc146818rtc.h>
  #include <linux/compiler.h>
  #include <linux/acpi.h>
- #include <linux/module.h>
+ #include <linux/export.h>
  #include <linux/syscore_ops.h>
  #include <linux/freezer.h>
  #include <linux/kthread.h>
@@@ -981,7 -981,7 +981,7 @@@ static int alloc_irq_from_domain(struc
  
        return __irq_domain_alloc_irqs(domain, irq, 1,
                                       ioapic_alloc_attr_node(info),
 -                                     info, legacy);
 +                                     info, legacy, NULL);
  }
  
  /*
@@@ -1014,8 -1014,7 +1014,8 @@@ static int alloc_isa_irq_from_domain(st
                                          info->ioapic_pin))
                        return -ENOMEM;
        } else {
 -              irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true);
 +              irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true,
 +                                            NULL);
                if (irq >= 0) {
                        irq_data = irq_domain_get_irq_data(domain, irq);
                        data = irq_data->chip_data;
@@@ -2568,25 -2567,29 +2568,25 @@@ static struct resource * __init ioapic_
        unsigned long n;
        struct resource *res;
        char *mem;
 -      int i, num = 0;
 +      int i;
  
 -      for_each_ioapic(i)
 -              num++;
 -      if (num == 0)
 +      if (nr_ioapics == 0)
                return NULL;
  
        n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
 -      n *= num;
 +      n *= nr_ioapics;
  
        mem = alloc_bootmem(n);
        res = (void *)mem;
  
 -      mem += sizeof(struct resource) * num;
 +      mem += sizeof(struct resource) * nr_ioapics;
  
 -      num = 0;
        for_each_ioapic(i) {
 -              res[num].name = mem;
 -              res[num].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
 +              res[i].name = mem;
 +              res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
                snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i);
                mem += IOAPIC_RESOURCE_NAME_SIZE;
 -              ioapics[i].iomem_res = &res[num];
 -              num++;
 +              ioapics[i].iomem_res = &res[i];
        }
  
        ioapic_resources = res;
@@@ -8,7 -8,7 +8,7 @@@
   */
  #include <linux/threads.h>
  #include <linux/cpumask.h>
- #include <linux/module.h>
+ #include <linux/export.h>
  #include <linux/string.h>
  #include <linux/kernel.h>
  #include <linux/ctype.h>
@@@ -101,6 -101,7 +101,6 @@@ static struct apic apic_default = 
  
        .get_apic_id                    = default_get_apic_id,
        .set_apic_id                    = NULL,
 -      .apic_id_mask                   = 0x0F << 24,
  
        .cpu_mask_to_apicid_and         = flat_cpu_mask_to_apicid_and,
  
@@@ -12,7 -12,7 +12,7 @@@
  #include <linux/proc_fs.h>
  #include <linux/threads.h>
  #include <linux/kernel.h>
- #include <linux/module.h>
+ #include <linux/export.h>
  #include <linux/string.h>
  #include <linux/ctype.h>
  #include <linux/sched.h>
@@@ -582,6 -582,7 +582,6 @@@ static struct apic __refdata apic_x2api
  
        .get_apic_id                    = x2apic_get_apic_id,
        .set_apic_id                    = set_apic_id,
 -      .apic_id_mask                   = 0xFFFFFFFFu,
  
        .cpu_mask_to_apicid_and         = uv_cpu_mask_to_apicid_and,
  
@@@ -918,7 -919,7 +918,7 @@@ static void uv_heartbeat(unsigned long 
        uv_set_scir_bits(bits);
  
        /* enable next timer period */
 -      mod_timer_pinned(timer, jiffies + SCIR_CPU_HB_INTERVAL);
 +      mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL);
  }
  
  static void uv_heartbeat_enable(int cpu)
                struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer;
  
                uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
 -              setup_timer(timer, uv_heartbeat, cpu);
 +              setup_pinned_timer(timer, uv_heartbeat, cpu);
                timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
                add_timer_on(timer, cpu);
                uv_cpu_scir_info(cpu)->enabled = 1;
@@@ -2,7 -2,7 +2,7 @@@
  #include <linux/linkage.h>
  #include <linux/bitops.h>
  #include <linux/kernel.h>
- #include <linux/module.h>
+ #include <linux/export.h>
  #include <linux/percpu.h>
  #include <linux/string.h>
  #include <linux/ctype.h>
@@@ -1452,7 -1452,7 +1452,7 @@@ void cpu_init(void
        struct task_struct *me;
        struct tss_struct *t;
        unsigned long v;
 -      int cpu = stack_smp_processor_id();
 +      int cpu = raw_smp_processor_id();
        int i;
  
        wait_for_master_cpu(cpu);
@@@ -5,7 -5,7 +5,7 @@@
  #include <linux/smp.h>
  #include <linux/sched.h>
  #include <linux/thread_info.h>
- #include <linux/module.h>
+ #include <linux/init.h>
  #include <linux/uaccess.h>
  
  #include <asm/cpufeature.h>
@@@ -13,7 -13,6 +13,7 @@@
  #include <asm/msr.h>
  #include <asm/bugs.h>
  #include <asm/cpu.h>
 +#include <asm/intel-family.h>
  
  #ifdef CONFIG_X86_64
  #include <linux/topology.h>
@@@ -509,10 -508,6 +509,10 @@@ static void init_intel(struct cpuinfo_x
            (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
                set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
  
 +      if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_MWAIT) &&
 +              ((c->x86_model == INTEL_FAM6_ATOM_GOLDMONT)))
 +              set_cpu_bug(c, X86_BUG_MONITOR);
 +
  #ifdef CONFIG_X86_64
        if (c->x86 == 15)
                c->x86_cache_alignment = c->x86_clflush_size * 2;
@@@ -7,7 -7,7 +7,7 @@@
  #include <linux/uaccess.h>
  #include <linux/hardirq.h>
  #include <linux/kdebug.h>
- #include <linux/module.h>
+ #include <linux/export.h>
  #include <linux/ptrace.h>
  #include <linux/kexec.h>
  #include <linux/sysfs.h>
@@@ -96,9 -96,7 +96,9 @@@ show_stack_log_lvl(struct task_struct *
        int i;
  
        if (sp == NULL) {
 -              if (task)
 +              if (regs)
 +                      sp = (unsigned long *)regs->sp;
 +              else if (task)
                        sp = (unsigned long *)task->thread.sp;
                else
                        sp = (unsigned long *)&sp;
@@@ -7,7 -7,7 +7,7 @@@
  #include <linux/uaccess.h>
  #include <linux/hardirq.h>
  #include <linux/kdebug.h>
- #include <linux/module.h>
+ #include <linux/export.h>
  #include <linux/ptrace.h>
  #include <linux/kexec.h>
  #include <linux/sysfs.h>
@@@ -264,9 -264,7 +264,9 @@@ show_stack_log_lvl(struct task_struct *
         * back trace for this cpu:
         */
        if (sp == NULL) {
 -              if (task)
 +              if (regs)
 +                      sp = (unsigned long *)regs->sp;
 +              else if (task)
                        sp = (unsigned long *)task->thread.sp;
                else
                        sp = (unsigned long *)&sp;
  
        stack = sp;
        for (i = 0; i < kstack_depth_to_print; i++) {
 +              unsigned long word;
 +
                if (stack >= irq_stack && stack <= irq_stack_end) {
                        if (stack == irq_stack_end) {
                                stack = (unsigned long *) (irq_stack_end[-1]);
                if (kstack_end(stack))
                        break;
                }
 +
 +              if (probe_kernel_address(stack, word))
 +                      break;
 +
                if ((i % STACKSLOTS_PER_LINE) == 0) {
                        if (i != 0)
                                pr_cont("\n");
 -                      printk("%s %016lx", log_lvl, *stack++);
 +                      printk("%s %016lx", log_lvl, word);
                } else
 -                      pr_cont(" %016lx", *stack++);
 +                      pr_cont(" %016lx", word);
 +
 +              stack++;
                touch_nmi_watchdog();
        }
        preempt_enable();
@@@ -1,4 -1,5 +1,5 @@@
- #include <linux/module.h>
+ #include <linux/export.h>
+ #include <linux/spinlock_types.h>
  
  #include <asm/checksum.h>
  #include <asm/pgtable.h>
@@@ -42,5 -43,3 +43,5 @@@ EXPORT_SYMBOL(empty_zero_page)
  EXPORT_SYMBOL(___preempt_schedule);
  EXPORT_SYMBOL(___preempt_schedule_notrace);
  #endif
 +
 +EXPORT_SYMBOL(__sw_hweight32);
diff --combined arch/x86/kernel/kvm.c
@@@ -21,7 -21,7 +21,7 @@@
   */
  
  #include <linux/context_tracking.h>
- #include <linux/module.h>
+ #include <linux/init.h>
  #include <linux/kernel.h>
  #include <linux/kvm_para.h>
  #include <linux/cpu.h>
@@@ -301,6 -301,8 +301,6 @@@ static void kvm_register_steal_time(voi
        if (!has_steal_clock)
                return;
  
 -      memset(st, 0, sizeof(*st));
 -
        wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED));
        pr_info("kvm-stealtime: cpu %d, msr %llx\n",
                cpu, (unsigned long long) slow_virt_to_phys(st));
@@@ -7,7 -7,8 +7,8 @@@
  #include <linux/prctl.h>
  #include <linux/slab.h>
  #include <linux/sched.h>
- #include <linux/module.h>
+ #include <linux/init.h>
+ #include <linux/export.h>
  #include <linux/pm.h>
  #include <linux/tick.h>
  #include <linux/random.h>
@@@ -404,7 -405,7 +405,7 @@@ static int prefer_mwait_c1_over_halt(co
        if (c->x86_vendor != X86_VENDOR_INTEL)
                return 0;
  
 -      if (!cpu_has(c, X86_FEATURE_MWAIT))
 +      if (!cpu_has(c, X86_FEATURE_MWAIT) || static_cpu_has_bug(X86_BUG_MONITOR))
                return 0;
  
        return 1;
diff --combined arch/x86/kernel/reboot.c
@@@ -1,6 -1,6 +1,6 @@@
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  
- #include <linux/module.h>
+ #include <linux/export.h>
  #include <linux/reboot.h>
  #include <linux/init.h>
  #include <linux/pm.h>
@@@ -54,19 -54,6 +54,19 @@@ bool port_cf9_safe = false
   * Dell Inc. so their systems "just work". :-)
   */
  
 +/*
 + * Some machines require the "reboot=a" commandline options
 + */
 +static int __init set_acpi_reboot(const struct dmi_system_id *d)
 +{
 +      if (reboot_type != BOOT_ACPI) {
 +              reboot_type = BOOT_ACPI;
 +              pr_info("%s series board detected. Selecting %s-method for reboots.\n",
 +                      d->ident, "ACPI");
 +      }
 +      return 0;
 +}
 +
  /*
   * Some machines require the "reboot=b" or "reboot=k"  commandline options,
   * this quirk makes that automatic.
@@@ -408,14 -395,6 +408,14 @@@ static struct dmi_system_id __initdata 
                        DMI_MATCH(DMI_PRODUCT_NAME, "Dell XPS710"),
                },
        },
 +      {       /* Handle problems with rebooting on Dell Optiplex 7450 AIO */
 +              .callback = set_acpi_reboot,
 +              .ident = "Dell OptiPlex 7450 AIO",
 +              .matches = {
 +                      DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
 +                      DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 7450 AIO"),
 +              },
 +      },
  
        /* Hewlett-Packard */
        {       /* Handle problems with rebooting on HP laptops */
diff --combined arch/x86/kernel/setup.c
@@@ -36,7 -36,7 +36,7 @@@
  #include <linux/console.h>
  #include <linux/root_dev.h>
  #include <linux/highmem.h>
- #include <linux/module.h>
+ #include <linux/export.h>
  #include <linux/efi.h>
  #include <linux/init.h>
  #include <linux/edd.h>
  #include <asm/prom.h>
  #include <asm/microcode.h>
  #include <asm/mmu_context.h>
 +#include <asm/kaslr.h>
  
  /*
   * max_low_pfn_mapped: highest direct mapped pfn under 4GB
@@@ -400,6 -399,10 +400,6 @@@ static void __init reserve_initrd(void
        memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
  }
  
 -static void __init early_initrd_acpi_init(void)
 -{
 -      early_acpi_table_init((void *)initrd_start, initrd_end - initrd_start);
 -}
  #else
  static void __init early_reserve_initrd(void)
  {
  static void __init reserve_initrd(void)
  {
  }
 -static void __init early_initrd_acpi_init(void)
 -{
 -}
  #endif /* CONFIG_BLK_DEV_INITRD */
  
  static void __init parse_setup_data(void)
@@@ -936,8 -942,6 +936,8 @@@ void __init setup_arch(char **cmdline_p
  
        x86_init.oem.arch_setup();
  
 +      kernel_randomize_memory();
 +
        iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
        setup_memory_map();
        parse_setup_data();
  
        reserve_initrd();
  
 -      early_initrd_acpi_init();
 +      acpi_table_upgrade();
  
        vsmp_init();
  
@@@ -43,7 -43,7 +43,7 @@@
  
  #include <linux/init.h>
  #include <linux/smp.h>
- #include <linux/module.h>
+ #include <linux/export.h>
  #include <linux/sched.h>
  #include <linux/percpu.h>
  #include <linux/bootmem.h>
@@@ -105,9 -105,6 +105,9 @@@ static unsigned int max_physical_pkg_i
  unsigned int __max_logical_packages __read_mostly;
  EXPORT_SYMBOL(__max_logical_packages);
  
 +/* Maximum number of SMT threads on any online core */
 +int __max_smt_threads __read_mostly;
 +
  static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
  {
        unsigned long flags;
@@@ -496,7 -493,7 +496,7 @@@ void set_cpu_sibling_map(int cpu
        bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1;
        struct cpuinfo_x86 *c = &cpu_data(cpu);
        struct cpuinfo_x86 *o;
 -      int i;
 +      int i, threads;
  
        cpumask_set_cpu(cpu, cpu_sibling_setup_mask);
  
                if (match_die(c, o) && !topology_same_node(c, o))
                        primarily_use_numa_for_topology();
        }
 +
 +      threads = cpumask_weight(topology_sibling_cpumask(cpu));
 +      if (threads > __max_smt_threads)
 +              __max_smt_threads = threads;
  }
  
  /* maps the cpu to the sched domain representing multi-core */
@@@ -1292,6 -1285,7 +1292,6 @@@ void __init native_smp_prepare_cpus(uns
        cpumask_copy(cpu_callin_mask, cpumask_of(0));
        mb();
  
 -      current_thread_info()->cpu = 0;  /* needed? */
        for_each_possible_cpu(i) {
                zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
                zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
@@@ -1447,21 -1441,6 +1447,21 @@@ __init void prefill_possible_map(void
  
  #ifdef CONFIG_HOTPLUG_CPU
  
 +/* Recompute SMT state for all CPUs on offline */
 +static void recompute_smt_state(void)
 +{
 +      int max_threads, cpu;
 +
 +      max_threads = 0;
 +      for_each_online_cpu (cpu) {
 +              int threads = cpumask_weight(topology_sibling_cpumask(cpu));
 +
 +              if (threads > max_threads)
 +                      max_threads = threads;
 +      }
 +      __max_smt_threads = max_threads;
 +}
 +
  static void remove_siblinginfo(int cpu)
  {
        int sibling;
        c->phys_proc_id = 0;
        c->cpu_core_id = 0;
        cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
 +      recompute_smt_state();
  }
  
  static void remove_cpu_from_maps(int cpu)
@@@ -1644,7 -1622,7 +1644,7 @@@ static inline void mwait_play_dead(void
        }
  }
  
 -static inline void hlt_play_dead(void)
 +void hlt_play_dead(void)
  {
        if (__this_cpu_read(cpu_info.x86) >= 4)
                wbinvd();
diff --combined arch/x86/kernel/tsc.c
@@@ -3,7 -3,7 +3,7 @@@
  #include <linux/kernel.h>
  #include <linux/sched.h>
  #include <linux/init.h>
- #include <linux/module.h>
+ #include <linux/export.h>
  #include <linux/timer.h>
  #include <linux/acpi_pmtmr.h>
  #include <linux/cpufreq.h>
@@@ -239,7 -239,7 +239,7 @@@ static inline unsigned long long cycles
        return ns;
  }
  
 -static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
 +static void set_cyc2ns_scale(unsigned long khz, int cpu)
  {
        unsigned long long tsc_now, ns_now;
        struct cyc2ns_data *data;
        local_irq_save(flags);
        sched_clock_idle_sleep_event();
  
 -      if (!cpu_khz)
 +      if (!khz)
                goto done;
  
        data = cyc2ns_write_begin(cpu);
         * time function is continuous; see the comment near struct
         * cyc2ns_data.
         */
 -      clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, cpu_khz,
 +      clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, khz,
                               NSEC_PER_MSEC, 0);
  
        /*
@@@ -335,6 -335,12 +335,6 @@@ int check_tsc_unstable(void
  }
  EXPORT_SYMBOL_GPL(check_tsc_unstable);
  
 -int check_tsc_disabled(void)
 -{
 -      return tsc_disabled;
 -}
 -EXPORT_SYMBOL_GPL(check_tsc_disabled);
 -
  #ifdef CONFIG_X86_TSC
  int __init notsc_setup(char *str)
  {
@@@ -659,77 -665,19 +659,77 @@@ success
  }
  
  /**
 - * native_calibrate_tsc - calibrate the tsc on boot
 + * native_calibrate_tsc
 + * Determine TSC frequency via CPUID, else return 0.
   */
  unsigned long native_calibrate_tsc(void)
 +{
 +      unsigned int eax_denominator, ebx_numerator, ecx_hz, edx;
 +      unsigned int crystal_khz;
 +
 +      if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
 +              return 0;
 +
 +      if (boot_cpu_data.cpuid_level < 0x15)
 +              return 0;
 +
 +      eax_denominator = ebx_numerator = ecx_hz = edx = 0;
 +
 +      /* CPUID 15H TSC/Crystal ratio, plus optionally Crystal Hz */
 +      cpuid(0x15, &eax_denominator, &ebx_numerator, &ecx_hz, &edx);
 +
 +      if (ebx_numerator == 0 || eax_denominator == 0)
 +              return 0;
 +
 +      crystal_khz = ecx_hz / 1000;
 +
 +      if (crystal_khz == 0) {
 +              switch (boot_cpu_data.x86_model) {
 +              case 0x4E:      /* SKL */
 +              case 0x5E:      /* SKL */
 +                      crystal_khz = 24000;    /* 24.0 MHz */
 +                      break;
 +              case 0x5C:      /* BXT */
 +                      crystal_khz = 19200;    /* 19.2 MHz */
 +                      break;
 +              }
 +      }
 +
 +      return crystal_khz * ebx_numerator / eax_denominator;
 +}
 +
 +static unsigned long cpu_khz_from_cpuid(void)
 +{
 +      unsigned int eax_base_mhz, ebx_max_mhz, ecx_bus_mhz, edx;
 +
 +      if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
 +              return 0;
 +
 +      if (boot_cpu_data.cpuid_level < 0x16)
 +              return 0;
 +
 +      eax_base_mhz = ebx_max_mhz = ecx_bus_mhz = edx = 0;
 +
 +      cpuid(0x16, &eax_base_mhz, &ebx_max_mhz, &ecx_bus_mhz, &edx);
 +
 +      return eax_base_mhz * 1000;
 +}
 +
 +/**
 + * native_calibrate_cpu - calibrate the cpu on boot
 + */
 +unsigned long native_calibrate_cpu(void)
  {
        u64 tsc1, tsc2, delta, ref1, ref2;
        unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
        unsigned long flags, latch, ms, fast_calibrate;
        int hpet = is_hpet_enabled(), i, loopmin;
  
 -      /* Calibrate TSC using MSR for Intel Atom SoCs */
 -      local_irq_save(flags);
 -      fast_calibrate = try_msr_calibrate_tsc();
 -      local_irq_restore(flags);
 +      fast_calibrate = cpu_khz_from_cpuid();
 +      if (fast_calibrate)
 +              return fast_calibrate;
 +
 +      fast_calibrate = cpu_khz_from_msr();
        if (fast_calibrate)
                return fast_calibrate;
  
@@@ -889,12 -837,8 +889,12 @@@ int recalibrate_cpu_khz(void
        if (!boot_cpu_has(X86_FEATURE_TSC))
                return -ENODEV;
  
 +      cpu_khz = x86_platform.calibrate_cpu();
        tsc_khz = x86_platform.calibrate_tsc();
 -      cpu_khz = tsc_khz;
 +      if (tsc_khz == 0)
 +              tsc_khz = cpu_khz;
 +      else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz)
 +              cpu_khz = tsc_khz;
        cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy,
                                                    cpu_khz_old, cpu_khz);
  
@@@ -1300,18 -1244,8 +1300,18 @@@ void __init tsc_init(void
                return;
        }
  
 +      cpu_khz = x86_platform.calibrate_cpu();
        tsc_khz = x86_platform.calibrate_tsc();
 -      cpu_khz = tsc_khz;
 +
 +      /*
 +       * Trust non-zero tsc_khz as authoritative,
 +       * and use it to sanity check cpu_khz,
 +       * which will be off if system timer is off.
 +       */
 +      if (tsc_khz == 0)
 +              tsc_khz = cpu_khz;
 +      else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz)
 +              cpu_khz = tsc_khz;
  
        if (!tsc_khz) {
                mark_tsc_unstable("could not calculate TSC khz");
         */
        for_each_possible_cpu(cpu) {
                cyc2ns_init(cpu);
 -              set_cyc2ns_scale(cpu_khz, cpu);
 +              set_cyc2ns_scale(tsc_khz, cpu);
        }
  
        if (tsc_disabled > 0)
@@@ -1,7 -1,8 +1,8 @@@
  /* Exports for assembly files.
     All C exports should go in the respective C files. */
  
- #include <linux/module.h>
+ #include <linux/export.h>
+ #include <linux/spinlock_types.h>
  #include <linux/smp.h>
  
  #include <net/checksum.h>
@@@ -44,9 -45,6 +45,9 @@@ EXPORT_SYMBOL(clear_page)
  
  EXPORT_SYMBOL(csum_partial);
  
 +EXPORT_SYMBOL(__sw_hweight32);
 +EXPORT_SYMBOL(__sw_hweight64);
 +
  /*
   * Export string functions. We normally rely on gcc builtin for most of these,
   * but gcc sometimes decides not to inline them.
@@@ -5,7 -5,7 +5,7 @@@
   */
  #include <linux/init.h>
  #include <linux/ioport.h>
- #include <linux/module.h>
+ #include <linux/export.h>
  #include <linux/pci.h>
  
  #include <asm/bios_ebda.h>
@@@ -92,7 -92,6 +92,7 @@@ static void default_nmi_init(void) { }
  static int default_i8042_detect(void) { return 1; };
  
  struct x86_platform_ops x86_platform = {
 +      .calibrate_cpu                  = native_calibrate_cpu,
        .calibrate_tsc                  = native_calibrate_tsc,
        .get_wallclock                  = mach_get_cmos_time,
        .set_wallclock                  = mach_set_rtc_mmss,
diff --combined arch/x86/kvm/cpuid.c
@@@ -13,7 -13,7 +13,7 @@@
   */
  
  #include <linux/kvm_host.h>
- #include <linux/module.h>
+ #include <linux/export.h>
  #include <linux/vmalloc.h>
  #include <linux/uaccess.h>
  #include <asm/fpu/internal.h> /* For use_eager_fpu.  Ugh! */
@@@ -366,7 -366,7 +366,7 @@@ static inline int __do_cpuid_ent(struc
                F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
                F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
                F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
 -              F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(PCOMMIT);
 +              F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB);
  
        /* cpuid 0xD.1.eax */
        const u32 kvm_cpuid_D_1_eax_x86_features =
diff --combined arch/x86/kvm/x86.c
@@@ -36,7 -36,8 +36,8 @@@
  #include <linux/kvm.h>
  #include <linux/fs.h>
  #include <linux/vmalloc.h>
- #include <linux/module.h>
+ #include <linux/export.h>
+ #include <linux/moduleparam.h>
  #include <linux/mman.h>
  #include <linux/highmem.h>
  #include <linux/iommu.h>
@@@ -55,6 -56,9 +56,6 @@@
  #include <linux/irqbypass.h>
  #include <trace/events/kvm.h>
  
 -#define CREATE_TRACE_POINTS
 -#include "trace.h"
 -
  #include <asm/debugreg.h>
  #include <asm/msr.h>
  #include <asm/desc.h>
@@@ -65,9 -69,6 +66,9 @@@
  #include <asm/div64.h>
  #include <asm/irq_remapping.h>
  
 +#define CREATE_TRACE_POINTS
 +#include "trace.h"
 +
  #define MAX_IO_MSRS 256
  #define KVM_MAX_MCE_BANKS 32
  #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
@@@ -5552,10 -5553,9 +5553,10 @@@ int kvm_fast_pio_out(struct kvm_vcpu *v
  }
  EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
  
 -static void tsc_bad(void *info)
 +static int kvmclock_cpu_down_prep(unsigned int cpu)
  {
        __this_cpu_write(cpu_tsc_khz, 0);
 +      return 0;
  }
  
  static void tsc_khz_changed(void *data)
@@@ -5660,18 -5660,35 +5661,18 @@@ static struct notifier_block kvmclock_c
        .notifier_call  = kvmclock_cpufreq_notifier
  };
  
 -static int kvmclock_cpu_notifier(struct notifier_block *nfb,
 -                                      unsigned long action, void *hcpu)
 +static int kvmclock_cpu_online(unsigned int cpu)
  {
 -      unsigned int cpu = (unsigned long)hcpu;
 -
 -      switch (action) {
 -              case CPU_ONLINE:
 -              case CPU_DOWN_FAILED:
 -                      smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
 -                      break;
 -              case CPU_DOWN_PREPARE:
 -                      smp_call_function_single(cpu, tsc_bad, NULL, 1);
 -                      break;
 -      }
 -      return NOTIFY_OK;
 +      tsc_khz_changed(NULL);
 +      return 0;
  }
  
 -static struct notifier_block kvmclock_cpu_notifier_block = {
 -      .notifier_call  = kvmclock_cpu_notifier,
 -      .priority = -INT_MAX
 -};
 -
  static void kvm_timer_init(void)
  {
        int cpu;
  
        max_tsc_khz = tsc_khz;
  
 -      cpu_notifier_register_begin();
        if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
  #ifdef CONFIG_CPU_FREQ
                struct cpufreq_policy policy;
                                          CPUFREQ_TRANSITION_NOTIFIER);
        }
        pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
 -      for_each_online_cpu(cpu)
 -              smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
 -
 -      __register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
 -      cpu_notifier_register_done();
  
 +      cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "AP_X86_KVM_CLK_ONLINE",
 +                        kvmclock_cpu_online, kvmclock_cpu_down_prep);
  }
  
  static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
@@@ -5877,7 -5897,7 +5878,7 @@@ void kvm_arch_exit(void
        if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
                cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
                                            CPUFREQ_TRANSITION_NOTIFIER);
 -      unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
 +      cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
  #ifdef CONFIG_X86_64
        pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
  #endif
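
The hunks above replace the open-coded hotplug notifier with the cpuhp state machine: one callback runs as a CPU comes online, the other just before it goes down, and cpuhp_remove_state_nocalls() tears the state down again. A minimal sketch of the same pattern for an unrelated user, assuming a dynamically allocated AP state (CPUHP_AP_ONLINE_DYN) is acceptable; all names are illustrative:

#include <linux/cpuhotplug.h>
#include <linux/init.h>

static int demo_state = -1;

static int demo_cpu_online(unsigned int cpu)
{
	/* Per-CPU setup; AP callbacks run on the CPU that is coming up. */
	return 0;
}

static int demo_cpu_down_prep(unsigned int cpu)
{
	/* Per-CPU cleanup, called before the CPU goes offline. */
	return 0;
}

static int __init demo_init(void)
{
	int ret;

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "demo:online",
				demo_cpu_online, demo_cpu_down_prep);
	if (ret < 0)
		return ret;

	demo_state = ret;	/* dynamic states hand back the allocated state */
	return 0;
}

static void demo_teardown(void)
{
	cpuhp_remove_state_nocalls(demo_state);
}
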
@@@ -5,8 -5,7 +5,8 @@@
   * Wrappers of assembly checksum functions for x86-64.
   */
  #include <asm/checksum.h>
- #include <linux/module.h>
+ #include <linux/export.h>
 +#include <linux/uaccess.h>
  #include <asm/smap.h>
  
  /**
@@@ -5,8 -5,8 +5,8 @@@
   * Copyright 1997 Linus Torvalds
   * Copyright 2002 Andi Kleen <ak@suse.de>
   */
- #include <linux/module.h>
+ #include <linux/export.h>
 -#include <asm/uaccess.h>
 +#include <linux/uaccess.h>
  
  /*
   * Zero Userspace
@@@ -14,7 -14,7 +14,7 @@@
  
  #include <linux/debugfs.h>
  #include <linux/mm.h>
- #include <linux/module.h>
+ #include <linux/init.h>
  #include <linux/seq_file.h>
  
  #include <asm/pgtable.h>
@@@ -72,9 -72,9 +72,9 @@@ static struct addr_marker address_marke
        { 0, "User Space" },
  #ifdef CONFIG_X86_64
        { 0x8000000000000000UL, "Kernel Space" },
 -      { PAGE_OFFSET,          "Low Kernel Mapping" },
 -      { VMALLOC_START,        "vmalloc() Area" },
 -      { VMEMMAP_START,        "Vmemmap" },
 +      { 0/* PAGE_OFFSET */,   "Low Kernel Mapping" },
 +      { 0/* VMALLOC_START */, "vmalloc() Area" },
 +      { 0/* VMEMMAP_START */, "Vmemmap" },
  # ifdef CONFIG_X86_ESPFIX64
        { ESPFIX_BASE_ADDR,     "ESPfix Area", 16 },
  # endif
@@@ -434,16 -434,8 +434,16 @@@ void ptdump_walk_pgd_level_checkwx(void
  
  static int __init pt_dump_init(void)
  {
 +      /*
 +       * Various markers are not compile-time constants, so assign them
 +       * here.
 +       */
 +#ifdef CONFIG_X86_64
 +      address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET;
 +      address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
 +      address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START;
 +#endif
  #ifdef CONFIG_X86_32
 -      /* Not a compile-time constant on x86-32 */
        address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
        address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
  # ifdef CONFIG_HIGHMEM
  
        return 0;
  }
  __initcall(pt_dump_init);
- MODULE_LICENSE("GPL");
- MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
- MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables");
diff --combined arch/x86/mm/init_64.c
@@@ -27,7 -27,6 +27,6 @@@
  #include <linux/pfn.h>
  #include <linux/poison.h>
  #include <linux/dma-mapping.h>
- #include <linux/module.h>
  #include <linux/memory.h>
  #include <linux/memory_hotplug.h>
  #include <linux/memremap.h>
@@@ -328,30 -327,22 +327,30 @@@ void __init cleanup_highmap(void
        }
  }
  
 +/*
 + * Create PTE level page table mapping for physical addresses.
 + * It returns the last physical address mapped.
 + */
  static unsigned long __meminit
 -phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
 +phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
              pgprot_t prot)
  {
 -      unsigned long pages = 0, next;
 -      unsigned long last_map_addr = end;
 +      unsigned long pages = 0, paddr_next;
 +      unsigned long paddr_last = paddr_end;
 +      pte_t *pte;
        int i;
  
 -      pte_t *pte = pte_page + pte_index(addr);
 +      pte = pte_page + pte_index(paddr);
 +      i = pte_index(paddr);
  
 -      for (i = pte_index(addr); i < PTRS_PER_PTE; i++, addr = next, pte++) {
 -              next = (addr & PAGE_MASK) + PAGE_SIZE;
 -              if (addr >= end) {
 +      for (; i < PTRS_PER_PTE; i++, paddr = paddr_next, pte++) {
 +              paddr_next = (paddr & PAGE_MASK) + PAGE_SIZE;
 +              if (paddr >= paddr_end) {
                        if (!after_bootmem &&
 -                          !e820_any_mapped(addr & PAGE_MASK, next, E820_RAM) &&
 -                          !e820_any_mapped(addr & PAGE_MASK, next, E820_RESERVED_KERN))
 +                          !e820_any_mapped(paddr & PAGE_MASK, paddr_next,
 +                                           E820_RAM) &&
 +                          !e820_any_mapped(paddr & PAGE_MASK, paddr_next,
 +                                           E820_RESERVED_KERN))
                                set_pte(pte, __pte(0));
                        continue;
                }
                 * pagetable pages as RO. So assume someone who pre-setup
                 * these mappings are more intelligent.
                 */
 -              if (pte_val(*pte)) {
 +              if (!pte_none(*pte)) {
                        if (!after_bootmem)
                                pages++;
                        continue;
                }
  
                if (0)
 -                      printk("   pte=%p addr=%lx pte=%016lx\n",
 -                             pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte);
 +                      pr_info("   pte=%p addr=%lx pte=%016lx\n", pte, paddr,
 +                              pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte);
                pages++;
 -              set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, prot));
 -              last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE;
 +              set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
 +              paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE;
        }
  
        update_page_count(PG_LEVEL_4K, pages);
  
 -      return last_map_addr;
 +      return paddr_last;
  }
  
 +/*
 + * Create PMD level page table mapping for physical addresses. The virtual
 + * and physical addresses have to be aligned at this level.
 + * It returns the last physical address mapped.
 + */
  static unsigned long __meminit
 -phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
 +phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
              unsigned long page_size_mask, pgprot_t prot)
  {
 -      unsigned long pages = 0, next;
 -      unsigned long last_map_addr = end;
 +      unsigned long pages = 0, paddr_next;
 +      unsigned long paddr_last = paddr_end;
  
 -      int i = pmd_index(address);
 +      int i = pmd_index(paddr);
  
 -      for (; i < PTRS_PER_PMD; i++, address = next) {
 -              pmd_t *pmd = pmd_page + pmd_index(address);
 +      for (; i < PTRS_PER_PMD; i++, paddr = paddr_next) {
 +              pmd_t *pmd = pmd_page + pmd_index(paddr);
                pte_t *pte;
                pgprot_t new_prot = prot;
  
 -              next = (address & PMD_MASK) + PMD_SIZE;
 -              if (address >= end) {
 +              paddr_next = (paddr & PMD_MASK) + PMD_SIZE;
 +              if (paddr >= paddr_end) {
                        if (!after_bootmem &&
 -                          !e820_any_mapped(address & PMD_MASK, next, E820_RAM) &&
 -                          !e820_any_mapped(address & PMD_MASK, next, E820_RESERVED_KERN))
 +                          !e820_any_mapped(paddr & PMD_MASK, paddr_next,
 +                                           E820_RAM) &&
 +                          !e820_any_mapped(paddr & PMD_MASK, paddr_next,
 +                                           E820_RESERVED_KERN))
                                set_pmd(pmd, __pmd(0));
                        continue;
                }
  
 -              if (pmd_val(*pmd)) {
 +              if (!pmd_none(*pmd)) {
                        if (!pmd_large(*pmd)) {
                                spin_lock(&init_mm.page_table_lock);
                                pte = (pte_t *)pmd_page_vaddr(*pmd);
 -                              last_map_addr = phys_pte_init(pte, address,
 -                                                              end, prot);
 +                              paddr_last = phys_pte_init(pte, paddr,
 +                                                         paddr_end, prot);
                                spin_unlock(&init_mm.page_table_lock);
                                continue;
                        }
                        if (page_size_mask & (1 << PG_LEVEL_2M)) {
                                if (!after_bootmem)
                                        pages++;
 -                              last_map_addr = next;
 +                              paddr_last = paddr_next;
                                continue;
                        }
                        new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd));
                        pages++;
                        spin_lock(&init_mm.page_table_lock);
                        set_pte((pte_t *)pmd,
 -                              pfn_pte((address & PMD_MASK) >> PAGE_SHIFT,
 +                              pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
                                        __pgprot(pgprot_val(prot) | _PAGE_PSE)));
                        spin_unlock(&init_mm.page_table_lock);
 -                      last_map_addr = next;
 +                      paddr_last = paddr_next;
                        continue;
                }
  
                pte = alloc_low_page();
 -              last_map_addr = phys_pte_init(pte, address, end, new_prot);
 +              paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot);
  
                spin_lock(&init_mm.page_table_lock);
                pmd_populate_kernel(&init_mm, pmd, pte);
                spin_unlock(&init_mm.page_table_lock);
        }
        update_page_count(PG_LEVEL_2M, pages);
 -      return last_map_addr;
 +      return paddr_last;
  }
  
 +/*
 + * Create PUD level page table mapping for physical addresses. The virtual
 + * and physical addresses do not have to be aligned at this level. KASLR can
 + * randomize virtual addresses up to this level.
 + * It returns the last physical address mapped.
 + */
  static unsigned long __meminit
 -phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
 -                       unsigned long page_size_mask)
 +phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
 +            unsigned long page_size_mask)
  {
 -      unsigned long pages = 0, next;
 -      unsigned long last_map_addr = end;
 -      int i = pud_index(addr);
 +      unsigned long pages = 0, paddr_next;
 +      unsigned long paddr_last = paddr_end;
 +      unsigned long vaddr = (unsigned long)__va(paddr);
 +      int i = pud_index(vaddr);
  
 -      for (; i < PTRS_PER_PUD; i++, addr = next) {
 -              pud_t *pud = pud_page + pud_index(addr);
 +      for (; i < PTRS_PER_PUD; i++, paddr = paddr_next) {
 +              pud_t *pud;
                pmd_t *pmd;
                pgprot_t prot = PAGE_KERNEL;
  
 -              next = (addr & PUD_MASK) + PUD_SIZE;
 -              if (addr >= end) {
 +              vaddr = (unsigned long)__va(paddr);
 +              pud = pud_page + pud_index(vaddr);
 +              paddr_next = (paddr & PUD_MASK) + PUD_SIZE;
 +
 +              if (paddr >= paddr_end) {
                        if (!after_bootmem &&
 -                          !e820_any_mapped(addr & PUD_MASK, next, E820_RAM) &&
 -                          !e820_any_mapped(addr & PUD_MASK, next, E820_RESERVED_KERN))
 +                          !e820_any_mapped(paddr & PUD_MASK, paddr_next,
 +                                           E820_RAM) &&
 +                          !e820_any_mapped(paddr & PUD_MASK, paddr_next,
 +                                           E820_RESERVED_KERN))
                                set_pud(pud, __pud(0));
                        continue;
                }
  
 -              if (pud_val(*pud)) {
 +              if (!pud_none(*pud)) {
                        if (!pud_large(*pud)) {
                                pmd = pmd_offset(pud, 0);
 -                              last_map_addr = phys_pmd_init(pmd, addr, end,
 -                                                       page_size_mask, prot);
 +                              paddr_last = phys_pmd_init(pmd, paddr,
 +                                                         paddr_end,
 +                                                         page_size_mask,
 +                                                         prot);
                                __flush_tlb_all();
                                continue;
                        }
                        if (page_size_mask & (1 << PG_LEVEL_1G)) {
                                if (!after_bootmem)
                                        pages++;
 -                              last_map_addr = next;
 +                              paddr_last = paddr_next;
                                continue;
                        }
                        prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud));
                        pages++;
                        spin_lock(&init_mm.page_table_lock);
                        set_pte((pte_t *)pud,
 -                              pfn_pte((addr & PUD_MASK) >> PAGE_SHIFT,
 +                              pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
                                        PAGE_KERNEL_LARGE));
                        spin_unlock(&init_mm.page_table_lock);
 -                      last_map_addr = next;
 +                      paddr_last = paddr_next;
                        continue;
                }
  
                pmd = alloc_low_page();
 -              last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask,
 -                                            prot);
 +              paddr_last = phys_pmd_init(pmd, paddr, paddr_end,
 +                                         page_size_mask, prot);
  
                spin_lock(&init_mm.page_table_lock);
                pud_populate(&init_mm, pud, pmd);
  
        update_page_count(PG_LEVEL_1G, pages);
  
 -      return last_map_addr;
 +      return paddr_last;
  }
  
 +/*
 + * Create page table mapping for the physical memory for specific physical
 + * addresses. The virtual and physical addresses have to be aligned on PMD level
 + * down. It returns the last physical address mapped.
 + */
  unsigned long __meminit
 -kernel_physical_mapping_init(unsigned long start,
 -                           unsigned long end,
 +kernel_physical_mapping_init(unsigned long paddr_start,
 +                           unsigned long paddr_end,
                             unsigned long page_size_mask)
  {
        bool pgd_changed = false;
 -      unsigned long next, last_map_addr = end;
 -      unsigned long addr;
 +      unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last;
  
 -      start = (unsigned long)__va(start);
 -      end = (unsigned long)__va(end);
 -      addr = start;
 +      paddr_last = paddr_end;
 +      vaddr = (unsigned long)__va(paddr_start);
 +      vaddr_end = (unsigned long)__va(paddr_end);
 +      vaddr_start = vaddr;
  
 -      for (; start < end; start = next) {
 -              pgd_t *pgd = pgd_offset_k(start);
 +      for (; vaddr < vaddr_end; vaddr = vaddr_next) {
 +              pgd_t *pgd = pgd_offset_k(vaddr);
                pud_t *pud;
  
 -              next = (start & PGDIR_MASK) + PGDIR_SIZE;
 +              vaddr_next = (vaddr & PGDIR_MASK) + PGDIR_SIZE;
  
                if (pgd_val(*pgd)) {
                        pud = (pud_t *)pgd_page_vaddr(*pgd);
 -                      last_map_addr = phys_pud_init(pud, __pa(start),
 -                                               __pa(end), page_size_mask);
 +                      paddr_last = phys_pud_init(pud, __pa(vaddr),
 +                                                 __pa(vaddr_end),
 +                                                 page_size_mask);
                        continue;
                }
  
                pud = alloc_low_page();
 -              last_map_addr = phys_pud_init(pud, __pa(start), __pa(end),
 -                                               page_size_mask);
 +              paddr_last = phys_pud_init(pud, __pa(vaddr), __pa(vaddr_end),
 +                                         page_size_mask);
  
                spin_lock(&init_mm.page_table_lock);
                pgd_populate(&init_mm, pgd, pud);
        }
  
        if (pgd_changed)
 -              sync_global_pgds(addr, end - 1, 0);
 +              sync_global_pgds(vaddr_start, vaddr_end - 1, 0);
  
        __flush_tlb_all();
  
 -      return last_map_addr;
 +      return paddr_last;
  }
  
  #ifndef CONFIG_NUMA
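
One behavioural change in phys_pud_init() above: the PUD index is now derived from the virtual address (__va(paddr)) rather than from the physical address. The two indices only coincide while the direct-map base stays PGD-aligned; once KASLR randomizes it at PUD granularity they can differ. A small userspace illustration with a hypothetical base that is PUD- but not PGD-aligned:

#include <stdio.h>

#define PUD_SHIFT	30
#define PTRS_PER_PUD	512UL
#define pud_index(a)	(((a) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))

int main(void)
{
	unsigned long pa   = 0x40000000UL;		/* 1 GiB into RAM */
	unsigned long base = 0xffff888040000000UL;	/* hypothetical direct-map base */
	unsigned long va   = base + pa;

	printf("pud_index(pa)=%lu pud_index(va)=%lu\n",
	       pud_index(pa), pud_index(va));
	return 0;	/* prints 1 vs 2 with this base */
}
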
@@@ -708,7 -672,7 +707,7 @@@ static void __meminit free_pte_table(pt
  
        for (i = 0; i < PTRS_PER_PTE; i++) {
                pte = pte_start + i;
 -              if (pte_val(*pte))
 +              if (!pte_none(*pte))
                        return;
        }
  
@@@ -726,7 -690,7 +725,7 @@@ static void __meminit free_pmd_table(pm
  
        for (i = 0; i < PTRS_PER_PMD; i++) {
                pmd = pmd_start + i;
 -              if (pmd_val(*pmd))
 +              if (!pmd_none(*pmd))
                        return;
        }
  
        spin_unlock(&init_mm.page_table_lock);
  }
  
 -/* Return true if pgd is changed, otherwise return false. */
 -static bool __meminit free_pud_table(pud_t *pud_start, pgd_t *pgd)
 -{
 -      pud_t *pud;
 -      int i;
 -
 -      for (i = 0; i < PTRS_PER_PUD; i++) {
 -              pud = pud_start + i;
 -              if (pud_val(*pud))
 -                      return false;
 -      }
 -
 -      /* free a pud table */
 -      free_pagetable(pgd_page(*pgd), 0);
 -      spin_lock(&init_mm.page_table_lock);
 -      pgd_clear(pgd);
 -      spin_unlock(&init_mm.page_table_lock);
 -
 -      return true;
 -}
 -
  static void __meminit
  remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
                 bool direct)
@@@ -927,6 -912,7 +926,6 @@@ remove_pagetable(unsigned long start, u
        unsigned long addr;
        pgd_t *pgd;
        pud_t *pud;
 -      bool pgd_changed = false;
  
        for (addr = start; addr < end; addr = next) {
                next = pgd_addr_end(addr, end);
  
                pud = (pud_t *)pgd_page_vaddr(*pgd);
                remove_pud_table(pud, addr, next, direct);
 -              if (free_pud_table(pud, pgd))
 -                      pgd_changed = true;
        }
  
 -      if (pgd_changed)
 -              sync_global_pgds(start, end - 1, 1);
 -
        flush_tlb_all();
  }
  
diff --combined arch/x86/mm/numa.c
@@@ -1,5 -1,4 +1,5 @@@
  /* Common code for 32 and 64-bit NUMA */
 +#include <linux/acpi.h>
  #include <linux/kernel.h>
  #include <linux/mm.h>
  #include <linux/string.h>
@@@ -8,7 -7,6 +8,6 @@@
  #include <linux/memblock.h>
  #include <linux/mmzone.h>
  #include <linux/ctype.h>
- #include <linux/module.h>
  #include <linux/nodemask.h>
  #include <linux/sched.h>
  #include <linux/topology.h>
@@@ -16,6 -14,7 +15,6 @@@
  #include <asm/e820.h>
  #include <asm/proto.h>
  #include <asm/dma.h>
 -#include <asm/acpi.h>
  #include <asm/amd_nb.h>
  
  #include "numa_internal.h"
diff --combined arch/x86/mm/pat.c
@@@ -11,7 -11,6 +11,6 @@@
  #include <linux/bootmem.h>
  #include <linux/debugfs.h>
  #include <linux/kernel.h>
- #include <linux/module.h>
  #include <linux/pfn_t.h>
  #include <linux/slab.h>
  #include <linux/mm.h>
@@@ -755,8 -754,11 +754,8 @@@ static inline int range_is_allowed(unsi
                return 1;
  
        while (cursor < to) {
 -              if (!devmem_is_allowed(pfn)) {
 -                      pr_info("x86/PAT: Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n",
 -                              current->comm, from, to - 1);
 +              if (!devmem_is_allowed(pfn))
                        return 0;
 -              }
                cursor += PAGE_SIZE;
                pfn++;
        }
diff --combined arch/x86/mm/pgtable_32.c
@@@ -8,7 -8,6 +8,6 @@@
  #include <linux/highmem.h>
  #include <linux/pagemap.h>
  #include <linux/spinlock.h>
- #include <linux/module.h>
  
  #include <asm/pgtable.h>
  #include <asm/pgalloc.h>
@@@ -47,7 -46,7 +46,7 @@@ void set_pte_vaddr(unsigned long vaddr
                return;
        }
        pte = pte_offset_kernel(pmd, vaddr);
 -      if (pte_val(pteval))
 +      if (!pte_none(pteval))
                set_pte_at(&init_mm, vaddr, pte, pteval);
        else
                pte_clear(&init_mm, vaddr, pte);
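
Several hunks in this series replace raw pte_val()/pmd_val()/pud_val() non-zero tests with the pte_none()-style predicates. The two only agree while an empty entry is guaranteed to be all-zero; a pte_none()-style check keeps working if an otherwise-empty entry ever carries auxiliary bits. A stand-alone stand-in (types and bit purely hypothetical, not the kernel's):

#include <stdio.h>
#include <stdbool.h>

typedef struct { unsigned long val; } demo_pte_t;	/* stand-in for pte_t */

#define DEMO_SW_BIT	(1UL << 62)	/* hypothetical software-only bit */

/* "Empty" means no bits set other than bookkeeping ones. */
static bool demo_pte_none(demo_pte_t pte)
{
	return (pte.val & ~DEMO_SW_BIT) == 0;
}

int main(void)
{
	demo_pte_t pte = { DEMO_SW_BIT };

	printf("raw!=0: %d  none(): %d\n", pte.val != 0, demo_pte_none(pte));
	return 0;	/* a raw non-zero test calls this populated; none() calls it empty */
}
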
diff --combined arch/x86/mm/srat.c
  #include <linux/acpi.h>
  #include <linux/mmzone.h>
  #include <linux/bitmap.h>
- #include <linux/module.h>
+ #include <linux/init.h>
  #include <linux/topology.h>
 -#include <linux/bootmem.h>
 -#include <linux/memblock.h>
  #include <linux/mm.h>
  #include <asm/proto.h>
  #include <asm/numa.h>
  #include <asm/apic.h>
  #include <asm/uv/uv.h>
  
 -int acpi_numa __initdata;
 -
 -static __init int setup_node(int pxm)
 -{
 -      return acpi_map_pxm_to_node(pxm);
 -}
 -
 -static __init void bad_srat(void)
 -{
 -      printk(KERN_ERR "SRAT: SRAT not used.\n");
 -      acpi_numa = -1;
 -}
 -
 -static __init inline int srat_disabled(void)
 -{
 -      return acpi_numa < 0;
 -}
 -
 -/*
 - * Callback for SLIT parsing.  pxm_to_node() returns NUMA_NO_NODE for
 - * I/O localities since SRAT does not list them.  I/O localities are
 - * not supported at this point.
 - */
 -void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
 -{
 -      int i, j;
 -
 -      for (i = 0; i < slit->locality_count; i++) {
 -              const int from_node = pxm_to_node(i);
 -
 -              if (from_node == NUMA_NO_NODE)
 -                      continue;
 -
 -              for (j = 0; j < slit->locality_count; j++) {
 -                      const int to_node = pxm_to_node(j);
 -
 -                      if (to_node == NUMA_NO_NODE)
 -                              continue;
 -
 -                      numa_set_distance(from_node, to_node,
 -                              slit->entry[slit->locality_count * i + j]);
 -              }
 -      }
 -}
 -
  /* Callback for Proximity Domain -> x2APIC mapping */
  void __init
  acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
@@@ -44,7 -91,7 +44,7 @@@
                         pxm, apic_id);
                return;
        }
 -      node = setup_node(pxm);
 +      node = acpi_map_pxm_to_node(pxm);
        if (node < 0) {
                printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
                bad_srat();
        }
        set_apicid_to_node(apic_id, node);
        node_set(node, numa_nodes_parsed);
 -      acpi_numa = 1;
        printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
               pxm, apic_id, node);
  }
@@@ -79,7 -127,7 +79,7 @@@ acpi_numa_processor_affinity_init(struc
        pxm = pa->proximity_domain_lo;
        if (acpi_srat_revision >= 2)
                pxm |= *((unsigned int*)pa->proximity_domain_hi) << 8;
 -      node = setup_node(pxm);
 +      node = acpi_map_pxm_to_node(pxm);
        if (node < 0) {
                printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
                bad_srat();
  
        set_apicid_to_node(apic_id, node);
        node_set(node, numa_nodes_parsed);
 -      acpi_numa = 1;
        printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
               pxm, apic_id, node);
  }
  
 -#ifdef CONFIG_MEMORY_HOTPLUG
 -static inline int save_add_info(void) {return 1;}
 -#else
 -static inline int save_add_info(void) {return 0;}
 -#endif
 -
 -/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
 -int __init
 -acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 -{
 -      u64 start, end;
 -      u32 hotpluggable;
 -      int node, pxm;
 -
 -      if (srat_disabled())
 -              goto out_err;
 -      if (ma->header.length != sizeof(struct acpi_srat_mem_affinity))
 -              goto out_err_bad_srat;
 -      if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
 -              goto out_err;
 -      hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE;
 -      if (hotpluggable && !save_add_info())
 -              goto out_err;
 -
 -      start = ma->base_address;
 -      end = start + ma->length;
 -      pxm = ma->proximity_domain;
 -      if (acpi_srat_revision <= 1)
 -              pxm &= 0xff;
 -
 -      node = setup_node(pxm);
 -      if (node < 0) {
 -              printk(KERN_ERR "SRAT: Too many proximity domains.\n");
 -              goto out_err_bad_srat;
 -      }
 -
 -      if (numa_add_memblk(node, start, end) < 0)
 -              goto out_err_bad_srat;
 -
 -      node_set(node, numa_nodes_parsed);
 -
 -      pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s%s\n",
 -              node, pxm,
 -              (unsigned long long) start, (unsigned long long) end - 1,
 -              hotpluggable ? " hotplug" : "",
 -              ma->flags & ACPI_SRAT_MEM_NON_VOLATILE ? " non-volatile" : "");
 -
 -      /* Mark hotplug range in memblock. */
 -      if (hotpluggable && memblock_mark_hotplug(start, ma->length))
 -              pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n",
 -                      (unsigned long long)start, (unsigned long long)end - 1);
 -
 -      max_possible_pfn = max(max_possible_pfn, PFN_UP(end - 1));
 -
 -      return 0;
 -out_err_bad_srat:
 -      bad_srat();
 -out_err:
 -      return -1;
 -}
 -
 -void __init acpi_numa_arch_fixup(void) {}
 -
  int __init x86_acpi_numa_init(void)
  {
        int ret;
@@@ -24,7 -24,7 +24,7 @@@
  #include <linux/spinlock.h>
  #include <linux/bootmem.h>
  #include <linux/ioport.h>
- #include <linux/module.h>
+ #include <linux/init.h>
  #include <linux/efi.h>
  #include <linux/uaccess.h>
  #include <linux/io.h>
@@@ -285,6 -285,11 +285,6 @@@ int __init efi_setup_page_tables(unsign
        return 0;
  }
  
 -void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 -{
 -      kernel_unmap_pages_in_pgd(efi_pgd, pa_memmap, num_pages);
 -}
 -
  static void __init __map_region(efi_memory_desc_t *md, u64 va)
  {
        unsigned long flags = _PAGE_RW;
@@@ -461,17 -466,22 +461,17 @@@ extern efi_status_t efi64_thunk(u32, ..
  #define efi_thunk(f, ...)                                             \
  ({                                                                    \
        efi_status_t __s;                                               \
 -      unsigned long flags;                                            \
 -      u32 func;                                                       \
 -                                                                      \
 -      efi_sync_low_kernel_mappings();                                 \
 -      local_irq_save(flags);                                          \
 +      unsigned long __flags;                                          \
 +      u32 __func;                                                     \
                                                                        \
 -      efi_scratch.prev_cr3 = read_cr3();                              \
 -      write_cr3((unsigned long)efi_scratch.efi_pgt);                  \
 -      __flush_tlb_all();                                              \
 +      local_irq_save(__flags);                                        \
 +      arch_efi_call_virt_setup();                                     \
                                                                        \
 -      func = runtime_service32(f);                                    \
 -      __s = efi64_thunk(func, __VA_ARGS__);                   \
 +      __func = runtime_service32(f);                                  \
 +      __s = efi64_thunk(__func, __VA_ARGS__);                         \
                                                                        \
 -      write_cr3(efi_scratch.prev_cr3);                                \
 -      __flush_tlb_all();                                              \
 -      local_irq_restore(flags);                                       \
 +      arch_efi_call_virt_teardown();                                  \
 +      local_irq_restore(__flags);                                     \
                                                                        \
        __s;                                                            \
  })
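
The efi_thunk() rework also renames the macro's locals to __flags/__func. In a statement-expression macro, a local that shares a name with something the caller passes in silently shadows it; the double-underscore prefix sidesteps that. A tiny demonstration of the pitfall (illustrative only):

#include <stdio.h>

/* The local "flags" shadows whatever the caller passes as x. */
#define SCALE_BY_FLAGS(x) ({ int flags = 2; (x) * flags; })

int main(void)
{
	int flags = 21;

	/* Expands to ({ int flags = 2; (flags) * flags; }) -> prints 4, not 42. */
	printf("%d\n", SCALE_BY_FLAGS(flags));
	return 0;
}
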
diff --combined arch/x86/xen/enlighten.c
@@@ -23,7 -23,7 +23,7 @@@
  #include <linux/sched.h>
  #include <linux/kprobes.h>
  #include <linux/bootmem.h>
- #include <linux/module.h>
+ #include <linux/export.h>
  #include <linux/mm.h>
  #include <linux/page-flags.h>
  #include <linux/highmem.h>
@@@ -59,7 -59,6 +59,7 @@@
  #include <asm/xen/pci.h>
  #include <asm/xen/hypercall.h>
  #include <asm/xen/hypervisor.h>
 +#include <asm/xen/cpuid.h>
  #include <asm/fixmap.h>
  #include <asm/processor.h>
  #include <asm/proto.h>
@@@ -119,10 -118,6 +119,10 @@@ DEFINE_PER_CPU(struct vcpu_info *, xen_
   */
  DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
  
 +/* Linux <-> Xen vCPU id mapping */
 +DEFINE_PER_CPU(int, xen_vcpu_id) = -1;
 +EXPORT_PER_CPU_SYMBOL(xen_vcpu_id);
 +
  enum xen_domain_type xen_domain_type = XEN_NATIVE;
  EXPORT_SYMBOL_GPL(xen_domain_type);
  
@@@ -184,7 -179,7 +184,7 @@@ static void clamp_max_cpus(void
  #endif
  }
  
 -static void xen_vcpu_setup(int cpu)
 +void xen_vcpu_setup(int cpu)
  {
        struct vcpu_register_vcpu_info info;
        int err;
                if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu))
                        return;
        }
 -      if (cpu < MAX_VIRT_CPUS)
 -              per_cpu(xen_vcpu,cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
 +      if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS)
 +              per_cpu(xen_vcpu, cpu) =
 +                      &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)];
  
        if (!have_vcpu_info_placement) {
                if (cpu >= MAX_VIRT_CPUS)
           hypervisor has no unregister variant and this hypercall does not
           allow to over-write info.mfn and info.offset.
         */
 -      err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info);
 +      err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu),
 +                               &info);
  
        if (err) {
                printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err);
@@@ -254,11 -247,10 +254,11 @@@ void xen_vcpu_restore(void
  
        for_each_possible_cpu(cpu) {
                bool other_cpu = (cpu != smp_processor_id());
 -              bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL);
 +              bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu),
 +                                              NULL);
  
                if (other_cpu && is_up &&
 -                  HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
 +                  HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL))
                        BUG();
  
                xen_setup_runstate_info(cpu);
                        xen_vcpu_setup(cpu);
  
                if (other_cpu && is_up &&
 -                  HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
 +                  HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL))
                        BUG();
        }
  }
@@@ -529,7 -521,9 +529,7 @@@ static void set_aliased_prot(void *v, p
  
        preempt_disable();
  
 -      pagefault_disable();    /* Avoid warnings due to being atomic. */
 -      __get_user(dummy, (unsigned char __user __force *)v);
 -      pagefault_enable();
 +      probe_kernel_read(&dummy, v, 1);
  
        if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
                BUG();
@@@ -596,7 -590,7 +596,7 @@@ static void xen_load_gdt(const struct d
  {
        unsigned long va = dtr->address;
        unsigned int size = dtr->size + 1;
 -      unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
 +      unsigned pages = DIV_ROUND_UP(size, PAGE_SIZE);
        unsigned long frames[pages];
        int f;
  
@@@ -645,7 -639,7 +645,7 @@@ static void __init xen_load_gdt_boot(co
  {
        unsigned long va = dtr->address;
        unsigned int size = dtr->size + 1;
 -      unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
 +      unsigned pages = DIV_ROUND_UP(size, PAGE_SIZE);
        unsigned long frames[pages];
        int f;
  
@@@ -1143,11 -1137,8 +1143,11 @@@ void xen_setup_vcpu_info_placement(void
  {
        int cpu;
  
 -      for_each_possible_cpu(cpu)
 +      for_each_possible_cpu(cpu) {
 +              /* Set up direct vCPU id mapping for PV guests. */
 +              per_cpu(xen_vcpu_id, cpu) = cpu;
                xen_vcpu_setup(cpu);
 +      }
  
        /* xen_vcpu_setup managed to place the vcpu_info within the
         * percpu area for all cpus, so make use of it. Note that for
@@@ -1738,9 -1729,6 +1738,9 @@@ asmlinkage __visible void __init xen_st
  #endif
        xen_raw_console_write("about to get started...\n");
  
 +      /* Let's presume PV guests always boot on vCPU with id 0. */
 +      per_cpu(xen_vcpu_id, 0) = 0;
 +
        xen_setup_runstate_info(0);
  
        xen_efi_init();
@@@ -1782,10 -1770,9 +1782,10 @@@ void __ref xen_hvm_init_shared_info(voi
         * in that case multiple vcpus might be online. */
        for_each_online_cpu(cpu) {
                /* Leave it to be NULL. */
 -              if (cpu >= MAX_VIRT_CPUS)
 +              if (xen_vcpu_nr(cpu) >= MAX_VIRT_CPUS)
                        continue;
 -              per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
 +              per_cpu(xen_vcpu, cpu) =
 +                      &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)];
        }
  }
  
@@@ -1810,12 -1797,6 +1810,12 @@@ static void __init init_hvm_pv_info(voi
  
        xen_setup_features();
  
 +      cpuid(base + 4, &eax, &ebx, &ecx, &edx);
 +      if (eax & XEN_HVM_CPUID_VCPU_ID_PRESENT)
 +              this_cpu_write(xen_vcpu_id, ebx);
 +      else
 +              this_cpu_write(xen_vcpu_id, smp_processor_id());
 +
        pv_info.name = "Xen HVM";
  
        xen_domain_type = XEN_HVM_DOMAIN;
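
The Xen changes route every VCPUOP hypercall through xen_vcpu_nr() instead of the raw Linux CPU number, backed by the new per-CPU xen_vcpu_id populated here (ACPI id, CPUID leaf base+4, or the CPU number itself as a fallback). The helper itself is not shown in these hunks; presumably it is little more than a per_cpu() lookup along these lines (a sketch, not the actual definition):

#include <linux/percpu.h>

DECLARE_PER_CPU(int, xen_vcpu_id);

/* Sketch only: translate a Linux CPU id to the Xen vCPU id recorded above. */
static inline unsigned int demo_xen_vcpu_nr(int cpu)
{
	return per_cpu(xen_vcpu_id, cpu);
}
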
@@@ -1827,10 -1808,6 +1827,10 @@@ static int xen_hvm_cpu_notify(struct no
        int cpu = (long)hcpu;
        switch (action) {
        case CPU_UP_PREPARE:
 +              if (cpu_acpi_id(cpu) != U32_MAX)
 +                      per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu);
 +              else
 +                      per_cpu(xen_vcpu_id, cpu) = cpu;
                xen_vcpu_setup(cpu);
                if (xen_have_vector_callback) {
                        if (xen_feature(XENFEAT_hvm_safe_pvclock))