*/
#include <linux/perf_event.h>
- #include <linux/module.h>
+ #include <linux/init.h>
+ #include <linux/export.h>
#include <linux/pci.h>
#include <linux/ptrace.h>
#include <linux/syscore_ops.h>
}
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
- raw.size = sizeof(u32) + ibs_data.size;
- raw.data = ibs_data.data;
+ raw = (struct perf_raw_record){
+ .frag = {
+ .size = sizeof(u32) + ibs_data.size,
+ .data = ibs_data.data,
+ },
+ };
data.raw = &raw;
}
return ret;
}
-static __init int perf_event_ibs_init(void)
+static __init void perf_event_ibs_init(void)
{
struct attribute **attr = ibs_op_format_attrs;
- if (!ibs_caps)
- return -ENODEV; /* ibs not supported by the cpu */
-
perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
if (ibs_caps & IBS_CAPS_OPCNT) {
register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps);
-
- return 0;
}
#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */
-static __init int perf_event_ibs_init(void) { return 0; }
+static __init void perf_event_ibs_init(void) { }
#endif
return val & IBSCTL_LVT_OFFSET_MASK;
}
-static void setup_APIC_ibs(void *dummy)
+static void setup_APIC_ibs(void)
{
int offset;
smp_processor_id());
}
-static void clear_APIC_ibs(void *dummy)
+static void clear_APIC_ibs(void)
{
int offset;
setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
}
+static int x86_pmu_amd_ibs_starting_cpu(unsigned int cpu)
+{
+ setup_APIC_ibs();
+ return 0;
+}
+
#ifdef CONFIG_PM
static int perf_ibs_suspend(void)
{
- clear_APIC_ibs(NULL);
+ clear_APIC_ibs();
return 0;
}
static void perf_ibs_resume(void)
{
ibs_eilvt_setup();
- setup_APIC_ibs(NULL);
+ setup_APIC_ibs();
}
static struct syscore_ops perf_ibs_syscore_ops = {
#endif
-static int
-perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+static int x86_pmu_amd_ibs_dying_cpu(unsigned int cpu)
{
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_STARTING:
- setup_APIC_ibs(NULL);
- break;
- case CPU_DYING:
- clear_APIC_ibs(NULL);
- break;
- default:
- break;
- }
-
- return NOTIFY_OK;
+ clear_APIC_ibs();
+ return 0;
}
static __init int amd_ibs_init(void)
{
u32 caps;
- int ret = -EINVAL;
caps = __get_ibs_caps();
if (!caps)
ibs_eilvt_setup();
if (!ibs_eilvt_valid())
- goto out;
+ return -EINVAL;
perf_ibs_pm_init();
- cpu_notifier_register_begin();
+
ibs_caps = caps;
/* make ibs_caps visible to other cpus: */
smp_mb();
- smp_call_function(setup_APIC_ibs, NULL, 1);
- __perf_cpu_notifier(perf_ibs_cpu_notifier);
- cpu_notifier_register_done();
+ /*
+ * x86_pmu_amd_ibs_starting_cpu will be called from core on
+ * all online cpus.
+ */
+ cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_IBS_STARTING,
+ "AP_PERF_X86_AMD_IBS_STARTING",
+ x86_pmu_amd_ibs_starting_cpu,
+ x86_pmu_amd_ibs_dying_cpu);
- ret = perf_event_ibs_init();
-out:
- if (ret)
- pr_err("Failed to setup IBS, %d\n", ret);
- return ret;
+ perf_event_ibs_init();
+
+ return 0;
}
/* Since we need the pci subsystem to init ibs we can't do this earlier: */
#include <linux/notifier.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
- #include <linux/module.h>
+ #include <linux/export.h>
+ #include <linux/init.h>
#include <linux/kdebug.h>
#include <linux/sched.h>
#include <linux/uaccess.h>
msr_fail:
pr_cont("Broken PMU hardware detected, using software events only.\n");
- pr_info("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
+ printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR,
reg, val_new);
struct event_constraint emptyconstraint;
struct event_constraint unconstrained;
-static int
-x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+static int x86_pmu_prepare_cpu(unsigned int cpu)
{
- unsigned int cpu = (long)hcpu;
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
- int i, ret = NOTIFY_OK;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_UP_PREPARE:
- for (i = 0 ; i < X86_PERF_KFREE_MAX; i++)
- cpuc->kfree_on_online[i] = NULL;
- if (x86_pmu.cpu_prepare)
- ret = x86_pmu.cpu_prepare(cpu);
- break;
-
- case CPU_STARTING:
- if (x86_pmu.cpu_starting)
- x86_pmu.cpu_starting(cpu);
- break;
+ int i;
- case CPU_ONLINE:
- for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) {
- kfree(cpuc->kfree_on_online[i]);
- cpuc->kfree_on_online[i] = NULL;
- }
- break;
+ for (i = 0 ; i < X86_PERF_KFREE_MAX; i++)
+ cpuc->kfree_on_online[i] = NULL;
+ if (x86_pmu.cpu_prepare)
+ return x86_pmu.cpu_prepare(cpu);
+ return 0;
+}
- case CPU_DYING:
- if (x86_pmu.cpu_dying)
- x86_pmu.cpu_dying(cpu);
- break;
+static int x86_pmu_dead_cpu(unsigned int cpu)
+{
+ if (x86_pmu.cpu_dead)
+ x86_pmu.cpu_dead(cpu);
+ return 0;
+}
- case CPU_UP_CANCELED:
- case CPU_DEAD:
- if (x86_pmu.cpu_dead)
- x86_pmu.cpu_dead(cpu);
- break;
+static int x86_pmu_online_cpu(unsigned int cpu)
+{
+ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+ int i;
- default:
- break;
+ for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) {
+ kfree(cpuc->kfree_on_online[i]);
+ cpuc->kfree_on_online[i] = NULL;
}
+ return 0;
+}
- return ret;
+static int x86_pmu_starting_cpu(unsigned int cpu)
+{
+ if (x86_pmu.cpu_starting)
+ x86_pmu.cpu_starting(cpu);
+ return 0;
+}
+
+static int x86_pmu_dying_cpu(unsigned int cpu)
+{
+ if (x86_pmu.cpu_dying)
+ x86_pmu.cpu_dying(cpu);
+ return 0;
}
static void __init pmu_check_apic(void)
}
EXPORT_SYMBOL_GPL(events_sysfs_show);
+ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr,
+ char *page)
+{
+ struct perf_pmu_events_ht_attr *pmu_attr =
+ container_of(attr, struct perf_pmu_events_ht_attr, attr);
+
+ /*
+ * Report conditional events depending on Hyper-Threading.
+ *
+ * This is overly conservative as usually the HT special
+ * handling is not needed if the other CPU thread is idle.
+ *
+ * Note this does not (and cannot) handle the case when thread
+ * siblings are invisible, for example with virtualization
+ * if they are owned by some other guest. The user tool
+ * has to re-read when a thread sibling gets onlined later.
+ */
+ return sprintf(page, "%s",
+ topology_max_smt_threads() > 1 ?
+ pmu_attr->event_str_ht :
+ pmu_attr->event_str_noht);
+}
+
EVENT_ATTR(cpu-cycles, CPU_CYCLES );
EVENT_ATTR(instructions, INSTRUCTIONS );
EVENT_ATTR(cache-references, CACHE_REFERENCES );
pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
- perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
- perf_cpu_notifier(x86_pmu_notifier);
+ /*
+ * Install callbacks. Core will call them for each online
+ * cpu.
+ */
+ err = cpuhp_setup_state(CPUHP_PERF_X86_PREPARE, "PERF_X86_PREPARE",
+ x86_pmu_prepare_cpu, x86_pmu_dead_cpu);
+ if (err)
+ return err;
+
+ err = cpuhp_setup_state(CPUHP_AP_PERF_X86_STARTING,
+ "AP_PERF_X86_STARTING", x86_pmu_starting_cpu,
+ x86_pmu_dying_cpu);
+ if (err)
+ goto out;
+
+ err = cpuhp_setup_state(CPUHP_AP_PERF_X86_ONLINE, "AP_PERF_X86_ONLINE",
+ x86_pmu_online_cpu, NULL);
+ if (err)
+ goto out1;
+
+ err = perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
+ if (err)
+ goto out2;
return 0;
+
+out2:
+ cpuhp_remove_state(CPUHP_AP_PERF_X86_ONLINE);
+out1:
+ cpuhp_remove_state(CPUHP_AP_PERF_X86_STARTING);
+out:
+ cpuhp_remove_state(CPUHP_PERF_X86_PREPARE);
+ return err;
}
early_initcall(init_hw_perf_events);
+ #include <linux/module.h>
+
#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
#include "uncore.h"
static struct intel_uncore_type *empty_uncore[] = { NULL, };
static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct intel_uncore_type *type;
- struct intel_uncore_pmu *pmu;
+ struct intel_uncore_pmu *pmu = NULL;
struct intel_uncore_box *box;
int phys_id, pkg, ret;
}
type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
+
/*
- * for performance monitoring unit with multiple boxes,
- * each box has a different function id.
+ * Some platforms, e.g. Knights Landing, use a common PCI device ID
+ * for multiple instances of an uncore PMU device type. We should check
+ * PCI slot and func to indicate the uncore box.
*/
- pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
- /* Knights Landing uses a common PCI device ID for multiple instances of
- * an uncore PMU device type. There is only one entry per device type in
- * the knl_uncore_pci_ids table inspite of multiple devices present for
- * some device types. Hence PCI device idx would be 0 for all devices.
- * So increment pmu pointer to point to an unused array element.
- */
- if (boot_cpu_data.x86_model == 87) {
- while (pmu->func_id >= 0)
- pmu++;
+ if (id->driver_data & ~0xffff) {
+ struct pci_driver *pci_drv = pdev->driver;
+ const struct pci_device_id *ids = pci_drv->id_table;
+ unsigned int devfn;
+
+ while (ids && ids->vendor) {
+ if ((ids->vendor == pdev->vendor) &&
+ (ids->device == pdev->device)) {
+ devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
+ UNCORE_PCI_DEV_FUNC(ids->driver_data));
+ if (devfn == pdev->devfn) {
+ pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
+ break;
+ }
+ }
+ ids++;
+ }
+ if (pmu == NULL)
+ return -ENODEV;
+ } else {
+ /*
+ * for performance monitoring unit with multiple boxes,
+ * each box has a different function id.
+ */
+ pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
}
if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL))
static void uncore_pci_remove(struct pci_dev *pdev)
{
- struct intel_uncore_box *box = pci_get_drvdata(pdev);
+ struct intel_uncore_box *box;
struct intel_uncore_pmu *pmu;
int i, phys_id, pkg;
}
}
-static void uncore_cpu_dying(int cpu)
+static int uncore_cpu_dying(unsigned int cpu)
{
struct intel_uncore_type *type, **types = uncore_msr_uncores;
struct intel_uncore_pmu *pmu;
uncore_box_exit(box);
}
}
+ return 0;
}
-static void uncore_cpu_starting(int cpu, bool init)
+static int first_init;
+
+static int uncore_cpu_starting(unsigned int cpu)
{
struct intel_uncore_type *type, **types = uncore_msr_uncores;
struct intel_uncore_pmu *pmu;
struct intel_uncore_box *box;
int i, pkg, ncpus = 1;
- if (init) {
+ if (first_init) {
/*
* On init we get the number of online cpus in the package
* and set refcount for all of them.
uncore_box_init(box);
}
}
+
+ return 0;
}
-static int uncore_cpu_prepare(int cpu)
+static int uncore_cpu_prepare(unsigned int cpu)
{
struct intel_uncore_type *type, **types = uncore_msr_uncores;
struct intel_uncore_pmu *pmu;
uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
}
-static void uncore_event_exit_cpu(int cpu)
+static int uncore_event_cpu_offline(unsigned int cpu)
{
int target;
/* Check if exiting cpu is used for collecting uncore events */
if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
- return;
+ return 0;
/* Find a new cpu to collect uncore events */
target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
uncore_change_context(uncore_msr_uncores, cpu, target);
uncore_change_context(uncore_pci_uncores, cpu, target);
+ return 0;
}
-static void uncore_event_init_cpu(int cpu)
+static int uncore_event_cpu_online(unsigned int cpu)
{
int target;
*/
target = cpumask_any_and(&uncore_cpu_mask, topology_core_cpumask(cpu));
if (target < nr_cpu_ids)
- return;
+ return 0;
cpumask_set_cpu(cpu, &uncore_cpu_mask);
uncore_change_context(uncore_msr_uncores, -1, cpu);
uncore_change_context(uncore_pci_uncores, -1, cpu);
+ return 0;
}
-static int uncore_cpu_notifier(struct notifier_block *self,
- unsigned long action, void *hcpu)
-{
- unsigned int cpu = (long)hcpu;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_UP_PREPARE:
- return notifier_from_errno(uncore_cpu_prepare(cpu));
-
- case CPU_STARTING:
- uncore_cpu_starting(cpu, false);
- case CPU_DOWN_FAILED:
- uncore_event_init_cpu(cpu);
- break;
-
- case CPU_UP_CANCELED:
- case CPU_DYING:
- uncore_cpu_dying(cpu);
- break;
-
- case CPU_DOWN_PREPARE:
- uncore_event_exit_cpu(cpu);
- break;
- }
- return NOTIFY_OK;
-}
-
-static struct notifier_block uncore_cpu_nb = {
- .notifier_call = uncore_cpu_notifier,
- /*
- * to migrate uncore events, our notifier should be executed
- * before perf core's notifier.
- */
- .priority = CPU_PRI_PERF + 1,
-};
-
static int __init type_pmu_register(struct intel_uncore_type *type)
{
int i, ret;
return ret;
}
-static void __init uncore_cpu_setup(void *dummy)
-{
- uncore_cpu_starting(smp_processor_id(), true);
-}
-
-/* Lazy to avoid allocation of a few bytes for the normal case */
-static __initdata DECLARE_BITMAP(packages, MAX_LOCAL_APIC);
-
-static int __init uncore_cpumask_init(bool msr)
-{
- unsigned int cpu;
-
- for_each_online_cpu(cpu) {
- unsigned int pkg = topology_logical_package_id(cpu);
- int ret;
-
- if (test_and_set_bit(pkg, packages))
- continue;
- /*
- * The first online cpu of each package allocates and takes
- * the refcounts for all other online cpus in that package.
- * If msrs are not enabled no allocation is required.
- */
- if (msr) {
- ret = uncore_cpu_prepare(cpu);
- if (ret)
- return ret;
- }
- uncore_event_init_cpu(cpu);
- smp_call_function_single(cpu, uncore_cpu_setup, NULL, 1);
- }
- __register_cpu_notifier(&uncore_cpu_nb);
- return 0;
-}
-
#define X86_UNCORE_MODEL_MATCH(model, init) \
{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init }
};
static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
+ .cpu_init = skl_uncore_cpu_init,
.pci_init = skl_uncore_pci_init,
};
static const struct x86_cpu_id intel_uncore_match[] __initconst = {
- X86_UNCORE_MODEL_MATCH(26, nhm_uncore_init), /* Nehalem */
- X86_UNCORE_MODEL_MATCH(30, nhm_uncore_init),
- X86_UNCORE_MODEL_MATCH(37, nhm_uncore_init), /* Westmere */
- X86_UNCORE_MODEL_MATCH(44, nhm_uncore_init),
- X86_UNCORE_MODEL_MATCH(42, snb_uncore_init), /* Sandy Bridge */
- X86_UNCORE_MODEL_MATCH(58, ivb_uncore_init), /* Ivy Bridge */
- X86_UNCORE_MODEL_MATCH(60, hsw_uncore_init), /* Haswell */
- X86_UNCORE_MODEL_MATCH(69, hsw_uncore_init), /* Haswell Celeron */
- X86_UNCORE_MODEL_MATCH(70, hsw_uncore_init), /* Haswell */
- X86_UNCORE_MODEL_MATCH(61, bdw_uncore_init), /* Broadwell */
- X86_UNCORE_MODEL_MATCH(71, bdw_uncore_init), /* Broadwell */
- X86_UNCORE_MODEL_MATCH(45, snbep_uncore_init), /* Sandy Bridge-EP */
- X86_UNCORE_MODEL_MATCH(46, nhmex_uncore_init), /* Nehalem-EX */
- X86_UNCORE_MODEL_MATCH(47, nhmex_uncore_init), /* Westmere-EX aka. Xeon E7 */
- X86_UNCORE_MODEL_MATCH(62, ivbep_uncore_init), /* Ivy Bridge-EP */
- X86_UNCORE_MODEL_MATCH(63, hswep_uncore_init), /* Haswell-EP */
- X86_UNCORE_MODEL_MATCH(79, bdx_uncore_init), /* BDX-EP */
- X86_UNCORE_MODEL_MATCH(86, bdx_uncore_init), /* BDX-DE */
- X86_UNCORE_MODEL_MATCH(87, knl_uncore_init), /* Knights Landing */
- X86_UNCORE_MODEL_MATCH(94, skl_uncore_init), /* SkyLake */
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP, nhm_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM, nhm_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE, nhm_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EP, nhm_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, ivb_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, bdw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, bdw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EX, nhmex_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EX, nhmex_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, ivbep_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hswep_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, bdx_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, bdx_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_uncore_init),
{},
};
if (cret && pret)
return -ENODEV;
- cpu_notifier_register_begin();
- ret = uncore_cpumask_init(!cret);
- if (ret)
- goto err;
- cpu_notifier_register_done();
+ /*
+ * Install callbacks. Core will call them for each online cpu.
+ *
+ * The first online cpu of each package allocates and takes
+ * the refcounts for all other online cpus in that package.
+ * If msrs are not enabled no allocation is required and
+ * uncore_cpu_prepare() is not called for each online cpu.
+ */
+ if (!cret) {
+ ret = cpuhp_setup_state(CPUHP_PERF_X86_UNCORE_PREP,
+ "PERF_X86_UNCORE_PREP",
+ uncore_cpu_prepare, NULL);
+ if (ret)
+ goto err;
+ } else {
+ cpuhp_setup_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP,
+ "PERF_X86_UNCORE_PREP",
+ uncore_cpu_prepare, NULL);
+ }
+ first_init = 1;
+ cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_STARTING,
+ "AP_PERF_X86_UNCORE_STARTING",
+ uncore_cpu_starting, uncore_cpu_dying);
+ first_init = 0;
+ cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
+ "AP_PERF_X86_UNCORE_ONLINE",
+ uncore_event_cpu_online, uncore_event_cpu_offline);
return 0;
err:
on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1);
uncore_types_exit(uncore_msr_uncores);
uncore_pci_exit();
- cpu_notifier_register_done();
return ret;
}
module_init(intel_uncore_init);
static void __exit intel_uncore_exit(void)
{
- cpu_notifier_register_begin();
- __unregister_cpu_notifier(&uncore_cpu_nb);
+ cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
+ cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_STARTING);
+ cpuhp_remove_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP);
uncore_types_exit(uncore_msr_uncores);
uncore_pci_exit();
- cpu_notifier_register_done();
}
module_exit(intel_uncore_exit);
- #include <linux/module.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <asm/apicdef.h>
#define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC
#define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1)
+#define UNCORE_PCI_DEV_FULL_DATA(dev, func, type, idx) \
+ ((dev << 24) | (func << 16) | (type << 8) | idx)
#define UNCORE_PCI_DEV_DATA(type, idx) ((type << 8) | idx)
+#define UNCORE_PCI_DEV_DEV(data) ((data >> 24) & 0xff)
+#define UNCORE_PCI_DEV_FUNC(data) ((data >> 16) & 0xff)
#define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff)
#define UNCORE_PCI_DEV_IDX(data) (data & 0xff)
#define UNCORE_EXTRA_PCI_DEV 0xff
int skl_uncore_pci_init(void);
void snb_uncore_cpu_init(void);
void nhm_uncore_cpu_init(void);
+void skl_uncore_cpu_init(void);
int snb_pci2phy_map_init(int devid);
/* perf_event_intel_uncore_snbep.c */
#include <linux/cpumask.h>
#include <asm/mpspec.h>
+ #include <asm/percpu.h>
/* Mappings between logical cpu number and node number */
DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
extern unsigned int __max_logical_packages;
#define topology_max_packages() (__max_logical_packages)
+
+extern int __max_smt_threads;
+
+static inline int topology_max_smt_threads(void)
+{
+ return __max_smt_threads;
+}
+
int topology_update_package_map(unsigned int apicid, unsigned int cpu);
extern int topology_phys_to_logical_pkg(unsigned int pkg);
#else
static inline int
topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
+static inline int topology_max_smt_threads(void) { return 1; }
#endif
static inline void arch_fix_phys_package_id(int num, u32 slot)
#include <linux/acpi_pmtmr.h>
#include <linux/efi.h>
#include <linux/cpumask.h>
- #include <linux/module.h>
+ #include <linux/export.h>
#include <linux/dmi.h>
#include <linux/irq.h>
#include <linux/slab.h>
/**
* acpi_register_lapic - register a local apic and generates a logic cpu number
* @id: local apic id to register
+ * @acpiid: ACPI id to register
* @enabled: this cpu is enabled or not
*
* Returns the logic cpu number which maps to the local apic
*/
-static int acpi_register_lapic(int id, u8 enabled)
+static int acpi_register_lapic(int id, u32 acpiid, u8 enabled)
{
unsigned int ver = 0;
+ int cpu;
if (id >= MAX_LOCAL_APIC) {
printk(KERN_INFO PREFIX "skipped apicid that is too big\n");
if (boot_cpu_physical_apicid != -1U)
ver = apic_version[boot_cpu_physical_apicid];
- return generic_processor_info(id, ver);
+ cpu = generic_processor_info(id, ver);
+ if (cpu >= 0)
+ early_per_cpu(x86_cpu_to_acpiid, cpu) = acpiid;
+
+ return cpu;
}
static int __init
if (!apic->apic_id_valid(apic_id) && enabled)
printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
else
- acpi_register_lapic(apic_id, enabled);
+ acpi_register_lapic(apic_id, processor->uid, enabled);
#else
printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
#endif
* when we use CPU hotplug.
*/
acpi_register_lapic(processor->id, /* APIC ID */
+ processor->processor_id, /* ACPI ID */
processor->lapic_flags & ACPI_MADT_ENABLED);
return 0;
acpi_table_print_madt_entry(header);
acpi_register_lapic((processor->id << 8) | processor->eid,/* APIC ID */
+ processor->processor_id, /* ACPI ID */
processor->lapic_flags & ACPI_MADT_ENABLED);
return 0;
{
int cpu;
- cpu = acpi_register_lapic(physid, ACPI_MADT_ENABLED);
+ cpu = acpi_register_lapic(physid, U32_MAX, ACPI_MADT_ENABLED);
if (cpu < 0) {
pr_info(PREFIX "Unable to map lapic to logical cpu number\n");
return cpu;
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/errno.h>
- #include <linux/module.h>
+ #include <linux/export.h>
#include <linux/spinlock.h>
#include <asm/amd_nb.h>
return 0;
}
-static int amd_cache_gart(void)
+static void amd_cache_gart(void)
{
u16 i;
- if (!amd_nb_has_feature(AMD_NB_GART))
- return 0;
-
- flush_words = kmalloc(amd_nb_num() * sizeof(u32), GFP_KERNEL);
- if (!flush_words) {
- amd_northbridges.flags &= ~AMD_NB_GART;
- return -ENOMEM;
- }
+ if (!amd_nb_has_feature(AMD_NB_GART))
+ return;
- for (i = 0; i != amd_nb_num(); i++)
- pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c,
- &flush_words[i]);
+ flush_words = kmalloc(amd_nb_num() * sizeof(u32), GFP_KERNEL);
+ if (!flush_words) {
+ amd_northbridges.flags &= ~AMD_NB_GART;
+ pr_notice("Cannot initialize GART flush words, GART support disabled\n");
+ return;
+ }
- return 0;
+ for (i = 0; i != amd_nb_num(); i++)
+ pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c, &flush_words[i]);
}
void amd_flush_garts(void)
static __init int init_amd_nbs(void)
{
- int err = 0;
+ amd_cache_northbridges();
+ amd_cache_gart();
- err = amd_cache_northbridges();
-
- if (err < 0)
- pr_notice("Cannot enumerate AMD northbridges\n");
-
- if (amd_cache_gart() < 0)
- pr_notice("Cannot initialize GART flush words, GART support disabled\n");
-
- return err;
+ return 0;
}
/* This has to go after the PCI subsystem */
#include <linux/bootmem.h>
#include <linux/ftrace.h>
#include <linux/ioport.h>
- #include <linux/module.h>
+ #include <linux/export.h>
#include <linux/syscore_ops.h>
#include <linux/delay.h>
#include <linux/timex.h>
*/
DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID);
DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID);
+DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, U32_MAX);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid);
#ifdef CONFIG_X86_32
int thiscpu = max + disabled_cpus - 1;
pr_warning(
- "ACPI: NR_CPUS/possible_cpus limit of %i almost"
+ "APIC: NR_CPUS/possible_cpus limit of %i almost"
" reached. Keeping one slot for boot cpu."
" Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
int thiscpu = max + disabled_cpus;
pr_warning(
- "ACPI: NR_CPUS/possible_cpus limit of %i reached."
+ "APIC: NR_CPUS/possible_cpus limit of %i reached."
" Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
disabled_cpus++;
if (topology_update_package_map(apicid, cpu) < 0) {
int thiscpu = max + disabled_cpus;
- pr_warning("ACPI: Package limit reached. Processor %d/0x%x ignored.\n",
+ pr_warning("APIC: Package limit reached. Processor %d/0x%x ignored.\n",
thiscpu, apicid);
disabled_cpus++;
return -ENOSPC;
#include <linux/kernel.h>
#include <linux/ctype.h>
#include <linux/hardirq.h>
- #include <linux/module.h>
+ #include <linux/export.h>
#include <asm/smp.h>
#include <asm/apic.h>
#include <asm/ipi.h>
.get_apic_id = flat_get_apic_id,
.set_apic_id = set_apic_id,
- .apic_id_mask = 0xFFu << 24,
.cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
.get_apic_id = flat_get_apic_id,
.set_apic_id = set_apic_id,
- .apic_id_mask = 0xFFu << 24,
.cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
#include <linux/threads.h>
#include <linux/cpumask.h>
- #include <linux/module.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/ctype.h>
.get_apic_id = noop_get_apic_id,
.set_apic_id = NULL,
- .apic_id_mask = 0x0F << 24,
.cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
#include <linux/mc146818rtc.h>
#include <linux/compiler.h>
#include <linux/acpi.h>
- #include <linux/module.h>
+ #include <linux/export.h>
#include <linux/syscore_ops.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
return __irq_domain_alloc_irqs(domain, irq, 1,
ioapic_alloc_attr_node(info),
- info, legacy);
+ info, legacy, NULL);
}
/*
info->ioapic_pin))
return -ENOMEM;
} else {
- irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true);
+ irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true,
+ NULL);
if (irq >= 0) {
irq_data = irq_domain_get_irq_data(domain, irq);
data = irq_data->chip_data;
unsigned long n;
struct resource *res;
char *mem;
- int i, num = 0;
+ int i;
- for_each_ioapic(i)
- num++;
- if (num == 0)
+ if (nr_ioapics == 0)
return NULL;
n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
- n *= num;
+ n *= nr_ioapics;
mem = alloc_bootmem(n);
res = (void *)mem;
- mem += sizeof(struct resource) * num;
+ mem += sizeof(struct resource) * nr_ioapics;
- num = 0;
for_each_ioapic(i) {
- res[num].name = mem;
- res[num].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ res[i].name = mem;
+ res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i);
mem += IOAPIC_RESOURCE_NAME_SIZE;
- ioapics[i].iomem_res = &res[num];
- num++;
+ ioapics[i].iomem_res = &res[i];
}
ioapic_resources = res;
*/
#include <linux/threads.h>
#include <linux/cpumask.h>
- #include <linux/module.h>
+ #include <linux/export.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/ctype.h>
.get_apic_id = default_get_apic_id,
.set_apic_id = NULL,
- .apic_id_mask = 0x0F << 24,
.cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
#include <linux/proc_fs.h>
#include <linux/threads.h>
#include <linux/kernel.h>
- #include <linux/module.h>
+ #include <linux/export.h>
#include <linux/string.h>
#include <linux/ctype.h>
#include <linux/sched.h>
.get_apic_id = x2apic_get_apic_id,
.set_apic_id = set_apic_id,
- .apic_id_mask = 0xFFFFFFFFu,
.cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and,
uv_set_scir_bits(bits);
/* enable next timer period */
- mod_timer_pinned(timer, jiffies + SCIR_CPU_HB_INTERVAL);
+ mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL);
}
static void uv_heartbeat_enable(int cpu)
struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer;
uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
- setup_timer(timer, uv_heartbeat, cpu);
+ setup_pinned_timer(timer, uv_heartbeat, cpu);
timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
add_timer_on(timer, cpu);
uv_cpu_scir_info(cpu)->enabled = 1;
#include <linux/linkage.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
- #include <linux/module.h>
+ #include <linux/export.h>
#include <linux/percpu.h>
#include <linux/string.h>
#include <linux/ctype.h>
struct task_struct *me;
struct tss_struct *t;
unsigned long v;
- int cpu = stack_smp_processor_id();
+ int cpu = raw_smp_processor_id();
int i;
wait_for_master_cpu(cpu);
#include <linux/smp.h>
#include <linux/sched.h>
#include <linux/thread_info.h>
- #include <linux/module.h>
+ #include <linux/init.h>
#include <linux/uaccess.h>
#include <asm/cpufeature.h>
#include <asm/msr.h>
#include <asm/bugs.h>
#include <asm/cpu.h>
+#include <asm/intel-family.h>
#ifdef CONFIG_X86_64
#include <linux/topology.h>
(c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
+ if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_MWAIT) &&
+ ((c->x86_model == INTEL_FAM6_ATOM_GOLDMONT)))
+ set_cpu_bug(c, X86_BUG_MONITOR);
+
#ifdef CONFIG_X86_64
if (c->x86 == 15)
c->x86_cache_alignment = c->x86_clflush_size * 2;
#include <linux/uaccess.h>
#include <linux/hardirq.h>
#include <linux/kdebug.h>
- #include <linux/module.h>
+ #include <linux/export.h>
#include <linux/ptrace.h>
#include <linux/kexec.h>
#include <linux/sysfs.h>
int i;
if (sp == NULL) {
- if (task)
+ if (regs)
+ sp = (unsigned long *)regs->sp;
+ else if (task)
sp = (unsigned long *)task->thread.sp;
else
sp = (unsigned long *)&sp;
#include <linux/uaccess.h>
#include <linux/hardirq.h>
#include <linux/kdebug.h>
- #include <linux/module.h>
+ #include <linux/export.h>
#include <linux/ptrace.h>
#include <linux/kexec.h>
#include <linux/sysfs.h>
* back trace for this cpu:
*/
if (sp == NULL) {
- if (task)
+ if (regs)
+ sp = (unsigned long *)regs->sp;
+ else if (task)
sp = (unsigned long *)task->thread.sp;
else
sp = (unsigned long *)&sp;
stack = sp;
for (i = 0; i < kstack_depth_to_print; i++) {
+ unsigned long word;
+
if (stack >= irq_stack && stack <= irq_stack_end) {
if (stack == irq_stack_end) {
stack = (unsigned long *) (irq_stack_end[-1]);
if (kstack_end(stack))
break;
}
+
+ if (probe_kernel_address(stack, word))
+ break;
+
if ((i % STACKSLOTS_PER_LINE) == 0) {
if (i != 0)
pr_cont("\n");
- printk("%s %016lx", log_lvl, *stack++);
+ printk("%s %016lx", log_lvl, word);
} else
- pr_cont(" %016lx", *stack++);
+ pr_cont(" %016lx", word);
+
+ stack++;
touch_nmi_watchdog();
}
preempt_enable();
- #include <linux/module.h>
+ #include <linux/export.h>
+ #include <linux/spinlock_types.h>
#include <asm/checksum.h>
#include <asm/pgtable.h>
EXPORT_SYMBOL(___preempt_schedule);
EXPORT_SYMBOL(___preempt_schedule_notrace);
#endif
+
+EXPORT_SYMBOL(__sw_hweight32);
*/
#include <linux/context_tracking.h>
- #include <linux/module.h>
+ #include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kvm_para.h>
#include <linux/cpu.h>
if (!has_steal_clock)
return;
- memset(st, 0, sizeof(*st));
-
wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED));
pr_info("kvm-stealtime: cpu %d, msr %llx\n",
cpu, (unsigned long long) slow_virt_to_phys(st));
#include <linux/prctl.h>
#include <linux/slab.h>
#include <linux/sched.h>
- #include <linux/module.h>
+ #include <linux/init.h>
+ #include <linux/export.h>
#include <linux/pm.h>
#include <linux/tick.h>
#include <linux/random.h>
if (c->x86_vendor != X86_VENDOR_INTEL)
return 0;
- if (!cpu_has(c, X86_FEATURE_MWAIT))
+ if (!cpu_has(c, X86_FEATURE_MWAIT) || static_cpu_has_bug(X86_BUG_MONITOR))
return 0;
return 1;
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
- #include <linux/module.h>
+ #include <linux/export.h>
#include <linux/reboot.h>
#include <linux/init.h>
#include <linux/pm.h>
* Dell Inc. so their systems "just work". :-)
*/
+/*
+ * Some machines require the "reboot=a" commandline options
+ */
+static int __init set_acpi_reboot(const struct dmi_system_id *d)
+{
+ if (reboot_type != BOOT_ACPI) {
+ reboot_type = BOOT_ACPI;
+ pr_info("%s series board detected. Selecting %s-method for reboots.\n",
+ d->ident, "ACPI");
+ }
+ return 0;
+}
+
/*
* Some machines require the "reboot=b" or "reboot=k" commandline options,
* this quirk makes that automatic.
DMI_MATCH(DMI_PRODUCT_NAME, "Dell XPS710"),
},
},
+ { /* Handle problems with rebooting on Dell Optiplex 7450 AIO */
+ .callback = set_acpi_reboot,
+ .ident = "Dell OptiPlex 7450 AIO",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 7450 AIO"),
+ },
+ },
/* Hewlett-Packard */
{ /* Handle problems with rebooting on HP laptops */
#include <linux/console.h>
#include <linux/root_dev.h>
#include <linux/highmem.h>
- #include <linux/module.h>
+ #include <linux/export.h>
#include <linux/efi.h>
#include <linux/init.h>
#include <linux/edd.h>
#include <asm/prom.h>
#include <asm/microcode.h>
#include <asm/mmu_context.h>
+#include <asm/kaslr.h>
/*
* max_low_pfn_mapped: highest direct mapped pfn under 4GB
memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
}
-static void __init early_initrd_acpi_init(void)
-{
- early_acpi_table_init((void *)initrd_start, initrd_end - initrd_start);
-}
#else
static void __init early_reserve_initrd(void)
{
static void __init reserve_initrd(void)
{
}
-static void __init early_initrd_acpi_init(void)
-{
-}
#endif /* CONFIG_BLK_DEV_INITRD */
static void __init parse_setup_data(void)
x86_init.oem.arch_setup();
+ kernel_randomize_memory();
+
iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
setup_memory_map();
parse_setup_data();
reserve_initrd();
- early_initrd_acpi_init();
+ acpi_table_upgrade();
vsmp_init();
#include <linux/init.h>
#include <linux/smp.h>
- #include <linux/module.h>
+ #include <linux/export.h>
#include <linux/sched.h>
#include <linux/percpu.h>
#include <linux/bootmem.h>
unsigned int __max_logical_packages __read_mostly;
EXPORT_SYMBOL(__max_logical_packages);
+/* Maximum number of SMT threads on any online core */
+int __max_smt_threads __read_mostly;
+
static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
{
unsigned long flags;
bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1;
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct cpuinfo_x86 *o;
- int i;
+ int i, threads;
cpumask_set_cpu(cpu, cpu_sibling_setup_mask);
if (match_die(c, o) && !topology_same_node(c, o))
primarily_use_numa_for_topology();
}
+
+ threads = cpumask_weight(topology_sibling_cpumask(cpu));
+ if (threads > __max_smt_threads)
+ __max_smt_threads = threads;
}
/* maps the cpu to the sched domain representing multi-core */
cpumask_copy(cpu_callin_mask, cpumask_of(0));
mb();
- current_thread_info()->cpu = 0; /* needed? */
for_each_possible_cpu(i) {
zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
#ifdef CONFIG_HOTPLUG_CPU
+/* Recompute SMT state for all CPUs on offline */
+static void recompute_smt_state(void)
+{
+ int max_threads, cpu;
+
+ max_threads = 0;
+ for_each_online_cpu (cpu) {
+ int threads = cpumask_weight(topology_sibling_cpumask(cpu));
+
+ if (threads > max_threads)
+ max_threads = threads;
+ }
+ __max_smt_threads = max_threads;
+}
+
static void remove_siblinginfo(int cpu)
{
int sibling;
c->phys_proc_id = 0;
c->cpu_core_id = 0;
cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
+ recompute_smt_state();
}
static void remove_cpu_from_maps(int cpu)
}
}
-static inline void hlt_play_dead(void)
+void hlt_play_dead(void)
{
if (__this_cpu_read(cpu_info.x86) >= 4)
wbinvd();
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
- #include <linux/module.h>
+ #include <linux/export.h>
#include <linux/timer.h>
#include <linux/acpi_pmtmr.h>
#include <linux/cpufreq.h>
return ns;
}
-static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
+static void set_cyc2ns_scale(unsigned long khz, int cpu)
{
unsigned long long tsc_now, ns_now;
struct cyc2ns_data *data;
local_irq_save(flags);
sched_clock_idle_sleep_event();
- if (!cpu_khz)
+ if (!khz)
goto done;
data = cyc2ns_write_begin(cpu);
* time function is continuous; see the comment near struct
* cyc2ns_data.
*/
- clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, cpu_khz,
+ clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, khz,
NSEC_PER_MSEC, 0);
/*
}
EXPORT_SYMBOL_GPL(check_tsc_unstable);
-int check_tsc_disabled(void)
-{
- return tsc_disabled;
-}
-EXPORT_SYMBOL_GPL(check_tsc_disabled);
-
#ifdef CONFIG_X86_TSC
int __init notsc_setup(char *str)
{
}
/**
- * native_calibrate_tsc - calibrate the tsc on boot
+ * native_calibrate_tsc
+ * Determine TSC frequency via CPUID, else return 0.
*/
unsigned long native_calibrate_tsc(void)
+{
+ unsigned int eax_denominator, ebx_numerator, ecx_hz, edx;
+ unsigned int crystal_khz;
+
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ return 0;
+
+ if (boot_cpu_data.cpuid_level < 0x15)
+ return 0;
+
+ eax_denominator = ebx_numerator = ecx_hz = edx = 0;
+
+ /* CPUID 15H TSC/Crystal ratio, plus optionally Crystal Hz */
+ cpuid(0x15, &eax_denominator, &ebx_numerator, &ecx_hz, &edx);
+
+ if (ebx_numerator == 0 || eax_denominator == 0)
+ return 0;
+
+ crystal_khz = ecx_hz / 1000;
+
+ if (crystal_khz == 0) {
+ switch (boot_cpu_data.x86_model) {
+ case 0x4E: /* SKL */
+ case 0x5E: /* SKL */
+ crystal_khz = 24000; /* 24.0 MHz */
+ break;
+ case 0x5C: /* BXT */
+ crystal_khz = 19200; /* 19.2 MHz */
+ break;
+ }
+ }
+
+ return crystal_khz * ebx_numerator / eax_denominator;
+}
+
+static unsigned long cpu_khz_from_cpuid(void)
+{
+ unsigned int eax_base_mhz, ebx_max_mhz, ecx_bus_mhz, edx;
+
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ return 0;
+
+ if (boot_cpu_data.cpuid_level < 0x16)
+ return 0;
+
+ eax_base_mhz = ebx_max_mhz = ecx_bus_mhz = edx = 0;
+
+ cpuid(0x16, &eax_base_mhz, &ebx_max_mhz, &ecx_bus_mhz, &edx);
+
+ return eax_base_mhz * 1000;
+}
+
+/**
+ * native_calibrate_cpu - calibrate the cpu on boot
+ */
+unsigned long native_calibrate_cpu(void)
{
u64 tsc1, tsc2, delta, ref1, ref2;
unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
unsigned long flags, latch, ms, fast_calibrate;
int hpet = is_hpet_enabled(), i, loopmin;
- /* Calibrate TSC using MSR for Intel Atom SoCs */
- local_irq_save(flags);
- fast_calibrate = try_msr_calibrate_tsc();
- local_irq_restore(flags);
+ fast_calibrate = cpu_khz_from_cpuid();
+ if (fast_calibrate)
+ return fast_calibrate;
+
+ fast_calibrate = cpu_khz_from_msr();
if (fast_calibrate)
return fast_calibrate;
if (!boot_cpu_has(X86_FEATURE_TSC))
return -ENODEV;
+ cpu_khz = x86_platform.calibrate_cpu();
tsc_khz = x86_platform.calibrate_tsc();
- cpu_khz = tsc_khz;
+ if (tsc_khz == 0)
+ tsc_khz = cpu_khz;
+ else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz)
+ cpu_khz = tsc_khz;
cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy,
cpu_khz_old, cpu_khz);
return;
}
+ cpu_khz = x86_platform.calibrate_cpu();
tsc_khz = x86_platform.calibrate_tsc();
- cpu_khz = tsc_khz;
+
+ /*
+ * Trust non-zero tsc_khz as authorative,
+ * and use it to sanity check cpu_khz,
+ * which will be off if system timer is off.
+ */
+ if (tsc_khz == 0)
+ tsc_khz = cpu_khz;
+ else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz)
+ cpu_khz = tsc_khz;
if (!tsc_khz) {
mark_tsc_unstable("could not calculate TSC khz");
*/
for_each_possible_cpu(cpu) {
cyc2ns_init(cpu);
- set_cyc2ns_scale(cpu_khz, cpu);
+ set_cyc2ns_scale(tsc_khz, cpu);
}
if (tsc_disabled > 0)
/* Exports for assembly files.
All C exports should go in the respective C files. */
- #include <linux/module.h>
+ #include <linux/export.h>
+ #include <linux/spinlock_types.h>
#include <linux/smp.h>
#include <net/checksum.h>
EXPORT_SYMBOL(csum_partial);
+EXPORT_SYMBOL(__sw_hweight32);
+EXPORT_SYMBOL(__sw_hweight64);
+
/*
* Export string functions. We normally rely on gcc builtin for most of these,
* but gcc sometimes decides not to inline them.
*/
#include <linux/init.h>
#include <linux/ioport.h>
- #include <linux/module.h>
+ #include <linux/export.h>
#include <linux/pci.h>
#include <asm/bios_ebda.h>
static int default_i8042_detect(void) { return 1; };
struct x86_platform_ops x86_platform = {
+ .calibrate_cpu = native_calibrate_cpu,
.calibrate_tsc = native_calibrate_tsc,
.get_wallclock = mach_get_cmos_time,
.set_wallclock = mach_set_rtc_mmss,
*/
#include <linux/kvm_host.h>
- #include <linux/module.h>
+ #include <linux/export.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
#include <asm/fpu/internal.h> /* For use_eager_fpu. Ugh! */
F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
- F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(PCOMMIT);
+ F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB);
/* cpuid 0xD.1.eax */
const u32 kvm_cpuid_D_1_eax_x86_features =
#include <linux/kvm.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>
- #include <linux/module.h>
+ #include <linux/export.h>
+ #include <linux/moduleparam.h>
#include <linux/mman.h>
#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/irqbypass.h>
#include <trace/events/kvm.h>
-#define CREATE_TRACE_POINTS
-#include "trace.h"
-
#include <asm/debugreg.h>
#include <asm/msr.h>
#include <asm/desc.h>
#include <asm/div64.h>
#include <asm/irq_remapping.h>
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
#define MAX_IO_MSRS 256
#define KVM_MAX_MCE_BANKS 32
#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
}
EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
-static void tsc_bad(void *info)
+static int kvmclock_cpu_down_prep(unsigned int cpu)
{
__this_cpu_write(cpu_tsc_khz, 0);
+ return 0;
}
static void tsc_khz_changed(void *data)
.notifier_call = kvmclock_cpufreq_notifier
};
-static int kvmclock_cpu_notifier(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+static int kvmclock_cpu_online(unsigned int cpu)
{
- unsigned int cpu = (unsigned long)hcpu;
-
- switch (action) {
- case CPU_ONLINE:
- case CPU_DOWN_FAILED:
- smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
- break;
- case CPU_DOWN_PREPARE:
- smp_call_function_single(cpu, tsc_bad, NULL, 1);
- break;
- }
- return NOTIFY_OK;
+ tsc_khz_changed(NULL);
+ return 0;
}
-static struct notifier_block kvmclock_cpu_notifier_block = {
- .notifier_call = kvmclock_cpu_notifier,
- .priority = -INT_MAX
-};
-
static void kvm_timer_init(void)
{
int cpu;
max_tsc_khz = tsc_khz;
- cpu_notifier_register_begin();
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
#ifdef CONFIG_CPU_FREQ
struct cpufreq_policy policy;
CPUFREQ_TRANSITION_NOTIFIER);
}
pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
- for_each_online_cpu(cpu)
- smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
-
- __register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
- cpu_notifier_register_done();
+ cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "AP_X86_KVM_CLK_ONLINE",
+ kvmclock_cpu_online, kvmclock_cpu_down_prep);
}
static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
- unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
+ cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
#ifdef CONFIG_X86_64
pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
#endif
* Wrappers of assembly checksum functions for x86-64.
*/
#include <asm/checksum.h>
- #include <linux/module.h>
+ #include <linux/export.h>
+#include <linux/uaccess.h>
#include <asm/smap.h>
/**
* Copyright 1997 Linus Torvalds
* Copyright 2002 Andi Kleen <ak@suse.de>
*/
- #include <linux/module.h>
+ #include <linux/export.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
/*
* Zero Userspace
#include <linux/debugfs.h>
#include <linux/mm.h>
- #include <linux/module.h>
+ #include <linux/init.h>
#include <linux/seq_file.h>
#include <asm/pgtable.h>
{ 0, "User Space" },
#ifdef CONFIG_X86_64
{ 0x8000000000000000UL, "Kernel Space" },
- { PAGE_OFFSET, "Low Kernel Mapping" },
- { VMALLOC_START, "vmalloc() Area" },
- { VMEMMAP_START, "Vmemmap" },
+ { 0/* PAGE_OFFSET */, "Low Kernel Mapping" },
+ { 0/* VMALLOC_START */, "vmalloc() Area" },
+ { 0/* VMEMMAP_START */, "Vmemmap" },
# ifdef CONFIG_X86_ESPFIX64
{ ESPFIX_BASE_ADDR, "ESPfix Area", 16 },
# endif
static int __init pt_dump_init(void)
{
+ /*
+ * Various markers are not compile-time constants, so assign them
+ * here.
+ */
+#ifdef CONFIG_X86_64
+ address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET;
+ address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
+ address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START;
+#endif
#ifdef CONFIG_X86_32
- /* Not a compile-time constant on x86-32 */
address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
# ifdef CONFIG_HIGHMEM
return 0;
}
-
__initcall(pt_dump_init);
- MODULE_LICENSE("GPL");
- MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
- MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables");
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/dma-mapping.h>
- #include <linux/module.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/memremap.h>
}
}
+/*
+ * Create PTE level page table mapping for physical addresses.
+ * It returns the last physical address mapped.
+ */
static unsigned long __meminit
-phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
+phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
pgprot_t prot)
{
- unsigned long pages = 0, next;
- unsigned long last_map_addr = end;
+ unsigned long pages = 0, paddr_next;
+ unsigned long paddr_last = paddr_end;
+ pte_t *pte;
int i;
- pte_t *pte = pte_page + pte_index(addr);
+ pte = pte_page + pte_index(paddr);
+ i = pte_index(paddr);
- for (i = pte_index(addr); i < PTRS_PER_PTE; i++, addr = next, pte++) {
- next = (addr & PAGE_MASK) + PAGE_SIZE;
- if (addr >= end) {
+ for (; i < PTRS_PER_PTE; i++, paddr = paddr_next, pte++) {
+ paddr_next = (paddr & PAGE_MASK) + PAGE_SIZE;
+ if (paddr >= paddr_end) {
if (!after_bootmem &&
- !e820_any_mapped(addr & PAGE_MASK, next, E820_RAM) &&
- !e820_any_mapped(addr & PAGE_MASK, next, E820_RESERVED_KERN))
+ !e820_any_mapped(paddr & PAGE_MASK, paddr_next,
+ E820_RAM) &&
+ !e820_any_mapped(paddr & PAGE_MASK, paddr_next,
+ E820_RESERVED_KERN))
set_pte(pte, __pte(0));
continue;
}
* pagetable pages as RO. So assume someone who pre-setup
* these mappings are more intelligent.
*/
- if (pte_val(*pte)) {
+ if (!pte_none(*pte)) {
if (!after_bootmem)
pages++;
continue;
}
if (0)
- printk(" pte=%p addr=%lx pte=%016lx\n",
- pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte);
+ pr_info(" pte=%p addr=%lx pte=%016lx\n", pte, paddr,
+ pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte);
pages++;
- set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, prot));
- last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE;
+ set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
+ paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE;
}
update_page_count(PG_LEVEL_4K, pages);
- return last_map_addr;
+ return paddr_last;
}
+/*
+ * Create PMD level page table mapping for physical addresses. The virtual
+ * and physical address have to be aligned at this level.
+ * It returns the last physical address mapped.
+ */
static unsigned long __meminit
-phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
+phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
unsigned long page_size_mask, pgprot_t prot)
{
- unsigned long pages = 0, next;
- unsigned long last_map_addr = end;
+ unsigned long pages = 0, paddr_next;
+ unsigned long paddr_last = paddr_end;
- int i = pmd_index(address);
+ int i = pmd_index(paddr);
- for (; i < PTRS_PER_PMD; i++, address = next) {
- pmd_t *pmd = pmd_page + pmd_index(address);
+ for (; i < PTRS_PER_PMD; i++, paddr = paddr_next) {
+ pmd_t *pmd = pmd_page + pmd_index(paddr);
pte_t *pte;
pgprot_t new_prot = prot;
- next = (address & PMD_MASK) + PMD_SIZE;
- if (address >= end) {
+ paddr_next = (paddr & PMD_MASK) + PMD_SIZE;
+ if (paddr >= paddr_end) {
if (!after_bootmem &&
- !e820_any_mapped(address & PMD_MASK, next, E820_RAM) &&
- !e820_any_mapped(address & PMD_MASK, next, E820_RESERVED_KERN))
+ !e820_any_mapped(paddr & PMD_MASK, paddr_next,
+ E820_RAM) &&
+ !e820_any_mapped(paddr & PMD_MASK, paddr_next,
+ E820_RESERVED_KERN))
set_pmd(pmd, __pmd(0));
continue;
}
- if (pmd_val(*pmd)) {
+ if (!pmd_none(*pmd)) {
if (!pmd_large(*pmd)) {
spin_lock(&init_mm.page_table_lock);
pte = (pte_t *)pmd_page_vaddr(*pmd);
- last_map_addr = phys_pte_init(pte, address,
- end, prot);
+ paddr_last = phys_pte_init(pte, paddr,
+ paddr_end, prot);
spin_unlock(&init_mm.page_table_lock);
continue;
}
if (page_size_mask & (1 << PG_LEVEL_2M)) {
if (!after_bootmem)
pages++;
- last_map_addr = next;
+ paddr_last = paddr_next;
continue;
}
new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd));
pages++;
spin_lock(&init_mm.page_table_lock);
set_pte((pte_t *)pmd,
- pfn_pte((address & PMD_MASK) >> PAGE_SHIFT,
+ pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
__pgprot(pgprot_val(prot) | _PAGE_PSE)));
spin_unlock(&init_mm.page_table_lock);
- last_map_addr = next;
+ paddr_last = paddr_next;
continue;
}
pte = alloc_low_page();
- last_map_addr = phys_pte_init(pte, address, end, new_prot);
+ paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot);
spin_lock(&init_mm.page_table_lock);
pmd_populate_kernel(&init_mm, pmd, pte);
spin_unlock(&init_mm.page_table_lock);
}
update_page_count(PG_LEVEL_2M, pages);
- return last_map_addr;
+ return paddr_last;
}
+/*
+ * Create PUD level page table mapping for physical addresses. The virtual
+ * and physical address do not have to be aligned at this level. KASLR can
+ * randomize virtual addresses up to this level.
+ * It returns the last physical address mapped.
+ */
static unsigned long __meminit
-phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
- unsigned long page_size_mask)
+phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
+ unsigned long page_size_mask)
{
- unsigned long pages = 0, next;
- unsigned long last_map_addr = end;
- int i = pud_index(addr);
+ unsigned long pages = 0, paddr_next;
+ unsigned long paddr_last = paddr_end;
+ unsigned long vaddr = (unsigned long)__va(paddr);
+ int i = pud_index(vaddr);
- for (; i < PTRS_PER_PUD; i++, addr = next) {
- pud_t *pud = pud_page + pud_index(addr);
+ for (; i < PTRS_PER_PUD; i++, paddr = paddr_next) {
+ pud_t *pud;
pmd_t *pmd;
pgprot_t prot = PAGE_KERNEL;
- next = (addr & PUD_MASK) + PUD_SIZE;
- if (addr >= end) {
+ vaddr = (unsigned long)__va(paddr);
+ pud = pud_page + pud_index(vaddr);
+ paddr_next = (paddr & PUD_MASK) + PUD_SIZE;
+
+ if (paddr >= paddr_end) {
if (!after_bootmem &&
- !e820_any_mapped(addr & PUD_MASK, next, E820_RAM) &&
- !e820_any_mapped(addr & PUD_MASK, next, E820_RESERVED_KERN))
+ !e820_any_mapped(paddr & PUD_MASK, paddr_next,
+ E820_RAM) &&
+ !e820_any_mapped(paddr & PUD_MASK, paddr_next,
+ E820_RESERVED_KERN))
set_pud(pud, __pud(0));
continue;
}
- if (pud_val(*pud)) {
+ if (!pud_none(*pud)) {
if (!pud_large(*pud)) {
pmd = pmd_offset(pud, 0);
- last_map_addr = phys_pmd_init(pmd, addr, end,
- page_size_mask, prot);
+ paddr_last = phys_pmd_init(pmd, paddr,
+ paddr_end,
+ page_size_mask,
+ prot);
__flush_tlb_all();
continue;
}
if (page_size_mask & (1 << PG_LEVEL_1G)) {
if (!after_bootmem)
pages++;
- last_map_addr = next;
+ paddr_last = paddr_next;
continue;
}
prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud));
pages++;
spin_lock(&init_mm.page_table_lock);
set_pte((pte_t *)pud,
- pfn_pte((addr & PUD_MASK) >> PAGE_SHIFT,
+ pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
PAGE_KERNEL_LARGE));
spin_unlock(&init_mm.page_table_lock);
- last_map_addr = next;
+ paddr_last = paddr_next;
continue;
}
pmd = alloc_low_page();
- last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask,
- prot);
+ paddr_last = phys_pmd_init(pmd, paddr, paddr_end,
+ page_size_mask, prot);
spin_lock(&init_mm.page_table_lock);
pud_populate(&init_mm, pud, pmd);
update_page_count(PG_LEVEL_1G, pages);
- return last_map_addr;
+ return paddr_last;
}
+/*
+ * Create page table mapping for the physical memory for specific physical
+ * addresses. The virtual and physical addresses have to be aligned on PMD level
+ * down. It returns the last physical address mapped.
+ */
unsigned long __meminit
-kernel_physical_mapping_init(unsigned long start,
- unsigned long end,
+kernel_physical_mapping_init(unsigned long paddr_start,
+ unsigned long paddr_end,
unsigned long page_size_mask)
{
bool pgd_changed = false;
- unsigned long next, last_map_addr = end;
- unsigned long addr;
+ unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last;
- start = (unsigned long)__va(start);
- end = (unsigned long)__va(end);
- addr = start;
+ paddr_last = paddr_end;
+ vaddr = (unsigned long)__va(paddr_start);
+ vaddr_end = (unsigned long)__va(paddr_end);
+ vaddr_start = vaddr;
- for (; start < end; start = next) {
- pgd_t *pgd = pgd_offset_k(start);
+ for (; vaddr < vaddr_end; vaddr = vaddr_next) {
+ pgd_t *pgd = pgd_offset_k(vaddr);
pud_t *pud;
- next = (start & PGDIR_MASK) + PGDIR_SIZE;
+ vaddr_next = (vaddr & PGDIR_MASK) + PGDIR_SIZE;
if (pgd_val(*pgd)) {
pud = (pud_t *)pgd_page_vaddr(*pgd);
- last_map_addr = phys_pud_init(pud, __pa(start),
- __pa(end), page_size_mask);
+ paddr_last = phys_pud_init(pud, __pa(vaddr),
+ __pa(vaddr_end),
+ page_size_mask);
continue;
}
pud = alloc_low_page();
- last_map_addr = phys_pud_init(pud, __pa(start), __pa(end),
- page_size_mask);
+ paddr_last = phys_pud_init(pud, __pa(vaddr), __pa(vaddr_end),
+ page_size_mask);
spin_lock(&init_mm.page_table_lock);
pgd_populate(&init_mm, pgd, pud);
}
if (pgd_changed)
- sync_global_pgds(addr, end - 1, 0);
+ sync_global_pgds(vaddr_start, vaddr_end - 1, 0);
__flush_tlb_all();
- return last_map_addr;
+ return paddr_last;
}
#ifndef CONFIG_NUMA
for (i = 0; i < PTRS_PER_PTE; i++) {
pte = pte_start + i;
- if (pte_val(*pte))
+ if (!pte_none(*pte))
return;
}
for (i = 0; i < PTRS_PER_PMD; i++) {
pmd = pmd_start + i;
- if (pmd_val(*pmd))
+ if (!pmd_none(*pmd))
return;
}
spin_unlock(&init_mm.page_table_lock);
}
-/* Return true if pgd is changed, otherwise return false. */
-static bool __meminit free_pud_table(pud_t *pud_start, pgd_t *pgd)
-{
- pud_t *pud;
- int i;
-
- for (i = 0; i < PTRS_PER_PUD; i++) {
- pud = pud_start + i;
- if (pud_val(*pud))
- return false;
- }
-
- /* free a pud table */
- free_pagetable(pgd_page(*pgd), 0);
- spin_lock(&init_mm.page_table_lock);
- pgd_clear(pgd);
- spin_unlock(&init_mm.page_table_lock);
-
- return true;
-}
-
static void __meminit
remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
bool direct)
unsigned long addr;
pgd_t *pgd;
pud_t *pud;
- bool pgd_changed = false;
for (addr = start; addr < end; addr = next) {
next = pgd_addr_end(addr, end);
pud = (pud_t *)pgd_page_vaddr(*pgd);
remove_pud_table(pud, addr, next, direct);
- if (free_pud_table(pud, pgd))
- pgd_changed = true;
}
- if (pgd_changed)
- sync_global_pgds(start, end - 1, 1);
-
flush_tlb_all();
}
/* Common code for 32 and 64-bit NUMA */
+#include <linux/acpi.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/memblock.h>
#include <linux/mmzone.h>
#include <linux/ctype.h>
- #include <linux/module.h>
#include <linux/nodemask.h>
#include <linux/sched.h>
#include <linux/topology.h>
#include <asm/e820.h>
#include <asm/proto.h>
#include <asm/dma.h>
-#include <asm/acpi.h>
#include <asm/amd_nb.h>
#include "numa_internal.h"
#include <linux/bootmem.h>
#include <linux/debugfs.h>
#include <linux/kernel.h>
- #include <linux/module.h>
#include <linux/pfn_t.h>
#include <linux/slab.h>
#include <linux/mm.h>
return 1;
while (cursor < to) {
- if (!devmem_is_allowed(pfn)) {
- pr_info("x86/PAT: Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n",
- current->comm, from, to - 1);
+ if (!devmem_is_allowed(pfn))
return 0;
- }
cursor += PAGE_SIZE;
pfn++;
}
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
- #include <linux/module.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
return;
}
pte = pte_offset_kernel(pmd, vaddr);
- if (pte_val(pteval))
+ if (!pte_none(pteval))
set_pte_at(&init_mm, vaddr, pte, pteval);
else
pte_clear(&init_mm, vaddr, pte);
#include <linux/acpi.h>
#include <linux/mmzone.h>
#include <linux/bitmap.h>
- #include <linux/module.h>
+ #include <linux/init.h>
#include <linux/topology.h>
-#include <linux/bootmem.h>
-#include <linux/memblock.h>
#include <linux/mm.h>
#include <asm/proto.h>
#include <asm/numa.h>
#include <asm/apic.h>
#include <asm/uv/uv.h>
-int acpi_numa __initdata;
-
-static __init int setup_node(int pxm)
-{
- return acpi_map_pxm_to_node(pxm);
-}
-
-static __init void bad_srat(void)
-{
- printk(KERN_ERR "SRAT: SRAT not used.\n");
- acpi_numa = -1;
-}
-
-static __init inline int srat_disabled(void)
-{
- return acpi_numa < 0;
-}
-
-/*
- * Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for
- * I/O localities since SRAT does not list them. I/O localities are
- * not supported at this point.
- */
-void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
-{
- int i, j;
-
- for (i = 0; i < slit->locality_count; i++) {
- const int from_node = pxm_to_node(i);
-
- if (from_node == NUMA_NO_NODE)
- continue;
-
- for (j = 0; j < slit->locality_count; j++) {
- const int to_node = pxm_to_node(j);
-
- if (to_node == NUMA_NO_NODE)
- continue;
-
- numa_set_distance(from_node, to_node,
- slit->entry[slit->locality_count * i + j]);
- }
- }
-}
-
/* Callback for Proximity Domain -> x2APIC mapping */
void __init
acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
pxm, apic_id);
return;
}
- node = setup_node(pxm);
+ node = acpi_map_pxm_to_node(pxm);
if (node < 0) {
printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
bad_srat();
}
set_apicid_to_node(apic_id, node);
node_set(node, numa_nodes_parsed);
- acpi_numa = 1;
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
pxm, apic_id, node);
}
pxm = pa->proximity_domain_lo;
if (acpi_srat_revision >= 2)
pxm |= *((unsigned int*)pa->proximity_domain_hi) << 8;
- node = setup_node(pxm);
+ node = acpi_map_pxm_to_node(pxm);
if (node < 0) {
printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
bad_srat();
set_apicid_to_node(apic_id, node);
node_set(node, numa_nodes_parsed);
- acpi_numa = 1;
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
pxm, apic_id, node);
}
-#ifdef CONFIG_MEMORY_HOTPLUG
-static inline int save_add_info(void) {return 1;}
-#else
-static inline int save_add_info(void) {return 0;}
-#endif
-
-/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
-int __init
-acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
-{
- u64 start, end;
- u32 hotpluggable;
- int node, pxm;
-
- if (srat_disabled())
- goto out_err;
- if (ma->header.length != sizeof(struct acpi_srat_mem_affinity))
- goto out_err_bad_srat;
- if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
- goto out_err;
- hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE;
- if (hotpluggable && !save_add_info())
- goto out_err;
-
- start = ma->base_address;
- end = start + ma->length;
- pxm = ma->proximity_domain;
- if (acpi_srat_revision <= 1)
- pxm &= 0xff;
-
- node = setup_node(pxm);
- if (node < 0) {
- printk(KERN_ERR "SRAT: Too many proximity domains.\n");
- goto out_err_bad_srat;
- }
-
- if (numa_add_memblk(node, start, end) < 0)
- goto out_err_bad_srat;
-
- node_set(node, numa_nodes_parsed);
-
- pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s%s\n",
- node, pxm,
- (unsigned long long) start, (unsigned long long) end - 1,
- hotpluggable ? " hotplug" : "",
- ma->flags & ACPI_SRAT_MEM_NON_VOLATILE ? " non-volatile" : "");
-
- /* Mark hotplug range in memblock. */
- if (hotpluggable && memblock_mark_hotplug(start, ma->length))
- pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n",
- (unsigned long long)start, (unsigned long long)end - 1);
-
- max_possible_pfn = max(max_possible_pfn, PFN_UP(end - 1));
-
- return 0;
-out_err_bad_srat:
- bad_srat();
-out_err:
- return -1;
-}
-
-void __init acpi_numa_arch_fixup(void) {}
-
int __init x86_acpi_numa_init(void)
{
int ret;
#include <linux/spinlock.h>
#include <linux/bootmem.h>
#include <linux/ioport.h>
- #include <linux/module.h>
+ #include <linux/init.h>
#include <linux/efi.h>
#include <linux/uaccess.h>
#include <linux/io.h>
return 0;
}
-void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages)
-{
- kernel_unmap_pages_in_pgd(efi_pgd, pa_memmap, num_pages);
-}
-
static void __init __map_region(efi_memory_desc_t *md, u64 va)
{
unsigned long flags = _PAGE_RW;
#define efi_thunk(f, ...) \
({ \
efi_status_t __s; \
- unsigned long flags; \
- u32 func; \
- \
- efi_sync_low_kernel_mappings(); \
- local_irq_save(flags); \
+ unsigned long __flags; \
+ u32 __func; \
\
- efi_scratch.prev_cr3 = read_cr3(); \
- write_cr3((unsigned long)efi_scratch.efi_pgt); \
- __flush_tlb_all(); \
+ local_irq_save(__flags); \
+ arch_efi_call_virt_setup(); \
\
- func = runtime_service32(f); \
- __s = efi64_thunk(func, __VA_ARGS__); \
+ __func = runtime_service32(f); \
+ __s = efi64_thunk(__func, __VA_ARGS__); \
\
- write_cr3(efi_scratch.prev_cr3); \
- __flush_tlb_all(); \
- local_irq_restore(flags); \
+ arch_efi_call_virt_teardown(); \
+ local_irq_restore(__flags); \
\
__s; \
})
#include <linux/sched.h>
#include <linux/kprobes.h>
#include <linux/bootmem.h>
- #include <linux/module.h>
+ #include <linux/export.h>
#include <linux/mm.h>
#include <linux/page-flags.h>
#include <linux/highmem.h>
#include <asm/xen/pci.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
+#include <asm/xen/cpuid.h>
#include <asm/fixmap.h>
#include <asm/processor.h>
#include <asm/proto.h>
*/
DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
+/* Linux <-> Xen vCPU id mapping */
+DEFINE_PER_CPU(int, xen_vcpu_id) = -1;
+EXPORT_PER_CPU_SYMBOL(xen_vcpu_id);
+
enum xen_domain_type xen_domain_type = XEN_NATIVE;
EXPORT_SYMBOL_GPL(xen_domain_type);
#endif
}
-static void xen_vcpu_setup(int cpu)
+void xen_vcpu_setup(int cpu)
{
struct vcpu_register_vcpu_info info;
int err;
if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu))
return;
}
- if (cpu < MAX_VIRT_CPUS)
- per_cpu(xen_vcpu,cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+ if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS)
+ per_cpu(xen_vcpu, cpu) =
+ &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)];
if (!have_vcpu_info_placement) {
if (cpu >= MAX_VIRT_CPUS)
hypervisor has no unregister variant and this hypercall does not
allow to over-write info.mfn and info.offset.
*/
- err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info);
+ err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu),
+ &info);
if (err) {
printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err);
for_each_possible_cpu(cpu) {
bool other_cpu = (cpu != smp_processor_id());
- bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL);
+ bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu),
+ NULL);
if (other_cpu && is_up &&
- HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
+ HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL))
BUG();
xen_setup_runstate_info(cpu);
xen_vcpu_setup(cpu);
if (other_cpu && is_up &&
- HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
+ HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL))
BUG();
}
}
preempt_disable();
- pagefault_disable(); /* Avoid warnings due to being atomic. */
- __get_user(dummy, (unsigned char __user __force *)v);
- pagefault_enable();
+ probe_kernel_read(&dummy, v, 1);
if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
BUG();
{
unsigned long va = dtr->address;
unsigned int size = dtr->size + 1;
- unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+ unsigned pages = DIV_ROUND_UP(size, PAGE_SIZE);
unsigned long frames[pages];
int f;
{
unsigned long va = dtr->address;
unsigned int size = dtr->size + 1;
- unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+ unsigned pages = DIV_ROUND_UP(size, PAGE_SIZE);
unsigned long frames[pages];
int f;
{
int cpu;
- for_each_possible_cpu(cpu)
+ for_each_possible_cpu(cpu) {
+ /* Set up direct vCPU id mapping for PV guests. */
+ per_cpu(xen_vcpu_id, cpu) = cpu;
xen_vcpu_setup(cpu);
+ }
/* xen_vcpu_setup managed to place the vcpu_info within the
* percpu area for all cpus, so make use of it. Note that for
#endif
xen_raw_console_write("about to get started...\n");
+ /* Let's presume PV guests always boot on vCPU with id 0. */
+ per_cpu(xen_vcpu_id, 0) = 0;
+
xen_setup_runstate_info(0);
xen_efi_init();
* in that case multiple vcpus might be online. */
for_each_online_cpu(cpu) {
/* Leave it to be NULL. */
- if (cpu >= MAX_VIRT_CPUS)
+ if (xen_vcpu_nr(cpu) >= MAX_VIRT_CPUS)
continue;
- per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+ per_cpu(xen_vcpu, cpu) =
+ &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)];
}
}
xen_setup_features();
+ cpuid(base + 4, &eax, &ebx, &ecx, &edx);
+ if (eax & XEN_HVM_CPUID_VCPU_ID_PRESENT)
+ this_cpu_write(xen_vcpu_id, ebx);
+ else
+ this_cpu_write(xen_vcpu_id, smp_processor_id());
+
pv_info.name = "Xen HVM";
xen_domain_type = XEN_HVM_DOMAIN;
int cpu = (long)hcpu;
switch (action) {
case CPU_UP_PREPARE:
+ if (cpu_acpi_id(cpu) != U32_MAX)
+ per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu);
+ else
+ per_cpu(xen_vcpu_id, cpu) = cpu;
xen_vcpu_setup(cpu);
if (xen_have_vector_callback) {
if (xen_feature(XENFEAT_hvm_safe_pvclock))