Merge branch 'perf/urgent' into perf/core, to pick up fixes before merging new changes

[cascardo/linux.git] / arch / x86 / events / intel / core.c
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c

index 9b4f9d3..0974ba1 100644 (file)
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -16,6 +16,7 @@
  
  #include <asm/cpufeature.h>
  #include <asm/hardirq.h>
+#include <asm/intel-family.h>
  #include <asm/apic.h>
  
  #include "../perf_event.h"
@@ -185,7 +186,7 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly =
         EVENT_CONSTRAINT_END
  };
  
-struct event_constraint intel_skl_event_constraints[] = {
+static struct event_constraint intel_skl_event_constraints[] = {
         FIXED_EVENT_CONSTRAINT(0x00c0, 0),      /* INST_RETIRED.ANY */
         FIXED_EVENT_CONSTRAINT(0x003c, 1),      /* CPU_CLK_UNHALTED.CORE */
         FIXED_EVENT_CONSTRAINT(0x0300, 2),      /* CPU_CLK_UNHALTED.REF */
@@ -204,10 +205,8 @@ struct event_constraint intel_skl_event_constraints[] = {
  };
  
  static struct extra_reg intel_knl_extra_regs[] __read_mostly = {
-       INTEL_UEVENT_EXTRA_REG(0x01b7,
-                              MSR_OFFCORE_RSP_0, 0x7f9ffbffffull, RSP_0),
-       INTEL_UEVENT_EXTRA_REG(0x02b7,
-                              MSR_OFFCORE_RSP_1, 0x3f9ffbffffull, RSP_1),
+       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x799ffbb6e7ull, RSP_0),
+       INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x399ffbffe7ull, RSP_1),
         EVENT_EXTRA_END
  };
  
@@ -243,14 +242,51 @@ EVENT_ATTR_STR(mem-loads, mem_ld_nhm,     "event=0x0b,umask=0x10,ldlat=3");
  EVENT_ATTR_STR(mem-loads,      mem_ld_snb,     "event=0xcd,umask=0x1,ldlat=3");
  EVENT_ATTR_STR(mem-stores,     mem_st_snb,     "event=0xcd,umask=0x2");
  
-struct attribute *nhm_events_attrs[] = {
+static struct attribute *nhm_events_attrs[] = {
         EVENT_PTR(mem_ld_nhm),
         NULL,
  };
  
-struct attribute *snb_events_attrs[] = {
+/*
+ * topdown events for Intel Core CPUs.
+ *
+ * The events are all counted in slots, where a slot is one free slot
+ * in a 4-wide pipeline. Some events are already reported in slots; for
+ * cycle events we multiply by the pipeline width (4).
+ *
+ * With Hyper Threading on, topdown metrics are either summed or averaged
+ * between the threads of a core: (count_t0 + count_t1).
+ *
+ * For the average case the metric is always scaled to pipeline width,
+ * so we use factor 2 ((count_t0 + count_t1) / 2 * 4)
+ */
+
+EVENT_ATTR_STR_HT(topdown-total-slots, td_total_slots,
+       "event=0x3c,umask=0x0",                 /* cpu_clk_unhalted.thread */
+       "event=0x3c,umask=0x0,any=1");          /* cpu_clk_unhalted.thread_any */
+EVENT_ATTR_STR_HT(topdown-total-slots.scale, td_total_slots_scale, "4", "2");
+EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued,
+       "event=0xe,umask=0x1");                 /* uops_issued.any */
+EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired,
+       "event=0xc2,umask=0x2");                /* uops_retired.retire_slots */
+EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles,
+       "event=0x9c,umask=0x1");                /* idq_uops_not_delivered_core */
+EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles,
+       "event=0xd,umask=0x3,cmask=1",          /* int_misc.recovery_cycles */
+       "event=0xd,umask=0x3,cmask=1,any=1");   /* int_misc.recovery_cycles_any */
+EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale,
+       "4", "2");
+
+static struct attribute *snb_events_attrs[] = {
         EVENT_PTR(mem_ld_snb),
         EVENT_PTR(mem_st_snb),
+       EVENT_PTR(td_slots_issued),
+       EVENT_PTR(td_slots_retired),
+       EVENT_PTR(td_fetch_bubbles),
+       EVENT_PTR(td_total_slots),
+       EVENT_PTR(td_total_slots_scale),
+       EVENT_PTR(td_recovery_bubbles),
+       EVENT_PTR(td_recovery_bubbles_scale),
         NULL,
  };
  
@@ -280,7 +316,7 @@ static struct event_constraint intel_hsw_event_constraints[] = {
         EVENT_CONSTRAINT_END
  };
  
-struct event_constraint intel_bdw_event_constraints[] = {
+static struct event_constraint intel_bdw_event_constraints[] = {
         FIXED_EVENT_CONSTRAINT(0x00c0, 0),      /* INST_RETIRED.ANY */
         FIXED_EVENT_CONSTRAINT(0x003c, 1),      /* CPU_CLK_UNHALTED.CORE */
         FIXED_EVENT_CONSTRAINT(0x0300, 2),      /* CPU_CLK_UNHALTED.REF */
@@ -1361,6 +1397,29 @@ static __initconst const u64 atom_hw_cache_event_ids
   },
  };
  
+EVENT_ATTR_STR(topdown-total-slots, td_total_slots_slm, "event=0x3c");
+EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_slm, "2");
+/* no_alloc_cycles.not_delivered */
+EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_slm,
+              "event=0xca,umask=0x50");
+EVENT_ATTR_STR(topdown-fetch-bubbles.scale, td_fetch_bubbles_scale_slm, "2");
+/* uops_retired.all */
+EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_slm,
+              "event=0xc2,umask=0x10");
+/* uops_retired.all */
+EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_slm,
+              "event=0xc2,umask=0x10");
+
+static struct attribute *slm_events_attrs[] = { /* topdown event attributes; assigned to x86_pmu.cpu_events in the Silvermont case of intel_pmu_init() */
+       EVENT_PTR(td_total_slots_slm), /* topdown-total-slots */
+       EVENT_PTR(td_total_slots_scale_slm), /* topdown-total-slots.scale */
+       EVENT_PTR(td_fetch_bubbles_slm), /* topdown-fetch-bubbles */
+       EVENT_PTR(td_fetch_bubbles_scale_slm), /* topdown-fetch-bubbles.scale */
+       EVENT_PTR(td_slots_issued_slm), /* topdown-slots-issued */
+       EVENT_PTR(td_slots_retired_slm), /* topdown-slots-retired; same event string as topdown-slots-issued */
+       NULL
+};
+
  static struct extra_reg intel_slm_extra_regs[] __read_mostly =
  {
         /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
@@ -3290,11 +3349,11 @@ static int intel_snb_pebs_broken(int cpu)
         u32 rev = UINT_MAX; /* default to broken for unknown models */
  
         switch (cpu_data(cpu).x86_model) {
-       case 42: /* SNB */
+       case INTEL_FAM6_SANDYBRIDGE:
                 rev = 0x28;
                 break;
  
-       case 45: /* SNB-EP */
+       case INTEL_FAM6_SANDYBRIDGE_X:
                 switch (cpu_data(cpu).x86_mask) {
                 case 6: rev = 0x618; break;
                 case 7: rev = 0x70c; break;
@@ -3331,6 +3390,13 @@ static void intel_snb_check_microcode(void)
         }
  }
  
+static bool is_lbr_from(unsigned long msr) /* true when msr lies in the range starting at x86_pmu.lbr_from and spanning x86_pmu.lbr_nr entries */
+{
+       unsigned long lbr_from_nr = x86_pmu.lbr_from + x86_pmu.lbr_nr; /* exclusive upper bound of that range */
+
+       return x86_pmu.lbr_from <= msr && msr < lbr_from_nr; /* half-open check: lbr_from <= msr < lbr_from + lbr_nr */
+}
+
  /*
   * Under certain circumstances, accessing certain MSRs may cause #GP.
   * The function tests if the input MSR can be safely accessed.
@@ -3351,13 +3417,24 @@ static bool check_msr(unsigned long msr, u64 mask)
          * Only change the bits which can be updated by wrmsrl.
          */
         val_tmp = val_old ^ mask;
+
+       if (is_lbr_from(msr))
+               val_tmp = lbr_from_signext_quirk_wr(val_tmp);
+
         if (wrmsrl_safe(msr, val_tmp) ||
             rdmsrl_safe(msr, &val_new))
                 return false;
  
+       /*
+        * Quirk only affects validation in wrmsr(), so wrmsrl()'s value
+        * should equal rdmsrl()'s even with the quirk.
+        */
         if (val_new != val_tmp)
                 return false;
  
+       if (is_lbr_from(msr))
+               val_old = lbr_from_signext_quirk_wr(val_old);
+
         /* At this point the MSR is known to be safely accessible.
          * Restore the old value and return.
          */
@@ -3466,6 +3543,13 @@ static struct attribute *hsw_events_attrs[] = {
         EVENT_PTR(cycles_ct),
         EVENT_PTR(mem_ld_hsw),
         EVENT_PTR(mem_st_hsw),
+       EVENT_PTR(td_slots_issued),
+       EVENT_PTR(td_slots_retired),
+       EVENT_PTR(td_fetch_bubbles),
+       EVENT_PTR(td_total_slots),
+       EVENT_PTR(td_total_slots_scale),
+       EVENT_PTR(td_recovery_bubbles),
+       EVENT_PTR(td_recovery_bubbles_scale),
         NULL
  };
  
@@ -3537,15 +3621,15 @@ __init int intel_pmu_init(void)
          * Install the hw-cache-events table:
          */
         switch (boot_cpu_data.x86_model) {
-       case 14: /* 65nm Core "Yonah" */
+       case INTEL_FAM6_CORE_YONAH:
                 pr_cont("Core events, ");
                 break;
  
-       case 15: /* 65nm Core2 "Merom"          */
+       case INTEL_FAM6_CORE2_MEROM:
                 x86_add_quirk(intel_clovertown_quirk);
-       case 22: /* 65nm Core2 "Merom-L"        */
-       case 23: /* 45nm Core2 "Penryn"         */
-       case 29: /* 45nm Core2 "Dunnington (MP) */
+       case INTEL_FAM6_CORE2_MEROM_L:
+       case INTEL_FAM6_CORE2_PENRYN:
+       case INTEL_FAM6_CORE2_DUNNINGTON:
                 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));
  
@@ -3556,9 +3640,9 @@ __init int intel_pmu_init(void)
                 pr_cont("Core2 events, ");
                 break;
  
-       case 30: /* 45nm Nehalem    */
-       case 26: /* 45nm Nehalem-EP */
-       case 46: /* 45nm Nehalem-EX */
+       case INTEL_FAM6_NEHALEM:
+       case INTEL_FAM6_NEHALEM_EP:
+       case INTEL_FAM6_NEHALEM_EX:
                 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));
                 memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
@@ -3586,11 +3670,11 @@ __init int intel_pmu_init(void)
                 pr_cont("Nehalem events, ");
                 break;
  
-       case 28: /* 45nm Atom "Pineview"   */
-       case 38: /* 45nm Atom "Lincroft"   */
-       case 39: /* 32nm Atom "Penwell"    */
-       case 53: /* 32nm Atom "Cloverview" */
-       case 54: /* 32nm Atom "Cedarview"  */
+       case INTEL_FAM6_ATOM_PINEVIEW:
+       case INTEL_FAM6_ATOM_LINCROFT:
+       case INTEL_FAM6_ATOM_PENWELL:
+       case INTEL_FAM6_ATOM_CLOVERVIEW:
+       case INTEL_FAM6_ATOM_CEDARVIEW:
                 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));
  
@@ -3602,9 +3686,9 @@ __init int intel_pmu_init(void)
                 pr_cont("Atom events, ");
                 break;
  
-       case 55: /* 22nm Atom "Silvermont"                */
-       case 76: /* 14nm Atom "Airmont"                   */
-       case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
+       case INTEL_FAM6_ATOM_SILVERMONT1:
+       case INTEL_FAM6_ATOM_SILVERMONT2:
+       case INTEL_FAM6_ATOM_AIRMONT:
                 memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
                         sizeof(hw_cache_event_ids));
                 memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
@@ -3616,11 +3700,12 @@ __init int intel_pmu_init(void)
                 x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
                 x86_pmu.extra_regs = intel_slm_extra_regs;
                 x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+               x86_pmu.cpu_events = slm_events_attrs;
                 pr_cont("Silvermont events, ");
                 break;
  
-       case 92: /* 14nm Atom "Goldmont" */
-       case 95: /* 14nm Atom "Goldmont Denverton" */
+       case INTEL_FAM6_ATOM_GOLDMONT:
+       case INTEL_FAM6_ATOM_DENVERTON:
                 memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));
                 memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
@@ -3643,9 +3728,9 @@ __init int intel_pmu_init(void)
                 pr_cont("Goldmont events, ");
                 break;
  
-       case 37: /* 32nm Westmere    */
-       case 44: /* 32nm Westmere-EP */
-       case 47: /* 32nm Westmere-EX */
+       case INTEL_FAM6_WESTMERE:
+       case INTEL_FAM6_WESTMERE_EP:
+       case INTEL_FAM6_WESTMERE_EX:
                 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));
                 memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
@@ -3672,8 +3757,8 @@ __init int intel_pmu_init(void)
                 pr_cont("Westmere events, ");
                 break;
  
-       case 42: /* 32nm SandyBridge         */
-       case 45: /* 32nm SandyBridge-E/EN/EP */
+       case INTEL_FAM6_SANDYBRIDGE:
+       case INTEL_FAM6_SANDYBRIDGE_X:
                 x86_add_quirk(intel_sandybridge_quirk);
                 x86_add_quirk(intel_ht_bug);
                 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
@@ -3686,7 +3771,7 @@ __init int intel_pmu_init(void)
                 x86_pmu.event_constraints = intel_snb_event_constraints;
                 x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
                 x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
-               if (boot_cpu_data.x86_model == 45)
+               if (boot_cpu_data.x86_model == INTEL_FAM6_SANDYBRIDGE_X)
                         x86_pmu.extra_regs = intel_snbep_extra_regs;
                 else
                         x86_pmu.extra_regs = intel_snb_extra_regs;
@@ -3708,8 +3793,8 @@ __init int intel_pmu_init(void)
                 pr_cont("SandyBridge events, ");
                 break;
  
-       case 58: /* 22nm IvyBridge       */
-       case 62: /* 22nm IvyBridge-EP/EX */
+       case INTEL_FAM6_IVYBRIDGE:
+       case INTEL_FAM6_IVYBRIDGE_X:
                 x86_add_quirk(intel_ht_bug);
                 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));
@@ -3725,7 +3810,7 @@ __init int intel_pmu_init(void)
                 x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
                 x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
                 x86_pmu.pebs_prec_dist = true;
-               if (boot_cpu_data.x86_model == 62)
+               if (boot_cpu_data.x86_model == INTEL_FAM6_IVYBRIDGE_X)
                         x86_pmu.extra_regs = intel_snbep_extra_regs;
                 else
                         x86_pmu.extra_regs = intel_snb_extra_regs;
@@ -3743,10 +3828,10 @@ __init int intel_pmu_init(void)
                 break;
  
  
-       case 60: /* 22nm Haswell Core */
-       case 63: /* 22nm Haswell Server */
-       case 69: /* 22nm Haswell ULT */
-       case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+       case INTEL_FAM6_HASWELL_CORE:
+       case INTEL_FAM6_HASWELL_X:
+       case INTEL_FAM6_HASWELL_ULT:
+       case INTEL_FAM6_HASWELL_GT3E:
                 x86_add_quirk(intel_ht_bug);
                 x86_pmu.late_ack = true;
                 memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
@@ -3770,10 +3855,10 @@ __init int intel_pmu_init(void)
                 pr_cont("Haswell events, ");
                 break;
  
-       case 61: /* 14nm Broadwell Core-M */
-       case 86: /* 14nm Broadwell Xeon D */
-       case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
-       case 79: /* 14nm Broadwell Server */
+       case INTEL_FAM6_BROADWELL_CORE:
+       case INTEL_FAM6_BROADWELL_XEON_D:
+       case INTEL_FAM6_BROADWELL_GT3E:
+       case INTEL_FAM6_BROADWELL_X:
                 x86_pmu.late_ack = true;
                 memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
                 memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@@ -3806,7 +3891,7 @@ __init int intel_pmu_init(void)
                 pr_cont("Broadwell events, ");
                 break;
  
-       case 87: /* Knights Landing Xeon Phi */
+       case INTEL_FAM6_XEON_PHI_KNL:
                 memcpy(hw_cache_event_ids,
                        slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
                 memcpy(hw_cache_extra_regs,
@@ -3824,16 +3909,22 @@ __init int intel_pmu_init(void)
                 pr_cont("Knights Landing events, ");
                 break;
  
-       case 142: /* 14nm Kabylake Mobile */
-       case 158: /* 14nm Kabylake Desktop */
-       case 78: /* 14nm Skylake Mobile */
-       case 94: /* 14nm Skylake Desktop */
-       case 85: /* 14nm Skylake Server */
+       case INTEL_FAM6_SKYLAKE_MOBILE:
+       case INTEL_FAM6_SKYLAKE_DESKTOP:
+       case INTEL_FAM6_SKYLAKE_X:
+       case INTEL_FAM6_KABYLAKE_MOBILE:
+       case INTEL_FAM6_KABYLAKE_DESKTOP:
                 x86_pmu.late_ack = true;
                 memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
                 memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
                 intel_pmu_lbr_init_skl();
  
+               /* INT_MISC.RECOVERY_CYCLES has umask 1 in Skylake */
+               event_attr_td_recovery_bubbles.event_str_noht =
+                       "event=0xd,umask=0x1,cmask=1";
+               event_attr_td_recovery_bubbles.event_str_ht =
+                       "event=0xd,umask=0x1,cmask=1,any=1";
+
                 x86_pmu.event_constraints = intel_skl_event_constraints;
                 x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
                 x86_pmu.extra_regs = intel_skl_extra_regs;
@@ -3914,6 +4005,8 @@ __init int intel_pmu_init(void)
                         x86_pmu.lbr_nr = 0;
         }
  
+       if (x86_pmu.lbr_nr)
+               pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
         /*
          * Accessing extra MSRs may cause #GP under certain circumstances.
          * E.g. KVM doesn't support offcore event
@@ -3946,16 +4039,14 @@ __init int intel_pmu_init(void)
   */
  static __init int fixup_ht_bug(void)
  {
-       int cpu = smp_processor_id();
-       int w, c;
+       int c;
         /*
          * problem not present on this CPU model, nothing to do
          */
         if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED))
                 return 0;
  
-       w = cpumask_weight(topology_sibling_cpumask(cpu));
-       if (w > 1) {
+       if (topology_max_smt_threads() > 1) {
                 pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n");
                 return 0;
         }