Merge branch 'linus' into perf/core, to pick up fixes before merging new changes
author Ingo Molnar <mingo@kernel.org>
Tue, 14 Jun 2016 09:14:34 +0000 (11:14 +0200)
committer Ingo Molnar <mingo@kernel.org>
Tue, 14 Jun 2016 09:14:34 +0000 (11:14 +0200)
Signed-off-by: Ingo Molnar <mingo@kernel.org>
68 files changed:
arch/x86/events/core.c
arch/x86/events/intel/core.c
arch/x86/events/intel/cstate.c
arch/x86/events/intel/rapl.c
arch/x86/events/intel/uncore.c
arch/x86/events/intel/uncore.h
arch/x86/events/intel/uncore_snbep.c
arch/x86/events/msr.c
arch/x86/events/perf_event.h
arch/x86/include/asm/topology.h
arch/x86/kernel/smpboot.c
drivers/platform/x86/intel_pmc_core.c
include/linux/perf_event.h
include/uapi/linux/perf_event.h
kernel/bpf/stackmap.c
kernel/events/callchain.c
kernel/events/core.c
tools/lib/api/Makefile
tools/lib/api/fd/array.c
tools/lib/api/fd/array.h
tools/lib/bpf/libbpf.c
tools/lib/bpf/libbpf.h
tools/perf/.gitignore
tools/perf/Documentation/perf-stat.txt
tools/perf/arch/arm/util/Build
tools/perf/arch/arm64/util/Build
tools/perf/arch/arm64/util/unwind-libunwind.c
tools/perf/arch/common.c
tools/perf/arch/common.h
tools/perf/arch/x86/util/Build
tools/perf/arch/x86/util/group.c [new file with mode: 0644]
tools/perf/arch/x86/util/tsc.c
tools/perf/arch/x86/util/unwind-libunwind.c
tools/perf/builtin-record.c
tools/perf/builtin-script.c
tools/perf/builtin-stat.c
tools/perf/config/Makefile
tools/perf/tests/fdarray.c
tools/perf/tests/parse-events.c
tools/perf/util/Build
tools/perf/util/bpf-loader.c
tools/perf/util/build-id.c
tools/perf/util/build-id.h
tools/perf/util/callchain.h
tools/perf/util/config.c
tools/perf/util/db-export.c
tools/perf/util/dso.h
tools/perf/util/evlist.c
tools/perf/util/evlist.h
tools/perf/util/evsel.c
tools/perf/util/evsel.h
tools/perf/util/group.h [new file with mode: 0644]
tools/perf/util/libunwind/arm64.c [new file with mode: 0644]
tools/perf/util/libunwind/x86_32.c [new file with mode: 0644]
tools/perf/util/machine.c
tools/perf/util/parse-events.c
tools/perf/util/parse-events.h
tools/perf/util/parse-events.l
tools/perf/util/session.c
tools/perf/util/stat-shadow.c
tools/perf/util/stat.c
tools/perf/util/stat.h
tools/perf/util/symbol.c
tools/perf/util/thread.c
tools/perf/util/thread.h
tools/perf/util/unwind-libunwind-local.c [new file with mode: 0644]
tools/perf/util/unwind-libunwind.c
tools/perf/util/unwind.h

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 33787ee..929655d 100644
@@ -1622,6 +1622,29 @@ ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, cha
 }
 EXPORT_SYMBOL_GPL(events_sysfs_show);
 
+ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr,
+                         char *page)
+{
+       struct perf_pmu_events_ht_attr *pmu_attr =
+               container_of(attr, struct perf_pmu_events_ht_attr, attr);
+
+       /*
+        * Report conditional events depending on Hyper-Threading.
+        *
+        * This is overly conservative as usually the HT special
+        * handling is not needed if the other CPU thread is idle.
+        *
+        * Note this does not (and cannot) handle the case when thread
+        * siblings are invisible, for example with virtualization
+        * if they are owned by some other guest.  The user tool
+        * has to re-read when a thread sibling gets onlined later.
+        */
+       return sprintf(page, "%s",
+                       topology_max_smt_threads() > 1 ?
+                       pmu_attr->event_str_ht :
+                       pmu_attr->event_str_noht);
+}
+
 EVENT_ATTR(cpu-cycles,                 CPU_CYCLES              );
 EVENT_ATTR(instructions,               INSTRUCTIONS            );
 EVENT_ATTR(cache-references,           CACHE_REFERENCES        );
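
events_ht_sysfs_show() returns a different event string depending on whether SMT is active, and (as the comment notes) a tool has to re-read it if a thread sibling comes online later. A hedged user-space sketch, assuming the standard sysfs event directory layout; the topdown-total-slots attribute it reads is added by the arch/x86/events/intel/core.c hunk below:

/*
 * Hedged sketch: read an HT-conditional event string exported via
 * events_ht_sysfs_show(). The path follows the usual perf sysfs
 * layout; error handling is minimal on purpose.
 */
#include <stdio.h>

int main(void)
{
	const char *path =
		"/sys/bus/event_source/devices/cpu/events/topdown-total-slots";
	char buf[256];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		/*
		 * Prints "event=0x3c,umask=0x0" without SMT, or
		 * "event=0x3c,umask=0x0,any=1" with SMT enabled.
		 */
		printf("%s\n", buf);
	fclose(f);
	return 0;
}
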
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 7c66695..3ed528c 100644
@@ -16,6 +16,7 @@
 
 #include <asm/cpufeature.h>
 #include <asm/hardirq.h>
+#include <asm/intel-family.h>
 #include <asm/apic.h>
 
 #include "../perf_event.h"
@@ -177,7 +178,7 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly =
        EVENT_CONSTRAINT_END
 };
 
-struct event_constraint intel_skl_event_constraints[] = {
+static struct event_constraint intel_skl_event_constraints[] = {
        FIXED_EVENT_CONSTRAINT(0x00c0, 0),      /* INST_RETIRED.ANY */
        FIXED_EVENT_CONSTRAINT(0x003c, 1),      /* CPU_CLK_UNHALTED.CORE */
        FIXED_EVENT_CONSTRAINT(0x0300, 2),      /* CPU_CLK_UNHALTED.REF */
@@ -186,10 +187,8 @@ struct event_constraint intel_skl_event_constraints[] = {
 };
 
 static struct extra_reg intel_knl_extra_regs[] __read_mostly = {
-       INTEL_UEVENT_EXTRA_REG(0x01b7,
-                              MSR_OFFCORE_RSP_0, 0x7f9ffbffffull, RSP_0),
-       INTEL_UEVENT_EXTRA_REG(0x02b7,
-                              MSR_OFFCORE_RSP_1, 0x3f9ffbffffull, RSP_1),
+       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x799ffbb6e7ull, RSP_0),
+       INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x399ffbffe7ull, RSP_1),
        EVENT_EXTRA_END
 };
 
@@ -225,14 +224,51 @@ EVENT_ATTR_STR(mem-loads, mem_ld_nhm,     "event=0x0b,umask=0x10,ldlat=3");
 EVENT_ATTR_STR(mem-loads,      mem_ld_snb,     "event=0xcd,umask=0x1,ldlat=3");
 EVENT_ATTR_STR(mem-stores,     mem_st_snb,     "event=0xcd,umask=0x2");
 
-struct attribute *nhm_events_attrs[] = {
+static struct attribute *nhm_events_attrs[] = {
        EVENT_PTR(mem_ld_nhm),
        NULL,
 };
 
-struct attribute *snb_events_attrs[] = {
+/*
+ * topdown events for Intel Core CPUs.
+ *
+ * The events are all in slots, which is a free slot in a 4 wide
+ * pipeline. Some events are already reported in slots, for cycle
+ * events we multiply by the pipeline width (4).
+ *
+ * With Hyper Threading on, topdown metrics are either summed or averaged
+ * between the threads of a core: (count_t0 + count_t1).
+ *
+ * For the average case the metric is always scaled to pipeline width,
+ * so we use factor 2 ((count_t0 + count_t1) / 2 * 4)
+ */
+
+EVENT_ATTR_STR_HT(topdown-total-slots, td_total_slots,
+       "event=0x3c,umask=0x0",                 /* cpu_clk_unhalted.thread */
+       "event=0x3c,umask=0x0,any=1");          /* cpu_clk_unhalted.thread_any */
+EVENT_ATTR_STR_HT(topdown-total-slots.scale, td_total_slots_scale, "4", "2");
+EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued,
+       "event=0xe,umask=0x1");                 /* uops_issued.any */
+EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired,
+       "event=0xc2,umask=0x2");                /* uops_retired.retire_slots */
+EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles,
+       "event=0x9c,umask=0x1");                /* idq_uops_not_delivered_core */
+EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles,
+       "event=0xd,umask=0x3,cmask=1",          /* int_misc.recovery_cycles */
+       "event=0xd,umask=0x3,cmask=1,any=1");   /* int_misc.recovery_cycles_any */
+EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale,
+       "4", "2");
+
+static struct attribute *snb_events_attrs[] = {
        EVENT_PTR(mem_ld_snb),
        EVENT_PTR(mem_st_snb),
+       EVENT_PTR(td_slots_issued),
+       EVENT_PTR(td_slots_retired),
+       EVENT_PTR(td_fetch_bubbles),
+       EVENT_PTR(td_total_slots),
+       EVENT_PTR(td_total_slots_scale),
+       EVENT_PTR(td_recovery_bubbles),
+       EVENT_PTR(td_recovery_bubbles_scale),
        NULL,
 };
 
@@ -258,7 +294,7 @@ static struct event_constraint intel_hsw_event_constraints[] = {
        EVENT_CONSTRAINT_END
 };
 
-struct event_constraint intel_bdw_event_constraints[] = {
+static struct event_constraint intel_bdw_event_constraints[] = {
        FIXED_EVENT_CONSTRAINT(0x00c0, 0),      /* INST_RETIRED.ANY */
        FIXED_EVENT_CONSTRAINT(0x003c, 1),      /* CPU_CLK_UNHALTED.CORE */
        FIXED_EVENT_CONSTRAINT(0x0300, 2),      /* CPU_CLK_UNHALTED.REF */
@@ -1332,6 +1368,29 @@ static __initconst const u64 atom_hw_cache_event_ids
  },
 };
 
+EVENT_ATTR_STR(topdown-total-slots, td_total_slots_slm, "event=0x3c");
+EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_slm, "2");
+/* no_alloc_cycles.not_delivered */
+EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_slm,
+              "event=0xca,umask=0x50");
+EVENT_ATTR_STR(topdown-fetch-bubbles.scale, td_fetch_bubbles_scale_slm, "2");
+/* uops_retired.all */
+EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_slm,
+              "event=0xc2,umask=0x10");
+/* uops_retired.all */
+EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_slm,
+              "event=0xc2,umask=0x10");
+
+static struct attribute *slm_events_attrs[] = {
+       EVENT_PTR(td_total_slots_slm),
+       EVENT_PTR(td_total_slots_scale_slm),
+       EVENT_PTR(td_fetch_bubbles_slm),
+       EVENT_PTR(td_fetch_bubbles_scale_slm),
+       EVENT_PTR(td_slots_issued_slm),
+       EVENT_PTR(td_slots_retired_slm),
+       NULL
+};
+
 static struct extra_reg intel_slm_extra_regs[] __read_mostly =
 {
        /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
@@ -3261,11 +3320,11 @@ static int intel_snb_pebs_broken(int cpu)
        u32 rev = UINT_MAX; /* default to broken for unknown models */
 
        switch (cpu_data(cpu).x86_model) {
-       case 42: /* SNB */
+       case INTEL_FAM6_SANDYBRIDGE:
                rev = 0x28;
                break;
 
-       case 45: /* SNB-EP */
+       case INTEL_FAM6_SANDYBRIDGE_X:
                switch (cpu_data(cpu).x86_mask) {
                case 6: rev = 0x618; break;
                case 7: rev = 0x70c; break;
@@ -3437,6 +3496,13 @@ static struct attribute *hsw_events_attrs[] = {
        EVENT_PTR(cycles_ct),
        EVENT_PTR(mem_ld_hsw),
        EVENT_PTR(mem_st_hsw),
+       EVENT_PTR(td_slots_issued),
+       EVENT_PTR(td_slots_retired),
+       EVENT_PTR(td_fetch_bubbles),
+       EVENT_PTR(td_total_slots),
+       EVENT_PTR(td_total_slots_scale),
+       EVENT_PTR(td_recovery_bubbles),
+       EVENT_PTR(td_recovery_bubbles_scale),
        NULL
 };
 
@@ -3508,15 +3574,15 @@ __init int intel_pmu_init(void)
         * Install the hw-cache-events table:
         */
        switch (boot_cpu_data.x86_model) {
-       case 14: /* 65nm Core "Yonah" */
+       case INTEL_FAM6_CORE_YONAH:
                pr_cont("Core events, ");
                break;
 
-       case 15: /* 65nm Core2 "Merom"          */
+       case INTEL_FAM6_CORE2_MEROM:
                x86_add_quirk(intel_clovertown_quirk);
-       case 22: /* 65nm Core2 "Merom-L"        */
-       case 23: /* 45nm Core2 "Penryn"         */
-       case 29: /* 45nm Core2 "Dunnington (MP) */
+       case INTEL_FAM6_CORE2_MEROM_L:
+       case INTEL_FAM6_CORE2_PENRYN:
+       case INTEL_FAM6_CORE2_DUNNINGTON:
                memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
 
@@ -3527,9 +3593,9 @@ __init int intel_pmu_init(void)
                pr_cont("Core2 events, ");
                break;
 
-       case 30: /* 45nm Nehalem    */
-       case 26: /* 45nm Nehalem-EP */
-       case 46: /* 45nm Nehalem-EX */
+       case INTEL_FAM6_NEHALEM:
+       case INTEL_FAM6_NEHALEM_EP:
+       case INTEL_FAM6_NEHALEM_EX:
                memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
@@ -3557,11 +3623,11 @@ __init int intel_pmu_init(void)
                pr_cont("Nehalem events, ");
                break;
 
-       case 28: /* 45nm Atom "Pineview"   */
-       case 38: /* 45nm Atom "Lincroft"   */
-       case 39: /* 32nm Atom "Penwell"    */
-       case 53: /* 32nm Atom "Cloverview" */
-       case 54: /* 32nm Atom "Cedarview"  */
+       case INTEL_FAM6_ATOM_PINEVIEW:
+       case INTEL_FAM6_ATOM_LINCROFT:
+       case INTEL_FAM6_ATOM_PENWELL:
+       case INTEL_FAM6_ATOM_CLOVERVIEW:
+       case INTEL_FAM6_ATOM_CEDARVIEW:
                memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
 
@@ -3573,9 +3639,9 @@ __init int intel_pmu_init(void)
                pr_cont("Atom events, ");
                break;
 
-       case 55: /* 22nm Atom "Silvermont"                */
-       case 76: /* 14nm Atom "Airmont"                   */
-       case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
+       case INTEL_FAM6_ATOM_SILVERMONT1:
+       case INTEL_FAM6_ATOM_SILVERMONT2:
+       case INTEL_FAM6_ATOM_AIRMONT:
                memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
@@ -3587,11 +3653,12 @@ __init int intel_pmu_init(void)
                x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
                x86_pmu.extra_regs = intel_slm_extra_regs;
                x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+               x86_pmu.cpu_events = slm_events_attrs;
                pr_cont("Silvermont events, ");
                break;
 
-       case 92: /* 14nm Atom "Goldmont" */
-       case 95: /* 14nm Atom "Goldmont Denverton" */
+       case INTEL_FAM6_ATOM_GOLDMONT:
+       case INTEL_FAM6_ATOM_DENVERTON:
                memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
@@ -3614,9 +3681,9 @@ __init int intel_pmu_init(void)
                pr_cont("Goldmont events, ");
                break;
 
-       case 37: /* 32nm Westmere    */
-       case 44: /* 32nm Westmere-EP */
-       case 47: /* 32nm Westmere-EX */
+       case INTEL_FAM6_WESTMERE:
+       case INTEL_FAM6_WESTMERE_EP:
+       case INTEL_FAM6_WESTMERE_EX:
                memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
@@ -3643,8 +3710,8 @@ __init int intel_pmu_init(void)
                pr_cont("Westmere events, ");
                break;
 
-       case 42: /* 32nm SandyBridge         */
-       case 45: /* 32nm SandyBridge-E/EN/EP */
+       case INTEL_FAM6_SANDYBRIDGE:
+       case INTEL_FAM6_SANDYBRIDGE_X:
                x86_add_quirk(intel_sandybridge_quirk);
                x86_add_quirk(intel_ht_bug);
                memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
@@ -3657,7 +3724,7 @@ __init int intel_pmu_init(void)
                x86_pmu.event_constraints = intel_snb_event_constraints;
                x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
                x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
-               if (boot_cpu_data.x86_model == 45)
+               if (boot_cpu_data.x86_model == INTEL_FAM6_SANDYBRIDGE_X)
                        x86_pmu.extra_regs = intel_snbep_extra_regs;
                else
                        x86_pmu.extra_regs = intel_snb_extra_regs;
@@ -3679,8 +3746,8 @@ __init int intel_pmu_init(void)
                pr_cont("SandyBridge events, ");
                break;
 
-       case 58: /* 22nm IvyBridge       */
-       case 62: /* 22nm IvyBridge-EP/EX */
+       case INTEL_FAM6_IVYBRIDGE:
+       case INTEL_FAM6_IVYBRIDGE_X:
                x86_add_quirk(intel_ht_bug);
                memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
@@ -3696,7 +3763,7 @@ __init int intel_pmu_init(void)
                x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
                x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
                x86_pmu.pebs_prec_dist = true;
-               if (boot_cpu_data.x86_model == 62)
+               if (boot_cpu_data.x86_model == INTEL_FAM6_IVYBRIDGE_X)
                        x86_pmu.extra_regs = intel_snbep_extra_regs;
                else
                        x86_pmu.extra_regs = intel_snb_extra_regs;
@@ -3714,10 +3781,10 @@ __init int intel_pmu_init(void)
                break;
 
 
-       case 60: /* 22nm Haswell Core */
-       case 63: /* 22nm Haswell Server */
-       case 69: /* 22nm Haswell ULT */
-       case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+       case INTEL_FAM6_HASWELL_CORE:
+       case INTEL_FAM6_HASWELL_X:
+       case INTEL_FAM6_HASWELL_ULT:
+       case INTEL_FAM6_HASWELL_GT3E:
                x86_add_quirk(intel_ht_bug);
                x86_pmu.late_ack = true;
                memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
@@ -3741,10 +3808,10 @@ __init int intel_pmu_init(void)
                pr_cont("Haswell events, ");
                break;
 
-       case 61: /* 14nm Broadwell Core-M */
-       case 86: /* 14nm Broadwell Xeon D */
-       case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
-       case 79: /* 14nm Broadwell Server */
+       case INTEL_FAM6_BROADWELL_CORE:
+       case INTEL_FAM6_BROADWELL_XEON_D:
+       case INTEL_FAM6_BROADWELL_GT3E:
+       case INTEL_FAM6_BROADWELL_X:
                x86_pmu.late_ack = true;
                memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@@ -3777,7 +3844,7 @@ __init int intel_pmu_init(void)
                pr_cont("Broadwell events, ");
                break;
 
-       case 87: /* Knights Landing Xeon Phi */
+       case INTEL_FAM6_XEON_PHI_KNL:
                memcpy(hw_cache_event_ids,
                       slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs,
@@ -3795,16 +3862,22 @@ __init int intel_pmu_init(void)
                pr_cont("Knights Landing events, ");
                break;
 
-       case 142: /* 14nm Kabylake Mobile */
-       case 158: /* 14nm Kabylake Desktop */
-       case 78: /* 14nm Skylake Mobile */
-       case 94: /* 14nm Skylake Desktop */
-       case 85: /* 14nm Skylake Server */
+       case INTEL_FAM6_SKYLAKE_MOBILE:
+       case INTEL_FAM6_SKYLAKE_DESKTOP:
+       case INTEL_FAM6_SKYLAKE_X:
+       case INTEL_FAM6_KABYLAKE_MOBILE:
+       case INTEL_FAM6_KABYLAKE_DESKTOP:
                x86_pmu.late_ack = true;
                memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
                intel_pmu_lbr_init_skl();
 
+               /* INT_MISC.RECOVERY_CYCLES has umask 1 in Skylake */
+               event_attr_td_recovery_bubbles.event_str_noht =
+                       "event=0xd,umask=0x1,cmask=1";
+               event_attr_td_recovery_bubbles.event_str_ht =
+                       "event=0xd,umask=0x1,cmask=1,any=1";
+
                x86_pmu.event_constraints = intel_skl_event_constraints;
                x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
                x86_pmu.extra_regs = intel_skl_extra_regs;
@@ -3917,16 +3990,14 @@ __init int intel_pmu_init(void)
  */
 static __init int fixup_ht_bug(void)
 {
-       int cpu = smp_processor_id();
-       int w, c;
+       int c;
        /*
         * problem not present on this CPU model, nothing to do
         */
        if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED))
                return 0;
 
-       w = cpumask_weight(topology_sibling_cpumask(cpu));
-       if (w > 1) {
+       if (topology_max_smt_threads() > 1) {
                pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n");
                return 0;
        }
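
The topdown comment block in this hunk describes how the td_* counters combine; the conventional level-1 breakdown that tools derive from them looks roughly like the sketch below. This is an illustrative reconstruction of the standard topdown formulas, not code from this merge, and it assumes total_slots has already been multiplied by the exported .scale factor (4, or 2 per thread with SMT on):

/*
 * Hedged sketch of the level-1 topdown breakdown built from the
 * td_* events added above. All inputs are raw counts except
 * total_slots, which is assumed to carry its .scale factor.
 */
struct topdown_l1 {
	double retiring;
	double bad_speculation;
	double frontend_bound;
	double backend_bound;
};

static struct topdown_l1 compute_topdown_l1(double total_slots,
					    double slots_issued,
					    double slots_retired,
					    double fetch_bubbles,
					    double recovery_bubbles)
{
	struct topdown_l1 m;

	m.retiring        = slots_retired / total_slots;
	m.bad_speculation = (slots_issued - slots_retired +
			     recovery_bubbles) / total_slots;
	m.frontend_bound  = fetch_bubbles / total_slots;
	m.backend_bound   = 1.0 - (m.retiring + m.bad_speculation +
				   m.frontend_bound);
	return m;
}
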
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 9ba4e41..4c7638b 100644
@@ -89,6 +89,7 @@
 #include <linux/slab.h>
 #include <linux/perf_event.h>
 #include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
 #include "../perf_event.h"
 
 MODULE_LICENSE("GPL");
@@ -511,37 +512,37 @@ static const struct cstate_model slm_cstates __initconst = {
        { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) }
 
 static const struct x86_cpu_id intel_cstates_match[] __initconst = {
-       X86_CSTATES_MODEL(30, nhm_cstates),    /* 45nm Nehalem              */
-       X86_CSTATES_MODEL(26, nhm_cstates),    /* 45nm Nehalem-EP           */
-       X86_CSTATES_MODEL(46, nhm_cstates),    /* 45nm Nehalem-EX           */
+       X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM,    nhm_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EP, nhm_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EX, nhm_cstates),
 
-       X86_CSTATES_MODEL(37, nhm_cstates),    /* 32nm Westmere             */
-       X86_CSTATES_MODEL(44, nhm_cstates),    /* 32nm Westmere-EP          */
-       X86_CSTATES_MODEL(47, nhm_cstates),    /* 32nm Westmere-EX          */
+       X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE,    nhm_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EP, nhm_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EX, nhm_cstates),
 
-       X86_CSTATES_MODEL(42, snb_cstates),    /* 32nm SandyBridge          */
-       X86_CSTATES_MODEL(45, snb_cstates),    /* 32nm SandyBridge-E/EN/EP  */
+       X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE,   snb_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE_X, snb_cstates),
 
-       X86_CSTATES_MODEL(58, snb_cstates),    /* 22nm IvyBridge            */
-       X86_CSTATES_MODEL(62, snb_cstates),    /* 22nm IvyBridge-EP/EX      */
+       X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE,   snb_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE_X, snb_cstates),
 
-       X86_CSTATES_MODEL(60, snb_cstates),    /* 22nm Haswell Core         */
-       X86_CSTATES_MODEL(63, snb_cstates),    /* 22nm Haswell Server       */
-       X86_CSTATES_MODEL(70, snb_cstates),    /* 22nm Haswell + GT3e       */
+       X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_CORE, snb_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_X,    snb_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_GT3E, snb_cstates),
 
-       X86_CSTATES_MODEL(69, hswult_cstates), /* 22nm Haswell ULT          */
+       X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_ULT, hswult_cstates),
 
-       X86_CSTATES_MODEL(55, slm_cstates),    /* 22nm Atom Silvermont      */
-       X86_CSTATES_MODEL(77, slm_cstates),    /* 22nm Atom Avoton/Rangely  */
-       X86_CSTATES_MODEL(76, slm_cstates),    /* 22nm Atom Airmont         */
+       X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT1, slm_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT2, slm_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT,     slm_cstates),
 
-       X86_CSTATES_MODEL(61, snb_cstates),    /* 14nm Broadwell Core-M     */
-       X86_CSTATES_MODEL(86, snb_cstates),    /* 14nm Broadwell Xeon D     */
-       X86_CSTATES_MODEL(71, snb_cstates),    /* 14nm Broadwell + GT3e     */
-       X86_CSTATES_MODEL(79, snb_cstates),    /* 14nm Broadwell Server     */
+       X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_CORE,   snb_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_XEON_D, snb_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_GT3E,   snb_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_X,      snb_cstates),
 
-       X86_CSTATES_MODEL(78, snb_cstates),    /* 14nm Skylake Mobile       */
-       X86_CSTATES_MODEL(94, snb_cstates),    /* 14nm Skylake Desktop      */
+       X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE,  snb_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates),
        { },
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index e30eef4..d0c58b3 100644
@@ -55,6 +55,7 @@
 #include <linux/slab.h>
 #include <linux/perf_event.h>
 #include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
 #include "../perf_event.h"
 
 MODULE_LICENSE("GPL");
@@ -786,26 +787,27 @@ static const struct intel_rapl_init_fun skl_rapl_init __initconst = {
 };
 
 static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
-       X86_RAPL_MODEL_MATCH(42, snb_rapl_init),        /* Sandy Bridge */
-       X86_RAPL_MODEL_MATCH(45, snbep_rapl_init),      /* Sandy Bridge-EP */
+       X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE,   snb_rapl_init),
+       X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_rapl_init),
 
-       X86_RAPL_MODEL_MATCH(58, snb_rapl_init),        /* Ivy Bridge */
-       X86_RAPL_MODEL_MATCH(62, snbep_rapl_init),      /* IvyTown */
+       X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE,   snb_rapl_init),
+       X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, snbep_rapl_init),
 
-       X86_RAPL_MODEL_MATCH(60, hsw_rapl_init),        /* Haswell */
-       X86_RAPL_MODEL_MATCH(63, hsx_rapl_init),        /* Haswell-Server */
-       X86_RAPL_MODEL_MATCH(69, hsw_rapl_init),        /* Haswell-Celeron */
-       X86_RAPL_MODEL_MATCH(70, hsw_rapl_init),        /* Haswell GT3e */
+       X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_rapl_init),
+       X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X,    hsw_rapl_init),
+       X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT,  hsw_rapl_init),
+       X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_rapl_init),
 
-       X86_RAPL_MODEL_MATCH(61, hsw_rapl_init),        /* Broadwell */
-       X86_RAPL_MODEL_MATCH(71, hsw_rapl_init),        /* Broadwell-H */
-       X86_RAPL_MODEL_MATCH(79, hsx_rapl_init),        /* Broadwell-Server */
-       X86_RAPL_MODEL_MATCH(86, hsx_rapl_init),        /* Broadwell Xeon D */
+       X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE,   hsw_rapl_init),
+       X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E,   hsw_rapl_init),
+       X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X,      hsw_rapl_init),
+       X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init),
 
-       X86_RAPL_MODEL_MATCH(87, knl_rapl_init),        /* Knights Landing */
+       X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init),
 
-       X86_RAPL_MODEL_MATCH(78, skl_rapl_init),        /* Skylake */
-       X86_RAPL_MODEL_MATCH(94, skl_rapl_init),        /* Skylake H/S */
+       X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE,  skl_rapl_init),
+       X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init),
+       X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X,       hsx_rapl_init),
        {},
 };
 
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index fce7406..4e70d27 100644
@@ -1,4 +1,5 @@
 #include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
 #include "uncore.h"
 
 static struct intel_uncore_type *empty_uncore[] = { NULL, };
@@ -882,7 +883,7 @@ uncore_types_init(struct intel_uncore_type **types, bool setid)
 static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
        struct intel_uncore_type *type;
-       struct intel_uncore_pmu *pmu;
+       struct intel_uncore_pmu *pmu = NULL;
        struct intel_uncore_box *box;
        int phys_id, pkg, ret;
 
@@ -903,20 +904,37 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
        }
 
        type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
+
        /*
-        * for performance monitoring unit with multiple boxes,
-        * each box has a different function id.
+        * Some platforms, e.g.  Knights Landing, use a common PCI device ID
+        * for multiple instances of an uncore PMU device type. We should check
+        * PCI slot and func to indicate the uncore box.
         */
-       pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
-       /* Knights Landing uses a common PCI device ID for multiple instances of
-        * an uncore PMU device type. There is only one entry per device type in
-        * the knl_uncore_pci_ids table inspite of multiple devices present for
-        * some device types. Hence PCI device idx would be 0 for all devices.
-        * So increment pmu pointer to point to an unused array element.
-        */
-       if (boot_cpu_data.x86_model == 87) {
-               while (pmu->func_id >= 0)
-                       pmu++;
+       if (id->driver_data & ~0xffff) {
+               struct pci_driver *pci_drv = pdev->driver;
+               const struct pci_device_id *ids = pci_drv->id_table;
+               unsigned int devfn;
+
+               while (ids && ids->vendor) {
+                       if ((ids->vendor == pdev->vendor) &&
+                           (ids->device == pdev->device)) {
+                               devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
+                                                 UNCORE_PCI_DEV_FUNC(ids->driver_data));
+                               if (devfn == pdev->devfn) {
+                                       pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
+                                       break;
+                               }
+                       }
+                       ids++;
+               }
+               if (pmu == NULL)
+                       return -ENODEV;
+       } else {
+               /*
+                * for performance monitoring unit with multiple boxes,
+                * each box has a different function id.
+                */
+               pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
        }
 
        if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL))
@@ -1365,26 +1383,26 @@ static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
 };
 
 static const struct x86_cpu_id intel_uncore_match[] __initconst = {
-       X86_UNCORE_MODEL_MATCH(26, nhm_uncore_init),    /* Nehalem */
-       X86_UNCORE_MODEL_MATCH(30, nhm_uncore_init),
-       X86_UNCORE_MODEL_MATCH(37, nhm_uncore_init),    /* Westmere */
-       X86_UNCORE_MODEL_MATCH(44, nhm_uncore_init),
-       X86_UNCORE_MODEL_MATCH(42, snb_uncore_init),    /* Sandy Bridge */
-       X86_UNCORE_MODEL_MATCH(58, ivb_uncore_init),    /* Ivy Bridge */
-       X86_UNCORE_MODEL_MATCH(60, hsw_uncore_init),    /* Haswell */
-       X86_UNCORE_MODEL_MATCH(69, hsw_uncore_init),    /* Haswell Celeron */
-       X86_UNCORE_MODEL_MATCH(70, hsw_uncore_init),    /* Haswell */
-       X86_UNCORE_MODEL_MATCH(61, bdw_uncore_init),    /* Broadwell */
-       X86_UNCORE_MODEL_MATCH(71, bdw_uncore_init),    /* Broadwell */
-       X86_UNCORE_MODEL_MATCH(45, snbep_uncore_init),  /* Sandy Bridge-EP */
-       X86_UNCORE_MODEL_MATCH(46, nhmex_uncore_init),  /* Nehalem-EX */
-       X86_UNCORE_MODEL_MATCH(47, nhmex_uncore_init),  /* Westmere-EX aka. Xeon E7 */
-       X86_UNCORE_MODEL_MATCH(62, ivbep_uncore_init),  /* Ivy Bridge-EP */
-       X86_UNCORE_MODEL_MATCH(63, hswep_uncore_init),  /* Haswell-EP */
-       X86_UNCORE_MODEL_MATCH(79, bdx_uncore_init),    /* BDX-EP */
-       X86_UNCORE_MODEL_MATCH(86, bdx_uncore_init),    /* BDX-DE */
-       X86_UNCORE_MODEL_MATCH(87, knl_uncore_init),    /* Knights Landing */
-       X86_UNCORE_MODEL_MATCH(94, skl_uncore_init),    /* SkyLake */
+       X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP,     nhm_uncore_init),
+       X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM,        nhm_uncore_init),
+       X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE,       nhm_uncore_init),
+       X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EP,    nhm_uncore_init),
+       X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE,    snb_uncore_init),
+       X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE,      ivb_uncore_init),
+       X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE,   hsw_uncore_init),
+       X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT,    hsw_uncore_init),
+       X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E,   hsw_uncore_init),
+       X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, bdw_uncore_init),
+       X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, bdw_uncore_init),
+       X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X,  snbep_uncore_init),
+       X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EX,     nhmex_uncore_init),
+       X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EX,    nhmex_uncore_init),
+       X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X,    ivbep_uncore_init),
+       X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_X,      hswep_uncore_init),
+       X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_X,    bdx_uncore_init),
+       X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, bdx_uncore_init),
+       X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL,   knl_uncore_init),
+       X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init),
        {},
 };
 
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 79766b9..66c3a36 100644
 #define UNCORE_PMC_IDX_FIXED           UNCORE_PMC_IDX_MAX_GENERIC
 #define UNCORE_PMC_IDX_MAX             (UNCORE_PMC_IDX_FIXED + 1)
 
+#define UNCORE_PCI_DEV_FULL_DATA(dev, func, type, idx) \
+               ((dev << 24) | (func << 16) | (type << 8) | idx)
 #define UNCORE_PCI_DEV_DATA(type, idx) ((type << 8) | idx)
+#define UNCORE_PCI_DEV_DEV(data)       ((data >> 24) & 0xff)
+#define UNCORE_PCI_DEV_FUNC(data)      ((data >> 16) & 0xff)
 #define UNCORE_PCI_DEV_TYPE(data)      ((data >> 8) & 0xff)
 #define UNCORE_PCI_DEV_IDX(data)       (data & 0xff)
 #define UNCORE_EXTRA_PCI_DEV           0xff
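
The new UNCORE_PCI_DEV_FULL_DATA() layout packs the expected PCI device and function numbers into the top two bytes of driver_data, which is what uncore_pci_probe() detects with id->driver_data & ~0xffff. A self-contained sketch of the packing and unpacking; the macros are copied from this hunk and the numeric type value is only an example:

#include <stdio.h>

/* Copied from the uncore.h hunk above. */
#define UNCORE_PCI_DEV_FULL_DATA(dev, func, type, idx) \
		((dev << 24) | (func << 16) | (type << 8) | idx)
#define UNCORE_PCI_DEV_DEV(data)	((data >> 24) & 0xff)
#define UNCORE_PCI_DEV_FUNC(data)	((data >> 16) & 0xff)
#define UNCORE_PCI_DEV_TYPE(data)	((data >> 8) & 0xff)
#define UNCORE_PCI_DEV_IDX(data)	(data & 0xff)

int main(void)
{
	/*
	 * Device 8, function 2 as in the KNL "MC0 DClk CH 0" entry
	 * below; the type value here is just an example.
	 */
	unsigned long data = UNCORE_PCI_DEV_FULL_DATA(8, 2, 1, 0);

	printf("dev=%lu func=%lu type=%lu idx=%lu\n",
	       UNCORE_PCI_DEV_DEV(data), UNCORE_PCI_DEV_FUNC(data),
	       UNCORE_PCI_DEV_TYPE(data), UNCORE_PCI_DEV_IDX(data));
	return 0;
}
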
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 874e8bd..824e540 100644
@@ -2164,21 +2164,101 @@ static struct intel_uncore_type *knl_pci_uncores[] = {
 */
 
 static const struct pci_device_id knl_uncore_pci_ids[] = {
-       { /* MC UClk */
+       { /* MC0 UClk */
                PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841),
-               .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_UCLK, 0),
+               .driver_data = UNCORE_PCI_DEV_FULL_DATA(10, 0, KNL_PCI_UNCORE_MC_UCLK, 0),
        },
-       { /* MC DClk Channel */
+       { /* MC1 UClk */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841),
+               .driver_data = UNCORE_PCI_DEV_FULL_DATA(11, 0, KNL_PCI_UNCORE_MC_UCLK, 1),
+       },
+       { /* MC0 DClk CH 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+               .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 2, KNL_PCI_UNCORE_MC_DCLK, 0),
+       },
+       { /* MC0 DClk CH 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+               .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 3, KNL_PCI_UNCORE_MC_DCLK, 1),
+       },
+       { /* MC0 DClk CH 2 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+               .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 4, KNL_PCI_UNCORE_MC_DCLK, 2),
+       },
+       { /* MC1 DClk CH 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+               .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 2, KNL_PCI_UNCORE_MC_DCLK, 3),
+       },
+       { /* MC1 DClk CH 1 */
                PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
-               .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_DCLK, 0),
+               .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 3, KNL_PCI_UNCORE_MC_DCLK, 4),
+       },
+       { /* MC1 DClk CH 2 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+               .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 4, KNL_PCI_UNCORE_MC_DCLK, 5),
+       },
+       { /* EDC0 UClk */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+               .driver_data = UNCORE_PCI_DEV_FULL_DATA(15, 0, KNL_PCI_UNCORE_EDC_UCLK, 0),
+       },
+       { /* EDC1 UClk */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+               .driver_data = UNCORE_PCI_DEV_FULL_DATA(16, 0, KNL_PCI_UNCORE_EDC_UCLK, 1),
+       },
+       { /* EDC2 UClk */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+               .driver_data = UNCORE_PCI_DEV_FULL_DATA(17, 0, KNL_PCI_UNCORE_EDC_UCLK, 2),
+       },
+       { /* EDC3 UClk */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+               .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, KNL_PCI_UNCORE_EDC_UCLK, 3),
        },
-       { /* EDC UClk */
+       { /* EDC4 UClk */
                PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
-               .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_UCLK, 0),
+               .driver_data = UNCORE_PCI_DEV_FULL_DATA(19, 0, KNL_PCI_UNCORE_EDC_UCLK, 4),
+       },
+       { /* EDC5 UClk */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+               .driver_data = UNCORE_PCI_DEV_FULL_DATA(20, 0, KNL_PCI_UNCORE_EDC_UCLK, 5),
+       },
+       { /* EDC6 UClk */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+               .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 0, KNL_PCI_UNCORE_EDC_UCLK, 6),
+       },
+       { /* EDC7 UClk */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+               .driver_data = UNCORE_PCI_DEV_FULL_DATA(22, 0, KNL_PCI_UNCORE_EDC_UCLK, 7),
+       },
+       { /* EDC0 EClk */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+               .driver_data = UNCORE_PCI_DEV_FULL_DATA(24, 2, KNL_PCI_UNCORE_EDC_ECLK, 0),
+       },
+       { /* EDC1 EClk */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+               .driver_data = UNCORE_PCI_DEV_FULL_DATA(25, 2, KNL_PCI_UNCORE_EDC_ECLK, 1),
+       },
+       { /* EDC2 EClk */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+               .driver_data = UNCORE_PCI_DEV_FULL_DATA(26, 2, KNL_PCI_UNCORE_EDC_ECLK, 2),
+       },
+       { /* EDC3 EClk */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+               .driver_data = UNCORE_PCI_DEV_FULL_DATA(27, 2, KNL_PCI_UNCORE_EDC_ECLK, 3),
+       },
+       { /* EDC4 EClk */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+               .driver_data = UNCORE_PCI_DEV_FULL_DATA(28, 2, KNL_PCI_UNCORE_EDC_ECLK, 4),
+       },
+       { /* EDC5 EClk */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+               .driver_data = UNCORE_PCI_DEV_FULL_DATA(29, 2, KNL_PCI_UNCORE_EDC_ECLK, 5),
+       },
+       { /* EDC6 EClk */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+               .driver_data = UNCORE_PCI_DEV_FULL_DATA(30, 2, KNL_PCI_UNCORE_EDC_ECLK, 6),
        },
-       { /* EDC EClk */
+       { /* EDC7 EClk */
                PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
-               .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_ECLK, 0),
+               .driver_data = UNCORE_PCI_DEV_FULL_DATA(31, 2, KNL_PCI_UNCORE_EDC_ECLK, 7),
        },
        { /* M2PCIe */
                PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7817),
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index 85ef3c2..50b3a05 100644
@@ -1,4 +1,5 @@
 #include <linux/perf_event.h>
+#include <asm/intel-family.h>
 
 enum perf_msr_id {
        PERF_MSR_TSC                    = 0,
@@ -34,39 +35,43 @@ static bool test_intel(int idx)
                return false;
 
        switch (boot_cpu_data.x86_model) {
-       case 30: /* 45nm Nehalem    */
-       case 26: /* 45nm Nehalem-EP */
-       case 46: /* 45nm Nehalem-EX */
-
-       case 37: /* 32nm Westmere    */
-       case 44: /* 32nm Westmere-EP */
-       case 47: /* 32nm Westmere-EX */
-
-       case 42: /* 32nm SandyBridge         */
-       case 45: /* 32nm SandyBridge-E/EN/EP */
-
-       case 58: /* 22nm IvyBridge       */
-       case 62: /* 22nm IvyBridge-EP/EX */
-
-       case 60: /* 22nm Haswell Core */
-       case 63: /* 22nm Haswell Server */
-       case 69: /* 22nm Haswell ULT */
-       case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-
-       case 61: /* 14nm Broadwell Core-M */
-       case 86: /* 14nm Broadwell Xeon D */
-       case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
-       case 79: /* 14nm Broadwell Server */
-
-       case 55: /* 22nm Atom "Silvermont"                */
-       case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
-       case 76: /* 14nm Atom "Airmont"                   */
+       case INTEL_FAM6_NEHALEM:
+       case INTEL_FAM6_NEHALEM_EP:
+       case INTEL_FAM6_NEHALEM_EX:
+
+       case INTEL_FAM6_WESTMERE:
+       case INTEL_FAM6_WESTMERE2:
+       case INTEL_FAM6_WESTMERE_EP:
+       case INTEL_FAM6_WESTMERE_EX:
+
+       case INTEL_FAM6_SANDYBRIDGE:
+       case INTEL_FAM6_SANDYBRIDGE_X:
+
+       case INTEL_FAM6_IVYBRIDGE:
+       case INTEL_FAM6_IVYBRIDGE_X:
+
+       case INTEL_FAM6_HASWELL_CORE:
+       case INTEL_FAM6_HASWELL_X:
+       case INTEL_FAM6_HASWELL_ULT:
+       case INTEL_FAM6_HASWELL_GT3E:
+
+       case INTEL_FAM6_BROADWELL_CORE:
+       case INTEL_FAM6_BROADWELL_XEON_D:
+       case INTEL_FAM6_BROADWELL_GT3E:
+       case INTEL_FAM6_BROADWELL_X:
+
+       case INTEL_FAM6_ATOM_SILVERMONT1:
+       case INTEL_FAM6_ATOM_SILVERMONT2:
+       case INTEL_FAM6_ATOM_AIRMONT:
                if (idx == PERF_MSR_SMI)
                        return true;
                break;
 
-       case 78: /* 14nm Skylake Mobile */
-       case 94: /* 14nm Skylake Desktop */
+       case INTEL_FAM6_SKYLAKE_MOBILE:
+       case INTEL_FAM6_SKYLAKE_DESKTOP:
+       case INTEL_FAM6_SKYLAKE_X:
+       case INTEL_FAM6_KABYLAKE_MOBILE:
+       case INTEL_FAM6_KABYLAKE_DESKTOP:
                if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
                        return true;
                break;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 8bd764d..e2d7285 100644
@@ -668,6 +668,14 @@ static struct perf_pmu_events_attr event_attr_##v = {                      \
        .event_str      = str,                                          \
 };
 
+#define EVENT_ATTR_STR_HT(_name, v, noht, ht)                          \
+static struct perf_pmu_events_ht_attr event_attr_##v = {               \
+       .attr           = __ATTR(_name, 0444, events_ht_sysfs_show, NULL),\
+       .id             = 0,                                            \
+       .event_str_noht = noht,                                         \
+       .event_str_ht   = ht,                                           \
+}
+
 extern struct x86_pmu x86_pmu __read_mostly;
 
 static inline bool x86_pmu_has_lbr_callstack(void)
@@ -803,6 +811,8 @@ struct attribute **merge_attr(struct attribute **a, struct attribute **b);
 
 ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
                          char *page);
+ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr,
+                         char *page);
 
 #ifdef CONFIG_CPU_SUP_AMD
 
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 7f991bd..e346572 100644
@@ -129,6 +129,14 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu);
 
 extern unsigned int __max_logical_packages;
 #define topology_max_packages()                        (__max_logical_packages)
+
+extern int __max_smt_threads;
+
+static inline int topology_max_smt_threads(void)
+{
+       return __max_smt_threads;
+}
+
 int topology_update_package_map(unsigned int apicid, unsigned int cpu);
 extern int topology_phys_to_logical_pkg(unsigned int pkg);
 #else
@@ -136,6 +144,7 @@ extern int topology_phys_to_logical_pkg(unsigned int pkg);
 static inline int
 topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
 static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
+static inline int topology_max_smt_threads(void) { return 1; }
 #endif
 
 static inline void arch_fix_phys_package_id(int num, u32 slot)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index fafe8b9..2ed0ec1 100644
@@ -105,6 +105,9 @@ static unsigned int max_physical_pkg_id __read_mostly;
 unsigned int __max_logical_packages __read_mostly;
 EXPORT_SYMBOL(__max_logical_packages);
 
+/* Maximum number of SMT threads on any online core */
+int __max_smt_threads __read_mostly;
+
 static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
 {
        unsigned long flags;
@@ -493,7 +496,7 @@ void set_cpu_sibling_map(int cpu)
        bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1;
        struct cpuinfo_x86 *c = &cpu_data(cpu);
        struct cpuinfo_x86 *o;
-       int i;
+       int i, threads;
 
        cpumask_set_cpu(cpu, cpu_sibling_setup_mask);
 
@@ -550,6 +553,10 @@ void set_cpu_sibling_map(int cpu)
                if (match_die(c, o) && !topology_same_node(c, o))
                        primarily_use_numa_for_topology();
        }
+
+       threads = cpumask_weight(topology_sibling_cpumask(cpu));
+       if (threads > __max_smt_threads)
+               __max_smt_threads = threads;
 }
 
 /* maps the cpu to the sched domain representing multi-core */
@@ -1441,6 +1448,21 @@ __init void prefill_possible_map(void)
 
 #ifdef CONFIG_HOTPLUG_CPU
 
+/* Recompute SMT state for all CPUs on offline */
+static void recompute_smt_state(void)
+{
+       int max_threads, cpu;
+
+       max_threads = 0;
+       for_each_online_cpu (cpu) {
+               int threads = cpumask_weight(topology_sibling_cpumask(cpu));
+
+               if (threads > max_threads)
+                       max_threads = threads;
+       }
+       __max_smt_threads = max_threads;
+}
+
 static void remove_siblinginfo(int cpu)
 {
        int sibling;
@@ -1465,6 +1487,7 @@ static void remove_siblinginfo(int cpu)
        c->phys_proc_id = 0;
        c->cpu_core_id = 0;
        cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
+       recompute_smt_state();
 }
 
 static void remove_cpu_from_maps(int cpu)
diff --git a/drivers/platform/x86/intel_pmc_core.c b/drivers/platform/x86/intel_pmc_core.c
index 2776bec..e57f923 100644
@@ -26,6 +26,7 @@
 #include <linux/seq_file.h>
 
 #include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
 #include <asm/pmc_core.h>
 
 #include "intel_pmc_core.h"
@@ -138,10 +139,10 @@ static inline void pmc_core_dbgfs_unregister(struct pmc_dev *pmcdev)
 #endif /* CONFIG_DEBUG_FS */
 
 static const struct x86_cpu_id intel_pmc_core_ids[] = {
-       { X86_VENDOR_INTEL, 6, 0x4e, X86_FEATURE_MWAIT,
-               (kernel_ulong_t)NULL}, /* Skylake CPUID Signature */
-       { X86_VENDOR_INTEL, 6, 0x5e, X86_FEATURE_MWAIT,
-               (kernel_ulong_t)NULL}, /* Skylake CPUID Signature */
+       { X86_VENDOR_INTEL, 6, INTEL_FAM6_SKYLAKE_MOBILE, X86_FEATURE_MWAIT,
+               (kernel_ulong_t)NULL},
+       { X86_VENDOR_INTEL, 6, INTEL_FAM6_SKYLAKE_DESKTOP, X86_FEATURE_MWAIT,
+               (kernel_ulong_t)NULL},
        {}
 };
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 1a827ce..7921f4f 100644
@@ -517,6 +517,11 @@ struct swevent_hlist {
 struct perf_cgroup;
 struct ring_buffer;
 
+struct pmu_event_list {
+       raw_spinlock_t          lock;
+       struct list_head        list;
+};
+
 /**
  * struct perf_event - performance event kernel representation:
  */
@@ -675,6 +680,7 @@ struct perf_event {
        int                             cgrp_defer_enabled;
 #endif
 
+       struct list_head                sb_list;
 #endif /* CONFIG_PERF_EVENTS */
 };
 
@@ -1074,7 +1080,7 @@ extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct
 extern struct perf_callchain_entry *
 get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
                   u32 max_stack, bool crosstask, bool add_mark);
-extern int get_callchain_buffers(void);
+extern int get_callchain_buffers(int max_stack);
 extern void put_callchain_buffers(void);
 
 extern int sysctl_perf_event_max_stack;
@@ -1326,6 +1332,13 @@ struct perf_pmu_events_attr {
        const char *event_str;
 };
 
+struct perf_pmu_events_ht_attr {
+       struct device_attribute                 attr;
+       u64                                     id;
+       const char                              *event_str_ht;
+       const char                              *event_str_noht;
+};
+
 ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
                              char *page);
 
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 36ce552..c66a485 100644
@@ -276,6 +276,9 @@ enum perf_event_read_format {
 
 /*
  * Hardware event_id to monitor via a performance monitoring event:
+ *
+ * @sample_max_stack: Max number of frame pointers in a callchain,
+ *                   should be < /proc/sys/kernel/perf_event_max_stack
  */
 struct perf_event_attr {
 
@@ -385,7 +388,8 @@ struct perf_event_attr {
         * Wakeup watermark for AUX area
         */
        __u32   aux_watermark;
-       __u32   __reserved_2;   /* align to __u64 */
+       __u16   sample_max_stack;
+       __u16   __reserved_2;   /* align to __u64 */
 };
 
 #define perf_flags(attr)       (*(&(attr)->read_format + 1))
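
Because sample_max_stack takes over half of the old __reserved_2 padding, existing binaries that zeroed the struct keep the old behaviour: perf_event_open() substitutes the perf_event_max_stack sysctl for a zero value, and get_callchain_buffers() fails with -EOVERFLOW if the request exceeds that sysctl (both visible in the kernel/events hunks below). A hedged sketch of requesting a per-event callchain depth from user space, using the raw syscall since glibc provides no wrapper:

/*
 * Hedged sketch: request callchain sampling with a per-event stack
 * depth cap via the new sample_max_stack field. Error handling is
 * minimal; a real tool would also mmap the ring buffer.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size		= sizeof(attr);
	attr.type		= PERF_TYPE_HARDWARE;
	attr.config		= PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period	= 100000;
	attr.sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_CALLCHAIN;
	attr.sample_max_stack	= 32;	/* new field; 0 means "use the sysctl" */

	/* pid = 0 (self), cpu = -1 (any), group_fd = -1, flags = 0 */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	close(fd);
	return 0;
}
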
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 080a2df..bf4495f 100644
@@ -99,7 +99,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
        if (err)
                goto free_smap;
 
-       err = get_callchain_buffers();
+       err = get_callchain_buffers(sysctl_perf_event_max_stack);
        if (err)
                goto free_smap;
 
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index 179ef46..e9fdb52 100644
@@ -104,7 +104,7 @@ fail:
        return -ENOMEM;
 }
 
-int get_callchain_buffers(void)
+int get_callchain_buffers(int event_max_stack)
 {
        int err = 0;
        int count;
@@ -121,6 +121,15 @@ int get_callchain_buffers(void)
                /* If the allocation failed, give up */
                if (!callchain_cpus_entries)
                        err = -ENOMEM;
+               /*
+                * If requesting per event more than the global cap,
+                * return a different error to help userspace figure
+                * this out.
+                *
+                * And also do it here so that we have &callchain_mutex held.
+                */
+               if (event_max_stack > sysctl_perf_event_max_stack)
+                       err = -EOVERFLOW;
                goto exit;
        }
 
@@ -174,11 +183,12 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs)
        bool user   = !event->attr.exclude_callchain_user;
        /* Disallow cross-task user callchains. */
        bool crosstask = event->ctx->task && event->ctx->task != current;
+       const u32 max_stack = event->attr.sample_max_stack;
 
        if (!kernel && !user)
                return NULL;
 
-       return get_perf_callchain(regs, 0, kernel, user, sysctl_perf_event_max_stack, crosstask, true);
+       return get_perf_callchain(regs, 0, kernel, user, max_stack, crosstask, true);
 }
 
 struct perf_callchain_entry *
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 9c51ec3..9345028 100644
@@ -335,6 +335,7 @@ static atomic_t perf_sched_count;
 
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
 static DEFINE_PER_CPU(int, perf_sched_cb_usages);
+static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
 
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
@@ -396,6 +397,13 @@ int perf_proc_update_handler(struct ctl_table *table, int write,
        if (ret || !write)
                return ret;
 
+       /*
+        * If throttling is disabled don't allow the write:
+        */
+       if (sysctl_perf_cpu_time_max_percent == 100 ||
+           sysctl_perf_cpu_time_max_percent == 0)
+               return -EINVAL;
+
        max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ);
        perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
        update_perf_cpu_limits();
@@ -3665,6 +3673,39 @@ static void free_event_rcu(struct rcu_head *head)
 static void ring_buffer_attach(struct perf_event *event,
                               struct ring_buffer *rb);
 
+static void detach_sb_event(struct perf_event *event)
+{
+       struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu);
+
+       raw_spin_lock(&pel->lock);
+       list_del_rcu(&event->sb_list);
+       raw_spin_unlock(&pel->lock);
+}
+
+static bool is_sb_event(struct perf_event *event)
+{
+       struct perf_event_attr *attr = &event->attr;
+
+       if (event->parent)
+               return false;
+
+       if (event->attach_state & PERF_ATTACH_TASK)
+               return false;
+
+       if (attr->mmap || attr->mmap_data || attr->mmap2 ||
+           attr->comm || attr->comm_exec ||
+           attr->task ||
+           attr->context_switch)
+               return true;
+       return false;
+}
+
+static void unaccount_pmu_sb_event(struct perf_event *event)
+{
+       if (is_sb_event(event))
+               detach_sb_event(event);
+}
+
 static void unaccount_event_cpu(struct perf_event *event, int cpu)
 {
        if (event->parent)
@@ -3728,6 +3769,8 @@ static void unaccount_event(struct perf_event *event)
        }
 
        unaccount_event_cpu(event, event->cpu);
+
+       unaccount_pmu_sb_event(event);
 }
 
 static void perf_sched_delayed(struct work_struct *work)
@@ -5854,11 +5897,11 @@ perf_event_read_event(struct perf_event *event,
        perf_output_end(&handle);
 }
 
-typedef void (perf_event_aux_output_cb)(struct perf_event *event, void *data);
+typedef void (perf_iterate_f)(struct perf_event *event, void *data);
 
 static void
-perf_event_aux_ctx(struct perf_event_context *ctx,
-                  perf_event_aux_output_cb output,
+perf_iterate_ctx(struct perf_event_context *ctx,
+                  perf_iterate_f output,
                   void *data, bool all)
 {
        struct perf_event *event;
@@ -5875,52 +5918,55 @@ perf_event_aux_ctx(struct perf_event_context *ctx,
        }
 }
 
-static void
-perf_event_aux_task_ctx(perf_event_aux_output_cb output, void *data,
-                       struct perf_event_context *task_ctx)
+static void perf_iterate_sb_cpu(perf_iterate_f output, void *data)
 {
-       rcu_read_lock();
-       preempt_disable();
-       perf_event_aux_ctx(task_ctx, output, data, false);
-       preempt_enable();
-       rcu_read_unlock();
+       struct pmu_event_list *pel = this_cpu_ptr(&pmu_sb_events);
+       struct perf_event *event;
+
+       list_for_each_entry_rcu(event, &pel->list, sb_list) {
+               if (event->state < PERF_EVENT_STATE_INACTIVE)
+                       continue;
+               if (!event_filter_match(event))
+                       continue;
+               output(event, data);
+       }
 }
 
+/*
+ * Iterate all events that need to receive side-band events.
+ *
+ * For new callers; ensure that account_pmu_sb_event() includes
+ * your event, otherwise it might not get delivered.
+ */
 static void
-perf_event_aux(perf_event_aux_output_cb output, void *data,
+perf_iterate_sb(perf_iterate_f output, void *data,
               struct perf_event_context *task_ctx)
 {
-       struct perf_cpu_context *cpuctx;
        struct perf_event_context *ctx;
-       struct pmu *pmu;
        int ctxn;
 
+       rcu_read_lock();
+       preempt_disable();
+
        /*
-        * If we have task_ctx != NULL we only notify
-        * the task context itself. The task_ctx is set
-        * only for EXIT events before releasing task
+        * If we have task_ctx != NULL we only notify the task context itself.
+        * The task_ctx is set only for EXIT events before releasing task
         * context.
         */
        if (task_ctx) {
-               perf_event_aux_task_ctx(output, data, task_ctx);
-               return;
+               perf_iterate_ctx(task_ctx, output, data, false);
+               goto done;
        }
 
-       rcu_read_lock();
-       list_for_each_entry_rcu(pmu, &pmus, entry) {
-               cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
-               if (cpuctx->unique_pmu != pmu)
-                       goto next;
-               perf_event_aux_ctx(&cpuctx->ctx, output, data, false);
-               ctxn = pmu->task_ctx_nr;
-               if (ctxn < 0)
-                       goto next;
+       perf_iterate_sb_cpu(output, data);
+
+       for_each_task_context_nr(ctxn) {
                ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
                if (ctx)
-                       perf_event_aux_ctx(ctx, output, data, false);
-next:
-               put_cpu_ptr(pmu->pmu_cpu_context);
+                       perf_iterate_ctx(ctx, output, data, false);
        }
+done:
+       preempt_enable();
        rcu_read_unlock();
 }
 
@@ -5969,7 +6015,7 @@ void perf_event_exec(void)
 
                perf_event_enable_on_exec(ctxn);
 
-               perf_event_aux_ctx(ctx, perf_event_addr_filters_exec, NULL,
+               perf_iterate_ctx(ctx, perf_event_addr_filters_exec, NULL,
                                   true);
        }
        rcu_read_unlock();
@@ -6013,9 +6059,9 @@ static int __perf_pmu_output_stop(void *info)
        };
 
        rcu_read_lock();
-       perf_event_aux_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro, false);
+       perf_iterate_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro, false);
        if (cpuctx->task_ctx)
-               perf_event_aux_ctx(cpuctx->task_ctx, __perf_event_output_stop,
+               perf_iterate_ctx(cpuctx->task_ctx, __perf_event_output_stop,
                                   &ro, false);
        rcu_read_unlock();
 
@@ -6144,7 +6190,7 @@ static void perf_event_task(struct task_struct *task,
                },
        };
 
-       perf_event_aux(perf_event_task_output,
+       perf_iterate_sb(perf_event_task_output,
                       &task_event,
                       task_ctx);
 }
@@ -6223,7 +6269,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
 
        comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
 
-       perf_event_aux(perf_event_comm_output,
+       perf_iterate_sb(perf_event_comm_output,
                       comm_event,
                       NULL);
 }
@@ -6454,7 +6500,7 @@ got_name:
 
        mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
 
-       perf_event_aux(perf_event_mmap_output,
+       perf_iterate_sb(perf_event_mmap_output,
                       mmap_event,
                       NULL);
 
@@ -6537,7 +6583,7 @@ static void perf_addr_filters_adjust(struct vm_area_struct *vma)
                if (!ctx)
                        continue;
 
-               perf_event_aux_ctx(ctx, __perf_addr_filters_adjust, vma, true);
+               perf_iterate_ctx(ctx, __perf_addr_filters_adjust, vma, true);
        }
        rcu_read_unlock();
 }
@@ -6724,7 +6770,7 @@ static void perf_event_switch(struct task_struct *task,
                },
        };
 
-       perf_event_aux(perf_event_switch_output,
+       perf_iterate_sb(perf_event_switch_output,
                       &switch_event,
                       NULL);
 }
@@ -8646,6 +8692,28 @@ unlock:
        return pmu;
 }
 
+static void attach_sb_event(struct perf_event *event)
+{
+       struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu);
+
+       raw_spin_lock(&pel->lock);
+       list_add_rcu(&event->sb_list, &pel->list);
+       raw_spin_unlock(&pel->lock);
+}
+
+/*
+ * We keep a list of all !task (and therefore per-cpu) events
+ * that need to receive side-band records.
+ *
+ * This avoids having to scan all the various PMU per-cpu contexts
+ * looking for them.
+ */
+static void account_pmu_sb_event(struct perf_event *event)
+{
+       if (is_sb_event(event))
+               attach_sb_event(event);
+}
+
 static void account_event_cpu(struct perf_event *event, int cpu)
 {
        if (event->parent)
@@ -8726,6 +8794,8 @@ static void account_event(struct perf_event *event)
 enabled:
 
        account_event_cpu(event, event->cpu);
+
+       account_pmu_sb_event(event);
 }
 
 /*
@@ -8874,7 +8944,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 
        if (!event->parent) {
                if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
-                       err = get_callchain_buffers();
+                       err = get_callchain_buffers(attr->sample_max_stack);
                        if (err)
                                goto err_addr_filters;
                }
@@ -9196,6 +9266,9 @@ SYSCALL_DEFINE5(perf_event_open,
                        return -EINVAL;
        }
 
+       if (!attr.sample_max_stack)
+               attr.sample_max_stack = sysctl_perf_event_max_stack;
+
        /*
         * In cgroup mode, the pid argument is used to pass the fd
         * opened to the cgroup directory in cgroupfs. The cpu argument
@@ -9269,7 +9342,7 @@ SYSCALL_DEFINE5(perf_event_open,
 
        if (is_sampling_event(event)) {
                if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) {
-                       err = -ENOTSUPP;
+                       err = -EOPNOTSUPP;
                        goto err_alloc;
                }
        }
@@ -10231,6 +10304,9 @@ static void __init perf_event_init_all_cpus(void)
                swhash = &per_cpu(swevent_htable, cpu);
                mutex_init(&swhash->hlist_mutex);
                INIT_LIST_HEAD(&per_cpu(active_ctx_list, cpu));
+
+               INIT_LIST_HEAD(&per_cpu(pmu_sb_events.list, cpu));
+               raw_spin_lock_init(&per_cpu(pmu_sb_events.lock, cpu));
        }
 }
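A minimal userspace sketch of a caller using the new sample_max_stack field that the hunks above wire through to get_callchain_buffers() and default in perf_event_open(). This is illustrative only and not part of the patch; the event choice and the 32-frame cap are arbitrary.

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>

/* Open a sampling event whose callchains are capped at 32 frames.
 * Leaving sample_max_stack at 0 now means "use the perf_event_max_stack
 * sysctl", per the perf_event_open() change above. */
static int open_callchain_event(pid_t pid)
{
        struct perf_event_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;
        attr.config = PERF_COUNT_HW_CPU_CYCLES;
        attr.sample_period = 100000;
        attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_CALLCHAIN;
        attr.sample_max_stack = 32;     /* new field from this series */

        return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
}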
 
index 316f308..67ff93e 100644 (file)
@@ -10,6 +10,7 @@ endif
 
 CC = $(CROSS_COMPILE)gcc
 AR = $(CROSS_COMPILE)ar
+LD = $(CROSS_COMPILE)ld
 
 MAKEFLAGS += --no-print-directory
 
index 0e636c4..b0a035f 100644 (file)
@@ -85,7 +85,8 @@ int fdarray__add(struct fdarray *fda, int fd, short revents)
 }
 
 int fdarray__filter(struct fdarray *fda, short revents,
-                   void (*entry_destructor)(struct fdarray *fda, int fd))
+                   void (*entry_destructor)(struct fdarray *fda, int fd, void *arg),
+                   void *arg)
 {
        int fd, nr = 0;
 
@@ -95,7 +96,7 @@ int fdarray__filter(struct fdarray *fda, short revents,
        for (fd = 0; fd < fda->nr; ++fd) {
                if (fda->entries[fd].revents & revents) {
                        if (entry_destructor)
-                               entry_destructor(fda, fd);
+                               entry_destructor(fda, fd, arg);
 
                        continue;
                }
index 45db018..e87fd80 100644 (file)
@@ -34,7 +34,8 @@ void fdarray__delete(struct fdarray *fda);
 int fdarray__add(struct fdarray *fda, int fd, short revents);
 int fdarray__poll(struct fdarray *fda, int timeout);
 int fdarray__filter(struct fdarray *fda, short revents,
-                   void (*entry_destructor)(struct fdarray *fda, int fd));
+                   void (*entry_destructor)(struct fdarray *fda, int fd, void *arg),
+                   void *arg);
 int fdarray__grow(struct fdarray *fda, int extra);
 int fdarray__fprintf(struct fdarray *fda, FILE *fp);
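A minimal, hypothetical caller of the extended fdarray__filter() above (the destructor name, the close_stats struct and the POLLHUP choice are made up for illustration); it shows the new void *arg being handed through to the per-fd callback:

#include <poll.h>
#include <unistd.h>
#include <api/fd/array.h>

struct close_stats { int closed; };

/* Per-entry destructor: receives the caller's cookie via the new arg. */
static void close_and_count(struct fdarray *fda, int fd, void *arg)
{
        struct close_stats *st = arg;

        close(fda->entries[fd].fd);
        st->closed++;
}

/* Drop entries that poll() flagged with POLLHUP, closing them as we go. */
static int drop_hung_fds(struct fdarray *fda)
{
        struct close_stats st = { 0 };

        fdarray__filter(fda, POLLHUP, close_and_count, &st);
        return st.closed;
}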
 
index 7e543c3..462e526 100644 (file)
@@ -1186,20 +1186,14 @@ bpf_object__next(struct bpf_object *prev)
        return next;
 }
 
-const char *
-bpf_object__get_name(struct bpf_object *obj)
+const char *bpf_object__name(struct bpf_object *obj)
 {
-       if (!obj)
-               return ERR_PTR(-EINVAL);
-       return obj->path;
+       return obj ? obj->path : ERR_PTR(-EINVAL);
 }
 
-unsigned int
-bpf_object__get_kversion(struct bpf_object *obj)
+unsigned int bpf_object__kversion(struct bpf_object *obj)
 {
-       if (!obj)
-               return 0;
-       return obj->kern_version;
+       return obj ? obj->kern_version : 0;
 }
 
 struct bpf_program *
@@ -1224,9 +1218,8 @@ bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
        return &obj->programs[idx];
 }
 
-int bpf_program__set_private(struct bpf_program *prog,
-                            void *priv,
-                            bpf_program_clear_priv_t clear_priv)
+int bpf_program__set_priv(struct bpf_program *prog, void *priv,
+                         bpf_program_clear_priv_t clear_priv)
 {
        if (prog->priv && prog->clear_priv)
                prog->clear_priv(prog, prog->priv);
@@ -1236,10 +1229,9 @@ int bpf_program__set_private(struct bpf_program *prog,
        return 0;
 }
 
-int bpf_program__get_private(struct bpf_program *prog, void **ppriv)
+void *bpf_program__priv(struct bpf_program *prog)
 {
-       *ppriv = prog->priv;
-       return 0;
+       return prog ? prog->priv : ERR_PTR(-EINVAL);
 }
 
 const char *bpf_program__title(struct bpf_program *prog, bool needs_copy)
@@ -1311,32 +1303,23 @@ int bpf_program__nth_fd(struct bpf_program *prog, int n)
        return fd;
 }
 
-int bpf_map__get_fd(struct bpf_map *map)
+int bpf_map__fd(struct bpf_map *map)
 {
-       if (!map)
-               return -EINVAL;
-
-       return map->fd;
+       return map ? map->fd : -EINVAL;
 }
 
-int bpf_map__get_def(struct bpf_map *map, struct bpf_map_def *pdef)
+const struct bpf_map_def *bpf_map__def(struct bpf_map *map)
 {
-       if (!map || !pdef)
-               return -EINVAL;
-
-       *pdef = map->def;
-       return 0;
+       return map ? &map->def : ERR_PTR(-EINVAL);
 }
 
-const char *bpf_map__get_name(struct bpf_map *map)
+const char *bpf_map__name(struct bpf_map *map)
 {
-       if (!map)
-               return NULL;
-       return map->name;
+       return map ? map->name : NULL;
 }
 
-int bpf_map__set_private(struct bpf_map *map, void *priv,
-                        bpf_map_clear_priv_t clear_priv)
+int bpf_map__set_priv(struct bpf_map *map, void *priv,
+                    bpf_map_clear_priv_t clear_priv)
 {
        if (!map)
                return -EINVAL;
@@ -1351,14 +1334,9 @@ int bpf_map__set_private(struct bpf_map *map, void *priv,
        return 0;
 }
 
-int bpf_map__get_private(struct bpf_map *map, void **ppriv)
+void *bpf_map__priv(struct bpf_map *map)
 {
-       if (!map)
-               return -EINVAL;
-
-       if (ppriv)
-               *ppriv = map->priv;
-       return 0;
+       return map ? map->priv : ERR_PTR(-EINVAL);
 }
 
 struct bpf_map *
@@ -1389,7 +1367,7 @@ bpf_map__next(struct bpf_map *prev, struct bpf_object *obj)
 }
 
 struct bpf_map *
-bpf_object__get_map_by_name(struct bpf_object *obj, const char *name)
+bpf_object__find_map_by_name(struct bpf_object *obj, const char *name)
 {
        struct bpf_map *pos;
 
index a51594c..722f46b 100644 (file)
@@ -55,8 +55,8 @@ void bpf_object__close(struct bpf_object *object);
 /* Load/unload object into/from kernel */
 int bpf_object__load(struct bpf_object *obj);
 int bpf_object__unload(struct bpf_object *obj);
-const char *bpf_object__get_name(struct bpf_object *obj);
-unsigned int bpf_object__get_kversion(struct bpf_object *obj);
+const char *bpf_object__name(struct bpf_object *obj);
+unsigned int bpf_object__kversion(struct bpf_object *obj);
 
 struct bpf_object *bpf_object__next(struct bpf_object *prev);
 #define bpf_object__for_each_safe(pos, tmp)                    \
@@ -78,11 +78,10 @@ struct bpf_program *bpf_program__next(struct bpf_program *prog,
 typedef void (*bpf_program_clear_priv_t)(struct bpf_program *,
                                         void *);
 
-int bpf_program__set_private(struct bpf_program *prog, void *priv,
-                            bpf_program_clear_priv_t clear_priv);
+int bpf_program__set_priv(struct bpf_program *prog, void *priv,
+                         bpf_program_clear_priv_t clear_priv);
 
-int bpf_program__get_private(struct bpf_program *prog,
-                            void **ppriv);
+void *bpf_program__priv(struct bpf_program *prog);
 
 const char *bpf_program__title(struct bpf_program *prog, bool needs_copy);
 
@@ -171,7 +170,7 @@ struct bpf_map_def {
  */
 struct bpf_map;
 struct bpf_map *
-bpf_object__get_map_by_name(struct bpf_object *obj, const char *name);
+bpf_object__find_map_by_name(struct bpf_object *obj, const char *name);
 
 struct bpf_map *
 bpf_map__next(struct bpf_map *map, struct bpf_object *obj);
@@ -180,13 +179,13 @@ bpf_map__next(struct bpf_map *map, struct bpf_object *obj);
             (pos) != NULL;                             \
             (pos) = bpf_map__next((pos), (obj)))
 
-int bpf_map__get_fd(struct bpf_map *map);
-int bpf_map__get_def(struct bpf_map *map, struct bpf_map_def *pdef);
-const char *bpf_map__get_name(struct bpf_map *map);
+int bpf_map__fd(struct bpf_map *map);
+const struct bpf_map_def *bpf_map__def(struct bpf_map *map);
+const char *bpf_map__name(struct bpf_map *map);
 
 typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
-int bpf_map__set_private(struct bpf_map *map, void *priv,
-                        bpf_map_clear_priv_t clear_priv);
-int bpf_map__get_private(struct bpf_map *map, void **ppriv);
+int bpf_map__set_priv(struct bpf_map *map, void *priv,
+                     bpf_map_clear_priv_t clear_priv);
+void *bpf_map__priv(struct bpf_map *map);
 
 #endif
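A caller-side sketch of the renamed accessors (illustrative only; the include paths and the printf formatting are assumptions, while the IS_ERR()/PTR_ERR() handling mirrors the bpf-loader.c conversion further below):

#include <stdio.h>
#include <linux/err.h>
#include "bpf/libbpf.h"

static int print_map_info(struct bpf_map *map)
{
        const struct bpf_map_def *def = bpf_map__def(map);
        int fd = bpf_map__fd(map);

        if (IS_ERR(def))        /* getters now return ERR_PTR() on bad input */
                return PTR_ERR(def);
        if (fd < 0)
                return fd;

        printf("map %s: type %u, %u entries, fd %d\n",
               bpf_map__name(map), def->type, def->max_entries, fd);
        return 0;
}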
index 3d1bb80..3db3db9 100644 (file)
@@ -30,3 +30,4 @@ config.mak.autogen
 *.pyo
 .config-detected
 util/intel-pt-decoder/inat-tables.c
+arch/*/include/generated/
index 04f23b4..d96ccd4 100644 (file)
@@ -204,6 +204,38 @@ Aggregate counts per physical processor for system-wide mode measurements.
 --no-aggr::
 Do not aggregate counts across all monitored CPUs.
 
+--topdown::
+Print top down level 1 metrics if supported by the CPU. This allows
+determining bottlenecks in the CPU pipeline for CPU bound workloads,
+by breaking down the consumed cycles into frontend bound, backend bound,
+bad speculation and retiring.
+
+Frontend bound means that the CPU cannot fetch and decode instructions fast
+enough. Backend bound means that computation or memory access is the
+bottleneck. Bad speculation means that the CPU wasted cycles due to branch
+mispredictions and similar issues. Retiring means that the CPU computed
+without an apparent bottleneck. The reported bottleneck is only the real
+bottleneck if the workload is actually bound by the CPU and not by
+something else.
+
+For best results it is usually a good idea to use this with interval
+mode, e.g. -I 1000, as the bottleneck of a workload can change often.
+
+The top down metrics are collected per core instead of per
+CPU thread. Per core mode is automatically enabled
+and -a (global monitoring) is needed, requiring root rights or
+kernel.perf_event_paranoid = -1.
+
+Topdown uses the full Performance Monitoring Unit, and for best results
+needs the NMI watchdog disabled (as root):
+echo 0 > /proc/sys/kernel/nmi_watchdog
+Otherwise the bottlenecks may be inconsistent
+on workloads with changing phases.
+
+This enables --metric-only, unless overridden with --no-metric-only.
+
+To interpret the results it is usually necessary to know which
+CPUs the workload runs on. If needed, the CPUs can be forced using
+taskset.
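As an illustration (not part of the patch; the workload name is hypothetical), a typical invocation combining the points above would be:

  echo 0 > /proc/sys/kernel/nmi_watchdog
  perf stat -a --topdown -I 1000 -- ./my-workload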
 
 EXAMPLES
 --------
index d22e3d0..f98da17 100644 (file)
@@ -1,4 +1,4 @@
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 
-libperf-$(CONFIG_LIBUNWIND)          += unwind-libunwind.o
+libperf-$(CONFIG_LOCAL_LIBUNWIND)    += unwind-libunwind.o
 libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
index e58123a..02f41db 100644 (file)
@@ -1,2 +1,2 @@
 libperf-$(CONFIG_DWARF)     += dwarf-regs.o
-libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
+libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
index a87afa9..c116b71 100644 (file)
@@ -1,11 +1,13 @@
 
+#ifndef REMOTE_UNWIND_LIBUNWIND
 #include <errno.h>
 #include <libunwind.h>
 #include "perf_regs.h"
 #include "../../util/unwind.h"
 #include "../../util/debug.h"
+#endif
 
-int libunwind__arch_reg_id(int regnum)
+int LIBUNWIND__ARCH_REG_ID(int regnum)
 {
        switch (regnum) {
        case UNW_AARCH64_X0:
index e83c8ce..fa090a9 100644 (file)
@@ -102,7 +102,7 @@ static int lookup_triplets(const char *const *triplets, const char *name)
  * Return architecture name in a normalized form.
  * The conversion logic comes from the Makefile.
  */
-static const char *normalize_arch(char *arch)
+const char *normalize_arch(char *arch)
 {
        if (!strcmp(arch, "x86_64"))
                return "x86";
index 7529cfb..6b01c73 100644 (file)
@@ -6,5 +6,6 @@
 extern const char *objdump_path;
 
 int perf_env__lookup_objdump(struct perf_env *env);
+const char *normalize_arch(char *arch);
 
 #endif /* ARCH_PERF_COMMON_H */
index 4659703..f95e6f4 100644 (file)
@@ -3,11 +3,12 @@ libperf-y += tsc.o
 libperf-y += pmu.o
 libperf-y += kvm-stat.o
 libperf-y += perf_regs.o
+libperf-y += group.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
 
-libperf-$(CONFIG_LIBUNWIND)          += unwind-libunwind.o
+libperf-$(CONFIG_LOCAL_LIBUNWIND)    += unwind-libunwind.o
 libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
 
 libperf-$(CONFIG_AUXTRACE) += auxtrace.o
diff --git a/tools/perf/arch/x86/util/group.c b/tools/perf/arch/x86/util/group.c
new file mode 100644 (file)
index 0000000..37f92aa
--- /dev/null
@@ -0,0 +1,27 @@
+#include <stdio.h>
+#include "api/fs/fs.h"
+#include "util/group.h"
+
+/*
+ * Check whether we can use a group for top down.
+ * Without a group we may get bad results due to multiplexing.
+ */
+bool arch_topdown_check_group(bool *warn)
+{
+       int n;
+
+       if (sysctl__read_int("kernel/nmi_watchdog", &n) < 0)
+               return false;
+       if (n > 0) {
+               *warn = true;
+               return false;
+       }
+       return true;
+}
+
+void arch_topdown_group_warn(void)
+{
+       fprintf(stderr,
+               "nmi_watchdog enabled with topdown. May give wrong results.\n"
+               "Disable with echo 0 > /proc/sys/kernel/nmi_watchdog\n");
+}
index 357f1b1..2e5567c 100644 (file)
@@ -62,6 +62,8 @@ int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
        struct perf_tsc_conversion tc;
        int err;
 
+       if (!pc)
+               return 0;
        err = perf_read_tsc_conversion(pc, &tc);
        if (err == -EOPNOTSUPP)
                return 0;
index db25e93..4f16661 100644 (file)
@@ -1,12 +1,14 @@
 
+#ifndef REMOTE_UNWIND_LIBUNWIND
 #include <errno.h>
 #include <libunwind.h>
 #include "perf_regs.h"
 #include "../../util/unwind.h"
 #include "../../util/debug.h"
+#endif
 
 #ifdef HAVE_ARCH_X86_64_SUPPORT
-int libunwind__arch_reg_id(int regnum)
+int LIBUNWIND__ARCH_REG_ID(int regnum)
 {
        int id;
 
@@ -70,7 +72,7 @@ int libunwind__arch_reg_id(int regnum)
        return id;
 }
 #else
-int libunwind__arch_reg_id(int regnum)
+int LIBUNWIND__ARCH_REG_ID(int regnum)
 {
        int id;
 
index dc3fcb5..d4cf1b0 100644 (file)
@@ -655,6 +655,13 @@ perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused
        return 0;
 }
 
+static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
+{
+       if (rec->evlist && rec->evlist->mmap && rec->evlist->mmap[0].base)
+               return rec->evlist->mmap[0].base;
+       return NULL;
+}
+
 static int record__synthesize(struct record *rec)
 {
        struct perf_session *session = rec->session;
@@ -692,7 +699,7 @@ static int record__synthesize(struct record *rec)
                }
        }
 
-       err = perf_event__synth_time_conv(rec->evlist->mmap[0].base, tool,
+       err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
                                          process_synthesized_event, machine);
        if (err)
                goto out;
index e3ce2f3..4601123 100644 (file)
@@ -339,7 +339,7 @@ static void set_print_ip_opts(struct perf_event_attr *attr)
  */
 static int perf_session__check_output_opt(struct perf_session *session)
 {
-       int j;
+       unsigned int j;
        struct perf_evsel *evsel;
 
        for (j = 0; j < PERF_TYPE_MAX; ++j) {
@@ -388,17 +388,20 @@ static int perf_session__check_output_opt(struct perf_session *session)
                struct perf_event_attr *attr;
 
                j = PERF_TYPE_TRACEPOINT;
-               evsel = perf_session__find_first_evtype(session, j);
-               if (evsel == NULL)
-                       goto out;
 
-               attr = &evsel->attr;
+               evlist__for_each(session->evlist, evsel) {
+                       if (evsel->attr.type != j)
+                               continue;
+
+                       attr = &evsel->attr;
 
-               if (attr->sample_type & PERF_SAMPLE_CALLCHAIN) {
-                       output[j].fields |= PERF_OUTPUT_IP;
-                       output[j].fields |= PERF_OUTPUT_SYM;
-                       output[j].fields |= PERF_OUTPUT_DSO;
-                       set_print_ip_opts(attr);
+                       if (attr->sample_type & PERF_SAMPLE_CALLCHAIN) {
+                               output[j].fields |= PERF_OUTPUT_IP;
+                               output[j].fields |= PERF_OUTPUT_SYM;
+                               output[j].fields |= PERF_OUTPUT_DSO;
+                               set_print_ip_opts(attr);
+                               goto out;
+                       }
                }
        }
 
index ee7ada7..dff6373 100644 (file)
 #include "util/thread.h"
 #include "util/thread_map.h"
 #include "util/counts.h"
+#include "util/group.h"
 #include "util/session.h"
 #include "util/tool.h"
+#include "util/group.h"
 #include "asm/bug.h"
 
+#include <api/fs/fs.h>
 #include <stdlib.h>
 #include <sys/prctl.h>
 #include <locale.h>
@@ -98,6 +101,15 @@ static const char * transaction_limited_attrs = {
        "}"
 };
 
+static const char * topdown_attrs[] = {
+       "topdown-total-slots",
+       "topdown-slots-retired",
+       "topdown-recovery-bubbles",
+       "topdown-fetch-bubbles",
+       "topdown-slots-issued",
+       NULL,
+};
+
 static struct perf_evlist      *evsel_list;
 
 static struct target target = {
@@ -112,6 +124,7 @@ static volatile pid_t               child_pid                       = -1;
 static bool                    null_run                        =  false;
 static int                     detailed_run                    =  0;
 static bool                    transaction_run;
+static bool                    topdown_run                     = false;
 static bool                    big_num                         =  true;
 static int                     big_num_opt                     =  -1;
 static const char              *csv_sep                        = NULL;
@@ -124,6 +137,7 @@ static unsigned int         initial_delay                   = 0;
 static unsigned int            unit_width                      = 4; /* strlen("unit") */
 static bool                    forever                         = false;
 static bool                    metric_only                     = false;
+static bool                    force_metric_only               = false;
 static struct timespec         ref_time;
 static struct cpu_map          *aggr_map;
 static aggr_get_id_t           aggr_get_id;
@@ -1302,7 +1316,15 @@ static int aggr_header_lens[] = {
        [AGGR_GLOBAL] = 0,
 };
 
-static void print_metric_headers(char *prefix)
+static const char *aggr_header_csv[] = {
+       [AGGR_CORE]     =       "core,cpus,",
+       [AGGR_SOCKET]   =       "socket,cpus",
+       [AGGR_NONE]     =       "cpu,",
+       [AGGR_THREAD]   =       "comm-pid,",
+       [AGGR_GLOBAL]   =       ""
+};
+
+static void print_metric_headers(const char *prefix, bool no_indent)
 {
        struct perf_stat_output_ctx out;
        struct perf_evsel *counter;
@@ -1313,9 +1335,15 @@ static void print_metric_headers(char *prefix)
        if (prefix)
                fprintf(stat_config.output, "%s", prefix);
 
-       if (!csv_output)
+       if (!csv_output && !no_indent)
                fprintf(stat_config.output, "%*s",
                        aggr_header_lens[stat_config.aggr_mode], "");
+       if (csv_output) {
+               if (stat_config.interval)
+                       fputs("time,", stat_config.output);
+               fputs(aggr_header_csv[stat_config.aggr_mode],
+                       stat_config.output);
+       }
 
        /* Print metrics headers only */
        evlist__for_each(evsel_list, counter) {
@@ -1338,28 +1366,40 @@ static void print_interval(char *prefix, struct timespec *ts)
 
        sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
 
-       if (num_print_interval == 0 && !csv_output && !metric_only) {
+       if (num_print_interval == 0 && !csv_output) {
                switch (stat_config.aggr_mode) {
                case AGGR_SOCKET:
-                       fprintf(output, "#           time socket cpus             counts %*s events\n", unit_width, "unit");
+                       fprintf(output, "#           time socket cpus");
+                       if (!metric_only)
+                               fprintf(output, "             counts %*s events\n", unit_width, "unit");
                        break;
                case AGGR_CORE:
-                       fprintf(output, "#           time core         cpus             counts %*s events\n", unit_width, "unit");
+                       fprintf(output, "#           time core         cpus");
+                       if (!metric_only)
+                               fprintf(output, "             counts %*s events\n", unit_width, "unit");
                        break;
                case AGGR_NONE:
-                       fprintf(output, "#           time CPU                counts %*s events\n", unit_width, "unit");
+                       fprintf(output, "#           time CPU");
+                       if (!metric_only)
+                               fprintf(output, "                counts %*s events\n", unit_width, "unit");
                        break;
                case AGGR_THREAD:
-                       fprintf(output, "#           time             comm-pid                  counts %*s events\n", unit_width, "unit");
+                       fprintf(output, "#           time             comm-pid");
+                       if (!metric_only)
+                               fprintf(output, "                  counts %*s events\n", unit_width, "unit");
                        break;
                case AGGR_GLOBAL:
                default:
-                       fprintf(output, "#           time             counts %*s events\n", unit_width, "unit");
+                       fprintf(output, "#           time");
+                       if (!metric_only)
+                               fprintf(output, "             counts %*s events\n", unit_width, "unit");
                case AGGR_UNSET:
                        break;
                }
        }
 
+       if (num_print_interval == 0 && metric_only)
+               print_metric_headers(" ", true);
        if (++num_print_interval == 25)
                num_print_interval = 0;
 }
@@ -1428,8 +1468,8 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
        if (metric_only) {
                static int num_print_iv;
 
-               if (num_print_iv == 0)
-                       print_metric_headers(prefix);
+               if (num_print_iv == 0 && !interval)
+                       print_metric_headers(prefix, false);
                if (num_print_iv++ == 25)
                        num_print_iv = 0;
                if (stat_config.aggr_mode == AGGR_GLOBAL && prefix)
@@ -1520,6 +1560,14 @@ static int stat__set_big_num(const struct option *opt __maybe_unused,
        return 0;
 }
 
+static int enable_metric_only(const struct option *opt __maybe_unused,
+                             const char *s __maybe_unused, int unset)
+{
+       force_metric_only = true;
+       metric_only = !unset;
+       return 0;
+}
+
 static const struct option stat_options[] = {
        OPT_BOOLEAN('T', "transaction", &transaction_run,
                    "hardware transaction statistics"),
@@ -1578,8 +1626,10 @@ static const struct option stat_options[] = {
                     "aggregate counts per thread", AGGR_THREAD),
        OPT_UINTEGER('D', "delay", &initial_delay,
                     "ms to wait before starting measurement after program start"),
-       OPT_BOOLEAN(0, "metric-only", &metric_only,
-                       "Only print computed metrics. No raw values"),
+       OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL,
+                       "Only print computed metrics. No raw values", enable_metric_only),
+       OPT_BOOLEAN(0, "topdown", &topdown_run,
+                       "measure topdown level 1 statistics"),
        OPT_END()
 };
 
@@ -1772,12 +1822,62 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
        return 0;
 }
 
+static int topdown_filter_events(const char **attr, char **str, bool use_group)
+{
+       int off = 0;
+       int i;
+       int len = 0;
+       char *s;
+
+       for (i = 0; attr[i]; i++) {
+               if (pmu_have_event("cpu", attr[i])) {
+                       len += strlen(attr[i]) + 1;
+                       attr[i - off] = attr[i];
+               } else
+                       off++;
+       }
+       attr[i - off] = NULL;
+
+       *str = malloc(len + 1 + 2);
+       if (!*str)
+               return -1;
+       s = *str;
+       if (i - off == 0) {
+               *s = 0;
+               return 0;
+       }
+       if (use_group)
+               *s++ = '{';
+       for (i = 0; attr[i]; i++) {
+               strcpy(s, attr[i]);
+               s += strlen(s);
+               *s++ = ',';
+       }
+       if (use_group) {
+               s[-1] = '}';
+               *s = 0;
+       } else
+               s[-1] = 0;
+       return 0;
+}
+
+__weak bool arch_topdown_check_group(bool *warn)
+{
+       *warn = false;
+       return false;
+}
+
+__weak void arch_topdown_group_warn(void)
+{
+}
+
 /*
  * Add default attributes, if there were no attributes specified or
  * if -d/--detailed, -d -d or -d -d -d is used:
  */
 static int add_default_attributes(void)
 {
+       int err;
        struct perf_event_attr default_attrs0[] = {
 
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK             },
@@ -1896,7 +1996,6 @@ static int add_default_attributes(void)
                return 0;
 
        if (transaction_run) {
-               int err;
                if (pmu_have_event("cpu", "cycles-ct") &&
                    pmu_have_event("cpu", "el-start"))
                        err = parse_events(evsel_list, transaction_attrs, NULL);
@@ -1909,6 +2008,46 @@ static int add_default_attributes(void)
                return 0;
        }
 
+       if (topdown_run) {
+               char *str = NULL;
+               bool warn = false;
+
+               if (stat_config.aggr_mode != AGGR_GLOBAL &&
+                   stat_config.aggr_mode != AGGR_CORE) {
+                       pr_err("top down event configuration requires --per-core mode\n");
+                       return -1;
+               }
+               stat_config.aggr_mode = AGGR_CORE;
+               if (nr_cgroups || !target__has_cpu(&target)) {
+                       pr_err("top down event configuration requires system-wide mode (-a)\n");
+                       return -1;
+               }
+
+               if (!force_metric_only)
+                       metric_only = true;
+               if (topdown_filter_events(topdown_attrs, &str,
+                               arch_topdown_check_group(&warn)) < 0) {
+                       pr_err("Out of memory\n");
+                       return -1;
+               }
+               if (topdown_attrs[0] && str) {
+                       if (warn)
+                               arch_topdown_group_warn();
+                       err = parse_events(evsel_list, str, NULL);
+                       if (err) {
+                               fprintf(stderr,
+                                       "Cannot set up top down events %s: %d\n",
+                                       str, err);
+                               free(str);
+                               return -1;
+                       }
+               } else {
+                       fprintf(stderr, "System does not support topdown\n");
+                       return -1;
+               }
+               free(str);
+       }
+
        if (!evsel_list->nr_entries) {
                if (target__has_cpu(&target))
                        default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
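As a worked illustration of topdown_filter_events() above: when all five entries of topdown_attrs[] are reported present by pmu_have_event() and grouping is allowed, the string handed to parse_events() is

  {topdown-total-slots,topdown-slots-retired,topdown-recovery-bubbles,topdown-fetch-bubbles,topdown-slots-issued}

and without grouping the same list is emitted comma-separated, without the braces.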
index 5ad0255..098874b 100644 (file)
@@ -73,17 +73,25 @@ endif
 #
 #   make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/
 #
+
+libunwind_arch_set_flags = $(eval $(libunwind_arch_set_flags_code))
+define libunwind_arch_set_flags_code
+  FEATURE_CHECK_CFLAGS-libunwind-$(1)  = -I$(LIBUNWIND_DIR)/include
+  FEATURE_CHECK_LDFLAGS-libunwind-$(1) = -L$(LIBUNWIND_DIR)/lib
+endef
+
 ifdef LIBUNWIND_DIR
   LIBUNWIND_CFLAGS  = -I$(LIBUNWIND_DIR)/include
   LIBUNWIND_LDFLAGS = -L$(LIBUNWIND_DIR)/lib
+  LIBUNWIND_ARCHS = x86 x86_64 arm aarch64 debug-frame-arm debug-frame-aarch64
+  $(foreach libunwind_arch,$(LIBUNWIND_ARCHS),$(call libunwind_arch_set_flags,$(libunwind_arch)))
 endif
-LIBUNWIND_LDFLAGS += $(LIBUNWIND_LIBS)
 
 # Set per-feature check compilation flags
 FEATURE_CHECK_CFLAGS-libunwind = $(LIBUNWIND_CFLAGS)
-FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS)
+FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS)
 FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS)
-FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS)
+FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS)
 
 ifeq ($(NO_PERF_REGS),0)
   CFLAGS += -DHAVE_PERF_REGS_SUPPORT
@@ -351,10 +359,40 @@ ifeq ($(ARCH),powerpc)
 endif
 
 ifndef NO_LIBUNWIND
+  have_libunwind :=
+
+  ifeq ($(feature-libunwind-x86), 1)
+    $(call detected,CONFIG_LIBUNWIND_X86)
+    CFLAGS += -DHAVE_LIBUNWIND_X86_SUPPORT
+    LDFLAGS += -lunwind-x86
+    have_libunwind = 1
+  endif
+
+  ifeq ($(feature-libunwind-aarch64), 1)
+    $(call detected,CONFIG_LIBUNWIND_AARCH64)
+    CFLAGS += -DHAVE_LIBUNWIND_AARCH64_SUPPORT
+    LDFLAGS += -lunwind-aarch64
+    have_libunwind = 1
+    $(call feature_check,libunwind-debug-frame-aarch64)
+    ifneq ($(feature-libunwind-debug-frame-aarch64), 1)
+      msg := $(warning No debug_frame support found in libunwind-aarch64);
+      CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME_AARCH64
+    endif
+  endif
+
   ifneq ($(feature-libunwind), 1)
     msg := $(warning No libunwind found. Please install libunwind-dev[el] >= 1.1 and/or set LIBUNWIND_DIR);
+    NO_LOCAL_LIBUNWIND := 1
+  else
+    have_libunwind := 1
+    $(call detected,CONFIG_LOCAL_LIBUNWIND)
+  endif
+
+  ifneq ($(have_libunwind), 1)
     NO_LIBUNWIND := 1
   endif
+else
+  NO_LOCAL_LIBUNWIND := 1
 endif
 
 ifndef NO_LIBBPF
@@ -392,7 +430,7 @@ else
   NO_DWARF_UNWIND := 1
 endif
 
-ifndef NO_LIBUNWIND
+ifndef NO_LOCAL_LIBUNWIND
   ifeq ($(ARCH),$(filter $(ARCH),arm arm64))
     $(call feature_check,libunwind-debug-frame)
     ifneq ($(feature-libunwind-debug-frame), 1)
@@ -403,8 +441,12 @@ ifndef NO_LIBUNWIND
     # non-ARM has no dwarf_find_debug_frame() function:
     CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME
   endif
-  CFLAGS  += -DHAVE_LIBUNWIND_SUPPORT
   EXTLIBS += $(LIBUNWIND_LIBS)
+  LDFLAGS += $(LIBUNWIND_LIBS)
+endif
+
+ifndef NO_LIBUNWIND
+  CFLAGS  += -DHAVE_LIBUNWIND_SUPPORT
   CFLAGS  += $(LIBUNWIND_CFLAGS)
   LDFLAGS += $(LIBUNWIND_LDFLAGS)
 endif
index c809463..59dbd05 100644 (file)
@@ -36,7 +36,7 @@ int test__fdarray__filter(int subtest __maybe_unused)
        }
 
        fdarray__init_revents(fda, POLLIN);
-       nr_fds = fdarray__filter(fda, POLLHUP, NULL);
+       nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL);
        if (nr_fds != fda->nr_alloc) {
                pr_debug("\nfdarray__filter()=%d != %d shouldn't have filtered anything",
                         nr_fds, fda->nr_alloc);
@@ -44,7 +44,7 @@ int test__fdarray__filter(int subtest __maybe_unused)
        }
 
        fdarray__init_revents(fda, POLLHUP);
-       nr_fds = fdarray__filter(fda, POLLHUP, NULL);
+       nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL);
        if (nr_fds != 0) {
                pr_debug("\nfdarray__filter()=%d != %d, should have filtered all fds",
                         nr_fds, fda->nr_alloc);
@@ -57,7 +57,7 @@ int test__fdarray__filter(int subtest __maybe_unused)
 
        pr_debug("\nfiltering all but fda->entries[2]:");
        fdarray__fprintf_prefix(fda, "before", stderr);
-       nr_fds = fdarray__filter(fda, POLLHUP, NULL);
+       nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL);
        fdarray__fprintf_prefix(fda, " after", stderr);
        if (nr_fds != 1) {
                pr_debug("\nfdarray__filter()=%d != 1, should have left just one event", nr_fds);
@@ -78,7 +78,7 @@ int test__fdarray__filter(int subtest __maybe_unused)
 
        pr_debug("\nfiltering all but (fda->entries[0], fda->entries[3]):");
        fdarray__fprintf_prefix(fda, "before", stderr);
-       nr_fds = fdarray__filter(fda, POLLHUP, NULL);
+       nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL);
        fdarray__fprintf_prefix(fda, " after", stderr);
        if (nr_fds != 2) {
                pr_debug("\nfdarray__filter()=%d != 2, should have left just two events",
index 7865f68..b2a2c74 100644 (file)
@@ -1783,8 +1783,8 @@ static int test_pmu_events(void)
                struct evlist_test e;
                char name[MAX_NAME];
 
-               if (!strcmp(ent->d_name, ".") ||
-                   !strcmp(ent->d_name, ".."))
+               /* Names containing . are special and cannot be used directly */
+               if (strchr(ent->d_name, '.'))
                        continue;
 
                snprintf(name, MAX_NAME, "cpu/event=%s/u", ent->d_name);
index 8c6c8a0..fced833 100644 (file)
@@ -99,7 +99,10 @@ libperf-$(CONFIG_DWARF) += probe-finder.o
 libperf-$(CONFIG_DWARF) += dwarf-aux.o
 
 libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+libperf-$(CONFIG_LOCAL_LIBUNWIND)    += unwind-libunwind-local.o
 libperf-$(CONFIG_LIBUNWIND)          += unwind-libunwind.o
+libperf-$(CONFIG_LIBUNWIND_X86)      += libunwind/x86_32.o
+libperf-$(CONFIG_LIBUNWIND_AARCH64)  += libunwind/arm64.o
 
 libperf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o
 
index 493307d..dcc8845 100644 (file)
@@ -339,7 +339,7 @@ config_bpf_program(struct bpf_program *prog)
        }
        pr_debug("bpf: config '%s' is ok\n", config_str);
 
-       err = bpf_program__set_private(prog, priv, clear_prog_priv);
+       err = bpf_program__set_priv(prog, priv, clear_prog_priv);
        if (err) {
                pr_debug("Failed to set priv for program '%s'\n", config_str);
                goto errout;
@@ -380,15 +380,14 @@ preproc_gen_prologue(struct bpf_program *prog, int n,
                     struct bpf_insn *orig_insns, int orig_insns_cnt,
                     struct bpf_prog_prep_result *res)
 {
+       struct bpf_prog_priv *priv = bpf_program__priv(prog);
        struct probe_trace_event *tev;
        struct perf_probe_event *pev;
-       struct bpf_prog_priv *priv;
        struct bpf_insn *buf;
        size_t prologue_cnt = 0;
        int i, err;
 
-       err = bpf_program__get_private(prog, (void **)&priv);
-       if (err || !priv)
+       if (IS_ERR(priv) || !priv)
                goto errout;
 
        pev = &priv->pev;
@@ -535,13 +534,12 @@ static int map_prologue(struct perf_probe_event *pev, int *mapping,
 
 static int hook_load_preprocessor(struct bpf_program *prog)
 {
+       struct bpf_prog_priv *priv = bpf_program__priv(prog);
        struct perf_probe_event *pev;
-       struct bpf_prog_priv *priv;
        bool need_prologue = false;
        int err, i;
 
-       err = bpf_program__get_private(prog, (void **)&priv);
-       if (err || !priv) {
+       if (IS_ERR(priv) || !priv) {
                pr_debug("Internal error when hook preprocessor\n");
                return -BPF_LOADER_ERRNO__INTERNAL;
        }
@@ -607,9 +605,11 @@ int bpf__probe(struct bpf_object *obj)
                if (err)
                        goto out;
 
-               err = bpf_program__get_private(prog, (void **)&priv);
-               if (err || !priv)
+               priv = bpf_program__priv(prog);
+               if (IS_ERR(priv) || !priv) {
+                       err = PTR_ERR(priv);
                        goto out;
+               }
                pev = &priv->pev;
 
                err = convert_perf_probe_events(pev, 1);
@@ -645,13 +645,12 @@ int bpf__unprobe(struct bpf_object *obj)
 {
        int err, ret = 0;
        struct bpf_program *prog;
-       struct bpf_prog_priv *priv;
 
        bpf_object__for_each_program(prog, obj) {
+               struct bpf_prog_priv *priv = bpf_program__priv(prog);
                int i;
 
-               err = bpf_program__get_private(prog, (void **)&priv);
-               if (err || !priv)
+               if (IS_ERR(priv) || !priv)
                        continue;
 
                for (i = 0; i < priv->pev.ntevs; i++) {
@@ -702,14 +701,12 @@ int bpf__foreach_tev(struct bpf_object *obj,
        int err;
 
        bpf_object__for_each_program(prog, obj) {
+               struct bpf_prog_priv *priv = bpf_program__priv(prog);
                struct probe_trace_event *tev;
                struct perf_probe_event *pev;
-               struct bpf_prog_priv *priv;
                int i, fd;
 
-               err = bpf_program__get_private(prog,
-                               (void **)&priv);
-               if (err || !priv) {
+               if (IS_ERR(priv) || !priv) {
                        pr_debug("bpf: failed to get private field\n");
                        return -BPF_LOADER_ERRNO__INTERNAL;
                }
@@ -897,15 +894,12 @@ bpf_map_priv__clone(struct bpf_map_priv *priv)
 static int
 bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op)
 {
-       struct bpf_map_priv *priv;
-       const char *map_name;
-       int err;
+       const char *map_name = bpf_map__name(map);
+       struct bpf_map_priv *priv = bpf_map__priv(map);
 
-       map_name = bpf_map__get_name(map);
-       err = bpf_map__get_private(map, (void **)&priv);
-       if (err) {
+       if (IS_ERR(priv)) {
                pr_debug("Failed to get private from map %s\n", map_name);
-               return err;
+               return PTR_ERR(priv);
        }
 
        if (!priv) {
@@ -916,7 +910,7 @@ bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op)
                }
                INIT_LIST_HEAD(&priv->ops_list);
 
-               if (bpf_map__set_private(map, priv, bpf_map_priv__clear)) {
+               if (bpf_map__set_priv(map, priv, bpf_map_priv__clear)) {
                        free(priv);
                        return -BPF_LOADER_ERRNO__INTERNAL;
                }
@@ -948,30 +942,26 @@ static int
 __bpf_map__config_value(struct bpf_map *map,
                        struct parse_events_term *term)
 {
-       struct bpf_map_def def;
        struct bpf_map_op *op;
-       const char *map_name;
-       int err;
+       const char *map_name = bpf_map__name(map);
+       const struct bpf_map_def *def = bpf_map__def(map);
 
-       map_name = bpf_map__get_name(map);
-
-       err = bpf_map__get_def(map, &def);
-       if (err) {
+       if (IS_ERR(def)) {
                pr_debug("Unable to get map definition from '%s'\n",
                         map_name);
                return -BPF_LOADER_ERRNO__INTERNAL;
        }
 
-       if (def.type != BPF_MAP_TYPE_ARRAY) {
+       if (def->type != BPF_MAP_TYPE_ARRAY) {
                pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n",
                         map_name);
                return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
        }
-       if (def.key_size < sizeof(unsigned int)) {
+       if (def->key_size < sizeof(unsigned int)) {
                pr_debug("Map %s has incorrect key size\n", map_name);
                return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE;
        }
-       switch (def.value_size) {
+       switch (def->value_size) {
        case 1:
        case 2:
        case 4:
@@ -1014,12 +1004,10 @@ __bpf_map__config_event(struct bpf_map *map,
                        struct perf_evlist *evlist)
 {
        struct perf_evsel *evsel;
-       struct bpf_map_def def;
+       const struct bpf_map_def *def;
        struct bpf_map_op *op;
-       const char *map_name;
-       int err;
+       const char *map_name = bpf_map__name(map);
 
-       map_name = bpf_map__get_name(map);
        evsel = perf_evlist__find_evsel_by_str(evlist, term->val.str);
        if (!evsel) {
                pr_debug("Event (for '%s') '%s' doesn't exist\n",
@@ -1027,18 +1015,18 @@ __bpf_map__config_event(struct bpf_map *map,
                return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT;
        }
 
-       err = bpf_map__get_def(map, &def);
-       if (err) {
+       def = bpf_map__def(map);
+       if (IS_ERR(def)) {
                pr_debug("Unable to get map definition from '%s'\n",
                         map_name);
-               return err;
+               return PTR_ERR(def);
        }
 
        /*
         * No need to check key_size and value_size:
         * kernel has already checked them.
         */
-       if (def.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+       if (def->type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
                pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
                         map_name);
                return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
@@ -1087,9 +1075,8 @@ config_map_indices_range_check(struct parse_events_term *term,
                               const char *map_name)
 {
        struct parse_events_array *array = &term->array;
-       struct bpf_map_def def;
+       const struct bpf_map_def *def;
        unsigned int i;
-       int err;
 
        if (!array->nr_ranges)
                return 0;
@@ -1099,8 +1086,8 @@ config_map_indices_range_check(struct parse_events_term *term,
                return -BPF_LOADER_ERRNO__INTERNAL;
        }
 
-       err = bpf_map__get_def(map, &def);
-       if (err) {
+       def = bpf_map__def(map);
+       if (IS_ERR(def)) {
                pr_debug("ERROR: Unable to get map definition from '%s'\n",
                         map_name);
                return -BPF_LOADER_ERRNO__INTERNAL;
@@ -1111,7 +1098,7 @@ config_map_indices_range_check(struct parse_events_term *term,
                size_t length = array->ranges[i].length;
                unsigned int idx = start + length - 1;
 
-               if (idx >= def.max_entries) {
+               if (idx >= def->max_entries) {
                        pr_debug("ERROR: index %d too large\n", idx);
                        return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG;
                }
@@ -1147,7 +1134,7 @@ bpf__obj_config_map(struct bpf_object *obj,
                goto out;
        }
 
-       map = bpf_object__get_map_by_name(obj, map_name);
+       map = bpf_object__find_map_by_name(obj, map_name);
        if (!map) {
                pr_debug("ERROR: Map %s doesn't exist\n", map_name);
                err = -BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST;
@@ -1204,14 +1191,14 @@ out:
 }
 
 typedef int (*map_config_func_t)(const char *name, int map_fd,
-                                struct bpf_map_def *pdef,
+                                const struct bpf_map_def *pdef,
                                 struct bpf_map_op *op,
                                 void *pkey, void *arg);
 
 static int
 foreach_key_array_all(map_config_func_t func,
                      void *arg, const char *name,
-                     int map_fd, struct bpf_map_def *pdef,
+                     int map_fd, const struct bpf_map_def *pdef,
                      struct bpf_map_op *op)
 {
        unsigned int i;
@@ -1231,7 +1218,7 @@ foreach_key_array_all(map_config_func_t func,
 static int
 foreach_key_array_ranges(map_config_func_t func, void *arg,
                         const char *name, int map_fd,
-                        struct bpf_map_def *pdef,
+                        const struct bpf_map_def *pdef,
                         struct bpf_map_op *op)
 {
        unsigned int i, j;
@@ -1261,15 +1248,12 @@ bpf_map_config_foreach_key(struct bpf_map *map,
                           void *arg)
 {
        int err, map_fd;
-       const char *name;
        struct bpf_map_op *op;
-       struct bpf_map_def def;
-       struct bpf_map_priv *priv;
+       const struct bpf_map_def *def;
+       const char *name = bpf_map__name(map);
+       struct bpf_map_priv *priv = bpf_map__priv(map);
 
-       name = bpf_map__get_name(map);
-
-       err = bpf_map__get_private(map, (void **)&priv);
-       if (err) {
+       if (IS_ERR(priv)) {
                pr_debug("ERROR: failed to get private from map %s\n", name);
                return -BPF_LOADER_ERRNO__INTERNAL;
        }
@@ -1278,29 +1262,29 @@ bpf_map_config_foreach_key(struct bpf_map *map,
                return 0;
        }
 
-       err = bpf_map__get_def(map, &def);
-       if (err) {
+       def = bpf_map__def(map);
+       if (IS_ERR(def)) {
                pr_debug("ERROR: failed to get definition from map %s\n", name);
                return -BPF_LOADER_ERRNO__INTERNAL;
        }
-       map_fd = bpf_map__get_fd(map);
+       map_fd = bpf_map__fd(map);
        if (map_fd < 0) {
                pr_debug("ERROR: failed to get fd from map %s\n", name);
                return map_fd;
        }
 
        list_for_each_entry(op, &priv->ops_list, list) {
-               switch (def.type) {
+               switch (def->type) {
                case BPF_MAP_TYPE_ARRAY:
                case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
                        switch (op->key_type) {
                        case BPF_MAP_KEY_ALL:
                                err = foreach_key_array_all(func, arg, name,
-                                                           map_fd, &def, op);
+                                                           map_fd, def, op);
                                break;
                        case BPF_MAP_KEY_RANGES:
                                err = foreach_key_array_ranges(func, arg, name,
-                                                              map_fd, &def,
+                                                              map_fd, def,
                                                               op);
                                break;
                        default:
@@ -1410,7 +1394,7 @@ apply_config_evsel_for_key(const char *name, int map_fd, void *pkey,
 
 static int
 apply_obj_config_map_for_key(const char *name, int map_fd,
-                            struct bpf_map_def *pdef __maybe_unused,
+                            const struct bpf_map_def *pdef,
                             struct bpf_map_op *op,
                             void *pkey, void *arg __maybe_unused)
 {
@@ -1475,9 +1459,9 @@ int bpf__apply_obj_config(void)
 
 #define bpf__for_each_stdout_map(pos, obj, objtmp)     \
        bpf__for_each_map(pos, obj, objtmp)             \
-               if (bpf_map__get_name(pos) &&           \
+               if (bpf_map__name(pos) &&               \
                        (strcmp("__bpf_stdout__",       \
-                               bpf_map__get_name(pos)) == 0))
+                               bpf_map__name(pos)) == 0))
 
 int bpf__setup_stdout(struct perf_evlist *evlist __maybe_unused)
 {
@@ -1489,10 +1473,9 @@ int bpf__setup_stdout(struct perf_evlist *evlist __maybe_unused)
        bool need_init = false;
 
        bpf__for_each_stdout_map(map, obj, tmp) {
-               struct bpf_map_priv *priv;
+               struct bpf_map_priv *priv = bpf_map__priv(map);
 
-               err = bpf_map__get_private(map, (void **)&priv);
-               if (err)
+               if (IS_ERR(priv))
                        return -BPF_LOADER_ERRNO__INTERNAL;
 
                /*
@@ -1520,10 +1503,9 @@ int bpf__setup_stdout(struct perf_evlist *evlist __maybe_unused)
        }
 
        bpf__for_each_stdout_map(map, obj, tmp) {
-               struct bpf_map_priv *priv;
+               struct bpf_map_priv *priv = bpf_map__priv(map);
 
-               err = bpf_map__get_private(map, (void **)&priv);
-               if (err)
+               if (IS_ERR(priv))
                        return -BPF_LOADER_ERRNO__INTERNAL;
                if (priv)
                        continue;
@@ -1533,7 +1515,7 @@ int bpf__setup_stdout(struct perf_evlist *evlist __maybe_unused)
                        if (!priv)
                                return -ENOMEM;
 
-                       err = bpf_map__set_private(map, priv, bpf_map_priv__clear);
+                       err = bpf_map__set_priv(map, priv, bpf_map_priv__clear);
                        if (err) {
                                bpf_map_priv__clear(map, priv);
                                return err;
@@ -1677,7 +1659,7 @@ int bpf__strerror_load(struct bpf_object *obj,
 {
        bpf__strerror_head(err, buf, size);
        case LIBBPF_ERRNO__KVER: {
-               unsigned int obj_kver = bpf_object__get_kversion(obj);
+               unsigned int obj_kver = bpf_object__kversion(obj);
                unsigned int real_kver;
 
                if (fetch_kernel_version(&real_kver, NULL, 0)) {
index 67e5966..20aef90 100644 (file)
@@ -144,7 +144,29 @@ static int asnprintf(char **strp, size_t size, const char *fmt, ...)
        return ret;
 }
 
-static char *build_id__filename(const char *sbuild_id, char *bf, size_t size)
+char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf,
+                                   size_t size)
+{
+       bool retry_old = true;
+
+       snprintf(bf, size, "%s/%s/%s/kallsyms",
+                buildid_dir, DSO__NAME_KALLSYMS, sbuild_id);
+retry:
+       if (!access(bf, F_OK))
+               return bf;
+       if (retry_old) {
+               /* Try old style kallsyms cache */
+               snprintf(bf, size, "%s/%s/%s",
+                        buildid_dir, DSO__NAME_KALLSYMS, sbuild_id);
+               retry_old = false;
+               goto retry;
+       }
+
+       return NULL;
+}
+
+static char *build_id_cache__linkname(const char *sbuild_id, char *bf,
+                                     size_t size)
 {
        char *tmp = bf;
        int ret = asnprintf(&bf, size, "%s/.build-id/%.2s/%s", buildid_dir,
@@ -154,23 +176,52 @@ static char *build_id__filename(const char *sbuild_id, char *bf, size_t size)
        return bf;
 }
 
+static const char *build_id_cache__basename(bool is_kallsyms, bool is_vdso)
+{
+       return is_kallsyms ? "kallsyms" : (is_vdso ? "vdso" : "elf");
+}
+
 char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size)
 {
-       char build_id_hex[SBUILD_ID_SIZE];
+       bool is_kallsyms = dso__is_kallsyms((struct dso *)dso);
+       bool is_vdso = dso__is_vdso((struct dso *)dso);
+       char sbuild_id[SBUILD_ID_SIZE];
+       char *linkname;
+       bool alloc = (bf == NULL);
+       int ret;
 
        if (!dso->has_build_id)
                return NULL;
 
-       build_id__sprintf(dso->build_id, sizeof(dso->build_id), build_id_hex);
-       return build_id__filename(build_id_hex, bf, size);
+       build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
+       linkname = build_id_cache__linkname(sbuild_id, NULL, 0);
+       if (!linkname)
+               return NULL;
+
+       /* Check if old style build_id cache */
+       if (is_regular_file(linkname))
+               ret = asnprintf(&bf, size, "%s", linkname);
+       else
+               ret = asnprintf(&bf, size, "%s/%s", linkname,
+                        build_id_cache__basename(is_kallsyms, is_vdso));
+       if (ret < 0 || (!alloc && size < (unsigned int)ret))
+               bf = NULL;
+       free(linkname);
+
+       return bf;
 }
 
 bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size)
 {
-       char *id_name, *ch;
+       char *id_name = NULL, *ch;
        struct stat sb;
+       char sbuild_id[SBUILD_ID_SIZE];
+
+       if (!dso->has_build_id)
+               goto err;
 
-       id_name = dso__build_id_filename(dso, bf, size);
+       build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
+       id_name = build_id_cache__linkname(sbuild_id, NULL, 0);
        if (!id_name)
                goto err;
        if (access(id_name, F_OK))
@@ -194,18 +245,14 @@ bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size)
        if (ch - 3 < bf)
                goto err;
 
+       free(id_name);
        return strncmp(".ko", ch - 3, 3) == 0;
 err:
-       /*
-        * If dso__build_id_filename work, get id_name again,
-        * because id_name points to bf and is broken.
-        */
-       if (id_name)
-               id_name = dso__build_id_filename(dso, bf, size);
        pr_err("Invalid build id: %s\n", id_name ? :
                                         dso->long_name ? :
                                         dso->short_name ? :
                                         "[unknown]");
+       free(id_name);
        return false;
 }
 
@@ -341,7 +388,8 @@ void disable_buildid_cache(void)
 }
 
 static char *build_id_cache__dirname_from_path(const char *name,
-                                              bool is_kallsyms, bool is_vdso)
+                                              bool is_kallsyms, bool is_vdso,
+                                              const char *sbuild_id)
 {
        char *realname = (char *)name, *filename;
        bool slash = is_kallsyms || is_vdso;
@@ -352,8 +400,9 @@ static char *build_id_cache__dirname_from_path(const char *name,
                        return NULL;
        }
 
-       if (asprintf(&filename, "%s%s%s", buildid_dir, slash ? "/" : "",
-                    is_vdso ? DSO__NAME_VDSO : realname) < 0)
+       if (asprintf(&filename, "%s%s%s%s%s", buildid_dir, slash ? "/" : "",
+                    is_vdso ? DSO__NAME_VDSO : realname,
+                    sbuild_id ? "/" : "", sbuild_id ?: "") < 0)
                filename = NULL;
 
        if (!slash)
@@ -368,7 +417,8 @@ int build_id_cache__list_build_ids(const char *pathname,
        char *dir_name;
        int ret = 0;
 
-       dir_name = build_id_cache__dirname_from_path(pathname, false, false);
+       dir_name = build_id_cache__dirname_from_path(pathname, false, false,
+                                                    NULL);
        if (!dir_name)
                return -ENOMEM;
 
@@ -385,7 +435,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name,
 {
        const size_t size = PATH_MAX;
        char *realname = NULL, *filename = NULL, *dir_name = NULL,
-            *linkname = zalloc(size), *targetname, *tmp;
+            *linkname = zalloc(size), *tmp;
        int err = -1;
 
        if (!is_kallsyms) {
@@ -394,14 +444,22 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name,
                        goto out_free;
        }
 
-       dir_name = build_id_cache__dirname_from_path(name, is_kallsyms, is_vdso);
+       dir_name = build_id_cache__dirname_from_path(name, is_kallsyms,
+                                                    is_vdso, sbuild_id);
        if (!dir_name)
                goto out_free;
 
+       /* Remove old style build-id cache */
+       if (is_regular_file(dir_name))
+               if (unlink(dir_name))
+                       goto out_free;
+
        if (mkdir_p(dir_name, 0755))
                goto out_free;
 
-       if (asprintf(&filename, "%s/%s", dir_name, sbuild_id) < 0) {
+       /* Save the allocated buildid dirname */
+       if (asprintf(&filename, "%s/%s", dir_name,
+                    build_id_cache__basename(is_kallsyms, is_vdso)) < 0) {
                filename = NULL;
                goto out_free;
        }
@@ -415,7 +473,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name,
                        goto out_free;
        }
 
-       if (!build_id__filename(sbuild_id, linkname, size))
+       if (!build_id_cache__linkname(sbuild_id, linkname, size))
                goto out_free;
        tmp = strrchr(linkname, '/');
        *tmp = '\0';
@@ -424,10 +482,10 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name,
                goto out_free;
 
        *tmp = '/';
-       targetname = filename + strlen(buildid_dir) - 5;
-       memcpy(targetname, "../..", 5);
+       tmp = dir_name + strlen(buildid_dir) - 5;
+       memcpy(tmp, "../..", 5);
 
-       if (symlink(targetname, linkname) == 0)
+       if (symlink(tmp, linkname) == 0)
                err = 0;
 out_free:
        if (!is_kallsyms)
@@ -452,7 +510,7 @@ static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size,
 bool build_id_cache__cached(const char *sbuild_id)
 {
        bool ret = false;
-       char *filename = build_id__filename(sbuild_id, NULL, 0);
+       char *filename = build_id_cache__linkname(sbuild_id, NULL, 0);
 
        if (filename && !access(filename, F_OK))
                ret = true;
@@ -471,7 +529,7 @@ int build_id_cache__remove_s(const char *sbuild_id)
        if (filename == NULL || linkname == NULL)
                goto out_free;
 
-       if (!build_id__filename(sbuild_id, linkname, size))
+       if (!build_id_cache__linkname(sbuild_id, linkname, size))
                goto out_free;
 
        if (access(linkname, F_OK))
@@ -489,7 +547,7 @@ int build_id_cache__remove_s(const char *sbuild_id)
        tmp = strrchr(linkname, '/') + 1;
        snprintf(tmp, size - (tmp - linkname), "%s", filename);
 
-       if (unlink(linkname))
+       if (rm_rf(linkname))
                goto out_free;
 
        err = 0;
@@ -501,7 +559,7 @@ out_free:
 
 static int dso__cache_build_id(struct dso *dso, struct machine *machine)
 {
-       bool is_kallsyms = dso->kernel && dso->long_name[0] != '/';
+       bool is_kallsyms = dso__is_kallsyms(dso);
        bool is_vdso = dso__is_vdso(dso);
        const char *name = dso->long_name;
        char nm[PATH_MAX];
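
Taken together, the build-id.c hunks above move the cache from a flat file named after the build-id to a per-build-id directory that stores the object under a fixed basename (old-style flat entries are unlinked on the way). Below is a minimal stand-alone sketch of the resulting path construction; the directory, DSO name and the "elf" basename are illustrative assumptions rather than values taken from the patch:

#define _GNU_SOURCE             /* for asprintf() */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        /* Illustrative inputs; in perf these come from the config and the DSO. */
        const char *buildid_dir = "/home/user/.debug";
        const char *name        = "/usr/lib/libc.so.6";
        const char *sbuild_id   = "0123456789abcdef0123456789abcdef01234567";
        char *dir_name, *filename;

        /* Cache directory: <buildid_dir>/<name>/<sbuild_id> */
        if (asprintf(&dir_name, "%s%s/%s", buildid_dir, name, sbuild_id) < 0)
                return 1;

        /*
         * The cached object lives under a fixed basename inside that
         * directory ("elf" here is an assumption; kallsyms and vdso get
         * their own basenames via build_id_cache__basename()).
         */
        if (asprintf(&filename, "%s/%s", dir_name, "elf") < 0) {
                free(dir_name);
                return 1;
        }

        printf("dir:  %s\nfile: %s\n", dir_name, filename);
        free(filename);
        free(dir_name);
        return 0;
}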
index 64af3e2..e5435f4 100644 (file)
@@ -14,6 +14,8 @@ struct dso;
 int build_id__sprintf(const u8 *build_id, int len, char *bf);
 int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id);
 int filename__sprintf_build_id(const char *pathname, char *sbuild_id);
+char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf,
+                                   size_t size);
 
 char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size);
 bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size);
index 65e2a4f..a70f6b5 100644 (file)
@@ -94,6 +94,7 @@ struct callchain_param {
        enum perf_call_graph_mode record_mode;
        u32                     dump_size;
        enum chain_mode         mode;
+       u16                     max_stack;
        u32                     print_limit;
        double                  min_percent;
        sort_chain_func_t       sort;
index dad7d82..8749eca 100644 (file)
@@ -275,7 +275,8 @@ static int perf_parse_file(config_fn_t fn, void *data)
                        break;
                }
        }
-       die("bad config file line %d in %s", config_linenr, config_file_name);
+       pr_err("bad config file line %d in %s\n", config_linenr, config_file_name);
+       return -1;
 }
 
 static int parse_unit_factor(const char *end, unsigned long *val)
@@ -479,16 +480,15 @@ static int perf_config_global(void)
 
 int perf_config(config_fn_t fn, void *data)
 {
-       int ret = 0, found = 0;
+       int ret = -1;
        const char *home = NULL;
 
        /* Setting $PERF_CONFIG makes perf read _only_ the given config file. */
        if (config_exclusive_filename)
                return perf_config_from_file(fn, config_exclusive_filename, data);
        if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) {
-               ret += perf_config_from_file(fn, perf_etc_perfconfig(),
-                                           data);
-               found += 1;
+               if (perf_config_from_file(fn, perf_etc_perfconfig(), data) < 0)
+                       goto out;
        }
 
        home = getenv("HOME");
@@ -514,14 +514,12 @@ int perf_config(config_fn_t fn, void *data)
                if (!st.st_size)
                        goto out_free;
 
-               ret += perf_config_from_file(fn, user_config, data);
-               found += 1;
+               ret = perf_config_from_file(fn, user_config, data);
+
 out_free:
                free(user_config);
        }
 out:
-       if (found == 0)
-               return -1;
        return ret;
 }
 
@@ -609,8 +607,12 @@ static int collect_config(const char *var, const char *value,
        struct perf_config_section *section = NULL;
        struct perf_config_item *item = NULL;
        struct perf_config_set *set = perf_config_set;
-       struct list_head *sections = &set->sections;
+       struct list_head *sections;
+
+       if (set == NULL)
+               return -1;
 
+       sections = &set->sections;
        key = ptr = strdup(var);
        if (!key) {
                pr_debug("%s: strdup failed\n", __func__);
@@ -641,17 +643,64 @@ static int collect_config(const char *var, const char *value,
 
 out_free:
        free(key);
-       perf_config_set__delete(set);
        return -1;
 }
 
+static int perf_config_set__init(struct perf_config_set *set)
+{
+       int ret = -1;
+       const char *home = NULL;
+
+       /* Setting $PERF_CONFIG makes perf read _only_ the given config file. */
+       if (config_exclusive_filename)
+               return perf_config_from_file(collect_config, config_exclusive_filename, set);
+       if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) {
+               if (perf_config_from_file(collect_config, perf_etc_perfconfig(), set) < 0)
+                       goto out;
+       }
+
+       home = getenv("HOME");
+       if (perf_config_global() && home) {
+               char *user_config = strdup(mkpath("%s/.perfconfig", home));
+               struct stat st;
+
+               if (user_config == NULL) {
+                       warning("Not enough memory to process %s/.perfconfig, "
+                               "ignoring it.", home);
+                       goto out;
+               }
+
+               if (stat(user_config, &st) < 0)
+                       goto out_free;
+
+               if (st.st_uid && (st.st_uid != geteuid())) {
+                       warning("File %s not owned by current user or root, "
+                               "ignoring it.", user_config);
+                       goto out_free;
+               }
+
+               if (!st.st_size)
+                       goto out_free;
+
+               ret = perf_config_from_file(collect_config, user_config, set);
+
+out_free:
+               free(user_config);
+       }
+out:
+       return ret;
+}
+
 struct perf_config_set *perf_config_set__new(void)
 {
        struct perf_config_set *set = zalloc(sizeof(*set));
 
        if (set) {
                INIT_LIST_HEAD(&set->sections);
-               perf_config(collect_config, set);
+               if (perf_config_set__init(set) < 0) {
+                       perf_config_set__delete(set);
+                       set = NULL;
+               }
        }
 
        return set;
index c9a6dc1..b0c2b5c 100644 (file)
@@ -233,17 +233,6 @@ int db_export__symbol(struct db_export *dbe, struct symbol *sym,
        return 0;
 }
 
-static struct thread *get_main_thread(struct machine *machine, struct thread *thread)
-{
-       if (thread->pid_ == thread->tid)
-               return thread__get(thread);
-
-       if (thread->pid_ == -1)
-               return NULL;
-
-       return machine__find_thread(machine, thread->pid_, thread->pid_);
-}
-
 static int db_ids_from_al(struct db_export *dbe, struct addr_location *al,
                          u64 *dso_db_id, u64 *sym_db_id, u64 *offset)
 {
@@ -382,7 +371,7 @@ int db_export__sample(struct db_export *dbe, union perf_event *event,
        if (err)
                return err;
 
-       main_thread = get_main_thread(al->machine, thread);
+       main_thread = thread__main_thread(al->machine, thread);
        if (main_thread)
                comm = machine__thread_exec_comm(al->machine, main_thread);
 
index 0953280..76d79d0 100644 (file)
@@ -349,6 +349,11 @@ static inline bool dso__is_kcore(struct dso *dso)
               dso->binary_type == DSO_BINARY_TYPE__GUEST_KCORE;
 }
 
+static inline bool dso__is_kallsyms(struct dso *dso)
+{
+       return dso->kernel && dso->long_name[0] != '/';
+}
+
 void dso__free_a2l(struct dso *dso);
 
 enum dso_type dso__type(struct dso *dso, struct machine *machine);
index e82ba90..1b918aa 100644 (file)
@@ -462,9 +462,9 @@ int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
        return 0;
 }
 
-static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx)
+static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx, short revent)
 {
-       int pos = fdarray__add(&evlist->pollfd, fd, POLLIN | POLLERR | POLLHUP);
+       int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);
        /*
         * Save the idx so that when we filter out fds POLLHUP'ed we can
         * close the associated evlist->mmap[] entry.
@@ -480,10 +480,11 @@ static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx
 
 int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
 {
-       return __perf_evlist__add_pollfd(evlist, fd, -1);
+       return __perf_evlist__add_pollfd(evlist, fd, -1, POLLIN);
 }
 
-static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd)
+static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
+                                        void *arg __maybe_unused)
 {
        struct perf_evlist *evlist = container_of(fda, struct perf_evlist, pollfd);
 
@@ -493,7 +494,7 @@ static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd)
 int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
 {
        return fdarray__filter(&evlist->pollfd, revents_and_mask,
-                              perf_evlist__munmap_filtered);
+                              perf_evlist__munmap_filtered, NULL);
 }
 
 int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
@@ -777,7 +778,7 @@ broken_event:
        return event;
 }
 
-union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
+union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int idx)
 {
        struct perf_mmap *md = &evlist->mmap[idx];
        u64 head;
@@ -832,6 +833,13 @@ perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx)
        return perf_mmap__read(md, false, start, end, &md->prev);
 }
 
+union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
+{
+       if (!evlist->backward)
+               return perf_evlist__mmap_read_forward(evlist, idx);
+       return perf_evlist__mmap_read_backward(evlist, idx);
+}
+
 void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx)
 {
        struct perf_mmap *md = &evlist->mmap[idx];
@@ -856,9 +864,11 @@ static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx)
 
 static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx)
 {
-       BUG_ON(atomic_read(&evlist->mmap[idx].refcnt) == 0);
+       struct perf_mmap *md = &evlist->mmap[idx];
+
+       BUG_ON(md->base && atomic_read(&md->refcnt) == 0);
 
-       if (atomic_dec_and_test(&evlist->mmap[idx].refcnt))
+       if (atomic_dec_and_test(&md->refcnt))
                __perf_evlist__munmap(evlist, idx);
 }
 
@@ -936,9 +946,12 @@ static int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
        if (cpu_map__empty(evlist->cpus))
                evlist->nr_mmaps = thread_map__nr(evlist->threads);
        evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
+       if (!evlist->mmap)
+               return -ENOMEM;
+
        for (i = 0; i < evlist->nr_mmaps; i++)
                evlist->mmap[i].fd = -1;
-       return evlist->mmap != NULL ? 0 : -ENOMEM;
+       return 0;
 }
 
 struct mmap_params {
@@ -983,15 +996,28 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
        return 0;
 }
 
+static bool
+perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused,
+                        struct perf_evsel *evsel)
+{
+       if (evsel->overwrite)
+               return false;
+       return true;
+}
+
 static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
                                       struct mmap_params *mp, int cpu,
                                       int thread, int *output)
 {
        struct perf_evsel *evsel;
+       int revent;
 
        evlist__for_each(evlist, evsel) {
                int fd;
 
+               if (evsel->overwrite != (evlist->overwrite && evlist->backward))
+                       continue;
+
                if (evsel->system_wide && thread)
                        continue;
 
@@ -1008,6 +1034,8 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
                        perf_evlist__mmap_get(evlist, idx);
                }
 
+               revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0;
+
                /*
                 * The system_wide flag causes a selected event to be opened
                 * always without a pid.  Consequently it will never get a
@@ -1016,7 +1044,7 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
                 * Therefore don't add it for polling.
                 */
                if (!evsel->system_wide &&
-                   __perf_evlist__add_pollfd(evlist, fd, idx) < 0) {
+                   __perf_evlist__add_pollfd(evlist, fd, idx, revent) < 0) {
                        perf_evlist__mmap_put(evlist, idx);
                        return -1;
                }
index d740fb8..68cb136 100644 (file)
@@ -131,6 +131,8 @@ struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id);
 
 union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx);
 
+union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist,
+                                                int idx);
 union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist,
                                                  int idx);
 void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx);
index 5d7037e..9b2e3e6 100644 (file)
@@ -572,6 +572,8 @@ void perf_evsel__config_callchain(struct perf_evsel *evsel,
 
        perf_evsel__set_sample_bit(evsel, CALLCHAIN);
 
+       attr->sample_max_stack = param->max_stack;
+
        if (param->record_mode == CALLCHAIN_LBR) {
                if (!opts->branch_stack) {
                        if (attr->exclude_user) {
@@ -635,7 +637,8 @@ static void apply_config_terms(struct perf_evsel *evsel,
        struct perf_event_attr *attr = &evsel->attr;
        struct callchain_param param;
        u32 dump_size = 0;
-       char *callgraph_buf = NULL;
+       int max_stack = 0;
+       const char *callgraph_buf = NULL;
 
        /* callgraph default */
        param.record_mode = callchain_param.record_mode;
@@ -662,6 +665,9 @@ static void apply_config_terms(struct perf_evsel *evsel,
                case PERF_EVSEL__CONFIG_TERM_STACK_USER:
                        dump_size = term->val.stack_user;
                        break;
+               case PERF_EVSEL__CONFIG_TERM_MAX_STACK:
+                       max_stack = term->val.max_stack;
+                       break;
                case PERF_EVSEL__CONFIG_TERM_INHERIT:
                        /*
                         * attr->inherit should have already been set by
@@ -677,7 +683,12 @@ static void apply_config_terms(struct perf_evsel *evsel,
        }
 
        /* User explicitly set per-event callgraph, clear the old setting and reset. */
-       if ((callgraph_buf != NULL) || (dump_size > 0)) {
+       if ((callgraph_buf != NULL) || (dump_size > 0) || max_stack) {
+               if (max_stack) {
+                       param.max_stack = max_stack;
+                       if (callgraph_buf == NULL)
+                               callgraph_buf = "fp";
+               }
 
                /* parse callgraph parameters */
                if (callgraph_buf != NULL) {
@@ -1329,6 +1340,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
        PRINT_ATTRf(clockid, p_signed);
        PRINT_ATTRf(sample_regs_intr, p_hex);
        PRINT_ATTRf(aux_watermark, p_unsigned);
+       PRINT_ATTRf(sample_max_stack, p_unsigned);
 
        return ret;
 }
@@ -2239,17 +2251,11 @@ void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample,
        return sample->raw_data + offset;
 }
 
-u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
-                      const char *name)
+u64 format_field__intval(struct format_field *field, struct perf_sample *sample,
+                        bool needs_swap)
 {
-       struct format_field *field = perf_evsel__field(evsel, name);
-       void *ptr;
        u64 value;
-
-       if (!field)
-               return 0;
-
-       ptr = sample->raw_data + field->offset;
+       void *ptr = sample->raw_data + field->offset;
 
        switch (field->size) {
        case 1:
@@ -2267,7 +2273,7 @@ u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
                return 0;
        }
 
-       if (!evsel->needs_swap)
+       if (!needs_swap)
                return value;
 
        switch (field->size) {
@@ -2284,6 +2290,17 @@ u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
        return 0;
 }
 
+u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
+                      const char *name)
+{
+       struct format_field *field = perf_evsel__field(evsel, name);
+
+       if (!field)
+               return 0;
+
+       return format_field__intval(field, sample, evsel->needs_swap);
+}
+
 bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
                          char *msg, size_t msgsize)
 {
@@ -2372,6 +2389,9 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
         "No such device - did you specify an out-of-range profile CPU?");
                break;
        case EOPNOTSUPP:
+               if (evsel->attr.sample_period != 0)
+                       return scnprintf(msg, size, "%s",
+       "PMU Hardware doesn't support sampling/overflow-interrupts.");
                if (evsel->attr.precise_ip)
                        return scnprintf(msg, size, "%s",
        "\'precise\' request may not be supported. Try removing 'p' modifier.");
index c1f1015..828ddd1 100644 (file)
@@ -44,6 +44,7 @@ enum {
        PERF_EVSEL__CONFIG_TERM_CALLGRAPH,
        PERF_EVSEL__CONFIG_TERM_STACK_USER,
        PERF_EVSEL__CONFIG_TERM_INHERIT,
+       PERF_EVSEL__CONFIG_TERM_MAX_STACK,
        PERF_EVSEL__CONFIG_TERM_MAX,
 };
 
@@ -56,6 +57,7 @@ struct perf_evsel_config_term {
                bool    time;
                char    *callgraph;
                u64     stack_user;
+               int     max_stack;
                bool    inherit;
        } val;
 };
@@ -259,6 +261,8 @@ static inline char *perf_evsel__strval(struct perf_evsel *evsel,
 
 struct format_field;
 
+u64 format_field__intval(struct format_field *field, struct perf_sample *sample, bool needs_swap);
+
 struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name);
 
 #define perf_evsel__match(evsel, t, c)         \
diff --git a/tools/perf/util/group.h b/tools/perf/util/group.h
new file mode 100644 (file)
index 0000000..116debe
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef GROUP_H
+#define GROUP_H 1
+
+bool arch_topdown_check_group(bool *warn);
+void arch_topdown_group_warn(void);
+
+#endif
diff --git a/tools/perf/util/libunwind/arm64.c b/tools/perf/util/libunwind/arm64.c
new file mode 100644 (file)
index 0000000..4fb5395
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * This file sets up defines to compile the arch-specific binary from the
+ * generic one.
+ *
+ * The 'LIBUNWIND__ARCH_REG_ID' function name is set according to the arch
+ * name, and the definition of this function is included directly from
+ * 'arch/arm64/util/unwind-libunwind.c', to make sure that this function
+ * is defined no matter what arch the host is.
+ *
+ * Finally, the arch-specific unwind methods are exported; they will
+ * be assigned to each arm64 thread.
+ */
+
+#define REMOTE_UNWIND_LIBUNWIND
+
+#define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__arm64_reg_id(regnum)
+
+#include "unwind.h"
+#include "debug.h"
+#include "libunwind-aarch64.h"
+#include <../../../../arch/arm64/include/uapi/asm/perf_regs.h>
+#include "../../arch/arm64/util/unwind-libunwind.c"
+
+/* NO_LIBUNWIND_DEBUG_FRAME is a feature flag for the local libunwind;
+ * assign NO_LIBUNWIND_DEBUG_FRAME_AARCH64 to it when compiling the arm64
+ * unwind methods.
+ */
+#undef NO_LIBUNWIND_DEBUG_FRAME
+#ifdef NO_LIBUNWIND_DEBUG_FRAME_AARCH64
+#define NO_LIBUNWIND_DEBUG_FRAME
+#endif
+#include "util/unwind-libunwind-local.c"
+
+struct unwind_libunwind_ops *
+arm64_unwind_libunwind_ops = &_unwind_libunwind_ops;
diff --git a/tools/perf/util/libunwind/x86_32.c b/tools/perf/util/libunwind/x86_32.c
new file mode 100644 (file)
index 0000000..d98c17e
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * This file sets up defines to compile the arch-specific binary from the
+ * generic one.
+ *
+ * The 'LIBUNWIND__ARCH_REG_ID' function name is set according to the arch
+ * name, and the definition of this function is included directly from
+ * 'arch/x86/util/unwind-libunwind.c', to make sure that this function
+ * is defined no matter what arch the host is.
+ *
+ * Finally, the arch-specific unwind methods are exported; they will
+ * be assigned to each x86 thread.
+ */
+
+#define REMOTE_UNWIND_LIBUNWIND
+#define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__x86_reg_id(regnum)
+
+#include "unwind.h"
+#include "debug.h"
+#include "libunwind-x86.h"
+#include <../../../../arch/x86/include/uapi/asm/perf_regs.h>
+
+/* HAVE_ARCH_X86_64_SUPPORT is used in 'arch/x86/util/unwind-libunwind.c';
+ * for x86_32 we undef it to compile code for x86_32 only.
+ */
+#undef HAVE_ARCH_X86_64_SUPPORT
+#include "../../arch/x86/util/unwind-libunwind.c"
+
+/* Explicitly define NO_LIBUNWIND_DEBUG_FRAME, because non-ARM has no
+ * dwarf_find_debug_frame() function.
+ */
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+#define NO_LIBUNWIND_DEBUG_FRAME
+#endif
+#include "util/unwind-libunwind-local.c"
+
+struct unwind_libunwind_ops *
+x86_32_unwind_libunwind_ops = &_unwind_libunwind_ops;
index b177218..a0c186a 100644 (file)
@@ -1353,11 +1353,16 @@ int machine__process_mmap2_event(struct machine *machine,
        if (map == NULL)
                goto out_problem_map;
 
-       thread__insert_map(thread, map);
+       ret = thread__insert_map(thread, map);
+       if (ret)
+               goto out_problem_insert;
+
        thread__put(thread);
        map__put(map);
        return 0;
 
+out_problem_insert:
+       map__put(map);
 out_problem_map:
        thread__put(thread);
 out_problem:
@@ -1403,11 +1408,16 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
        if (map == NULL)
                goto out_problem_map;
 
-       thread__insert_map(thread, map);
+       ret = thread__insert_map(thread, map);
+       if (ret)
+               goto out_problem_insert;
+
        thread__put(thread);
        map__put(map);
        return 0;
 
+out_problem_insert:
+       map__put(map);
 out_problem_map:
        thread__put(thread);
 out_problem:
index c6fd047..d15e335 100644 (file)
@@ -900,6 +900,7 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = {
        [PARSE_EVENTS__TERM_TYPE_STACKSIZE]             = "stack-size",
        [PARSE_EVENTS__TERM_TYPE_NOINHERIT]             = "no-inherit",
        [PARSE_EVENTS__TERM_TYPE_INHERIT]               = "inherit",
+       [PARSE_EVENTS__TERM_TYPE_MAX_STACK]             = "max-stack",
 };
 
 static bool config_term_shrinked;
@@ -995,6 +996,9 @@ do {                                                                           \
        case PARSE_EVENTS__TERM_TYPE_NAME:
                CHECK_TYPE_VAL(STR);
                break;
+       case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+               CHECK_TYPE_VAL(NUM);
+               break;
        default:
                err->str = strdup("unknown term");
                err->idx = term->err_term;
@@ -1040,6 +1044,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr,
        case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
        case PARSE_EVENTS__TERM_TYPE_INHERIT:
        case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
+       case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
                return config_term_common(attr, term, err);
        default:
                if (err) {
@@ -1109,6 +1114,9 @@ do {                                                              \
                case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
                        ADD_CONFIG_TERM(INHERIT, inherit, term->val.num ? 0 : 1);
                        break;
+               case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+                       ADD_CONFIG_TERM(MAX_STACK, max_stack, term->val.num);
+                       break;
                default:
                        break;
                }
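
Taken with the evsel.c hunk earlier in this merge (which copies the term into attr->sample_max_stack and defaults the per-event call-graph mode to "fp" when only max-stack is given), the new MAX_STACK term lets the callchain depth be capped per event rather than globally. A hedged usage example; the event names and workload are purely illustrative:

        perf record -e 'sched:sched_switch/max-stack=5/' -e 'cycles/call-graph=fp,max-stack=3/' -- ./workload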
index d740c3c..46c05cc 100644 (file)
@@ -68,6 +68,7 @@ enum {
        PARSE_EVENTS__TERM_TYPE_STACKSIZE,
        PARSE_EVENTS__TERM_TYPE_NOINHERIT,
        PARSE_EVENTS__TERM_TYPE_INHERIT,
+       PARSE_EVENTS__TERM_TYPE_MAX_STACK,
        __PARSE_EVENTS__TERM_TYPE_NR,
 };
 
index 1477fbc..3c15b33 100644 (file)
@@ -199,6 +199,7 @@ branch_type         { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE
 time                   { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); }
 call-graph             { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); }
 stack-size             { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); }
+max-stack              { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_STACK); }
 inherit                        { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); }
 no-inherit             { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); }
 ,                      { return ','; }
@@ -259,6 +260,7 @@ cycles-ct                                   { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
 cycles-t                                       { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
 mem-loads                                      { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
 mem-stores                                     { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
+topdown-[a-z-]+                                        { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
 
 L1-dcache|l1-d|l1d|L1-data             |
 L1-icache|l1-i|l1i|L1-instruction      |
index 5214974..dfedf09 100644 (file)
@@ -593,6 +593,7 @@ do {                                                \
        if (bswap_safe(f, 0))                   \
                attr->f = bswap_##sz(attr->f);  \
 } while(0)
+#define bswap_field_16(f) bswap_field(f, 16)
 #define bswap_field_32(f) bswap_field(f, 32)
 #define bswap_field_64(f) bswap_field(f, 64)
 
@@ -608,6 +609,7 @@ do {                                                \
        bswap_field_64(sample_regs_user);
        bswap_field_32(sample_stack_user);
        bswap_field_32(aux_watermark);
+       bswap_field_16(sample_max_stack);
 
        /*
         * After read_format are bitfields. Check read_format because
index aa9efe0..8a2bbd2 100644 (file)
@@ -36,6 +36,11 @@ static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_topdown_total_slots[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
 static bool have_frontend_stalled;
 
 struct stats walltime_nsecs_stats;
@@ -82,6 +87,11 @@ void perf_stat__reset_shadow_stats(void)
                sizeof(runtime_transaction_stats));
        memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
        memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
+       memset(runtime_topdown_total_slots, 0, sizeof(runtime_topdown_total_slots));
+       memset(runtime_topdown_slots_retired, 0, sizeof(runtime_topdown_slots_retired));
+       memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued));
+       memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles));
+       memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles));
 }
 
 /*
@@ -105,6 +115,16 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
                update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
        else if (perf_stat_evsel__is(counter, ELISION_START))
                update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
+       else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
+               update_stats(&runtime_topdown_total_slots[ctx][cpu], count[0]);
+       else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
+               update_stats(&runtime_topdown_slots_issued[ctx][cpu], count[0]);
+       else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
+               update_stats(&runtime_topdown_slots_retired[ctx][cpu], count[0]);
+       else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
+               update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu], count[0]);
+       else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
+               update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
                update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
@@ -302,6 +322,107 @@ static void print_ll_cache_misses(int cpu,
        out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
 }
 
+/*
+ * High level "TopDown" CPU core pipeline bottleneck breakdown.
+ *
+ * Basic concept following
+ * Yasin, A Top Down Method for Performance analysis and Counter architecture
+ * ISPASS14
+ *
+ * The CPU pipeline is divided into 4 areas that can be bottlenecks:
+ *
+ * Frontend -> Backend -> Retiring
+ * BadSpeculation in addition means out of order execution that is thrown away
+ * (for example branch mispredictions)
+ * Frontend is instruction decoding.
+ * Backend is execution, like computation and accessing data in memory
+ * Retiring is good execution that is not directly bottlenecked
+ *
+ * The formulas are computed in slots.
+ * A slot is an entry in the pipeline, one per unit of pipeline width
+ * (for example, a 4-wide pipeline has 4 slots for each cycle).
+ *
+ * Formulas:
+ * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
+ *                     TotalSlots
+ * Retiring = SlotsRetired / TotalSlots
+ * FrontendBound = FetchBubbles / TotalSlots
+ * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
+ *
+ * The kernel provides the mapping to the low level CPU events and any scaling
+ * needed for the CPU pipeline width, for example:
+ *
+ * TotalSlots = Cycles * 4
+ *
+ * The scaling factor is communicated in the sysfs unit.
+ *
+ * In some cases the CPU may not be able to measure all the formulas due to
+ * missing events. In that case multiple formulas are combined where possible.
+ *
+ * Full TopDown supports more levels to sub-divide each area: for example
+ * BackendBound into computing bound and memory bound. For now we only
+ * support Level 1 TopDown.
+ */
+
+static double sanitize_val(double x)
+{
+       if (x < 0 && x >= -0.02)
+               return 0.0;
+       return x;
+}
+
+static double td_total_slots(int ctx, int cpu)
+{
+       return avg_stats(&runtime_topdown_total_slots[ctx][cpu]);
+}
+
+static double td_bad_spec(int ctx, int cpu)
+{
+       double bad_spec = 0;
+       double total_slots;
+       double total;
+
+       total = avg_stats(&runtime_topdown_slots_issued[ctx][cpu]) -
+               avg_stats(&runtime_topdown_slots_retired[ctx][cpu]) +
+               avg_stats(&runtime_topdown_recovery_bubbles[ctx][cpu]);
+       total_slots = td_total_slots(ctx, cpu);
+       if (total_slots)
+               bad_spec = total / total_slots;
+       return sanitize_val(bad_spec);
+}
+
+static double td_retiring(int ctx, int cpu)
+{
+       double retiring = 0;
+       double total_slots = td_total_slots(ctx, cpu);
+       double ret_slots = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]);
+
+       if (total_slots)
+               retiring = ret_slots / total_slots;
+       return retiring;
+}
+
+static double td_fe_bound(int ctx, int cpu)
+{
+       double fe_bound = 0;
+       double total_slots = td_total_slots(ctx, cpu);
+       double fetch_bub = avg_stats(&runtime_topdown_fetch_bubbles[ctx][cpu]);
+
+       if (total_slots)
+               fe_bound = fetch_bub / total_slots;
+       return fe_bound;
+}
+
+static double td_be_bound(int ctx, int cpu)
+{
+       double sum = (td_fe_bound(ctx, cpu) +
+                     td_bad_spec(ctx, cpu) +
+                     td_retiring(ctx, cpu));
+       if (sum == 0)
+               return 0;
+       return sanitize_val(1.0 - sum);
+}
+
 void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
                                   double avg, int cpu,
                                   struct perf_stat_output_ctx *out)
@@ -309,6 +430,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
        void *ctxp = out->ctx;
        print_metric_t print_metric = out->print_metric;
        double total, ratio = 0.0, total2;
+       const char *color = NULL;
        int ctx = evsel_context(evsel);
 
        if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
@@ -452,6 +574,46 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
                                     avg / ratio);
                else
                        print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
+       } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
+               double fe_bound = td_fe_bound(ctx, cpu);
+
+               if (fe_bound > 0.2)
+                       color = PERF_COLOR_RED;
+               print_metric(ctxp, color, "%8.1f%%", "frontend bound",
+                               fe_bound * 100.);
+       } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
+               double retiring = td_retiring(ctx, cpu);
+
+               if (retiring > 0.7)
+                       color = PERF_COLOR_GREEN;
+               print_metric(ctxp, color, "%8.1f%%", "retiring",
+                               retiring * 100.);
+       } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
+               double bad_spec = td_bad_spec(ctx, cpu);
+
+               if (bad_spec > 0.1)
+                       color = PERF_COLOR_RED;
+               print_metric(ctxp, color, "%8.1f%%", "bad speculation",
+                               bad_spec * 100.);
+       } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
+               double be_bound = td_be_bound(ctx, cpu);
+               const char *name = "backend bound";
+               static int have_recovery_bubbles = -1;
+
+               /* In case the CPU does not support topdown-recovery-bubbles */
+               if (have_recovery_bubbles < 0)
+                       have_recovery_bubbles = pmu_have_event("cpu",
+                                       "topdown-recovery-bubbles");
+               if (!have_recovery_bubbles)
+                       name = "backend bound/bad spec";
+
+               if (be_bound > 0.2)
+                       color = PERF_COLOR_RED;
+               if (td_total_slots(ctx, cpu) > 0)
+                       print_metric(ctxp, color, "%8.1f%%", name,
+                                       be_bound * 100.);
+               else
+                       print_metric(ctxp, NULL, NULL, name, 0);
        } else if (runtime_nsecs_stats[cpu].n != 0) {
                char unit = 'M';
                char unit_buf[10];
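
As a sanity check on the Level-1 formulas quoted in the comment block above, here is a small self-contained sketch that applies them to made-up slot counts; the numbers are purely illustrative, whereas perf derives the real ones from the topdown-* events:

#include <stdio.h>

int main(void)
{
        /* Made-up slot counts for one CPU/context. */
        double total_slots      = 4000000.0;   /* cycles * pipeline width */
        double slots_issued     = 3200000.0;
        double slots_retired    = 2600000.0;
        double recovery_bubbles =  150000.0;
        double fetch_bubbles    =  500000.0;

        double bad_spec = (slots_issued - slots_retired + recovery_bubbles) /
                          total_slots;
        double retiring = slots_retired / total_slots;
        double fe_bound = fetch_bubbles / total_slots;
        double be_bound = 1.0 - bad_spec - retiring - fe_bound;

        printf("retiring        %5.1f%%\n", retiring * 100.0);
        printf("bad speculation %5.1f%%\n", bad_spec * 100.0);
        printf("frontend bound  %5.1f%%\n", fe_bound * 100.0);
        printf("backend bound   %5.1f%%\n", be_bound * 100.0);
        return 0;
}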
index ffa1d06..c1ba255 100644 (file)
@@ -79,6 +79,11 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
        ID(TRANSACTION_START,   cpu/tx-start/),
        ID(ELISION_START,       cpu/el-start/),
        ID(CYCLES_IN_TX_CP,     cpu/cycles-ct/),
+       ID(TOPDOWN_TOTAL_SLOTS, topdown-total-slots),
+       ID(TOPDOWN_SLOTS_ISSUED, topdown-slots-issued),
+       ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
+       ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
+       ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
 };
 #undef ID
 
index 0150e78..c29bb94 100644 (file)
@@ -17,6 +17,11 @@ enum perf_stat_evsel_id {
        PERF_STAT_EVSEL_ID__TRANSACTION_START,
        PERF_STAT_EVSEL_ID__ELISION_START,
        PERF_STAT_EVSEL_ID__CYCLES_IN_TX_CP,
+       PERF_STAT_EVSEL_ID__TOPDOWN_TOTAL_SLOTS,
+       PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_ISSUED,
+       PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
+       PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
+       PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
        PERF_STAT_EVSEL_ID__MAX,
 };
 
index 54c4ff2..09c5c34 100644 (file)
@@ -1641,6 +1641,20 @@ static int find_matching_kcore(struct map *map, char *dir, size_t dir_sz)
        return ret;
 }
 
+/*
+ * Use open(O_RDONLY) to check readability directly instead of access(R_OK)
+ * since access(R_OK) only checks with real UID/GID but open() uses effective
+ * UID/GID and actual capabilities (e.g. /proc/kcore requires CAP_SYS_RAWIO).
+ */
+static bool filename__readable(const char *file)
+{
+       int fd = open(file, O_RDONLY);
+       if (fd < 0)
+               return false;
+       close(fd);
+       return true;
+}
+
 static char *dso__find_kallsyms(struct dso *dso, struct map *map)
 {
        u8 host_build_id[BUILD_ID_SIZE];
@@ -1660,58 +1674,43 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map)
                                 sizeof(host_build_id)) == 0)
                is_host = dso__build_id_equal(dso, host_build_id);
 
-       build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
-
-       scnprintf(path, sizeof(path), "%s/%s/%s", buildid_dir,
-                 DSO__NAME_KCORE, sbuild_id);
-
-       /* Use /proc/kallsyms if possible */
+       /* Try a fast path for /proc/kallsyms if possible */
        if (is_host) {
-               DIR *d;
-               int fd;
-
-               /* If no cached kcore go with /proc/kallsyms */
-               d = opendir(path);
-               if (!d)
-                       goto proc_kallsyms;
-               closedir(d);
-
                /*
-                * Do not check the build-id cache, until we know we cannot use
-                * /proc/kcore.
+                * Do not check the build-id cache, unless we know we cannot use
+                * /proc/kcore or the module maps don't match /proc/kallsyms.
+                * To check readability of /proc/kcore, do not use access(R_OK),
+                * since /proc/kcore requires CAP_SYS_RAWIO to read, which
+                * access() cannot check.
                 */
-               fd = open("/proc/kcore", O_RDONLY);
-               if (fd != -1) {
-                       close(fd);
-                       /* If module maps match go with /proc/kallsyms */
-                       if (!validate_kcore_addresses("/proc/kallsyms", map))
-                               goto proc_kallsyms;
-               }
-
-               /* Find kallsyms in build-id cache with kcore */
-               if (!find_matching_kcore(map, path, sizeof(path)))
-                       return strdup(path);
-
-               goto proc_kallsyms;
+               if (filename__readable("/proc/kcore") &&
+                   !validate_kcore_addresses("/proc/kallsyms", map))
+                       goto proc_kallsyms;
        }
 
+       build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
+
        /* Find kallsyms in build-id cache with kcore */
+       scnprintf(path, sizeof(path), "%s/%s/%s",
+                 buildid_dir, DSO__NAME_KCORE, sbuild_id);
+
        if (!find_matching_kcore(map, path, sizeof(path)))
                return strdup(path);
 
-       scnprintf(path, sizeof(path), "%s/%s/%s",
-                 buildid_dir, DSO__NAME_KALLSYMS, sbuild_id);
+       /* Use current /proc/kallsyms if possible */
+       if (is_host) {
+proc_kallsyms:
+               return strdup("/proc/kallsyms");
+       }
 
-       if (access(path, F_OK)) {
+       /* Finally, find a cache of kallsyms */
+       if (!build_id_cache__kallsyms_path(sbuild_id, path, sizeof(path))) {
                pr_err("No kallsyms or vmlinux with build-id %s was found\n",
                       sbuild_id);
                return NULL;
        }
 
        return strdup(path);
-
-proc_kallsyms:
-       return strdup("/proc/kallsyms");
 }
 
 static int dso__load_kernel_sym(struct dso *dso, struct map *map,
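
In sum, the rewritten dso__find_kallsyms() tries, in order: the live /proc/kallsyms, when running on the matching host, /proc/kcore is readable and its module maps validate; a kcore copy in the build-id cache; the live /proc/kallsyms again as a plain host fallback; and finally a cached kallsyms copy located through the new build_id_cache__kallsyms_path() helper, reporting an error only when none of these is available.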
index 45fcb71..f30f956 100644 (file)
@@ -43,9 +43,6 @@ struct thread *thread__new(pid_t pid, pid_t tid)
                thread->cpu = -1;
                INIT_LIST_HEAD(&thread->comm_list);
 
-               if (unwind__prepare_access(thread) < 0)
-                       goto err_thread;
-
                comm_str = malloc(32);
                if (!comm_str)
                        goto err_thread;
@@ -201,10 +198,18 @@ size_t thread__fprintf(struct thread *thread, FILE *fp)
               map_groups__fprintf(thread->mg, fp);
 }
 
-void thread__insert_map(struct thread *thread, struct map *map)
+int thread__insert_map(struct thread *thread, struct map *map)
 {
+       int ret;
+
+       ret = unwind__prepare_access(thread, map);
+       if (ret)
+               return ret;
+
        map_groups__fixup_overlappings(thread->mg, map, stderr);
        map_groups__insert(thread->mg, map);
+
+       return 0;
 }
 
 static int thread__clone_map_groups(struct thread *thread,
@@ -265,3 +270,14 @@ void thread__find_cpumode_addr_location(struct thread *thread,
                        break;
        }
 }
+
+struct thread *thread__main_thread(struct machine *machine, struct thread *thread)
+{
+       if (thread->pid_ == thread->tid)
+               return thread__get(thread);
+
+       if (thread->pid_ == -1)
+               return NULL;
+
+       return machine__find_thread(machine, thread->pid_, thread->pid_);
+}
index 45fba13..99263cb 100644 (file)
@@ -9,11 +9,9 @@
 #include "symbol.h"
 #include <strlist.h>
 #include <intlist.h>
-#ifdef HAVE_LIBUNWIND_SUPPORT
-#include <libunwind.h>
-#endif
 
 struct thread_stack;
+struct unwind_libunwind_ops;
 
 struct thread {
        union {
@@ -36,7 +34,8 @@ struct thread {
        void                    *priv;
        struct thread_stack     *ts;
 #ifdef HAVE_LIBUNWIND_SUPPORT
-       unw_addr_space_t        addr_space;
+       void                            *addr_space;
+       struct unwind_libunwind_ops     *unwind_libunwind_ops;
 #endif
 };
 
@@ -77,10 +76,12 @@ int thread__comm_len(struct thread *thread);
 struct comm *thread__comm(const struct thread *thread);
 struct comm *thread__exec_comm(const struct thread *thread);
 const char *thread__comm_str(const struct thread *thread);
-void thread__insert_map(struct thread *thread, struct map *map);
+int thread__insert_map(struct thread *thread, struct map *map);
 int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp);
 size_t thread__fprintf(struct thread *thread, FILE *fp);
 
+struct thread *thread__main_thread(struct machine *machine, struct thread *thread);
+
 void thread__find_addr_map(struct thread *thread,
                           u8 cpumode, enum map_type type, u64 addr,
                           struct addr_location *al);
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
new file mode 100644 (file)
index 0000000..01c2e86
--- /dev/null
@@ -0,0 +1,697 @@
+/*
+ * Post mortem Dwarf CFI based unwinding on top of regs and stack dumps.
+ *
+ * Lots of this code has been borrowed from or heavily inspired by parts of
+ * the libunwind 0.99 code which are (amongst other contributors I may have
+ * forgotten):
+ *
+ * Copyright (C) 2002-2007 Hewlett-Packard Co
+ *     Contributed by David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * And the bugs have been added by:
+ *
+ * Copyright (C) 2010, Frederic Weisbecker <fweisbec@gmail.com>
+ * Copyright (C) 2012, Jiri Olsa <jolsa@redhat.com>
+ *
+ */
+
+#include <elf.h>
+#include <gelf.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <linux/list.h>
+#ifndef REMOTE_UNWIND_LIBUNWIND
+#include <libunwind.h>
+#include <libunwind-ptrace.h>
+#endif
+#include "callchain.h"
+#include "thread.h"
+#include "session.h"
+#include "perf_regs.h"
+#include "unwind.h"
+#include "symbol.h"
+#include "util.h"
+#include "debug.h"
+#include "asm/bug.h"
+
+extern int
+UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
+                                   unw_word_t ip,
+                                   unw_dyn_info_t *di,
+                                   unw_proc_info_t *pi,
+                                   int need_unwind_info, void *arg);
+
+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
+
+extern int
+UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug,
+                                unw_word_t ip,
+                                unw_word_t segbase,
+                                const char *obj_name, unw_word_t start,
+                                unw_word_t end);
+
+#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame)
+
+#define DW_EH_PE_FORMAT_MASK   0x0f    /* format of the encoded value */
+#define DW_EH_PE_APPL_MASK     0x70    /* how the value is to be applied */
+
+/* Pointer-encoding formats: */
+#define DW_EH_PE_omit          0xff
+#define DW_EH_PE_ptr           0x00    /* pointer-sized unsigned value */
+#define DW_EH_PE_udata4                0x03    /* unsigned 32-bit value */
+#define DW_EH_PE_udata8                0x04    /* unsigned 64-bit value */
+#define DW_EH_PE_sdata4                0x0b    /* signed 32-bit value */
+#define DW_EH_PE_sdata8                0x0c    /* signed 64-bit value */
+
+/* Pointer-encoding application: */
+#define DW_EH_PE_absptr                0x00    /* absolute value */
+#define DW_EH_PE_pcrel         0x10    /* rel. to addr. of encoded value */
+
+/*
+ * The following are not documented by LSB v1.3, yet they are used by
+ * GCC, presumably they aren't documented by LSB since they aren't
+ * used on Linux:
+ */
+#define DW_EH_PE_funcrel       0x40    /* start-of-procedure-relative */
+#define DW_EH_PE_aligned       0x50    /* aligned pointer */
+
+/* Flags intentionally not handled, since they're not needed:
+ * #define DW_EH_PE_indirect      0x80
+ * #define DW_EH_PE_uleb128       0x01
+ * #define DW_EH_PE_udata2        0x02
+ * #define DW_EH_PE_sleb128       0x09
+ * #define DW_EH_PE_sdata2        0x0a
+ * #define DW_EH_PE_textrel       0x20
+ * #define DW_EH_PE_datarel       0x30
+ */
+
+struct unwind_info {
+       struct perf_sample      *sample;
+       struct machine          *machine;
+       struct thread           *thread;
+};
+
+#define dw_read(ptr, type, end) ({     \
+       type *__p = (type *) ptr;       \
+       type  __v;                      \
+       if ((__p + 1) > (type *) end)   \
+               return -EINVAL;         \
+       __v = *__p++;                   \
+       ptr = (typeof(ptr)) __p;        \
+       __v;                            \
+       })
+
+static int __dw_read_encoded_value(u8 **p, u8 *end, u64 *val,
+                                  u8 encoding)
+{
+       u8 *cur = *p;
+       *val = 0;
+
+       switch (encoding) {
+       case DW_EH_PE_omit:
+               *val = 0;
+               goto out;
+       case DW_EH_PE_ptr:
+               *val = dw_read(cur, unsigned long, end);
+               goto out;
+       default:
+               break;
+       }
+
+       switch (encoding & DW_EH_PE_APPL_MASK) {
+       case DW_EH_PE_absptr:
+               break;
+       case DW_EH_PE_pcrel:
+               *val = (unsigned long) cur;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       if ((encoding & 0x07) == 0x00)
+               encoding |= DW_EH_PE_udata4;
+
+       switch (encoding & DW_EH_PE_FORMAT_MASK) {
+       case DW_EH_PE_sdata4:
+               *val += dw_read(cur, s32, end);
+               break;
+       case DW_EH_PE_udata4:
+               *val += dw_read(cur, u32, end);
+               break;
+       case DW_EH_PE_sdata8:
+               *val += dw_read(cur, s64, end);
+               break;
+       case DW_EH_PE_udata8:
+               *val += dw_read(cur, u64, end);
+               break;
+       default:
+               return -EINVAL;
+       }
+
+ out:
+       *p = cur;
+       return 0;
+}
+
+#define dw_read_encoded_value(ptr, end, enc) ({                        \
+       u64 __v;                                                \
+       if (__dw_read_encoded_value(&ptr, end, &__v, enc)) {    \
+               return -EINVAL;                                 \
+       }                                                       \
+       __v;                                                    \
+       })
+
+static u64 elf_section_offset(int fd, const char *name)
+{
+       Elf *elf;
+       GElf_Ehdr ehdr;
+       GElf_Shdr shdr;
+       u64 offset = 0;
+
+       elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+       if (elf == NULL)
+               return 0;
+
+       do {
+               if (gelf_getehdr(elf, &ehdr) == NULL)
+                       break;
+
+               if (!elf_section_by_name(elf, &ehdr, &shdr, name, NULL))
+                       break;
+
+               offset = shdr.sh_offset;
+       } while (0);
+
+       elf_end(elf);
+       return offset;
+}
+
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+static int elf_is_exec(int fd, const char *name)
+{
+       Elf *elf;
+       GElf_Ehdr ehdr;
+       int retval = 0;
+
+       elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+       if (elf == NULL)
+               return 0;
+       if (gelf_getehdr(elf, &ehdr) == NULL)
+               goto out;
+
+       retval = (ehdr.e_type == ET_EXEC);
+
+out:
+       elf_end(elf);
+       pr_debug("unwind: elf_is_exec(%s): %d\n", name, retval);
+       return retval;
+}
+#endif
+
+struct table_entry {
+       u32 start_ip_offset;
+       u32 fde_offset;
+};
+
+struct eh_frame_hdr {
+       unsigned char version;
+       unsigned char eh_frame_ptr_enc;
+       unsigned char fde_count_enc;
+       unsigned char table_enc;
+
+       /*
+        * The rest of the header is variable-length and consists of the
+        * following members:
+        *
+        *      encoded_t eh_frame_ptr;
+        *      encoded_t fde_count;
+        */
+
+       /* A single encoded pointer should not be more than 8 bytes. */
+       u64 enc[2];
+
+       /*
+        * struct {
+        *    encoded_t start_ip;
+        *    encoded_t fde_addr;
+        * } binary_search_table[fde_count];
+        */
+       char data[0];
+} __packed;
+
+static int unwind_spec_ehframe(struct dso *dso, struct machine *machine,
+                              u64 offset, u64 *table_data, u64 *segbase,
+                              u64 *fde_count)
+{
+       struct eh_frame_hdr hdr;
+       u8 *enc = (u8 *) &hdr.enc;
+       u8 *end = (u8 *) &hdr.data;
+       ssize_t r;
+
+       r = dso__data_read_offset(dso, machine, offset,
+                                 (u8 *) &hdr, sizeof(hdr));
+       if (r != sizeof(hdr))
+               return -EINVAL;
+
+       /* We don't need eh_frame_ptr, just skip it. */
+       dw_read_encoded_value(enc, end, hdr.eh_frame_ptr_enc);
+
+       *fde_count  = dw_read_encoded_value(enc, end, hdr.fde_count_enc);
+       *segbase    = offset;
+       *table_data = (enc - (u8 *) &hdr) + offset;
+       return 0;
+}
+
+static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine,
+                                    u64 *table_data, u64 *segbase,
+                                    u64 *fde_count)
+{
+       int ret = -EINVAL, fd;
+       u64 offset = dso->data.eh_frame_hdr_offset;
+
+       if (offset == 0) {
+               fd = dso__data_get_fd(dso, machine);
+               if (fd < 0)
+                       return -EINVAL;
+
+               /* Check the .eh_frame section for unwinding info */
+               offset = elf_section_offset(fd, ".eh_frame_hdr");
+               dso->data.eh_frame_hdr_offset = offset;
+               dso__data_put_fd(dso);
+       }
+
+       if (offset)
+               ret = unwind_spec_ehframe(dso, machine, offset,
+                                         table_data, segbase,
+                                         fde_count);
+
+       return ret;
+}
+
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+static int read_unwind_spec_debug_frame(struct dso *dso,
+                                       struct machine *machine, u64 *offset)
+{
+       int fd;
+       u64 ofs = dso->data.debug_frame_offset;
+
+       if (ofs == 0) {
+               fd = dso__data_get_fd(dso, machine);
+               if (fd < 0)
+                       return -EINVAL;
+
+               /* Check the .debug_frame section for unwinding info */
+               ofs = elf_section_offset(fd, ".debug_frame");
+               dso->data.debug_frame_offset = ofs;
+               dso__data_put_fd(dso);
+       }
+
+       *offset = ofs;
+       if (*offset)
+               return 0;
+
+       return -EINVAL;
+}
+#endif
+
+static struct map *find_map(unw_word_t ip, struct unwind_info *ui)
+{
+       struct addr_location al;
+
+       thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
+                             MAP__FUNCTION, ip, &al);
+       if (!al.map) {
+               /*
+                * We've seen cases (softice) where DWARF unwinder went
+                * through non executable mmaps, which we need to lookup
+                * in MAP__VARIABLE tree.
+                */
+               thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
+                                     MAP__VARIABLE, ip, &al);
+       }
+       return al.map;
+}
+
+static int
+find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi,
+              int need_unwind_info, void *arg)
+{
+       struct unwind_info *ui = arg;
+       struct map *map;
+       unw_dyn_info_t di;
+       u64 table_data, segbase, fde_count;
+       int ret = -EINVAL;
+
+       map = find_map(ip, ui);
+       if (!map || !map->dso)
+               return -EINVAL;
+
+       pr_debug("unwind: find_proc_info dso %s\n", map->dso->name);
+
+       /* Check the .eh_frame section for unwinding info */
+       if (!read_unwind_spec_eh_frame(map->dso, ui->machine,
+                                      &table_data, &segbase, &fde_count)) {
+               memset(&di, 0, sizeof(di));
+               di.format   = UNW_INFO_FORMAT_REMOTE_TABLE;
+               di.start_ip = map->start;
+               di.end_ip   = map->end;
+               di.u.rti.segbase    = map->start + segbase;
+               di.u.rti.table_data = map->start + table_data;
+               di.u.rti.table_len  = fde_count * sizeof(struct table_entry)
+                                     / sizeof(unw_word_t);
+               ret = dwarf_search_unwind_table(as, ip, &di, pi,
+                                               need_unwind_info, arg);
+       }
+
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+       /* Check the .debug_frame section for unwinding info */
+       if (ret < 0 &&
+           !read_unwind_spec_debug_frame(map->dso, ui->machine, &segbase)) {
+               int fd = dso__data_get_fd(map->dso, ui->machine);
+               int is_exec = elf_is_exec(fd, map->dso->name);
+               unw_word_t base = is_exec ? 0 : map->start;
+               const char *symfile;
+
+               if (fd >= 0)
+                       dso__data_put_fd(map->dso);
+
+               symfile = map->dso->symsrc_filename ?: map->dso->name;
+
+               memset(&di, 0, sizeof(di));
+               if (dwarf_find_debug_frame(0, &di, ip, base, symfile,
+                                          map->start, map->end))
+                       return dwarf_search_unwind_table(as, ip, &di, pi,
+                                                        need_unwind_info, arg);
+       }
+#endif
+
+       return ret;
+}
+
+static int access_fpreg(unw_addr_space_t __maybe_unused as,
+                       unw_regnum_t __maybe_unused num,
+                       unw_fpreg_t __maybe_unused *val,
+                       int __maybe_unused __write,
+                       void __maybe_unused *arg)
+{
+       pr_err("unwind: access_fpreg unsupported\n");
+       return -UNW_EINVAL;
+}
+
+static int get_dyn_info_list_addr(unw_addr_space_t __maybe_unused as,
+                                 unw_word_t __maybe_unused *dil_addr,
+                                 void __maybe_unused *arg)
+{
+       return -UNW_ENOINFO;
+}
+
+static int resume(unw_addr_space_t __maybe_unused as,
+                 unw_cursor_t __maybe_unused *cu,
+                 void __maybe_unused *arg)
+{
+       pr_err("unwind: resume unsupported\n");
+       return -UNW_EINVAL;
+}
+
+static int
+get_proc_name(unw_addr_space_t __maybe_unused as,
+             unw_word_t __maybe_unused addr,
+               char __maybe_unused *bufp, size_t __maybe_unused buf_len,
+               unw_word_t __maybe_unused *offp, void __maybe_unused *arg)
+{
+       pr_err("unwind: get_proc_name unsupported\n");
+       return -UNW_EINVAL;
+}
+
+static int access_dso_mem(struct unwind_info *ui, unw_word_t addr,
+                         unw_word_t *data)
+{
+       struct map *map;
+       ssize_t size;
+
+       map = find_map(addr, ui);
+       if (!map) {
+               pr_debug("unwind: no map for %lx\n", (unsigned long)addr);
+               return -1;
+       }
+
+       if (!map->dso)
+               return -1;
+
+       size = dso__data_read_addr(map->dso, map, ui->machine,
+                                  addr, (u8 *) data, sizeof(*data));
+
+       return !(size == sizeof(*data));
+}
+
+static int access_mem(unw_addr_space_t __maybe_unused as,
+                     unw_word_t addr, unw_word_t *valp,
+                     int __write, void *arg)
+{
+       struct unwind_info *ui = arg;
+       struct stack_dump *stack = &ui->sample->user_stack;
+       u64 start, end;
+       int offset;
+       int ret;
+
+       /* Writes aren't supported; they're probably not needed. */
+       if (__write || !stack || !ui->sample->user_regs.regs) {
+               *valp = 0;
+               return 0;
+       }
+
+       ret = perf_reg_value(&start, &ui->sample->user_regs, PERF_REG_SP);
+       if (ret)
+               return ret;
+
+       end = start + stack->size;
+
+       /* Check overflow. */
+       if (addr + sizeof(unw_word_t) < addr)
+               return -EINVAL;
+
+       if (addr < start || addr + sizeof(unw_word_t) >= end) {
+               ret = access_dso_mem(ui, addr, valp);
+               if (ret) {
+                       pr_debug("unwind: access_mem %p not inside range"
+                                " 0x%" PRIx64 "-0x%" PRIx64 "\n",
+                                (void *) (uintptr_t) addr, start, end);
+                       *valp = 0;
+                       return ret;
+               }
+               return 0;
+       }
+
+       offset = addr - start;
+       *valp  = *(unw_word_t *)&stack->data[offset];
+       pr_debug("unwind: access_mem addr %p val %lx, offset %d\n",
+                (void *) (uintptr_t) addr, (unsigned long)*valp, offset);
+       return 0;
+}
+
+static int access_reg(unw_addr_space_t __maybe_unused as,
+                     unw_regnum_t regnum, unw_word_t *valp,
+                     int __write, void *arg)
+{
+       struct unwind_info *ui = arg;
+       int id, ret;
+       u64 val;
+
+       /* Writes aren't supported; I suspect we don't need them. */
+       if (__write) {
+               pr_err("unwind: access_reg w %d\n", regnum);
+               return 0;
+       }
+
+       if (!ui->sample->user_regs.regs) {
+               *valp = 0;
+               return 0;
+       }
+
+       id = LIBUNWIND__ARCH_REG_ID(regnum);
+       if (id < 0)
+               return -EINVAL;
+
+       ret = perf_reg_value(&val, &ui->sample->user_regs, id);
+       if (ret) {
+               pr_err("unwind: can't read reg %d\n", regnum);
+               return ret;
+       }
+
+       *valp = (unw_word_t) val;
+       pr_debug("unwind: reg %d, val %lx\n", regnum, (unsigned long)*valp);
+       return 0;
+}
+
+static void put_unwind_info(unw_addr_space_t __maybe_unused as,
+                           unw_proc_info_t *pi __maybe_unused,
+                           void *arg __maybe_unused)
+{
+       pr_debug("unwind: put_unwind_info called\n");
+}
+
+static int entry(u64 ip, struct thread *thread,
+                unwind_entry_cb_t cb, void *arg)
+{
+       struct unwind_entry e;
+       struct addr_location al;
+
+       thread__find_addr_location(thread, PERF_RECORD_MISC_USER,
+                                  MAP__FUNCTION, ip, &al);
+
+       e.ip = ip;
+       e.map = al.map;
+       e.sym = al.sym;
+
+       pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n",
+                al.sym ? al.sym->name : "''",
+                ip,
+                al.map ? al.map->map_ip(al.map, ip) : (u64) 0);
+
+       return cb(&e, arg);
+}
+
+static void display_error(int err)
+{
+       switch (err) {
+       case UNW_EINVAL:
+               pr_err("unwind: Only supports local.\n");
+               break;
+       case UNW_EUNSPEC:
+               pr_err("unwind: Unspecified error.\n");
+               break;
+       case UNW_EBADREG:
+               pr_err("unwind: Register unavailable.\n");
+               break;
+       default:
+               break;
+       }
+}
+
+static unw_accessors_t accessors = {
+       .find_proc_info         = find_proc_info,
+       .put_unwind_info        = put_unwind_info,
+       .get_dyn_info_list_addr = get_dyn_info_list_addr,
+       .access_mem             = access_mem,
+       .access_reg             = access_reg,
+       .access_fpreg           = access_fpreg,
+       .resume                 = resume,
+       .get_proc_name          = get_proc_name,
+};
+
+static int _unwind__prepare_access(struct thread *thread)
+{
+       if (callchain_param.record_mode != CALLCHAIN_DWARF)
+               return 0;
+
+       thread->addr_space = unw_create_addr_space(&accessors, 0);
+       if (!thread->addr_space) {
+               pr_err("unwind: Can't create unwind address space.\n");
+               return -ENOMEM;
+       }
+
+       unw_set_caching_policy(thread->addr_space, UNW_CACHE_GLOBAL);
+       return 0;
+}
+
+static void _unwind__flush_access(struct thread *thread)
+{
+       if (callchain_param.record_mode != CALLCHAIN_DWARF)
+               return;
+
+       unw_flush_cache(thread->addr_space, 0, 0);
+}
+
+static void _unwind__finish_access(struct thread *thread)
+{
+       if (callchain_param.record_mode != CALLCHAIN_DWARF)
+               return;
+
+       unw_destroy_addr_space(thread->addr_space);
+}
+
+static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
+                      void *arg, int max_stack)
+{
+       u64 val;
+       unw_word_t ips[max_stack];
+       unw_addr_space_t addr_space;
+       unw_cursor_t c;
+       int ret, i = 0;
+
+       ret = perf_reg_value(&val, &ui->sample->user_regs, PERF_REG_IP);
+       if (ret)
+               return ret;
+
+       ips[i++] = (unw_word_t) val;
+
+       /*
+        * If we need more than one entry, do the DWARF
+        * unwind itself.
+        */
+       if (max_stack - 1 > 0) {
+               WARN_ONCE(!ui->thread, "WARNING: ui->thread is NULL");
+               addr_space = ui->thread->addr_space;
+
+               if (addr_space == NULL)
+                       return -1;
+
+               ret = unw_init_remote(&c, addr_space, ui);
+               if (ret)
+                       display_error(ret);
+
+               while (!ret && (unw_step(&c) > 0) && i < max_stack) {
+                       unw_get_reg(&c, UNW_REG_IP, &ips[i]);
+                       ++i;
+               }
+
+               max_stack = i;
+       }
+
+       /*
+        * Report the entries we collected, in the configured callchain order.
+        */
+       for (i = 0; i < max_stack && !ret; i++) {
+               int j = i;
+
+               if (callchain_param.order == ORDER_CALLER)
+                       j = max_stack - i - 1;
+               ret = ips[j] ? entry(ips[j], ui->thread, cb, arg) : 0;
+       }
+
+       return ret;
+}
+
+static int _unwind__get_entries(unwind_entry_cb_t cb, void *arg,
+                       struct thread *thread,
+                       struct perf_sample *data, int max_stack)
+{
+       struct unwind_info ui = {
+               .sample       = data,
+               .thread       = thread,
+               .machine      = thread->mg->machine,
+       };
+
+       if (!data->user_regs.regs)
+               return -EINVAL;
+
+       if (max_stack <= 0)
+               return -EINVAL;
+
+       return get_entries(&ui, cb, arg, max_stack);
+}
+
+static struct unwind_libunwind_ops
+_unwind_libunwind_ops = {
+       .prepare_access = _unwind__prepare_access,
+       .flush_access   = _unwind__flush_access,
+       .finish_access  = _unwind__finish_access,
+       .get_entries    = _unwind__get_entries,
+};
+
+#ifndef REMOTE_UNWIND_LIBUNWIND
+struct unwind_libunwind_ops *
+local_unwind_libunwind_ops = &_unwind_libunwind_ops;
+#endif
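
The file above is meant to be compiled more than once: built on its own it provides the host ("local") unwinder, and with REMOTE_UNWIND_LIBUNWIND defined it can be re-included by per-target wrappers that link against a foreign libunwind flavour and export their own ops pointer. A minimal sketch of such a wrapper, loosely modelled on the new tools/perf/util/libunwind/x86_32.c listed in this merge (the exact include paths and the register-mapping helper name are assumptions, not quoted from the patch):

#define REMOTE_UNWIND_LIBUNWIND

/* Pull in the 32-bit x86 flavour of libunwind instead of the host one. */
#include <libunwind-x86.h>

/* Route register numbers through an ILP32 mapper before including the
 * generic code; the helper name here is illustrative only. */
#define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__x86_reg_id(regnum)

#include "unwind-libunwind-local.c"

/* Export the ops table defined above under the name the dispatcher in
 * unwind-libunwind.c resolves for 32-bit x86 DSOs. */
struct unwind_libunwind_ops *
x86_32_unwind_libunwind_ops = &_unwind_libunwind_ops;
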
index 63687d3..8547119 100644 (file)
-/*
- * Post mortem Dwarf CFI based unwinding on top of regs and stack dumps.
- *
- * Lots of this code have been borrowed or heavily inspired from parts of
- * the libunwind 0.99 code which are (amongst other contributors I may have
- * forgotten):
- *
- * Copyright (C) 2002-2007 Hewlett-Packard Co
- *     Contributed by David Mosberger-Tang <davidm@hpl.hp.com>
- *
- * And the bugs have been added by:
- *
- * Copyright (C) 2010, Frederic Weisbecker <fweisbec@gmail.com>
- * Copyright (C) 2012, Jiri Olsa <jolsa@redhat.com>
- *
- */
-
-#include <elf.h>
-#include <gelf.h>
-#include <fcntl.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/mman.h>
-#include <linux/list.h>
-#include <libunwind.h>
-#include <libunwind-ptrace.h>
-#include "callchain.h"
+#include "unwind.h"
 #include "thread.h"
 #include "session.h"
-#include "perf_regs.h"
-#include "unwind.h"
-#include "symbol.h"
-#include "util.h"
 #include "debug.h"
-#include "asm/bug.h"
-
-extern int
-UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
-                                   unw_word_t ip,
-                                   unw_dyn_info_t *di,
-                                   unw_proc_info_t *pi,
-                                   int need_unwind_info, void *arg);
-
-#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
-
-extern int
-UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug,
-                                unw_word_t ip,
-                                unw_word_t segbase,
-                                const char *obj_name, unw_word_t start,
-                                unw_word_t end);
-
-#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame)
-
-#define DW_EH_PE_FORMAT_MASK   0x0f    /* format of the encoded value */
-#define DW_EH_PE_APPL_MASK     0x70    /* how the value is to be applied */
-
-/* Pointer-encoding formats: */
-#define DW_EH_PE_omit          0xff
-#define DW_EH_PE_ptr           0x00    /* pointer-sized unsigned value */
-#define DW_EH_PE_udata4                0x03    /* unsigned 32-bit value */
-#define DW_EH_PE_udata8                0x04    /* unsigned 64-bit value */
-#define DW_EH_PE_sdata4                0x0b    /* signed 32-bit value */
-#define DW_EH_PE_sdata8                0x0c    /* signed 64-bit value */
-
-/* Pointer-encoding application: */
-#define DW_EH_PE_absptr                0x00    /* absolute value */
-#define DW_EH_PE_pcrel         0x10    /* rel. to addr. of encoded value */
-
-/*
- * The following are not documented by LSB v1.3, yet they are used by
- * GCC, presumably they aren't documented by LSB since they aren't
- * used on Linux:
- */
-#define DW_EH_PE_funcrel       0x40    /* start-of-procedure-relative */
-#define DW_EH_PE_aligned       0x50    /* aligned pointer */
+#include "arch/common.h"
 
-/* Flags intentionaly not handled, since they're not needed:
- * #define DW_EH_PE_indirect      0x80
- * #define DW_EH_PE_uleb128       0x01
- * #define DW_EH_PE_udata2        0x02
- * #define DW_EH_PE_sleb128       0x09
- * #define DW_EH_PE_sdata2        0x0a
- * #define DW_EH_PE_textrel       0x20
- * #define DW_EH_PE_datarel       0x30
- */
+struct unwind_libunwind_ops __weak *local_unwind_libunwind_ops;
+struct unwind_libunwind_ops __weak *x86_32_unwind_libunwind_ops;
+struct unwind_libunwind_ops __weak *arm64_unwind_libunwind_ops;
 
-struct unwind_info {
-       struct perf_sample      *sample;
-       struct machine          *machine;
-       struct thread           *thread;
-};
-
-#define dw_read(ptr, type, end) ({     \
-       type *__p = (type *) ptr;       \
-       type  __v;                      \
-       if ((__p + 1) > (type *) end)   \
-               return -EINVAL;         \
-       __v = *__p++;                   \
-       ptr = (typeof(ptr)) __p;        \
-       __v;                            \
-       })
-
-static int __dw_read_encoded_value(u8 **p, u8 *end, u64 *val,
-                                  u8 encoding)
+static void unwind__register_ops(struct thread *thread,
+                         struct unwind_libunwind_ops *ops)
 {
-       u8 *cur = *p;
-       *val = 0;
-
-       switch (encoding) {
-       case DW_EH_PE_omit:
-               *val = 0;
-               goto out;
-       case DW_EH_PE_ptr:
-               *val = dw_read(cur, unsigned long, end);
-               goto out;
-       default:
-               break;
-       }
-
-       switch (encoding & DW_EH_PE_APPL_MASK) {
-       case DW_EH_PE_absptr:
-               break;
-       case DW_EH_PE_pcrel:
-               *val = (unsigned long) cur;
-               break;
-       default:
-               return -EINVAL;
-       }
-
-       if ((encoding & 0x07) == 0x00)
-               encoding |= DW_EH_PE_udata4;
-
-       switch (encoding & DW_EH_PE_FORMAT_MASK) {
-       case DW_EH_PE_sdata4:
-               *val += dw_read(cur, s32, end);
-               break;
-       case DW_EH_PE_udata4:
-               *val += dw_read(cur, u32, end);
-               break;
-       case DW_EH_PE_sdata8:
-               *val += dw_read(cur, s64, end);
-               break;
-       case DW_EH_PE_udata8:
-               *val += dw_read(cur, u64, end);
-               break;
-       default:
-               return -EINVAL;
-       }
-
- out:
-       *p = cur;
-       return 0;
-}
-
-#define dw_read_encoded_value(ptr, end, enc) ({                        \
-       u64 __v;                                                \
-       if (__dw_read_encoded_value(&ptr, end, &__v, enc)) {    \
-               return -EINVAL;                                 \
-       }                                                       \
-       __v;                                                    \
-       })
-
-static u64 elf_section_offset(int fd, const char *name)
-{
-       Elf *elf;
-       GElf_Ehdr ehdr;
-       GElf_Shdr shdr;
-       u64 offset = 0;
-
-       elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
-       if (elf == NULL)
-               return 0;
-
-       do {
-               if (gelf_getehdr(elf, &ehdr) == NULL)
-                       break;
-
-               if (!elf_section_by_name(elf, &ehdr, &shdr, name, NULL))
-                       break;
-
-               offset = shdr.sh_offset;
-       } while (0);
-
-       elf_end(elf);
-       return offset;
+       thread->unwind_libunwind_ops = ops;
 }
 
-#ifndef NO_LIBUNWIND_DEBUG_FRAME
-static int elf_is_exec(int fd, const char *name)
+int unwind__prepare_access(struct thread *thread, struct map *map)
 {
-       Elf *elf;
-       GElf_Ehdr ehdr;
-       int retval = 0;
+       const char *arch;
+       enum dso_type dso_type;
+       struct unwind_libunwind_ops *ops = local_unwind_libunwind_ops;
 
-       elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
-       if (elf == NULL)
+       if (thread->addr_space) {
+               pr_debug("unwind: thread map already set, dso=%s\n",
+                        map->dso->name);
                return 0;
-       if (gelf_getehdr(elf, &ehdr) == NULL)
-               goto out;
-
-       retval = (ehdr.e_type == ET_EXEC);
-
-out:
-       elf_end(elf);
-       pr_debug("unwind: elf_is_exec(%s): %d\n", name, retval);
-       return retval;
-}
-#endif
-
-struct table_entry {
-       u32 start_ip_offset;
-       u32 fde_offset;
-};
-
-struct eh_frame_hdr {
-       unsigned char version;
-       unsigned char eh_frame_ptr_enc;
-       unsigned char fde_count_enc;
-       unsigned char table_enc;
-
-       /*
-        * The rest of the header is variable-length and consists of the
-        * following members:
-        *
-        *      encoded_t eh_frame_ptr;
-        *      encoded_t fde_count;
-        */
-
-       /* A single encoded pointer should not be more than 8 bytes. */
-       u64 enc[2];
-
-       /*
-        * struct {
-        *    encoded_t start_ip;
-        *    encoded_t fde_addr;
-        * } binary_search_table[fde_count];
-        */
-       char data[0];
-} __packed;
-
-static int unwind_spec_ehframe(struct dso *dso, struct machine *machine,
-                              u64 offset, u64 *table_data, u64 *segbase,
-                              u64 *fde_count)
-{
-       struct eh_frame_hdr hdr;
-       u8 *enc = (u8 *) &hdr.enc;
-       u8 *end = (u8 *) &hdr.data;
-       ssize_t r;
-
-       r = dso__data_read_offset(dso, machine, offset,
-                                 (u8 *) &hdr, sizeof(hdr));
-       if (r != sizeof(hdr))
-               return -EINVAL;
-
-       /* We dont need eh_frame_ptr, just skip it. */
-       dw_read_encoded_value(enc, end, hdr.eh_frame_ptr_enc);
-
-       *fde_count  = dw_read_encoded_value(enc, end, hdr.fde_count_enc);
-       *segbase    = offset;
-       *table_data = (enc - (u8 *) &hdr) + offset;
-       return 0;
-}
-
-static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine,
-                                    u64 *table_data, u64 *segbase,
-                                    u64 *fde_count)
-{
-       int ret = -EINVAL, fd;
-       u64 offset = dso->data.eh_frame_hdr_offset;
-
-       if (offset == 0) {
-               fd = dso__data_get_fd(dso, machine);
-               if (fd < 0)
-                       return -EINVAL;
-
-               /* Check the .eh_frame section for unwinding info */
-               offset = elf_section_offset(fd, ".eh_frame_hdr");
-               dso->data.eh_frame_hdr_offset = offset;
-               dso__data_put_fd(dso);
        }
 
-       if (offset)
-               ret = unwind_spec_ehframe(dso, machine, offset,
-                                         table_data, segbase,
-                                         fde_count);
+       /* env->arch is NULL in live mode (e.g. perf top) */
+       if (!thread->mg->machine->env || !thread->mg->machine->env->arch)
+               goto out_register;
 
-       return ret;
-}
-
-#ifndef NO_LIBUNWIND_DEBUG_FRAME
-static int read_unwind_spec_debug_frame(struct dso *dso,
-                                       struct machine *machine, u64 *offset)
-{
-       int fd;
-       u64 ofs = dso->data.debug_frame_offset;
-
-       if (ofs == 0) {
-               fd = dso__data_get_fd(dso, machine);
-               if (fd < 0)
-                       return -EINVAL;
-
-               /* Check the .debug_frame section for unwinding info */
-               ofs = elf_section_offset(fd, ".debug_frame");
-               dso->data.debug_frame_offset = ofs;
-               dso__data_put_fd(dso);
-       }
-
-       *offset = ofs;
-       if (*offset)
+       dso_type = dso__type(map->dso, thread->mg->machine);
+       if (dso_type == DSO__TYPE_UNKNOWN)
                return 0;
 
-       return -EINVAL;
-}
-#endif
-
-static struct map *find_map(unw_word_t ip, struct unwind_info *ui)
-{
-       struct addr_location al;
-
-       thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
-                             MAP__FUNCTION, ip, &al);
-       if (!al.map) {
-               /*
-                * We've seen cases (softice) where DWARF unwinder went
-                * through non executable mmaps, which we need to lookup
-                * in MAP__VARIABLE tree.
-                */
-               thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
-                                     MAP__VARIABLE, ip, &al);
-       }
-       return al.map;
-}
-
-static int
-find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi,
-              int need_unwind_info, void *arg)
-{
-       struct unwind_info *ui = arg;
-       struct map *map;
-       unw_dyn_info_t di;
-       u64 table_data, segbase, fde_count;
-       int ret = -EINVAL;
-
-       map = find_map(ip, ui);
-       if (!map || !map->dso)
-               return -EINVAL;
-
-       pr_debug("unwind: find_proc_info dso %s\n", map->dso->name);
-
-       /* Check the .eh_frame section for unwinding info */
-       if (!read_unwind_spec_eh_frame(map->dso, ui->machine,
-                                      &table_data, &segbase, &fde_count)) {
-               memset(&di, 0, sizeof(di));
-               di.format   = UNW_INFO_FORMAT_REMOTE_TABLE;
-               di.start_ip = map->start;
-               di.end_ip   = map->end;
-               di.u.rti.segbase    = map->start + segbase;
-               di.u.rti.table_data = map->start + table_data;
-               di.u.rti.table_len  = fde_count * sizeof(struct table_entry)
-                                     / sizeof(unw_word_t);
-               ret = dwarf_search_unwind_table(as, ip, &di, pi,
-                                               need_unwind_info, arg);
-       }
-
-#ifndef NO_LIBUNWIND_DEBUG_FRAME
-       /* Check the .debug_frame section for unwinding info */
-       if (ret < 0 &&
-           !read_unwind_spec_debug_frame(map->dso, ui->machine, &segbase)) {
-               int fd = dso__data_get_fd(map->dso, ui->machine);
-               int is_exec = elf_is_exec(fd, map->dso->name);
-               unw_word_t base = is_exec ? 0 : map->start;
-               const char *symfile;
-
-               if (fd >= 0)
-                       dso__data_put_fd(map->dso);
-
-               symfile = map->dso->symsrc_filename ?: map->dso->name;
-
-               memset(&di, 0, sizeof(di));
-               if (dwarf_find_debug_frame(0, &di, ip, base, symfile,
-                                          map->start, map->end))
-                       return dwarf_search_unwind_table(as, ip, &di, pi,
-                                                        need_unwind_info, arg);
-       }
-#endif
-
-       return ret;
-}
-
-static int access_fpreg(unw_addr_space_t __maybe_unused as,
-                       unw_regnum_t __maybe_unused num,
-                       unw_fpreg_t __maybe_unused *val,
-                       int __maybe_unused __write,
-                       void __maybe_unused *arg)
-{
-       pr_err("unwind: access_fpreg unsupported\n");
-       return -UNW_EINVAL;
-}
-
-static int get_dyn_info_list_addr(unw_addr_space_t __maybe_unused as,
-                                 unw_word_t __maybe_unused *dil_addr,
-                                 void __maybe_unused *arg)
-{
-       return -UNW_ENOINFO;
-}
-
-static int resume(unw_addr_space_t __maybe_unused as,
-                 unw_cursor_t __maybe_unused *cu,
-                 void __maybe_unused *arg)
-{
-       pr_err("unwind: resume unsupported\n");
-       return -UNW_EINVAL;
-}
+       arch = normalize_arch(thread->mg->machine->env->arch);
 
-static int
-get_proc_name(unw_addr_space_t __maybe_unused as,
-             unw_word_t __maybe_unused addr,
-               char __maybe_unused *bufp, size_t __maybe_unused buf_len,
-               unw_word_t __maybe_unused *offp, void __maybe_unused *arg)
-{
-       pr_err("unwind: get_proc_name unsupported\n");
-       return -UNW_EINVAL;
-}
-
-static int access_dso_mem(struct unwind_info *ui, unw_word_t addr,
-                         unw_word_t *data)
-{
-       struct map *map;
-       ssize_t size;
-
-       map = find_map(addr, ui);
-       if (!map) {
-               pr_debug("unwind: no map for %lx\n", (unsigned long)addr);
-               return -1;
+       if (!strcmp(arch, "x86")) {
+               if (dso_type != DSO__TYPE_64BIT)
+                       ops = x86_32_unwind_libunwind_ops;
+       } else if (!strcmp(arch, "arm64") || !strcmp(arch, "arm")) {
+               if (dso_type == DSO__TYPE_64BIT)
+                       ops = arm64_unwind_libunwind_ops;
        }
 
-       if (!map->dso)
+       if (!ops) {
+               pr_err("unwind: target platform=%s is not supported\n", arch);
                return -1;
-
-       size = dso__data_read_addr(map->dso, map, ui->machine,
-                                  addr, (u8 *) data, sizeof(*data));
-
-       return !(size == sizeof(*data));
-}
-
-static int access_mem(unw_addr_space_t __maybe_unused as,
-                     unw_word_t addr, unw_word_t *valp,
-                     int __write, void *arg)
-{
-       struct unwind_info *ui = arg;
-       struct stack_dump *stack = &ui->sample->user_stack;
-       u64 start, end;
-       int offset;
-       int ret;
-
-       /* Don't support write, probably not needed. */
-       if (__write || !stack || !ui->sample->user_regs.regs) {
-               *valp = 0;
-               return 0;
-       }
-
-       ret = perf_reg_value(&start, &ui->sample->user_regs, PERF_REG_SP);
-       if (ret)
-               return ret;
-
-       end = start + stack->size;
-
-       /* Check overflow. */
-       if (addr + sizeof(unw_word_t) < addr)
-               return -EINVAL;
-
-       if (addr < start || addr + sizeof(unw_word_t) >= end) {
-               ret = access_dso_mem(ui, addr, valp);
-               if (ret) {
-                       pr_debug("unwind: access_mem %p not inside range"
-                                " 0x%" PRIx64 "-0x%" PRIx64 "\n",
-                                (void *) (uintptr_t) addr, start, end);
-                       *valp = 0;
-                       return ret;
-               }
-               return 0;
-       }
-
-       offset = addr - start;
-       *valp  = *(unw_word_t *)&stack->data[offset];
-       pr_debug("unwind: access_mem addr %p val %lx, offset %d\n",
-                (void *) (uintptr_t) addr, (unsigned long)*valp, offset);
-       return 0;
-}
-
-static int access_reg(unw_addr_space_t __maybe_unused as,
-                     unw_regnum_t regnum, unw_word_t *valp,
-                     int __write, void *arg)
-{
-       struct unwind_info *ui = arg;
-       int id, ret;
-       u64 val;
-
-       /* Don't support write, I suspect we don't need it. */
-       if (__write) {
-               pr_err("unwind: access_reg w %d\n", regnum);
-               return 0;
-       }
-
-       if (!ui->sample->user_regs.regs) {
-               *valp = 0;
-               return 0;
-       }
-
-       id = libunwind__arch_reg_id(regnum);
-       if (id < 0)
-               return -EINVAL;
-
-       ret = perf_reg_value(&val, &ui->sample->user_regs, id);
-       if (ret) {
-               pr_err("unwind: can't read reg %d\n", regnum);
-               return ret;
-       }
-
-       *valp = (unw_word_t) val;
-       pr_debug("unwind: reg %d, val %lx\n", regnum, (unsigned long)*valp);
-       return 0;
-}
-
-static void put_unwind_info(unw_addr_space_t __maybe_unused as,
-                           unw_proc_info_t *pi __maybe_unused,
-                           void *arg __maybe_unused)
-{
-       pr_debug("unwind: put_unwind_info called\n");
-}
-
-static int entry(u64 ip, struct thread *thread,
-                unwind_entry_cb_t cb, void *arg)
-{
-       struct unwind_entry e;
-       struct addr_location al;
-
-       thread__find_addr_location(thread, PERF_RECORD_MISC_USER,
-                                  MAP__FUNCTION, ip, &al);
-
-       e.ip = ip;
-       e.map = al.map;
-       e.sym = al.sym;
-
-       pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n",
-                al.sym ? al.sym->name : "''",
-                ip,
-                al.map ? al.map->map_ip(al.map, ip) : (u64) 0);
-
-       return cb(&e, arg);
-}
-
-static void display_error(int err)
-{
-       switch (err) {
-       case UNW_EINVAL:
-               pr_err("unwind: Only supports local.\n");
-               break;
-       case UNW_EUNSPEC:
-               pr_err("unwind: Unspecified error.\n");
-               break;
-       case UNW_EBADREG:
-               pr_err("unwind: Register unavailable.\n");
-               break;
-       default:
-               break;
-       }
-}
-
-static unw_accessors_t accessors = {
-       .find_proc_info         = find_proc_info,
-       .put_unwind_info        = put_unwind_info,
-       .get_dyn_info_list_addr = get_dyn_info_list_addr,
-       .access_mem             = access_mem,
-       .access_reg             = access_reg,
-       .access_fpreg           = access_fpreg,
-       .resume                 = resume,
-       .get_proc_name          = get_proc_name,
-};
-
-int unwind__prepare_access(struct thread *thread)
-{
-       if (callchain_param.record_mode != CALLCHAIN_DWARF)
-               return 0;
-
-       thread->addr_space = unw_create_addr_space(&accessors, 0);
-       if (!thread->addr_space) {
-               pr_err("unwind: Can't create unwind address space.\n");
-               return -ENOMEM;
        }
+out_register:
+       unwind__register_ops(thread, ops);
 
-       unw_set_caching_policy(thread->addr_space, UNW_CACHE_GLOBAL);
-       return 0;
+       return thread->unwind_libunwind_ops->prepare_access(thread);
 }
 
 void unwind__flush_access(struct thread *thread)
 {
-       if (callchain_param.record_mode != CALLCHAIN_DWARF)
-               return;
-
-       unw_flush_cache(thread->addr_space, 0, 0);
+       if (thread->unwind_libunwind_ops)
+               thread->unwind_libunwind_ops->flush_access(thread);
 }
 
 void unwind__finish_access(struct thread *thread)
 {
-       if (callchain_param.record_mode != CALLCHAIN_DWARF)
-               return;
-
-       unw_destroy_addr_space(thread->addr_space);
-}
-
-static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
-                      void *arg, int max_stack)
-{
-       u64 val;
-       unw_word_t ips[max_stack];
-       unw_addr_space_t addr_space;
-       unw_cursor_t c;
-       int ret, i = 0;
-
-       ret = perf_reg_value(&val, &ui->sample->user_regs, PERF_REG_IP);
-       if (ret)
-               return ret;
-
-       ips[i++] = (unw_word_t) val;
-
-       /*
-        * If we need more than one entry, do the DWARF
-        * unwind itself.
-        */
-       if (max_stack - 1 > 0) {
-               WARN_ONCE(!ui->thread, "WARNING: ui->thread is NULL");
-               addr_space = ui->thread->addr_space;
-
-               if (addr_space == NULL)
-                       return -1;
-
-               ret = unw_init_remote(&c, addr_space, ui);
-               if (ret)
-                       display_error(ret);
-
-               while (!ret && (unw_step(&c) > 0) && i < max_stack) {
-                       unw_get_reg(&c, UNW_REG_IP, &ips[i]);
-                       ++i;
-               }
-
-               max_stack = i;
-       }
-
-       /*
-        * Display what we got based on the order setup.
-        */
-       for (i = 0; i < max_stack && !ret; i++) {
-               int j = i;
-
-               if (callchain_param.order == ORDER_CALLER)
-                       j = max_stack - i - 1;
-               ret = ips[j] ? entry(ips[j], ui->thread, cb, arg) : 0;
-       }
-
-       return ret;
+       if (thread->unwind_libunwind_ops)
+               thread->unwind_libunwind_ops->finish_access(thread);
 }
 
 int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
-                       struct thread *thread,
-                       struct perf_sample *data, int max_stack)
+                        struct thread *thread,
+                        struct perf_sample *data, int max_stack)
 {
-       struct unwind_info ui = {
-               .sample       = data,
-               .thread       = thread,
-               .machine      = thread->mg->machine,
-       };
-
-       if (!data->user_regs.regs)
-               return -EINVAL;
-
-       if (max_stack <= 0)
-               return -EINVAL;
-
-       return get_entries(&ui, cb, arg, max_stack);
+       if (thread->unwind_libunwind_ops)
+               return thread->unwind_libunwind_ops->get_entries(cb, arg, thread, data, max_stack);
+       return 0;
 }
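
After this rewrite, unwind-libunwind.c no longer touches libunwind itself; it only selects and forwards to the per-thread ops. A rough caller-side view of the lifecycle, assuming an already resolved struct thread, struct map and struct perf_sample; the print_entry callback and unwind_sample wrapper below are illustrative only and not part of the patch, and flush_access/finish_access are driven elsewhere, when a thread's maps change and when the thread is torn down:

static int print_entry(struct unwind_entry *entry, void *arg __maybe_unused)
{
        pr_debug("unwind: ip 0x%" PRIx64 " %s\n", entry->ip,
                 entry->sym ? entry->sym->name : "[unknown]");
        return 0;                               /* non-zero stops the walk */
}

static int unwind_sample(struct thread *thread, struct map *map,
                         struct perf_sample *sample, int max_stack)
{
        /* Lazily picks local/x86_32/arm64 ops and creates the addr space. */
        int err = unwind__prepare_access(thread, map);

        if (err)
                return err;

        return unwind__get_entries(print_entry, NULL, thread,
                                   sample, max_stack);
}
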
index 12790cf..b074662 100644 (file)
@@ -14,18 +14,31 @@ struct unwind_entry {
 
 typedef int (*unwind_entry_cb_t)(struct unwind_entry *entry, void *arg);
 
+struct unwind_libunwind_ops {
+       int (*prepare_access)(struct thread *thread);
+       void (*flush_access)(struct thread *thread);
+       void (*finish_access)(struct thread *thread);
+       int (*get_entries)(unwind_entry_cb_t cb, void *arg,
+                          struct thread *thread,
+                          struct perf_sample *data, int max_stack);
+};
+
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
 int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
                        struct thread *thread,
                        struct perf_sample *data, int max_stack);
 /* libunwind specific */
 #ifdef HAVE_LIBUNWIND_SUPPORT
-int libunwind__arch_reg_id(int regnum);
-int unwind__prepare_access(struct thread *thread);
+#ifndef LIBUNWIND__ARCH_REG_ID
+#define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__arch_reg_id(regnum)
+#endif
+int LIBUNWIND__ARCH_REG_ID(int regnum);
+int unwind__prepare_access(struct thread *thread, struct map *map);
 void unwind__flush_access(struct thread *thread);
 void unwind__finish_access(struct thread *thread);
 #else
-static inline int unwind__prepare_access(struct thread *thread __maybe_unused)
+static inline int unwind__prepare_access(struct thread *thread __maybe_unused,
+                                        struct map *map __maybe_unused)
 {
        return 0;
 }
@@ -44,7 +57,8 @@ unwind__get_entries(unwind_entry_cb_t cb __maybe_unused,
        return 0;
 }
 
-static inline int unwind__prepare_access(struct thread *thread __maybe_unused)
+static inline int unwind__prepare_access(struct thread *thread __maybe_unused,
+                                        struct map *map __maybe_unused)
 {
        return 0;
 }
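
LIBUNWIND__ARCH_REG_ID() is the hook the per-target builds override; by default it falls through to the host's libunwind__arch_reg_id(). A sketch of the kind of mapper a target supplies, loosely following the existing arch/*/util/unwind-libunwind.c files (the function name and the arm64 register names shown are assumptions for illustration, not taken from this merge):

int libunwind__arm64_reg_id(int regnum)
{
        switch (regnum) {
        case UNW_AARCH64_X29:
                return PERF_REG_ARM64_X29;      /* frame pointer */
        case UNW_AARCH64_SP:
                return PERF_REG_ARM64_SP;
        case UNW_AARCH64_PC:
                return PERF_REG_ARM64_PC;
        default:
                pr_err("unwind: invalid reg id %d\n", regnum);
                return -EINVAL;
        }
}

A full mapper covers the whole general-purpose register file; only the frame-critical registers are shown here.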