Merge tag 'perf-core-for-mingo-20160606' of git://git.kernel.org/pub/scm/linux/kernel...
authorIngo Molnar <mingo@kernel.org>
Wed, 8 Jun 2016 07:29:23 +0000 (09:29 +0200)
committerIngo Molnar <mingo@kernel.org>
Wed, 8 Jun 2016 07:29:23 +0000 (09:29 +0200)
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible changes:

- Tooling support for TopDown counters, recently added to the kernel (Andi Kleen)

- Show call graphs in 'perf script' when 1st event doesn't have it but some other has (He Kuang)

- Fix terminal cleanup when handling invalid .perfconfig files in 'perf top' (Taeung Song)

Build fixes:

- Respect CROSS_COMPILE for the linker in libapi (Lucas Stach)

Infrastructure changes:

- Fix perf_evlist__alloc_mmap() failure path (Wang Nan)

- Provide way to extract integer value from format_field (Arnaldo Carvalho de Melo)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
16 files changed:
tools/lib/api/Makefile
tools/perf/Documentation/perf-stat.txt
tools/perf/arch/x86/util/Build
tools/perf/arch/x86/util/group.c [new file with mode: 0644]
tools/perf/builtin-script.c
tools/perf/builtin-stat.c
tools/perf/tests/parse-events.c
tools/perf/util/config.c
tools/perf/util/evlist.c
tools/perf/util/evsel.c
tools/perf/util/evsel.h
tools/perf/util/group.h [new file with mode: 0644]
tools/perf/util/parse-events.l
tools/perf/util/stat-shadow.c
tools/perf/util/stat.c
tools/perf/util/stat.h

index 316f308..67ff93e 100644 (file)
@@ -10,6 +10,7 @@ endif
 
 CC = $(CROSS_COMPILE)gcc
 AR = $(CROSS_COMPILE)ar
+LD = $(CROSS_COMPILE)ld
 
 MAKEFLAGS += --no-print-directory
 
index 04f23b4..d96ccd4 100644 (file)
@@ -204,6 +204,38 @@ Aggregate counts per physical processor for system-wide mode measurements.
 --no-aggr::
 Do not aggregate counts across all monitored CPUs.
 
+--topdown::
+Print top down level 1 metrics if supported by the CPU. This allows
+determining bottlenecks in the CPU pipeline for CPU bound workloads,
+by breaking the cycles consumed down into frontend bound, backend bound,
+bad speculation and retiring.
+
+Frontend bound means that the CPU cannot fetch and decode instructions fast
+enough. Backend bound means that computation or memory access is the
+bottleneck. Bad Speculation means that the CPU wasted cycles due to branch
+mispredictions and similar issues. Retiring means that the CPU computed without
+an apparent bottleneck. The bottleneck is only the real bottleneck
+if the workload is actually bound by the CPU and not by something else.
+
+For best results it is usually a good idea to use it with interval
+mode like -I 1000, as the bottleneck of workloads can change often.
+
+The top down metrics are collected per core instead of per
+CPU thread. Per core mode is automatically enabled
+and -a (global monitoring) is needed, requiring root rights or
+kernel.perf_event_paranoid=-1.
+
+Topdown uses the full Performance Monitoring Unit, and needs
+disabling of the NMI watchdog (as root):
+echo 0 > /proc/sys/kernel/nmi_watchdog
+for best results. Otherwise the bottlenecks may be inconsistent
+on workloads with changing phases.
+
+This enables --metric-only, unless overridden with --no-metric-only.
+
+To interpret the results it is usually necessary to know which
+CPUs the workload runs on. If needed the CPUs can be forced using
+taskset.
 
 EXAMPLES
 --------
index 4659703..4cd8a16 100644 (file)
@@ -3,6 +3,7 @@ libperf-y += tsc.o
 libperf-y += pmu.o
 libperf-y += kvm-stat.o
 libperf-y += perf_regs.o
+libperf-y += group.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
diff --git a/tools/perf/arch/x86/util/group.c b/tools/perf/arch/x86/util/group.c
new file mode 100644 (file)
index 0000000..37f92aa
--- /dev/null
@@ -0,0 +1,27 @@
+#include <stdio.h>
+#include "api/fs/fs.h"
+#include "util/group.h"
+
+/*
+ * Check whether we can use a group for top down.
+ * Without a group we may get bad results due to multiplexing.
+ */
+bool arch_topdown_check_group(bool *warn)
+{
+       int n;
+
+       /* Always define *warn, like the generic __weak fallback does. */
+       *warn = false;
+       if (sysctl__read_int("kernel/nmi_watchdog", &n) < 0)
+               return false;   /* watchdog state unreadable: do not group */
+       /* nmi_watchdog > 0: refuse the group and ask the caller to warn. */
+       *warn = n > 0;
+       return !*warn;
+}
+
+void arch_topdown_group_warn(void)
+{
+       /* Explain on stderr why results may be off and how to fix it. */
+       fputs("nmi_watchdog enabled with topdown. May give wrong results.\n"
+             "Disable with echo 0 > /proc/sys/kernel/nmi_watchdog\n", stderr);
+}
index e3ce2f3..4601123 100644 (file)
@@ -339,7 +339,7 @@ static void set_print_ip_opts(struct perf_event_attr *attr)
  */
 static int perf_session__check_output_opt(struct perf_session *session)
 {
-       int j;
+       unsigned int j;
        struct perf_evsel *evsel;
 
        for (j = 0; j < PERF_TYPE_MAX; ++j) {
@@ -388,17 +388,20 @@ static int perf_session__check_output_opt(struct perf_session *session)
                struct perf_event_attr *attr;
 
                j = PERF_TYPE_TRACEPOINT;
-               evsel = perf_session__find_first_evtype(session, j);
-               if (evsel == NULL)
-                       goto out;
 
-               attr = &evsel->attr;
+               evlist__for_each(session->evlist, evsel) {
+                       if (evsel->attr.type != j)
+                               continue;
+
+                       attr = &evsel->attr;
 
-               if (attr->sample_type & PERF_SAMPLE_CALLCHAIN) {
-                       output[j].fields |= PERF_OUTPUT_IP;
-                       output[j].fields |= PERF_OUTPUT_SYM;
-                       output[j].fields |= PERF_OUTPUT_DSO;
-                       set_print_ip_opts(attr);
+                       if (attr->sample_type & PERF_SAMPLE_CALLCHAIN) {
+                               output[j].fields |= PERF_OUTPUT_IP;
+                               output[j].fields |= PERF_OUTPUT_SYM;
+                               output[j].fields |= PERF_OUTPUT_DSO;
+                               set_print_ip_opts(attr);
+                               goto out;
+                       }
                }
        }
 
index ee7ada7..dff6373 100644 (file)
 #include "util/thread.h"
 #include "util/thread_map.h"
 #include "util/counts.h"
+#include "util/group.h"
 #include "util/session.h"
 #include "util/tool.h"
+#include "util/group.h"
 #include "asm/bug.h"
 
+#include <api/fs/fs.h>
 #include <stdlib.h>
 #include <sys/prctl.h>
 #include <locale.h>
@@ -98,6 +101,15 @@ static const char * transaction_limited_attrs = {
        "}"
 };
 
+static const char * topdown_attrs[] = {        /* events requested by --topdown */
+       "topdown-total-slots",
+       "topdown-slots-retired",
+       "topdown-recovery-bubbles",
+       "topdown-fetch-bubbles",
+       "topdown-slots-issued",
+       NULL,   /* terminator; topdown_filter_events() compacts this list in place */
+};
+
 static struct perf_evlist      *evsel_list;
 
 static struct target target = {
@@ -112,6 +124,7 @@ static volatile pid_t               child_pid                       = -1;
 static bool                    null_run                        =  false;
 static int                     detailed_run                    =  0;
 static bool                    transaction_run;
+static bool                    topdown_run                     = false;
 static bool                    big_num                         =  true;
 static int                     big_num_opt                     =  -1;
 static const char              *csv_sep                        = NULL;
@@ -124,6 +137,7 @@ static unsigned int         initial_delay                   = 0;
 static unsigned int            unit_width                      = 4; /* strlen("unit") */
 static bool                    forever                         = false;
 static bool                    metric_only                     = false;
+static bool                    force_metric_only               = false;
 static struct timespec         ref_time;
 static struct cpu_map          *aggr_map;
 static aggr_get_id_t           aggr_get_id;
@@ -1302,7 +1316,15 @@ static int aggr_header_lens[] = {
        [AGGR_GLOBAL] = 0,
 };
 
-static void print_metric_headers(char *prefix)
+static const char *aggr_header_csv[] = {       /* CSV header prefix per aggr mode */
+       [AGGR_CORE]     =       "core,cpus,",
+       [AGGR_SOCKET]   =       "socket,cpus,", /* ends in ',' like the others */
+       [AGGR_NONE]     =       "cpu,",
+       [AGGR_THREAD]   =       "comm-pid,",
+       [AGGR_GLOBAL]   =       ""
+};
+
+static void print_metric_headers(const char *prefix, bool no_indent)
 {
        struct perf_stat_output_ctx out;
        struct perf_evsel *counter;
@@ -1313,9 +1335,15 @@ static void print_metric_headers(char *prefix)
        if (prefix)
                fprintf(stat_config.output, "%s", prefix);
 
-       if (!csv_output)
+       if (!csv_output && !no_indent)
                fprintf(stat_config.output, "%*s",
                        aggr_header_lens[stat_config.aggr_mode], "");
+       if (csv_output) {
+               if (stat_config.interval)
+                       fputs("time,", stat_config.output);
+               fputs(aggr_header_csv[stat_config.aggr_mode],
+                       stat_config.output);
+       }
 
        /* Print metrics headers only */
        evlist__for_each(evsel_list, counter) {
@@ -1338,28 +1366,40 @@ static void print_interval(char *prefix, struct timespec *ts)
 
        sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
 
-       if (num_print_interval == 0 && !csv_output && !metric_only) {
+       if (num_print_interval == 0 && !csv_output) {
                switch (stat_config.aggr_mode) {
                case AGGR_SOCKET:
-                       fprintf(output, "#           time socket cpus             counts %*s events\n", unit_width, "unit");
+                       fprintf(output, "#           time socket cpus");
+                       if (!metric_only)
+                               fprintf(output, "             counts %*s events\n", unit_width, "unit");
                        break;
                case AGGR_CORE:
-                       fprintf(output, "#           time core         cpus             counts %*s events\n", unit_width, "unit");
+                       fprintf(output, "#           time core         cpus");
+                       if (!metric_only)
+                               fprintf(output, "             counts %*s events\n", unit_width, "unit");
                        break;
                case AGGR_NONE:
-                       fprintf(output, "#           time CPU                counts %*s events\n", unit_width, "unit");
+                       fprintf(output, "#           time CPU");
+                       if (!metric_only)
+                               fprintf(output, "                counts %*s events\n", unit_width, "unit");
                        break;
                case AGGR_THREAD:
-                       fprintf(output, "#           time             comm-pid                  counts %*s events\n", unit_width, "unit");
+                       fprintf(output, "#           time             comm-pid");
+                       if (!metric_only)
+                               fprintf(output, "                  counts %*s events\n", unit_width, "unit");
                        break;
                case AGGR_GLOBAL:
                default:
-                       fprintf(output, "#           time             counts %*s events\n", unit_width, "unit");
+                       fprintf(output, "#           time");
+                       if (!metric_only)
+                               fprintf(output, "             counts %*s events\n", unit_width, "unit");
                case AGGR_UNSET:
                        break;
                }
        }
 
+       if (num_print_interval == 0 && metric_only)
+               print_metric_headers(" ", true);
        if (++num_print_interval == 25)
                num_print_interval = 0;
 }
@@ -1428,8 +1468,8 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
        if (metric_only) {
                static int num_print_iv;
 
-               if (num_print_iv == 0)
-                       print_metric_headers(prefix);
+               if (num_print_iv == 0 && !interval)
+                       print_metric_headers(prefix, false);
                if (num_print_iv++ == 25)
                        num_print_iv = 0;
                if (stat_config.aggr_mode == AGGR_GLOBAL && prefix)
@@ -1520,6 +1560,14 @@ static int stat__set_big_num(const struct option *opt __maybe_unused,
        return 0;
 }
 
+static int enable_metric_only(const struct option *opt __maybe_unused,
+                             const char *s __maybe_unused, int unset)
+{
+       metric_only = unset ? false : true;     /* --no-metric-only clears it */
+       force_metric_only = true;       /* explicit choice: --topdown keeps it */
+       return 0;
+}
+
 static const struct option stat_options[] = {
        OPT_BOOLEAN('T', "transaction", &transaction_run,
                    "hardware transaction statistics"),
@@ -1578,8 +1626,10 @@ static const struct option stat_options[] = {
                     "aggregate counts per thread", AGGR_THREAD),
        OPT_UINTEGER('D', "delay", &initial_delay,
                     "ms to wait before starting measurement after program start"),
-       OPT_BOOLEAN(0, "metric-only", &metric_only,
-                       "Only print computed metrics. No raw values"),
+       OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL,
+                       "Only print computed metrics. No raw values", enable_metric_only),
+       OPT_BOOLEAN(0, "topdown", &topdown_run,
+                       "measure topdown level 1 statistics"),
        OPT_END()
 };
 
@@ -1772,12 +1822,62 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
        return 0;
 }
 
+static int topdown_filter_events(const char **attr, char **str, bool use_group)
+{
+       int off = 0;    /* entries dropped so far */
+       int i;
+       int len = 0;    /* bytes for the kept names, one separator each */
+       char *s;
+       /* Keep only events the "cpu" PMU has; build "a,b" or "{a,b}" in *str. */
+       for (i = 0; attr[i]; i++) {
+               if (pmu_have_event("cpu", attr[i])) {
+                       len += strlen(attr[i]) + 1;     /* name + ',' */
+                       attr[i - off] = attr[i];        /* compact kept names to the front */
+               } else
+                       off++;
+       }
+       attr[i - off] = NULL;   /* re-terminate the compacted list */
+
+       *str = malloc(len + 1 + 2);     /* names, separators, braces, NUL */
+       if (!*str)
+               return -1;      /* allocation failed */
+       s = *str;
+       if (i - off == 0) {     /* nothing supported: hand back "" */
+               *s = 0;
+               return 0;
+       }
+       if (use_group)
+               *s++ = '{';
+       for (i = 0; attr[i]; i++) {
+               strcpy(s, attr[i]);
+               s += strlen(s);
+               *s++ = ',';
+       }
+       if (use_group) {
+               s[-1] = '}';    /* last ',' becomes the closing brace */
+               *s = 0;
+       } else
+               s[-1] = 0;      /* last ',' becomes the terminator */
+       return 0;
+}
+
+__weak bool arch_topdown_check_group(bool *warn)       /* generic fallback: never group, never warn */
+{
+       *warn = false;
+       return false;
+}
+
+__weak void arch_topdown_group_warn(void)      /* generic fallback: nothing to warn about */
+{
+}
+
 /*
  * Add default attributes, if there were no attributes specified or
  * if -d/--detailed, -d -d or -d -d -d is used:
  */
 static int add_default_attributes(void)
 {
+       int err;
        struct perf_event_attr default_attrs0[] = {
 
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK             },
@@ -1896,7 +1996,6 @@ static int add_default_attributes(void)
                return 0;
 
        if (transaction_run) {
-               int err;
                if (pmu_have_event("cpu", "cycles-ct") &&
                    pmu_have_event("cpu", "el-start"))
                        err = parse_events(evsel_list, transaction_attrs, NULL);
@@ -1909,6 +2008,46 @@ static int add_default_attributes(void)
                return 0;
        }
 
+       if (topdown_run) {
+               char *str = NULL;
+               bool warn = false;
+
+               if (stat_config.aggr_mode != AGGR_GLOBAL &&
+                   stat_config.aggr_mode != AGGR_CORE) {
+                       pr_err("top down event configuration requires --per-core mode\n");
+                       return -1;
+               }
+               stat_config.aggr_mode = AGGR_CORE;
+               if (nr_cgroups || !target__has_cpu(&target)) {
+                       pr_err("top down event configuration requires system-wide mode (-a)\n");
+                       return -1;
+               }
+
+               if (!force_metric_only)
+                       metric_only = true;
+               if (topdown_filter_events(topdown_attrs, &str,
+                               arch_topdown_check_group(&warn)) < 0) {
+                       pr_err("Out of memory\n");
+                       return -1;
+               }
+               if (topdown_attrs[0] && str) {
+                       if (warn)
+                               arch_topdown_group_warn();
+                       err = parse_events(evsel_list, str, NULL);
+                       if (err) {
+                               fprintf(stderr,
+                                       "Cannot set up top down events %s: %d\n",
+                                       str, err);
+                               free(str);
+                               return -1;
+                       }
+               } else {
+                       fprintf(stderr, "System does not support topdown\n");
+                       return -1;
+               }
+               free(str);
+       }
+
        if (!evsel_list->nr_entries) {
                if (target__has_cpu(&target))
                        default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
index 7865f68..b2a2c74 100644 (file)
@@ -1783,8 +1783,8 @@ static int test_pmu_events(void)
                struct evlist_test e;
                char name[MAX_NAME];
 
-               if (!strcmp(ent->d_name, ".") ||
-                   !strcmp(ent->d_name, ".."))
+               /* Names containing . are special and cannot be used directly */
+               if (strchr(ent->d_name, '.'))
                        continue;
 
                snprintf(name, MAX_NAME, "cpu/event=%s/u", ent->d_name);
index dad7d82..c73f1c4 100644 (file)
@@ -275,7 +275,8 @@ static int perf_parse_file(config_fn_t fn, void *data)
                        break;
                }
        }
-       die("bad config file line %d in %s", config_linenr, config_file_name);
+       pr_err("bad config file line %d in %s\n", config_linenr, config_file_name);
+       return -1;
 }
 
 static int parse_unit_factor(const char *end, unsigned long *val)
@@ -479,16 +480,15 @@ static int perf_config_global(void)
 
 int perf_config(config_fn_t fn, void *data)
 {
-       int ret = 0, found = 0;
+       int ret = -1;
        const char *home = NULL;
 
        /* Setting $PERF_CONFIG makes perf read _only_ the given config file. */
        if (config_exclusive_filename)
                return perf_config_from_file(fn, config_exclusive_filename, data);
        if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) {
-               ret += perf_config_from_file(fn, perf_etc_perfconfig(),
-                                           data);
-               found += 1;
+               if (perf_config_from_file(fn, perf_etc_perfconfig(), data) < 0)
+                       goto out;
        }
 
        home = getenv("HOME");
@@ -514,14 +514,12 @@ int perf_config(config_fn_t fn, void *data)
                if (!st.st_size)
                        goto out_free;
 
-               ret += perf_config_from_file(fn, user_config, data);
-               found += 1;
+               ret = perf_config_from_file(fn, user_config, data);
+
 out_free:
                free(user_config);
        }
 out:
-       if (found == 0)
-               return -1;
        return ret;
 }
 
@@ -609,8 +607,12 @@ static int collect_config(const char *var, const char *value,
        struct perf_config_section *section = NULL;
        struct perf_config_item *item = NULL;
        struct perf_config_set *set = perf_config_set;
-       struct list_head *sections = &set->sections;
+       struct list_head *sections;
+
+       if (set == NULL)
+               return -1;
 
+       sections = &set->sections;
        key = ptr = strdup(var);
        if (!key) {
                pr_debug("%s: strdup failed\n", __func__);
index e0f3094..1b918aa 100644 (file)
@@ -946,9 +946,12 @@ static int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
        if (cpu_map__empty(evlist->cpus))
                evlist->nr_mmaps = thread_map__nr(evlist->threads);
        evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
+       if (!evlist->mmap)
+               return -ENOMEM;
+
        for (i = 0; i < evlist->nr_mmaps; i++)
                evlist->mmap[i].fd = -1;
-       return evlist->mmap != NULL ? 0 : -ENOMEM;
+       return 0;
 }
 
 struct mmap_params {
index 18e18f1..9b2e3e6 100644 (file)
@@ -2251,17 +2251,11 @@ void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample,
        return sample->raw_data + offset;
 }
 
-u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
-                      const char *name)
+u64 format_field__intval(struct format_field *field, struct perf_sample *sample,
+                        bool needs_swap)
 {
-       struct format_field *field = perf_evsel__field(evsel, name);
-       void *ptr;
        u64 value;
-
-       if (!field)
-               return 0;
-
-       ptr = sample->raw_data + field->offset;
+       void *ptr = sample->raw_data + field->offset;
 
        switch (field->size) {
        case 1:
@@ -2279,7 +2273,7 @@ u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
                return 0;
        }
 
-       if (!evsel->needs_swap)
+       if (!needs_swap)
                return value;
 
        switch (field->size) {
@@ -2296,6 +2290,17 @@ u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
        return 0;
 }
 
+u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
+                      const char *name)
+{
+       struct format_field *field = perf_evsel__field(evsel, name);
+
+       /* No field called @name on this evsel: read as 0. */
+       if (!field)
+               return 0;
+       return format_field__intval(field, sample, evsel->needs_swap);
+}
+
 bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
                          char *msg, size_t msgsize)
 {
index 028412b..828ddd1 100644 (file)
@@ -261,6 +261,8 @@ static inline char *perf_evsel__strval(struct perf_evsel *evsel,
 
 struct format_field;
 
+u64 format_field__intval(struct format_field *field, struct perf_sample *sample, bool needs_swap);
+
 struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name);
 
 #define perf_evsel__match(evsel, t, c)         \
diff --git a/tools/perf/util/group.h b/tools/perf/util/group.h
new file mode 100644 (file)
index 0000000..116debe
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef GROUP_H
+#define GROUP_H 1
+#include <stdbool.h>   /* bool is used in the prototypes below */
+
+bool arch_topdown_check_group(bool *warn);     /* true if topdown events can be grouped */
+void arch_topdown_group_warn(void);            /* arch-specific warning, if any */
+#endif
index 01af1ee..3c15b33 100644 (file)
@@ -260,6 +260,7 @@ cycles-ct                                   { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
 cycles-t                                       { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
 mem-loads                                      { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
 mem-stores                                     { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
+topdown-[a-z-]+                                        { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
 
 L1-dcache|l1-d|l1d|L1-data             |
 L1-icache|l1-i|l1i|L1-instruction      |
index aa9efe0..8a2bbd2 100644 (file)
@@ -36,6 +36,11 @@ static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_topdown_total_slots[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
 static bool have_frontend_stalled;
 
 struct stats walltime_nsecs_stats;
@@ -82,6 +87,11 @@ void perf_stat__reset_shadow_stats(void)
                sizeof(runtime_transaction_stats));
        memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
        memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
+       memset(runtime_topdown_total_slots, 0, sizeof(runtime_topdown_total_slots));
+       memset(runtime_topdown_slots_retired, 0, sizeof(runtime_topdown_slots_retired));
+       memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued));
+       memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles));
+       memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles));
 }
 
 /*
@@ -105,6 +115,16 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
                update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
        else if (perf_stat_evsel__is(counter, ELISION_START))
                update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
+       else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
+               update_stats(&runtime_topdown_total_slots[ctx][cpu], count[0]);
+       else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
+               update_stats(&runtime_topdown_slots_issued[ctx][cpu], count[0]);
+       else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
+               update_stats(&runtime_topdown_slots_retired[ctx][cpu], count[0]);
+       else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
+               update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu],count[0]);
+       else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
+               update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
                update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
@@ -302,6 +322,107 @@ static void print_ll_cache_misses(int cpu,
        out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
 }
 
+/*
+ * High level "TopDown" CPU core pipeline bottleneck breakdown.
+ *
+ * Basic concept following
+ * Yasin, A Top Down Method for Performance analysis and Counter architecture
+ * ISPASS14
+ *
+ * The CPU pipeline is divided into 4 areas that can be bottlenecks:
+ *
+ * Frontend -> Backend -> Retiring
+ * BadSpeculation in addition means out of order execution that is thrown away
+ * (for example branch mispredictions)
+ * Frontend is instruction decoding.
+ * Backend is execution, like computation and accessing data in memory
+ * Retiring is good execution that is not directly bottlenecked
+ *
+ * The formulas are computed in slots.
+ * A slot is one pipeline entry; each cycle has as many slots as the pipeline
+ * is wide (for example a 4-wide pipeline has 4 slots for each cycle)
+ *
+ * Formulas:
+ * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
+ *                     TotalSlots
+ * Retiring = SlotsRetired / TotalSlots
+ * FrontendBound = FetchBubbles / TotalSlots
+ * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
+ *
+ * The kernel provides the mapping to the low level CPU events and any scaling
+ * needed for the CPU pipeline width, for example:
+ *
+ * TotalSlots = Cycles * 4
+ *
+ * The scaling factor is communicated in the sysfs unit.
+ *
+ * In some cases the CPU may not be able to measure all the formulas due to
+ * missing events. In this case multiple formulas are combined, as possible.
+ *
+ * Full TopDown supports more levels to sub-divide each area: for example
+ * BackendBound into computing bound and memory bound. For now we only
+ * support Level 1 TopDown.
+ */
+
+static double sanitize_val(double x)
+{
+       /* Values in [-0.02, 0) become 0; everything else is returned as is. */
+       const double lowest = -0.02;
+       return (x >= lowest && x < 0) ? 0.0 : x;
+}
+
+static double td_total_slots(int ctx, int cpu)
+{
+       return avg_stats(runtime_topdown_total_slots[ctx] + cpu); /* total-slots stats of ctx/cpu */
+}
+
+static double td_bad_spec(int ctx, int cpu)
+{
+       /* BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) / TotalSlots */
+       double issued = avg_stats(&runtime_topdown_slots_issued[ctx][cpu]);
+       double retired = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]);
+       double recovery = avg_stats(&runtime_topdown_recovery_bubbles[ctx][cpu]);
+       double wasted = issued - retired + recovery;
+       double slots = td_total_slots(ctx, cpu);
+
+       /* No slot count: report 0 rather than divide by zero. */
+       if (!slots)
+               return sanitize_val(0);
+       return sanitize_val(wasted / slots);
+}
+
+static double td_retiring(int ctx, int cpu)
+{
+       double slots = td_total_slots(ctx, cpu);
+       double retired = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]);
+
+       /* Retiring = SlotsRetired / TotalSlots; 0 when no slots were counted. */
+       if (!slots)
+               return 0;
+       return retired / slots;
+}
+
+static double td_fe_bound(int ctx, int cpu)
+{
+       double slots = td_total_slots(ctx, cpu);
+       double bubbles = avg_stats(&runtime_topdown_fetch_bubbles[ctx][cpu]);
+
+       /* FrontendBound = FetchBubbles / TotalSlots; 0 when no slots counted. */
+       if (!slots)
+               return 0;
+       return bubbles / slots;
+}
+
+static double td_be_bound(int ctx, int cpu)
+{
+       /* BackendBound = 1.0 - FrontendBound - BadSpeculation - Retiring */
+       double others = td_fe_bound(ctx, cpu) + td_bad_spec(ctx, cpu);
+       others += td_retiring(ctx, cpu);
+       if (others == 0)
+               return 0;       /* all three are 0: report 0, not 100% */
+       return sanitize_val(1.0 - others);
+}
+
 void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
                                   double avg, int cpu,
                                   struct perf_stat_output_ctx *out)
@@ -309,6 +430,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
        void *ctxp = out->ctx;
        print_metric_t print_metric = out->print_metric;
        double total, ratio = 0.0, total2;
+       const char *color = NULL;
        int ctx = evsel_context(evsel);
 
        if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
@@ -452,6 +574,46 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
                                     avg / ratio);
                else
                        print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
+       } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
+               double fe_bound = td_fe_bound(ctx, cpu);
+
+               if (fe_bound > 0.2)
+                       color = PERF_COLOR_RED;
+               print_metric(ctxp, color, "%8.1f%%", "frontend bound",
+                               fe_bound * 100.);
+       } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
+               double retiring = td_retiring(ctx, cpu);
+
+               if (retiring > 0.7)
+                       color = PERF_COLOR_GREEN;
+               print_metric(ctxp, color, "%8.1f%%", "retiring",
+                               retiring * 100.);
+       } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
+               double bad_spec = td_bad_spec(ctx, cpu);
+
+               if (bad_spec > 0.1)
+                       color = PERF_COLOR_RED;
+               print_metric(ctxp, color, "%8.1f%%", "bad speculation",
+                               bad_spec * 100.);
+       } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
+               double be_bound = td_be_bound(ctx, cpu);
+               const char *name = "backend bound";
+               static int have_recovery_bubbles = -1;
+
+               /* In case the CPU does not support topdown-recovery-bubbles */
+               if (have_recovery_bubbles < 0)
+                       have_recovery_bubbles = pmu_have_event("cpu",
+                                       "topdown-recovery-bubbles");
+               if (!have_recovery_bubbles)
+                       name = "backend bound/bad spec";
+
+               if (be_bound > 0.2)
+                       color = PERF_COLOR_RED;
+               if (td_total_slots(ctx, cpu) > 0)
+                       print_metric(ctxp, color, "%8.1f%%", name,
+                                       be_bound * 100.);
+               else
+                       print_metric(ctxp, NULL, NULL, name, 0);
        } else if (runtime_nsecs_stats[cpu].n != 0) {
                char unit = 'M';
                char unit_buf[10];
index ffa1d06..c1ba255 100644 (file)
@@ -79,6 +79,11 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
        ID(TRANSACTION_START,   cpu/tx-start/),
        ID(ELISION_START,       cpu/el-start/),
        ID(CYCLES_IN_TX_CP,     cpu/cycles-ct/),
+       ID(TOPDOWN_TOTAL_SLOTS, topdown-total-slots),
+       ID(TOPDOWN_SLOTS_ISSUED, topdown-slots-issued),
+       ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
+       ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
+       ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
 };
 #undef ID
 
index 0150e78..c29bb94 100644 (file)
@@ -17,6 +17,11 @@ enum perf_stat_evsel_id {
        PERF_STAT_EVSEL_ID__TRANSACTION_START,
        PERF_STAT_EVSEL_ID__ELISION_START,
        PERF_STAT_EVSEL_ID__CYCLES_IN_TX_CP,
+       PERF_STAT_EVSEL_ID__TOPDOWN_TOTAL_SLOTS,
+       PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_ISSUED,
+       PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
+       PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
+       PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
        PERF_STAT_EVSEL_ID__MAX,
 };