Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
authorLinus Torvalds <torvalds@linux-foundation.org>
Wed, 6 May 2015 17:47:25 +0000 (10:47 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Wed, 6 May 2015 17:47:25 +0000 (10:47 -0700)
Pull perf fixes from Ingo Molnar:
 "Mostly tooling fixes, but also an uncore PMU driver fix and an uncore
  PMU driver hardware-enablement addition"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf probe: Fix segfault if passed with ''.
  perf report: Fix -T/--threads option to work again
  perf bench numa: Fix immediate meeting of convergence condition
  perf bench numa: Fixes of --quiet argument
  perf bench futex: Fix hung wakeup tasks after requeueing
  perf probe: Fix bug with global variables handling
  perf top: Fix a segfault when kernel map is restricted.
  tools lib traceevent: Fix build failure on 32-bit arch
  perf kmem: Fix compiles on RHEL6/OL6
  tools lib api: Undefine _FORTIFY_SOURCE before setting it
  perf kmem: Consistently use PRIu64 for printing u64 values
  perf trace: Disable events and drain events when forked workload ends
  perf trace: Enable events when doing system wide tracing and starting a workload
  perf/x86/intel/uncore: Move PCI IDs for IMC to uncore driver
  perf/x86/intel/uncore: Add support for Intel Haswell ULT (lower power Mobile Processor) IMC uncore PMUs
  perf/x86/intel: Add cpu_(prepare|starting|dying) for core_pmu

13 files changed:
arch/x86/kernel/cpu/perf_event_intel.c
arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
include/linux/pci_ids.h
tools/lib/api/Makefile
tools/lib/traceevent/event-parse.c
tools/perf/bench/futex-requeue.c
tools/perf/bench/numa.c
tools/perf/builtin-kmem.c
tools/perf/builtin-report.c
tools/perf/builtin-top.c
tools/perf/builtin-trace.c
tools/perf/util/probe-event.c
tools/perf/util/probe-finder.c

index 219d3fb..960e85d 100644 (file)
@@ -2533,34 +2533,6 @@ ssize_t intel_event_sysfs_show(char *page, u64 config)
        return x86_event_sysfs_show(page, config, event);
 }
 
-static __initconst const struct x86_pmu core_pmu = {
-       .name                   = "core",
-       .handle_irq             = x86_pmu_handle_irq,
-       .disable_all            = x86_pmu_disable_all,
-       .enable_all             = core_pmu_enable_all,
-       .enable                 = core_pmu_enable_event,
-       .disable                = x86_pmu_disable_event,
-       .hw_config              = x86_pmu_hw_config,
-       .schedule_events        = x86_schedule_events,
-       .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
-       .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
-       .event_map              = intel_pmu_event_map,
-       .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
-       .apic                   = 1,
-       /*
-        * Intel PMCs cannot be accessed sanely above 32 bit width,
-        * so we install an artificial 1<<31 period regardless of
-        * the generic event period:
-        */
-       .max_period             = (1ULL << 31) - 1,
-       .get_event_constraints  = intel_get_event_constraints,
-       .put_event_constraints  = intel_put_event_constraints,
-       .event_constraints      = intel_core_event_constraints,
-       .guest_get_msrs         = core_guest_get_msrs,
-       .format_attrs           = intel_arch_formats_attr,
-       .events_sysfs_show      = intel_event_sysfs_show,
-};
-
 struct intel_shared_regs *allocate_shared_regs(int cpu)
 {
        struct intel_shared_regs *regs;
@@ -2743,6 +2715,44 @@ static struct attribute *intel_arch3_formats_attr[] = {
        NULL,
 };
 
+static __initconst const struct x86_pmu core_pmu = {
+       .name                   = "core",
+       .handle_irq             = x86_pmu_handle_irq,
+       .disable_all            = x86_pmu_disable_all,
+       .enable_all             = core_pmu_enable_all,
+       .enable                 = core_pmu_enable_event,
+       .disable                = x86_pmu_disable_event,
+       .hw_config              = x86_pmu_hw_config,
+       .schedule_events        = x86_schedule_events,
+       .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
+       .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
+       .event_map              = intel_pmu_event_map,
+       .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
+       .apic                   = 1,
+       /*
+        * Intel PMCs cannot be accessed sanely above 32-bit width,
+        * so we install an artificial 1<<31 period regardless of
+        * the generic event period:
+        */
+       .max_period             = (1ULL<<31) - 1,
+       .get_event_constraints  = intel_get_event_constraints,
+       .put_event_constraints  = intel_put_event_constraints,
+       .event_constraints      = intel_core_event_constraints,
+       .guest_get_msrs         = core_guest_get_msrs,
+       .format_attrs           = intel_arch_formats_attr,
+       .events_sysfs_show      = intel_event_sysfs_show,
+
+       /*
+        * Virtual (or funny metal) CPU can define x86_pmu.extra_regs
+        * together with PMU version 1 and thus be using core_pmu with
+        * shared_regs. We need following callbacks here to allocate
+        * it properly.
+        */
+       .cpu_prepare            = intel_pmu_cpu_prepare,
+       .cpu_starting           = intel_pmu_cpu_starting,
+       .cpu_dying              = intel_pmu_cpu_dying,
+};
+
 static __initconst const struct x86_pmu intel_pmu = {
        .name                   = "Intel",
        .handle_irq             = intel_pmu_handle_irq,
index 3001015..4562e9e 100644 (file)
@@ -1,6 +1,13 @@
 /* Nehalem/SandBridge/Haswell uncore support */
 #include "perf_event_intel_uncore.h"
 
+/* Uncore IMC PCI IDs */
+#define PCI_DEVICE_ID_INTEL_SNB_IMC    0x0100
+#define PCI_DEVICE_ID_INTEL_IVB_IMC    0x0154
+#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150
+#define PCI_DEVICE_ID_INTEL_HSW_IMC    0x0c00
+#define PCI_DEVICE_ID_INTEL_HSW_U_IMC  0x0a04
+
 /* SNB event control */
 #define SNB_UNC_CTL_EV_SEL_MASK                        0x000000ff
 #define SNB_UNC_CTL_UMASK_MASK                 0x0000ff00
@@ -472,6 +479,10 @@ static const struct pci_device_id hsw_uncore_pci_ids[] = {
                PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC),
                .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
        },
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
        { /* end: all zeroes */ },
 };
 
@@ -502,6 +513,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
        IMC_DEV(IVB_IMC, &ivb_uncore_pci_driver),    /* 3rd Gen Core processor */
        IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */
        IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver),    /* 4th Gen Core Processor */
+       IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver),  /* 4th Gen Core ULT Mobile Processor */
        {  /* end marker */ }
 };
 
index 38cff8f..2f7b9a4 100644 (file)
 
 #define PCI_VENDOR_ID_INTEL            0x8086
 #define PCI_DEVICE_ID_INTEL_EESSC      0x0008
-#define PCI_DEVICE_ID_INTEL_SNB_IMC    0x0100
-#define PCI_DEVICE_ID_INTEL_IVB_IMC    0x0154
-#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150
-#define PCI_DEVICE_ID_INTEL_HSW_IMC    0x0c00
 #define PCI_DEVICE_ID_INTEL_PXHD_0     0x0320
 #define PCI_DEVICE_ID_INTEL_PXHD_1     0x0321
 #define PCI_DEVICE_ID_INTEL_PXH_0      0x0329
index d8fe29f..8bd9606 100644 (file)
@@ -16,7 +16,7 @@ MAKEFLAGS += --no-print-directory
 LIBFILE = $(OUTPUT)libapi.a
 
 CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
-CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -D_FORTIFY_SOURCE=2 -fPIC
+CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC
 CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
 
 RM = rm -f
index e0917c0..29f94f6 100644 (file)
@@ -3865,7 +3865,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
                        } else if (el_size == 4) {
                                trace_seq_printf(s, "%u", *(uint32_t *)num);
                        } else if (el_size == 8) {
-                               trace_seq_printf(s, "%lu", *(uint64_t *)num);
+                               trace_seq_printf(s, "%"PRIu64, *(uint64_t *)num);
                        } else {
                                trace_seq_printf(s, "BAD SIZE:%d 0x%x",
                                                 el_size, *(uint8_t *)num);
index bedff6b..ad0d9b5 100644 (file)
@@ -132,6 +132,9 @@ int bench_futex_requeue(int argc, const char **argv,
        if (!fshared)
                futex_flag = FUTEX_PRIVATE_FLAG;
 
+       if (nrequeue > nthreads)
+               nrequeue = nthreads;
+
        printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %p), "
               "%d at a time.\n\n",  getpid(), nthreads,
               fshared ? "shared":"private", &futex1, &futex2, nrequeue);
@@ -161,20 +164,18 @@ int bench_futex_requeue(int argc, const char **argv,
 
                /* Ok, all threads are patiently blocked, start requeueing */
                gettimeofday(&start, NULL);
-               for (nrequeued = 0; nrequeued < nthreads; nrequeued += nrequeue) {
+               while (nrequeued < nthreads) {
                        /*
                         * Do not wakeup any tasks blocked on futex1, allowing
                         * us to really measure futex_wait functionality.
                         */
-                       futex_cmp_requeue(&futex1, 0, &futex2, 0,
-                                         nrequeue, futex_flag);
+                       nrequeued += futex_cmp_requeue(&futex1, 0, &futex2, 0,
+                                                      nrequeue, futex_flag);
                }
+
                gettimeofday(&end, NULL);
                timersub(&end, &start, &runtime);
 
-               if (nrequeued > nthreads)
-                       nrequeued = nthreads;
-
                update_stats(&requeued_stats, nrequeued);
                update_stats(&requeuetime_stats, runtime.tv_usec);
 
@@ -184,7 +185,7 @@ int bench_futex_requeue(int argc, const char **argv,
                }
 
                /* everybody should be blocked on futex2, wake'em up */
-               nrequeued = futex_wake(&futex2, nthreads, futex_flag);
+               nrequeued = futex_wake(&futex2, nrequeued, futex_flag);
                if (nthreads != nrequeued)
                        warnx("couldn't wakeup all tasks (%d/%d)", nrequeued, nthreads);
 
index ebfa163..ba5efa4 100644 (file)
@@ -180,7 +180,7 @@ static const struct option options[] = {
        OPT_INTEGER('H', "thp"          , &p0.thp,              "MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"),
        OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details"),
        OPT_BOOLEAN('m', "measure_convergence", &p0.measure_convergence, "measure convergence latency"),
-       OPT_BOOLEAN('q', "quiet"        , &p0.show_quiet,       "bzero the initial allocations"),
+       OPT_BOOLEAN('q', "quiet"        , &p0.show_quiet,       "quiet mode"),
        OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"),
 
        /* Special option string parsing callbacks: */
@@ -828,6 +828,9 @@ static int count_process_nodes(int process_nr)
                td = g->threads + task_nr;
 
                node = numa_node_of_cpu(td->curr_cpu);
+               if (node < 0) /* curr_cpu was likely still -1 */
+                       return 0;
+
                node_present[node] = 1;
        }
 
@@ -882,6 +885,11 @@ static void calc_convergence_compression(int *strong)
        for (p = 0; p < g->p.nr_proc; p++) {
                unsigned int nodes = count_process_nodes(p);
 
+               if (!nodes) {
+                       *strong = 0;
+                       return;
+               }
+
                nodes_min = min(nodes, nodes_min);
                nodes_max = max(nodes, nodes_max);
        }
@@ -1395,7 +1403,7 @@ static void print_res(const char *name, double val,
        if (!name)
                name = "main,";
 
-       if (g->p.show_quiet)
+       if (!g->p.show_quiet)
                printf(" %-30s %15.3f, %-15s %s\n", name, val, txt_unit, txt_short);
        else
                printf(" %14.3f %s\n", val, txt_long);
index 63ea013..1634186 100644 (file)
@@ -319,7 +319,7 @@ static int page_stat_cmp(struct page_stat *a, struct page_stat *b)
        return 0;
 }
 
-static struct page_stat *search_page_alloc_stat(struct page_stat *stat, bool create)
+static struct page_stat *search_page_alloc_stat(struct page_stat *pstat, bool create)
 {
        struct rb_node **node = &page_alloc_tree.rb_node;
        struct rb_node *parent = NULL;
@@ -331,7 +331,7 @@ static struct page_stat *search_page_alloc_stat(struct page_stat *stat, bool cre
                parent = *node;
                data = rb_entry(*node, struct page_stat, node);
 
-               cmp = page_stat_cmp(data, stat);
+               cmp = page_stat_cmp(data, pstat);
                if (cmp < 0)
                        node = &parent->rb_left;
                else if (cmp > 0)
@@ -345,10 +345,10 @@ static struct page_stat *search_page_alloc_stat(struct page_stat *stat, bool cre
 
        data = zalloc(sizeof(*data));
        if (data != NULL) {
-               data->page = stat->page;
-               data->order = stat->order;
-               data->gfp_flags = stat->gfp_flags;
-               data->migrate_type = stat->migrate_type;
+               data->page = pstat->page;
+               data->order = pstat->order;
+               data->gfp_flags = pstat->gfp_flags;
+               data->migrate_type = pstat->migrate_type;
 
                rb_link_node(&data->node, parent, node);
                rb_insert_color(&data->node, &page_alloc_tree);
@@ -375,7 +375,7 @@ static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
        unsigned int migrate_type = perf_evsel__intval(evsel, sample,
                                                       "migratetype");
        u64 bytes = kmem_page_size << order;
-       struct page_stat *stat;
+       struct page_stat *pstat;
        struct page_stat this = {
                .order = order,
                .gfp_flags = gfp_flags,
@@ -401,21 +401,21 @@ static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
         * This is to find the current page (with correct gfp flags and
         * migrate type) at free event.
         */
-       stat = search_page(page, true);
-       if (stat == NULL)
+       pstat = search_page(page, true);
+       if (pstat == NULL)
                return -ENOMEM;
 
-       stat->order = order;
-       stat->gfp_flags = gfp_flags;
-       stat->migrate_type = migrate_type;
+       pstat->order = order;
+       pstat->gfp_flags = gfp_flags;
+       pstat->migrate_type = migrate_type;
 
        this.page = page;
-       stat = search_page_alloc_stat(&this, true);
-       if (stat == NULL)
+       pstat = search_page_alloc_stat(&this, true);
+       if (pstat == NULL)
                return -ENOMEM;
 
-       stat->nr_alloc++;
-       stat->alloc_bytes += bytes;
+       pstat->nr_alloc++;
+       pstat->alloc_bytes += bytes;
 
        order_stats[order][migrate_type]++;
 
@@ -428,7 +428,7 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
        u64 page;
        unsigned int order = perf_evsel__intval(evsel, sample, "order");
        u64 bytes = kmem_page_size << order;
-       struct page_stat *stat;
+       struct page_stat *pstat;
        struct page_stat this = {
                .order = order,
        };
@@ -441,8 +441,8 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
        nr_page_frees++;
        total_page_free_bytes += bytes;
 
-       stat = search_page(page, false);
-       if (stat == NULL) {
+       pstat = search_page(page, false);
+       if (pstat == NULL) {
                pr_debug2("missing free at page %"PRIx64" (order: %d)\n",
                          page, order);
 
@@ -453,18 +453,18 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
        }
 
        this.page = page;
-       this.gfp_flags = stat->gfp_flags;
-       this.migrate_type = stat->migrate_type;
+       this.gfp_flags = pstat->gfp_flags;
+       this.migrate_type = pstat->migrate_type;
 
-       rb_erase(&stat->node, &page_tree);
-       free(stat);
+       rb_erase(&pstat->node, &page_tree);
+       free(pstat);
 
-       stat = search_page_alloc_stat(&this, false);
-       if (stat == NULL)
+       pstat = search_page_alloc_stat(&this, false);
+       if (pstat == NULL)
                return -ENOENT;
 
-       stat->nr_free++;
-       stat->free_bytes += bytes;
+       pstat->nr_free++;
+       pstat->free_bytes += bytes;
 
        return 0;
 }
@@ -640,9 +640,9 @@ static void print_page_summary(void)
               nr_page_frees, total_page_free_bytes / 1024);
        printf("\n");
 
-       printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests",
+       printf("%-30s: %'16"PRIu64"   [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests",
               nr_alloc_freed, (total_alloc_freed_bytes) / 1024);
-       printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total alloc-only requests",
+       printf("%-30s: %'16"PRIu64"   [ %'16"PRIu64" KB ]\n", "Total alloc-only requests",
               nr_page_allocs - nr_alloc_freed,
               (total_page_alloc_bytes - total_alloc_freed_bytes) / 1024);
        printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total free-only requests",
index 476cdf7..b63aeda 100644 (file)
@@ -329,7 +329,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
                fprintf(stdout, "\n\n");
        }
 
-       if (sort_order == default_sort_order &&
+       if (sort_order == NULL &&
            parent_pattern == default_parent_pattern) {
                fprintf(stdout, "#\n# (%s)\n#\n", help);
 
index 1cb3436..6a4d5d4 100644 (file)
@@ -733,7 +733,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
 "Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
 "Check /proc/sys/kernel/kptr_restrict.\n\n"
 "Kernel%s samples will not be resolved.\n",
-                         !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
+                         al.map && !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
                          " modules" : "");
                if (use_browser <= 0)
                        sleep(5);
index e124741..e122970 100644 (file)
@@ -2241,10 +2241,11 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
        if (err < 0)
                goto out_error_mmap;
 
+       if (!target__none(&trace->opts.target))
+               perf_evlist__enable(evlist);
+
        if (forks)
                perf_evlist__start_workload(evlist);
-       else
-               perf_evlist__enable(evlist);
 
        trace->multiple_threads = evlist->threads->map[0] == -1 ||
                                  evlist->threads->nr > 1 ||
@@ -2272,6 +2273,11 @@ next_event:
 
                        if (interrupted)
                                goto out_disable;
+
+                       if (done && !draining) {
+                               perf_evlist__disable(evlist);
+                               draining = true;
+                       }
                }
        }
 
index d8bb616..d05b77c 100644 (file)
@@ -1084,6 +1084,8 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
         *
         * TODO:Group name support
         */
+       if (!arg)
+               return -EINVAL;
 
        ptr = strpbrk(arg, ";=@+%");
        if (ptr && *ptr == '=') {       /* Event name */
index b5bf9d5..2a76e14 100644 (file)
@@ -578,10 +578,12 @@ static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf)
        /* Search child die for local variables and parameters. */
        if (!die_find_variable_at(sc_die, pf->pvar->var, pf->addr, &vr_die)) {
                /* Search again in global variables */
-               if (!die_find_variable_at(&pf->cu_die, pf->pvar->var, 0, &vr_die))
+               if (!die_find_variable_at(&pf->cu_die, pf->pvar->var,
+                                               0, &vr_die)) {
                        pr_warning("Failed to find '%s' in this function.\n",
                                   pf->pvar->var);
                        ret = -ENOENT;
+               }
        }
        if (ret >= 0)
                ret = convert_variable(&vr_die, pf);