perf tools: Set the maximum allowed stack from /proc/sys/kernel/perf_event_max_stack
authorArnaldo Carvalho de Melo <acme@redhat.com>
Wed, 27 Apr 2016 13:16:24 +0000 (10:16 -0300)
committerArnaldo Carvalho de Melo <acme@redhat.com>
Wed, 27 Apr 2016 13:29:07 +0000 (10:29 -0300)
There is an upper limit to what tooling considers a valid callchain,
and it was tied to the hardcoded value in the kernel,
PERF_MAX_STACK_DEPTH (127), now that this can be tuned via a sysctl,
make it read it and use that as the upper limit, falling back to
PERF_MAX_STACK_DEPTH for kernels where this sysctl isn't present.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-yjqsd30nnkogvj5oyx9ghir9@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
16 files changed:
tools/perf/Documentation/perf-report.txt
tools/perf/Documentation/perf-script.txt
tools/perf/Documentation/perf-top.txt
tools/perf/Documentation/perf-trace.txt
tools/perf/builtin-report.c
tools/perf/builtin-script.c
tools/perf/builtin-top.c
tools/perf/builtin-trace.c
tools/perf/perf.c
tools/perf/tests/hists_cumulate.c
tools/perf/tests/hists_filter.c
tools/perf/tests/hists_output.c
tools/perf/util/machine.c
tools/perf/util/scripting-engines/trace-event-perl.c
tools/perf/util/util.c
tools/perf/util/util.h

index 496d42c..ebaf849 100644 (file)
@@ -248,7 +248,7 @@ OPTIONS
        Note that when using the --itrace option the synthesized callchain size
        will override this value if the synthesized callchain size is bigger.
 
-       Default: 127
+       Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
 
 -G::
 --inverted::
index 4fc44c7..a856a10 100644 (file)
@@ -267,7 +267,7 @@ include::itrace.txt[]
         Note that when using the --itrace option the synthesized callchain size
         will override this value if the synthesized callchain size is bigger.
 
-        Default: 127
+        Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
 
 --ns::
        Use 9 decimal places when displaying time (i.e. show the nanoseconds)
index 19f046f..91d638d 100644 (file)
@@ -177,7 +177,7 @@ Default is to monitor all CPUS.
        between information loss and faster processing especially for
        workloads that can have a very long callchain stack.
 
-       Default: 127
+       Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
 
 --ignore-callees=<regex>::
         Ignore callees of the function(s) matching the given regex.
index c075c00..6afe201 100644 (file)
@@ -143,7 +143,7 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
         Implies '--call-graph dwarf' when --call-graph not present on the
         command line, on systems where DWARF unwinding was built in.
 
-        Default: 127
+        Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
 
 --min-stack::
         Set the stack depth limit when parsing the callchain, anything
index 1d5be0b..8d9b88a 100644 (file)
@@ -691,7 +691,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
                        .ordered_events  = true,
                        .ordering_requires_timestamps = true,
                },
-               .max_stack               = PERF_MAX_STACK_DEPTH,
+               .max_stack               = sysctl_perf_event_max_stack,
                .pretty_printing_style   = "normal",
                .socket_filter           = -1,
        };
@@ -744,7 +744,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
        OPT_INTEGER(0, "max-stack", &report.max_stack,
                    "Set the maximum stack depth when parsing the callchain, "
                    "anything beyond the specified depth will be ignored. "
-                   "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
+                   "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
        OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
                    "alias for inverted call graph"),
        OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
index f43b0c6..efca816 100644 (file)
@@ -2031,7 +2031,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
        OPT_UINTEGER(0, "max-stack", &scripting_max_stack,
                     "Set the maximum stack depth when parsing the callchain, "
                     "anything beyond the specified depth will be ignored. "
-                    "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
+                    "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
        OPT_BOOLEAN('I', "show-info", &show_full_info,
                    "display extended information from perf.data file"),
        OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path,
@@ -2067,6 +2067,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
                NULL
        };
 
+       scripting_max_stack = sysctl_perf_event_max_stack;
+
        setup_scripting();
 
        argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage,
index c130a11..da18517 100644 (file)
@@ -1103,7 +1103,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
                        },
                        .proc_map_timeout    = 500,
                },
-               .max_stack           = PERF_MAX_STACK_DEPTH,
+               .max_stack           = sysctl_perf_event_max_stack,
                .sym_pcnt_filter     = 5,
        };
        struct record_opts *opts = &top.record_opts;
@@ -1171,7 +1171,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
                    "Accumulate callchains of children and show total overhead as well"),
        OPT_INTEGER(0, "max-stack", &top.max_stack,
                    "Set the maximum stack depth when parsing the callchain. "
-                   "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
+                   "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
        OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
                   "ignore callees of these functions in call graphs",
                   report_parse_ignore_callees_opt),
index 48b00f0..f4f3389 100644 (file)
@@ -3106,7 +3106,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
        OPT_UINTEGER(0, "max-stack", &trace.max_stack,
                     "Set the maximum stack depth when parsing the callchain, "
                     "anything beyond the specified depth will be ignored. "
-                    "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
+                    "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
        OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
                        "per thread proc mmap processing timeout in ms"),
        OPT_END()
@@ -3150,7 +3150,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
                mmap_pages_user_set = false;
 
        if (trace.max_stack == UINT_MAX) {
-               trace.max_stack = PERF_MAX_STACK_DEPTH;
+               trace.max_stack = sysctl_perf_event_max_stack;
                max_stack_user_set = false;
        }
 
index 7b2df2b..83ffe7c 100644 (file)
@@ -17,6 +17,7 @@
 #include <subcmd/parse-options.h>
 #include "util/bpf-loader.h"
 #include "util/debug.h"
+#include <api/fs/fs.h>
 #include <api/fs/tracing_path.h>
 #include <pthread.h>
 #include <stdlib.h>
@@ -533,6 +534,7 @@ int main(int argc, const char **argv)
 {
        const char *cmd;
        char sbuf[STRERR_BUFSIZE];
+       int value;
 
        /* libsubcmd init */
        exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT);
@@ -542,6 +544,9 @@ int main(int argc, const char **argv)
        page_size = sysconf(_SC_PAGE_SIZE);
        cacheline_size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
 
+       if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0)
+               sysctl_perf_event_max_stack = value;
+
        cmd = extract_argv0_path(argv[0]);
        if (!cmd)
                cmd = "perf-help";
index ed5aa9e..4a2bbff 100644 (file)
@@ -101,7 +101,7 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
                if (machine__resolve(machine, &al, &sample) < 0)
                        goto out;
 
-               if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH,
+               if (hist_entry_iter__add(&iter, &al, sysctl_perf_event_max_stack,
                                         NULL) < 0) {
                        addr_location__put(&al);
                        goto out;
index b825d24..e846f8c 100644 (file)
@@ -81,7 +81,7 @@ static int add_hist_entries(struct perf_evlist *evlist,
 
                        al.socket = fake_samples[i].socket;
                        if (hist_entry_iter__add(&iter, &al,
-                                                PERF_MAX_STACK_DEPTH, NULL) < 0) {
+                                                sysctl_perf_event_max_stack, NULL) < 0) {
                                addr_location__put(&al);
                                goto out;
                        }
index d3556fb..7cd8738 100644 (file)
@@ -67,7 +67,7 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
                if (machine__resolve(machine, &al, &sample) < 0)
                        goto out;
 
-               if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH,
+               if (hist_entry_iter__add(&iter, &al, sysctl_perf_event_max_stack,
                                         NULL) < 0) {
                        addr_location__put(&al);
                        goto out;
index 656c1d7..2cb95bb 100644 (file)
@@ -1764,7 +1764,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
                 */
                int mix_chain_nr = i + 1 + lbr_nr + 1;
 
-               if (mix_chain_nr > PERF_MAX_STACK_DEPTH + PERF_MAX_BRANCH_DEPTH) {
+               if (mix_chain_nr > (int)sysctl_perf_event_max_stack + PERF_MAX_BRANCH_DEPTH) {
                        pr_warning("corrupted callchain. skipping...\n");
                        return 0;
                }
@@ -1825,7 +1825,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
         * Based on DWARF debug information, some architectures skip
         * a callchain entry saved by the kernel.
         */
-       if (chain->nr < PERF_MAX_STACK_DEPTH)
+       if (chain->nr < sysctl_perf_event_max_stack)
                skip_idx = arch_skip_callchain_idx(thread, chain);
 
        /*
@@ -1886,7 +1886,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
        }
 
 check_calls:
-       if (chain->nr > PERF_MAX_STACK_DEPTH && (int)chain->nr > max_stack) {
+       if (chain->nr > sysctl_perf_event_max_stack && (int)chain->nr > max_stack) {
                pr_warning("corrupted callchain. skipping...\n");
                return 0;
        }
index ae1cebc..62c7f69 100644 (file)
@@ -265,7 +265,7 @@ static SV *perl_process_callchain(struct perf_sample *sample,
 
        if (thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
                                      sample, NULL, NULL,
-                                     PERF_MAX_STACK_DEPTH) != 0) {
+                                     sysctl_perf_event_max_stack) != 0) {
                pr_err("Failed to resolve callchain. Skipping\n");
                goto exit;
        }
index 9473d46..619ba20 100644 (file)
@@ -33,6 +33,8 @@ struct callchain_param        callchain_param = {
 unsigned int page_size;
 int cacheline_size;
 
+unsigned int sysctl_perf_event_max_stack = PERF_MAX_STACK_DEPTH;
+
 bool test_attr__enabled;
 
 bool perf_host  = true;
index 26a9246..88f607a 100644 (file)
@@ -267,6 +267,7 @@ void sighandler_dump_stack(int sig);
 
 extern unsigned int page_size;
 extern int cacheline_size;
+extern unsigned int sysctl_perf_event_max_stack;
 
 struct parse_tag {
        char tag;