perf core: Per event callchain limit
author Arnaldo Carvalho de Melo <acme@redhat.com>
Thu, 28 Apr 2016 16:16:33 +0000 (13:16 -0300)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Mon, 30 May 2016 15:41:44 +0000 (12:41 -0300)
In addition to being able to control the system-wide maximum depth via
/proc/sys/kernel/perf_event_max_stack, we are now able to ask for
different depths per event, using perf_event_attr.sample_max_stack.

This uses a u16 hole at the end of perf_event_attr. When
perf_event_attr.sample_type has PERF_SAMPLE_CALLCHAIN set, a
sample_max_stack of zero means use perf_event_max_stack; otherwise the
value is bounds checked under callchain_mutex.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lkml.kernel.org/n/tip-kolmn1yo40p7jhswxwrc7rrd@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
include/linux/perf_event.h
include/uapi/linux/perf_event.h
kernel/bpf/stackmap.c
kernel/events/callchain.c
kernel/events/core.c

index 6b87be9..0e43355 100644 (file)
@@ -1076,7 +1076,7 @@ extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct
 extern struct perf_callchain_entry *
 get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
                   u32 max_stack, bool crosstask, bool add_mark);
-extern int get_callchain_buffers(void);
+extern int get_callchain_buffers(int max_stack);
 extern void put_callchain_buffers(void);
 
 extern int sysctl_perf_event_max_stack;
index 36ce552..c66a485 100644 (file)
@@ -276,6 +276,9 @@ enum perf_event_read_format {
 
 /*
  * Hardware event_id to monitor via a performance monitoring event:
+ *
+ * @sample_max_stack: Max number of frame pointers in a callchain,
+ *                   should be <= /proc/sys/kernel/perf_event_max_stack
  */
 struct perf_event_attr {
 
@@ -385,7 +388,8 @@ struct perf_event_attr {
         * Wakeup watermark for AUX area
         */
        __u32   aux_watermark;
-       __u32   __reserved_2;   /* align to __u64 */
+       __u16   sample_max_stack;
+       __u16   __reserved_2;   /* align to __u64 */
 };
 
 #define perf_flags(attr)       (*(&(attr)->read_format + 1))
index a82d760..f1de5c1 100644 (file)
@@ -99,7 +99,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
        if (err)
                goto free_smap;
 
-       err = get_callchain_buffers();
+       err = get_callchain_buffers(sysctl_perf_event_max_stack);
        if (err)
                goto free_smap;
 
index 179ef46..e9fdb52 100644 (file)
@@ -104,7 +104,7 @@ fail:
        return -ENOMEM;
 }
 
-int get_callchain_buffers(void)
+int get_callchain_buffers(int event_max_stack)
 {
        int err = 0;
        int count;
@@ -121,6 +121,15 @@ int get_callchain_buffers(void)
                /* If the allocation failed, give up */
                if (!callchain_cpus_entries)
                        err = -ENOMEM;
+               /*
+                * If requesting per event more than the global cap,
+                * return a different error to help userspace figure
+                * this out.
+                *
+                * And also do it here so that we have &callchain_mutex held.
+                */
+               if (event_max_stack > sysctl_perf_event_max_stack)
+                       err = -EOVERFLOW;
                goto exit;
        }
 
@@ -174,11 +183,12 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs)
        bool user   = !event->attr.exclude_callchain_user;
        /* Disallow cross-task user callchains. */
        bool crosstask = event->ctx->task && event->ctx->task != current;
+       const u32 max_stack = event->attr.sample_max_stack;
 
        if (!kernel && !user)
                return NULL;
 
-       return get_perf_callchain(regs, 0, kernel, user, sysctl_perf_event_max_stack, crosstask, true);
+       return get_perf_callchain(regs, 0, kernel, user, max_stack, crosstask, true);
 }
 
 struct perf_callchain_entry *
index 050a290..79363f2 100644 (file)
@@ -8843,7 +8843,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 
        if (!event->parent) {
                if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
-                       err = get_callchain_buffers();
+                       err = get_callchain_buffers(attr->sample_max_stack);
                        if (err)
                                goto err_addr_filters;
                }
@@ -9165,6 +9165,9 @@ SYSCALL_DEFINE5(perf_event_open,
                        return -EINVAL;
        }
 
+       if (!attr.sample_max_stack)
+               attr.sample_max_stack = sysctl_perf_event_max_stack;
+
        /*
         * In cgroup mode, the pid argument is used to pass the fd
         * opened to the cgroup directory in cgroupfs. The cpu argument