From: Frederic Weisbecker Date: Tue, 11 Aug 2009 18:22:53 +0000 (+0200) Subject: tracing: Support for syscall events raw records in perfcounters X-Git-Tag: v2.6.32-rc1~654^2~84 X-Git-Url: http://git.cascardo.info/?a=commitdiff_plain;h=19007a67a64f9b3cbbd7024f972654ebf14daade;p=cascardo%2Flinux.git tracing: Support for syscall events raw records in perfcounters This brings support for raw syscall events in perfcounters. The arguments or exit value are saved as a raw sample using the PERF_SAMPLE_RAW attribute in a perf counter. Example (for now you must explicitly set the PERF_SAMPLE_RAW flag in perf record): perf record -e syscalls:sys_enter_open -f -F 1 -a perf report -D 0x2cbb8 [0x50]: event: 9 . . ... raw event: size 80 bytes . 0000: 09 00 00 00 02 00 50 00 20 e9 39 ab 0a 7f 00 00 ......P. .9.... . 0010: bc 14 00 00 bc 14 00 00 01 00 00 00 00 00 00 00 ............... . 0020: 2c 00 00 00 15 01 01 00 bc 14 00 00 bc 14 00 00 ,.............. ^ ^ ^ ^ ^ ^ ^ .......................... Event Size struct trace_entry . 0030: 00 00 00 00 46 98 43 02 00 00 00 00 80 08 00 00 ....F.C........ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ptr to file name open flags . 0040: 00 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 ............... ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ . 
open mode padding 0x2cbb8 [0x50]: PERF_EVENT_SAMPLE (IP, 2): 5308: 0x7f0aab39e920 period: 1 Signed-off-by: Frederic Weisbecker Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Jason Baron Cc: Masami Hiramatsu --- diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 9ee6386cf842..f837cccabcf7 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -301,6 +301,17 @@ struct trace_event event_syscall_exit = { }; #ifdef CONFIG_EVENT_PROFILE + +struct syscall_enter_record { + struct trace_entry entry; + unsigned long args[0]; +}; + +struct syscall_exit_record { + struct trace_entry entry; + unsigned long ret; +}; + static DECLARE_BITMAP(enabled_prof_enter_syscalls, FTRACE_SYSCALL_MAX); static DECLARE_BITMAP(enabled_prof_exit_syscalls, FTRACE_SYSCALL_MAX); static int sys_prof_refcount_enter; @@ -308,8 +319,10 @@ static int sys_prof_refcount_exit; static void prof_syscall_enter(struct pt_regs *regs, long id) { + struct syscall_enter_record *rec; struct syscall_metadata *sys_data; int syscall_nr; + int size; syscall_nr = syscall_get_nr(current, regs); if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) @@ -319,7 +332,24 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) if (!sys_data) return; - perf_tpcounter_event(sys_data->enter_id, 0, 1, NULL, 0); + /* get the size after alignment with the u32 buffer size field */ + size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec); + size = ALIGN(size + sizeof(u32), sizeof(u64)); + size -= sizeof(u32); + + do { + char raw_data[size]; + + /* zero the dead bytes from align to not leak stack to user */ + *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; + + rec = (struct syscall_enter_record *) raw_data; + tracing_generic_entry_update(&rec->entry, 0, 0); + rec->entry.type = sys_data->enter_id; + syscall_get_arguments(current, regs, 0, sys_data->nb_args, + (unsigned long 
*)&rec->args); + perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size); + } while(0); } int reg_prof_syscall_enter(char *name) @@ -364,6 +394,7 @@ void unreg_prof_syscall_enter(char *name) static void prof_syscall_exit(struct pt_regs *regs, long ret) { struct syscall_metadata *sys_data; + struct syscall_exit_record rec; int syscall_nr; syscall_nr = syscall_get_nr(current, regs); @@ -374,7 +405,11 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) if (!sys_data) return; - perf_tpcounter_event(sys_data->exit_id, 0, 1, NULL, 0); + tracing_generic_entry_update(&rec.entry, 0, 0); + rec.entry.type = sys_data->exit_id; + rec.ret = syscall_get_return_value(current, regs); + + perf_tpcounter_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec)); } int reg_prof_syscall_exit(char *name)