Merge tag 'char-misc-3.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregk...
[cascardo/linux.git] / tools / perf / builtin-trace.c
index f954c26..a6c3752 100644 (file)
@@ -1108,6 +1108,7 @@ struct syscall {
        struct event_format *tp_format;
        const char          *name;
        bool                filtered;
+       bool                is_exit;
        struct syscall_fmt  *fmt;
        size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        void                **arg_parm;
@@ -1132,6 +1133,7 @@ struct thread_trace {
        u64               exit_time;
        bool              entry_pending;
        unsigned long     nr_events;
+       unsigned long     pfmaj, pfmin;
        char              *entry_str;
        double            runtime_ms;
        struct {
@@ -1177,6 +1179,9 @@ fail:
        return NULL;
 }
 
+#define TRACE_PFMAJ            (1 << 0) /* trace_pgfaults bit: major page faults */
+#define TRACE_PFMIN            (1 << 1) /* trace_pgfaults bit: minor page faults */
+
 struct trace {
        struct perf_tool        tool;
        struct {
@@ -1211,6 +1216,8 @@ struct trace {
        bool                    summary_only;
        bool                    show_comm;
        bool                    show_tool_stats;
+       bool                    trace_syscalls;
+       int                     trace_pgfaults;
 };
 
 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
@@ -1276,11 +1283,11 @@ static const char *thread__fd_path(struct thread *thread, int fd,
        if (fd < 0)
                return NULL;
 
-       if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
+       if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
                if (!trace->live)
                        return NULL;
                ++trace->stats.proc_getname;
-               if (thread__read_fd_path(thread, fd)) {
+               if (thread__read_fd_path(thread, fd))
                        return NULL;
        }
 
@@ -1473,6 +1480,8 @@ static int trace__read_syscall_info(struct trace *trace, int id)
        if (sc->tp_format == NULL)
                return -1;
 
+       sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
+
        return syscall__set_arg_fmts(sc);
 }
 
@@ -1535,6 +1544,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
 }
 
 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
+                                 union perf_event *event,
                                   struct perf_sample *sample); /* signature of the callbacks stored in evsel->handler */
 
 static struct syscall *trace__syscall_info(struct trace *trace,
@@ -1607,6 +1617,7 @@ static void thread__update_stats(struct thread_trace *ttrace,
 }
 
 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
+                           union perf_event *event __maybe_unused,
                            struct perf_sample *sample)
 {
        char *msg;
@@ -1629,7 +1640,6 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
                return -1;
 
        args = perf_evsel__sc_tp_ptr(evsel, args, sample);
-       ttrace = thread->priv;
 
        if (ttrace->entry_str == NULL) {
                ttrace->entry_str = malloc(1024);
@@ -1644,7 +1654,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
        printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
                                           args, trace, thread);
 
-       if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
+       if (sc->is_exit) {
                if (!trace->duration_filter && !trace->summary_only) {
                        trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
                        fprintf(trace->output, "%-70s\n", ttrace->entry_str);
@@ -1656,6 +1666,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 }
 
 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
+                          union perf_event *event __maybe_unused,
                           struct perf_sample *sample)
 {
        int ret;
@@ -1687,8 +1698,6 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
                ++trace->stats.vfs_getname;
        }
 
-       ttrace = thread->priv;
-
        ttrace->exit_time = sample->time;
 
        if (ttrace->entry_time) {
@@ -1735,6 +1744,7 @@ out:
 }
 
 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
+                             union perf_event *event __maybe_unused,
                              struct perf_sample *sample)
 {
        trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
@@ -1742,6 +1752,7 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
 }
 
 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
+                                    union perf_event *event __maybe_unused,
                                     struct perf_sample *sample)
 {
         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
@@ -1768,6 +1779,80 @@ out_dump:
        return 0;
 }
 
+static void print_location(FILE *f, struct perf_sample *sample,
+                          struct addr_location *al,
+                          bool print_dso, bool print_sym)
+{
+       /* Optional "<dso long name>@" prefix, only when al->map is set. */
+       if ((verbose || print_dso) && al->map)
+               fprintf(f, "%s@", al->map->dso->long_name);
+       /* Then "sym+0xoff" if wanted and al->sym is set, else al->addr, else sample->addr. */
+       if ((verbose || print_sym) && al->sym)
+               fprintf(f, "%s+0x%" PRIx64, al->sym->name,
+                       al->addr - al->sym->start);
+       else if (al->map)
+               fprintf(f, "0x%" PRIx64, al->addr);
+       else
+               fprintf(f, "0x%" PRIx64, sample->addr);
+}
+
+static int trace__pgfault(struct trace *trace,
+                         struct perf_evsel *evsel,
+                         union perf_event *event,
+                         struct perf_sample *sample)
+{
+       struct thread *thread;
+       u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+       struct addr_location al;
+       char map_type = 'd';    /* stays 'd' if a MAP__VARIABLE map covers sample->addr */
+       struct thread_trace *ttrace;
+       /* Page-fault sample handler: bump the per-thread counter, then print one line. */
+       thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+       ttrace = thread__trace(thread, trace->output);
+       if (ttrace == NULL)
+               return -1;
+       /* Any config other than PAGE_FAULTS_MAJ is counted as a minor fault. */
+       if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
+               ttrace->pfmaj++;
+       else
+               ttrace->pfmin++;
+
+       if (trace->summary_only)
+               return 0;       /* counters only, no per-event line */
+       /* Where the fault happened: look up the sampled IP in the MAP__FUNCTION maps. */
+       thread__find_addr_location(thread, trace->host, cpumode, MAP__FUNCTION,
+                             sample->ip, &al);
+
+       trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
+
+       fprintf(trace->output, "%sfault [",
+               evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
+               "maj" : "min");
+
+       print_location(trace->output, sample, &al, false, true);
+
+       fprintf(trace->output, "] => ");
+       /* What was accessed: try MAP__VARIABLE first, then fall back to MAP__FUNCTION. */
+       thread__find_addr_location(thread, trace->host, cpumode, MAP__VARIABLE,
+                                  sample->addr, &al);
+
+       if (!al.map) {
+               thread__find_addr_location(thread, trace->host, cpumode,
+                                          MAP__FUNCTION, sample->addr, &al);
+
+               if (al.map)
+                       map_type = 'x'; /* address lies in a MAP__FUNCTION map */
+               else
+                       map_type = '?'; /* neither lookup found a map */
+       }
+
+       print_location(trace->output, sample, &al, true, false);
+       /* Suffix is map_type plus al.level; al.level is presumably filled in by the lookups above. */
+       fprintf(trace->output, " (%c%c)\n", map_type, al.level);
+
+       return 0;
+}
+
 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
 {
        if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
@@ -1781,7 +1866,7 @@ static bool skip_sample(struct trace *trace, struct perf_sample *sample)
 }
 
 static int trace__process_sample(struct perf_tool *tool,
-                                union perf_event *event __maybe_unused,
+                                union perf_event *event,
                                 struct perf_sample *sample,
                                 struct perf_evsel *evsel,
                                 struct machine *machine __maybe_unused)
@@ -1799,7 +1884,7 @@ static int trace__process_sample(struct perf_tool *tool,
 
        if (handler) {
                ++trace->nr_events;
-               handler(trace, evsel, sample);
+               handler(trace, evsel, event, sample);
        }
 
        return err;
@@ -1826,7 +1911,7 @@ static int parse_target_str(struct trace *trace)
        return 0;
 }
 
-static int trace__record(int argc, const char **argv)
+static int trace__record(struct trace *trace, int argc, const char **argv)
 {
        unsigned int rec_argc, i, j;
        const char **rec_argv;
@@ -1835,34 +1920,54 @@ static int trace__record(int argc, const char **argv)
                "-R",
                "-m", "1024",
                "-c", "1",
-               "-e",
        };
 
+       const char * const sc_args[] = { "-e", };
+       unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
+       const char * const majpf_args[] = { "-e", "major-faults" };
+       unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
+       const char * const minpf_args[] = { "-e", "minor-faults" };
+       unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
+
        /* +1 is for the event string below */
-       rec_argc = ARRAY_SIZE(record_args) + 1 + argc;
+       rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
+               majpf_args_nr + minpf_args_nr + argc;
        rec_argv = calloc(rec_argc + 1, sizeof(char *));
 
        if (rec_argv == NULL)
                return -ENOMEM;
 
+       j = 0;
        for (i = 0; i < ARRAY_SIZE(record_args); i++)
-               rec_argv[i] = record_args[i];
-
-       /* event string may be different for older kernels - e.g., RHEL6 */
-       if (is_valid_tracepoint("raw_syscalls:sys_enter"))
-               rec_argv[i] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
-       else if (is_valid_tracepoint("syscalls:sys_enter"))
-               rec_argv[i] = "syscalls:sys_enter,syscalls:sys_exit";
-       else {
-               pr_err("Neither raw_syscalls nor syscalls events exist.\n");
-               return -1;
+               rec_argv[j++] = record_args[i];
+
+       if (trace->trace_syscalls) {
+               for (i = 0; i < sc_args_nr; i++)
+                       rec_argv[j++] = sc_args[i];
+
+               /* event string may be different for older kernels - e.g., RHEL6 */
+               if (is_valid_tracepoint("raw_syscalls:sys_enter"))
+                       rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
+               else if (is_valid_tracepoint("syscalls:sys_enter"))
+                       rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
+               else {
+                       pr_err("Neither raw_syscalls nor syscalls events exist.\n");
+                       return -1;
+               }
        }
-       i++;
 
-       for (j = 0; j < (unsigned int)argc; j++, i++)
-               rec_argv[i] = argv[j];
+       if (trace->trace_pgfaults & TRACE_PFMAJ)
+               for (i = 0; i < majpf_args_nr; i++)
+                       rec_argv[j++] = majpf_args[i];
+
+       if (trace->trace_pgfaults & TRACE_PFMIN)
+               for (i = 0; i < minpf_args_nr; i++)
+                       rec_argv[j++] = minpf_args[i];
+
+       for (i = 0; i < (unsigned int)argc; i++)
+               rec_argv[j++] = argv[i];
 
-       return cmd_record(i, rec_argv, NULL);
+       return cmd_record(j, rec_argv, NULL);
 }
 
 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
@@ -1882,6 +1987,30 @@ static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
        perf_evlist__add(evlist, evsel);
 }
 
+static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
+                                   u64 config)
+{
+       struct perf_evsel *evsel;
+       struct perf_event_attr attr = {
+               .type = PERF_TYPE_SOFTWARE,
+               .mmap_data = 1, /* NOTE(review): presumably so data mappings can be resolved - confirm */
+       };
+       /* Add a software event for 'config' to evlist, handled by trace__pgfault. */
+       attr.config = config;
+       attr.sample_period = 1; /* period of 1: presumably one sample per counted fault - confirm */
+
+       event_attr_init(&attr);
+
+       evsel = perf_evsel__new(&attr);
+       if (!evsel)
+               return -ENOMEM; /* nothing was added to evlist */
+
+       evsel->handler = trace__pgfault;
+       perf_evlist__add(evlist, evsel);
+
+       return 0;
+}
+
 static int trace__run(struct trace *trace, int argc, const char **argv)
 {
        struct perf_evlist *evlist = perf_evlist__new();
@@ -1897,10 +2026,21 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
                goto out;
        }
 
-       if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
+       if (trace->trace_syscalls &&
+           perf_evlist__add_syscall_newtp(evlist, trace__sys_enter,
+                                          trace__sys_exit))
                goto out_error_tp;
 
-       perf_evlist__add_vfs_getname(evlist);
+       if (trace->trace_syscalls)
+               perf_evlist__add_vfs_getname(evlist);
+
+       if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
+           perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ))
+               goto out_error_tp;
+
+       if ((trace->trace_pgfaults & TRACE_PFMIN) &&
+           perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
+               goto out_error_tp;
 
        if (trace->sched &&
                perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
@@ -1982,7 +2122,8 @@ again:
                                goto next_event;
                        }
 
-                       if (sample.raw_data == NULL) {
+                       if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
+                           sample.raw_data == NULL) {
                                fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
                                       perf_evsel__name(evsel), sample.tid,
                                       sample.cpu, sample.raw_size);
@@ -1990,7 +2131,7 @@ again:
                        }
 
                        handler = evsel->handler;
-                       handler(trace, evsel, &sample);
+                       handler(trace, evsel, event, &sample);
 next_event:
                        perf_evlist__mmap_consume(evlist, i);
 
@@ -2093,13 +2234,10 @@ static int trace__replay(struct trace *trace)
        if (evsel == NULL)
                evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
                                                             "syscalls:sys_enter");
-       if (evsel == NULL) {
-               pr_err("Data file does not have raw_syscalls:sys_enter event\n");
-               goto out;
-       }
 
-       if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
-           perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
+       if (evsel &&
+           (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
+           perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
                pr_err("Error during initialize raw_syscalls:sys_enter event\n");
                goto out;
        }
@@ -2109,15 +2247,19 @@ static int trace__replay(struct trace *trace)
        if (evsel == NULL)
                evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
                                                             "syscalls:sys_exit");
-       if (evsel == NULL) {
-               pr_err("Data file does not have raw_syscalls:sys_exit event\n");
+       if (evsel &&
+           (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
+           perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
+               pr_err("Error during initialize raw_syscalls:sys_exit event\n");
                goto out;
        }
 
-       if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
-           perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
-               pr_err("Error during initialize raw_syscalls:sys_exit event\n");
-               goto out;
+       evlist__for_each(session->evlist, evsel) {
+               if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
+                   (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
+                    evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
+                    evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
+                       evsel->handler = trace__pgfault;
        }
 
        err = parse_target_str(trace);
@@ -2217,6 +2359,10 @@ static int trace__fprintf_one_thread(struct thread *thread, void *priv)
        printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
        printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
        printed += fprintf(fp, "%.1f%%", ratio);
+       if (ttrace->pfmaj)
+               printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
+       if (ttrace->pfmin)
+               printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
        printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
        printed += thread__dump_stats(ttrace, trace, fp);
 
@@ -2264,6 +2410,23 @@ static int trace__open_output(struct trace *trace, const char *filename)
        return trace->output == NULL ? -errno : 0;
 }
 
+static int parse_pagefaults(const struct option *opt, const char *str,
+                           int unset __maybe_unused)
+{
+       int *trace_pgfaults = opt->value;
+       /* Option callback: OR the TRACE_PF* bits named by str into the int behind opt->value. */
+       if (strcmp(str, "all") == 0)
+               *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
+       else if (strcmp(str, "maj") == 0)
+               *trace_pgfaults |= TRACE_PFMAJ;
+       else if (strcmp(str, "min") == 0)
+               *trace_pgfaults |= TRACE_PFMIN;
+       else
+               return -1;      /* str is not "all", "maj" or "min" */
+
+       return 0;
+}
+
 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 {
        const char * const trace_usage[] = {
@@ -2293,6 +2456,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
                },
                .output = stdout,
                .show_comm = true,
+               .trace_syscalls = true,
        };
        const char *output_name = NULL;
        const char *ev_qualifier_str = NULL;
@@ -2330,20 +2494,34 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
                    "Show only syscall summary with statistics"),
        OPT_BOOLEAN('S', "with-summary", &trace.summary,
                    "Show all syscalls and summary with statistics"),
+       OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
+                    "Trace pagefaults", parse_pagefaults, "maj"),
+       OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
        OPT_END()
        };
        int err;
        char bf[BUFSIZ];
 
-       if ((argc > 1) && (strcmp(argv[1], "record") == 0))
-               return trace__record(argc-2, &argv[2]);
+       argc = parse_options(argc, argv, trace_options, trace_usage,
+                            PARSE_OPT_STOP_AT_NON_OPTION);
 
-       argc = parse_options(argc, argv, trace_options, trace_usage, 0);
+       if (trace.trace_pgfaults) {
+               trace.opts.sample_address = true;
+               trace.opts.sample_time = true;
+       }
+
+       if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
+               return trace__record(&trace, argc-1, &argv[1]);
 
        /* summary_only implies summary option, but don't overwrite summary if set */
        if (trace.summary_only)
                trace.summary = trace.summary_only;
 
+       if (!trace.trace_syscalls && !trace.trace_pgfaults) {
+               pr_err("Please specify something to trace.\n");
+               return -1;
+       }
+
        if (output_name != NULL) {
                err = trace__open_output(&trace, output_name);
                if (err < 0) {