tile: use pmd_pfn() instead of casting via pte_t
[cascardo/linux.git] / tools / perf / builtin-record.c
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #include "builtin.h"
9
10 #include "perf.h"
11
12 #include "util/build-id.h"
13 #include "util/util.h"
14 #include "util/parse-options.h"
15 #include "util/parse-events.h"
16
17 #include "util/header.h"
18 #include "util/event.h"
19 #include "util/evlist.h"
20 #include "util/evsel.h"
21 #include "util/debug.h"
22 #include "util/session.h"
23 #include "util/tool.h"
24 #include "util/symbol.h"
25 #include "util/cpumap.h"
26 #include "util/thread_map.h"
27
28 #include <unistd.h>
29 #include <sched.h>
30 #include <sys/mman.h>
31
#ifndef HAVE_ON_EXIT
/*
 * Minimal on_exit() replacement for C libraries that don't provide it:
 * unlike atexit() handlers, the registered callbacks receive the exit
 * status.  exit() is wrapped by the macro below so the status can be
 * captured in __exitcode before the real exit() runs the atexit chain.
 */
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
#endif
static int __on_exit_count = 0;
typedef void (*on_exit_func_t) (int, void *);
static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
static void *__on_exit_args[ATEXIT_MAX];
static int __exitcode = 0;
static void __handle_on_exit_funcs(void);
static int on_exit(on_exit_func_t function, void *arg);
/* Record the exit status, then invoke the real exit(). */
#define exit(x) (exit)(__exitcode = (x))

/*
 * Register 'function' to run at exit with 'arg' and the exit status.
 * Returns 0 on success, -ENOMEM when the fixed-size table is full.
 */
static int on_exit(on_exit_func_t function, void *arg)
{
	if (__on_exit_count == ATEXIT_MAX)
		return -ENOMEM;
	else if (__on_exit_count == 0)
		atexit(__handle_on_exit_funcs);	/* arm the dispatcher once */
	__on_exit_funcs[__on_exit_count] = function;
	__on_exit_args[__on_exit_count++] = arg;
	return 0;
}

/* atexit() trampoline: run registered callbacks in registration order. */
static void __handle_on_exit_funcs(void)
{
	int i;
	for (i = 0; i < __on_exit_count; i++)
		__on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
}
#endif
63
/*
 * Per-run state for 'perf record'.  'tool' must be the first member so
 * the perf_tool callbacks can recover this struct via container_of()
 * (see process_synthesized_event()).
 */
struct perf_record {
	struct perf_tool        tool;
	struct perf_record_opts opts;
	u64                     bytes_written;	/* event bytes written to 'output' so far */
	const char              *output_name;	/* perf.data path; NULL/"-" selects pipe mode */
	struct perf_evlist      *evlist;
	struct perf_session     *session;
	const char              *progname;	/* argv[0], used in psignal() reporting */
	int                     output;		/* output file descriptor */
	unsigned int            page_size;	/* sysconf(_SC_PAGE_SIZE) */
	int                     realtime_prio;	/* non-zero: SCHED_FIFO priority to request */
	bool                    no_buildid;	/* -B: skip build-id collection */
	bool                    no_buildid_cache; /* -N: don't update the build-id cache */
	long                    samples;	/* mmap reads that found new data */
	off_t                   post_processing_offset;	/* file offset where event data begins */
};
80
81 static void advance_output(struct perf_record *rec, size_t size)
82 {
83         rec->bytes_written += size;
84 }
85
86 static int write_output(struct perf_record *rec, void *buf, size_t size)
87 {
88         while (size) {
89                 int ret = write(rec->output, buf, size);
90
91                 if (ret < 0) {
92                         pr_err("failed to write\n");
93                         return -1;
94                 }
95
96                 size -= ret;
97                 buf += ret;
98
99                 rec->bytes_written += ret;
100         }
101
102         return 0;
103 }
104
105 static int process_synthesized_event(struct perf_tool *tool,
106                                      union perf_event *event,
107                                      struct perf_sample *sample __maybe_unused,
108                                      struct machine *machine __maybe_unused)
109 {
110         struct perf_record *rec = container_of(tool, struct perf_record, tool);
111         if (write_output(rec, event, event->header.size) < 0)
112                 return -1;
113
114         return 0;
115 }
116
/*
 * Drain one mmap ring buffer: copy everything between our last read
 * position (md->prev) and the kernel's current head into the output
 * file, then advance the tail so the kernel can reuse the space.
 * Returns 0 on success (including nothing to do), -1 on write failure.
 */
static int perf_record__mmap_read(struct perf_record *rec,
				   struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	/* event data starts one page past base (first page is the control page) */
	unsigned char *data = md->base + rec->page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	if (old == head)
		return 0;

	rec->samples++;

	size = head - old;

	if ((old & md->mask) + size != (head & md->mask)) {
		/* span wraps around: flush the chunk up to the buffer end first */
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		if (write_output(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	/* remaining (or only) contiguous chunk */
	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	if (write_output(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = old;
	/* publish the new tail so the kernel may overwrite consumed data */
	perf_mmap__write_tail(md, old);

out:
	return rc;
}
160
/* Flags set by the signal handlers below and polled by the record loop. */
static volatile int done = 0;		/* request to stop recording */
static volatile int signr = -1;		/* which signal asked us to stop */
static volatile int child_finished = 0;	/* forked workload has exited */

static void sig_handler(int sig)
{
	/* SIGCHLD means the workload finished, not that the user aborted */
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}
173
/*
 * on_exit() callback: terminate (if still running) and reap the forked
 * workload, then restore the default disposition of the signal that
 * stopped us.  SIGUSR1 and normal exits need no disposition reset.
 */
static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
{
	struct perf_record *rec = arg;
	int status;

	if (rec->evlist->workload.pid > 0) {
		/* recording was interrupted: make sure the child stops too */
		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), rec->progname);
	}

	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
}
193
/*
 * Configure and open every event in the evlist, apply event filters and
 * mmap the ring buffers.  Events the kernel rejects are retried once
 * per fallback offered by perf_evsel__fallback().
 * Returns 0 on success, a negative errno/-EINVAL/-1 on failure.
 */
static int perf_record__open(struct perf_record *rec)
{
	char msg[512];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct perf_record_opts *opts = &rec->opts;
	int rc = 0;

	perf_evlist__config(evlist, opts);

	list_for_each_entry(pos, &evlist->entries, node) {
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
			/* try a weaker but equivalent event before giving up */
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		rc = -1;
		goto out;
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
		/* distinguish the common mmap failure causes for the user */
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %d)\n", opts->mmap_pages);
			rc = -errno;
		} else if (!is_power_of_2(opts->mmap_pages) &&
			   (opts->mmap_pages != UINT_MAX)) {
			/* UINT_MAX means the user didn't specify -m at all */
			pr_err("--mmap_pages/-m value must be a power of two.");
			rc = -EINVAL;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
			rc = -errno;
		}
		goto out;
	}

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}
253
254 static int process_buildids(struct perf_record *rec)
255 {
256         u64 size = lseek(rec->output, 0, SEEK_CUR);
257
258         if (size == 0)
259                 return 0;
260
261         rec->session->fd = rec->output;
262         return __perf_session__process_events(rec->session, rec->post_processing_offset,
263                                               size - rec->post_processing_offset,
264                                               size, &build_id__mark_dso_hit_ops);
265 }
266
/*
 * on_exit() callback run with the process exit status: on a successful
 * non-pipe run, finalize the perf.data header (data size, build-ids),
 * rewrite it at full size, and release session/evlist resources.
 * Pipe output has no seekable header, so nothing is done for it.
 */
static void perf_record__exit(int status, void *arg)
{
	struct perf_record *rec = arg;

	/* nothing to finalize on error exits */
	if (status != 0)
		return;

	if (!rec->opts.pipe_output) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		perf_session__write_header(rec->session, rec->evlist,
					   rec->output, true);
		perf_session__delete(rec->session);
		perf_evlist__delete(rec->evlist);
		symbol__exit();
	}
}
286
/* machines__process_guests() callback: synthesize guest kernel state. */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for guest kernel when processing subcommand record&report,
	 * we arrange module mmap prior to guest kernel mmap and trigger
	 * a preload dso because default guest module symbols are loaded
	 * from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 * method is used to avoid symbol missing when the first addr is
	 * in module instead of in guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}
318
/*
 * PERF_RECORD_FINISHED_ROUND marker, written after each full pass over
 * the mmap buffers (see perf_record__mmap_read_all()).
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
323
324 static int perf_record__mmap_read_all(struct perf_record *rec)
325 {
326         int i;
327         int rc = 0;
328
329         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
330                 if (rec->evlist->mmap[i].base) {
331                         if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
332                                 rc = -1;
333                                 goto out;
334                         }
335                 }
336         }
337
338         if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
339                 rc = write_output(rec, &finished_round_event,
340                                   sizeof(finished_round_event));
341
342 out:
343         return rc;
344 }
345
/*
 * Core of 'perf record': set up the output file and session header,
 * fork the (stopped) workload, open and mmap the counters, synthesize
 * pre-existing system state (kernel map, modules, threads), then loop
 * draining the mmap rings until the workload exits or a signal sets
 * 'done'.  Returns 0 on success, negative after deleting the session.
 */
static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
	struct stat st;
	int flags;
	int err, output, feat;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct perf_record_opts *opts = &rec->opts;
	struct perf_evlist *evsel_list = rec->evlist;
	const char *output_name = rec->output_name;
	struct perf_session *session;
	bool disabled = false;

	rec->progname = argv[0];

	rec->page_size = sysconf(_SC_PAGE_SIZE);

	/* teardown must run even when a callee calls exit() directly */
	on_exit(perf_record__sig_exit, rec);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);
	signal(SIGTERM, sig_handler);

	/* no -o: pipe mode if stdout is a fifo, else default to perf.data */
	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			opts->pipe_output = true;
		else
			rec->output_name = output_name = "perf.data";
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			opts->pipe_output = true;
		else if (!stat(output_name, &st) && st.st_size) {
			/* keep the previous data around as <name>.old */
			char oldname[PATH_MAX];
			snprintf(oldname, sizeof(oldname), "%s.old",
				 output_name);
			unlink(oldname);
			rename(output_name, oldname);
		}
	}

	flags = O_CREAT|O_RDWR|O_TRUNC;

	if (opts->pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		return -1;
	}

	rec->output = output;

	session = perf_session__new(output_name, O_WRONLY,
				    true, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	rec->session = session;

	/* start with every header feature set, then clear inapplicable ones */
	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&evsel_list->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	/* fork the workload now; it is kicked off further down */
	if (forks) {
		err = perf_evlist__prepare_workload(evsel_list, &opts->target,
						    argv, opts->pipe_output,
						    true);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			goto out_delete_session;
		}
	}

	if (perf_record__open(rec) != 0) {
		err = -1;
		goto out_delete_session;
	}

	if (!evsel_list->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	/*
	 * perf_session__delete(session) will be called at perf_record__exit()
	 */
	on_exit(perf_record__exit, rec);

	if (opts->pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			goto out_delete_session;
	} else {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			goto out_delete_session;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_delete_session;
	}

	/* events written past this point are rescanned by process_buildids() */
	rec->post_processing_offset = lseek(output, 0, SEEK_CUR);

	machine = &session->machines.host;

	if (opts->pipe_output) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out_delete_session;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out_delete_session;
			}
			/* tracing data was written directly; account for it */
			advance_output(rec, err);
		}
	}

	/* fall back to _stext when /proc/kallsyms has no _text */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	/* synthesize mmap/comm events for already-running targets */
	if (perf_target__has_task(&opts->target))
		err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
						  process_synthesized_event,
						  machine);
	else if (perf_target__has_cpu(&opts->target))
		err = perf_event__synthesize_threads(tool, process_synthesized_event,
					       machine);
	else /* command specified */
		err = 0;

	if (err != 0)
		goto out_delete_session;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_delete_session;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!perf_target__none(&opts->target))
		perf_evlist__enable(evsel_list);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(evsel_list);

	for (;;) {
		int hits = rec->samples;

		if (perf_record__mmap_read_all(rec) < 0) {
			err = -1;
			goto out_delete_session;
		}

		/* no new data this pass: stop if asked, else block in poll() */
		if (hits == rec->samples) {
			if (done)
				break;
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !perf_target__none(&opts->target)) {
			perf_evlist__disable(evsel_list);
			disabled = true;
		}
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)rec->bytes_written / 1024.0 / 1024.0,
		output_name,
		rec->bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}
602
/* Helpers for building the branch_modes[] table below. */
#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

/* Maps a -b/--branch-filter token to its PERF_SAMPLE_BRANCH_* flag. */
struct branch_mode {
	const char *name;
	int mode;
};

static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
	BRANCH_END	/* sentinel */
};
623
624 static int
625 parse_branch_stack(const struct option *opt, const char *str, int unset)
626 {
627 #define ONLY_PLM \
628         (PERF_SAMPLE_BRANCH_USER        |\
629          PERF_SAMPLE_BRANCH_KERNEL      |\
630          PERF_SAMPLE_BRANCH_HV)
631
632         uint64_t *mode = (uint64_t *)opt->value;
633         const struct branch_mode *br;
634         char *s, *os = NULL, *p;
635         int ret = -1;
636
637         if (unset)
638                 return 0;
639
640         /*
641          * cannot set it twice, -b + --branch-filter for instance
642          */
643         if (*mode)
644                 return -1;
645
646         /* str may be NULL in case no arg is passed to -b */
647         if (str) {
648                 /* because str is read-only */
649                 s = os = strdup(str);
650                 if (!s)
651                         return -1;
652
653                 for (;;) {
654                         p = strchr(s, ',');
655                         if (p)
656                                 *p = '\0';
657
658                         for (br = branch_modes; br->name; br++) {
659                                 if (!strcasecmp(s, br->name))
660                                         break;
661                         }
662                         if (!br->name) {
663                                 ui__warning("unknown branch filter %s,"
664                                             " check man page\n", s);
665                                 goto error;
666                         }
667
668                         *mode |= br->mode;
669
670                         if (!p)
671                                 break;
672
673                         s = p + 1;
674                 }
675         }
676         ret = 0;
677
678         /* default to any branch */
679         if ((*mode & ~ONLY_PLM) == 0) {
680                 *mode = PERF_SAMPLE_BRANCH_ANY;
681         }
682 error:
683         free(os);
684         return ret;
685 }
686
687 #ifdef LIBUNWIND_SUPPORT
688 static int get_stack_size(char *str, unsigned long *_size)
689 {
690         char *endptr;
691         unsigned long size;
692         unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
693
694         size = strtoul(str, &endptr, 0);
695
696         do {
697                 if (*endptr)
698                         break;
699
700                 size = round_up(size, sizeof(u64));
701                 if (!size || size > max_size)
702                         break;
703
704                 *_size = size;
705                 return 0;
706
707         } while (0);
708
709         pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
710                max_size, str);
711         return -1;
712 }
713 #endif /* LIBUNWIND_SUPPORT */
714
715 int record_parse_callchain_opt(const struct option *opt,
716                                const char *arg, int unset)
717 {
718         struct perf_record_opts *opts = opt->value;
719         char *tok, *name, *saveptr = NULL;
720         char *buf;
721         int ret = -1;
722
723         /* --no-call-graph */
724         if (unset)
725                 return 0;
726
727         /* We specified default option if none is provided. */
728         BUG_ON(!arg);
729
730         /* We need buffer that we know we can write to. */
731         buf = malloc(strlen(arg) + 1);
732         if (!buf)
733                 return -ENOMEM;
734
735         strcpy(buf, arg);
736
737         tok = strtok_r((char *)buf, ",", &saveptr);
738         name = tok ? : (char *)buf;
739
740         do {
741                 /* Framepointer style */
742                 if (!strncmp(name, "fp", sizeof("fp"))) {
743                         if (!strtok_r(NULL, ",", &saveptr)) {
744                                 opts->call_graph = CALLCHAIN_FP;
745                                 ret = 0;
746                         } else
747                                 pr_err("callchain: No more arguments "
748                                        "needed for -g fp\n");
749                         break;
750
751 #ifdef LIBUNWIND_SUPPORT
752                 /* Dwarf style */
753                 } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
754                         const unsigned long default_stack_dump_size = 8192;
755
756                         ret = 0;
757                         opts->call_graph = CALLCHAIN_DWARF;
758                         opts->stack_dump_size = default_stack_dump_size;
759
760                         tok = strtok_r(NULL, ",", &saveptr);
761                         if (tok) {
762                                 unsigned long size = 0;
763
764                                 ret = get_stack_size(tok, &size);
765                                 opts->stack_dump_size = size;
766                         }
767
768                         if (!ret)
769                                 pr_debug("callchain: stack dump size %d\n",
770                                          opts->stack_dump_size);
771 #endif /* LIBUNWIND_SUPPORT */
772                 } else {
773                         pr_err("callchain: Unknown -g option "
774                                "value: %s\n", arg);
775                         break;
776                 }
777
778         } while (0);
779
780         free(buf);
781
782         if (!ret)
783                 pr_debug("callchain: type %d\n", opts->call_graph);
784
785         return ret;
786 }
787
/* Usage strings shown by the option parser for 'perf record'. */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
793
/*
 * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
 * because we need to have access to it in perf_record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct perf_record record = {
	.opts = {
		/* UINT_MAX/ULLONG_MAX: "not set by the user" sentinels
		 * (see e.g. the mmap_pages check in perf_record__open()) */
		.mmap_pages          = UINT_MAX,
		.user_freq           = UINT_MAX,
		.user_interval       = ULLONG_MAX,
		.freq                = 4000,
		.target              = {
			.uses_mmap   = true,
		},
	},
};
815
/* -g option help text; dwarf mode is only offered with libunwind built in. */
#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "

#ifdef LIBUNWIND_SUPPORT
const char record_callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
#else
const char record_callchain_help[] = CALLCHAIN_HELP "[fp]";
#endif
823
824 /*
825  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
826  * with it and switch to use the library functions in perf_evlist that came
827  * from builtin-record.c, i.e. use perf_record_opts,
828  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
829  * using pipes, etc.
830  */
const struct option record_options[] = {
	/* Event selection and filtering */
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	/* Target selection: pid/tid/all-cpus/cpu-list/uid */
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	/* Sampling rate: fixed period (-c) or frequency (-F); see cmd_record() */
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
		     "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_DEFAULT('g', "call-graph", &record.opts,
			     "mode[,dump_size]", record_callchain_help,
			     &record_parse_callchain_opt, "fp"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	/* What to record per sample */
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	/* Build-id handling in the resulting perf.data */
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	/* Branch stack sampling (hardware LBR where available) */
	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_END()
};
896
897 int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
898 {
899         int err = -ENOMEM;
900         struct perf_evlist *evsel_list;
901         struct perf_record *rec = &record;
902         char errbuf[BUFSIZ];
903
904         evsel_list = perf_evlist__new();
905         if (evsel_list == NULL)
906                 return -ENOMEM;
907
908         rec->evlist = evsel_list;
909
910         argc = parse_options(argc, argv, record_options, record_usage,
911                             PARSE_OPT_STOP_AT_NON_OPTION);
912         if (!argc && perf_target__none(&rec->opts.target))
913                 usage_with_options(record_usage, record_options);
914
915         if (nr_cgroups && !rec->opts.target.system_wide) {
916                 ui__error("cgroup monitoring only available in"
917                           " system-wide mode\n");
918                 usage_with_options(record_usage, record_options);
919         }
920
921         symbol__init();
922
923         if (symbol_conf.kptr_restrict)
924                 pr_warning(
925 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
926 "check /proc/sys/kernel/kptr_restrict.\n\n"
927 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
928 "file is not found in the buildid cache or in the vmlinux path.\n\n"
929 "Samples in kernel modules won't be resolved at all.\n\n"
930 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
931 "even with a suitable vmlinux or kallsyms file.\n\n");
932
933         if (rec->no_buildid_cache || rec->no_buildid)
934                 disable_buildid_cache();
935
936         if (evsel_list->nr_entries == 0 &&
937             perf_evlist__add_default(evsel_list) < 0) {
938                 pr_err("Not enough memory for event selector list\n");
939                 goto out_symbol_exit;
940         }
941
942         err = perf_target__validate(&rec->opts.target);
943         if (err) {
944                 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
945                 ui__warning("%s", errbuf);
946         }
947
948         err = perf_target__parse_uid(&rec->opts.target);
949         if (err) {
950                 int saved_errno = errno;
951
952                 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
953                 ui__error("%s", errbuf);
954
955                 err = -saved_errno;
956                 goto out_symbol_exit;
957         }
958
959         err = -ENOMEM;
960         if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
961                 usage_with_options(record_usage, record_options);
962
963         if (rec->opts.user_interval != ULLONG_MAX)
964                 rec->opts.default_interval = rec->opts.user_interval;
965         if (rec->opts.user_freq != UINT_MAX)
966                 rec->opts.freq = rec->opts.user_freq;
967
968         /*
969          * User specified count overrides default frequency.
970          */
971         if (rec->opts.default_interval)
972                 rec->opts.freq = 0;
973         else if (rec->opts.freq) {
974                 rec->opts.default_interval = rec->opts.freq;
975         } else {
976                 ui__error("frequency and count are zero, aborting\n");
977                 err = -EINVAL;
978                 goto out_free_fd;
979         }
980
981         err = __cmd_record(&record, argc, argv);
982
983         perf_evlist__munmap(evsel_list);
984         perf_evlist__close(evsel_list);
985 out_free_fd:
986         perf_evlist__delete_maps(evsel_list);
987 out_symbol_exit:
988         symbol__exit();
989         return err;
990 }