4 * Builtin record command: Record the profile of a workload
5 * (or a CPU, or a PID) into the perf.data output file - for
6 * later analysis via perf report.
12 #include "util/build-id.h"
13 #include "util/util.h"
14 #include <subcmd/parse-options.h>
15 #include "util/parse-events.h"
16 #include "util/config.h"
18 #include "util/callchain.h"
19 #include "util/cgroup.h"
20 #include "util/header.h"
21 #include "util/event.h"
22 #include "util/evlist.h"
23 #include "util/evsel.h"
24 #include "util/debug.h"
25 #include "util/session.h"
26 #include "util/tool.h"
27 #include "util/symbol.h"
28 #include "util/cpumap.h"
29 #include "util/thread_map.h"
30 #include "util/data.h"
31 #include "util/perf_regs.h"
32 #include "util/auxtrace.h"
34 #include "util/parse-branch-options.h"
35 #include "util/parse-regs-options.h"
36 #include "util/llvm-utils.h"
37 #include "util/bpf-loader.h"
38 #include "util/trigger.h"
48 struct perf_tool tool;
49 struct record_opts opts;
51 struct perf_data_file file;
52 struct auxtrace_record *itr;
53 struct perf_evlist *evlist;
54 struct perf_session *session;
59 bool no_buildid_cache;
60 bool no_buildid_cache_set;
62 bool timestamp_filename;
64 unsigned long long samples;
/*
 * record__write - append a buffer to the output of this record session.
 *
 * Hands @size bytes starting at @bf to perf_data_file__write() for
 * rec->session->file. A negative result from that call is reported through
 * pr_err(). The visible accounting step adds @size to rec->bytes_written.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * callers in this file treat a negative return as failure.
 */
67 static int record__write(struct record *rec, void *bf, size_t size)
69 if (perf_data_file__write(rec->session->file, bf, size) < 0) {
70 pr_err("failed to write perf data, error: %m\n");
74 rec->bytes_written += size;
/*
 * process_synthesized_event - handler passed to the perf_event__synthesize_*()
 * helpers used in this file.
 *
 * Recovers the enclosing struct record from @tool with container_of() and
 * writes the event, event->header.size bytes long, through record__write().
 * @sample and @machine are unused. Returns whatever record__write() returns.
 */
78 static int process_synthesized_event(struct perf_tool *tool,
79 union perf_event *event,
80 struct perf_sample *sample __maybe_unused,
81 struct machine *machine __maybe_unused)
83 struct record *rec = container_of(tool, struct record, tool);
84 return record__write(rec, event, event->header.size);
88 backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
90 struct perf_event_header *pheader;
94 pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
95 pheader = (struct perf_event_header *)(buf + (head & mask));
98 if (evt_head - head >= (unsigned int)size) {
99 pr_debug("Finshed reading backward ring buffer: rewind\n");
100 if (evt_head - head > (unsigned int)size)
101 evt_head -= pheader->size;
106 pheader = (struct perf_event_header *)(buf + (evt_head & mask));
108 if (pheader->size == 0) {
109 pr_debug("Finshed reading backward ring buffer: get start\n");
114 evt_head += pheader->size;
115 pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
117 WARN_ONCE(1, "Shouldn't get here\n");
122 rb_find_range(struct perf_evlist *evlist,
123 void *data, int mask, u64 head, u64 old,
124 u64 *start, u64 *end)
126 if (!evlist->backward) {
132 return backward_rb_find_range(data, mask, head, start, end);
135 static int record__mmap_read(struct record *rec, int idx)
137 struct perf_mmap *md = &rec->evlist->mmap[idx];
138 u64 head = perf_mmap__read_head(md);
140 u64 end = head, start = old;
141 unsigned char *data = md->base + page_size;
146 if (rb_find_range(rec->evlist, data, md->mask, head,
156 if (size > (unsigned long)(md->mask) + 1) {
157 WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
160 perf_evlist__mmap_consume(rec->evlist, idx);
164 if ((start & md->mask) + size != (end & md->mask)) {
165 buf = &data[start & md->mask];
166 size = md->mask + 1 - (start & md->mask);
169 if (record__write(rec, buf, size) < 0) {
175 buf = &data[start & md->mask];
179 if (record__write(rec, buf, size) < 0) {
185 perf_evlist__mmap_consume(rec->evlist, idx);
190 static volatile int done;
191 static volatile int signr = -1;
192 static volatile int child_finished;
194 static volatile int auxtrace_record__snapshot_started;
195 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
196 static DEFINE_TRIGGER(switch_output_trigger);
198 static void sig_handler(int sig)
208 static void record__sig_exit(void)
213 signal(signr, SIG_DFL);
217 #ifdef HAVE_AUXTRACE_SUPPORT
219 static int record__process_auxtrace(struct perf_tool *tool,
220 union perf_event *event, void *data1,
221 size_t len1, void *data2, size_t len2)
223 struct record *rec = container_of(tool, struct record, tool);
224 struct perf_data_file *file = &rec->file;
228 if (!perf_data_file__is_pipe(file)) {
230 int fd = perf_data_file__fd(file);
233 file_offset = lseek(fd, 0, SEEK_CUR);
234 if (file_offset == -1)
236 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
242 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
243 padding = (len1 + len2) & 7;
245 padding = 8 - padding;
247 record__write(rec, event, event->header.size);
248 record__write(rec, data1, len1);
250 record__write(rec, data2, len2);
251 record__write(rec, &pad, padding);
256 static int record__auxtrace_mmap_read(struct record *rec,
257 struct auxtrace_mmap *mm)
261 ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
262 record__process_auxtrace);
272 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
273 struct auxtrace_mmap *mm)
277 ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
278 record__process_auxtrace,
279 rec->opts.auxtrace_snapshot_size);
289 static int record__auxtrace_read_snapshot_all(struct record *rec)
294 for (i = 0; i < rec->evlist->nr_mmaps; i++) {
295 struct auxtrace_mmap *mm =
296 &rec->evlist->mmap[i].auxtrace_mmap;
301 if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
310 static void record__read_auxtrace_snapshot(struct record *rec)
312 pr_debug("Recording AUX area tracing snapshot\n");
313 if (record__auxtrace_read_snapshot_all(rec) < 0) {
314 trigger_error(&auxtrace_snapshot_trigger);
316 if (auxtrace_record__snapshot_finish(rec->itr))
317 trigger_error(&auxtrace_snapshot_trigger);
319 trigger_ready(&auxtrace_snapshot_trigger);
326 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
327 struct auxtrace_mmap *mm __maybe_unused)
333 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
338 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
345 static int record__mmap_evlist(struct record *rec,
346 struct perf_evlist *evlist)
348 struct record_opts *opts = &rec->opts;
351 if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
352 opts->auxtrace_mmap_pages,
353 opts->auxtrace_snapshot_mode) < 0) {
354 if (errno == EPERM) {
355 pr_err("Permission error mapping pages.\n"
356 "Consider increasing "
357 "/proc/sys/kernel/perf_event_mlock_kb,\n"
358 "or try again with a smaller value of -m/--mmap_pages.\n"
359 "(current value: %u,%u)\n",
360 opts->mmap_pages, opts->auxtrace_mmap_pages);
363 pr_err("failed to mmap with %d (%s)\n", errno,
364 strerror_r(errno, msg, sizeof(msg)));
/*
 * record__mmap - map the ring buffers of the session's own event list.
 *
 * Thin wrapper: forwards to record__mmap_evlist() with rec->evlist and
 * returns its result unchanged.
 */
374 static int record__mmap(struct record *rec)
376 return record__mmap_evlist(rec, rec->evlist);
379 static int record__open(struct record *rec)
382 struct perf_evsel *pos;
383 struct perf_evlist *evlist = rec->evlist;
384 struct perf_session *session = rec->session;
385 struct record_opts *opts = &rec->opts;
388 perf_evlist__config(evlist, opts, &callchain_param);
390 evlist__for_each_entry(evlist, pos) {
392 if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
393 if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
395 ui__warning("%s\n", msg);
400 perf_evsel__open_strerror(pos, &opts->target,
401 errno, msg, sizeof(msg));
402 ui__error("%s\n", msg);
407 if (perf_evlist__apply_filters(evlist, &pos)) {
408 error("failed to set filter \"%s\" on event %s with %d (%s)\n",
409 pos->filter, perf_evsel__name(pos), errno,
410 strerror_r(errno, msg, sizeof(msg)));
415 rc = record__mmap(rec);
419 session->evlist = evlist;
420 perf_session__set_id_hdr_size(session);
/*
 * process_sample_event - sample handler installed as record.tool.sample.
 *
 * Recovers the struct record that embeds @tool and returns the result of
 * build_id__mark_dso_hit() for the sample.
 *
 * NOTE(review): the statement(s) between the container_of() line and the
 * return are missing from this excerpt; presumably they account the sample
 * in rec->samples - confirm against the full file.
 */
425 static int process_sample_event(struct perf_tool *tool,
426 union perf_event *event,
427 struct perf_sample *sample,
428 struct perf_evsel *evsel,
429 struct machine *machine)
431 struct record *rec = container_of(tool, struct record, tool);
435 return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
438 static int process_buildids(struct record *rec)
440 struct perf_data_file *file = &rec->file;
441 struct perf_session *session = rec->session;
447 * During this process, it'll load kernel map and replace the
448 * dso->long_name to a real pathname it found. In this case
449 * we prefer the vmlinux path like
450 * /lib/modules/3.16.4/build/vmlinux
452 * rather than build-id path (in debug directory).
453 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
455 symbol_conf.ignore_vmlinux_buildid = true;
458 * If --buildid-all is given, it marks all DSO regardless of hits,
459 * so no need to process samples.
461 if (rec->buildid_all)
462 rec->tool.sample = NULL;
464 return perf_session__process_events(session);
467 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
470 struct perf_tool *tool = data;
472 * For the guest kernel, when processing the record and report subcommands,
473 * we arrange the module mmaps prior to the guest kernel mmap and trigger
474 * a dso preload, because by default guest module symbols are loaded
475 * from the guest kallsyms instead of /lib/modules/XXX/XXX. This
476 * method is used to avoid missing symbols when the first address is
477 * in a module instead of in the guest kernel.
479 err = perf_event__synthesize_modules(tool, process_synthesized_event,
482 pr_err("Couldn't record guest kernel [%d]'s reference"
483 " relocation symbol.\n", machine->pid);
486 * We use _stext for the guest kernel because the guest kernel's
487 * /proc/kallsyms sometimes has no _text.
489 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
492 pr_err("Couldn't record guest kernel [%d]'s reference"
493 " relocation symbol.\n", machine->pid);
496 static struct perf_event_header finished_round_event = {
497 .size = sizeof(struct perf_event_header),
498 .type = PERF_RECORD_FINISHED_ROUND,
/*
 * record__mmap_read_all - drain every mmap of the event list once.
 *
 * Walks indices 0 .. rec->evlist->nr_mmaps - 1. For each index, the event
 * ring buffer is read with record__mmap_read() when its base is set, and the
 * AUX area mmap is read with record__auxtrace_mmap_read() when it has a base
 * and auxtrace snapshot mode is off. rec->bytes_written is sampled on entry;
 * if it differs afterwards, finished_round_event is written to mark the end
 * of the round.
 *
 * NOTE(review): the error paths taken when one of the reads returns non-zero
 * and the final return are not visible in this excerpt.
 */
501 static int record__mmap_read_all(struct record *rec)
503 u64 bytes_written = rec->bytes_written;
507 for (i = 0; i < rec->evlist->nr_mmaps; i++) {
508 struct auxtrace_mmap *mm = &rec->evlist->mmap[i].auxtrace_mmap;
510 if (rec->evlist->mmap[i].base) {
511 if (record__mmap_read(rec, i) != 0) {
517 if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
518 record__auxtrace_mmap_read(rec, mm) != 0) {
525 * Mark the round finished in case we wrote
526 * at least one event.
528 if (bytes_written != rec->bytes_written)
529 rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
/*
 * record__init_features - choose the header features for the session.
 *
 * First sets every feature in [HEADER_FIRST_FEATURE, HEADER_LAST_FEATURE) on
 * rec->session->header, then clears the ones that do not apply:
 *   - HEADER_TRACING_DATA when the event list has no tracepoints,
 *   - HEADER_BRANCH_STACK when opts.branch_stack is not set,
 *   - HEADER_AUXTRACE when opts.full_auxtrace is not set,
 *   - HEADER_STAT.
 * HEADER_BUILD_ID is cleared as well.
 *
 * NOTE(review): any condition guarding the HEADER_BUILD_ID clear is not
 * visible in this excerpt - confirm against the full file.
 */
535 static void record__init_features(struct record *rec)
537 struct perf_session *session = rec->session;
540 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
541 perf_header__set_feat(&session->header, feat);
544 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
546 if (!have_tracepoints(&rec->evlist->entries))
547 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
549 if (!rec->opts.branch_stack)
550 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
552 if (!rec->opts.full_auxtrace)
553 perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
555 perf_header__clear_feat(&session->header, HEADER_STAT);
559 record__finish_output(struct record *rec)
561 struct perf_data_file *file = &rec->file;
562 int fd = perf_data_file__fd(file);
567 rec->session->header.data_size += rec->bytes_written;
568 file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
570 if (!rec->no_buildid) {
571 process_buildids(rec);
573 if (rec->buildid_all)
574 dsos__hit_all(rec->session);
576 perf_session__write_header(rec->session, rec->evlist, fd, true);
/*
 * record__synthesize_workload - synthesize thread events for the workload.
 *
 * Fills a local one-entry thread map (nr = 1) with the workload pid taken
 * from rec->evlist->workload.pid and a NULL comm, then passes it to
 * perf_event__synthesize_thread_map() with process_synthesized_event as the
 * handler, the host machine of the session, and the sample_address and
 * proc_map_timeout options. Returns that call's result.
 *
 * NOTE(review): the declaration that wraps the two members into the local
 * `thread_map` object is only partly visible in this excerpt.
 */
581 static int record__synthesize_workload(struct record *rec)
584 struct thread_map map;
585 struct thread_map_data map_data;
588 thread_map.map.nr = 1;
589 thread_map.map.map[0].pid = rec->evlist->workload.pid;
590 thread_map.map.map[0].comm = NULL;
591 return perf_event__synthesize_thread_map(&rec->tool, &thread_map.map,
592 process_synthesized_event,
593 &rec->session->machines.host,
594 rec->opts.sample_address,
595 rec->opts.proc_map_timeout);
598 static int record__synthesize(struct record *rec);
601 record__switch_output(struct record *rec, bool at_exit)
603 struct perf_data_file *file = &rec->file;
606 /* Same Size: "2015122520103046"*/
607 char timestamp[] = "InvalidTimestamp";
610 record__finish_output(rec);
611 err = fetch_current_timestamp(timestamp, sizeof(timestamp));
613 pr_err("Failed to get current timestamp\n");
617 fd = perf_data_file__switch(file, timestamp,
618 rec->session->header.data_offset,
620 if (fd >= 0 && !at_exit) {
621 rec->bytes_written = 0;
622 rec->session->header.data_size = 0;
626 fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
627 file->path, timestamp);
629 /* Output tracking events */
631 record__synthesize(rec);
634 * In 'perf record --switch-output' without -a,
635 * record__synthesize() in record__switch_output() won't
636 * generate tracking events because there's no thread_map
637 * in evlist, so the newly created perf.data would not
638 * contain map and comm information.
639 * Create a fake thread_map and directly call
640 * perf_event__synthesize_thread_map() for those events.
642 if (target__none(&rec->opts.target))
643 record__synthesize_workload(rec);
648 static volatile int workload_exec_errno;
651 * perf_evlist__prepare_workload will send a SIGUSR1
652 * if the fork fails, since we asked by setting its
653 * want_signal to true.
655 static void workload_exec_failed_signal(int signo __maybe_unused,
657 void *ucontext __maybe_unused)
659 workload_exec_errno = info->si_value.sival_int;
664 static void snapshot_sig_handler(int sig);
667 perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
668 struct perf_tool *tool __maybe_unused,
669 perf_event__handler_t process __maybe_unused,
670 struct machine *machine __maybe_unused)
/*
 * record__pick_pc - pick a perf_event_mmap_page to hand to
 * perf_event__synth_time_conv().
 *
 * Returns the base of the first mmap (rec->evlist->mmap[0].base) when the
 * event list, its mmap array and that base are all non-NULL.
 *
 * NOTE(review): the fallback return for the other case is not visible in
 * this excerpt.
 */
675 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
677 if (rec->evlist && rec->evlist->mmap && rec->evlist->mmap[0].base)
678 return rec->evlist->mmap[0].base;
682 static int record__synthesize(struct record *rec)
684 struct perf_session *session = rec->session;
685 struct machine *machine = &session->machines.host;
686 struct perf_data_file *file = &rec->file;
687 struct record_opts *opts = &rec->opts;
688 struct perf_tool *tool = &rec->tool;
689 int fd = perf_data_file__fd(file);
693 err = perf_event__synthesize_attrs(tool, session,
694 process_synthesized_event);
696 pr_err("Couldn't synthesize attrs.\n");
700 if (have_tracepoints(&rec->evlist->entries)) {
702 * FIXME err <= 0 here actually means that
703 * there were no tracepoints so it's not really
704 * an error, just that we don't need to
705 * synthesize anything. We really have to
706 * return this more properly and also
707 * propagate errors that now are calling die()
709 err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
710 process_synthesized_event);
712 pr_err("Couldn't record tracing data.\n");
715 rec->bytes_written += err;
719 err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
720 process_synthesized_event, machine);
724 if (rec->opts.full_auxtrace) {
725 err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
726 session, process_synthesized_event);
731 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
733 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
734 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
735 "Check /proc/kallsyms permission or run as root.\n");
737 err = perf_event__synthesize_modules(tool, process_synthesized_event,
739 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
740 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
741 "Check /proc/modules permission or run as root.\n");
744 machines__process_guests(&session->machines,
745 perf_event__synthesize_guest_os, tool);
748 err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
749 process_synthesized_event, opts->sample_address,
750 opts->proc_map_timeout);
755 static int __cmd_record(struct record *rec, int argc, const char **argv)
759 unsigned long waking = 0;
760 const bool forks = argc > 0;
761 struct machine *machine;
762 struct perf_tool *tool = &rec->tool;
763 struct record_opts *opts = &rec->opts;
764 struct perf_data_file *file = &rec->file;
765 struct perf_session *session;
766 bool disabled = false, draining = false;
769 rec->progname = argv[0];
771 atexit(record__sig_exit);
772 signal(SIGCHLD, sig_handler);
773 signal(SIGINT, sig_handler);
774 signal(SIGTERM, sig_handler);
776 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output) {
777 signal(SIGUSR2, snapshot_sig_handler);
778 if (rec->opts.auxtrace_snapshot_mode)
779 trigger_on(&auxtrace_snapshot_trigger);
780 if (rec->switch_output)
781 trigger_on(&switch_output_trigger);
783 signal(SIGUSR2, SIG_IGN);
786 session = perf_session__new(file, false, tool);
787 if (session == NULL) {
788 pr_err("Perf session creation failed.\n");
792 fd = perf_data_file__fd(file);
793 rec->session = session;
795 record__init_features(rec);
798 err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
800 workload_exec_failed_signal);
802 pr_err("Couldn't run the workload!\n");
804 goto out_delete_session;
808 if (record__open(rec) != 0) {
813 err = bpf__apply_obj_config();
817 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
818 pr_err("ERROR: Apply config to BPF failed: %s\n",
824 * Normally perf_session__new would do this, but it doesn't have the
827 if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
828 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
829 rec->tool.ordered_events = false;
832 if (!rec->evlist->nr_groups)
833 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
836 err = perf_header__write_pipe(fd);
840 err = perf_session__write_header(session, rec->evlist, fd, false);
846 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
847 pr_err("Couldn't generate buildids. "
848 "Use --no-buildid to profile anyway.\n");
853 machine = &session->machines.host;
855 err = record__synthesize(rec);
859 if (rec->realtime_prio) {
860 struct sched_param param;
862 param.sched_priority = rec->realtime_prio;
863 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) {
864 pr_err("Could not set realtime priority.\n");
871 * When perf is starting the traced process, all the events
872 * (apart from group members) have enable_on_exec=1 set,
873 * so don't spoil it by prematurely enabling them.
875 if (!target__none(&opts->target) && !opts->initial_delay)
876 perf_evlist__enable(rec->evlist);
882 union perf_event *event;
884 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
891 * Some H/W events are generated before COMM event
892 * which is emitted during exec(), so perf script
893 * cannot see a correct process name for those events.
894 * Synthesize COMM event to prevent it.
896 perf_event__synthesize_comm(tool, event,
897 rec->evlist->workload.pid,
898 process_synthesized_event,
902 perf_evlist__start_workload(rec->evlist);
905 if (opts->initial_delay) {
906 usleep(opts->initial_delay * 1000);
907 perf_evlist__enable(rec->evlist);
910 trigger_ready(&auxtrace_snapshot_trigger);
911 trigger_ready(&switch_output_trigger);
913 unsigned long long hits = rec->samples;
915 if (record__mmap_read_all(rec) < 0) {
916 trigger_error(&auxtrace_snapshot_trigger);
917 trigger_error(&switch_output_trigger);
922 if (auxtrace_record__snapshot_started) {
923 auxtrace_record__snapshot_started = 0;
924 if (!trigger_is_error(&auxtrace_snapshot_trigger))
925 record__read_auxtrace_snapshot(rec);
926 if (trigger_is_error(&auxtrace_snapshot_trigger)) {
927 pr_err("AUX area tracing snapshot failed\n");
933 if (trigger_is_hit(&switch_output_trigger)) {
934 trigger_ready(&switch_output_trigger);
937 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
940 fd = record__switch_output(rec, false);
942 pr_err("Failed to switch to new file\n");
943 trigger_error(&switch_output_trigger);
949 if (hits == rec->samples) {
950 if (done || draining)
952 err = perf_evlist__poll(rec->evlist, -1);
954 * Propagate error, only if there's any. Ignore positive
955 * number of returned events and interrupt error.
957 if (err > 0 || (err < 0 && errno == EINTR))
961 if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
966 * When perf is starting the traced process, at the end events
967 * die with the process and we wait for that. Thus no need to
968 * disable events in this case.
970 if (done && !disabled && !target__none(&opts->target)) {
971 trigger_off(&auxtrace_snapshot_trigger);
972 perf_evlist__disable(rec->evlist);
976 trigger_off(&auxtrace_snapshot_trigger);
977 trigger_off(&switch_output_trigger);
979 if (forks && workload_exec_errno) {
980 char msg[STRERR_BUFSIZE];
981 const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
982 pr_err("Workload failed: %s\n", emsg);
988 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
995 kill(rec->evlist->workload.pid, SIGTERM);
1001 else if (WIFEXITED(exit_status))
1002 status = WEXITSTATUS(exit_status);
1003 else if (WIFSIGNALED(exit_status))
1004 signr = WTERMSIG(exit_status);
1008 /* this will be recalculated during process_buildids() */
1012 if (!rec->timestamp_filename) {
1013 record__finish_output(rec);
1015 fd = record__switch_output(rec, true);
1018 goto out_delete_session;
1023 if (!err && !quiet) {
1025 const char *postfix = rec->timestamp_filename ?
1026 ".<timestamp>" : "";
1028 if (rec->samples && !rec->opts.full_auxtrace)
1029 scnprintf(samples, sizeof(samples),
1030 " (%" PRIu64 " samples)", rec->samples);
1034 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
1035 perf_data_file__size(file) / 1024.0 / 1024.0,
1036 file->path, postfix, samples);
1040 perf_session__delete(session);
/*
 * callchain_debug - emit debug output describing the callchain setup.
 *
 * Prints the name of callchain->record_mode, looked up in a local table
 * indexed by the mode ("NONE", "FP", "DWARF", "LBR"). For CALLCHAIN_DWARF it
 * additionally prints callchain->dump_size as the stack dump size.
 */
1044 static void callchain_debug(struct callchain_param *callchain)
1046 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1048 pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1050 if (callchain->record_mode == CALLCHAIN_DWARF)
1051 pr_debug("callchain: stack dump size %d\n",
1052 callchain->dump_size);
/*
 * record_opts__parse_callchain - apply a --call-graph style argument.
 *
 * Sets callchain->enabled to !unset. The --no-call-graph path sets the
 * record mode to CALLCHAIN_NONE and logs "callchain: disabled". Otherwise
 * @arg is parsed with parse_callchain_record_opt(); when the resulting mode
 * is CALLCHAIN_DWARF, record->sample_address is switched on as well, and the
 * configuration is logged through callchain_debug().
 *
 * NOTE(review): the conditions selecting these paths and the return
 * statements are not visible in this excerpt.
 */
1055 int record_opts__parse_callchain(struct record_opts *record,
1056 struct callchain_param *callchain,
1057 const char *arg, bool unset)
1060 callchain->enabled = !unset;
1062 /* --no-call-graph */
1064 callchain->record_mode = CALLCHAIN_NONE;
1065 pr_debug("callchain: disabled\n");
1069 ret = parse_callchain_record_opt(arg, callchain);
1071 /* Enable data address sampling for DWARF unwind. */
1072 if (callchain->record_mode == CALLCHAIN_DWARF)
1073 record->sample_address = true;
1074 callchain_debug(callchain);
/*
 * record_parse_callchain_opt - option callback registered for --call-graph
 * in __record_options.
 *
 * Forwards to record_opts__parse_callchain() with opt->value as the record
 * options and the global callchain_param, and returns its result.
 */
1080 int record_parse_callchain_opt(const struct option *opt,
1084 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
/*
 * record_callchain_opt - option callback registered for -g in
 * __record_options.
 *
 * Treats opt->value as the struct callchain_param to update: marks it
 * enabled, switches the record mode to CALLCHAIN_FP when it is still
 * CALLCHAIN_NONE, and logs the result via callchain_debug(). @arg and
 * @unset are unused.
 */
1087 int record_callchain_opt(const struct option *opt,
1088 const char *arg __maybe_unused,
1089 int unset __maybe_unused)
1091 struct callchain_param *callchain = opt->value;
1093 callchain->enabled = true;
1095 if (callchain->record_mode == CALLCHAIN_NONE)
1096 callchain->record_mode = CALLCHAIN_FP;
1098 callchain_debug(callchain);
/*
 * perf_record_config - config callback handed to perf_config() by
 * cmd_record(), with the struct record as @cb.
 *
 * "record.build-id" is interpreted here:
 *   "cache"    -> rec->no_buildid_cache = false
 *   "no-cache" -> rec->no_buildid_cache = true
 *   "skip"     -> rec->no_buildid = true
 * "record.call-graph" is renamed to "call-graph.record-mode" before the
 * variable is passed on to perf_default_config(), whose result is returned.
 *
 * NOTE(review): the handling of other "record.build-id" values and the
 * return taken after that branch are not visible in this excerpt.
 */
1102 static int perf_record_config(const char *var, const char *value, void *cb)
1104 struct record *rec = cb;
1106 if (!strcmp(var, "record.build-id")) {
1107 if (!strcmp(value, "cache"))
1108 rec->no_buildid_cache = false;
1109 else if (!strcmp(value, "no-cache"))
1110 rec->no_buildid_cache = true;
1111 else if (!strcmp(value, "skip"))
1112 rec->no_buildid = true;
1117 if (!strcmp(var, "record.call-graph"))
1118 var = "call-graph.record-mode"; /* fall-through */
1120 return perf_default_config(var, value, cb);
1123 struct clockid_map {
1128 #define CLOCKID_MAP(n, c) \
1129 { .name = n, .clockid = (c), }
1131 #define CLOCKID_END { .name = NULL, }
1135 * Add the missing ones, we need to build on many distros...
1137 #ifndef CLOCK_MONOTONIC_RAW
1138 #define CLOCK_MONOTONIC_RAW 4
1140 #ifndef CLOCK_BOOTTIME
1141 #define CLOCK_BOOTTIME 7
1144 #define CLOCK_TAI 11
1147 static const struct clockid_map clockids[] = {
1148 /* available for all events, NMI safe */
1149 CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
1150 CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
1152 /* available for some events */
1153 CLOCKID_MAP("realtime", CLOCK_REALTIME),
1154 CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
1155 CLOCKID_MAP("tai", CLOCK_TAI),
1157 /* available for the lazy */
1158 CLOCKID_MAP("mono", CLOCK_MONOTONIC),
1159 CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
1160 CLOCKID_MAP("real", CLOCK_REALTIME),
1161 CLOCKID_MAP("boot", CLOCK_BOOTTIME),
1166 static int parse_clockid(const struct option *opt, const char *str, int unset)
1168 struct record_opts *opts = (struct record_opts *)opt->value;
1169 const struct clockid_map *cm;
1170 const char *ostr = str;
1173 opts->use_clockid = 0;
1181 /* no setting it twice */
1182 if (opts->use_clockid)
1185 opts->use_clockid = true;
1187 /* if it's a number, we're done */
1188 if (sscanf(str, "%d", &opts->clockid) == 1)
1191 /* allow a "CLOCK_" prefix to the name */
1192 if (!strncasecmp(str, "CLOCK_", 6))
1195 for (cm = clockids; cm->name; cm++) {
1196 if (!strcasecmp(str, cm->name)) {
1197 opts->clockid = cm->clockid;
1202 opts->use_clockid = false;
1203 ui__warning("unknown clockid %s, check man page\n", ostr);
1207 static int record__parse_mmap_pages(const struct option *opt,
1209 int unset __maybe_unused)
1211 struct record_opts *opts = opt->value;
1213 unsigned int mmap_pages;
1228 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1231 opts->mmap_pages = mmap_pages;
1239 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1243 opts->auxtrace_mmap_pages = mmap_pages;
1250 static const char * const __record_usage[] = {
1251 "perf record [<options>] [<command>]",
1252 "perf record [<options>] -- <command> [<options>]",
1255 const char * const *record_usage = __record_usage;
1258 * XXX Ideally would be local to cmd_record() and passed to a record__new
1259 * because we need to have access to it in record__exit, that is called
1260 * after cmd_record() exits, but since record_options need to be accessible to
1261 * builtin-script, leave it here.
1263 * At least we don't touch it in all the other functions here directly.
1265 * Just say no to tons of global variables, sigh.
1267 static struct record record = {
1269 .sample_time = true,
1270 .mmap_pages = UINT_MAX,
1271 .user_freq = UINT_MAX,
1272 .user_interval = ULLONG_MAX,
1276 .default_per_cpu = true,
1278 .proc_map_timeout = 500,
1281 .sample = process_sample_event,
1282 .fork = perf_event__process_fork,
1283 .exit = perf_event__process_exit,
1284 .comm = perf_event__process_comm,
1285 .mmap = perf_event__process_mmap,
1286 .mmap2 = perf_event__process_mmap2,
1287 .ordered_events = true,
1291 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1292 "\n\t\t\t\tDefault: fp";
1294 static bool dry_run;
1297 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1298 * with it and switch to use the library functions in perf_evlist that came
1299 * from builtin-record.c, i.e. use record_opts,
1300 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
1303 struct option __record_options[] = {
1304 OPT_CALLBACK('e', "event", &record.evlist, "event",
1305 "event selector. use 'perf list' to list available events",
1306 parse_events_option),
1307 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1308 "event filter", parse_filter),
1309 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1310 NULL, "don't record events from perf itself",
1312 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1313 "record events on existing process id"),
1314 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1315 "record events on existing thread id"),
1316 OPT_INTEGER('r', "realtime", &record.realtime_prio,
1317 "collect data with this RT SCHED_FIFO priority"),
1318 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1319 "collect data without buffering"),
1320 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1321 "collect raw sample records from all opened counters"),
1322 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1323 "system-wide collection from all CPUs"),
1324 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1325 "list of cpus to monitor"),
1326 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1327 OPT_STRING('o', "output", &record.file.path, "file",
1328 "output file name"),
1329 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1330 &record.opts.no_inherit_set,
1331 "child tasks do not inherit counters"),
1332 OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
1333 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1334 "number of mmap data pages and AUX area tracing mmap pages",
1335 record__parse_mmap_pages),
1336 OPT_BOOLEAN(0, "group", &record.opts.group,
1337 "put the counters into a counter group"),
1338 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1339 NULL, "enables call-graph recording" ,
1340 &record_callchain_opt),
1341 OPT_CALLBACK(0, "call-graph", &record.opts,
1342 "record_mode[,record_size]", record_callchain_help,
1343 &record_parse_callchain_opt),
1344 OPT_INCR('v', "verbose", &verbose,
1345 "be more verbose (show counter open errors, etc)"),
1346 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1347 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1348 "per thread counts"),
1349 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1350 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1351 &record.opts.sample_time_set,
1352 "Record the sample timestamps"),
1353 OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
1354 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1356 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1357 &record.no_buildid_cache_set,
1358 "do not update the buildid cache"),
1359 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1360 &record.no_buildid_set,
1361 "do not collect buildids in perf.data"),
1362 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1363 "monitor event in cgroup name only",
1365 OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1366 "ms to wait before starting measurement after program start"),
1367 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1370 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1371 "branch any", "sample any taken branches",
1372 parse_branch_stack),
1374 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1375 "branch filter mask", "branch stack filter modes",
1376 parse_branch_stack),
1377 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1378 "sample by weight (on special events only)"),
1379 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1380 "sample transaction flags (special events only)"),
1381 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1382 "use per-thread mmaps"),
1383 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1384 "sample selected machine registers on interrupt,"
1385 " use -I ? to list register names", parse_regs),
1386 OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1387 "Record running/enabled time of read (:S) events"),
1388 OPT_CALLBACK('k', "clockid", &record.opts,
1389 "clockid", "clockid to use for events, see clock_gettime()",
1391 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1392 "opts", "AUX area tracing Snapshot Mode", ""),
1393 OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
1394 "per thread proc mmap processing timeout in ms"),
1395 OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1396 "Record context switch events"),
1397 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1398 "Configure all used events to run in kernel space.",
1399 PARSE_OPT_EXCLUSIVE),
1400 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1401 "Configure all used events to run in user space.",
1402 PARSE_OPT_EXCLUSIVE),
1403 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1404 "clang binary to use for compiling BPF scriptlets"),
1405 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1406 "options passed to clang when compiling BPF scriptlets"),
1407 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1408 "file", "vmlinux pathname"),
1409 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1410 "Record build-id of all DSOs regardless of hits"),
1411 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
1412 "append timestamp to output filename"),
1413 OPT_BOOLEAN(0, "switch-output", &record.switch_output,
1414 "Switch output when receive SIGUSR2"),
1415 OPT_BOOLEAN(0, "dry-run", &dry_run,
1416 "Parse options then exit"),
/*
 * Pointer to the __record_options table. cmd_record() hands it to
 * set_option_nobuild(), parse_options() and the usage helpers.
 */
1420 struct option *record_options = __record_options;
/*
 * cmd_record - entry point of the record builtin.
 *
 * Applies perf_record_config, parses the command line against
 * record_options, checks the option/target combination, prepares the event
 * list and its maps, and finally calls __cmd_record() with whatever
 * arguments parse_options() left over. The event list and the auxtrace
 * record are released before leaving.
 *
 * @argc, @argv: command line handed to parse_options().
 * @prefix: not used (marked __maybe_unused).
 *
 * NOTE(review): presumably returns err (status of the failing setup step or
 * of __cmd_record()) - confirm against the return statements.
 */
1422 int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
1425 struct record *rec = &record;
1426 char errbuf[BUFSIZ];
/*
 * Built without libbpf: register "NO_LIBBPF=1" as the reason for the two
 * clang options through set_option_nobuild() (presumably this makes the
 * option report that it is unavailable in this build - confirm).
 */
1428 #ifndef HAVE_LIBBPF_SUPPORT
1429 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
1430 set_nobuild('\0', "clang-path", true);
1431 set_nobuild('\0', "clang-opt", true);
/*
 * Without BPF prologue support the "vmlinux" option gets the same
 * treatment; REASON names whichever build feature is missing.
 */
1435 #ifndef HAVE_BPF_PROLOGUE
1436 # if !defined (HAVE_DWARF_SUPPORT)
1437 # define REASON "NO_DWARF=1"
1438 # elif !defined (HAVE_LIBBPF_SUPPORT)
1439 # define REASON "NO_LIBBPF=1"
1441 # define REASON "this architecture doesn't support BPF prologue"
1443 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
1444 set_nobuild('\0', "vmlinux", true);
/*
 * Allocate the event list before option parsing; the -G/--cgroup option is
 * registered with &record.evlist as its value.
 */
1449 rec->evlist = perf_evlist__new();
1450 if (rec->evlist == NULL)
/* Configuration is applied first; the command line is parsed afterwards. */
1453 perf_config(perf_record_config, rec);
/*
 * Parsing stops at the first non-option argument, so argc/argv afterwards
 * describe what is left over. With nothing left and target__none() true
 * (presumably: no target was selected - confirm) usage is printed.
 */
1455 argc = parse_options(argc, argv, record_options, record_usage,
1456 PARSE_OPT_STOP_AT_NON_OPTION);
1457 if (!argc && target__none(&rec->opts.target))
1458 usage_with_options(record_usage, record_options);
/* cgroup monitoring is only accepted together with system-wide mode. */
1460 if (nr_cgroups && !rec->opts.target.system_wide) {
1461 usage_with_options_msg(record_usage, record_options,
1462 "cgroup monitoring only available in system-wide mode");
/* --switch-events is rejected when perf_can_record_switch_events() says no. */
1465 if (rec->opts.record_switch_events &&
1466 !perf_can_record_switch_events()) {
1467 ui__error("kernel does not support recording context switch events\n");
1468 parse_options_usage(record_usage, record_options, "switch-events", 0);
/* --switch-output forces timestamped output file names. */
1472 if (rec->switch_output)
1473 rec->timestamp_filename = true;
/* AUX area tracing setup for this event list; err is passed by address. */
1476 rec->itr = auxtrace_record__init(rec->evlist, &err);
/* Apply the string given with -S/--snapshot. */
1481 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
1482 rec->opts.auxtrace_snapshot_opts);
/*
 * BPF stdout setup; a failure is turned into text with
 * bpf__strerror_setup_stdout() and logged.
 */
1489 err = bpf__setup_stdout(rec->evlist);
1491 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
1492 pr_err("ERROR: Setup BPF stdout failed: %s\n",
/*
 * With kptr_restrict set, tell the user that kernel samples may not be
 * resolved (details are in the message itself).
 */
1501 if (symbol_conf.kptr_restrict)
1503 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1504 "check /proc/sys/kernel/kptr_restrict.\n\n"
1505 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1506 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1507 "Samples in kernel modules won't be resolved at all.\n\n"
1508 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1509 "even with a suitable vmlinux or kallsyms file.\n\n");
/* -N (no_buildid_cache) or -B (no_buildid) given: turn the buildid cache off. */
1511 if (rec->no_buildid_cache || rec->no_buildid) {
1512 disable_buildid_cache();
1513 } else if (rec->switch_output) {
1515 * In 'perf record --switch-output', disable buildid
1516 * generation by default to reduce data file switching
1517 * overhead. Still generate buildid if they are required
1520 * perf record --signal-trigger --no-no-buildid \
1521 * --no-no-buildid-cache
1523 * Following code equals to:
1525 * if ((rec->no_buildid || !rec->no_buildid_set) &&
1526 * (rec->no_buildid_cache || !rec->no_buildid_cache_set))
1527 * disable_buildid_cache();
1529 bool disable = true;
1531 if (rec->no_buildid_set && !rec->no_buildid)
1533 if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
1536 rec->no_buildid = true;
1537 rec->no_buildid_cache = true;
1538 disable_buildid_cache();
/* No event ended up in the list: fall back to the default event. */
1542 if (rec->evlist->nr_entries == 0 &&
1543 perf_evlist__add_default(rec->evlist) < 0) {
1544 pr_err("Not enough memory for event selector list\n");
1545 goto out_symbol_exit;
/*
 * A tid target switches inheritance off unless the inherit option was set
 * explicitly (no_inherit_set).
 */
1548 if (rec->opts.target.tid && !rec->opts.no_inherit_set)
1549 rec->opts.no_inherit = true;
/* Problems found by target__validate() are reported through ui__warning(). */
1551 err = target__validate(&rec->opts.target);
1553 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1554 ui__warning("%s", errbuf);
/*
 * A uid parse failure is reported through ui__error() and leaves via
 * out_symbol_exit. errno is captured before the reporting calls
 * (presumably because they may overwrite it - confirm).
 */
1557 err = target__parse_uid(&rec->opts.target);
1559 int saved_errno = errno;
1561 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1562 ui__error("%s", errbuf);
1565 goto out_symbol_exit;
/* Create the evlist maps for the selected target; failure prints usage. */
1569 if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
1570 usage_with_options(record_usage, record_options);
/* auxtrace_record__options() receives the evlist and the record options. */
1572 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
1574 goto out_symbol_exit;
1577 * We take all buildids when the file contains
1578 * AUX area tracing data because we do not decode the
1579 * trace because it would take too long.
1581 if (rec->opts.full_auxtrace)
1582 rec->buildid_all = true;
/* A non-zero result from record_opts__config() aborts via out_symbol_exit. */
1584 if (record_opts__config(&rec->opts)) {
1586 goto out_symbol_exit;
/* Hand over to the recording proper with the remaining argc/argv. */
1589 err = __cmd_record(&record, argc, argv);
/* Teardown: release the event list and the auxtrace record. */
1591 perf_evlist__delete(rec->evlist);
1593 auxtrace_record__free(rec->itr);
/*
 * snapshot_sig_handler - signal handler that fires the two triggers.
 *
 * When auxtrace_snapshot_trigger is ready it is marked hit, the
 * auxtrace_record__snapshot_started flag is set and a snapshot is started on
 * record.itr; a non-zero result from auxtrace_record__snapshot_start() puts
 * that trigger into its error state. A ready switch_output_trigger is
 * marked hit as well.
 *
 * @sig: not used (marked __maybe_unused).
 *
 * NOTE(review): presumably installed for SIGUSR2 (the --switch-output help
 * text names that signal); the registration is not visible here - confirm.
 */
1597 static void snapshot_sig_handler(int sig __maybe_unused)
1599 if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
1600 trigger_hit(&auxtrace_snapshot_trigger);
1601 auxtrace_record__snapshot_started = 1;
1602 if (auxtrace_record__snapshot_start(record.itr))
1603 trigger_error(&auxtrace_snapshot_trigger);
1606 if (trigger_is_ready(&switch_output_trigger))
1607 trigger_hit(&switch_output_trigger);