/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/drv_configs.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>
#include <asm/bug.h>
#include <linux/time64.h>

struct record {
        struct perf_tool        tool;
        struct record_opts      opts;
        u64                     bytes_written;
        struct perf_data_file   file;
        struct auxtrace_record  *itr;
        struct perf_evlist      *evlist;
        struct perf_session     *session;
        const char              *progname;
        int                     realtime_prio;
        bool                    no_buildid;
        bool                    no_buildid_set;
        bool                    no_buildid_cache;
        bool                    no_buildid_cache_set;
        bool                    buildid_all;
        bool                    timestamp_filename;
        bool                    switch_output;
        unsigned long long      samples;
};

static int record__write(struct record *rec, void *bf, size_t size)
{
        if (perf_data_file__write(rec->session->file, bf, size) < 0) {
                pr_err("failed to write perf data, error: %m\n");
                return -1;
        }

        rec->bytes_written += size;
        return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
                                     union perf_event *event,
                                     struct perf_sample *sample __maybe_unused,
                                     struct machine *machine __maybe_unused)
{
        struct record *rec = container_of(tool, struct record, tool);
        return record__write(rec, event, event->header.size);
}

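/*
 * In a backward ring buffer the kernel moves 'head' backwards, so the
 * newest record begins at 'head' and older ones follow it.  Walk the
 * records forward from 'head' until we either wrap past one full buffer
 * size (older data has been overwritten) or hit a zero-sized header (no
 * more records), yielding the valid [*start, *end) byte range to copy.
 */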
static int
backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
{
        struct perf_event_header *pheader;
        u64 evt_head = head;
        int size = mask + 1;

        pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
        pheader = (struct perf_event_header *)(buf + (head & mask));
        *start = head;
        while (true) {
                if (evt_head - head >= (unsigned int)size) {
                        pr_debug("Finished reading backward ring buffer: rewind\n");
                        if (evt_head - head > (unsigned int)size)
                                evt_head -= pheader->size;
                        *end = evt_head;
                        return 0;
                }

                pheader = (struct perf_event_header *)(buf + (evt_head & mask));

                if (pheader->size == 0) {
                        pr_debug("Finished reading backward ring buffer: get start\n");
                        *end = evt_head;
                        return 0;
                }

                evt_head += pheader->size;
                pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
        }
        WARN_ONCE(1, "Shouldn't get here\n");
        return -1;
}

static int
rb_find_range(void *data, int mask, u64 head, u64 old,
              u64 *start, u64 *end, bool backward)
{
        if (!backward) {
                *start = old;
                *end = head;
                return 0;
        }

        return backward_rb_find_range(data, mask, head, start, end);
}

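/*
 * Copy the [start, end) byte range out of the ring buffer into the
 * perf.data file.  When the range wraps past the end of the buffer, the
 * copy is done in two pieces: first from 'start' up to the top of the
 * buffer, then from the bottom of the buffer up to 'end'.
 */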
static int
record__mmap_read(struct record *rec, struct perf_mmap *md,
                  bool overwrite, bool backward)
{
        u64 head = perf_mmap__read_head(md);
        u64 old = md->prev;
        u64 end = head, start = old;
        unsigned char *data = md->base + page_size;
        unsigned long size;
        void *buf;
        int rc = 0;

        if (rb_find_range(data, md->mask, head,
                          old, &start, &end, backward))
                return -1;

        if (start == end)
                return 0;

        rec->samples++;

        size = end - start;
        if (size > (unsigned long)(md->mask) + 1) {
                WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

                md->prev = head;
                perf_mmap__consume(md, overwrite || backward);
                return 0;
        }

        if ((start & md->mask) + size != (end & md->mask)) {
                buf = &data[start & md->mask];
                size = md->mask + 1 - (start & md->mask);
                start += size;

                if (record__write(rec, buf, size) < 0) {
                        rc = -1;
                        goto out;
                }
        }

        buf = &data[start & md->mask];
        size = end - start;
        start += size;

        if (record__write(rec, buf, size) < 0) {
                rc = -1;
                goto out;
        }

        md->prev = head;
        perf_mmap__consume(md, overwrite || backward);
out:
        return rc;
}

static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static void sig_handler(int sig)
{
        if (sig == SIGCHLD)
                child_finished = 1;
        else
                signr = sig;

        done = 1;
}

static void record__sig_exit(void)
{
        if (signr == -1)
                return;

        signal(signr, SIG_DFL);
        raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

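/*
 * Write out one chunk of AUX area trace data: the PERF_RECORD_AUXTRACE
 * event first, then the payload, which arrives as up to two pieces
 * (data2 is non-empty when the AUX buffer wrapped).  The payload is
 * padded to an 8-byte boundary, e.g. len1 + len2 == 13 gives padding == 3.
 * For non-pipe output, the file offset of the event is also remembered
 * in the auxtrace index so it can be looked up quickly later.
 */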
static int record__process_auxtrace(struct perf_tool *tool,
                                    union perf_event *event, void *data1,
                                    size_t len1, void *data2, size_t len2)
{
        struct record *rec = container_of(tool, struct record, tool);
        struct perf_data_file *file = &rec->file;
        size_t padding;
        u8 pad[8] = {0};

        if (!perf_data_file__is_pipe(file)) {
                off_t file_offset;
                int fd = perf_data_file__fd(file);
                int err;

                file_offset = lseek(fd, 0, SEEK_CUR);
                if (file_offset == -1)
                        return -1;
                err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
                                                     event, file_offset);
                if (err)
                        return err;
        }

        /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
        padding = (len1 + len2) & 7;
        if (padding)
                padding = 8 - padding;

        record__write(rec, event, event->header.size);
        record__write(rec, data1, len1);
        if (len2)
                record__write(rec, data2, len2);
        record__write(rec, &pad, padding);

        return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
                                      struct auxtrace_mmap *mm)
{
        int ret;

        ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
                                  record__process_auxtrace);
        if (ret < 0)
                return ret;

        if (ret)
                rec->samples++;

        return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
                                               struct auxtrace_mmap *mm)
{
        int ret;

        ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
                                           record__process_auxtrace,
                                           rec->opts.auxtrace_snapshot_size);
        if (ret < 0)
                return ret;

        if (ret)
                rec->samples++;

        return 0;
}

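/*
 * In snapshot mode AUX data is only read out on request: dump the AUX
 * area of every mapped ring buffer in one pass.
 */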
static int record__auxtrace_read_snapshot_all(struct record *rec)
{
        int i;
        int rc = 0;

        for (i = 0; i < rec->evlist->nr_mmaps; i++) {
                struct auxtrace_mmap *mm =
                                &rec->evlist->mmap[i].auxtrace_mmap;

                if (!mm->base)
                        continue;

                if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
                        rc = -1;
                        goto out;
                }
        }
out:
        return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec)
{
        pr_debug("Recording AUX area tracing snapshot\n");
        if (record__auxtrace_read_snapshot_all(rec) < 0) {
                trigger_error(&auxtrace_snapshot_trigger);
        } else {
                if (auxtrace_record__snapshot_finish(rec->itr))
                        trigger_error(&auxtrace_snapshot_trigger);
                else
                        trigger_ready(&auxtrace_snapshot_trigger);
        }
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
                               struct auxtrace_mmap *mm __maybe_unused)
{
        return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
        return 0;
}

#endif

static int record__mmap_evlist(struct record *rec,
                               struct perf_evlist *evlist)
{
        struct record_opts *opts = &rec->opts;
        char msg[512];

        if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
                                 opts->auxtrace_mmap_pages,
                                 opts->auxtrace_snapshot_mode) < 0) {
                if (errno == EPERM) {
                        pr_err("Permission error mapping pages.\n"
                               "Consider increasing "
                               "/proc/sys/kernel/perf_event_mlock_kb,\n"
                               "or try again with a smaller value of -m/--mmap_pages.\n"
                               "(current value: %u,%u)\n",
                               opts->mmap_pages, opts->auxtrace_mmap_pages);
                        return -errno;
                } else {
                        pr_err("failed to mmap with %d (%s)\n", errno,
                                str_error_r(errno, msg, sizeof(msg)));
                        if (errno)
                                return -errno;
                        else
                                return -EINVAL;
                }
        }
        return 0;
}

static int record__mmap(struct record *rec)
{
        return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
        char msg[512];
        struct perf_evsel *pos;
        struct perf_evlist *evlist = rec->evlist;
        struct perf_session *session = rec->session;
        struct record_opts *opts = &rec->opts;
        struct perf_evsel_config_term *err_term;
        int rc = 0;

        perf_evlist__config(evlist, opts, &callchain_param);

        evlist__for_each_entry(evlist, pos) {
try_again:
                if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
                        if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
                                if (verbose)
                                        ui__warning("%s\n", msg);
                                goto try_again;
                        }

                        rc = -errno;
                        perf_evsel__open_strerror(pos, &opts->target,
                                                  errno, msg, sizeof(msg));
                        ui__error("%s\n", msg);
                        goto out;
                }
        }

        if (perf_evlist__apply_filters(evlist, &pos)) {
                error("failed to set filter \"%s\" on event %s with %d (%s)\n",
                        pos->filter, perf_evsel__name(pos), errno,
                        str_error_r(errno, msg, sizeof(msg)));
                rc = -1;
                goto out;
        }

        if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
                error("failed to set config \"%s\" on event %s with %d (%s)\n",
                      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
                      str_error_r(errno, msg, sizeof(msg)));
                rc = -1;
                goto out;
        }

        rc = record__mmap(rec);
        if (rc)
                goto out;

        session->evlist = evlist;
        perf_session__set_id_hdr_size(session);
out:
        return rc;
}

static int process_sample_event(struct perf_tool *tool,
                                union perf_event *event,
                                struct perf_sample *sample,
                                struct perf_evsel *evsel,
                                struct machine *machine)
{
        struct record *rec = container_of(tool, struct record, tool);

        rec->samples++;

        return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
        struct perf_data_file *file  = &rec->file;
        struct perf_session *session = rec->session;

        if (file->size == 0)
                return 0;

        /*
         * During this process, it'll load the kernel map and replace
         * dso->long_name with the real pathname it found.  In this case
         * we prefer the vmlinux path like
         *   /lib/modules/3.16.4/build/vmlinux
         *
         * rather than the build-id path (in the debug directory):
         *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
         */
        symbol_conf.ignore_vmlinux_buildid = true;

        /*
         * If --buildid-all is given, it marks all DSOs regardless of hits,
         * so there is no need to process samples.
         */
        if (rec->buildid_all)
                rec->tool.sample = NULL;

        return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
        int err;
        struct perf_tool *tool = data;
        /*
         * For a guest kernel, when processing the record & report
         * subcommands we arrange the module mmaps prior to the guest
         * kernel mmap and trigger a DSO preload, because by default guest
         * module symbols are loaded from guest kallsyms instead of from
         * /lib/modules/XXX/XXX.  This avoids missing symbols when the
         * first address falls in a module instead of in the guest kernel.
         */
        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s module"
                       " information.\n", machine->pid);

        /*
         * We use _stext for the guest kernel because the guest kernel's
         * /proc/kallsyms sometimes has no _text.
         */
        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);
}

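/*
 * A PERF_RECORD_FINISHED_ROUND event is written after each pass over all
 * the mmaps, letting the ordered-events code on the reporting side flush
 * the events buffered so far in timestamp order.
 */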
static struct perf_event_header finished_round_event = {
        .size = sizeof(struct perf_event_header),
        .type = PERF_RECORD_FINISHED_ROUND,
};

static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
                                    bool backward)
{
        u64 bytes_written = rec->bytes_written;
        int i;
        int rc = 0;
        struct perf_mmap *maps;

        if (!evlist)
                return 0;

        maps = backward ? evlist->backward_mmap : evlist->mmap;
        if (!maps)
                return 0;

        if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
                return 0;

        for (i = 0; i < evlist->nr_mmaps; i++) {
                struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;

                if (maps[i].base) {
                        if (record__mmap_read(rec, &maps[i],
                                              evlist->overwrite, backward) != 0) {
                                rc = -1;
                                goto out;
                        }
                }

                if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
                    record__auxtrace_mmap_read(rec, mm) != 0) {
                        rc = -1;
                        goto out;
                }
        }

        /*
         * Mark the round finished in case we wrote
         * at least one event.
         */
        if (bytes_written != rec->bytes_written)
                rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

        if (backward)
                perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
        return rc;
}

static int record__mmap_read_all(struct record *rec)
{
        int err;

        err = record__mmap_read_evlist(rec, rec->evlist, false);
        if (err)
                return err;

        return record__mmap_read_evlist(rec, rec->evlist, true);
}

static void record__init_features(struct record *rec)
{
        struct perf_session *session = rec->session;
        int feat;

        for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
                perf_header__set_feat(&session->header, feat);

        if (rec->no_buildid)
                perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

        if (!have_tracepoints(&rec->evlist->entries))
                perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

        if (!rec->opts.branch_stack)
                perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

        if (!rec->opts.full_auxtrace)
                perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

        perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
        struct perf_data_file *file = &rec->file;
        int fd = perf_data_file__fd(file);

        if (file->is_pipe)
                return;

        rec->session->header.data_size += rec->bytes_written;
        file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);

        if (!rec->no_buildid) {
                process_buildids(rec);

                if (rec->buildid_all)
                        dsos__hit_all(rec->session);
        }
        perf_session__write_header(rec->session, rec->evlist, fd, true);
}

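/*
 * Build a one-entry thread_map on the stack covering just the workload's
 * pid and synthesize the corresponding comm/mmap events for it.
 */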
static int record__synthesize_workload(struct record *rec, bool tail)
{
        struct {
                struct thread_map map;
                struct thread_map_data map_data;
        } thread_map;

        if (rec->opts.tail_synthesize != tail)
                return 0;

        thread_map.map.nr = 1;
        thread_map.map.map[0].pid = rec->evlist->workload.pid;
        thread_map.map.map[0].comm = NULL;
        return perf_event__synthesize_thread_map(&rec->tool, &thread_map.map,
                                                 process_synthesized_event,
                                                 &rec->session->machines.host,
                                                 rec->opts.sample_address,
                                                 rec->opts.proc_map_timeout);
}

static int record__synthesize(struct record *rec, bool tail);

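/*
 * Rotate the output: finish the current perf.data and switch to a new
 * output file named with the current timestamp.  Called when the
 * switch-output trigger (SIGUSR2) fires and, with at_exit set, once more
 * when recording ends.
 */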
static int
record__switch_output(struct record *rec, bool at_exit)
{
        struct perf_data_file *file = &rec->file;
        int fd, err;

        /* Same size as a real timestamp, e.g. "2015122520103046" */
        char timestamp[] = "InvalidTimestamp";

        record__synthesize(rec, true);
        if (target__none(&rec->opts.target))
                record__synthesize_workload(rec, true);

        rec->samples = 0;
        record__finish_output(rec);
        err = fetch_current_timestamp(timestamp, sizeof(timestamp));
        if (err) {
                pr_err("Failed to get current timestamp\n");
                return -EINVAL;
        }

        fd = perf_data_file__switch(file, timestamp,
                                    rec->session->header.data_offset,
                                    at_exit);
        if (fd >= 0 && !at_exit) {
                rec->bytes_written = 0;
                rec->session->header.data_size = 0;
        }

        if (!quiet)
                fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
                        file->path, timestamp);

        /* Output tracking events */
        if (!at_exit) {
                record__synthesize(rec, false);

                /*
                 * In 'perf record --switch-output' without -a,
                 * record__synthesize() in record__switch_output() won't
                 * generate tracking events because there's no thread_map
                 * in evlist, so the newly created perf.data would not
                 * contain mmap and comm information.
                 * Create a fake thread_map and directly call
                 * perf_event__synthesize_thread_map() for those events.
                 */
                if (target__none(&rec->opts.target))
                        record__synthesize_workload(rec, false);
        }
        return fd;
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked for it by setting
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
                                        siginfo_t *info,
                                        void *ucontext __maybe_unused)
{
        workload_exec_errno = info->si_value.sival_int;
        done = 1;
        child_finished = 1;
}

static void snapshot_sig_handler(int sig);

int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
                            struct perf_tool *tool __maybe_unused,
                            perf_event__handler_t process __maybe_unused,
                            struct machine *machine __maybe_unused)
{
        return 0;
}

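/*
 * perf_event__synth_time_conv() needs a perf_event_mmap_page to read the
 * time conversion parameters from; any mapped ring buffer's control page
 * will do, preferring a forward mmap over a backward one.
 */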
static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct perf_evlist *evlist)
{
        if (evlist) {
                if (evlist->mmap && evlist->mmap[0].base)
                        return evlist->mmap[0].base;
                if (evlist->backward_mmap && evlist->backward_mmap[0].base)
                        return evlist->backward_mmap[0].base;
        }
        return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
        return perf_evlist__pick_pc(rec->evlist);
}

static int record__synthesize(struct record *rec, bool tail)
{
        struct perf_session *session = rec->session;
        struct machine *machine = &session->machines.host;
        struct perf_data_file *file = &rec->file;
        struct record_opts *opts = &rec->opts;
        struct perf_tool *tool = &rec->tool;
        int fd = perf_data_file__fd(file);
        int err = 0;

        if (rec->opts.tail_synthesize != tail)
                return 0;

        if (file->is_pipe) {
                err = perf_event__synthesize_attrs(tool, session,
                                                   process_synthesized_event);
                if (err < 0) {
                        pr_err("Couldn't synthesize attrs.\n");
                        goto out;
                }

                if (have_tracepoints(&rec->evlist->entries)) {
                        /*
                         * FIXME err <= 0 here actually means that
                         * there were no tracepoints, so it's not really
                         * an error, just that we don't need to
                         * synthesize anything.  We really have to
                         * return this more properly and also
                         * propagate errors that currently call die().
                         */
                        err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
                                                                  process_synthesized_event);
                        if (err <= 0) {
                                pr_err("Couldn't record tracing data.\n");
                                goto out;
                        }
                        rec->bytes_written += err;
                }
        }

        err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
                                          process_synthesized_event, machine);
        if (err)
                goto out;

        if (rec->opts.full_auxtrace) {
                err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
                                        session, process_synthesized_event);
                if (err)
                        goto out;
        }

        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine);
        WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
                           "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                           "Check /proc/kallsyms permission or run as root.\n");

        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
                           "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                           "Check /proc/modules permission or run as root.\n");

        if (perf_guest) {
                machines__process_guests(&session->machines,
                                         perf_event__synthesize_guest_os, tool);
        }

        err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
                                            process_synthesized_event, opts->sample_address,
                                            opts->proc_map_timeout);
out:
        return err;
}

static int __cmd_record(struct record *rec, int argc, const char **argv)
{
        int err;
        int status = 0;
        unsigned long waking = 0;
        const bool forks = argc > 0;
        struct machine *machine;
        struct perf_tool *tool = &rec->tool;
        struct record_opts *opts = &rec->opts;
        struct perf_data_file *file = &rec->file;
        struct perf_session *session;
        bool disabled = false, draining = false;
        int fd;

        rec->progname = argv[0];

        atexit(record__sig_exit);
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);
        signal(SIGTERM, sig_handler);

        if (rec->opts.auxtrace_snapshot_mode || rec->switch_output) {
                signal(SIGUSR2, snapshot_sig_handler);
                if (rec->opts.auxtrace_snapshot_mode)
                        trigger_on(&auxtrace_snapshot_trigger);
                if (rec->switch_output)
                        trigger_on(&switch_output_trigger);
        } else {
                signal(SIGUSR2, SIG_IGN);
        }

        session = perf_session__new(file, false, tool);
        if (session == NULL) {
                pr_err("Perf session creation failed.\n");
                return -1;
        }

        fd = perf_data_file__fd(file);
        rec->session = session;

        record__init_features(rec);

        if (forks) {
                err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
                                                    argv, file->is_pipe,
                                                    workload_exec_failed_signal);
                if (err < 0) {
                        pr_err("Couldn't run the workload!\n");
                        status = err;
                        goto out_delete_session;
                }
        }

        if (record__open(rec) != 0) {
                err = -1;
                goto out_child;
        }

        err = bpf__apply_obj_config();
        if (err) {
                char errbuf[BUFSIZ];

                bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
                pr_err("ERROR: Apply config to BPF failed: %s\n",
                         errbuf);
                goto out_child;
        }

        /*
         * Normally perf_session__new would do this, but it doesn't have the
         * evlist.
         */
        if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
                pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
                rec->tool.ordered_events = false;
        }

        if (!rec->evlist->nr_groups)
                perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

        if (file->is_pipe) {
                err = perf_header__write_pipe(fd);
                if (err < 0)
                        goto out_child;
        } else {
                err = perf_session__write_header(session, rec->evlist, fd, false);
                if (err < 0)
                        goto out_child;
        }

        if (!rec->no_buildid
            && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
                pr_err("Couldn't generate buildids. "
                       "Use --no-buildid to profile anyway.\n");
                err = -1;
                goto out_child;
        }

        machine = &session->machines.host;

        err = record__synthesize(rec, false);
        if (err < 0)
                goto out_child;

        if (rec->realtime_prio) {
                struct sched_param param;

                param.sched_priority = rec->realtime_prio;
                if (sched_setscheduler(0, SCHED_FIFO, &param)) {
                        pr_err("Could not set realtime priority.\n");
                        err = -1;
                        goto out_child;
                }
        }

        /*
         * When perf is starting the traced process, all the events
         * (apart from group members) have enable_on_exec=1 set,
         * so don't spoil it by prematurely enabling them.
         */
        if (!target__none(&opts->target) && !opts->initial_delay)
                perf_evlist__enable(rec->evlist);

        /*
         * Let the child rip
         */
        if (forks) {
                union perf_event *event;

                event = malloc(sizeof(event->comm) + machine->id_hdr_size);
                if (event == NULL) {
                        err = -ENOMEM;
                        goto out_child;
                }

                /*
                 * Some H/W events are generated before the COMM event,
                 * which is emitted during exec(), so perf script cannot
                 * see a correct process name for those events.
                 * Synthesize a COMM event first to prevent that.
                 */
                perf_event__synthesize_comm(tool, event,
                                            rec->evlist->workload.pid,
                                            process_synthesized_event,
                                            machine);
                free(event);

                perf_evlist__start_workload(rec->evlist);
        }

        if (opts->initial_delay) {
                usleep(opts->initial_delay * USEC_PER_MSEC);
                perf_evlist__enable(rec->evlist);
        }

        trigger_ready(&auxtrace_snapshot_trigger);
        trigger_ready(&switch_output_trigger);
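        /*
         * Main event loop: drain all the mmaps, service the auxtrace
         * snapshot and switch-output triggers, then poll for more data
         * until the workload exits or we are interrupted.
         */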
        for (;;) {
                unsigned long long hits = rec->samples;

                /*
                 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY here:
                 * when done == true and hits != rec->samples in the
                 * previous round.
                 *
                 * perf_evlist__toggle_bkw_mmap() ensures we never
                 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
                 */
                if (trigger_is_hit(&switch_output_trigger) || done || draining)
                        perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

                if (record__mmap_read_all(rec) < 0) {
                        trigger_error(&auxtrace_snapshot_trigger);
                        trigger_error(&switch_output_trigger);
                        err = -1;
                        goto out_child;
                }

                if (auxtrace_record__snapshot_started) {
                        auxtrace_record__snapshot_started = 0;
                        if (!trigger_is_error(&auxtrace_snapshot_trigger))
                                record__read_auxtrace_snapshot(rec);
                        if (trigger_is_error(&auxtrace_snapshot_trigger)) {
                                pr_err("AUX area tracing snapshot failed\n");
                                err = -1;
                                goto out_child;
                        }
                }

                if (trigger_is_hit(&switch_output_trigger)) {
                        /*
                         * If switch_output_trigger is hit, the data in
                         * the overwritable ring buffer should have been
                         * collected, so bkw_mmap_state should be set to
                         * BKW_MMAP_EMPTY.
                         *
                         * If SIGUSR2 was raised after or during
                         * record__mmap_read_all(), it didn't collect data
                         * from the overwritable ring buffer.  Read again.
                         */
                        if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
                                continue;
                        trigger_ready(&switch_output_trigger);

                        /*
                         * Reenable events in the overwrite ring buffer after
                         * record__mmap_read_all(): we should have collected
                         * data from it.
                         */
                        perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

                        if (!quiet)
                                fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
                                        waking);
                        waking = 0;
                        fd = record__switch_output(rec, false);
                        if (fd < 0) {
                                pr_err("Failed to switch to new file\n");
                                trigger_error(&switch_output_trigger);
                                err = fd;
                                goto out_child;
                        }
                }

                if (hits == rec->samples) {
                        if (done || draining)
                                break;
                        err = perf_evlist__poll(rec->evlist, -1);
                        /*
                         * Propagate error, only if there's any. Ignore positive
                         * number of returned events and interrupt error.
                         */
                        if (err > 0 || (err < 0 && errno == EINTR))
                                err = 0;
                        waking++;

                        if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
                                draining = true;
                }

                /*
                 * When perf is starting the traced process, at the end events
                 * die with the process and we wait for that. Thus no need to
                 * disable events in this case.
                 */
                if (done && !disabled && !target__none(&opts->target)) {
                        trigger_off(&auxtrace_snapshot_trigger);
                        perf_evlist__disable(rec->evlist);
                        disabled = true;
                }
        }
        trigger_off(&auxtrace_snapshot_trigger);
        trigger_off(&switch_output_trigger);

        if (forks && workload_exec_errno) {
                char msg[STRERR_BUFSIZE];
                const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
                pr_err("Workload failed: %s\n", emsg);
                err = -1;
                goto out_child;
        }

        if (!quiet)
                fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

        if (target__none(&rec->opts.target))
                record__synthesize_workload(rec, true);

out_child:
        if (forks) {
                int exit_status;

                if (!child_finished)
                        kill(rec->evlist->workload.pid, SIGTERM);

                wait(&exit_status);

                if (err < 0)
                        status = err;
                else if (WIFEXITED(exit_status))
                        status = WEXITSTATUS(exit_status);
                else if (WIFSIGNALED(exit_status))
                        signr = WTERMSIG(exit_status);
        } else
                status = err;

        record__synthesize(rec, true);
        /* this will be recalculated during process_buildids() */
        rec->samples = 0;

        if (!err) {
                if (!rec->timestamp_filename) {
                        record__finish_output(rec);
                } else {
                        fd = record__switch_output(rec, true);
                        if (fd < 0) {
                                status = fd;
                                goto out_delete_session;
                        }
                }
        }

        if (!err && !quiet) {
                char samples[128];
                const char *postfix = rec->timestamp_filename ?
                                        ".<timestamp>" : "";

                if (rec->samples && !rec->opts.full_auxtrace)
                        scnprintf(samples, sizeof(samples),
                                  " (%" PRIu64 " samples)", rec->samples);
                else
                        samples[0] = '\0';

                fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
                        perf_data_file__size(file) / 1024.0 / 1024.0,
                        file->path, postfix, samples);
        }

out_delete_session:
        perf_session__delete(session);
        return status;
}

static void callchain_debug(struct callchain_param *callchain)
{
        static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

        pr_debug("callchain: type %s\n", str[callchain->record_mode]);

        if (callchain->record_mode == CALLCHAIN_DWARF)
                pr_debug("callchain: stack dump size %d\n",
                         callchain->dump_size);
}

int record_opts__parse_callchain(struct record_opts *record,
                                 struct callchain_param *callchain,
                                 const char *arg, bool unset)
{
        int ret;
        callchain->enabled = !unset;

        /* --no-call-graph */
        if (unset) {
                callchain->record_mode = CALLCHAIN_NONE;
                pr_debug("callchain: disabled\n");
                return 0;
        }

        ret = parse_callchain_record_opt(arg, callchain);
        if (!ret) {
                /* Enable data address sampling for DWARF unwind. */
                if (callchain->record_mode == CALLCHAIN_DWARF)
                        record->sample_address = true;
                callchain_debug(callchain);
        }

        return ret;
}

int record_parse_callchain_opt(const struct option *opt,
                               const char *arg,
                               int unset)
{
        return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
                         const char *arg __maybe_unused,
                         int unset __maybe_unused)
{
        struct callchain_param *callchain = opt->value;

        callchain->enabled = true;

        if (callchain->record_mode == CALLCHAIN_NONE)
                callchain->record_mode = CALLCHAIN_FP;

        callchain_debug(callchain);
        return 0;
}

static int perf_record_config(const char *var, const char *value, void *cb)
{
        struct record *rec = cb;

        if (!strcmp(var, "record.build-id")) {
                if (!strcmp(value, "cache"))
                        rec->no_buildid_cache = false;
                else if (!strcmp(value, "no-cache"))
                        rec->no_buildid_cache = true;
                else if (!strcmp(value, "skip"))
                        rec->no_buildid = true;
                else
                        return -1;
                return 0;
        }
        if (!strcmp(var, "record.call-graph"))
                var = "call-graph.record-mode"; /* fall-through */

        return perf_default_config(var, value, cb);
}

struct clockid_map {
        const char *name;
        int clockid;
};

#define CLOCKID_MAP(n, c)       \
        { .name = n, .clockid = (c), }

#define CLOCKID_END     { .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
        /* available for all events, NMI safe */
        CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
        CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

        /* available for some events */
        CLOCKID_MAP("realtime", CLOCK_REALTIME),
        CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
        CLOCKID_MAP("tai", CLOCK_TAI),

        /* available for the lazy */
        CLOCKID_MAP("mono", CLOCK_MONOTONIC),
        CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
        CLOCKID_MAP("real", CLOCK_REALTIME),
        CLOCKID_MAP("boot", CLOCK_BOOTTIME),

        CLOCKID_END,
};

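/*
 * -k/--clockid accepts either a raw clockid number or one of the names
 * above, case-insensitively and with or without the "CLOCK_" prefix.
 */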
static int parse_clockid(const struct option *opt, const char *str, int unset)
{
        struct record_opts *opts = (struct record_opts *)opt->value;
        const struct clockid_map *cm;
        const char *ostr = str;

        if (unset) {
                opts->use_clockid = 0;
                return 0;
        }

        /* no arg passed */
        if (!str)
                return 0;

        /* no setting it twice */
        if (opts->use_clockid)
                return -1;

        opts->use_clockid = true;

        /* if it's a number, we're done */
        if (sscanf(str, "%d", &opts->clockid) == 1)
                return 0;

        /* allow a "CLOCK_" prefix to the name */
        if (!strncasecmp(str, "CLOCK_", 6))
                str += 6;

        for (cm = clockids; cm->name; cm++) {
                if (!strcasecmp(str, cm->name)) {
                        opts->clockid = cm->clockid;
                        return 0;
                }
        }

        opts->use_clockid = false;
        ui__warning("unknown clockid %s, check man page\n", ostr);
        return -1;
}

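/*
 * -m/--mmap-pages takes "pages[,pages]": the first value sizes the data
 * mmaps, the optional second value sizes the AUX area tracing mmaps.
 */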
static int record__parse_mmap_pages(const struct option *opt,
                                    const char *str,
                                    int unset __maybe_unused)
{
        struct record_opts *opts = opt->value;
        char *s, *p;
        unsigned int mmap_pages;
        int ret;

        if (!str)
                return -EINVAL;

        s = strdup(str);
        if (!s)
                return -ENOMEM;

        p = strchr(s, ',');
        if (p)
                *p = '\0';

        if (*s) {
                ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
                if (ret)
                        goto out_free;
                opts->mmap_pages = mmap_pages;
        }

        if (!p) {
                ret = 0;
                goto out_free;
        }

        ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
        if (ret)
                goto out_free;

        opts->auxtrace_mmap_pages = mmap_pages;

out_free:
        free(s);
        return ret;
}

static const char * const __record_usage[] = {
        "perf record [<options>] [<command>]",
        "perf record [<options>] -- <command> [<options>]",
        NULL
};
const char * const *record_usage = __record_usage;
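
/*
 * Illustrative invocations (not exhaustive, see the option table below):
 *
 *   perf record ./myprog              # profile a workload
 *   perf record -g -- ./myprog arg    # ... with call graphs
 *   perf record -a sleep 5            # whole system for 5 seconds
 *   perf record -p 1234 -o out.data   # existing pid, custom output file
 */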

/*
 * XXX Ideally this would be local to cmd_record() and passed to a record__new,
 * because we need to have access to it in record__exit, which is called
 * after cmd_record() exits, but since record_options needs to be accessible
 * to builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
        .opts = {
                .sample_time         = true,
                .mmap_pages          = UINT_MAX,
                .user_freq           = UINT_MAX,
                .user_interval       = ULLONG_MAX,
                .freq                = 4000,
                .target              = {
                        .uses_mmap   = true,
                        .default_per_cpu = true,
                },
                .proc_map_timeout     = 500,
        },
        .tool = {
                .sample         = process_sample_event,
                .fork           = perf_event__process_fork,
                .exit           = perf_event__process_exit,
                .comm           = perf_event__process_comm,
                .mmap           = perf_event__process_mmap,
                .mmap2          = perf_event__process_mmap2,
                .ordered_events = true,
        },
};

const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
        "\n\t\t\t\tDefault: fp";

static bool dry_run;

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record',
 * using pipes, etc.
 */
1397 struct option __record_options[] = {
1398         OPT_CALLBACK('e', "event", &record.evlist, "event",
1399                      "event selector. use 'perf list' to list available events",
1400                      parse_events_option),
1401         OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1402                      "event filter", parse_filter),
1403         OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1404                            NULL, "don't record events from perf itself",
1405                            exclude_perf),
1406         OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1407                     "record events on existing process id"),
1408         OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1409                     "record events on existing thread id"),
1410         OPT_INTEGER('r', "realtime", &record.realtime_prio,
1411                     "collect data with this RT SCHED_FIFO priority"),
1412         OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1413                     "collect data without buffering"),
1414         OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1415                     "collect raw sample records from all opened counters"),
1416         OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1417                             "system-wide collection from all CPUs"),
1418         OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1419                     "list of cpus to monitor"),
1420         OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1421         OPT_STRING('o', "output", &record.file.path, "file",
1422                     "output file name"),
1423         OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1424                         &record.opts.no_inherit_set,
1425                         "child tasks do not inherit counters"),
1426         OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1427                     "synthesize non-sample events at the end of output"),
1428         OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1429         OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
1430         OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1431                      "number of mmap data pages and AUX area tracing mmap pages",
1432                      record__parse_mmap_pages),
1433         OPT_BOOLEAN(0, "group", &record.opts.group,
1434                     "put the counters into a counter group"),
1435         OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1436                            NULL, "enables call-graph recording" ,
1437                            &record_callchain_opt),
1438         OPT_CALLBACK(0, "call-graph", &record.opts,
1439                      "record_mode[,record_size]", record_callchain_help,
1440                      &record_parse_callchain_opt),
1441         OPT_INCR('v', "verbose", &verbose,
1442                     "be more verbose (show counter open errors, etc)"),
1443         OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1444         OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1445                     "per thread counts"),
1446         OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1447         OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
1448         OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1449                         &record.opts.sample_time_set,
1450                         "Record the sample timestamps"),
1451         OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
1452         OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1453                     "don't sample"),
1454         OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1455                         &record.no_buildid_cache_set,
1456                         "do not update the buildid cache"),
1457         OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1458                         &record.no_buildid_set,
1459                         "do not collect buildids in perf.data"),
1460         OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1461                      "monitor event in cgroup name only",
1462                      parse_cgroups),
1463         OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1464                   "ms to wait before starting measurement after program start"),
1465         OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1466                    "user to profile"),
1467
1468         OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1469                      "branch any", "sample any taken branches",
1470                      parse_branch_stack),
1471
1472         OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1473                      "branch filter mask", "branch stack filter modes",
1474                      parse_branch_stack),
1475         OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1476                     "sample by weight (on special events only)"),
1477         OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1478                     "sample transaction flags (special events only)"),
1479         OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1480                     "use per-thread mmaps"),
1481         OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1482                     "sample selected machine registers on interrupt,"
1483                     " use -I ? to list register names", parse_regs),
1484         OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1485                     "Record running/enabled time of read (:S) events"),
1486         OPT_CALLBACK('k', "clockid", &record.opts,
1487                      "clockid", "clockid to use for events, see clock_gettime()",
1488                      parse_clockid),
1489         OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1490                           "opts", "AUX area tracing Snapshot Mode", ""),
1491         OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
1492                         "per thread proc mmap processing timeout in ms"),
1493         OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1494                     "Record context switch events"),
1495         OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1496                          "Configure all used events to run in kernel space.",
1497                          PARSE_OPT_EXCLUSIVE),
1498         OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1499                          "Configure all used events to run in user space.",
1500                          PARSE_OPT_EXCLUSIVE),
1501         OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1502                    "clang binary to use for compiling BPF scriptlets"),
1503         OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1504                    "options passed to clang when compiling BPF scriptlets"),
1505         OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1506                    "file", "vmlinux pathname"),
1507         OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1508                     "Record build-id of all DSOs regardless of hits"),
1509         OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
1510                     "append timestamp to output filename"),
1511         OPT_BOOLEAN(0, "switch-output", &record.switch_output,
1512                     "Switch output when SIGUSR2 is received"),
1513         OPT_BOOLEAN(0, "dry-run", &dry_run,
1514                     "Parse options then exit"),
1515         OPT_END()
1516 };
1517
1518 struct option *record_options = __record_options;
1519
1520 int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
1521 {
1522         int err;
1523         struct record *rec = &record;
1524         char errbuf[BUFSIZ];
1525
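     /*
      * When perf was built without libbpf, mark the BPF-related options
      * as unavailable so that using them reports the responsible build
      * option (NO_LIBBPF=1) instead of failing in a confusing way.
      */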
1526 #ifndef HAVE_LIBBPF_SUPPORT
1527 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
1528         set_nobuild('\0', "clang-path", true);
1529         set_nobuild('\0', "clang-opt", true);
1530 # undef set_nobuild
1531 #endif
1532
1533 #ifndef HAVE_BPF_PROLOGUE
1534 # if !defined (HAVE_DWARF_SUPPORT)
1535 #  define REASON  "NO_DWARF=1"
1536 # elif !defined (HAVE_LIBBPF_SUPPORT)
1537 #  define REASON  "NO_LIBBPF=1"
1538 # else
1539 #  define REASON  "this architecture doesn't support BPF prologue"
1540 # endif
1541 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
1542         set_nobuild('\0', "vmlinux", true);
1543 # undef set_nobuild
1544 # undef REASON
1545 #endif
1546
1547         rec->evlist = perf_evlist__new();
1548         if (rec->evlist == NULL)
1549                 return -ENOMEM;
1550
1551         perf_config(perf_record_config, rec);
1552
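             /*
              * PARSE_OPT_STOP_AT_NON_OPTION stops parsing at the first
              * non-option argument, so whatever remains in argv is taken
              * as the workload command to fork and profile.
              */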
1553         argc = parse_options(argc, argv, record_options, record_usage,
1554                             PARSE_OPT_STOP_AT_NON_OPTION);
1555         if (!argc && target__none(&rec->opts.target))
1556                 usage_with_options(record_usage, record_options);
1557
1558         if (nr_cgroups && !rec->opts.target.system_wide) {
1559                 usage_with_options_msg(record_usage, record_options,
1560                         "cgroup monitoring only available in system-wide mode");
1561
1562         }
1563         if (rec->opts.record_switch_events &&
1564             !perf_can_record_switch_events()) {
1565                 ui__error("kernel does not support recording context switch events\n");
1566                 parse_options_usage(record_usage, record_options, "switch-events", 0);
1567                 return -EINVAL;
1568         }
1569
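             /*
              * Each output switch writes a new perf.data file, so force
              * timestamped filenames to keep successive files distinct.
              */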
1570         if (rec->switch_output)
1571                 rec->timestamp_filename = true;
1572
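             /*
              * Set up the AUX area tracing backend for this architecture,
              * if any (e.g. Intel PT). A NULL itr with err == 0 simply
              * means AUX area tracing is not in use.
              */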
1573         if (!rec->itr) {
1574                 rec->itr = auxtrace_record__init(rec->evlist, &err);
1575                 if (err)
1576                         goto out;
1577         }
1578
1579         err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
1580                                               rec->opts.auxtrace_snapshot_opts);
1581         if (err)
1582                 goto out;
1583
1584         /*
1585          * Allow aliases to facilitate the lookup of symbols for address
1586          * filters. Refer to auxtrace_parse_filters().
1587          */
1588         symbol_conf.allow_aliases = true;
1589
1590         symbol__init(NULL);
1591
1592         err = auxtrace_parse_filters(rec->evlist);
1593         if (err)
1594                 goto out;
1595
1596         if (dry_run)
1597                 goto out;
1598
1599         err = bpf__setup_stdout(rec->evlist);
1600         if (err) {
1601                 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
1602                 pr_err("ERROR: Setting up BPF stdout failed: %s\n",
1603                        errbuf);
1604                 goto out;
1605         }
1606
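             /*
              * Failure paths below that jump to 'out' without setting err
              * report -ENOMEM, e.g. when adding the default event fails.
              */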
1607         err = -ENOMEM;
1608
1609         if (symbol_conf.kptr_restrict)
1610                 pr_warning(
1611 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1612 "check /proc/sys/kernel/kptr_restrict.\n\n"
1613 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1614 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1615 "Samples in kernel modules won't be resolved at all.\n\n"
1616 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1617 "even with a suitable vmlinux or kallsyms file.\n\n");
1618
1619         if (rec->no_buildid_cache || rec->no_buildid) {
1620                 disable_buildid_cache();
1621         } else if (rec->switch_output) {
1622                 /*
1623                  * In 'perf record --switch-output', disable buildid
1624                  * generation by default to reduce data file switching
1625                  * overhead. Still generate buildids if they are explicitly
1626                  * requested, using
1627                  *
1628                  *  perf record --switch-output --no-no-buildid \
1629                  *              --no-no-buildid-cache
1630                  *
1631                  * The code below is equivalent to:
1632                  *
1633                  * if ((rec->no_buildid || !rec->no_buildid_set) &&
1634                  *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
1635                  *         disable_buildid_cache();
1636                  */
1637                 bool disable = true;
1638
1639                 if (rec->no_buildid_set && !rec->no_buildid)
1640                         disable = false;
1641                 if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
1642                         disable = false;
1643                 if (disable) {
1644                         rec->no_buildid = true;
1645                         rec->no_buildid_cache = true;
1646                         disable_buildid_cache();
1647                 }
1648         }
1649
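             /*
              * In overwrite mode only the most recently written data
              * survives in the ring buffer, so synthesize the non-sample
              * (side-band) events at the end of the run to match it.
              */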
1650         if (rec->opts.overwrite)
1651                 rec->opts.tail_synthesize = true;
1652
1653         if (rec->evlist->nr_entries == 0 &&
1654             perf_evlist__add_default(rec->evlist) < 0) {
1655                 pr_err("Not enough memory for event selector list\n");
1656                 goto out;
1657         }
1658
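             /*
              * When attaching to an existing thread, do not let its
              * children inherit the counters unless inheritance was
              * requested explicitly on the command line.
              */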
1659         if (rec->opts.target.tid && !rec->opts.no_inherit_set)
1660                 rec->opts.no_inherit = true;
1661
1662         err = target__validate(&rec->opts.target);
1663         if (err) {
1664                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1665                 ui__warning("%s", errbuf);
1666         }
1667
1668         err = target__parse_uid(&rec->opts.target);
1669         if (err) {
1670                 int saved_errno = errno;
1671
1672                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1673                 ui__error("%s", errbuf);
1674
1675                 err = -saved_errno;
1676                 goto out;
1677         }
1678
1679         err = -ENOMEM;
1680         if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
1681                 usage_with_options(record_usage, record_options);
1682
1683         err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
1684         if (err)
1685                 goto out;
1686
1687         /*
1688          * Take all buildids when the file contains AUX area
1689          * tracing data, because we do not decode the trace;
1690          * decoding it would take too long.
1691          */
1692         if (rec->opts.full_auxtrace)
1693                 rec->buildid_all = true;
1694
1695         if (record_opts__config(&rec->opts)) {
1696                 err = -EINVAL;
1697                 goto out;
1698         }
1699
1700         err = __cmd_record(&record, argc, argv);
1701 out:
1702         perf_evlist__delete(rec->evlist);
1703         symbol__exit();
1704         auxtrace_record__free(rec->itr);
1705         return err;
1706 }
1707
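     /*
      * SIGUSR2 drives both AUX area snapshots and --switch-output. Only
      * triggers in the "ready" state react to the signal; a failed
      * snapshot start moves the snapshot trigger into its error state.
      */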
1708 static void snapshot_sig_handler(int sig __maybe_unused)
1709 {
1710         if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
1711                 trigger_hit(&auxtrace_snapshot_trigger);
1712                 auxtrace_record__snapshot_started = 1;
1713                 if (auxtrace_record__snapshot_start(record.itr))
1714                         trigger_error(&auxtrace_snapshot_trigger);
1715         }
1716
1717         if (trigger_is_ready(&switch_output_trigger))
1718                 trigger_hit(&switch_output_trigger);
1719 }