spi: pxa2xx: Constify ACPI device ids
[cascardo/linux.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15 #include "util/parse-events.h"
16
17 #include <libaudit.h>
18 #include <stdlib.h>
19 #include <sys/eventfd.h>
20 #include <sys/mman.h>
21 #include <linux/futex.h>
22
/*
 * Fallback values for constants that the system headers of older distros
 * may not provide.  Each one is only defined when the headers included
 * above did not already define it.
 */
#if !defined(MAP_STACK)
#define MAP_STACK		0x20000
#endif

#if !defined(MADV_HWPOISON)
#define MADV_HWPOISON		100
#endif

#if !defined(MADV_MERGEABLE)
#define MADV_MERGEABLE		12
#endif

#if !defined(MADV_UNMERGEABLE)
#define MADV_UNMERGEABLE	13
#endif

#if !defined(EFD_SEMAPHORE)
#define EFD_SEMAPHORE		1
#endif
43
44 struct tp_field {
45         int offset;
46         union {
47                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
48                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
49         };
50 };
51
52 #define TP_UINT_FIELD(bits) \
53 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
54 { \
55         u##bits value; \
56         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
57         return value;  \
58 }
59
60 TP_UINT_FIELD(8);
61 TP_UINT_FIELD(16);
62 TP_UINT_FIELD(32);
63 TP_UINT_FIELD(64);
64
65 #define TP_UINT_FIELD__SWAPPED(bits) \
66 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
67 { \
68         u##bits value; \
69         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
70         return bswap_##bits(value);\
71 }
72
73 TP_UINT_FIELD__SWAPPED(16);
74 TP_UINT_FIELD__SWAPPED(32);
75 TP_UINT_FIELD__SWAPPED(64);
76
77 static int tp_field__init_uint(struct tp_field *field,
78                                struct format_field *format_field,
79                                bool needs_swap)
80 {
81         field->offset = format_field->offset;
82
83         switch (format_field->size) {
84         case 1:
85                 field->integer = tp_field__u8;
86                 break;
87         case 2:
88                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
89                 break;
90         case 4:
91                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
92                 break;
93         case 8:
94                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
95                 break;
96         default:
97                 return -1;
98         }
99
100         return 0;
101 }
102
103 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
104 {
105         return sample->raw_data + field->offset;
106 }
107
108 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
109 {
110         field->offset = format_field->offset;
111         field->pointer = tp_field__ptr;
112         return 0;
113 }
114
115 struct syscall_tp {
116         struct tp_field id;
117         union {
118                 struct tp_field args, ret;
119         };
120 };
121
122 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
123                                           struct tp_field *field,
124                                           const char *name)
125 {
126         struct format_field *format_field = perf_evsel__field(evsel, name);
127
128         if (format_field == NULL)
129                 return -1;
130
131         return tp_field__init_uint(field, format_field, evsel->needs_swap);
132 }
133
/*
 * Initialise the integer accessor 'name' of the struct syscall_tp hanging
 * off evsel->priv; 'name' is both the struct member and the tracepoint
 * field name.  Evaluates to the result of perf_evsel__init_tp_uint_field().
 *
 * Note that 'evsel' is evaluated twice.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
137
/*
 * Look up the tracepoint field called @name in @evsel and initialise
 * @field as a pointer accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise the result of
 * tp_field__init_ptr().
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	if (!fmt)
		return -1;

	return tp_field__init_ptr(field, fmt);
}
149
/*
 * Initialise the pointer accessor 'name' of the struct syscall_tp hanging
 * off evsel->priv; 'name' is both the struct member and the tracepoint
 * field name.  Evaluates to the result of perf_evsel__init_tp_ptr_field().
 *
 * Note that 'evsel' is evaluated twice.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
153
154 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
155 {
156         zfree(&evsel->priv);
157         perf_evsel__delete(evsel);
158 }
159
160 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
161 {
162         evsel->priv = malloc(sizeof(struct syscall_tp));
163         if (evsel->priv != NULL) {
164                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
165                         goto out_delete;
166
167                 evsel->handler = handler;
168                 return 0;
169         }
170
171         return -ENOMEM;
172
173 out_delete:
174         zfree(&evsel->priv);
175         return -ENOENT;
176 }
177
/*
 * Create the evsel for the "raw_syscalls:<direction>" tracepoint, falling
 * back to "syscalls:<direction>", and prepare it with
 * perf_evsel__init_syscall_tp().
 *
 * Returns the new evsel, or NULL when neither tracepoint could be created
 * or the initialisation failed (the evsel is deleted in the latter case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (evsel == NULL)
		evsel = perf_evsel__newtp("syscalls", direction);

	if (evsel == NULL)
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler)) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
197
/*
 * Read the integer field 'name' of 'sample' through the struct syscall_tp
 * accessor stored in evsel->priv.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, (sample)); })

/*
 * Same as above for pointer fields: evaluates to the address of field
 * 'name' inside the raw payload of 'sample'.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, (sample)); })
205
206 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
207                                           void *sys_enter_handler,
208                                           void *sys_exit_handler)
209 {
210         int ret = -1;
211         struct perf_evsel *sys_enter, *sys_exit;
212
213         sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
214         if (sys_enter == NULL)
215                 goto out;
216
217         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
218                 goto out_delete_sys_enter;
219
220         sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
221         if (sys_exit == NULL)
222                 goto out_delete_sys_enter;
223
224         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
225                 goto out_delete_sys_exit;
226
227         perf_evlist__add(evlist, sys_enter);
228         perf_evlist__add(evlist, sys_exit);
229
230         ret = 0;
231 out:
232         return ret;
233
234 out_delete_sys_exit:
235         perf_evsel__delete_priv(sys_exit);
236 out_delete_sys_enter:
237         perf_evsel__delete_priv(sys_enter);
238         goto out;
239 }
240
241
242 struct syscall_arg {
243         unsigned long val;
244         struct thread *thread;
245         struct trace  *trace;
246         void          *parm;
247         u8            idx;
248         u8            mask;
249 };
250
/*
 * Table mapping consecutive integer values to names.
 *
 * @offset:     value named by entries[0]; it is subtracted from the
 *              argument before indexing
 * @nr_entries: number of elements in @entries
 * @entries:    the names
 */
struct strarray {
	int		offset;
	int		nr_entries;
	const char	**entries;
};
256
/*
 * DEFINE_STRARRAY_OFFSET(array, off) - define 'strarray__<array>', the
 * struct strarray describing the string table 'array', whose first entry
 * names the value 'off'.  The storage class (e.g. 'static') is written by
 * the user in front of the macro.
 */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}

/* DEFINE_STRARRAY(array) - same, for tables whose first entry names 0. */
#define DEFINE_STRARRAY(array) DEFINE_STRARRAY_OFFSET(array, 0)
267
268 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
269                                                 const char *intfmt,
270                                                 struct syscall_arg *arg)
271 {
272         struct strarray *sa = arg->parm;
273         int idx = arg->val - sa->offset;
274
275         if (idx < 0 || idx >= sa->nr_entries)
276                 return scnprintf(bf, size, intfmt, arg->val);
277
278         return scnprintf(bf, size, "%s", sa->entries[idx]);
279 }
280
/*
 * strarray formatter that prints values without a name in decimal ("%d").
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char decimal_fmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, decimal_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
288
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *        gets rewritten to support all arches.
 *
 * strarray formatter that prints values without a name in hex ("%#x").
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	static const char hex_fmt[] = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, hex_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
302
/*
 * File descriptor formatter.  Only declared here so the formatters below
 * can call it; the definition is not in this part of the file.
 */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
307
308 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
309                                            struct syscall_arg *arg)
310 {
311         int fd = arg->val;
312
313         if (fd == AT_FDCWD)
314                 return scnprintf(bf, size, "CWD");
315
316         return syscall_arg__scnprintf_fd(bf, size, arg);
317 }
318
319 #define SCA_FDAT syscall_arg__scnprintf_fd_at
320
/*
 * Formatter for the fd argument of close().  Only declared here; the
 * definition is not in this part of the file.
 */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
325
326 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
327                                          struct syscall_arg *arg)
328 {
329         return scnprintf(bf, size, "%#lx", arg->val);
330 }
331
332 #define SCA_HEX syscall_arg__scnprintf_hex
333
334 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
335                                                struct syscall_arg *arg)
336 {
337         int printed = 0, prot = arg->val;
338
339         if (prot == PROT_NONE)
340                 return scnprintf(bf, size, "NONE");
341 #define P_MMAP_PROT(n) \
342         if (prot & PROT_##n) { \
343                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
344                 prot &= ~PROT_##n; \
345         }
346
347         P_MMAP_PROT(EXEC);
348         P_MMAP_PROT(READ);
349         P_MMAP_PROT(WRITE);
350 #ifdef PROT_SEM
351         P_MMAP_PROT(SEM);
352 #endif
353         P_MMAP_PROT(GROWSDOWN);
354         P_MMAP_PROT(GROWSUP);
355 #undef P_MMAP_PROT
356
357         if (prot)
358                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
359
360         return printed;
361 }
362
363 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
364
365 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
366                                                 struct syscall_arg *arg)
367 {
368         int printed = 0, flags = arg->val;
369
370 #define P_MMAP_FLAG(n) \
371         if (flags & MAP_##n) { \
372                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
373                 flags &= ~MAP_##n; \
374         }
375
376         P_MMAP_FLAG(SHARED);
377         P_MMAP_FLAG(PRIVATE);
378 #ifdef MAP_32BIT
379         P_MMAP_FLAG(32BIT);
380 #endif
381         P_MMAP_FLAG(ANONYMOUS);
382         P_MMAP_FLAG(DENYWRITE);
383         P_MMAP_FLAG(EXECUTABLE);
384         P_MMAP_FLAG(FILE);
385         P_MMAP_FLAG(FIXED);
386         P_MMAP_FLAG(GROWSDOWN);
387 #ifdef MAP_HUGETLB
388         P_MMAP_FLAG(HUGETLB);
389 #endif
390         P_MMAP_FLAG(LOCKED);
391         P_MMAP_FLAG(NONBLOCK);
392         P_MMAP_FLAG(NORESERVE);
393         P_MMAP_FLAG(POPULATE);
394         P_MMAP_FLAG(STACK);
395 #ifdef MAP_UNINITIALIZED
396         P_MMAP_FLAG(UNINITIALIZED);
397 #endif
398 #undef P_MMAP_FLAG
399
400         if (flags)
401                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
402
403         return printed;
404 }
405
406 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
407
408 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
409                                                   struct syscall_arg *arg)
410 {
411         int printed = 0, flags = arg->val;
412
413 #define P_MREMAP_FLAG(n) \
414         if (flags & MREMAP_##n) { \
415                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
416                 flags &= ~MREMAP_##n; \
417         }
418
419         P_MREMAP_FLAG(MAYMOVE);
420 #ifdef MREMAP_FIXED
421         P_MREMAP_FLAG(FIXED);
422 #endif
423 #undef P_MREMAP_FLAG
424
425         if (flags)
426                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
427
428         return printed;
429 }
430
431 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
432
433 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
434                                                       struct syscall_arg *arg)
435 {
436         int behavior = arg->val;
437
438         switch (behavior) {
439 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
440         P_MADV_BHV(NORMAL);
441         P_MADV_BHV(RANDOM);
442         P_MADV_BHV(SEQUENTIAL);
443         P_MADV_BHV(WILLNEED);
444         P_MADV_BHV(DONTNEED);
445         P_MADV_BHV(REMOVE);
446         P_MADV_BHV(DONTFORK);
447         P_MADV_BHV(DOFORK);
448         P_MADV_BHV(HWPOISON);
449 #ifdef MADV_SOFT_OFFLINE
450         P_MADV_BHV(SOFT_OFFLINE);
451 #endif
452         P_MADV_BHV(MERGEABLE);
453         P_MADV_BHV(UNMERGEABLE);
454 #ifdef MADV_HUGEPAGE
455         P_MADV_BHV(HUGEPAGE);
456 #endif
457 #ifdef MADV_NOHUGEPAGE
458         P_MADV_BHV(NOHUGEPAGE);
459 #endif
460 #ifdef MADV_DONTDUMP
461         P_MADV_BHV(DONTDUMP);
462 #endif
463 #ifdef MADV_DODUMP
464         P_MADV_BHV(DODUMP);
465 #endif
466 #undef P_MADV_PHV
467         default: break;
468         }
469
470         return scnprintf(bf, size, "%#x", behavior);
471 }
472
473 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
474
475 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
476                                            struct syscall_arg *arg)
477 {
478         int printed = 0, op = arg->val;
479
480         if (op == 0)
481                 return scnprintf(bf, size, "NONE");
482 #define P_CMD(cmd) \
483         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
484                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
485                 op &= ~LOCK_##cmd; \
486         }
487
488         P_CMD(SH);
489         P_CMD(EX);
490         P_CMD(NB);
491         P_CMD(UN);
492         P_CMD(MAND);
493         P_CMD(RW);
494         P_CMD(READ);
495         P_CMD(WRITE);
496 #undef P_OP
497
498         if (op)
499                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
500
501         return printed;
502 }
503
504 #define SCA_FLOCK syscall_arg__scnprintf_flock
505
506 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
507 {
508         enum syscall_futex_args {
509                 SCF_UADDR   = (1 << 0),
510                 SCF_OP      = (1 << 1),
511                 SCF_VAL     = (1 << 2),
512                 SCF_TIMEOUT = (1 << 3),
513                 SCF_UADDR2  = (1 << 4),
514                 SCF_VAL3    = (1 << 5),
515         };
516         int op = arg->val;
517         int cmd = op & FUTEX_CMD_MASK;
518         size_t printed = 0;
519
520         switch (cmd) {
521 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
522         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
523         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
524         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
525         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
526         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
527         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
528         P_FUTEX_OP(WAKE_OP);                                                      break;
529         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
530         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
531         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
532         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
533         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
534         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
535         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
536         }
537
538         if (op & FUTEX_PRIVATE_FLAG)
539                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
540
541         if (op & FUTEX_CLOCK_REALTIME)
542                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
543
544         return printed;
545 }
546
547 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
548
549 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
550 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
551
552 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
553 static DEFINE_STRARRAY(itimers);
554
555 static const char *whences[] = { "SET", "CUR", "END",
556 #ifdef SEEK_DATA
557 "DATA",
558 #endif
559 #ifdef SEEK_HOLE
560 "HOLE",
561 #endif
562 };
563 static DEFINE_STRARRAY(whences);
564
565 static const char *fcntl_cmds[] = {
566         "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
567         "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
568         "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
569         "F_GETOWNER_UIDS",
570 };
571 static DEFINE_STRARRAY(fcntl_cmds);
572
573 static const char *rlimit_resources[] = {
574         "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
575         "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
576         "RTTIME",
577 };
578 static DEFINE_STRARRAY(rlimit_resources);
579
580 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
581 static DEFINE_STRARRAY(sighow);
582
583 static const char *clockid[] = {
584         "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
585         "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
586 };
587 static DEFINE_STRARRAY(clockid);
588
/*
 * Socket address family names, indexed by the AF_* value.
 *
 * The table must stay dense: value 28 (AF_MPLS) needs its own slot,
 * otherwise CAN (29) and every family after it is reported under the
 * name of its successor.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
/* strarray descriptor for the socket_families table, first entry names 0. */
static DEFINE_STRARRAY(socket_families);

/* Fallback for headers that do not provide SOCK_TYPE_MASK. */
#if !defined(SOCK_TYPE_MASK)
#define SOCK_TYPE_MASK	0xf
#endif
602
603 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
604                                                       struct syscall_arg *arg)
605 {
606         size_t printed;
607         int type = arg->val,
608             flags = type & ~SOCK_TYPE_MASK;
609
610         type &= SOCK_TYPE_MASK;
611         /*
612          * Can't use a strarray, MIPS may override for ABI reasons.
613          */
614         switch (type) {
615 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
616         P_SK_TYPE(STREAM);
617         P_SK_TYPE(DGRAM);
618         P_SK_TYPE(RAW);
619         P_SK_TYPE(RDM);
620         P_SK_TYPE(SEQPACKET);
621         P_SK_TYPE(DCCP);
622         P_SK_TYPE(PACKET);
623 #undef P_SK_TYPE
624         default:
625                 printed = scnprintf(bf, size, "%#x", type);
626         }
627
628 #define P_SK_FLAG(n) \
629         if (flags & SOCK_##n) { \
630                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
631                 flags &= ~SOCK_##n; \
632         }
633
634         P_SK_FLAG(CLOEXEC);
635         P_SK_FLAG(NONBLOCK);
636 #undef P_SK_FLAG
637
638         if (flags)
639                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
640
641         return printed;
642 }
643
644 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
645
646 #ifndef MSG_PROBE
647 #define MSG_PROBE            0x10
648 #endif
649 #ifndef MSG_WAITFORONE
650 #define MSG_WAITFORONE  0x10000
651 #endif
652 #ifndef MSG_SENDPAGE_NOTLAST
653 #define MSG_SENDPAGE_NOTLAST 0x20000
654 #endif
655 #ifndef MSG_FASTOPEN
656 #define MSG_FASTOPEN         0x20000000
657 #endif
658
659 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
660                                                struct syscall_arg *arg)
661 {
662         int printed = 0, flags = arg->val;
663
664         if (flags == 0)
665                 return scnprintf(bf, size, "NONE");
666 #define P_MSG_FLAG(n) \
667         if (flags & MSG_##n) { \
668                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
669                 flags &= ~MSG_##n; \
670         }
671
672         P_MSG_FLAG(OOB);
673         P_MSG_FLAG(PEEK);
674         P_MSG_FLAG(DONTROUTE);
675         P_MSG_FLAG(TRYHARD);
676         P_MSG_FLAG(CTRUNC);
677         P_MSG_FLAG(PROBE);
678         P_MSG_FLAG(TRUNC);
679         P_MSG_FLAG(DONTWAIT);
680         P_MSG_FLAG(EOR);
681         P_MSG_FLAG(WAITALL);
682         P_MSG_FLAG(FIN);
683         P_MSG_FLAG(SYN);
684         P_MSG_FLAG(CONFIRM);
685         P_MSG_FLAG(RST);
686         P_MSG_FLAG(ERRQUEUE);
687         P_MSG_FLAG(NOSIGNAL);
688         P_MSG_FLAG(MORE);
689         P_MSG_FLAG(WAITFORONE);
690         P_MSG_FLAG(SENDPAGE_NOTLAST);
691         P_MSG_FLAG(FASTOPEN);
692         P_MSG_FLAG(CMSG_CLOEXEC);
693 #undef P_MSG_FLAG
694
695         if (flags)
696                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
697
698         return printed;
699 }
700
701 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
702
703 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
704                                                  struct syscall_arg *arg)
705 {
706         size_t printed = 0;
707         int mode = arg->val;
708
709         if (mode == F_OK) /* 0 */
710                 return scnprintf(bf, size, "F");
711 #define P_MODE(n) \
712         if (mode & n##_OK) { \
713                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
714                 mode &= ~n##_OK; \
715         }
716
717         P_MODE(R);
718         P_MODE(W);
719         P_MODE(X);
720 #undef P_MODE
721
722         if (mode)
723                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
724
725         return printed;
726 }
727
728 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
729
730 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
731                                                struct syscall_arg *arg)
732 {
733         int printed = 0, flags = arg->val;
734
735         if (!(flags & O_CREAT))
736                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
737
738         if (flags == 0)
739                 return scnprintf(bf, size, "RDONLY");
740 #define P_FLAG(n) \
741         if (flags & O_##n) { \
742                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
743                 flags &= ~O_##n; \
744         }
745
746         P_FLAG(APPEND);
747         P_FLAG(ASYNC);
748         P_FLAG(CLOEXEC);
749         P_FLAG(CREAT);
750         P_FLAG(DIRECT);
751         P_FLAG(DIRECTORY);
752         P_FLAG(EXCL);
753         P_FLAG(LARGEFILE);
754         P_FLAG(NOATIME);
755         P_FLAG(NOCTTY);
756 #ifdef O_NONBLOCK
757         P_FLAG(NONBLOCK);
758 #elif O_NDELAY
759         P_FLAG(NDELAY);
760 #endif
761 #ifdef O_PATH
762         P_FLAG(PATH);
763 #endif
764         P_FLAG(RDWR);
765 #ifdef O_DSYNC
766         if ((flags & O_SYNC) == O_SYNC)
767                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
768         else {
769                 P_FLAG(DSYNC);
770         }
771 #else
772         P_FLAG(SYNC);
773 #endif
774         P_FLAG(TRUNC);
775         P_FLAG(WRONLY);
776 #undef P_FLAG
777
778         if (flags)
779                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
780
781         return printed;
782 }
783
784 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
785
786 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
787                                                    struct syscall_arg *arg)
788 {
789         int printed = 0, flags = arg->val;
790
791         if (flags == 0)
792                 return scnprintf(bf, size, "NONE");
793 #define P_FLAG(n) \
794         if (flags & EFD_##n) { \
795                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
796                 flags &= ~EFD_##n; \
797         }
798
799         P_FLAG(SEMAPHORE);
800         P_FLAG(CLOEXEC);
801         P_FLAG(NONBLOCK);
802 #undef P_FLAG
803
804         if (flags)
805                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
806
807         return printed;
808 }
809
810 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
811
812 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
813                                                 struct syscall_arg *arg)
814 {
815         int printed = 0, flags = arg->val;
816
817 #define P_FLAG(n) \
818         if (flags & O_##n) { \
819                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
820                 flags &= ~O_##n; \
821         }
822
823         P_FLAG(CLOEXEC);
824         P_FLAG(NONBLOCK);
825 #undef P_FLAG
826
827         if (flags)
828                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
829
830         return printed;
831 }
832
833 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
834
835 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
836 {
837         int sig = arg->val;
838
839         switch (sig) {
840 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
841         P_SIGNUM(HUP);
842         P_SIGNUM(INT);
843         P_SIGNUM(QUIT);
844         P_SIGNUM(ILL);
845         P_SIGNUM(TRAP);
846         P_SIGNUM(ABRT);
847         P_SIGNUM(BUS);
848         P_SIGNUM(FPE);
849         P_SIGNUM(KILL);
850         P_SIGNUM(USR1);
851         P_SIGNUM(SEGV);
852         P_SIGNUM(USR2);
853         P_SIGNUM(PIPE);
854         P_SIGNUM(ALRM);
855         P_SIGNUM(TERM);
856         P_SIGNUM(CHLD);
857         P_SIGNUM(CONT);
858         P_SIGNUM(STOP);
859         P_SIGNUM(TSTP);
860         P_SIGNUM(TTIN);
861         P_SIGNUM(TTOU);
862         P_SIGNUM(URG);
863         P_SIGNUM(XCPU);
864         P_SIGNUM(XFSZ);
865         P_SIGNUM(VTALRM);
866         P_SIGNUM(PROF);
867         P_SIGNUM(WINCH);
868         P_SIGNUM(IO);
869         P_SIGNUM(PWR);
870         P_SIGNUM(SYS);
871 #ifdef SIGEMT
872         P_SIGNUM(EMT);
873 #endif
874 #ifdef SIGSTKFLT
875         P_SIGNUM(STKFLT);
876 #endif
877 #ifdef SIGSWI
878         P_SIGNUM(SWI);
879 #endif
880         default: break;
881         }
882
883         return scnprintf(bf, size, "%#x", sig);
884 }
885
886 #define SCA_SIGNUM syscall_arg__scnprintf_signum
887
888 #if defined(__i386__) || defined(__x86_64__)
889 /*
890  * FIXME: Make this available to all arches.
891  */
892 #define TCGETS          0x5401
893
894 static const char *tioctls[] = {
895         "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
896         "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
897         "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
898         "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
899         "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
900         "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
901         "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
902         "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
903         "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
904         "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
905         "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
906         [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
907         "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
908         "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
909         "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
910 };
911
912 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
913 #endif /* defined(__i386__) || defined(__x86_64__) */
914
/*
 * STRARRAY(arg, name, array) - syscall_fmt initializer fragment that
 * formats argument number 'arg' through SCA_STRARRAY using the table
 * 'strarray__<array>'.  'name' is not used by the expansion.
 */
#define STRARRAY(arg, name, array) \
	.arg_scnprintf	= { [arg] = SCA_STRARRAY, }, \
	.arg_parm	= { [arg] = &strarray__##array, }
918
919 static struct syscall_fmt {
920         const char *name;
921         const char *alias;
922         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
923         void       *arg_parm[6];
924         bool       errmsg;
925         bool       timeout;
926         bool       hexret;
927 } syscall_fmts[] = {
928         { .name     = "access",     .errmsg = true,
929           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
930         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
931         { .name     = "brk",        .hexret = true,
932           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
933         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
934         { .name     = "close",      .errmsg = true,
935           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
936         { .name     = "connect",    .errmsg = true, },
937         { .name     = "dup",        .errmsg = true,
938           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
939         { .name     = "dup2",       .errmsg = true,
940           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
941         { .name     = "dup3",       .errmsg = true,
942           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
943         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
944         { .name     = "eventfd2",   .errmsg = true,
945           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
946         { .name     = "faccessat",  .errmsg = true,
947           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
948         { .name     = "fadvise64",  .errmsg = true,
949           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
950         { .name     = "fallocate",  .errmsg = true,
951           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
952         { .name     = "fchdir",     .errmsg = true,
953           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
954         { .name     = "fchmod",     .errmsg = true,
955           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
956         { .name     = "fchmodat",   .errmsg = true,
957           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
958         { .name     = "fchown",     .errmsg = true,
959           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
960         { .name     = "fchownat",   .errmsg = true,
961           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
962         { .name     = "fcntl",      .errmsg = true,
963           .arg_scnprintf = { [0] = SCA_FD, /* fd */
964                              [1] = SCA_STRARRAY, /* cmd */ },
965           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
966         { .name     = "fdatasync",  .errmsg = true,
967           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
968         { .name     = "flock",      .errmsg = true,
969           .arg_scnprintf = { [0] = SCA_FD, /* fd */
970                              [1] = SCA_FLOCK, /* cmd */ }, },
971         { .name     = "fsetxattr",  .errmsg = true,
972           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
973         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
974           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
975         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
976           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
977         { .name     = "fstatfs",    .errmsg = true,
978           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
979         { .name     = "fsync",    .errmsg = true,
980           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
981         { .name     = "ftruncate", .errmsg = true,
982           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
983         { .name     = "futex",      .errmsg = true,
984           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
985         { .name     = "futimesat", .errmsg = true,
986           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
987         { .name     = "getdents",   .errmsg = true,
988           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
989         { .name     = "getdents64", .errmsg = true,
990           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
991         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
992         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
993         { .name     = "ioctl",      .errmsg = true,
994           .arg_scnprintf = { [0] = SCA_FD, /* fd */
995 #if defined(__i386__) || defined(__x86_64__)
996 /*
997  * FIXME: Make this available to all arches.
998  */
999                              [1] = SCA_STRHEXARRAY, /* cmd */
1000                              [2] = SCA_HEX, /* arg */ },
1001           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
1002 #else
1003                              [2] = SCA_HEX, /* arg */ }, },
1004 #endif
1005         { .name     = "kill",       .errmsg = true,
1006           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1007         { .name     = "linkat",     .errmsg = true,
1008           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1009         { .name     = "lseek",      .errmsg = true,
1010           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1011                              [2] = SCA_STRARRAY, /* whence */ },
1012           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
1013         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
1014         { .name     = "madvise",    .errmsg = true,
1015           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
1016                              [2] = SCA_MADV_BHV, /* behavior */ }, },
1017         { .name     = "mkdirat",    .errmsg = true,
1018           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1019         { .name     = "mknodat",    .errmsg = true,
1020           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1021         { .name     = "mlock",      .errmsg = true,
1022           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1023         { .name     = "mlockall",   .errmsg = true,
1024           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1025         { .name     = "mmap",       .hexret = true,
1026           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
1027                              [2] = SCA_MMAP_PROT, /* prot */
1028                              [3] = SCA_MMAP_FLAGS, /* flags */
1029                              [4] = SCA_FD,        /* fd */ }, },
1030         { .name     = "mprotect",   .errmsg = true,
1031           .arg_scnprintf = { [0] = SCA_HEX, /* start */
1032                              [2] = SCA_MMAP_PROT, /* prot */ }, },
1033         { .name     = "mremap",     .hexret = true,
1034           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1035                              [3] = SCA_MREMAP_FLAGS, /* flags */
1036                              [4] = SCA_HEX, /* new_addr */ }, },
1037         { .name     = "munlock",    .errmsg = true,
1038           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1039         { .name     = "munmap",     .errmsg = true,
1040           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1041         { .name     = "name_to_handle_at", .errmsg = true,
1042           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1043         { .name     = "newfstatat", .errmsg = true,
1044           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1045         { .name     = "open",       .errmsg = true,
1046           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1047         { .name     = "open_by_handle_at", .errmsg = true,
1048           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1049                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1050         { .name     = "openat",     .errmsg = true,
1051           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1052                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1053         { .name     = "pipe2",      .errmsg = true,
1054           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1055         { .name     = "poll",       .errmsg = true, .timeout = true, },
1056         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
1057         { .name     = "pread",      .errmsg = true, .alias = "pread64",
1058           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1059         { .name     = "preadv",     .errmsg = true, .alias = "pread",
1060           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1061         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1062         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
1063           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1064         { .name     = "pwritev",    .errmsg = true,
1065           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1066         { .name     = "read",       .errmsg = true,
1067           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1068         { .name     = "readlinkat", .errmsg = true,
1069           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1070         { .name     = "readv",      .errmsg = true,
1071           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1072         { .name     = "recvfrom",   .errmsg = true,
1073           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1074         { .name     = "recvmmsg",   .errmsg = true,
1075           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1076         { .name     = "recvmsg",    .errmsg = true,
1077           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1078         { .name     = "renameat",   .errmsg = true,
1079           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1080         { .name     = "rt_sigaction", .errmsg = true,
1081           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1082         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1083         { .name     = "rt_sigqueueinfo", .errmsg = true,
1084           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1085         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
1086           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1087         { .name     = "select",     .errmsg = true, .timeout = true, },
1088         { .name     = "sendmmsg",    .errmsg = true,
1089           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1090         { .name     = "sendmsg",    .errmsg = true,
1091           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1092         { .name     = "sendto",     .errmsg = true,
1093           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1094         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1095         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1096         { .name     = "shutdown",   .errmsg = true,
1097           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1098         { .name     = "socket",     .errmsg = true,
1099           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1100                              [1] = SCA_SK_TYPE, /* type */ },
1101           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1102         { .name     = "socketpair", .errmsg = true,
1103           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1104                              [1] = SCA_SK_TYPE, /* type */ },
1105           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1106         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
1107         { .name     = "symlinkat",  .errmsg = true,
1108           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1109         { .name     = "tgkill",     .errmsg = true,
1110           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1111         { .name     = "tkill",      .errmsg = true,
1112           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1113         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
1114         { .name     = "unlinkat",   .errmsg = true,
1115           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1116         { .name     = "utimensat",  .errmsg = true,
1117           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1118         { .name     = "write",      .errmsg = true,
1119           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1120         { .name     = "writev",     .errmsg = true,
1121           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1122 };
1123
1124 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1125 {
1126         const struct syscall_fmt *fmt = fmtp;
1127         return strcmp(name, fmt->name);
1128 }
1129
1130 static struct syscall_fmt *syscall_fmt__find(const char *name)
1131 {
1132         const int nmemb = ARRAY_SIZE(syscall_fmts);
1133         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1134 }
1135
/*
 * Per-syscall state, stored in trace->syscalls.table and indexed by syscall
 * id.  Entries are filled in by trace__read_syscall_info().
 */
struct syscall {
        /* tracepoint format looked up for "syscalls:sys_enter_<name>" */
        struct event_format *tp_format;
        /* number of entries in 'args' */
        int nr_args;
        /* argument fields taken from tp_format (a leading "nr" field is dropped) */
        struct format_field *args;
        /* syscall name as returned by audit_syscall_to_name() */
        const char *name;
        /* set when the event qualifier excludes this syscall */
        bool filtered;
        /* true for "exit" and "exit_group" */
        bool is_exit;
        /* formatting hints from syscall_fmts[], NULL when there are none */
        struct syscall_fmt *fmt;
        /* per-argument formatter, allocated by syscall__set_arg_fmts() */
        size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        /* per-argument formatter parameter, borrowed from fmt->arg_parm */
        void **arg_parm;
};
1147
1148 static size_t fprintf_duration(unsigned long t, FILE *fp)
1149 {
1150         double duration = (double)t / NSEC_PER_MSEC;
1151         size_t printed = fprintf(fp, "(");
1152
1153         if (duration >= 1.0)
1154                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1155         else if (duration >= 0.01)
1156                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1157         else
1158                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1159         return printed + fprintf(fp, "): ");
1160 }
1161
1162 struct thread_trace {
1163         u64               entry_time;
1164         u64               exit_time;
1165         bool              entry_pending;
1166         unsigned long     nr_events;
1167         unsigned long     pfmaj, pfmin;
1168         char              *entry_str;
1169         double            runtime_ms;
1170         struct {
1171                 int       max;
1172                 char      **table;
1173         } paths;
1174
1175         struct intlist *syscall_stats;
1176 };
1177
1178 static struct thread_trace *thread_trace__new(void)
1179 {
1180         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1181
1182         if (ttrace)
1183                 ttrace->paths.max = -1;
1184
1185         ttrace->syscall_stats = intlist__new(NULL);
1186
1187         return ttrace;
1188 }
1189
1190 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1191 {
1192         struct thread_trace *ttrace;
1193
1194         if (thread == NULL)
1195                 goto fail;
1196
1197         if (thread__priv(thread) == NULL)
1198                 thread__set_priv(thread, thread_trace__new());
1199
1200         if (thread__priv(thread) == NULL)
1201                 goto fail;
1202
1203         ttrace = thread__priv(thread);
1204         ++ttrace->nr_events;
1205
1206         return ttrace;
1207 fail:
1208         color_fprintf(fp, PERF_COLOR_RED,
1209                       "WARNING: not enough memory, dropping samples!\n");
1210         return NULL;
1211 }
1212
/*
 * Single-bit flags; presumably combined in trace->trace_pgfaults to select
 * which page faults (major and/or minor) get traced -- confirm at the users.
 */
#define TRACE_PFMAJ     (1 << 0)
#define TRACE_PFMIN     (1 << 1)
1215
1216 struct trace {
1217         struct perf_tool        tool;
1218         struct {
1219                 int             machine;
1220                 int             open_id;
1221         }                       audit;
1222         struct {
1223                 int             max;
1224                 struct syscall  *table;
1225         } syscalls;
1226         struct record_opts      opts;
1227         struct perf_evlist      *evlist;
1228         struct machine          *host;
1229         struct thread           *current;
1230         u64                     base_time;
1231         FILE                    *output;
1232         unsigned long           nr_events;
1233         struct strlist          *ev_qualifier;
1234         const char              *last_vfs_getname;
1235         struct intlist          *tid_list;
1236         struct intlist          *pid_list;
1237         struct {
1238                 size_t          nr;
1239                 pid_t           *entries;
1240         }                       filter_pids;
1241         double                  duration_filter;
1242         double                  runtime_ms;
1243         struct {
1244                 u64             vfs_getname,
1245                                 proc_getname;
1246         } stats;
1247         bool                    not_ev_qualifier;
1248         bool                    live;
1249         bool                    full_time;
1250         bool                    sched;
1251         bool                    multiple_threads;
1252         bool                    summary;
1253         bool                    summary_only;
1254         bool                    show_comm;
1255         bool                    show_tool_stats;
1256         bool                    trace_syscalls;
1257         bool                    force;
1258         int                     trace_pgfaults;
1259 };
1260
1261 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1262 {
1263         struct thread_trace *ttrace = thread__priv(thread);
1264
1265         if (fd > ttrace->paths.max) {
1266                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1267
1268                 if (npath == NULL)
1269                         return -1;
1270
1271                 if (ttrace->paths.max != -1) {
1272                         memset(npath + ttrace->paths.max + 1, 0,
1273                                (fd - ttrace->paths.max) * sizeof(char *));
1274                 } else {
1275                         memset(npath, 0, (fd + 1) * sizeof(char *));
1276                 }
1277
1278                 ttrace->paths.table = npath;
1279                 ttrace->paths.max   = fd;
1280         }
1281
1282         ttrace->paths.table[fd] = strdup(pathname);
1283
1284         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1285 }
1286
1287 static int thread__read_fd_path(struct thread *thread, int fd)
1288 {
1289         char linkname[PATH_MAX], pathname[PATH_MAX];
1290         struct stat st;
1291         int ret;
1292
1293         if (thread->pid_ == thread->tid) {
1294                 scnprintf(linkname, sizeof(linkname),
1295                           "/proc/%d/fd/%d", thread->pid_, fd);
1296         } else {
1297                 scnprintf(linkname, sizeof(linkname),
1298                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1299         }
1300
1301         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1302                 return -1;
1303
1304         ret = readlink(linkname, pathname, sizeof(pathname));
1305
1306         if (ret < 0 || ret > st.st_size)
1307                 return -1;
1308
1309         pathname[ret] = '\0';
1310         return trace__set_fd_pathname(thread, fd, pathname);
1311 }
1312
1313 static const char *thread__fd_path(struct thread *thread, int fd,
1314                                    struct trace *trace)
1315 {
1316         struct thread_trace *ttrace = thread__priv(thread);
1317
1318         if (ttrace == NULL)
1319                 return NULL;
1320
1321         if (fd < 0)
1322                 return NULL;
1323
1324         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1325                 if (!trace->live)
1326                         return NULL;
1327                 ++trace->stats.proc_getname;
1328                 if (thread__read_fd_path(thread, fd))
1329                         return NULL;
1330         }
1331
1332         return ttrace->paths.table[fd];
1333 }
1334
1335 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1336                                         struct syscall_arg *arg)
1337 {
1338         int fd = arg->val;
1339         size_t printed = scnprintf(bf, size, "%d", fd);
1340         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1341
1342         if (path)
1343                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1344
1345         return printed;
1346 }
1347
1348 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1349                                               struct syscall_arg *arg)
1350 {
1351         int fd = arg->val;
1352         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1353         struct thread_trace *ttrace = thread__priv(arg->thread);
1354
1355         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1356                 zfree(&ttrace->paths.table[fd]);
1357
1358         return printed;
1359 }
1360
1361 static bool trace__filter_duration(struct trace *trace, double t)
1362 {
1363         return t < (trace->duration_filter * NSEC_PER_MSEC);
1364 }
1365
1366 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1367 {
1368         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1369
1370         return fprintf(fp, "%10.3f ", ts);
1371 }
1372
/* Set by sig_handler(): a signal asked us to stop. */
static bool done = false;
/* Set by sig_handler(): the last signal handled was SIGINT. */
static bool interrupted = false;

/*
 * Signal handler: request termination and record whether it was caused by
 * SIGINT.
 */
static void sig_handler(int sig)
{
        done = true;

        if (sig == SIGINT)
                interrupted = true;
        else
                interrupted = false;
}
1381
1382 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1383                                         u64 duration, u64 tstamp, FILE *fp)
1384 {
1385         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1386         printed += fprintf_duration(duration, fp);
1387
1388         if (trace->multiple_threads) {
1389                 if (trace->show_comm)
1390                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1391                 printed += fprintf(fp, "%d ", thread->tid);
1392         }
1393
1394         return printed;
1395 }
1396
1397 static int trace__process_event(struct trace *trace, struct machine *machine,
1398                                 union perf_event *event, struct perf_sample *sample)
1399 {
1400         int ret = 0;
1401
1402         switch (event->header.type) {
1403         case PERF_RECORD_LOST:
1404                 color_fprintf(trace->output, PERF_COLOR_RED,
1405                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1406                 ret = machine__process_lost_event(machine, event, sample);
1407         default:
1408                 ret = machine__process_event(machine, event, sample);
1409                 break;
1410         }
1411
1412         return ret;
1413 }
1414
1415 static int trace__tool_process(struct perf_tool *tool,
1416                                union perf_event *event,
1417                                struct perf_sample *sample,
1418                                struct machine *machine)
1419 {
1420         struct trace *trace = container_of(tool, struct trace, tool);
1421         return trace__process_event(trace, machine, event, sample);
1422 }
1423
1424 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1425 {
1426         int err = symbol__init(NULL);
1427
1428         if (err)
1429                 return err;
1430
1431         trace->host = machine__new_host();
1432         if (trace->host == NULL)
1433                 return -ENOMEM;
1434
1435         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1436                                             evlist->threads, trace__tool_process, false);
1437         if (err)
1438                 symbol__exit();
1439
1440         return err;
1441 }
1442
1443 static int syscall__set_arg_fmts(struct syscall *sc)
1444 {
1445         struct format_field *field;
1446         int idx = 0;
1447
1448         sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1449         if (sc->arg_scnprintf == NULL)
1450                 return -1;
1451
1452         if (sc->fmt)
1453                 sc->arg_parm = sc->fmt->arg_parm;
1454
1455         for (field = sc->args; field; field = field->next) {
1456                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1457                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1458                 else if (field->flags & FIELD_IS_POINTER)
1459                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1460                 ++idx;
1461         }
1462
1463         return 0;
1464 }
1465
1466 static int trace__read_syscall_info(struct trace *trace, int id)
1467 {
1468         char tp_name[128];
1469         struct syscall *sc;
1470         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1471
1472         if (name == NULL)
1473                 return -1;
1474
1475         if (id > trace->syscalls.max) {
1476                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1477
1478                 if (nsyscalls == NULL)
1479                         return -1;
1480
1481                 if (trace->syscalls.max != -1) {
1482                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1483                                (id - trace->syscalls.max) * sizeof(*sc));
1484                 } else {
1485                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1486                 }
1487
1488                 trace->syscalls.table = nsyscalls;
1489                 trace->syscalls.max   = id;
1490         }
1491
1492         sc = trace->syscalls.table + id;
1493         sc->name = name;
1494
1495         if (trace->ev_qualifier) {
1496                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1497
1498                 if (!(in ^ trace->not_ev_qualifier)) {
1499                         sc->filtered = true;
1500                         /*
1501                          * No need to do read tracepoint information since this will be
1502                          * filtered out.
1503                          */
1504                         return 0;
1505                 }
1506         }
1507
1508         sc->fmt  = syscall_fmt__find(sc->name);
1509
1510         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1511         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1512
1513         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1514                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1515                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1516         }
1517
1518         if (sc->tp_format == NULL)
1519                 return -1;
1520
1521         sc->args = sc->tp_format->format.fields;
1522         sc->nr_args = sc->tp_format->format.nr_fields;
1523         /* drop nr field - not relevant here; does not exist on older kernels */
1524         if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1525                 sc->args = sc->args->next;
1526                 --sc->nr_args;
1527         }
1528
1529         sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1530
1531         return syscall__set_arg_fmts(sc);
1532 }
1533
1534 /*
1535  * args is to be interpreted as a series of longs but we need to handle
1536  * 8-byte unaligned accesses. args points to raw_data within the event
1537  * and raw_data is guaranteed to be 8-byte unaligned because it is
1538  * preceded by raw_size which is a u32. So we need to copy args to a temp
1539  * variable to read it. Most notably this avoids extended load instructions
1540  * on unaligned addresses
1541  */
1542
1543 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1544                                       unsigned char *args, struct trace *trace,
1545                                       struct thread *thread)
1546 {
1547         size_t printed = 0;
1548         unsigned char *p;
1549         unsigned long val;
1550
1551         if (sc->args != NULL) {
1552                 struct format_field *field;
1553                 u8 bit = 1;
1554                 struct syscall_arg arg = {
1555                         .idx    = 0,
1556                         .mask   = 0,
1557                         .trace  = trace,
1558                         .thread = thread,
1559                 };
1560
1561                 for (field = sc->args; field;
1562                      field = field->next, ++arg.idx, bit <<= 1) {
1563                         if (arg.mask & bit)
1564                                 continue;
1565
1566                         /* special care for unaligned accesses */
1567                         p = args + sizeof(unsigned long) * arg.idx;
1568                         memcpy(&val, p, sizeof(val));
1569
1570                         /*
1571                          * Suppress this argument if its value is zero and
1572                          * and we don't have a string associated in an
1573                          * strarray for it.
1574                          */
1575                         if (val == 0 &&
1576                             !(sc->arg_scnprintf &&
1577                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1578                               sc->arg_parm[arg.idx]))
1579                                 continue;
1580
1581                         printed += scnprintf(bf + printed, size - printed,
1582                                              "%s%s: ", printed ? ", " : "", field->name);
1583                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1584                                 arg.val = val;
1585                                 if (sc->arg_parm)
1586                                         arg.parm = sc->arg_parm[arg.idx];
1587                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1588                                                                       size - printed, &arg);
1589                         } else {
1590                                 printed += scnprintf(bf + printed, size - printed,
1591                                                      "%ld", val);
1592                         }
1593                 }
1594         } else {
1595                 int i = 0;
1596
1597                 while (i < 6) {
1598                         /* special care for unaligned accesses */
1599                         p = args + sizeof(unsigned long) * i;
1600                         memcpy(&val, p, sizeof(val));
1601                         printed += scnprintf(bf + printed, size - printed,
1602                                              "%sarg%d: %ld",
1603                                              printed ? ", " : "", i, val);
1604                         ++i;
1605                 }
1606         }
1607
1608         return printed;
1609 }
1610
/*
 * Signature of a per-tracepoint callback: it receives the trace state, the
 * evsel, the raw event and the sample, and returns an int.
 */
typedef int (*tracepoint_handler)(struct trace *trace,
                                  struct perf_evsel *evsel,
                                  union perf_event *event,
                                  struct perf_sample *sample);
1614
1615 static struct syscall *trace__syscall_info(struct trace *trace,
1616                                            struct perf_evsel *evsel, int id)
1617 {
1618
1619         if (id < 0) {
1620
1621                 /*
1622                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1623                  * before that, leaving at a higher verbosity level till that is
1624                  * explained. Reproduced with plain ftrace with:
1625                  *
1626                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1627                  * grep "NR -1 " /t/trace_pipe
1628                  *
1629                  * After generating some load on the machine.
1630                  */
1631                 if (verbose > 1) {
1632                         static u64 n;
1633                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1634                                 id, perf_evsel__name(evsel), ++n);
1635                 }
1636                 return NULL;
1637         }
1638
1639         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1640             trace__read_syscall_info(trace, id))
1641                 goto out_cant_read;
1642
1643         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1644                 goto out_cant_read;
1645
1646         return &trace->syscalls.table[id];
1647
1648 out_cant_read:
1649         if (verbose) {
1650                 fprintf(trace->output, "Problems reading syscall %d", id);
1651                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1652                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1653                 fputs(" information\n", trace->output);
1654         }
1655         return NULL;
1656 }
1657
1658 static void thread__update_stats(struct thread_trace *ttrace,
1659                                  int id, struct perf_sample *sample)
1660 {
1661         struct int_node *inode;
1662         struct stats *stats;
1663         u64 duration = 0;
1664
1665         inode = intlist__findnew(ttrace->syscall_stats, id);
1666         if (inode == NULL)
1667                 return;
1668
1669         stats = inode->priv;
1670         if (stats == NULL) {
1671                 stats = malloc(sizeof(struct stats));
1672                 if (stats == NULL)
1673                         return;
1674                 init_stats(stats);
1675                 inode->priv = stats;
1676         }
1677
1678         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1679                 duration = sample->time - ttrace->entry_time;
1680
1681         update_stats(stats, duration);
1682 }
1683
1684 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1685 {
1686         struct thread_trace *ttrace;
1687         u64 duration;
1688         size_t printed;
1689
1690         if (trace->current == NULL)
1691                 return 0;
1692
1693         ttrace = thread__priv(trace->current);
1694
1695         if (!ttrace->entry_pending)
1696                 return 0;
1697
1698         duration = sample->time - ttrace->entry_time;
1699
1700         printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1701         printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1702         ttrace->entry_pending = false;
1703
1704         return printed;
1705 }
1706
1707 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1708                             union perf_event *event __maybe_unused,
1709                             struct perf_sample *sample)
1710 {
1711         char *msg;
1712         void *args;
1713         size_t printed = 0;
1714         struct thread *thread;
1715         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1716         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1717         struct thread_trace *ttrace;
1718
1719         if (sc == NULL)
1720                 return -1;
1721
1722         if (sc->filtered)
1723                 return 0;
1724
1725         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1726         ttrace = thread__trace(thread, trace->output);
1727         if (ttrace == NULL)
1728                 return -1;
1729
1730         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1731
1732         if (ttrace->entry_str == NULL) {
1733                 ttrace->entry_str = malloc(1024);
1734                 if (!ttrace->entry_str)
1735                         return -1;
1736         }
1737
1738         if (!trace->summary_only)
1739                 trace__printf_interrupted_entry(trace, sample);
1740
1741         ttrace->entry_time = sample->time;
1742         msg = ttrace->entry_str;
1743         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1744
1745         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1746                                            args, trace, thread);
1747
1748         if (sc->is_exit) {
1749                 if (!trace->duration_filter && !trace->summary_only) {
1750                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1751                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1752                 }
1753         } else
1754                 ttrace->entry_pending = true;
1755
1756         if (trace->current != thread) {
1757                 thread__put(trace->current);
1758                 trace->current = thread__get(thread);
1759         }
1760
1761         return 0;
1762 }
1763
1764 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1765                            union perf_event *event __maybe_unused,
1766                            struct perf_sample *sample)
1767 {
1768         long ret;
1769         u64 duration = 0;
1770         struct thread *thread;
1771         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1772         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1773         struct thread_trace *ttrace;
1774
1775         if (sc == NULL)
1776                 return -1;
1777
1778         if (sc->filtered)
1779                 return 0;
1780
1781         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1782         ttrace = thread__trace(thread, trace->output);
1783         if (ttrace == NULL)
1784                 return -1;
1785
1786         if (trace->summary)
1787                 thread__update_stats(ttrace, id, sample);
1788
1789         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1790
1791         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1792                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1793                 trace->last_vfs_getname = NULL;
1794                 ++trace->stats.vfs_getname;
1795         }
1796
1797         ttrace->exit_time = sample->time;
1798
1799         if (ttrace->entry_time) {
1800                 duration = sample->time - ttrace->entry_time;
1801                 if (trace__filter_duration(trace, duration))
1802                         goto out;
1803         } else if (trace->duration_filter)
1804                 goto out;
1805
1806         if (trace->summary_only)
1807                 goto out;
1808
1809         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1810
1811         if (ttrace->entry_pending) {
1812                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1813         } else {
1814                 fprintf(trace->output, " ... [");
1815                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1816                 fprintf(trace->output, "]: %s()", sc->name);
1817         }
1818
1819         if (sc->fmt == NULL) {
1820 signed_print:
1821                 fprintf(trace->output, ") = %ld", ret);
1822         } else if (ret < 0 && sc->fmt->errmsg) {
1823                 char bf[STRERR_BUFSIZE];
1824                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1825                            *e = audit_errno_to_name(-ret);
1826
1827                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1828         } else if (ret == 0 && sc->fmt->timeout)
1829                 fprintf(trace->output, ") = 0 Timeout");
1830         else if (sc->fmt->hexret)
1831                 fprintf(trace->output, ") = %#lx", ret);
1832         else
1833                 goto signed_print;
1834
1835         fputc('\n', trace->output);
1836 out:
1837         ttrace->entry_pending = false;
1838
1839         return 0;
1840 }
1841
1842 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1843                               union perf_event *event __maybe_unused,
1844                               struct perf_sample *sample)
1845 {
1846         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1847         return 0;
1848 }
1849
1850 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1851                                      union perf_event *event __maybe_unused,
1852                                      struct perf_sample *sample)
1853 {
1854         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1855         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1856         struct thread *thread = machine__findnew_thread(trace->host,
1857                                                         sample->pid,
1858                                                         sample->tid);
1859         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1860
1861         if (ttrace == NULL)
1862                 goto out_dump;
1863
1864         ttrace->runtime_ms += runtime_ms;
1865         trace->runtime_ms += runtime_ms;
1866         return 0;
1867
1868 out_dump:
1869         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1870                evsel->name,
1871                perf_evsel__strval(evsel, sample, "comm"),
1872                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1873                runtime,
1874                perf_evsel__intval(evsel, sample, "vruntime"));
1875         return 0;
1876 }
1877
1878 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1879                                 union perf_event *event __maybe_unused,
1880                                 struct perf_sample *sample)
1881 {
1882         trace__printf_interrupted_entry(trace, sample);
1883         trace__fprintf_tstamp(trace, sample->time, trace->output);
1884
1885         if (trace->trace_syscalls)
1886                 fprintf(trace->output, "(         ): ");
1887
1888         fprintf(trace->output, "%s:", evsel->name);
1889
1890         if (evsel->tp_format) {
1891                 event_format__fprintf(evsel->tp_format, sample->cpu,
1892                                       sample->raw_data, sample->raw_size,
1893                                       trace->output);
1894         }
1895
1896         fprintf(trace->output, ")\n");
1897         return 0;
1898 }
1899
1900 static void print_location(FILE *f, struct perf_sample *sample,
1901                            struct addr_location *al,
1902                            bool print_dso, bool print_sym)
1903 {
1904
1905         if ((verbose || print_dso) && al->map)
1906                 fprintf(f, "%s@", al->map->dso->long_name);
1907
1908         if ((verbose || print_sym) && al->sym)
1909                 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1910                         al->addr - al->sym->start);
1911         else if (al->map)
1912                 fprintf(f, "0x%" PRIx64, al->addr);
1913         else
1914                 fprintf(f, "0x%" PRIx64, sample->addr);
1915 }
1916
1917 static int trace__pgfault(struct trace *trace,
1918                           struct perf_evsel *evsel,
1919                           union perf_event *event,
1920                           struct perf_sample *sample)
1921 {
1922         struct thread *thread;
1923         u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1924         struct addr_location al;
1925         char map_type = 'd';
1926         struct thread_trace *ttrace;
1927
1928         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1929         ttrace = thread__trace(thread, trace->output);
1930         if (ttrace == NULL)
1931                 return -1;
1932
1933         if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1934                 ttrace->pfmaj++;
1935         else
1936                 ttrace->pfmin++;
1937
1938         if (trace->summary_only)
1939                 return 0;
1940
1941         thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
1942                               sample->ip, &al);
1943
1944         trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
1945
1946         fprintf(trace->output, "%sfault [",
1947                 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1948                 "maj" : "min");
1949
1950         print_location(trace->output, sample, &al, false, true);
1951
1952         fprintf(trace->output, "] => ");
1953
1954         thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
1955                                    sample->addr, &al);
1956
1957         if (!al.map) {
1958                 thread__find_addr_location(thread, cpumode,
1959                                            MAP__FUNCTION, sample->addr, &al);
1960
1961                 if (al.map)
1962                         map_type = 'x';
1963                 else
1964                         map_type = '?';
1965         }
1966
1967         print_location(trace->output, sample, &al, true, false);
1968
1969         fprintf(trace->output, " (%c%c)\n", map_type, al.level);
1970
1971         return 0;
1972 }
1973
1974 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1975 {
1976         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1977             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1978                 return false;
1979
1980         if (trace->pid_list || trace->tid_list)
1981                 return true;
1982
1983         return false;
1984 }
1985
1986 static int trace__process_sample(struct perf_tool *tool,
1987                                  union perf_event *event,
1988                                  struct perf_sample *sample,
1989                                  struct perf_evsel *evsel,
1990                                  struct machine *machine __maybe_unused)
1991 {
1992         struct trace *trace = container_of(tool, struct trace, tool);
1993         int err = 0;
1994
1995         tracepoint_handler handler = evsel->handler;
1996
1997         if (skip_sample(trace, sample))
1998                 return 0;
1999
2000         if (!trace->full_time && trace->base_time == 0)
2001                 trace->base_time = sample->time;
2002
2003         if (handler) {
2004                 ++trace->nr_events;
2005                 handler(trace, evsel, event, sample);
2006         }
2007
2008         return err;
2009 }
2010
2011 static int parse_target_str(struct trace *trace)
2012 {
2013         if (trace->opts.target.pid) {
2014                 trace->pid_list = intlist__new(trace->opts.target.pid);
2015                 if (trace->pid_list == NULL) {
2016                         pr_err("Error parsing process id string\n");
2017                         return -EINVAL;
2018                 }
2019         }
2020
2021         if (trace->opts.target.tid) {
2022                 trace->tid_list = intlist__new(trace->opts.target.tid);
2023                 if (trace->tid_list == NULL) {
2024                         pr_err("Error parsing thread id string\n");
2025                         return -EINVAL;
2026                 }
2027         }
2028
2029         return 0;
2030 }
2031
2032 static int trace__record(struct trace *trace, int argc, const char **argv)
2033 {
2034         unsigned int rec_argc, i, j;
2035         const char **rec_argv;
2036         const char * const record_args[] = {
2037                 "record",
2038                 "-R",
2039                 "-m", "1024",
2040                 "-c", "1",
2041         };
2042
2043         const char * const sc_args[] = { "-e", };
2044         unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2045         const char * const majpf_args[] = { "-e", "major-faults" };
2046         unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2047         const char * const minpf_args[] = { "-e", "minor-faults" };
2048         unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2049
2050         /* +1 is for the event string below */
2051         rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2052                 majpf_args_nr + minpf_args_nr + argc;
2053         rec_argv = calloc(rec_argc + 1, sizeof(char *));
2054
2055         if (rec_argv == NULL)
2056                 return -ENOMEM;
2057
2058         j = 0;
2059         for (i = 0; i < ARRAY_SIZE(record_args); i++)
2060                 rec_argv[j++] = record_args[i];
2061
2062         if (trace->trace_syscalls) {
2063                 for (i = 0; i < sc_args_nr; i++)
2064                         rec_argv[j++] = sc_args[i];
2065
2066                 /* event string may be different for older kernels - e.g., RHEL6 */
2067                 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2068                         rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2069                 else if (is_valid_tracepoint("syscalls:sys_enter"))
2070                         rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2071                 else {
2072                         pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2073                         return -1;
2074                 }
2075         }
2076
2077         if (trace->trace_pgfaults & TRACE_PFMAJ)
2078                 for (i = 0; i < majpf_args_nr; i++)
2079                         rec_argv[j++] = majpf_args[i];
2080
2081         if (trace->trace_pgfaults & TRACE_PFMIN)
2082                 for (i = 0; i < minpf_args_nr; i++)
2083                         rec_argv[j++] = minpf_args[i];
2084
2085         for (i = 0; i < (unsigned int)argc; i++)
2086                 rec_argv[j++] = argv[i];
2087
2088         return cmd_record(j, rec_argv, NULL);
2089 }
2090
2091 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2092
2093 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2094 {
2095         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2096         if (evsel == NULL)
2097                 return;
2098
2099         if (perf_evsel__field(evsel, "pathname") == NULL) {
2100                 perf_evsel__delete(evsel);
2101                 return;
2102         }
2103
2104         evsel->handler = trace__vfs_getname;
2105         perf_evlist__add(evlist, evsel);
2106 }
2107
2108 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2109                                     u64 config)
2110 {
2111         struct perf_evsel *evsel;
2112         struct perf_event_attr attr = {
2113                 .type = PERF_TYPE_SOFTWARE,
2114                 .mmap_data = 1,
2115         };
2116
2117         attr.config = config;
2118         attr.sample_period = 1;
2119
2120         event_attr_init(&attr);
2121
2122         evsel = perf_evsel__new(&attr);
2123         if (!evsel)
2124                 return -ENOMEM;
2125
2126         evsel->handler = trace__pgfault;
2127         perf_evlist__add(evlist, evsel);
2128
2129         return 0;
2130 }
2131
2132 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2133 {
2134         const u32 type = event->header.type;
2135         struct perf_evsel *evsel;
2136
2137         if (!trace->full_time && trace->base_time == 0)
2138                 trace->base_time = sample->time;
2139
2140         if (type != PERF_RECORD_SAMPLE) {
2141                 trace__process_event(trace, trace->host, event, sample);
2142                 return;
2143         }
2144
2145         evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2146         if (evsel == NULL) {
2147                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2148                 return;
2149         }
2150
2151         if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2152             sample->raw_data == NULL) {
2153                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2154                        perf_evsel__name(evsel), sample->tid,
2155                        sample->cpu, sample->raw_size);
2156         } else {
2157                 tracepoint_handler handler = evsel->handler;
2158                 handler(trace, evsel, event, sample);
2159         }
2160 }
2161
2162 static int trace__run(struct trace *trace, int argc, const char **argv)
2163 {
2164         struct perf_evlist *evlist = trace->evlist;
2165         int err = -1, i;
2166         unsigned long before;
2167         const bool forks = argc > 0;
2168         bool draining = false;
2169
2170         trace->live = true;
2171
2172         if (trace->trace_syscalls &&
2173             perf_evlist__add_syscall_newtp(evlist, trace__sys_enter,
2174                                            trace__sys_exit))
2175                 goto out_error_raw_syscalls;
2176
2177         if (trace->trace_syscalls)
2178                 perf_evlist__add_vfs_getname(evlist);
2179
2180         if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2181             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2182                 goto out_error_mem;
2183         }
2184
2185         if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2186             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2187                 goto out_error_mem;
2188
2189         if (trace->sched &&
2190             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2191                                    trace__sched_stat_runtime))
2192                 goto out_error_sched_stat_runtime;
2193
2194         err = perf_evlist__create_maps(evlist, &trace->opts.target);
2195         if (err < 0) {
2196                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2197                 goto out_delete_evlist;
2198         }
2199
2200         err = trace__symbols_init(trace, evlist);
2201         if (err < 0) {
2202                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2203                 goto out_delete_evlist;
2204         }
2205
2206         perf_evlist__config(evlist, &trace->opts);
2207
2208         signal(SIGCHLD, sig_handler);
2209         signal(SIGINT, sig_handler);
2210
2211         if (forks) {
2212                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2213                                                     argv, false, NULL);
2214                 if (err < 0) {
2215                         fprintf(trace->output, "Couldn't run the workload!\n");
2216                         goto out_delete_evlist;
2217                 }
2218         }
2219
2220         err = perf_evlist__open(evlist);
2221         if (err < 0)
2222                 goto out_error_open;
2223
2224         /*
2225          * Better not use !target__has_task() here because we need to cover the
2226          * case where no threads were specified in the command line, but a
2227          * workload was, and in that case we will fill in the thread_map when
2228          * we fork the workload in perf_evlist__prepare_workload.
2229          */
2230         if (trace->filter_pids.nr > 0)
2231                 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2232         else if (evlist->threads->map[0] == -1)
2233                 err = perf_evlist__set_filter_pid(evlist, getpid());
2234
2235         if (err < 0) {
2236                 printf("err=%d,%s\n", -err, strerror(-err));
2237                 exit(1);
2238         }
2239
2240         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2241         if (err < 0)
2242                 goto out_error_mmap;
2243
2244         if (forks)
2245                 perf_evlist__start_workload(evlist);
2246         else
2247                 perf_evlist__enable(evlist);
2248
2249         trace->multiple_threads = evlist->threads->map[0] == -1 ||
2250                                   evlist->threads->nr > 1 ||
2251                                   perf_evlist__first(evlist)->attr.inherit;
2252 again:
2253         before = trace->nr_events;
2254
2255         for (i = 0; i < evlist->nr_mmaps; i++) {
2256                 union perf_event *event;
2257
2258                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2259                         struct perf_sample sample;
2260
2261                         ++trace->nr_events;
2262
2263                         err = perf_evlist__parse_sample(evlist, event, &sample);
2264                         if (err) {
2265                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2266                                 goto next_event;
2267                         }
2268
2269                         trace__handle_event(trace, event, &sample);
2270 next_event:
2271                         perf_evlist__mmap_consume(evlist, i);
2272
2273                         if (interrupted)
2274                                 goto out_disable;
2275                 }
2276         }
2277
2278         if (trace->nr_events == before) {
2279                 int timeout = done ? 100 : -1;
2280
2281                 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2282                         if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2283                                 draining = true;
2284
2285                         goto again;
2286                 }
2287         } else {
2288                 goto again;
2289         }
2290
2291 out_disable:
2292         thread__zput(trace->current);
2293
2294         perf_evlist__disable(evlist);
2295
2296         if (!err) {
2297                 if (trace->summary)
2298                         trace__fprintf_thread_summary(trace, trace->output);
2299
2300                 if (trace->show_tool_stats) {
2301                         fprintf(trace->output, "Stats:\n "
2302                                                " vfs_getname : %" PRIu64 "\n"
2303                                                " proc_getname: %" PRIu64 "\n",
2304                                 trace->stats.vfs_getname,
2305                                 trace->stats.proc_getname);
2306                 }
2307         }
2308
2309 out_delete_evlist:
2310         perf_evlist__delete(evlist);
2311         trace->evlist = NULL;
2312         trace->live = false;
2313         return err;
2314 {
2315         char errbuf[BUFSIZ];
2316
2317 out_error_sched_stat_runtime:
2318         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2319         goto out_error;
2320
2321 out_error_raw_syscalls:
2322         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2323         goto out_error;
2324
2325 out_error_mmap:
2326         perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2327         goto out_error;
2328
2329 out_error_open:
2330         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2331
2332 out_error:
2333         fprintf(trace->output, "%s\n", errbuf);
2334         goto out_delete_evlist;
2335 }
2336 out_error_mem:
2337         fprintf(trace->output, "Not enough memory to run!\n");
2338         goto out_delete_evlist;
2339 }
2340
2341 static int trace__replay(struct trace *trace)
2342 {
2343         const struct perf_evsel_str_handler handlers[] = {
2344                 { "probe:vfs_getname",       trace__vfs_getname, },
2345         };
2346         struct perf_data_file file = {
2347                 .path  = input_name,
2348                 .mode  = PERF_DATA_MODE_READ,
2349                 .force = trace->force,
2350         };
2351         struct perf_session *session;
2352         struct perf_evsel *evsel;
2353         int err = -1;
2354
2355         trace->tool.sample        = trace__process_sample;
2356         trace->tool.mmap          = perf_event__process_mmap;
2357         trace->tool.mmap2         = perf_event__process_mmap2;
2358         trace->tool.comm          = perf_event__process_comm;
2359         trace->tool.exit          = perf_event__process_exit;
2360         trace->tool.fork          = perf_event__process_fork;
2361         trace->tool.attr          = perf_event__process_attr;
2362         trace->tool.tracing_data = perf_event__process_tracing_data;
2363         trace->tool.build_id      = perf_event__process_build_id;
2364
2365         trace->tool.ordered_events = true;
2366         trace->tool.ordering_requires_timestamps = true;
2367
2368         /* add tid to output */
2369         trace->multiple_threads = true;
2370
2371         session = perf_session__new(&file, false, &trace->tool);
2372         if (session == NULL)
2373                 return -1;
2374
2375         if (symbol__init(&session->header.env) < 0)
2376                 goto out;
2377
2378         trace->host = &session->machines.host;
2379
2380         err = perf_session__set_tracepoints_handlers(session, handlers);
2381         if (err)
2382                 goto out;
2383
2384         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2385                                                      "raw_syscalls:sys_enter");
2386         /* older kernels have syscalls tp versus raw_syscalls */
2387         if (evsel == NULL)
2388                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2389                                                              "syscalls:sys_enter");
2390
2391         if (evsel &&
2392             (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2393             perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2394                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2395                 goto out;
2396         }
2397
2398         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2399                                                      "raw_syscalls:sys_exit");
2400         if (evsel == NULL)
2401                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2402                                                              "syscalls:sys_exit");
2403         if (evsel &&
2404             (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2405             perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2406                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2407                 goto out;
2408         }
2409
2410         evlist__for_each(session->evlist, evsel) {
2411                 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2412                     (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2413                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2414                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2415                         evsel->handler = trace__pgfault;
2416         }
2417
2418         err = parse_target_str(trace);
2419         if (err != 0)
2420                 goto out;
2421
2422         setup_pager();
2423
2424         err = perf_session__process_events(session);
2425         if (err)
2426                 pr_err("Failed to process events, error %d", err);
2427
2428         else if (trace->summary)
2429                 trace__fprintf_thread_summary(trace, trace->output);
2430
2431 out:
2432         perf_session__delete(session);
2433
2434         return err;
2435 }
2436
/*
 * Print the heading of the per-thread summary to @fp.
 *
 * Returns the value reported by fprintf(), i.e. the number of characters
 * written.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2445
2446 static size_t thread__dump_stats(struct thread_trace *ttrace,
2447                                  struct trace *trace, FILE *fp)
2448 {
2449         struct stats *stats;
2450         size_t printed = 0;
2451         struct syscall *sc;
2452         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2453
2454         if (inode == NULL)
2455                 return 0;
2456
2457         printed += fprintf(fp, "\n");
2458
2459         printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
2460         printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
2461         printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
2462
2463         /* each int_node is a syscall */
2464         while (inode) {
2465                 stats = inode->priv;
2466                 if (stats) {
2467                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2468                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2469                         double avg = avg_stats(stats);
2470                         double pct;
2471                         u64 n = (u64) stats->n;
2472
2473                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2474                         avg /= NSEC_PER_MSEC;
2475
2476                         sc = &trace->syscalls.table[inode->i];
2477                         printed += fprintf(fp, "   %-15s", sc->name);
2478                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2479                                            n, min, avg);
2480                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2481                 }
2482
2483                 inode = intlist__next(inode);
2484         }
2485
2486         printed += fprintf(fp, "\n\n");
2487
2488         return printed;
2489 }
2490
/*
 * Context handed to the per-thread summary callback through the opaque
 * pointer argument of machine__for_each_thread().
 */
struct summary_data {
        FILE *fp;               /* stream the summary is written to */
        struct trace *trace;    /* trace state the summary is built from */
        size_t printed;         /* running total of fprintf() results */
};
2497
2498 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2499 {
2500         struct summary_data *data = priv;
2501         FILE *fp = data->fp;
2502         size_t printed = data->printed;
2503         struct trace *trace = data->trace;
2504         struct thread_trace *ttrace = thread__priv(thread);
2505         double ratio;
2506
2507         if (ttrace == NULL)
2508                 return 0;
2509
2510         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2511
2512         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2513         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2514         printed += fprintf(fp, "%.1f%%", ratio);
2515         if (ttrace->pfmaj)
2516                 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2517         if (ttrace->pfmin)
2518                 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2519         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2520         printed += thread__dump_stats(ttrace, trace, fp);
2521
2522         data->printed += printed;
2523
2524         return 0;
2525 }
2526
2527 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2528 {
2529         struct summary_data data = {
2530                 .fp = fp,
2531                 .trace = trace
2532         };
2533         data.printed = trace__fprintf_threads_header(fp);
2534
2535         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2536
2537         return data.printed;
2538 }
2539
2540 static int trace__set_duration(const struct option *opt, const char *str,
2541                                int unset __maybe_unused)
2542 {
2543         struct trace *trace = opt->value;
2544
2545         trace->duration_filter = atof(str);
2546         return 0;
2547 }
2548
2549 static int trace__set_filter_pids(const struct option *opt, const char *str,
2550                                   int unset __maybe_unused)
2551 {
2552         int ret = -1;
2553         size_t i;
2554         struct trace *trace = opt->value;
2555         /*
2556          * FIXME: introduce a intarray class, plain parse csv and create a
2557          * { int nr, int entries[] } struct...
2558          */
2559         struct intlist *list = intlist__new(str);
2560
2561         if (list == NULL)
2562                 return -1;
2563
2564         i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2565         trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2566
2567         if (trace->filter_pids.entries == NULL)
2568                 goto out;
2569
2570         trace->filter_pids.entries[0] = getpid();
2571
2572         for (i = 1; i < trace->filter_pids.nr; ++i)
2573                 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2574
2575         intlist__delete(list);
2576         ret = 0;
2577 out:
2578         return ret;
2579 }
2580
2581 static int trace__open_output(struct trace *trace, const char *filename)
2582 {
2583         struct stat st;
2584
2585         if (!stat(filename, &st) && st.st_size) {
2586                 char oldname[PATH_MAX];
2587
2588                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2589                 unlink(oldname);
2590                 rename(filename, oldname);
2591         }
2592
2593         trace->output = fopen(filename, "w");
2594
2595         return trace->output == NULL ? -errno : 0;
2596 }
2597
2598 static int parse_pagefaults(const struct option *opt, const char *str,
2599                             int unset __maybe_unused)
2600 {
2601         int *trace_pgfaults = opt->value;
2602
2603         if (strcmp(str, "all") == 0)
2604                 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2605         else if (strcmp(str, "maj") == 0)
2606                 *trace_pgfaults |= TRACE_PFMAJ;
2607         else if (strcmp(str, "min") == 0)
2608                 *trace_pgfaults |= TRACE_PFMIN;
2609         else
2610                 return -1;
2611
2612         return 0;
2613 }
2614
2615 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2616 {
2617         struct perf_evsel *evsel;
2618
2619         evlist__for_each(evlist, evsel)
2620                 evsel->handler = handler;
2621 }
2622
2623 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2624 {
2625         const char *trace_usage[] = {
2626                 "perf trace [<options>] [<command>]",
2627                 "perf trace [<options>] -- <command> [<options>]",
2628                 "perf trace record [<options>] [<command>]",
2629                 "perf trace record [<options>] -- <command> [<options>]",
2630                 NULL
2631         };
2632         struct trace trace = {
2633                 .audit = {
2634                         .machine = audit_detect_machine(),
2635                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2636                 },
2637                 .syscalls = {
2638                         . max = -1,
2639                 },
2640                 .opts = {
2641                         .target = {
2642                                 .uid       = UINT_MAX,
2643                                 .uses_mmap = true,
2644                         },
2645                         .user_freq     = UINT_MAX,
2646                         .user_interval = ULLONG_MAX,
2647                         .no_buffering  = true,
2648                         .mmap_pages    = UINT_MAX,
2649                 },
2650                 .output = stdout,
2651                 .show_comm = true,
2652                 .trace_syscalls = true,
2653         };
2654         const char *output_name = NULL;
2655         const char *ev_qualifier_str = NULL;
2656         const struct option trace_options[] = {
2657         OPT_CALLBACK(0, "event", &trace.evlist, "event",
2658                      "event selector. use 'perf list' to list available events",
2659                      parse_events_option),
2660         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2661                     "show the thread COMM next to its id"),
2662         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2663         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2664                     "list of events to trace"),
2665         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2666         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2667         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2668                     "trace events on existing process id"),
2669         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2670                     "trace events on existing thread id"),
2671         OPT_CALLBACK(0, "filter-pids", &trace, "float",
2672                      "show only events with duration > N.M ms", trace__set_filter_pids),
2673         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2674                     "system-wide collection from all CPUs"),
2675         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2676                     "list of cpus to monitor"),
2677         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2678                     "child tasks do not inherit counters"),
2679         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2680                      "number of mmap data pages",
2681                      perf_evlist__parse_mmap_pages),
2682         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2683                    "user to profile"),
2684         OPT_CALLBACK(0, "duration", &trace, "float",
2685                      "show only events with duration > N.M ms",
2686                      trace__set_duration),
2687         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2688         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2689         OPT_BOOLEAN('T', "time", &trace.full_time,
2690                     "Show full timestamp, not time relative to first start"),
2691         OPT_BOOLEAN('s', "summary", &trace.summary_only,
2692                     "Show only syscall summary with statistics"),
2693         OPT_BOOLEAN('S', "with-summary", &trace.summary,
2694                     "Show all syscalls and summary with statistics"),
2695         OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2696                      "Trace pagefaults", parse_pagefaults, "maj"),
2697         OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2698         OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
2699         OPT_END()
2700         };
2701         const char * const trace_subcommands[] = { "record", NULL };
2702         int err;
2703         char bf[BUFSIZ];
2704
2705         signal(SIGSEGV, sighandler_dump_stack);
2706         signal(SIGFPE, sighandler_dump_stack);
2707
2708         trace.evlist = perf_evlist__new();
2709         if (trace.evlist == NULL)
2710                 return -ENOMEM;
2711
2712         if (trace.evlist == NULL) {
2713                 pr_err("Not enough memory to run!\n");
2714                 goto out;
2715         }
2716
2717         argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2718                                  trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2719
2720         if (trace.trace_pgfaults) {
2721                 trace.opts.sample_address = true;
2722                 trace.opts.sample_time = true;
2723         }
2724
2725         if (trace.evlist->nr_entries > 0)
2726                 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2727
2728         if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2729                 return trace__record(&trace, argc-1, &argv[1]);
2730
2731         /* summary_only implies summary option, but don't overwrite summary if set */
2732         if (trace.summary_only)
2733                 trace.summary = trace.summary_only;
2734
2735         if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2736             trace.evlist->nr_entries == 0 /* Was --events used? */) {
2737                 pr_err("Please specify something to trace.\n");
2738                 return -1;
2739         }
2740
2741         if (output_name != NULL) {
2742                 err = trace__open_output(&trace, output_name);
2743                 if (err < 0) {
2744                         perror("failed to create output file");
2745                         goto out;
2746                 }
2747         }
2748
2749         if (ev_qualifier_str != NULL) {
2750                 const char *s = ev_qualifier_str;
2751
2752                 trace.not_ev_qualifier = *s == '!';
2753                 if (trace.not_ev_qualifier)
2754                         ++s;
2755                 trace.ev_qualifier = strlist__new(true, s);
2756                 if (trace.ev_qualifier == NULL) {
2757                         fputs("Not enough memory to parse event qualifier",
2758                               trace.output);
2759                         err = -ENOMEM;
2760                         goto out_close;
2761                 }
2762         }
2763
2764         err = target__validate(&trace.opts.target);
2765         if (err) {
2766                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2767                 fprintf(trace.output, "%s", bf);
2768                 goto out_close;
2769         }
2770
2771         err = target__parse_uid(&trace.opts.target);
2772         if (err) {
2773                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2774                 fprintf(trace.output, "%s", bf);
2775                 goto out_close;
2776         }
2777
2778         if (!argc && target__none(&trace.opts.target))
2779                 trace.opts.target.system_wide = true;
2780
2781         if (input_name)
2782                 err = trace__replay(&trace);
2783         else
2784                 err = trace__run(&trace, argc, argv);
2785
2786 out_close:
2787         if (output_name != NULL)
2788                 fclose(trace.output);
2789 out:
2790         return err;
2791 }