spi: mpc512x-psc: Remove redundant code to get bus_num from DT
[cascardo/linux.git] / tools / perf / builtin-trace.c
#include <traceevent/event-parse.h>
#include "builtin.h"
#include "util/color.h"
#include "util/debug.h"
#include "util/evlist.h"
#include "util/machine.h"
#include "util/session.h"
#include "util/thread.h"
#include "util/parse-options.h"
#include "util/strlist.h"
#include "util/intlist.h"
#include "util/thread_map.h"
#include "util/stat.h"
#include "trace-event.h"
#include "util/parse-events.h"

#include <libaudit.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
22
23 /* For older distros: */
24 #ifndef MAP_STACK
25 # define MAP_STACK              0x20000
26 #endif
27
28 #ifndef MADV_HWPOISON
29 # define MADV_HWPOISON          100
30 #endif
31
32 #ifndef MADV_MERGEABLE
33 # define MADV_MERGEABLE         12
34 #endif
35
36 #ifndef MADV_UNMERGEABLE
37 # define MADV_UNMERGEABLE       13
38 #endif
39
40 struct tp_field {
41         int offset;
42         union {
43                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
44                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
45         };
46 };
47
48 #define TP_UINT_FIELD(bits) \
49 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
50 { \
51         return *(u##bits *)(sample->raw_data + field->offset); \
52 }
53
54 TP_UINT_FIELD(8);
55 TP_UINT_FIELD(16);
56 TP_UINT_FIELD(32);
57 TP_UINT_FIELD(64);
58
59 #define TP_UINT_FIELD__SWAPPED(bits) \
60 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
61 { \
62         u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
63         return bswap_##bits(value);\
64 }
65
66 TP_UINT_FIELD__SWAPPED(16);
67 TP_UINT_FIELD__SWAPPED(32);
68 TP_UINT_FIELD__SWAPPED(64);
69
70 static int tp_field__init_uint(struct tp_field *field,
71                                struct format_field *format_field,
72                                bool needs_swap)
73 {
74         field->offset = format_field->offset;
75
76         switch (format_field->size) {
77         case 1:
78                 field->integer = tp_field__u8;
79                 break;
80         case 2:
81                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
82                 break;
83         case 4:
84                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
85                 break;
86         case 8:
87                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
88                 break;
89         default:
90                 return -1;
91         }
92
93         return 0;
94 }
95
96 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
97 {
98         return sample->raw_data + field->offset;
99 }
100
101 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
102 {
103         field->offset = format_field->offset;
104         field->pointer = tp_field__ptr;
105         return 0;
106 }
107
108 struct syscall_tp {
109         struct tp_field id;
110         union {
111                 struct tp_field args, ret;
112         };
113 };
114
115 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
116                                           struct tp_field *field,
117                                           const char *name)
118 {
119         struct format_field *format_field = perf_evsel__field(evsel, name);
120
121         if (format_field == NULL)
122                 return -1;
123
124         return tp_field__init_uint(field, format_field, evsel->needs_swap);
125 }
126
/*
 * Initialise member @name of the struct syscall_tp hanging off evsel->priv
 * as an integer accessor for the tracepoint field of the same name.
 * Evaluates to the result of perf_evsel__init_tp_uint_field().
 *
 * @evsel is parenthesised so that any pointer expression can be passed; note
 * that it is still evaluated twice.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
130
131 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
132                                          struct tp_field *field,
133                                          const char *name)
134 {
135         struct format_field *format_field = perf_evsel__field(evsel, name);
136
137         if (format_field == NULL)
138                 return -1;
139
140         return tp_field__init_ptr(field, format_field);
141 }
142
/*
 * Initialise member @name of the struct syscall_tp hanging off evsel->priv
 * as a pointer accessor for the tracepoint field of the same name.
 * Evaluates to the result of perf_evsel__init_tp_ptr_field().
 *
 * @evsel is parenthesised so that any pointer expression can be passed; note
 * that it is still evaluated twice.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
146
147 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
148 {
149         zfree(&evsel->priv);
150         perf_evsel__delete(evsel);
151 }
152
153 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
154 {
155         evsel->priv = malloc(sizeof(struct syscall_tp));
156         if (evsel->priv != NULL) {
157                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
158                         goto out_delete;
159
160                 evsel->handler = handler;
161                 return 0;
162         }
163
164         return -ENOMEM;
165
166 out_delete:
167         zfree(&evsel->priv);
168         return -ENOENT;
169 }
170
/*
 * Create the syscall tracepoint event for @direction ("sys_enter" or
 * "sys_exit" at the call sites in this file) and prepare it with
 * perf_evsel__init_syscall_tp().
 *
 * "raw_syscalls" is tried first, then "syscalls" for older kernels.
 * Returns the new evsel, or NULL if neither tracepoint could be created or
 * the initialisation failed (the evsel is destroyed in the latter case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	if (evsel == NULL) {
		/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
		evsel = perf_evsel__newtp("syscalls", direction);
		if (evsel == NULL)
			return NULL;
	}

	if (perf_evsel__init_syscall_tp(evsel, handler) != 0) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
190
/*
 * Read member @name of the struct syscall_tp attached to evsel->priv from
 * @sample, as an integer (..._uint) or as a pointer into the payload
 * (..._ptr).
 *
 * @evsel is parenthesised so that any pointer expression can be passed.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, sample); })

#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, sample); })
198
199 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
200                                           void *sys_enter_handler,
201                                           void *sys_exit_handler)
202 {
203         int ret = -1;
204         struct perf_evsel *sys_enter, *sys_exit;
205
206         sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
207         if (sys_enter == NULL)
208                 goto out;
209
210         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
211                 goto out_delete_sys_enter;
212
213         sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
214         if (sys_exit == NULL)
215                 goto out_delete_sys_enter;
216
217         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
218                 goto out_delete_sys_exit;
219
220         perf_evlist__add(evlist, sys_enter);
221         perf_evlist__add(evlist, sys_exit);
222
223         ret = 0;
224 out:
225         return ret;
226
227 out_delete_sys_exit:
228         perf_evsel__delete_priv(sys_exit);
229 out_delete_sys_enter:
230         perf_evsel__delete_priv(sys_enter);
231         goto out;
232 }
233
234
235 struct syscall_arg {
236         unsigned long val;
237         struct thread *thread;
238         struct trace  *trace;
239         void          *parm;
240         u8            idx;
241         u8            mask;
242 };
243
/*
 * Table of names for a contiguous range of integer values: value V maps to
 * entries[V - offset] when that index is inside [0, nr_entries).
 */
struct strarray {
	int offset;		/* value that corresponds to entries[0] */
	int nr_entries;		/* number of slots in entries[] */
	const char **entries;	/* the names themselves */
};
249
250 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
251         .nr_entries = ARRAY_SIZE(array), \
252         .entries = array, \
253 }
254
255 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
256         .offset     = off, \
257         .nr_entries = ARRAY_SIZE(array), \
258         .entries = array, \
259 }
260
261 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
262                                                 const char *intfmt,
263                                                 struct syscall_arg *arg)
264 {
265         struct strarray *sa = arg->parm;
266         int idx = arg->val - sa->offset;
267
268         if (idx < 0 || idx >= sa->nr_entries)
269                 return scnprintf(bf, size, intfmt, arg->val);
270
271         return scnprintf(bf, size, "%s", sa->entries[idx]);
272 }
273
/*
 * strarray printer whose fallback for values outside the table is decimal.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
281
/*
 * strarray printer whose fallback for values outside the table is
 * hexadecimal.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
289
290 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
291                                         struct syscall_arg *arg);
292
293 #define SCA_FD syscall_arg__scnprintf_fd
294
295 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
296                                            struct syscall_arg *arg)
297 {
298         int fd = arg->val;
299
300         if (fd == AT_FDCWD)
301                 return scnprintf(bf, size, "CWD");
302
303         return syscall_arg__scnprintf_fd(bf, size, arg);
304 }
305
306 #define SCA_FDAT syscall_arg__scnprintf_fd_at
307
308 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
309                                               struct syscall_arg *arg);
310
311 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
312
313 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
314                                          struct syscall_arg *arg)
315 {
316         return scnprintf(bf, size, "%#lx", arg->val);
317 }
318
319 #define SCA_HEX syscall_arg__scnprintf_hex
320
321 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
322                                                struct syscall_arg *arg)
323 {
324         int printed = 0, prot = arg->val;
325
326         if (prot == PROT_NONE)
327                 return scnprintf(bf, size, "NONE");
328 #define P_MMAP_PROT(n) \
329         if (prot & PROT_##n) { \
330                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
331                 prot &= ~PROT_##n; \
332         }
333
334         P_MMAP_PROT(EXEC);
335         P_MMAP_PROT(READ);
336         P_MMAP_PROT(WRITE);
337 #ifdef PROT_SEM
338         P_MMAP_PROT(SEM);
339 #endif
340         P_MMAP_PROT(GROWSDOWN);
341         P_MMAP_PROT(GROWSUP);
342 #undef P_MMAP_PROT
343
344         if (prot)
345                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
346
347         return printed;
348 }
349
350 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
351
352 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
353                                                 struct syscall_arg *arg)
354 {
355         int printed = 0, flags = arg->val;
356
357 #define P_MMAP_FLAG(n) \
358         if (flags & MAP_##n) { \
359                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
360                 flags &= ~MAP_##n; \
361         }
362
363         P_MMAP_FLAG(SHARED);
364         P_MMAP_FLAG(PRIVATE);
365 #ifdef MAP_32BIT
366         P_MMAP_FLAG(32BIT);
367 #endif
368         P_MMAP_FLAG(ANONYMOUS);
369         P_MMAP_FLAG(DENYWRITE);
370         P_MMAP_FLAG(EXECUTABLE);
371         P_MMAP_FLAG(FILE);
372         P_MMAP_FLAG(FIXED);
373         P_MMAP_FLAG(GROWSDOWN);
374 #ifdef MAP_HUGETLB
375         P_MMAP_FLAG(HUGETLB);
376 #endif
377         P_MMAP_FLAG(LOCKED);
378         P_MMAP_FLAG(NONBLOCK);
379         P_MMAP_FLAG(NORESERVE);
380         P_MMAP_FLAG(POPULATE);
381         P_MMAP_FLAG(STACK);
382 #ifdef MAP_UNINITIALIZED
383         P_MMAP_FLAG(UNINITIALIZED);
384 #endif
385 #undef P_MMAP_FLAG
386
387         if (flags)
388                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
389
390         return printed;
391 }
392
393 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
394
395 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
396                                                       struct syscall_arg *arg)
397 {
398         int behavior = arg->val;
399
400         switch (behavior) {
401 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
402         P_MADV_BHV(NORMAL);
403         P_MADV_BHV(RANDOM);
404         P_MADV_BHV(SEQUENTIAL);
405         P_MADV_BHV(WILLNEED);
406         P_MADV_BHV(DONTNEED);
407         P_MADV_BHV(REMOVE);
408         P_MADV_BHV(DONTFORK);
409         P_MADV_BHV(DOFORK);
410         P_MADV_BHV(HWPOISON);
411 #ifdef MADV_SOFT_OFFLINE
412         P_MADV_BHV(SOFT_OFFLINE);
413 #endif
414         P_MADV_BHV(MERGEABLE);
415         P_MADV_BHV(UNMERGEABLE);
416 #ifdef MADV_HUGEPAGE
417         P_MADV_BHV(HUGEPAGE);
418 #endif
419 #ifdef MADV_NOHUGEPAGE
420         P_MADV_BHV(NOHUGEPAGE);
421 #endif
422 #ifdef MADV_DONTDUMP
423         P_MADV_BHV(DONTDUMP);
424 #endif
425 #ifdef MADV_DODUMP
426         P_MADV_BHV(DODUMP);
427 #endif
428 #undef P_MADV_PHV
429         default: break;
430         }
431
432         return scnprintf(bf, size, "%#x", behavior);
433 }
434
435 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
436
437 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
438                                            struct syscall_arg *arg)
439 {
440         int printed = 0, op = arg->val;
441
442         if (op == 0)
443                 return scnprintf(bf, size, "NONE");
444 #define P_CMD(cmd) \
445         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
446                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
447                 op &= ~LOCK_##cmd; \
448         }
449
450         P_CMD(SH);
451         P_CMD(EX);
452         P_CMD(NB);
453         P_CMD(UN);
454         P_CMD(MAND);
455         P_CMD(RW);
456         P_CMD(READ);
457         P_CMD(WRITE);
458 #undef P_OP
459
460         if (op)
461                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
462
463         return printed;
464 }
465
466 #define SCA_FLOCK syscall_arg__scnprintf_flock
467
468 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
469 {
470         enum syscall_futex_args {
471                 SCF_UADDR   = (1 << 0),
472                 SCF_OP      = (1 << 1),
473                 SCF_VAL     = (1 << 2),
474                 SCF_TIMEOUT = (1 << 3),
475                 SCF_UADDR2  = (1 << 4),
476                 SCF_VAL3    = (1 << 5),
477         };
478         int op = arg->val;
479         int cmd = op & FUTEX_CMD_MASK;
480         size_t printed = 0;
481
482         switch (cmd) {
483 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
484         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
485         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
486         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
487         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
488         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
489         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
490         P_FUTEX_OP(WAKE_OP);                                                      break;
491         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
492         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
493         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
494         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
495         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
496         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
497         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
498         }
499
500         if (op & FUTEX_PRIVATE_FLAG)
501                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
502
503         if (op & FUTEX_CLOCK_REALTIME)
504                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
505
506         return printed;
507 }
508
509 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
510
511 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
512 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
513
514 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
515 static DEFINE_STRARRAY(itimers);
516
517 static const char *whences[] = { "SET", "CUR", "END",
518 #ifdef SEEK_DATA
519 "DATA",
520 #endif
521 #ifdef SEEK_HOLE
522 "HOLE",
523 #endif
524 };
525 static DEFINE_STRARRAY(whences);
526
527 static const char *fcntl_cmds[] = {
528         "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
529         "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
530         "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
531         "F_GETOWNER_UIDS",
532 };
533 static DEFINE_STRARRAY(fcntl_cmds);
534
535 static const char *rlimit_resources[] = {
536         "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
537         "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
538         "RTTIME",
539 };
540 static DEFINE_STRARRAY(rlimit_resources);
541
542 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
543 static DEFINE_STRARRAY(sighow);
544
545 static const char *clockid[] = {
546         "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
547         "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
548 };
549 static DEFINE_STRARRAY(clockid);
550
/*
 * Socket address family names, indexed by the AF_* number (0 = UNSPEC).
 *
 * The table must stay dense: every family number needs its own slot, or all
 * later names end up one position too low. Slot 28 (MPLS) keeps "CAN" at 29
 * and "VSOCK" at 40, matching the kernel's AF_* numbering.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
559 static DEFINE_STRARRAY(socket_families);
560
561 #ifndef SOCK_TYPE_MASK
562 #define SOCK_TYPE_MASK 0xf
563 #endif
564
565 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
566                                                       struct syscall_arg *arg)
567 {
568         size_t printed;
569         int type = arg->val,
570             flags = type & ~SOCK_TYPE_MASK;
571
572         type &= SOCK_TYPE_MASK;
573         /*
574          * Can't use a strarray, MIPS may override for ABI reasons.
575          */
576         switch (type) {
577 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
578         P_SK_TYPE(STREAM);
579         P_SK_TYPE(DGRAM);
580         P_SK_TYPE(RAW);
581         P_SK_TYPE(RDM);
582         P_SK_TYPE(SEQPACKET);
583         P_SK_TYPE(DCCP);
584         P_SK_TYPE(PACKET);
585 #undef P_SK_TYPE
586         default:
587                 printed = scnprintf(bf, size, "%#x", type);
588         }
589
590 #define P_SK_FLAG(n) \
591         if (flags & SOCK_##n) { \
592                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
593                 flags &= ~SOCK_##n; \
594         }
595
596         P_SK_FLAG(CLOEXEC);
597         P_SK_FLAG(NONBLOCK);
598 #undef P_SK_FLAG
599
600         if (flags)
601                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
602
603         return printed;
604 }
605
606 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
607
608 #ifndef MSG_PROBE
609 #define MSG_PROBE            0x10
610 #endif
611 #ifndef MSG_WAITFORONE
612 #define MSG_WAITFORONE  0x10000
613 #endif
614 #ifndef MSG_SENDPAGE_NOTLAST
615 #define MSG_SENDPAGE_NOTLAST 0x20000
616 #endif
617 #ifndef MSG_FASTOPEN
618 #define MSG_FASTOPEN         0x20000000
619 #endif
620
621 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
622                                                struct syscall_arg *arg)
623 {
624         int printed = 0, flags = arg->val;
625
626         if (flags == 0)
627                 return scnprintf(bf, size, "NONE");
628 #define P_MSG_FLAG(n) \
629         if (flags & MSG_##n) { \
630                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
631                 flags &= ~MSG_##n; \
632         }
633
634         P_MSG_FLAG(OOB);
635         P_MSG_FLAG(PEEK);
636         P_MSG_FLAG(DONTROUTE);
637         P_MSG_FLAG(TRYHARD);
638         P_MSG_FLAG(CTRUNC);
639         P_MSG_FLAG(PROBE);
640         P_MSG_FLAG(TRUNC);
641         P_MSG_FLAG(DONTWAIT);
642         P_MSG_FLAG(EOR);
643         P_MSG_FLAG(WAITALL);
644         P_MSG_FLAG(FIN);
645         P_MSG_FLAG(SYN);
646         P_MSG_FLAG(CONFIRM);
647         P_MSG_FLAG(RST);
648         P_MSG_FLAG(ERRQUEUE);
649         P_MSG_FLAG(NOSIGNAL);
650         P_MSG_FLAG(MORE);
651         P_MSG_FLAG(WAITFORONE);
652         P_MSG_FLAG(SENDPAGE_NOTLAST);
653         P_MSG_FLAG(FASTOPEN);
654         P_MSG_FLAG(CMSG_CLOEXEC);
655 #undef P_MSG_FLAG
656
657         if (flags)
658                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
659
660         return printed;
661 }
662
663 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
664
665 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
666                                                  struct syscall_arg *arg)
667 {
668         size_t printed = 0;
669         int mode = arg->val;
670
671         if (mode == F_OK) /* 0 */
672                 return scnprintf(bf, size, "F");
673 #define P_MODE(n) \
674         if (mode & n##_OK) { \
675                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
676                 mode &= ~n##_OK; \
677         }
678
679         P_MODE(R);
680         P_MODE(W);
681         P_MODE(X);
682 #undef P_MODE
683
684         if (mode)
685                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
686
687         return printed;
688 }
689
690 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
691
692 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
693                                                struct syscall_arg *arg)
694 {
695         int printed = 0, flags = arg->val;
696
697         if (!(flags & O_CREAT))
698                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
699
700         if (flags == 0)
701                 return scnprintf(bf, size, "RDONLY");
702 #define P_FLAG(n) \
703         if (flags & O_##n) { \
704                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
705                 flags &= ~O_##n; \
706         }
707
708         P_FLAG(APPEND);
709         P_FLAG(ASYNC);
710         P_FLAG(CLOEXEC);
711         P_FLAG(CREAT);
712         P_FLAG(DIRECT);
713         P_FLAG(DIRECTORY);
714         P_FLAG(EXCL);
715         P_FLAG(LARGEFILE);
716         P_FLAG(NOATIME);
717         P_FLAG(NOCTTY);
718 #ifdef O_NONBLOCK
719         P_FLAG(NONBLOCK);
720 #elif O_NDELAY
721         P_FLAG(NDELAY);
722 #endif
723 #ifdef O_PATH
724         P_FLAG(PATH);
725 #endif
726         P_FLAG(RDWR);
727 #ifdef O_DSYNC
728         if ((flags & O_SYNC) == O_SYNC)
729                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
730         else {
731                 P_FLAG(DSYNC);
732         }
733 #else
734         P_FLAG(SYNC);
735 #endif
736         P_FLAG(TRUNC);
737         P_FLAG(WRONLY);
738 #undef P_FLAG
739
740         if (flags)
741                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
742
743         return printed;
744 }
745
746 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
747
748 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
749                                                    struct syscall_arg *arg)
750 {
751         int printed = 0, flags = arg->val;
752
753         if (flags == 0)
754                 return scnprintf(bf, size, "NONE");
755 #define P_FLAG(n) \
756         if (flags & EFD_##n) { \
757                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
758                 flags &= ~EFD_##n; \
759         }
760
761         P_FLAG(SEMAPHORE);
762         P_FLAG(CLOEXEC);
763         P_FLAG(NONBLOCK);
764 #undef P_FLAG
765
766         if (flags)
767                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
768
769         return printed;
770 }
771
772 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
773
774 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
775                                                 struct syscall_arg *arg)
776 {
777         int printed = 0, flags = arg->val;
778
779 #define P_FLAG(n) \
780         if (flags & O_##n) { \
781                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
782                 flags &= ~O_##n; \
783         }
784
785         P_FLAG(CLOEXEC);
786         P_FLAG(NONBLOCK);
787 #undef P_FLAG
788
789         if (flags)
790                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
791
792         return printed;
793 }
794
795 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
796
797 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
798 {
799         int sig = arg->val;
800
801         switch (sig) {
802 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
803         P_SIGNUM(HUP);
804         P_SIGNUM(INT);
805         P_SIGNUM(QUIT);
806         P_SIGNUM(ILL);
807         P_SIGNUM(TRAP);
808         P_SIGNUM(ABRT);
809         P_SIGNUM(BUS);
810         P_SIGNUM(FPE);
811         P_SIGNUM(KILL);
812         P_SIGNUM(USR1);
813         P_SIGNUM(SEGV);
814         P_SIGNUM(USR2);
815         P_SIGNUM(PIPE);
816         P_SIGNUM(ALRM);
817         P_SIGNUM(TERM);
818         P_SIGNUM(STKFLT);
819         P_SIGNUM(CHLD);
820         P_SIGNUM(CONT);
821         P_SIGNUM(STOP);
822         P_SIGNUM(TSTP);
823         P_SIGNUM(TTIN);
824         P_SIGNUM(TTOU);
825         P_SIGNUM(URG);
826         P_SIGNUM(XCPU);
827         P_SIGNUM(XFSZ);
828         P_SIGNUM(VTALRM);
829         P_SIGNUM(PROF);
830         P_SIGNUM(WINCH);
831         P_SIGNUM(IO);
832         P_SIGNUM(PWR);
833         P_SIGNUM(SYS);
834         default: break;
835         }
836
837         return scnprintf(bf, size, "%#x", sig);
838 }
839
840 #define SCA_SIGNUM syscall_arg__scnprintf_signum
841
#define TCGETS		0x5401

/*
 * Names of the terminal ioctls, indexed by (command - TCGETS): the table is
 * looked up with an offset of 0x5401, so slot 0 is TCGETS itself.
 *
 * Gaps in the command numbering are skipped with designated initialisers.
 * Those indices are written as "command - TCGETS" so that they stay
 * relative to the start of the table, like the lookup.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
	[0x5427 - TCGETS] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK",
	[0x5440 - TCGETS] = "TIOCGEXCL",
	[0x5450 - TCGETS] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT",
	[0x5460 - TCGETS] = "FIOQSIZE",
};
861
862 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
863
864 #define STRARRAY(arg, name, array) \
865           .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
866           .arg_parm      = { [arg] = &strarray__##array, }
867
868 static struct syscall_fmt {
869         const char *name;
870         const char *alias;
871         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
872         void       *arg_parm[6];
873         bool       errmsg;
874         bool       timeout;
875         bool       hexret;
876 } syscall_fmts[] = {
877         { .name     = "access",     .errmsg = true,
878           .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
879         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
880         { .name     = "brk",        .hexret = true,
881           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
882         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
883         { .name     = "close",      .errmsg = true,
884           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 
885         { .name     = "connect",    .errmsg = true, },
886         { .name     = "dup",        .errmsg = true,
887           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
888         { .name     = "dup2",       .errmsg = true,
889           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
890         { .name     = "dup3",       .errmsg = true,
891           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
892         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
893         { .name     = "eventfd2",   .errmsg = true,
894           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
895         { .name     = "faccessat",  .errmsg = true,
896           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
897         { .name     = "fadvise64",  .errmsg = true,
898           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
899         { .name     = "fallocate",  .errmsg = true,
900           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
901         { .name     = "fchdir",     .errmsg = true,
902           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
903         { .name     = "fchmod",     .errmsg = true,
904           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
905         { .name     = "fchmodat",   .errmsg = true,
906           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
907         { .name     = "fchown",     .errmsg = true,
908           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
909         { .name     = "fchownat",   .errmsg = true,
910           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
911         { .name     = "fcntl",      .errmsg = true,
912           .arg_scnprintf = { [0] = SCA_FD, /* fd */
913                              [1] = SCA_STRARRAY, /* cmd */ },
914           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
915         { .name     = "fdatasync",  .errmsg = true,
916           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
917         { .name     = "flock",      .errmsg = true,
918           .arg_scnprintf = { [0] = SCA_FD, /* fd */
919                              [1] = SCA_FLOCK, /* cmd */ }, },
920         { .name     = "fsetxattr",  .errmsg = true,
921           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
922         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
923           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
924         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
925           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
926         { .name     = "fstatfs",    .errmsg = true,
927           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
928         { .name     = "fsync",    .errmsg = true,
929           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
930         { .name     = "ftruncate", .errmsg = true,
931           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
932         { .name     = "futex",      .errmsg = true,
933           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
934         { .name     = "futimesat", .errmsg = true,
935           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
936         { .name     = "getdents",   .errmsg = true,
937           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
938         { .name     = "getdents64", .errmsg = true,
939           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
940         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
941         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
942         { .name     = "ioctl",      .errmsg = true,
943           .arg_scnprintf = { [0] = SCA_FD, /* fd */ 
944                              [1] = SCA_STRHEXARRAY, /* cmd */
945                              [2] = SCA_HEX, /* arg */ },
946           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
947         { .name     = "kill",       .errmsg = true,
948           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
949         { .name     = "linkat",     .errmsg = true,
950           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
951         { .name     = "lseek",      .errmsg = true,
952           .arg_scnprintf = { [0] = SCA_FD, /* fd */
953                              [2] = SCA_STRARRAY, /* whence */ },
954           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
955         { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
956         { .name     = "madvise",    .errmsg = true,
957           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
958                              [2] = SCA_MADV_BHV, /* behavior */ }, },
959         { .name     = "mkdirat",    .errmsg = true,
960           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
961         { .name     = "mknodat",    .errmsg = true,
962           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
963         { .name     = "mlock",      .errmsg = true,
964           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
965         { .name     = "mlockall",   .errmsg = true,
966           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
967         { .name     = "mmap",       .hexret = true,
968           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
969                              [2] = SCA_MMAP_PROT, /* prot */
970                              [3] = SCA_MMAP_FLAGS, /* flags */
971                              [4] = SCA_FD,        /* fd */ }, },
972         { .name     = "mprotect",   .errmsg = true,
973           .arg_scnprintf = { [0] = SCA_HEX, /* start */
974                              [2] = SCA_MMAP_PROT, /* prot */ }, },
975         { .name     = "mremap",     .hexret = true,
976           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
977                              [4] = SCA_HEX, /* new_addr */ }, },
978         { .name     = "munlock",    .errmsg = true,
979           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
980         { .name     = "munmap",     .errmsg = true,
981           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
982         { .name     = "name_to_handle_at", .errmsg = true,
983           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
984         { .name     = "newfstatat", .errmsg = true,
985           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
986         { .name     = "open",       .errmsg = true,
987           .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
988         { .name     = "open_by_handle_at", .errmsg = true,
989           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
990                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
991         { .name     = "openat",     .errmsg = true,
992           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
993                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
994         { .name     = "pipe2",      .errmsg = true,
995           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
996         { .name     = "poll",       .errmsg = true, .timeout = true, },
997         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
998         { .name     = "pread",      .errmsg = true, .alias = "pread64",
999           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1000         { .name     = "preadv",     .errmsg = true, .alias = "pread",
1001           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1002         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1003         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
1004           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1005         { .name     = "pwritev",    .errmsg = true,
1006           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1007         { .name     = "read",       .errmsg = true,
1008           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1009         { .name     = "readlinkat", .errmsg = true,
1010           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1011         { .name     = "readv",      .errmsg = true,
1012           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1013         { .name     = "recvfrom",   .errmsg = true,
1014           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1015         { .name     = "recvmmsg",   .errmsg = true,
1016           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1017         { .name     = "recvmsg",    .errmsg = true,
1018           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1019         { .name     = "renameat",   .errmsg = true,
1020           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1021         { .name     = "rt_sigaction", .errmsg = true,
1022           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1023         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1024         { .name     = "rt_sigqueueinfo", .errmsg = true,
1025           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1026         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
1027           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1028         { .name     = "select",     .errmsg = true, .timeout = true, },
1029         { .name     = "sendmmsg",    .errmsg = true,
1030           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1031         { .name     = "sendmsg",    .errmsg = true,
1032           .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1033         { .name     = "sendto",     .errmsg = true,
1034           .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1035         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1036         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1037         { .name     = "shutdown",   .errmsg = true,
1038           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1039         { .name     = "socket",     .errmsg = true,
1040           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1041                              [1] = SCA_SK_TYPE, /* type */ },
1042           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1043         { .name     = "socketpair", .errmsg = true,
1044           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1045                              [1] = SCA_SK_TYPE, /* type */ },
1046           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1047         { .name     = "stat",       .errmsg = true, .alias = "newstat", },
1048         { .name     = "symlinkat",  .errmsg = true,
1049           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
1050         { .name     = "tgkill",     .errmsg = true,
1051           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1052         { .name     = "tkill",      .errmsg = true,
1053           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1054         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
1055         { .name     = "unlinkat",   .errmsg = true,
1056           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1057         { .name     = "utimensat",  .errmsg = true,
1058           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1059         { .name     = "write",      .errmsg = true,
1060           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1061         { .name     = "writev",     .errmsg = true,
1062           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
1063 };
1064
1065 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1066 {
1067         const struct syscall_fmt *fmt = fmtp;
1068         return strcmp(name, fmt->name);
1069 }
1070
1071 static struct syscall_fmt *syscall_fmt__find(const char *name)
1072 {
1073         const int nmemb = ARRAY_SIZE(syscall_fmts);
1074         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1075 }
1076
/*
 * Per syscall-id state kept in trace->syscalls.table; the slot for an id is
 * filled in by trace__read_syscall_info().
 */
struct syscall {
        /* format of the "syscalls:sys_enter_<name>" tracepoint, NULL if not found */
        struct event_format *tp_format;
        /* name returned by audit_syscall_to_name(); NULL means "slot not read yet" */
        const char *name;
        /* set when the event qualifier excludes this syscall */
        bool filtered;
        /* matching syscall_fmts entry, NULL when the syscall has none */
        struct syscall_fmt *fmt;
        /* one formatter per argument position; a NULL slot means plain "%ld" */
        size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        /* per-argument formatter parameter, taken from fmt->arg_parm */
        void **arg_parm;
};
1085
1086 static size_t fprintf_duration(unsigned long t, FILE *fp)
1087 {
1088         double duration = (double)t / NSEC_PER_MSEC;
1089         size_t printed = fprintf(fp, "(");
1090
1091         if (duration >= 1.0)
1092                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1093         else if (duration >= 0.01)
1094                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1095         else
1096                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1097         return printed + fprintf(fp, "): ");
1098 }
1099
1100 struct thread_trace {
1101         u64               entry_time;
1102         u64               exit_time;
1103         bool              entry_pending;
1104         unsigned long     nr_events;
1105         char              *entry_str;
1106         double            runtime_ms;
1107         struct {
1108                 int       max;
1109                 char      **table;
1110         } paths;
1111
1112         struct intlist *syscall_stats;
1113 };
1114
1115 static struct thread_trace *thread_trace__new(void)
1116 {
1117         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1118
1119         if (ttrace)
1120                 ttrace->paths.max = -1;
1121
1122         ttrace->syscall_stats = intlist__new(NULL);
1123
1124         return ttrace;
1125 }
1126
1127 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1128 {
1129         struct thread_trace *ttrace;
1130
1131         if (thread == NULL)
1132                 goto fail;
1133
1134         if (thread->priv == NULL)
1135                 thread->priv = thread_trace__new();
1136                 
1137         if (thread->priv == NULL)
1138                 goto fail;
1139
1140         ttrace = thread->priv;
1141         ++ttrace->nr_events;
1142
1143         return ttrace;
1144 fail:
1145         color_fprintf(fp, PERF_COLOR_RED,
1146                       "WARNING: not enough memory, dropping samples!\n");
1147         return NULL;
1148 }
1149
1150 struct trace {
1151         struct perf_tool        tool;
1152         struct {
1153                 int             machine;
1154                 int             open_id;
1155         }                       audit;
1156         struct {
1157                 int             max;
1158                 struct syscall  *table;
1159         } syscalls;
1160         struct record_opts      opts;
1161         struct machine          *host;
1162         u64                     base_time;
1163         FILE                    *output;
1164         unsigned long           nr_events;
1165         struct strlist          *ev_qualifier;
1166         const char              *last_vfs_getname;
1167         struct intlist          *tid_list;
1168         struct intlist          *pid_list;
1169         double                  duration_filter;
1170         double                  runtime_ms;
1171         struct {
1172                 u64             vfs_getname,
1173                                 proc_getname;
1174         } stats;
1175         bool                    not_ev_qualifier;
1176         bool                    live;
1177         bool                    full_time;
1178         bool                    sched;
1179         bool                    multiple_threads;
1180         bool                    summary;
1181         bool                    summary_only;
1182         bool                    show_comm;
1183         bool                    show_tool_stats;
1184 };
1185
1186 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1187 {
1188         struct thread_trace *ttrace = thread->priv;
1189
1190         if (fd > ttrace->paths.max) {
1191                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1192
1193                 if (npath == NULL)
1194                         return -1;
1195
1196                 if (ttrace->paths.max != -1) {
1197                         memset(npath + ttrace->paths.max + 1, 0,
1198                                (fd - ttrace->paths.max) * sizeof(char *));
1199                 } else {
1200                         memset(npath, 0, (fd + 1) * sizeof(char *));
1201                 }
1202
1203                 ttrace->paths.table = npath;
1204                 ttrace->paths.max   = fd;
1205         }
1206
1207         ttrace->paths.table[fd] = strdup(pathname);
1208
1209         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1210 }
1211
1212 static int thread__read_fd_path(struct thread *thread, int fd)
1213 {
1214         char linkname[PATH_MAX], pathname[PATH_MAX];
1215         struct stat st;
1216         int ret;
1217
1218         if (thread->pid_ == thread->tid) {
1219                 scnprintf(linkname, sizeof(linkname),
1220                           "/proc/%d/fd/%d", thread->pid_, fd);
1221         } else {
1222                 scnprintf(linkname, sizeof(linkname),
1223                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1224         }
1225
1226         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1227                 return -1;
1228
1229         ret = readlink(linkname, pathname, sizeof(pathname));
1230
1231         if (ret < 0 || ret > st.st_size)
1232                 return -1;
1233
1234         pathname[ret] = '\0';
1235         return trace__set_fd_pathname(thread, fd, pathname);
1236 }
1237
1238 static const char *thread__fd_path(struct thread *thread, int fd,
1239                                    struct trace *trace)
1240 {
1241         struct thread_trace *ttrace = thread->priv;
1242
1243         if (ttrace == NULL)
1244                 return NULL;
1245
1246         if (fd < 0)
1247                 return NULL;
1248
1249         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1250                 if (!trace->live)
1251                         return NULL;
1252                 ++trace->stats.proc_getname;
1253                 if (thread__read_fd_path(thread, fd)) {
1254                         return NULL;
1255         }
1256
1257         return ttrace->paths.table[fd];
1258 }
1259
1260 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1261                                         struct syscall_arg *arg)
1262 {
1263         int fd = arg->val;
1264         size_t printed = scnprintf(bf, size, "%d", fd);
1265         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1266
1267         if (path)
1268                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1269
1270         return printed;
1271 }
1272
1273 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1274                                               struct syscall_arg *arg)
1275 {
1276         int fd = arg->val;
1277         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1278         struct thread_trace *ttrace = arg->thread->priv;
1279
1280         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1281                 zfree(&ttrace->paths.table[fd]);
1282
1283         return printed;
1284 }
1285
1286 static bool trace__filter_duration(struct trace *trace, double t)
1287 {
1288         return t < (trace->duration_filter * NSEC_PER_MSEC);
1289 }
1290
1291 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1292 {
1293         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1294
1295         return fprintf(fp, "%10.3f ", ts);
1296 }
1297
/*
 * Flags written by sig_handler() and read by the rest of the file.
 *
 * They are volatile sig_atomic_t because that is the only object type the C
 * standard guarantees can be safely written from an asynchronous signal
 * handler; with a plain bool the compiler may also cache the value in code
 * that polls it.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: ask for termination and record whether the request came
 * from SIGINT.
 */
static void sig_handler(int sig)
{
        done = true;
        interrupted = sig == SIGINT;
}
1306
1307 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1308                                         u64 duration, u64 tstamp, FILE *fp)
1309 {
1310         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1311         printed += fprintf_duration(duration, fp);
1312
1313         if (trace->multiple_threads) {
1314                 if (trace->show_comm)
1315                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1316                 printed += fprintf(fp, "%d ", thread->tid);
1317         }
1318
1319         return printed;
1320 }
1321
1322 static int trace__process_event(struct trace *trace, struct machine *machine,
1323                                 union perf_event *event, struct perf_sample *sample)
1324 {
1325         int ret = 0;
1326
1327         switch (event->header.type) {
1328         case PERF_RECORD_LOST:
1329                 color_fprintf(trace->output, PERF_COLOR_RED,
1330                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1331                 ret = machine__process_lost_event(machine, event, sample);
1332         default:
1333                 ret = machine__process_event(machine, event, sample);
1334                 break;
1335         }
1336
1337         return ret;
1338 }
1339
1340 static int trace__tool_process(struct perf_tool *tool,
1341                                union perf_event *event,
1342                                struct perf_sample *sample,
1343                                struct machine *machine)
1344 {
1345         struct trace *trace = container_of(tool, struct trace, tool);
1346         return trace__process_event(trace, machine, event, sample);
1347 }
1348
1349 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1350 {
1351         int err = symbol__init();
1352
1353         if (err)
1354                 return err;
1355
1356         trace->host = machine__new_host();
1357         if (trace->host == NULL)
1358                 return -ENOMEM;
1359
1360         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1361                                             evlist->threads, trace__tool_process, false);
1362         if (err)
1363                 symbol__exit();
1364
1365         return err;
1366 }
1367
1368 static int syscall__set_arg_fmts(struct syscall *sc)
1369 {
1370         struct format_field *field;
1371         int idx = 0;
1372
1373         sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1374         if (sc->arg_scnprintf == NULL)
1375                 return -1;
1376
1377         if (sc->fmt)
1378                 sc->arg_parm = sc->fmt->arg_parm;
1379
1380         for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1381                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1382                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1383                 else if (field->flags & FIELD_IS_POINTER)
1384                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1385                 ++idx;
1386         }
1387
1388         return 0;
1389 }
1390
1391 static int trace__read_syscall_info(struct trace *trace, int id)
1392 {
1393         char tp_name[128];
1394         struct syscall *sc;
1395         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1396
1397         if (name == NULL)
1398                 return -1;
1399
1400         if (id > trace->syscalls.max) {
1401                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1402
1403                 if (nsyscalls == NULL)
1404                         return -1;
1405
1406                 if (trace->syscalls.max != -1) {
1407                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1408                                (id - trace->syscalls.max) * sizeof(*sc));
1409                 } else {
1410                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1411                 }
1412
1413                 trace->syscalls.table = nsyscalls;
1414                 trace->syscalls.max   = id;
1415         }
1416
1417         sc = trace->syscalls.table + id;
1418         sc->name = name;
1419
1420         if (trace->ev_qualifier) {
1421                 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1422
1423                 if (!(in ^ trace->not_ev_qualifier)) {
1424                         sc->filtered = true;
1425                         /*
1426                          * No need to do read tracepoint information since this will be
1427                          * filtered out.
1428                          */
1429                         return 0;
1430                 }
1431         }
1432
1433         sc->fmt  = syscall_fmt__find(sc->name);
1434
1435         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1436         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1437
1438         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1439                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1440                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1441         }
1442
1443         if (sc->tp_format == NULL)
1444                 return -1;
1445
1446         return syscall__set_arg_fmts(sc);
1447 }
1448
1449 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1450                                       unsigned long *args, struct trace *trace,
1451                                       struct thread *thread)
1452 {
1453         size_t printed = 0;
1454
1455         if (sc->tp_format != NULL) {
1456                 struct format_field *field;
1457                 u8 bit = 1;
1458                 struct syscall_arg arg = {
1459                         .idx    = 0,
1460                         .mask   = 0,
1461                         .trace  = trace,
1462                         .thread = thread,
1463                 };
1464
1465                 for (field = sc->tp_format->format.fields->next; field;
1466                      field = field->next, ++arg.idx, bit <<= 1) {
1467                         if (arg.mask & bit)
1468                                 continue;
1469                         /*
1470                          * Suppress this argument if its value is zero and
1471                          * and we don't have a string associated in an
1472                          * strarray for it.
1473                          */
1474                         if (args[arg.idx] == 0 &&
1475                             !(sc->arg_scnprintf &&
1476                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1477                               sc->arg_parm[arg.idx]))
1478                                 continue;
1479
1480                         printed += scnprintf(bf + printed, size - printed,
1481                                              "%s%s: ", printed ? ", " : "", field->name);
1482                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1483                                 arg.val = args[arg.idx];
1484                                 if (sc->arg_parm)
1485                                         arg.parm = sc->arg_parm[arg.idx];
1486                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1487                                                                       size - printed, &arg);
1488                         } else {
1489                                 printed += scnprintf(bf + printed, size - printed,
1490                                                      "%ld", args[arg.idx]);
1491                         }
1492                 }
1493         } else {
1494                 int i = 0;
1495
1496                 while (i < 6) {
1497                         printed += scnprintf(bf + printed, size - printed,
1498                                              "%sarg%d: %ld",
1499                                              printed ? ", " : "", i, args[i]);
1500                         ++i;
1501                 }
1502         }
1503
1504         return printed;
1505 }
1506
/*
 * Handler for one tracepoint sample; trace__sys_enter() and
 * trace__sys_exit() have this signature.
 */
typedef int (*tracepoint_handler)(struct trace *trace,
                                  struct perf_evsel *evsel,
                                  struct perf_sample *sample);
1509
1510 static struct syscall *trace__syscall_info(struct trace *trace,
1511                                            struct perf_evsel *evsel, int id)
1512 {
1513
1514         if (id < 0) {
1515
1516                 /*
1517                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1518                  * before that, leaving at a higher verbosity level till that is
1519                  * explained. Reproduced with plain ftrace with:
1520                  *
1521                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1522                  * grep "NR -1 " /t/trace_pipe
1523                  *
1524                  * After generating some load on the machine.
1525                  */
1526                 if (verbose > 1) {
1527                         static u64 n;
1528                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1529                                 id, perf_evsel__name(evsel), ++n);
1530                 }
1531                 return NULL;
1532         }
1533
1534         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1535             trace__read_syscall_info(trace, id))
1536                 goto out_cant_read;
1537
1538         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1539                 goto out_cant_read;
1540
1541         return &trace->syscalls.table[id];
1542
1543 out_cant_read:
1544         if (verbose) {
1545                 fprintf(trace->output, "Problems reading syscall %d", id);
1546                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1547                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1548                 fputs(" information\n", trace->output);
1549         }
1550         return NULL;
1551 }
1552
1553 static void thread__update_stats(struct thread_trace *ttrace,
1554                                  int id, struct perf_sample *sample)
1555 {
1556         struct int_node *inode;
1557         struct stats *stats;
1558         u64 duration = 0;
1559
1560         inode = intlist__findnew(ttrace->syscall_stats, id);
1561         if (inode == NULL)
1562                 return;
1563
1564         stats = inode->priv;
1565         if (stats == NULL) {
1566                 stats = malloc(sizeof(struct stats));
1567                 if (stats == NULL)
1568                         return;
1569                 init_stats(stats);
1570                 inode->priv = stats;
1571         }
1572
1573         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1574                 duration = sample->time - ttrace->entry_time;
1575
1576         update_stats(stats, duration);
1577 }
1578
1579 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1580                             struct perf_sample *sample)
1581 {
1582         char *msg;
1583         void *args;
1584         size_t printed = 0;
1585         struct thread *thread;
1586         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1587         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1588         struct thread_trace *ttrace;
1589
1590         if (sc == NULL)
1591                 return -1;
1592
1593         if (sc->filtered)
1594                 return 0;
1595
1596         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1597         ttrace = thread__trace(thread, trace->output);
1598         if (ttrace == NULL)
1599                 return -1;
1600
1601         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1602         ttrace = thread->priv;
1603
1604         if (ttrace->entry_str == NULL) {
1605                 ttrace->entry_str = malloc(1024);
1606                 if (!ttrace->entry_str)
1607                         return -1;
1608         }
1609
1610         ttrace->entry_time = sample->time;
1611         msg = ttrace->entry_str;
1612         printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1613
1614         printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1615                                            args, trace, thread);
1616
1617         if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1618                 if (!trace->duration_filter && !trace->summary_only) {
1619                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1620                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1621                 }
1622         } else
1623                 ttrace->entry_pending = true;
1624
1625         return 0;
1626 }
1627
1628 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1629                            struct perf_sample *sample)
1630 {
1631         int ret;
1632         u64 duration = 0;
1633         struct thread *thread;
1634         int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1635         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1636         struct thread_trace *ttrace;
1637
1638         if (sc == NULL)
1639                 return -1;
1640
1641         if (sc->filtered)
1642                 return 0;
1643
1644         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1645         ttrace = thread__trace(thread, trace->output);
1646         if (ttrace == NULL)
1647                 return -1;
1648
1649         if (trace->summary)
1650                 thread__update_stats(ttrace, id, sample);
1651
1652         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1653
1654         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1655                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1656                 trace->last_vfs_getname = NULL;
1657                 ++trace->stats.vfs_getname;
1658         }
1659
1660         ttrace = thread->priv;
1661
1662         ttrace->exit_time = sample->time;
1663
1664         if (ttrace->entry_time) {
1665                 duration = sample->time - ttrace->entry_time;
1666                 if (trace__filter_duration(trace, duration))
1667                         goto out;
1668         } else if (trace->duration_filter)
1669                 goto out;
1670
1671         if (trace->summary_only)
1672                 goto out;
1673
1674         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1675
1676         if (ttrace->entry_pending) {
1677                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1678         } else {
1679                 fprintf(trace->output, " ... [");
1680                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1681                 fprintf(trace->output, "]: %s()", sc->name);
1682         }
1683
1684         if (sc->fmt == NULL) {
1685 signed_print:
1686                 fprintf(trace->output, ") = %d", ret);
1687         } else if (ret < 0 && sc->fmt->errmsg) {
1688                 char bf[256];
1689                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1690                            *e = audit_errno_to_name(-ret);
1691
1692                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1693         } else if (ret == 0 && sc->fmt->timeout)
1694                 fprintf(trace->output, ") = 0 Timeout");
1695         else if (sc->fmt->hexret)
1696                 fprintf(trace->output, ") = %#x", ret);
1697         else
1698                 goto signed_print;
1699
1700         fputc('\n', trace->output);
1701 out:
1702         ttrace->entry_pending = false;
1703
1704         return 0;
1705 }
1706
1707 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1708                               struct perf_sample *sample)
1709 {
1710         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1711         return 0;
1712 }
1713
1714 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1715                                      struct perf_sample *sample)
1716 {
1717         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1718         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1719         struct thread *thread = machine__findnew_thread(trace->host,
1720                                                         sample->pid,
1721                                                         sample->tid);
1722         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1723
1724         if (ttrace == NULL)
1725                 goto out_dump;
1726
1727         ttrace->runtime_ms += runtime_ms;
1728         trace->runtime_ms += runtime_ms;
1729         return 0;
1730
1731 out_dump:
1732         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1733                evsel->name,
1734                perf_evsel__strval(evsel, sample, "comm"),
1735                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1736                runtime,
1737                perf_evsel__intval(evsel, sample, "vruntime"));
1738         return 0;
1739 }
1740
1741 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1742 {
1743         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1744             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1745                 return false;
1746
1747         if (trace->pid_list || trace->tid_list)
1748                 return true;
1749
1750         return false;
1751 }
1752
1753 static int trace__process_sample(struct perf_tool *tool,
1754                                  union perf_event *event __maybe_unused,
1755                                  struct perf_sample *sample,
1756                                  struct perf_evsel *evsel,
1757                                  struct machine *machine __maybe_unused)
1758 {
1759         struct trace *trace = container_of(tool, struct trace, tool);
1760         int err = 0;
1761
1762         tracepoint_handler handler = evsel->handler;
1763
1764         if (skip_sample(trace, sample))
1765                 return 0;
1766
1767         if (!trace->full_time && trace->base_time == 0)
1768                 trace->base_time = sample->time;
1769
1770         if (handler) {
1771                 ++trace->nr_events;
1772                 handler(trace, evsel, sample);
1773         }
1774
1775         return err;
1776 }
1777
1778 static int parse_target_str(struct trace *trace)
1779 {
1780         if (trace->opts.target.pid) {
1781                 trace->pid_list = intlist__new(trace->opts.target.pid);
1782                 if (trace->pid_list == NULL) {
1783                         pr_err("Error parsing process id string\n");
1784                         return -EINVAL;
1785                 }
1786         }
1787
1788         if (trace->opts.target.tid) {
1789                 trace->tid_list = intlist__new(trace->opts.target.tid);
1790                 if (trace->tid_list == NULL) {
1791                         pr_err("Error parsing thread id string\n");
1792                         return -EINVAL;
1793                 }
1794         }
1795
1796         return 0;
1797 }
1798
/*
 * Implement "perf trace record": build an argv for cmd_record() made of a
 * fixed set of record options, the syscall tracepoint event string supported
 * by the running kernel, and the caller supplied arguments.
 *
 * Returns -ENOMEM when the argument vector cannot be allocated, -1 when no
 * syscall tracepoints are available, otherwise whatever cmd_record() returns.
 */
static int trace__record(int argc, const char **argv)
{
        unsigned int rec_argc, i, j;
        const char **rec_argv;
        const char * const record_args[] = {
                "record",
                "-R",
                "-m", "1024",
                "-c", "1",
                "-e",
        };

        /* +1 is for the event string below */
        rec_argc = ARRAY_SIZE(record_args) + 1 + argc;
        rec_argv = calloc(rec_argc + 1, sizeof(char *));

        if (rec_argv == NULL)
                return -ENOMEM;

        for (i = 0; i < ARRAY_SIZE(record_args); i++)
                rec_argv[i] = record_args[i];

        /* event string may be different for older kernels - e.g., RHEL6 */
        if (is_valid_tracepoint("raw_syscalls:sys_enter"))
                rec_argv[i] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
        else if (is_valid_tracepoint("syscalls:sys_enter"))
                rec_argv[i] = "syscalls:sys_enter,syscalls:sys_exit";
        else {
                pr_err("Neither raw_syscalls nor syscalls events exist.\n");
                /* Nothing will consume the vector: do not leak it. */
                free(rec_argv);
                return -1;
        }
        i++;

        for (j = 0; j < (unsigned int)argc; j++, i++)
                rec_argv[i] = argv[j];

        /*
         * NOTE(review): rec_argv is left allocated after cmd_record();
         * confirm whether cmd_record keeps references before freeing here.
         */
        return cmd_record(i, rec_argv, NULL);
}
1837
1838 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1839
1840 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1841 {
1842         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1843         if (evsel == NULL)
1844                 return;
1845
1846         if (perf_evsel__field(evsel, "pathname") == NULL) {
1847                 perf_evsel__delete(evsel);
1848                 return;
1849         }
1850
1851         evsel->handler = trace__vfs_getname;
1852         perf_evlist__add(evlist, evsel);
1853 }
1854
1855 static int trace__run(struct trace *trace, int argc, const char **argv)
1856 {
1857         struct perf_evlist *evlist = perf_evlist__new();
1858         struct perf_evsel *evsel;
1859         int err = -1, i;
1860         unsigned long before;
1861         const bool forks = argc > 0;
1862
1863         trace->live = true;
1864
1865         if (evlist == NULL) {
1866                 fprintf(trace->output, "Not enough memory to run!\n");
1867                 goto out;
1868         }
1869
1870         if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1871                 goto out_error_tp;
1872
1873         perf_evlist__add_vfs_getname(evlist);
1874
1875         if (trace->sched &&
1876                 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1877                                 trace__sched_stat_runtime))
1878                 goto out_error_tp;
1879
1880         err = perf_evlist__create_maps(evlist, &trace->opts.target);
1881         if (err < 0) {
1882                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1883                 goto out_delete_evlist;
1884         }
1885
1886         err = trace__symbols_init(trace, evlist);
1887         if (err < 0) {
1888                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1889                 goto out_delete_evlist;
1890         }
1891
1892         perf_evlist__config(evlist, &trace->opts);
1893
1894         signal(SIGCHLD, sig_handler);
1895         signal(SIGINT, sig_handler);
1896
1897         if (forks) {
1898                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1899                                                     argv, false, NULL);
1900                 if (err < 0) {
1901                         fprintf(trace->output, "Couldn't run the workload!\n");
1902                         goto out_delete_evlist;
1903                 }
1904         }
1905
1906         err = perf_evlist__open(evlist);
1907         if (err < 0)
1908                 goto out_error_open;
1909
1910         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
1911         if (err < 0) {
1912                 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1913                 goto out_delete_evlist;
1914         }
1915
1916         perf_evlist__enable(evlist);
1917
1918         if (forks)
1919                 perf_evlist__start_workload(evlist);
1920
1921         trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1922 again:
1923         before = trace->nr_events;
1924
1925         for (i = 0; i < evlist->nr_mmaps; i++) {
1926                 union perf_event *event;
1927
1928                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1929                         const u32 type = event->header.type;
1930                         tracepoint_handler handler;
1931                         struct perf_sample sample;
1932
1933                         ++trace->nr_events;
1934
1935                         err = perf_evlist__parse_sample(evlist, event, &sample);
1936                         if (err) {
1937                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1938                                 goto next_event;
1939                         }
1940
1941                         if (!trace->full_time && trace->base_time == 0)
1942                                 trace->base_time = sample.time;
1943
1944                         if (type != PERF_RECORD_SAMPLE) {
1945                                 trace__process_event(trace, trace->host, event, &sample);
1946                                 continue;
1947                         }
1948
1949                         evsel = perf_evlist__id2evsel(evlist, sample.id);
1950                         if (evsel == NULL) {
1951                                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1952                                 goto next_event;
1953                         }
1954
1955                         if (sample.raw_data == NULL) {
1956                                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1957                                        perf_evsel__name(evsel), sample.tid,
1958                                        sample.cpu, sample.raw_size);
1959                                 goto next_event;
1960                         }
1961
1962                         handler = evsel->handler;
1963                         handler(trace, evsel, &sample);
1964 next_event:
1965                         perf_evlist__mmap_consume(evlist, i);
1966
1967                         if (interrupted)
1968                                 goto out_disable;
1969                 }
1970         }
1971
1972         if (trace->nr_events == before) {
1973                 int timeout = done ? 100 : -1;
1974
1975                 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
1976                         goto again;
1977         } else {
1978                 goto again;
1979         }
1980
1981 out_disable:
1982         perf_evlist__disable(evlist);
1983
1984         if (!err) {
1985                 if (trace->summary)
1986                         trace__fprintf_thread_summary(trace, trace->output);
1987
1988                 if (trace->show_tool_stats) {
1989                         fprintf(trace->output, "Stats:\n "
1990                                                " vfs_getname : %" PRIu64 "\n"
1991                                                " proc_getname: %" PRIu64 "\n",
1992                                 trace->stats.vfs_getname,
1993                                 trace->stats.proc_getname);
1994                 }
1995         }
1996
1997 out_delete_evlist:
1998         perf_evlist__delete(evlist);
1999 out:
2000         trace->live = false;
2001         return err;
2002 {
2003         char errbuf[BUFSIZ];
2004
2005 out_error_tp:
2006         perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
2007         goto out_error;
2008
2009 out_error_open:
2010         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2011
2012 out_error:
2013         fprintf(trace->output, "%s\n", errbuf);
2014         goto out_delete_evlist;
2015 }
2016 }
2017
2018 static int trace__replay(struct trace *trace)
2019 {
2020         const struct perf_evsel_str_handler handlers[] = {
2021                 { "probe:vfs_getname",       trace__vfs_getname, },
2022         };
2023         struct perf_data_file file = {
2024                 .path  = input_name,
2025                 .mode  = PERF_DATA_MODE_READ,
2026         };
2027         struct perf_session *session;
2028         struct perf_evsel *evsel;
2029         int err = -1;
2030
2031         trace->tool.sample        = trace__process_sample;
2032         trace->tool.mmap          = perf_event__process_mmap;
2033         trace->tool.mmap2         = perf_event__process_mmap2;
2034         trace->tool.comm          = perf_event__process_comm;
2035         trace->tool.exit          = perf_event__process_exit;
2036         trace->tool.fork          = perf_event__process_fork;
2037         trace->tool.attr          = perf_event__process_attr;
2038         trace->tool.tracing_data = perf_event__process_tracing_data;
2039         trace->tool.build_id      = perf_event__process_build_id;
2040
2041         trace->tool.ordered_samples = true;
2042         trace->tool.ordering_requires_timestamps = true;
2043
2044         /* add tid to output */
2045         trace->multiple_threads = true;
2046
2047         if (symbol__init() < 0)
2048                 return -1;
2049
2050         session = perf_session__new(&file, false, &trace->tool);
2051         if (session == NULL)
2052                 return -ENOMEM;
2053
2054         trace->host = &session->machines.host;
2055
2056         err = perf_session__set_tracepoints_handlers(session, handlers);
2057         if (err)
2058                 goto out;
2059
2060         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2061                                                      "raw_syscalls:sys_enter");
2062         /* older kernels have syscalls tp versus raw_syscalls */
2063         if (evsel == NULL)
2064                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2065                                                              "syscalls:sys_enter");
2066         if (evsel == NULL) {
2067                 pr_err("Data file does not have raw_syscalls:sys_enter event\n");
2068                 goto out;
2069         }
2070
2071         if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2072             perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
2073                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2074                 goto out;
2075         }
2076
2077         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2078                                                      "raw_syscalls:sys_exit");
2079         if (evsel == NULL)
2080                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2081                                                              "syscalls:sys_exit");
2082         if (evsel == NULL) {
2083                 pr_err("Data file does not have raw_syscalls:sys_exit event\n");
2084                 goto out;
2085         }
2086
2087         if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2088             perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
2089                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2090                 goto out;
2091         }
2092
2093         err = parse_target_str(trace);
2094         if (err != 0)
2095                 goto out;
2096
2097         setup_pager();
2098
2099         err = perf_session__process_events(session, &trace->tool);
2100         if (err)
2101                 pr_err("Failed to process events, error %d", err);
2102
2103         else if (trace->summary)
2104                 trace__fprintf_thread_summary(trace, trace->output);
2105
2106 out:
2107         perf_session__delete(session);
2108
2109         return err;
2110 }
2111
/*
 * Print the heading of the per-thread summary to fp and return the value
 * reported by fprintf(), converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
        return fprintf(fp, "\n Summary of events:\n\n");
}
2120
2121 static size_t thread__dump_stats(struct thread_trace *ttrace,
2122                                  struct trace *trace, FILE *fp)
2123 {
2124         struct stats *stats;
2125         size_t printed = 0;
2126         struct syscall *sc;
2127         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2128
2129         if (inode == NULL)
2130                 return 0;
2131
2132         printed += fprintf(fp, "\n");
2133
2134         printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
2135         printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
2136         printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
2137
2138         /* each int_node is a syscall */
2139         while (inode) {
2140                 stats = inode->priv;
2141                 if (stats) {
2142                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2143                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2144                         double avg = avg_stats(stats);
2145                         double pct;
2146                         u64 n = (u64) stats->n;
2147
2148                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2149                         avg /= NSEC_PER_MSEC;
2150
2151                         sc = &trace->syscalls.table[inode->i];
2152                         printed += fprintf(fp, "   %-15s", sc->name);
2153                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2154                                            n, min, avg);
2155                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2156                 }
2157
2158                 inode = intlist__next(inode);
2159         }
2160
2161         printed += fprintf(fp, "\n\n");
2162
2163         return printed;
2164 }
2165
/*
 * Context handed to the per-thread summary callback through its opaque
 * 'priv' pointer.
 */
struct summary_data {
        FILE            *fp;            /* stream the summary is written to */
        struct trace    *trace;         /* tracer state being summarized */
        size_t          printed;        /* running fprintf() count */
};
2172
2173 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2174 {
2175         struct summary_data *data = priv;
2176         FILE *fp = data->fp;
2177         size_t printed = data->printed;
2178         struct trace *trace = data->trace;
2179         struct thread_trace *ttrace = thread->priv;
2180         double ratio;
2181
2182         if (ttrace == NULL)
2183                 return 0;
2184
2185         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2186
2187         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2188         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2189         printed += fprintf(fp, "%.1f%%", ratio);
2190         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2191         printed += thread__dump_stats(ttrace, trace, fp);
2192
2193         data->printed += printed;
2194
2195         return 0;
2196 }
2197
2198 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2199 {
2200         struct summary_data data = {
2201                 .fp = fp,
2202                 .trace = trace
2203         };
2204         data.printed = trace__fprintf_threads_header(fp);
2205
2206         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2207
2208         return data.printed;
2209 }
2210
2211 static int trace__set_duration(const struct option *opt, const char *str,
2212                                int unset __maybe_unused)
2213 {
2214         struct trace *trace = opt->value;
2215
2216         trace->duration_filter = atof(str);
2217         return 0;
2218 }
2219
2220 static int trace__open_output(struct trace *trace, const char *filename)
2221 {
2222         struct stat st;
2223
2224         if (!stat(filename, &st) && st.st_size) {
2225                 char oldname[PATH_MAX];
2226
2227                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2228                 unlink(oldname);
2229                 rename(filename, oldname);
2230         }
2231
2232         trace->output = fopen(filename, "w");
2233
2234         return trace->output == NULL ? -errno : 0;
2235 }
2236
2237 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2238 {
2239         const char * const trace_usage[] = {
2240                 "perf trace [<options>] [<command>]",
2241                 "perf trace [<options>] -- <command> [<options>]",
2242                 "perf trace record [<options>] [<command>]",
2243                 "perf trace record [<options>] -- <command> [<options>]",
2244                 NULL
2245         };
2246         struct trace trace = {
2247                 .audit = {
2248                         .machine = audit_detect_machine(),
2249                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2250                 },
2251                 .syscalls = {
2252                         . max = -1,
2253                 },
2254                 .opts = {
2255                         .target = {
2256                                 .uid       = UINT_MAX,
2257                                 .uses_mmap = true,
2258                         },
2259                         .user_freq     = UINT_MAX,
2260                         .user_interval = ULLONG_MAX,
2261                         .no_buffering  = true,
2262                         .mmap_pages    = 1024,
2263                 },
2264                 .output = stdout,
2265                 .show_comm = true,
2266         };
2267         const char *output_name = NULL;
2268         const char *ev_qualifier_str = NULL;
2269         const struct option trace_options[] = {
2270         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2271                     "show the thread COMM next to its id"),
2272         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2273         OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2274                     "list of events to trace"),
2275         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2276         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2277         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2278                     "trace events on existing process id"),
2279         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2280                     "trace events on existing thread id"),
2281         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2282                     "system-wide collection from all CPUs"),
2283         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2284                     "list of cpus to monitor"),
2285         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2286                     "child tasks do not inherit counters"),
2287         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2288                      "number of mmap data pages",
2289                      perf_evlist__parse_mmap_pages),
2290         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2291                    "user to profile"),
2292         OPT_CALLBACK(0, "duration", &trace, "float",
2293                      "show only events with duration > N.M ms",
2294                      trace__set_duration),
2295         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2296         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2297         OPT_BOOLEAN('T', "time", &trace.full_time,
2298                     "Show full timestamp, not time relative to first start"),
2299         OPT_BOOLEAN('s', "summary", &trace.summary_only,
2300                     "Show only syscall summary with statistics"),
2301         OPT_BOOLEAN('S', "with-summary", &trace.summary,
2302                     "Show all syscalls and summary with statistics"),
2303         OPT_END()
2304         };
2305         int err;
2306         char bf[BUFSIZ];
2307
2308         if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2309                 return trace__record(argc-2, &argv[2]);
2310
2311         argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2312
2313         /* summary_only implies summary option, but don't overwrite summary if set */
2314         if (trace.summary_only)
2315                 trace.summary = trace.summary_only;
2316
2317         if (output_name != NULL) {
2318                 err = trace__open_output(&trace, output_name);
2319                 if (err < 0) {
2320                         perror("failed to create output file");
2321                         goto out;
2322                 }
2323         }
2324
2325         if (ev_qualifier_str != NULL) {
2326                 const char *s = ev_qualifier_str;
2327
2328                 trace.not_ev_qualifier = *s == '!';
2329                 if (trace.not_ev_qualifier)
2330                         ++s;
2331                 trace.ev_qualifier = strlist__new(true, s);
2332                 if (trace.ev_qualifier == NULL) {
2333                         fputs("Not enough memory to parse event qualifier",
2334                               trace.output);
2335                         err = -ENOMEM;
2336                         goto out_close;
2337                 }
2338         }
2339
2340         err = target__validate(&trace.opts.target);
2341         if (err) {
2342                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2343                 fprintf(trace.output, "%s", bf);
2344                 goto out_close;
2345         }
2346
2347         err = target__parse_uid(&trace.opts.target);
2348         if (err) {
2349                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2350                 fprintf(trace.output, "%s", bf);
2351                 goto out_close;
2352         }
2353
2354         if (!argc && target__none(&trace.opts.target))
2355                 trace.opts.target.system_wide = true;
2356
2357         if (input_name)
2358                 err = trace__replay(&trace);
2359         else
2360                 err = trace__run(&trace, argc, argv);
2361
2362 out_close:
2363         if (output_name != NULL)
2364                 fclose(trace.output);
2365 out:
2366         return err;
2367 }