1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15 #include "util/parse-events.h"
19 #include <sys/eventfd.h>
21 #include <linux/futex.h>
23 /* For older distros: */
/*
 * Fallback values for mmap, madvise and eventfd constants that the system
 * headers of such distros may not provide.
 */
25 # define MAP_STACK 0x20000
29 # define MADV_HWPOISON 100
32 #ifndef MADV_MERGEABLE
33 # define MADV_MERGEABLE 12
36 #ifndef MADV_UNMERGEABLE
37 # define MADV_UNMERGEABLE 13
41 # define EFD_SEMAPHORE 1
/* Accessor returning the field's value from a sample as a u64 (see tp_field__u*()). */
47 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
/* Accessor returning the address of the field inside the sample (see tp_field__ptr()). */
48 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
/*
 * TP_UINT_FIELD(bits) - emit tp_field__u<bits>(), an accessor that loads the
 * unsigned <bits>-bit value found at field->offset inside sample->raw_data
 * and returns it widened to u64.
 *
 * NOTE(review): the load is a plain pointer cast and dereference, so it
 * relies on raw_data + offset being suitably aligned for the type - confirm
 * for strict-alignment targets.
 */
52 #define TP_UINT_FIELD(bits) \
53 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
55 return *(u##bits *)(sample->raw_data + field->offset); \
/*
 * TP_UINT_FIELD__SWAPPED(bits) - emit tp_field__swapped_u<bits>(), which
 * loads the <bits>-bit value at field->offset in sample->raw_data and returns
 * it after byte-swapping with bswap_<bits>().  Instantiated below for 16, 32
 * and 64 bits.
 */
63 #define TP_UINT_FIELD__SWAPPED(bits) \
64 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
66 u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
67 return bswap_##bits(value);\
70 TP_UINT_FIELD__SWAPPED(16);
71 TP_UINT_FIELD__SWAPPED(32);
72 TP_UINT_FIELD__SWAPPED(64);
/*
 * Set up @field as an unsigned integer accessor for @format_field: record the
 * field offset and pick the reader matching format_field->size.  The 16, 32
 * and 64-bit readers are the byte-swapping variants when needs_swap is set;
 * the 8-bit reader is used as is.
 */
74 static int tp_field__init_uint(struct tp_field *field,
75 struct format_field *format_field,
78 field->offset = format_field->offset;
80 switch (format_field->size) {
82 field->integer = tp_field__u8;
85 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
88 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
91 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
/* Return the address of @field inside @sample's raw data (raw_data + offset). */
100 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
102 return sample->raw_data + field->offset;
/*
 * Set up @field as a pointer accessor: record the offset taken from
 * @format_field and install tp_field__ptr() as the reader.
 */
105 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
107 field->offset = format_field->offset;
108 field->pointer = tp_field__ptr;
/*
 * args: initialised as a pointer accessor on the sys_enter event;
 * ret: initialised as an unsigned integer accessor on the sys_exit event
 * (see perf_evlist__add_syscall_newtp()).
 */
115 struct tp_field args, ret;
/*
 * Look up the tracepoint field called @name on @evsel with
 * perf_evsel__field() and, once the lookup result has been checked against
 * NULL, initialise @field as an unsigned integer accessor for it, honouring
 * evsel->needs_swap.
 */
119 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
120 struct tp_field *field,
123 struct format_field *format_field = perf_evsel__field(evsel, name);
125 if (format_field == NULL)
128 return tp_field__init_uint(field, format_field, evsel->needs_swap);
/*
 * Initialise member @name of the struct syscall_tp stored in evsel->priv as
 * an unsigned integer accessor for the tracepoint field of the same name
 * (the member name is stringified to perform the lookup).  Evaluates to the
 * result of perf_evsel__init_tp_uint_field().
 */
131 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
132 ({ struct syscall_tp *sc = evsel->priv;\
133 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
/*
 * Look up the tracepoint field called @name on @evsel with
 * perf_evsel__field() and, once the lookup result has been checked against
 * NULL, initialise @field as a pointer accessor for it.
 */
135 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
136 struct tp_field *field,
139 struct format_field *format_field = perf_evsel__field(evsel, name);
141 if (format_field == NULL)
144 return tp_field__init_ptr(field, format_field);
/*
 * Initialise member @name of the struct syscall_tp stored in evsel->priv as
 * a pointer accessor for the tracepoint field of the same name.  Evaluates
 * to the result of perf_evsel__init_tp_ptr_field().
 */
147 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
148 ({ struct syscall_tp *sc = evsel->priv;\
149 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
/*
 * Destroy @evsel with perf_evsel__delete().
 * NOTE(review): going by the name this is also where evsel->priv is expected
 * to be released - confirm against the full function body.
 */
151 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
154 perf_evsel__delete(evsel);
/*
 * Attach syscall tracepoint state to @evsel: allocate a struct syscall_tp as
 * evsel->priv, initialise its "id" accessor and install @handler as
 * evsel->handler.
 */
157 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
159 evsel->priv = malloc(sizeof(struct syscall_tp));
160 if (evsel->priv != NULL) {
161 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
164 evsel->handler = handler;
/*
 * Create the tracepoint evsel for @direction in the "raw_syscalls" group,
 * with the "syscalls" group as the alternative used by older kernels, then
 * attach the syscall_tp state and @handler to it.  The evsel is destroyed
 * with perf_evsel__delete_priv() on the error path.
 */
175 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
177 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
179 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
181 evsel = perf_evsel__newtp("syscalls", direction);
184 if (perf_evsel__init_syscall_tp(evsel, handler))
191 perf_evsel__delete_priv(evsel);
/*
 * Read member @name of the syscall_tp in evsel->priv from @sample through
 * its integer accessor; evaluates to the u64 value.
 */
195 #define perf_evsel__sc_tp_uint(evsel, name, sample) \
196 ({ struct syscall_tp *fields = evsel->priv; \
197 fields->name.integer(&fields->name, sample); })
/*
 * Locate member @name of the syscall_tp in evsel->priv inside @sample
 * through its pointer accessor; evaluates to the void * address.
 */
199 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \
200 ({ struct syscall_tp *fields = evsel->priv; \
201 fields->name.pointer(&fields->name, sample); })
/*
 * Create the sys_enter and sys_exit tracepoint events with their handlers,
 * set up the "args" pointer accessor on sys_enter and the "ret" integer
 * accessor on sys_exit, and add both events to @evlist.  On failure the
 * events created so far are destroyed in reverse order through the
 * out_delete_* labels.
 */
203 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
204 void *sys_enter_handler,
205 void *sys_exit_handler)
208 struct perf_evsel *sys_enter, *sys_exit;
210 sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
211 if (sys_enter == NULL)
214 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
215 goto out_delete_sys_enter;
217 sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
218 if (sys_exit == NULL)
219 goto out_delete_sys_enter;
221 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
222 goto out_delete_sys_exit;
224 perf_evlist__add(evlist, sys_enter);
225 perf_evlist__add(evlist, sys_exit);
232 perf_evsel__delete_priv(sys_exit);
233 out_delete_sys_enter:
234 perf_evsel__delete_priv(sys_enter);
/* Thread the argument belongs to; the fd printers use it to reach the per-thread path cache. */
241 struct thread *thread;
/* Table of names; looked up with (value - offset) as the index. */
251 const char **entries;
/*
 * DEFINE_STRARRAY(array) defines strarray__<array> describing @array, with
 * nr_entries taken from ARRAY_SIZE(array).  DEFINE_STRARRAY_OFFSET() does the
 * same for tables whose first entry corresponds to value @off rather than 0.
 */
254 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
255 .nr_entries = ARRAY_SIZE(array), \
259 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
261 .nr_entries = ARRAY_SIZE(array), \
/*
 * Print arg->val as the matching name from the strarray passed in arg->parm.
 * The index is arg->val minus the table offset; values that fall outside the
 * table are printed numerically using @intfmt instead.
 *
 * NOTE(review): tables built with designated indexes (tioctls) contain NULL
 * slots, and entries[idx] is handed to "%s" without a NULL check - consider
 * falling back to @intfmt for those as well.
 * NOTE(review): callers pass "%d" / "%#x" as @intfmt while
 * syscall_arg__scnprintf_hex() prints arg->val with "%#lx"; confirm the
 * width of arg->val matches the int-sized formats used here.
 */
265 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
267 struct syscall_arg *arg)
269 struct strarray *sa = arg->parm;
270 int idx = arg->val - sa->offset;
272 if (idx < 0 || idx >= sa->nr_entries)
273 return scnprintf(bf, size, intfmt, arg->val);
275 return scnprintf(bf, size, "%s", sa->entries[idx]);
/* strarray printer whose numeric fallback for unknown values is decimal ("%d"). */
278 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
279 struct syscall_arg *arg)
281 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
284 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
/* strarray printer whose numeric fallback for unknown values is hexadecimal ("%#x"); x86 only for now. */
286 #if defined(__i386__) || defined(__x86_64__)
288 * FIXME: Make this available to all arches as soon as the ioctl beautifier
289 * gets rewritten to support all arches.
291 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
292 struct syscall_arg *arg)
294 return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
297 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
298 #endif /* defined(__i386__) || defined(__x86_64__) */
/* Forward declaration: the definition comes later in the file, after the fd path cache helpers. */
300 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
301 struct syscall_arg *arg);
303 #define SCA_FD syscall_arg__scnprintf_fd
/*
 * Printer for the directory fd argument of the *at() syscalls: one special
 * value is printed as "CWD", every other value goes through the regular fd
 * printer.
 */
305 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
306 struct syscall_arg *arg)
311 return scnprintf(bf, size, "CWD");
313 return syscall_arg__scnprintf_fd(bf, size, arg);
316 #define SCA_FDAT syscall_arg__scnprintf_fd_at
/* Forward declaration: the definition comes later in the file, next to syscall_arg__scnprintf_fd(). */
318 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
319 struct syscall_arg *arg);
321 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
/* Print arg->val in hexadecimal with a 0x prefix ("%#lx"). */
323 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
324 struct syscall_arg *arg)
326 return scnprintf(bf, size, "%#lx", arg->val);
329 #define SCA_HEX syscall_arg__scnprintf_hex
/*
 * Print the protection bits in arg->val as a '|'-separated list of PROT_*
 * names (without the PROT_ prefix).  PROT_NONE is printed as "NONE".  A
 * trailing hexadecimal value is appended for prot, presumably holding the
 * bits no name matched.
 */
331 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
332 struct syscall_arg *arg)
334 int printed = 0, prot = arg->val;
336 if (prot == PROT_NONE)
337 return scnprintf(bf, size, "NONE");
338 #define P_MMAP_PROT(n) \
339 if (prot & PROT_##n) { \
340 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
350 P_MMAP_PROT(GROWSDOWN);
351 P_MMAP_PROT(GROWSUP);
355 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
360 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
/*
 * Print the mmap flags in arg->val as a '|'-separated list of MAP_* names
 * (without the MAP_ prefix).  MAP_UNINITIALIZED is only handled when the
 * headers define it.  A trailing hexadecimal value is appended for flags,
 * presumably holding the bits no name matched.
 */
362 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
363 struct syscall_arg *arg)
365 int printed = 0, flags = arg->val;
367 #define P_MMAP_FLAG(n) \
368 if (flags & MAP_##n) { \
369 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
374 P_MMAP_FLAG(PRIVATE);
378 P_MMAP_FLAG(ANONYMOUS);
379 P_MMAP_FLAG(DENYWRITE);
380 P_MMAP_FLAG(EXECUTABLE);
383 P_MMAP_FLAG(GROWSDOWN);
385 P_MMAP_FLAG(HUGETLB);
388 P_MMAP_FLAG(NONBLOCK);
389 P_MMAP_FLAG(NORESERVE);
390 P_MMAP_FLAG(POPULATE);
392 #ifdef MAP_UNINITIALIZED
393 P_MMAP_FLAG(UNINITIALIZED);
398 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
403 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
/*
 * Print the madvise() behavior in arg->val as its MADV_* name (without the
 * MADV_ prefix); values with no matching case are printed in hexadecimal.
 * SOFT_OFFLINE and NOHUGEPAGE are only handled when the headers define them.
 */
405 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
406 struct syscall_arg *arg)
408 int behavior = arg->val;
411 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
414 P_MADV_BHV(SEQUENTIAL);
415 P_MADV_BHV(WILLNEED);
416 P_MADV_BHV(DONTNEED);
418 P_MADV_BHV(DONTFORK);
420 P_MADV_BHV(HWPOISON);
421 #ifdef MADV_SOFT_OFFLINE
422 P_MADV_BHV(SOFT_OFFLINE);
424 P_MADV_BHV(MERGEABLE);
425 P_MADV_BHV(UNMERGEABLE);
427 P_MADV_BHV(HUGEPAGE);
429 #ifdef MADV_NOHUGEPAGE
430 P_MADV_BHV(NOHUGEPAGE);
433 P_MADV_BHV(DONTDUMP);
442 return scnprintf(bf, size, "%#x", behavior);
/*
 * Print the flock() operation in arg->val as a '|'-separated list of LOCK_*
 * names; a command is only named when all of its bits are set.  One special
 * value is printed as "NONE", and a trailing hexadecimal value is appended
 * for op, presumably holding the bits no name matched.
 */
447 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
448 struct syscall_arg *arg)
450 int printed = 0, op = arg->val;
453 return scnprintf(bf, size, "NONE");
455 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
456 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
471 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
476 #define SCA_FLOCK syscall_arg__scnprintf_flock
/*
 * Print the futex operation: the command (op & FUTEX_CMD_MASK) by name, or
 * in hexadecimal when unknown, followed by "|PRIV" and "|CLKRT" when
 * FUTEX_PRIVATE_FLAG and FUTEX_CLOCK_REALTIME are set in op.
 *
 * Each command also ORs a set of SCF_* bits into arg->mask; presumably these
 * mark the remaining futex arguments that the command ignores so they are
 * left out of the output - confirm against the code consuming arg->mask.
 */
478 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
480 enum syscall_futex_args {
481 SCF_UADDR = (1 << 0),
484 SCF_TIMEOUT = (1 << 3),
485 SCF_UADDR2 = (1 << 4),
489 int cmd = op & FUTEX_CMD_MASK;
493 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
494 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
495 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
496 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
497 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
498 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
499 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
500 P_FUTEX_OP(WAKE_OP); break;
501 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
502 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
503 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
504 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
505 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
506 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
507 default: printed = scnprintf(bf, size, "%#x", cmd); break;
510 if (op & FUTEX_PRIVATE_FLAG)
511 printed += scnprintf(bf + printed, size - printed, "|PRIV");
513 if (op & FUTEX_CLOCK_REALTIME)
514 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
519 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
/*
 * Name tables used by the SCA_STRARRAY printer.  Each table is paired with a
 * strarray__<name> descriptor; an entry's position (plus the descriptor
 * offset, if any) is the numeric value it names, so the order of the strings
 * must not be changed.
 */

/* epoll_ctl() ops; the first entry names value 1. */
521 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
522 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
/* Interval timers, used for getitimer()/setitimer(). */
524 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
525 static DEFINE_STRARRAY(itimers);
/* lseek() whence values. */
527 static const char *whences[] = { "SET", "CUR", "END",
535 static DEFINE_STRARRAY(whences);
/* fcntl() commands. */
537 static const char *fcntl_cmds[] = {
538 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
539 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
540 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
543 static DEFINE_STRARRAY(fcntl_cmds);
/* Resource limits, used for getrlimit()/setrlimit()/prlimit64(). */
545 static const char *rlimit_resources[] = {
546 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
547 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
550 static DEFINE_STRARRAY(rlimit_resources);
/* rt_sigprocmask() "how" values. */
552 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
553 static DEFINE_STRARRAY(sighow);
/* Clock ids, used for clock_gettime(). */
555 static const char *clockid[] = {
556 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
557 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
559 static DEFINE_STRARRAY(clockid);
/* Socket address families, used for socket()/socketpair(). */
561 static const char *socket_families[] = {
562 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
563 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
564 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
565 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
566 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
567 "ALG", "NFC", "VSOCK",
569 static DEFINE_STRARRAY(socket_families);
/*
 * Print a socket type argument.  The value is split with SOCK_TYPE_MASK
 * (0xf when the headers do not define it): the masked part is printed as a
 * SOCK_* type name, or in hexadecimal when unknown, and the remaining bits
 * are appended as "|<FLAG>" names, with anything still left over appended as
 * "|<hex>".
 */
571 #ifndef SOCK_TYPE_MASK
572 #define SOCK_TYPE_MASK 0xf
575 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
576 struct syscall_arg *arg)
580 flags = type & ~SOCK_TYPE_MASK;
582 type &= SOCK_TYPE_MASK;
584 * Can't use a strarray, MIPS may override for ABI reasons.
587 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
592 P_SK_TYPE(SEQPACKET);
597 printed = scnprintf(bf, size, "%#x", type);
600 #define P_SK_FLAG(n) \
601 if (flags & SOCK_##n) { \
602 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
603 flags &= ~SOCK_##n; \
611 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
616 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
/* Values for MSG_* flags used by the msg_flags printer below, for headers that may lack them. */
619 #define MSG_PROBE 0x10
621 #ifndef MSG_WAITFORONE
622 #define MSG_WAITFORONE 0x10000
624 #ifndef MSG_SENDPAGE_NOTLAST
625 #define MSG_SENDPAGE_NOTLAST 0x20000
628 #define MSG_FASTOPEN 0x20000000
/*
 * Print the send/recv flags in arg->val as a '|'-separated list of MSG_*
 * names (without the MSG_ prefix).  One special value is printed as "NONE",
 * and a trailing hexadecimal value is appended for flags, presumably holding
 * the bits no name matched.
 */
631 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
632 struct syscall_arg *arg)
634 int printed = 0, flags = arg->val;
637 return scnprintf(bf, size, "NONE");
638 #define P_MSG_FLAG(n) \
639 if (flags & MSG_##n) { \
640 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
646 P_MSG_FLAG(DONTROUTE);
651 P_MSG_FLAG(DONTWAIT);
658 P_MSG_FLAG(ERRQUEUE);
659 P_MSG_FLAG(NOSIGNAL);
661 P_MSG_FLAG(WAITFORONE);
662 P_MSG_FLAG(SENDPAGE_NOTLAST);
663 P_MSG_FLAG(FASTOPEN);
664 P_MSG_FLAG(CMSG_CLOEXEC);
668 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
673 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
/*
 * Print the access() mode: F_OK (0) is printed as "F"; otherwise the name
 * prefix of each <n>_OK bit that is set is concatenated with no separator,
 * and a "|<hex>" suffix is appended for mode, presumably for unnamed bits.
 */
675 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
676 struct syscall_arg *arg)
681 if (mode == F_OK) /* 0 */
682 return scnprintf(bf, size, "F");
684 if (mode & n##_OK) { \
685 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
695 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
700 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
/*
 * Print the open() flags in arg->val as a '|'-separated list of O_* names
 * (without the O_ prefix).  One special value is printed as "RDONLY".
 * "SYNC" is only printed when every bit of O_SYNC is set.  A trailing
 * hexadecimal value is appended for flags, presumably holding the bits no
 * name matched.
 *
 * Side effect: when O_CREAT is not set, the bit for the following argument
 * (the mode) is set in arg->mask.
 */
702 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
703 struct syscall_arg *arg)
705 int printed = 0, flags = arg->val;
707 if (!(flags & O_CREAT))
708 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
711 return scnprintf(bf, size, "RDONLY");
713 if (flags & O_##n) { \
714 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
738 if ((flags & O_SYNC) == O_SYNC)
739 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
751 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
756 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
/*
 * Print the eventfd flags in arg->val as a '|'-separated list of EFD_* names
 * (without the EFD_ prefix).  One special value is printed as "NONE", and a
 * trailing hexadecimal value is appended for flags, presumably holding the
 * bits no name matched.
 */
758 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
759 struct syscall_arg *arg)
761 int printed = 0, flags = arg->val;
764 return scnprintf(bf, size, "NONE");
766 if (flags & EFD_##n) { \
767 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
777 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
782 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
/*
 * Print the pipe2() flags in arg->val as a '|'-separated list of O_* names
 * (without the O_ prefix); a trailing hexadecimal value is appended for
 * flags, presumably holding the bits no name matched.
 */
784 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
785 struct syscall_arg *arg)
787 int printed = 0, flags = arg->val;
790 if (flags & O_##n) { \
791 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
800 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
805 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
/*
 * Print a signal number as its name without the SIG prefix; numbers with no
 * matching case are printed in hexadecimal.
 */
807 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
812 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
855 return scnprintf(bf, size, "%#x", sig);
858 #define SCA_SIGNUM syscall_arg__scnprintf_signum
/*
 * Names of the tty ioctl commands, x86 only for now.  The table is indexed
 * by (cmd - 0x5401), i.e. the first entry names TCGETS.  The designated
 * indexes ([0x27], [0x50], [0x60]) leave gaps, and those slots are NULL.
 */
860 #if defined(__i386__) || defined(__x86_64__)
862 * FIXME: Make this available to all arches.
864 #define TCGETS 0x5401
866 static const char *tioctls[] = {
867 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
868 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
869 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
870 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
871 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
872 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
873 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
874 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
875 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
876 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
877 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
878 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
879 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
880 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
881 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
884 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
885 #endif /* defined(__i386__) || defined(__x86_64__) */
/*
 * Shorthand for syscall_fmts entries: format argument number @arg with
 * SCA_STRARRAY using strarray__<array> as its parameter.  @name is not used
 * by the expansion; it only documents the argument's name at the use site.
 */
887 #define STRARRAY(arg, name, array) \
888 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
889 .arg_parm = { [arg] = &strarray__##array, }
/*
 * Per-syscall formatting hints.  Entries are looked up by name with
 * bsearch() and strcmp() in syscall_fmt__find(), so the table MUST stay
 * sorted by .name in strcmp() order (note that '_' sorts before lowercase
 * letters, e.g. "open_by_handle_at" comes before "openat").
 *
 * arg_scnprintf[i] is the printer for the i-th syscall argument (up to six),
 * and arg_parm[i], where set, is that printer's parameter.
 */
891 static struct syscall_fmt {
894 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
900 { .name = "access", .errmsg = true,
901 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
902 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
903 { .name = "brk", .hexret = true,
904 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
905 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
906 { .name = "close", .errmsg = true,
907 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
908 { .name = "connect", .errmsg = true, },
909 { .name = "dup", .errmsg = true,
910 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
911 { .name = "dup2", .errmsg = true,
912 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
913 { .name = "dup3", .errmsg = true,
914 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
915 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
916 { .name = "eventfd2", .errmsg = true,
917 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
918 { .name = "faccessat", .errmsg = true,
919 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
920 { .name = "fadvise64", .errmsg = true,
921 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
922 { .name = "fallocate", .errmsg = true,
923 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
924 { .name = "fchdir", .errmsg = true,
925 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
926 { .name = "fchmod", .errmsg = true,
927 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
928 { .name = "fchmodat", .errmsg = true,
929 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
930 { .name = "fchown", .errmsg = true,
931 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
932 { .name = "fchownat", .errmsg = true,
933 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
934 { .name = "fcntl", .errmsg = true,
935 .arg_scnprintf = { [0] = SCA_FD, /* fd */
936 [1] = SCA_STRARRAY, /* cmd */ },
937 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
938 { .name = "fdatasync", .errmsg = true,
939 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
940 { .name = "flock", .errmsg = true,
941 .arg_scnprintf = { [0] = SCA_FD, /* fd */
942 [1] = SCA_FLOCK, /* cmd */ }, },
943 { .name = "fsetxattr", .errmsg = true,
944 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
945 { .name = "fstat", .errmsg = true, .alias = "newfstat",
946 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
947 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
948 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
949 { .name = "fstatfs", .errmsg = true,
950 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
951 { .name = "fsync", .errmsg = true,
952 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
953 { .name = "ftruncate", .errmsg = true,
954 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
955 { .name = "futex", .errmsg = true,
956 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
957 { .name = "futimesat", .errmsg = true,
958 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
959 { .name = "getdents", .errmsg = true,
960 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
961 { .name = "getdents64", .errmsg = true,
962 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
963 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
964 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
/* ioctl: the command is decoded through the tioctls table on x86 only. */
965 { .name = "ioctl", .errmsg = true,
966 .arg_scnprintf = { [0] = SCA_FD, /* fd */
967 #if defined(__i386__) || defined(__x86_64__)
969 * FIXME: Make this available to all arches.
971 [1] = SCA_STRHEXARRAY, /* cmd */
972 [2] = SCA_HEX, /* arg */ },
973 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
975 [2] = SCA_HEX, /* arg */ }, },
977 { .name = "kill", .errmsg = true,
978 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
979 { .name = "linkat", .errmsg = true,
980 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
981 { .name = "lseek", .errmsg = true,
982 .arg_scnprintf = { [0] = SCA_FD, /* fd */
983 [2] = SCA_STRARRAY, /* whence */ },
984 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
985 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
986 { .name = "madvise", .errmsg = true,
987 .arg_scnprintf = { [0] = SCA_HEX, /* start */
988 [2] = SCA_MADV_BHV, /* behavior */ }, },
989 { .name = "mkdirat", .errmsg = true,
990 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
991 { .name = "mknodat", .errmsg = true,
992 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
993 { .name = "mlock", .errmsg = true,
994 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
995 { .name = "mlockall", .errmsg = true,
996 .arg_scnprintf = { [0] = SCA_HEX, /* flags */ }, },
997 { .name = "mmap", .hexret = true,
998 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
999 [2] = SCA_MMAP_PROT, /* prot */
1000 [3] = SCA_MMAP_FLAGS, /* flags */
1001 [4] = SCA_FD, /* fd */ }, },
1002 { .name = "mprotect", .errmsg = true,
1003 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1004 [2] = SCA_MMAP_PROT, /* prot */ }, },
1005 { .name = "mremap", .hexret = true,
1006 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1007 [4] = SCA_HEX, /* new_addr */ }, },
1008 { .name = "munlock", .errmsg = true,
1009 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1010 { .name = "munmap", .errmsg = true,
1011 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1012 { .name = "name_to_handle_at", .errmsg = true,
1013 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1014 { .name = "newfstatat", .errmsg = true,
1015 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1016 { .name = "open", .errmsg = true,
1017 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1018 { .name = "open_by_handle_at", .errmsg = true,
1019 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1020 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1021 { .name = "openat", .errmsg = true,
1022 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1023 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1024 { .name = "pipe2", .errmsg = true,
1025 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1026 { .name = "poll", .errmsg = true, .timeout = true, },
1027 { .name = "ppoll", .errmsg = true, .timeout = true, },
1028 { .name = "pread", .errmsg = true, .alias = "pread64",
1029 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
/* NOTE(review): the "pread" alias on preadv looks suspicious - confirm it is intended. */
1030 { .name = "preadv", .errmsg = true, .alias = "pread",
1031 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1032 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1033 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1034 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1035 { .name = "pwritev", .errmsg = true,
1036 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1037 { .name = "read", .errmsg = true,
1038 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1039 { .name = "readlinkat", .errmsg = true,
1040 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1041 { .name = "readv", .errmsg = true,
1042 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1043 { .name = "recvfrom", .errmsg = true,
1044 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1045 { .name = "recvmmsg", .errmsg = true,
1046 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1047 { .name = "recvmsg", .errmsg = true,
1048 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1049 { .name = "renameat", .errmsg = true,
1050 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1051 { .name = "rt_sigaction", .errmsg = true,
1052 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1053 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1054 { .name = "rt_sigqueueinfo", .errmsg = true,
1055 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1056 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1057 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1058 { .name = "select", .errmsg = true, .timeout = true, },
1059 { .name = "sendmmsg", .errmsg = true,
1060 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1061 { .name = "sendmsg", .errmsg = true,
1062 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1063 { .name = "sendto", .errmsg = true,
1064 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1065 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1066 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1067 { .name = "shutdown", .errmsg = true,
1068 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1069 { .name = "socket", .errmsg = true,
1070 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1071 [1] = SCA_SK_TYPE, /* type */ },
1072 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1073 { .name = "socketpair", .errmsg = true,
1074 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1075 [1] = SCA_SK_TYPE, /* type */ },
1076 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1077 { .name = "stat", .errmsg = true, .alias = "newstat", },
1078 { .name = "symlinkat", .errmsg = true,
1079 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1080 { .name = "tgkill", .errmsg = true,
1081 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1082 { .name = "tkill", .errmsg = true,
1083 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1084 { .name = "uname", .errmsg = true, .alias = "newuname", },
1085 { .name = "unlinkat", .errmsg = true,
1086 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1087 { .name = "utimensat", .errmsg = true,
1088 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1089 { .name = "write", .errmsg = true,
1090 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1091 { .name = "writev", .errmsg = true,
1092 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
/* bsearch() comparator: @name is the key string, @fmtp a syscall_fmts entry compared by its .name. */
1095 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1097 const struct syscall_fmt *fmt = fmtp;
1098 return strcmp(name, fmt->name);
/*
 * Binary-search syscall_fmts for the entry whose .name equals @name.
 * Returns the entry, or NULL (from bsearch()) when there is none.  Relies on
 * the table being sorted by name.
 */
1101 static struct syscall_fmt *syscall_fmt__find(const char *name)
1103 const int nmemb = ARRAY_SIZE(syscall_fmts);
1104 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
/* Tracepoint format; its field list drives syscall__set_arg_fmts(). */
1108 struct event_format *tp_format;
/* Formatting hints for the syscall; checked for NULL before use in syscall__set_arg_fmts(). */
1112 struct syscall_fmt *fmt;
/* Per-argument printers, allocated and filled in by syscall__set_arg_fmts(). */
1113 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
/*
 * Print "(<duration> ms): " to @fp, where @t is a duration in nanoseconds
 * shown in milliseconds with three decimals.  Durations of at least 1 ms are
 * printed in red, those of at least 0.01 ms in yellow and the rest in the
 * normal colour.  Returns the number of characters printed.
 */
1117 static size_t fprintf_duration(unsigned long t, FILE *fp)
1119 double duration = (double)t / NSEC_PER_MSEC;
1120 size_t printed = fprintf(fp, "(");
1122 if (duration >= 1.0)
1123 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1124 else if (duration >= 0.01)
1125 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1127 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1128 return printed + fprintf(fp, "): ");
/* Per-thread tracing state, stored in thread->priv (see thread__trace()). */
1131 struct thread_trace {
/* Incremented on every thread__trace() call for the thread. */
1135 unsigned long nr_events;
/* Created empty by thread_trace__new(). */
1143 struct intlist *syscall_stats;
/*
 * Allocate a zeroed thread_trace.  paths.max starts at -1, the value
 * trace__set_fd_pathname() treats as "fd table not populated yet", and an
 * empty syscall_stats intlist is created.
 */
1146 static struct thread_trace *thread_trace__new(void)
1148 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1151 ttrace->paths.max = -1;
1153 ttrace->syscall_stats = intlist__new(NULL);
/*
 * Get the thread_trace attached to @thread (thread->priv), creating it on
 * first use, and count one more event for the thread.  When the state cannot
 * be allocated a red out-of-memory warning is printed to @fp.
 */
1158 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1160 struct thread_trace *ttrace;
1165 if (thread->priv == NULL)
1166 thread->priv = thread_trace__new();
1168 if (thread->priv == NULL)
1171 ttrace = thread->priv;
1172 ++ttrace->nr_events;
1176 color_fprintf(fp, PERF_COLOR_RED,
1177 "WARNING: not enough memory, dropping samples!\n");
/* Embedded tool; trace__tool_process() recovers the enclosing struct trace from it with container_of(). */
1182 struct perf_tool tool;
1189 struct syscall *table;
1191 struct record_opts opts;
/* Host machine, created by trace__symbols_init(). */
1192 struct machine *host;
1195 unsigned long nr_events;
1196 struct strlist *ev_qualifier;
1197 const char *last_vfs_getname;
1198 struct intlist *tid_list;
1199 struct intlist *pid_list;
/* Threshold in milliseconds, compared against nanosecond durations in trace__filter_duration(). */
1200 double duration_filter;
1206 bool not_ev_qualifier;
/* When set, trace__fprintf_entry_head() prefixes each line with the thread id (and comm if show_comm). */
1210 bool multiple_threads;
1214 bool show_tool_stats;
/*
 * Remember @pathname as the path behind @fd for @thread.  The per-thread
 * table is grown with realloc() when @fd is beyond paths.max, and the newly
 * added slots are zeroed (the whole table on first use, when paths.max is
 * -1).  The string is copied with strdup().
 *
 * Returns 0 on success, -1 when the copy could not be allocated.
 *
 * NOTE(review): an existing table[fd] entry is overwritten without being
 * freed - possible leak if the fd is re-registered; confirm with callers.
 * NOTE(review): a negative @fd is not rejected here; callers presumably only
 * pass fd >= 0.
 */
1217 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1219 struct thread_trace *ttrace = thread->priv;
1221 if (fd > ttrace->paths.max) {
1222 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1227 if (ttrace->paths.max != -1) {
1228 memset(npath + ttrace->paths.max + 1, 0,
1229 (fd - ttrace->paths.max) * sizeof(char *));
1231 memset(npath, 0, (fd + 1) * sizeof(char *));
1234 ttrace->paths.table = npath;
1235 ttrace->paths.max = fd;
1238 ttrace->paths.table[fd] = strdup(pathname);
1240 return ttrace->paths.table[fd] != NULL ? 0 : -1;
/*
 * Resolve @fd of @thread to a path by reading its /proc symlink and store
 * the result with trace__set_fd_pathname().
 *
 * The link is /proc/<pid>/fd/<fd> when pid_ equals tid and
 * /proc/<pid>/task/<tid>/fd/<fd> otherwise.  lstat() supplies the expected
 * link length; links that would not fit in the buffer, or whose readlink()
 * result is longer than that length, are rejected.  readlink() does not
 * NUL-terminate, so the terminator is added here.
 */
1243 static int thread__read_fd_path(struct thread *thread, int fd)
1245 char linkname[PATH_MAX], pathname[PATH_MAX];
1249 if (thread->pid_ == thread->tid) {
1250 scnprintf(linkname, sizeof(linkname),
1251 "/proc/%d/fd/%d", thread->pid_, fd);
1253 scnprintf(linkname, sizeof(linkname),
1254 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1257 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1260 ret = readlink(linkname, pathname, sizeof(pathname));
1262 if (ret < 0 || ret > st.st_size)
1265 pathname[ret] = '\0';
1266 return trace__set_fd_pathname(thread, fd, pathname);
/*
 * Return the cached path for @fd of @thread.  When the fd is beyond the
 * table or has no entry yet, the path is read from /proc through
 * thread__read_fd_path() first; each such read is counted in
 * trace->stats.proc_getname.
 */
1269 static const char *thread__fd_path(struct thread *thread, int fd,
1270 struct trace *trace)
1272 struct thread_trace *ttrace = thread->priv;
1280 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1283 ++trace->stats.proc_getname;
1284 if (thread__read_fd_path(thread, fd))
1288 return ttrace->paths.table[fd];
/*
 * Print a file descriptor argument as its number, followed by "<path>" using
 * the path looked up with thread__fd_path().  Returns the number of
 * characters written to @bf.
 */
1291 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1292 struct syscall_arg *arg)
1295 size_t printed = scnprintf(bf, size, "%d", fd);
1296 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1299 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
/*
 * Printer used for close(): print the fd like syscall_arg__scnprintf_fd()
 * and then drop the cached path for it (zfree() frees the entry and resets
 * the slot), provided the thread has trace state and the fd is within the
 * table.
 */
1304 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1305 struct syscall_arg *arg)
1308 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1309 struct thread_trace *ttrace = arg->thread->priv;
1311 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1312 zfree(&ttrace->paths.table[fd]);
/*
 * Return true when duration @t (nanoseconds) is below
 * trace->duration_filter, which is expressed in milliseconds.
 */
1317 static bool trace__filter_duration(struct trace *trace, double t)
1319 return t < (trace->duration_filter * NSEC_PER_MSEC);
/*
 * Print @tstamp relative to trace->base_time, converted from nanoseconds to
 * milliseconds with three decimals.  Returns the fprintf() result.
 */
1322 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1324 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1326 return fprintf(fp, "%10.3f ", ts);
/*
 * Flags shared with sig_handler(): interrupted is assigned there, and done
 * is presumably set there as well.
 *
 * An object written from an asynchronous signal handler and read by the
 * interrupted code must be a volatile sig_atomic_t.  With a plain bool the
 * access is undefined behaviour and the compiler may keep the value in a
 * register, so a loop polling the flag might never see the update.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;
/* Signal handler: records in `interrupted` whether the signal received was SIGINT. */
1332 static void sig_handler(int sig)
1335 interrupted = sig == SIGINT;
/*
 * Print the start of an output line: the relative timestamp, the duration
 * and, when several threads are being traced, the thread id, preceded by the
 * command name (at most 14 characters) if show_comm is set.
 */
1338 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1339 u64 duration, u64 tstamp, FILE *fp)
1341 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1342 printed += fprintf_duration(duration, fp);
1344 if (trace->multiple_threads) {
1345 if (trace->show_comm)
1346 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1347 printed += fprintf(fp, "%d ", thread->tid);
/*
 * trace__process_event - handle a non-sample perf event.
 *
 * PERF_RECORD_LOST events are reported on trace->output in red together with
 * the number of lost events and handed to machine__process_lost_event().
 * Events are otherwise passed to machine__process_event().
 *
 * NOTE(review): the `default:` label and any `break` are on lines elided
 * from this listing.  If the LOST case has no `break` it falls through and
 * `ret` is overwritten by machine__process_event() -- confirm whether that
 * is intended.
 */
1353 static int trace__process_event(struct trace *trace, struct machine *machine,
1354 union perf_event *event, struct perf_sample *sample)
1358 switch (event->header.type) {
1359 case PERF_RECORD_LOST:
1360 color_fprintf(trace->output, PERF_COLOR_RED,
1361 "LOST %" PRIu64 " events!\n", event->lost.lost);
1362 ret = machine__process_lost_event(machine, event, sample);
1364 ret = machine__process_event(machine, event, sample);
1371 static int trace__tool_process(struct perf_tool *tool,
1372 union perf_event *event,
1373 struct perf_sample *sample,
1374 struct machine *machine)
1376 struct trace *trace = container_of(tool, struct trace, tool);
1377 return trace__process_event(trace, machine, event, sample);
/*
 * trace__symbols_init - set up symbol handling and the host machine.
 *
 * Calls symbol__init(), creates the host machine object and stores it in
 * trace->host, then synthesizes thread information for the threads in
 * @evlist->threads, using trace__tool_process() as the event callback.
 *
 * NOTE(review): the error checks and return statements are on lines elided
 * from this listing.
 */
1380 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1382 int err = symbol__init();
1387 trace->host = machine__new_host();
1388 if (trace->host == NULL)
1391 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1392 evlist->threads, trace__tool_process, false);
/*
 * syscall__set_arg_fmts - choose a formatter for each argument of @sc.
 *
 * Allocates sc->arg_scnprintf with one zeroed slot per tracepoint field
 * except the first (the loop below starts at fields->next).  For each
 * argument the formatter comes from the syscall's static format entry
 * (sc->fmt) when it provides one; otherwise pointer-typed fields get the hex
 * formatter and everything else is left NULL.
 *
 * NOTE(review): the declaration and increment of `idx`, the guard around the
 * arg_parm assignment and the return statements are elided here.
 */
1399 static int syscall__set_arg_fmts(struct syscall *sc)
1401 struct format_field *field;
1404 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1405 if (sc->arg_scnprintf == NULL)
1409 sc->arg_parm = sc->fmt->arg_parm;
/* Skip the first field; the remaining ones are walked as the arguments. */
1411 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1412 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1413 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1414 else if (field->flags & FIELD_IS_POINTER)
1415 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
/*
 * trace__read_syscall_info - populate the syscall table entry for @id.
 *
 * Steps visible in this listing:
 *  - resolve the syscall name through audit_syscall_to_name();
 *  - grow trace->syscalls.table to id + 1 entries if @id is beyond the
 *    current maximum, zeroing every newly added entry;
 *  - if an event qualifier list is active, mark the syscall as filtered
 *    when its membership in the list equals the negation flag;
 *  - look up the static format entry and the "sys_enter_<name>" tracepoint
 *    format, retrying with the format's alias if the first lookup fails;
 *  - record whether this is exit/exit_group and set up argument formatters.
 *
 * NOTE(review): early returns, the assignment of sc->name and the remainder
 * of the in-body comment are on lines elided from this listing.
 */
1422 static int trace__read_syscall_info(struct trace *trace, int id)
1426 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1431 if (id > trace->syscalls.max) {
/* realloc into a temporary so the old table survives an allocation failure */
1432 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1434 if (nsyscalls == NULL)
/* max == -1 means the table was empty: clear everything, else only the new tail */
1437 if (trace->syscalls.max != -1) {
1438 memset(nsyscalls + trace->syscalls.max + 1, 0,
1439 (id - trace->syscalls.max) * sizeof(*sc));
1441 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1444 trace->syscalls.table = nsyscalls;
1445 trace->syscalls.max = id;
1448 sc = trace->syscalls.table + id;
1451 if (trace->ev_qualifier) {
1452 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
/* filtered when (listed and list is negated) or (not listed and list is positive) */
1454 if (!(in ^ trace->not_ev_qualifier)) {
1455 sc->filtered = true;
1457 * No need to do read tracepoint information since this will be
1464 sc->fmt = syscall_fmt__find(sc->name);
1466 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1467 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
/* Some syscalls are exposed under a different tracepoint name: try the alias. */
1469 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1470 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1471 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1474 if (sc->tp_format == NULL)
1477 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1479 return syscall__set_arg_fmts(sc);
/*
 * syscall__scnprintf_args - format the arguments of a syscall into @bf.
 *
 * When the tracepoint format is known, the fields after the first are walked
 * and each one is printed as "name: value", comma separated.  A zero-valued
 * argument is tested for suppression unless its formatter is SCA_STRARRAY
 * with an associated parameter.  Values go through the per-argument
 * formatter if one is set, otherwise they are printed with "%ld".
 *
 * When no tracepoint format is available a generic fallback prints the
 * arguments by index.
 *
 * NOTE(review): local declarations, the body of the suppression test, the
 * fallback loop header and its format string, and the return statement are
 * on lines elided from this listing.
 */
1482 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1483 unsigned long *args, struct trace *trace,
1484 struct thread *thread)
1488 if (sc->tp_format != NULL) {
1489 struct format_field *field;
1491 struct syscall_arg arg = {
/* arg.idx and `bit` advance in lock-step with the field list */
1498 for (field = sc->tp_format->format.fields->next; field;
1499 field = field->next, ++arg.idx, bit <<= 1) {
1503 * Suppress this argument if its value is zero and
1504 * and we don't have a string associated in an
1507 if (args[arg.idx] == 0 &&
1508 !(sc->arg_scnprintf &&
1509 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1510 sc->arg_parm[arg.idx]))
1513 printed += scnprintf(bf + printed, size - printed,
1514 "%s%s: ", printed ? ", " : "", field->name);
1515 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1516 arg.val = args[arg.idx];
1518 arg.parm = sc->arg_parm[arg.idx];
1519 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1520 size - printed, &arg);
1522 printed += scnprintf(bf + printed, size - printed,
1523 "%ld", args[arg.idx]);
1530 printed += scnprintf(bf + printed, size - printed,
1532 printed ? ", " : "", i, args[i]);
/*
 * Signature shared by the per-tracepoint handlers in this file; the handler
 * stored in evsel->handler is invoked through this type.
 */
1540 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1541 struct perf_sample *sample);
/*
 * trace__syscall_info - return the syscall table entry for @id, reading it
 * on first use.
 *
 * An entry is read through trace__read_syscall_info() when @id is beyond the
 * table or the slot has no name yet.  On success a pointer into
 * trace->syscalls.table is returned.  The failure path prints "Problems
 * reading syscall <id>[(name)] information" to trace->output.
 *
 * An "Invalid syscall" message (with a running count `n`) is printed for
 * ids rejected up front.
 *
 * NOTE(review): the condition that rejects an id, the verbosity guards, the
 * goto labels and the return statements of the failure paths are on lines
 * elided from this listing.
 */
1543 static struct syscall *trace__syscall_info(struct trace *trace,
1544 struct perf_evsel *evsel, int id)
1550 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1551 * before that, leaving at a higher verbosity level till that is
1552 * explained. Reproduced with plain ftrace with:
1554 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1555 * grep "NR -1 " /t/trace_pipe
1557 * After generating some load on the machine.
1561 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1562 id, perf_evsel__name(evsel), ++n);
/* Lazily read the syscall description the first time this id is seen. */
1567 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1568 trace__read_syscall_info(trace, id))
/* Re-check: the read may have returned without filling in the slot. */
1571 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1574 return &trace->syscalls.table[id];
1578 fprintf(trace->output, "Problems reading syscall %d", id);
1579 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1580 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1581 fputs(" information\n", trace->output);
/*
 * thread__update_stats - account one completed syscall in the per-thread,
 * per-syscall statistics.
 *
 * The stats object for syscall @id lives in the `priv` pointer of the
 * matching node of ttrace->syscall_stats and is allocated the first time the
 * syscall is seen.  The sample fed to update_stats() is the time elapsed
 * since ttrace->entry_time, computed only when an entry time is recorded and
 * the exit sample is later than it.
 *
 * NOTE(review): the declaration/initial value of `duration`, the NULL checks
 * after intlist__findnew()/malloc() and the initialisation of a freshly
 * allocated stats object are on lines elided from this listing.
 */
1586 static void thread__update_stats(struct thread_trace *ttrace,
1587 int id, struct perf_sample *sample)
1589 struct int_node *inode;
1590 struct stats *stats;
1593 inode = intlist__findnew(ttrace->syscall_stats, id);
1597 stats = inode->priv;
/* First occurrence of this syscall for the thread: create its stats. */
1598 if (stats == NULL) {
1599 stats = malloc(sizeof(struct stats));
1603 inode->priv = stats;
1606 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1607 duration = sample->time - ttrace->entry_time;
1609 update_stats(stats, duration);
/*
 * trace__sys_enter - handler for the syscall-entry tracepoint.
 *
 * Reads the syscall id from the sample, finds the syscall description and
 * the per-thread trace state, then formats "name(args" into the thread's
 * entry_str buffer (1024 bytes, allocated on first use) and records the
 * entry timestamp.  The formatted string is either printed immediately or
 * left pending (entry_pending = true) for the matching exit event.
 *
 * NOTE(review): NULL/filtered checks, the condition selecting between the
 * immediate print and the pending path (presumably sc->is_exit, since such
 * syscalls produce no exit event -- confirm) and the return statements are
 * on lines elided from this listing.
 */
1612 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1613 struct perf_sample *sample)
1618 struct thread *thread;
1619 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1620 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1621 struct thread_trace *ttrace;
1629 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1630 ttrace = thread__trace(thread, trace->output);
1634 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
/* One formatting buffer per thread, reused for every syscall entry. */
1636 if (ttrace->entry_str == NULL) {
1637 ttrace->entry_str = malloc(1024);
1638 if (!ttrace->entry_str)
1642 ttrace->entry_time = sample->time;
1643 msg = ttrace->entry_str;
1644 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1646 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1647 args, trace, thread);
1650 if (!trace->duration_filter && !trace->summary_only) {
1651 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1652 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1655 ttrace->entry_pending = true;
/*
 * trace__sys_exit - handler for the syscall-exit tracepoint.
 *
 * Updates the per-thread statistics, reads the syscall return value, and
 * prints the completed line: entry head, the pending entry string (or a
 * "... [continued]: name()" marker when no entry is pending) and the return
 * value.  The return value is rendered according to the syscall's format
 * entry: "-1 <ERRNAME> <message>" for errors, "0 Timeout", hexadecimal, or
 * plain decimal.
 *
 * A successful return from the syscall whose id equals
 * trace->audit.open_id, combined with a pathname captured earlier in
 * trace->last_vfs_getname, associates that pathname with the returned fd.
 *
 * NOTE(review): `ret` is printed with "%d" and "%#x", i.e. as an int.  A
 * 64-bit return value (e.g. an address from an mmap-like call on a 64-bit
 * host) would not survive that width -- confirm the declared type of `ret`
 * on the elided line and consider widening it to long.
 *
 * NOTE(review): NULL/filtered checks, the guard on the statistics update,
 * the goto targets and the return statement are on lines elided here.
 */
1660 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1661 struct perf_sample *sample)
1665 struct thread *thread;
1666 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1667 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1668 struct thread_trace *ttrace;
1676 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1677 ttrace = thread__trace(thread, trace->output);
1682 thread__update_stats(ttrace, id, sample);
1684 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
/* Remember which path the new fd refers to; the cached name is consumed. */
1686 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1687 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1688 trace->last_vfs_getname = NULL;
1689 ++trace->stats.vfs_getname;
1692 ttrace->exit_time = sample->time;
/* Duration is only known when the matching entry event was seen. */
1694 if (ttrace->entry_time) {
1695 duration = sample->time - ttrace->entry_time;
1696 if (trace__filter_duration(trace, duration))
1698 } else if (trace->duration_filter)
1701 if (trace->summary_only)
1704 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1706 if (ttrace->entry_pending) {
1707 fprintf(trace->output, "%-70s", ttrace->entry_str);
1709 fprintf(trace->output, " ... [");
1710 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1711 fprintf(trace->output, "]: %s()", sc->name);
1714 if (sc->fmt == NULL) {
1716 fprintf(trace->output, ") = %d", ret);
1717 } else if (ret < 0 && sc->fmt->errmsg) {
1719 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1720 *e = audit_errno_to_name(-ret);
1722 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1723 } else if (ret == 0 && sc->fmt->timeout)
1724 fprintf(trace->output, ") = 0 Timeout");
1725 else if (sc->fmt->hexret)
1726 fprintf(trace->output, ") = %#x", ret);
1730 fputc('\n', trace->output);
1732 ttrace->entry_pending = false;
/*
 * trace__vfs_getname - handler for the vfs_getname probe.
 *
 * Stores a pointer to the "pathname" field of @sample in
 * trace->last_vfs_getname, where trace__sys_exit() picks it up to name the
 * fd returned by the following open.
 *
 * NOTE(review): the stored pointer comes from perf_evsel__rawptr() on the
 * sample; how long it stays valid is not visible here -- confirm.
 */
1737 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1738 struct perf_sample *sample)
1740 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
/*
 * trace__sched_stat_runtime - handler for sched:sched_stat_runtime.
 *
 * Converts the "runtime" field to milliseconds and adds it to both the
 * per-thread and the global runtime_ms accumulators.  A diagnostic line with
 * the raw tracepoint fields is printed to trace->output.
 *
 * NOTE(review): the remaining arguments of machine__findnew_thread(), the
 * NULL check on `ttrace`, the label/condition under which the diagnostic
 * line is printed and two of its arguments are on lines elided here.
 */
1744 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1745 struct perf_sample *sample)
1747 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1748 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1749 struct thread *thread = machine__findnew_thread(trace->host,
1752 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1757 ttrace->runtime_ms += runtime_ms;
1758 trace->runtime_ms += runtime_ms;
1762 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1764 perf_evsel__strval(evsel, sample, "comm"),
1765 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1767 perf_evsel__intval(evsel, sample, "vruntime"));
/*
 * skip_sample - decide whether @sample should be ignored because of the
 * pid/tid lists.
 *
 * The first test matches when the sample's pid is in trace->pid_list or its
 * tid is in trace->tid_list; the second test fires when at least one of the
 * lists exists.
 *
 * NOTE(review): every return statement is on a line elided from this
 * listing; presumably a match means "do not skip", an unmatched sample with
 * a list present means "skip", and no lists means "do not skip" -- confirm.
 */
1771 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1773 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1774 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1777 if (trace->pid_list || trace->tid_list)
/*
 * trace__process_sample - perf_tool sample callback.
 *
 * Recovers the struct trace from @tool, consults skip_sample(), latches the
 * first sample time as trace->base_time unless full timestamps were
 * requested, and dispatches the sample to the handler stored in
 * evsel->handler.
 *
 * NOTE(review): the handling of skip_sample()'s result, any guard around the
 * handler call and the return statement are on lines elided here.
 */
1783 static int trace__process_sample(struct perf_tool *tool,
1784 union perf_event *event __maybe_unused,
1785 struct perf_sample *sample,
1786 struct perf_evsel *evsel,
1787 struct machine *machine __maybe_unused)
1789 struct trace *trace = container_of(tool, struct trace, tool);
1792 tracepoint_handler handler = evsel->handler;
1794 if (skip_sample(trace, sample))
/* base_time == 0 means no sample has been seen yet */
1797 if (!trace->full_time && trace->base_time == 0)
1798 trace->base_time = sample->time;
1802 handler(trace, evsel, sample);
/*
 * parse_target_str - turn the pid / tid target strings into integer lists.
 *
 * For each of trace->opts.target.pid and .tid that is set, an intlist is
 * built from the string and stored in trace->pid_list / trace->tid_list.
 * A failure to build a list is reported with pr_err().
 *
 * NOTE(review): the return statements are on lines elided from this listing.
 */
1808 static int parse_target_str(struct trace *trace)
1810 if (trace->opts.target.pid) {
1811 trace->pid_list = intlist__new(trace->opts.target.pid);
1812 if (trace->pid_list == NULL) {
1813 pr_err("Error parsing process id string\n");
1818 if (trace->opts.target.tid) {
1819 trace->tid_list = intlist__new(trace->opts.target.tid);
1820 if (trace->tid_list == NULL) {
1821 pr_err("Error parsing thread id string\n");
/*
 * trace__record - implement "perf trace record" by delegating to cmd_record.
 *
 * Builds an argument vector consisting of the fixed record_args, one event
 * string naming the syscall entry/exit tracepoints (raw_syscalls if that
 * tracepoint exists, else syscalls), followed by the caller's @argv, and
 * passes it to cmd_record().
 *
 * NOTE(review): rec_argv is allocated with calloc() and no free() is visible
 * in this listing, neither on the "no tracepoint" error path nor after
 * cmd_record() returns -- looks like a leak; confirm against the full file.
 * The contents of record_args and the increment of `i` past the event
 * string are on elided lines.
 */
1829 static int trace__record(int argc, const char **argv)
1831 unsigned int rec_argc, i, j;
1832 const char **rec_argv;
1833 const char * const record_args[] = {
1841 /* +1 is for the event string below */
1842 rec_argc = ARRAY_SIZE(record_args) + 1 + argc;
/* one extra zeroed slot keeps the vector NULL-terminated */
1843 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1845 if (rec_argv == NULL)
1848 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1849 rec_argv[i] = record_args[i];
1851 /* event string may be different for older kernels - e.g., RHEL6 */
1852 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
1853 rec_argv[i] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
1854 else if (is_valid_tracepoint("syscalls:sys_enter"))
1855 rec_argv[i] = "syscalls:sys_enter,syscalls:sys_exit";
1857 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
1862 for (j = 0; j < (unsigned int)argc; j++, i++)
1863 rec_argv[i] = argv[j];
1865 return cmd_record(i, rec_argv, NULL);
/* Forward declaration: defined further down, called from trace__run(). */
1868 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * perf_evlist__add_vfs_getname - add the optional probe:vfs_getname event.
 *
 * Creates the tracepoint evsel and, provided it exposes a "pathname" field,
 * installs trace__vfs_getname() as its handler and appends it to @evlist.
 * An evsel without that field is deleted instead of being added.
 *
 * NOTE(review): the NULL check on `evsel` and the early returns are on lines
 * elided from this listing.
 */
1870 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1872 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1876 if (perf_evsel__field(evsel, "pathname") == NULL) {
1877 perf_evsel__delete(evsel);
1881 evsel->handler = trace__vfs_getname;
1882 perf_evlist__add(evlist, evsel);
/*
 * trace__run - live tracing: set up the events, run the event loop, print
 * the results.
 *
 * Sequence visible in this listing:
 *  1. create the evlist and add the syscall entry/exit tracepoints, the
 *     optional vfs_getname probe and sched_stat_runtime;
 *  2. create the cpu/thread maps for the target, initialise symbols and
 *     configure the evlist from trace->opts;
 *  3. install sig_handler() for SIGCHLD and SIGINT and prepare the workload;
 *  4. open and mmap the events, enable them and start the workload;
 *  5. loop over every mmap, parsing each event: non-sample events go to
 *     trace__process_event(), samples are dispatched to the handler of the
 *     evsel identified by sample.id;
 *  6. when a pass over the mmaps produced no new events, poll() the evlist
 *     fds -- with a 100 ms timeout once `done` is set, indefinitely before;
 *  7. disable the events, print the thread summary and tool statistics,
 *     delete the evlist and clear trace->live.
 *
 * The tail of the function formats an error message for a failed event
 * open into `errbuf` and prints it before joining the common cleanup.
 *
 * NOTE(review): many conditions, labels, gotos, `continue`-style skips and
 * the return statement are on lines elided from this listing; the summary
 * above describes only what the visible lines do.
 */
1885 static int trace__run(struct trace *trace, int argc, const char **argv)
1887 struct perf_evlist *evlist = perf_evlist__new();
1888 struct perf_evsel *evsel;
1890 unsigned long before;
/* a command on the command line means we fork and trace a workload */
1891 const bool forks = argc > 0;
1895 if (evlist == NULL) {
1896 fprintf(trace->output, "Not enough memory to run!\n");
1900 if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1903 perf_evlist__add_vfs_getname(evlist);
1906 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1907 trace__sched_stat_runtime))
1910 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1912 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1913 goto out_delete_evlist;
1916 err = trace__symbols_init(trace, evlist);
1918 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1919 goto out_delete_evlist;
1922 perf_evlist__config(evlist, &trace->opts);
1924 signal(SIGCHLD, sig_handler);
1925 signal(SIGINT, sig_handler);
1928 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1931 fprintf(trace->output, "Couldn't run the workload!\n");
1932 goto out_delete_evlist;
1936 err = perf_evlist__open(evlist);
1938 goto out_error_open;
1940 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
1942 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1943 goto out_delete_evlist;
1946 perf_evlist__enable(evlist);
1949 perf_evlist__start_workload(evlist);
/* a thread map entry of -1 or more than one entry: several threads may show up */
1951 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
/* snapshot used below to detect a pass that read no events */
1953 before = trace->nr_events;
1955 for (i = 0; i < evlist->nr_mmaps; i++) {
1956 union perf_event *event;
1958 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1959 const u32 type = event->header.type;
1960 tracepoint_handler handler;
1961 struct perf_sample sample;
1965 err = perf_evlist__parse_sample(evlist, event, &sample);
1967 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1971 if (!trace->full_time && trace->base_time == 0)
1972 trace->base_time = sample.time;
1974 if (type != PERF_RECORD_SAMPLE) {
1975 trace__process_event(trace, trace->host, event, &sample);
1979 evsel = perf_evlist__id2evsel(evlist, sample.id);
1980 if (evsel == NULL) {
1981 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1985 if (sample.raw_data == NULL) {
1986 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1987 perf_evsel__name(evsel), sample.tid,
1988 sample.cpu, sample.raw_size);
1992 handler = evsel->handler;
1993 handler(trace, evsel, &sample);
1995 perf_evlist__mmap_consume(evlist, i);
/* Nothing new was read in this pass: wait for more data. */
2002 if (trace->nr_events == before) {
2003 int timeout = done ? 100 : -1;
2005 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
2012 perf_evlist__disable(evlist);
2016 trace__fprintf_thread_summary(trace, trace->output);
2018 if (trace->show_tool_stats) {
2019 fprintf(trace->output, "Stats:\n "
2020 " vfs_getname : %" PRIu64 "\n"
2021 " proc_getname: %" PRIu64 "\n",
2022 trace->stats.vfs_getname,
2023 trace->stats.proc_getname);
2028 perf_evlist__delete(evlist);
2030 trace->live = false;
2033 char errbuf[BUFSIZ];
2036 perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
2040 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2043 fprintf(trace->output, "%s\n", errbuf);
2044 goto out_delete_evlist;
/*
 * trace__replay - process a previously recorded perf data file.
 *
 * Wires the generic perf event callbacks and trace__process_sample() into
 * trace->tool, requests timestamp-ordered samples, opens a read-mode
 * session and uses the session's host machine as trace->host.  The
 * sys_enter and sys_exit tracepoints are located in the session's evlist
 * (raw_syscalls first, then the older syscalls names) and initialised with
 * trace__sys_enter() / trace__sys_exit().  After parsing the pid/tid target
 * strings the events are processed; on success, and if requested, the
 * thread summary is printed.  The session is deleted at the end.
 *
 * NOTE(review): the error checks after several calls, the NULL tests that
 * trigger the fallback tracepoint lookups, the goto labels and the return
 * statement are on lines elided from this listing.
 */
2048 static int trace__replay(struct trace *trace)
2050 const struct perf_evsel_str_handler handlers[] = {
2051 { "probe:vfs_getname", trace__vfs_getname, },
2053 struct perf_data_file file = {
2055 .mode = PERF_DATA_MODE_READ,
2057 struct perf_session *session;
2058 struct perf_evsel *evsel;
2061 trace->tool.sample = trace__process_sample;
2062 trace->tool.mmap = perf_event__process_mmap;
2063 trace->tool.mmap2 = perf_event__process_mmap2;
2064 trace->tool.comm = perf_event__process_comm;
2065 trace->tool.exit = perf_event__process_exit;
2066 trace->tool.fork = perf_event__process_fork;
2067 trace->tool.attr = perf_event__process_attr;
2068 trace->tool.tracing_data = perf_event__process_tracing_data;
2069 trace->tool.build_id = perf_event__process_build_id;
2071 trace->tool.ordered_samples = true;
2072 trace->tool.ordering_requires_timestamps = true;
2074 /* add tid to output */
2075 trace->multiple_threads = true;
2077 if (symbol__init() < 0)
2080 session = perf_session__new(&file, false, &trace->tool);
2081 if (session == NULL)
2084 trace->host = &session->machines.host;
2086 err = perf_session__set_tracepoints_handlers(session, handlers);
2090 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2091 "raw_syscalls:sys_enter");
2092 /* older kernels have syscalls tp versus raw_syscalls */
2094 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2095 "syscalls:sys_enter");
2096 if (evsel == NULL) {
2097 pr_err("Data file does not have raw_syscalls:sys_enter event\n");
2101 if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2102 perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
2103 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2107 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2108 "raw_syscalls:sys_exit");
2110 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2111 "syscalls:sys_exit");
2112 if (evsel == NULL) {
2113 pr_err("Data file does not have raw_syscalls:sys_exit event\n");
2117 if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2118 perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
2119 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2123 err = parse_target_str(trace);
2129 err = perf_session__process_events(session, &trace->tool);
2131 pr_err("Failed to process events, error %d", err);
2133 else if (trace->summary)
2134 trace__fprintf_thread_summary(trace, trace->output);
2137 perf_session__delete(session);
/*
 * trace__fprintf_threads_header - write the banner that introduces the
 * per-thread summary to @fp.
 *
 * Returns the fprintf() result converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
/*
 * thread__dump_stats - print the per-syscall statistics table of a thread.
 *
 * After a column header, the nodes of ttrace->syscall_stats are walked; each
 * node stands for one syscall id (inode->i) and carries its stats in
 * inode->priv.  For every syscall the call count and the min, average and
 * max durations are printed in milliseconds, together with the standard
 * deviation expressed as a percentage of the average (0 when the average is
 * 0).  The syscall name is taken from trace->syscalls.table.
 *
 * Returns the accumulated fprintf() counts.
 *
 * NOTE(review): the loop construct, the NULL checks on `inode` / `stats`,
 * some local declarations, two fprintf arguments and the return statement
 * are on lines elided from this listing.
 */
2151 static size_t thread__dump_stats(struct thread_trace *ttrace,
2152 struct trace *trace, FILE *fp)
2154 struct stats *stats;
2157 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2162 printed += fprintf(fp, "\n");
2164 printed += fprintf(fp, " syscall calls min avg max stddev\n");
2165 printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n");
2166 printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n");
2168 /* each int_node is a syscall */
2170 stats = inode->priv;
2172 double min = (double)(stats->min) / NSEC_PER_MSEC;
2173 double max = (double)(stats->max) / NSEC_PER_MSEC;
2174 double avg = avg_stats(stats);
2176 u64 n = (u64) stats->n;
/* relative stddev is computed before avg is rescaled to msec */
2178 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2179 avg /= NSEC_PER_MSEC;
2181 sc = &trace->syscalls.table[inode->i];
2182 printed += fprintf(fp, " %-15s", sc->name);
2183 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2185 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2188 inode = intlist__next(inode);
2191 printed += fprintf(fp, "\n\n");
/*
 * Context handed to trace__fprintf_one_thread() through its void *priv
 * argument; that function reads ->fp, ->trace and ->printed from it and adds
 * to ->printed.
 * NOTE(review): only the `trace` member is visible in this listing; the
 * other members are on elided lines.
 */
2196 /* struct used to pass data to per-thread function */
2197 struct summary_data {
2199 struct trace *trace;
/*
 * trace__fprintf_one_thread - per-thread callback printing one summary entry.
 *
 * @priv points to a struct summary_data.  For the given thread this prints
 * its comm and tid, its event count, that count as a percentage of all
 * events in the trace, its accumulated runtime in milliseconds and finally
 * its per-syscall statistics table.
 *
 * NOTE(review): `printed` starts from data->printed and is then added back
 * with `data->printed += printed`, so the running total already present in
 * data->printed is counted twice -- confirm whether that is intended.
 * The declaration of `ratio`, a NULL check on `ttrace` and the return
 * statement are on lines elided from this listing.
 */
2203 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2205 struct summary_data *data = priv;
2206 FILE *fp = data->fp;
2207 size_t printed = data->printed;
2208 struct trace *trace = data->trace;
2209 struct thread_trace *ttrace = thread->priv;
/* share of this thread in the total number of events, in percent */
2215 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2217 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2218 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2219 printed += fprintf(fp, "%.1f%%", ratio);
2220 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2221 printed += thread__dump_stats(ttrace, trace, fp);
2223 data->printed += printed;
/*
 * trace__fprintf_thread_summary - print the summary for every thread of the
 * traced host.
 *
 * Prints the header, then runs trace__fprintf_one_thread() on each thread of
 * trace->host with a struct summary_data as context.  Returns the value
 * accumulated in data.printed.
 *
 * NOTE(review): the initialiser of `data` is on lines elided from this
 * listing.
 */
2228 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2230 struct summary_data data = {
2234 data.printed = trace__fprintf_threads_header(fp);
2236 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2238 return data.printed;
2241 static int trace__set_duration(const struct option *opt, const char *str,
2242 int unset __maybe_unused)
2244 struct trace *trace = opt->value;
2246 trace->duration_filter = atof(str);
/*
 * trace__open_output - open @filename for writing as the trace output.
 *
 * If a non-empty file with that name already exists it is first renamed to
 * "<filename>.old".  The new stream is stored in trace->output.
 *
 * Returns 0 on success or -errno if fopen() failed.
 *
 * NOTE(review): the result of rename() is not checked, so a failed rename
 * means the existing file is truncated by the fopen() below.  The
 * declaration of `st` is on a line elided from this listing.
 */
2250 static int trace__open_output(struct trace *trace, const char *filename)
/* keep the previous, non-empty output around as a backup */
2254 if (!stat(filename, &st) && st.st_size) {
2255 char oldname[PATH_MAX];
2257 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2259 rename(filename, oldname);
2262 trace->output = fopen(filename, "w");
2264 return trace->output == NULL ? -errno : 0;
2267 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2269 const char * const trace_usage[] = {
2270 "perf trace [<options>] [<command>]",
2271 "perf trace [<options>] -- <command> [<options>]",
2272 "perf trace record [<options>] [<command>]",
2273 "perf trace record [<options>] -- <command> [<options>]",
2276 struct trace trace = {
2278 .machine = audit_detect_machine(),
2279 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2289 .user_freq = UINT_MAX,
2290 .user_interval = ULLONG_MAX,
2291 .no_buffering = true,
2297 const char *output_name = NULL;
2298 const char *ev_qualifier_str = NULL;
2299 const struct option trace_options[] = {
2300 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2301 "show the thread COMM next to its id"),
2302 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2303 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2304 "list of events to trace"),
2305 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2306 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2307 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2308 "trace events on existing process id"),
2309 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2310 "trace events on existing thread id"),
2311 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2312 "system-wide collection from all CPUs"),
2313 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2314 "list of cpus to monitor"),
2315 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2316 "child tasks do not inherit counters"),
2317 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2318 "number of mmap data pages",
2319 perf_evlist__parse_mmap_pages),
2320 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2322 OPT_CALLBACK(0, "duration", &trace, "float",
2323 "show only events with duration > N.M ms",
2324 trace__set_duration),
2325 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2326 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2327 OPT_BOOLEAN('T', "time", &trace.full_time,
2328 "Show full timestamp, not time relative to first start"),
2329 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2330 "Show only syscall summary with statistics"),
2331 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2332 "Show all syscalls and summary with statistics"),
2338 if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2339 return trace__record(argc-2, &argv[2]);
2341 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2343 /* summary_only implies summary option, but don't overwrite summary if set */
2344 if (trace.summary_only)
2345 trace.summary = trace.summary_only;
2347 if (output_name != NULL) {
2348 err = trace__open_output(&trace, output_name);
2350 perror("failed to create output file");
2355 if (ev_qualifier_str != NULL) {
2356 const char *s = ev_qualifier_str;
2358 trace.not_ev_qualifier = *s == '!';
2359 if (trace.not_ev_qualifier)
2361 trace.ev_qualifier = strlist__new(true, s);
2362 if (trace.ev_qualifier == NULL) {
2363 fputs("Not enough memory to parse event qualifier",
2370 err = target__validate(&trace.opts.target);
2372 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2373 fprintf(trace.output, "%s", bf);
2377 err = target__parse_uid(&trace.opts.target);
2379 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2380 fprintf(trace.output, "%s", bf);
2384 if (!argc && target__none(&trace.opts.target))
2385 trace.opts.target.system_wide = true;
2388 err = trace__replay(&trace);
2390 err = trace__run(&trace, argc, argv);
2393 if (output_name != NULL)
2394 fclose(trace.output);