perf tools: Remove tools/perf/util/include/asm/byteorder.h
[cascardo/linux.git] / tools / perf / builtin-trace.c
index 93ac724..b8c6766 100644 (file)
 #include "trace-event.h"
 #include "util/parse-events.h"
 #include "util/bpf-loader.h"
+#include "callchain.h"
+#include "syscalltbl.h"
+#include "rb_resort.h"
 
-#include <libaudit.h>
+#include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
 #include <stdlib.h>
-#include <sys/mman.h>
-#include <linux/futex.h>
 #include <linux/err.h>
-
-/* For older distros: */
-#ifndef MAP_STACK
-# define MAP_STACK             0x20000
-#endif
-
-#ifndef MADV_HWPOISON
-# define MADV_HWPOISON         100
-
-#endif
-
-#ifndef MADV_MERGEABLE
-# define MADV_MERGEABLE                12
-#endif
-
-#ifndef MADV_UNMERGEABLE
-# define MADV_UNMERGEABLE      13
-#endif
-
-#ifndef EFD_SEMAPHORE
-# define EFD_SEMAPHORE         1
-#endif
-
-#ifndef EFD_NONBLOCK
-# define EFD_NONBLOCK          00004000
-#endif
-
-#ifndef EFD_CLOEXEC
-# define EFD_CLOEXEC           02000000
-#endif
+#include <linux/filter.h>
+#include <linux/audit.h>
+#include <linux/random.h>
+#include <linux/stringify.h>
 
 #ifndef O_CLOEXEC
 # define O_CLOEXEC             02000000
 #endif
 
-#ifndef SOCK_DCCP
-# define SOCK_DCCP             6
-#endif
-
-#ifndef SOCK_CLOEXEC
-# define SOCK_CLOEXEC          02000000
-#endif
-
-#ifndef SOCK_NONBLOCK
-# define SOCK_NONBLOCK         00004000
-#endif
-
-#ifndef MSG_CMSG_CLOEXEC
-# define MSG_CMSG_CLOEXEC      0x40000000
-#endif
-
-#ifndef PERF_FLAG_FD_NO_GROUP
-# define PERF_FLAG_FD_NO_GROUP         (1UL << 0)
-#endif
-
-#ifndef PERF_FLAG_FD_OUTPUT
-# define PERF_FLAG_FD_OUTPUT           (1UL << 1)
-#endif
-
-#ifndef PERF_FLAG_PID_CGROUP
-# define PERF_FLAG_PID_CGROUP          (1UL << 2) /* pid=cgroup id, per-cpu mode only */
-#endif
-
-#ifndef PERF_FLAG_FD_CLOEXEC
-# define PERF_FLAG_FD_CLOEXEC          (1UL << 3) /* O_CLOEXEC */
-#endif
-
+struct trace { /* NOTE(review): appears to bundle the state of one trace session; its users are outside this hunk - confirm */
+       struct perf_tool        tool;
+       struct syscalltbl       *sctbl; /* presumably the syscall table declared in syscalltbl.h - confirm */
+       struct { /* syscall bookkeeping, reached as .syscalls */
+               int             max; /* presumably the highest index usable in table - TODO confirm */
+               struct syscall  *table;
+               struct { /* the two syscall evsels, reached as .syscalls.events */
+                       struct perf_evsel *sys_enter,
+                                         *sys_exit;
+               }               events;
+       } syscalls;
+       struct record_opts      opts;
+       struct perf_evlist      *evlist;
+       struct machine          *host;
+       struct thread           *current;
+       u64                     base_time;
+       FILE                    *output;
+       unsigned long           nr_events;
+       struct strlist          *ev_qualifier;
+       struct { /* presumably nr counts the ints stored in entries - allocation is not visible here */
+               size_t          nr;
+               int             *entries;
+       }                       ev_qualifier_ids;
+       struct intlist          *tid_list;
+       struct intlist          *pid_list;
+       struct { /* presumably nr counts the pids stored in entries - allocation is not visible here */
+               size_t          nr;
+               pid_t           *entries;
+       }                       filter_pids;
+       double                  duration_filter;
+       double                  runtime_ms;
+       struct { /* two u64 counters; what updates them is not visible in this hunk */
+               u64             vfs_getname,
+                               proc_getname;
+       } stats;
+       unsigned int            max_stack; /* NOTE(review): looks like a callchain depth bound (callchain.h is newly included) - confirm */
+       unsigned int            min_stack; /* NOTE(review): counterpart of max_stack - confirm semantics */
+       bool                    not_ev_qualifier; /* presumably inverts the ev_qualifier match - confirm */
+       bool                    live;
+       bool                    full_time;
+       bool                    sched;
+       bool                    multiple_threads;
+       bool                    summary;
+       bool                    summary_only;
+       bool                    show_comm;
+       bool                    show_tool_stats;
+       bool                    trace_syscalls;
+       bool                    kernel_syscallchains;
+       bool                    force;
+       bool                    vfs_getname;
+       int                     trace_pgfaults; /* an int, not a bool like its neighbours; value set is not visible here */
+       int                     open_id; /* presumably the syscall id of open - TODO confirm */
+};
 
 struct tp_field {
        int offset;
@@ -337,6 +333,10 @@ static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
 
 #define SCA_FD syscall_arg__scnprintf_fd
 
+#ifndef AT_FDCWD
+#define AT_FDCWD       -100 /* fallback value, only used when no earlier header has already defined AT_FDCWD */
+#endif
+
 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
                                           struct syscall_arg *arg)
 {
@@ -371,221 +371,6 @@ static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
 
 #define SCA_INT syscall_arg__scnprintf_int
 
-static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
-                                              struct syscall_arg *arg)
-{
-       int printed = 0, prot = arg->val;
-
-       if (prot == PROT_NONE)
-               return scnprintf(bf, size, "NONE");
-#define        P_MMAP_PROT(n) \
-       if (prot & PROT_##n) { \
-               printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
-               prot &= ~PROT_##n; \
-       }
-
-       P_MMAP_PROT(EXEC);
-       P_MMAP_PROT(READ);
-       P_MMAP_PROT(WRITE);
-#ifdef PROT_SEM
-       P_MMAP_PROT(SEM);
-#endif
-       P_MMAP_PROT(GROWSDOWN);
-       P_MMAP_PROT(GROWSUP);
-#undef P_MMAP_PROT
-
-       if (prot)
-               printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
-
-       return printed;
-}
-
-#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
-
-static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
-                                               struct syscall_arg *arg)
-{
-       int printed = 0, flags = arg->val;
-
-#define        P_MMAP_FLAG(n) \
-       if (flags & MAP_##n) { \
-               printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
-               flags &= ~MAP_##n; \
-       }
-
-       P_MMAP_FLAG(SHARED);
-       P_MMAP_FLAG(PRIVATE);
-#ifdef MAP_32BIT
-       P_MMAP_FLAG(32BIT);
-#endif
-       P_MMAP_FLAG(ANONYMOUS);
-       P_MMAP_FLAG(DENYWRITE);
-       P_MMAP_FLAG(EXECUTABLE);
-       P_MMAP_FLAG(FILE);
-       P_MMAP_FLAG(FIXED);
-       P_MMAP_FLAG(GROWSDOWN);
-#ifdef MAP_HUGETLB
-       P_MMAP_FLAG(HUGETLB);
-#endif
-       P_MMAP_FLAG(LOCKED);
-       P_MMAP_FLAG(NONBLOCK);
-       P_MMAP_FLAG(NORESERVE);
-       P_MMAP_FLAG(POPULATE);
-       P_MMAP_FLAG(STACK);
-#ifdef MAP_UNINITIALIZED
-       P_MMAP_FLAG(UNINITIALIZED);
-#endif
-#undef P_MMAP_FLAG
-
-       if (flags)
-               printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
-
-       return printed;
-}
-
-#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
-
-static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
-                                                 struct syscall_arg *arg)
-{
-       int printed = 0, flags = arg->val;
-
-#define P_MREMAP_FLAG(n) \
-       if (flags & MREMAP_##n) { \
-               printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
-               flags &= ~MREMAP_##n; \
-       }
-
-       P_MREMAP_FLAG(MAYMOVE);
-#ifdef MREMAP_FIXED
-       P_MREMAP_FLAG(FIXED);
-#endif
-#undef P_MREMAP_FLAG
-
-       if (flags)
-               printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
-
-       return printed;
-}
-
-#define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
-
-static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
-                                                     struct syscall_arg *arg)
-{
-       int behavior = arg->val;
-
-       switch (behavior) {
-#define        P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
-       P_MADV_BHV(NORMAL);
-       P_MADV_BHV(RANDOM);
-       P_MADV_BHV(SEQUENTIAL);
-       P_MADV_BHV(WILLNEED);
-       P_MADV_BHV(DONTNEED);
-       P_MADV_BHV(REMOVE);
-       P_MADV_BHV(DONTFORK);
-       P_MADV_BHV(DOFORK);
-       P_MADV_BHV(HWPOISON);
-#ifdef MADV_SOFT_OFFLINE
-       P_MADV_BHV(SOFT_OFFLINE);
-#endif
-       P_MADV_BHV(MERGEABLE);
-       P_MADV_BHV(UNMERGEABLE);
-#ifdef MADV_HUGEPAGE
-       P_MADV_BHV(HUGEPAGE);
-#endif
-#ifdef MADV_NOHUGEPAGE
-       P_MADV_BHV(NOHUGEPAGE);
-#endif
-#ifdef MADV_DONTDUMP
-       P_MADV_BHV(DONTDUMP);
-#endif
-#ifdef MADV_DODUMP
-       P_MADV_BHV(DODUMP);
-#endif
-#undef P_MADV_PHV
-       default: break;
-       }
-
-       return scnprintf(bf, size, "%#x", behavior);
-}
-
-#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
-
-static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
-                                          struct syscall_arg *arg)
-{
-       int printed = 0, op = arg->val;
-
-       if (op == 0)
-               return scnprintf(bf, size, "NONE");
-#define        P_CMD(cmd) \
-       if ((op & LOCK_##cmd) == LOCK_##cmd) { \
-               printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
-               op &= ~LOCK_##cmd; \
-       }
-
-       P_CMD(SH);
-       P_CMD(EX);
-       P_CMD(NB);
-       P_CMD(UN);
-       P_CMD(MAND);
-       P_CMD(RW);
-       P_CMD(READ);
-       P_CMD(WRITE);
-#undef P_OP
-
-       if (op)
-               printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
-
-       return printed;
-}
-
-#define SCA_FLOCK syscall_arg__scnprintf_flock
-
-static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
-{
-       enum syscall_futex_args {
-               SCF_UADDR   = (1 << 0),
-               SCF_OP      = (1 << 1),
-               SCF_VAL     = (1 << 2),
-               SCF_TIMEOUT = (1 << 3),
-               SCF_UADDR2  = (1 << 4),
-               SCF_VAL3    = (1 << 5),
-       };
-       int op = arg->val;
-       int cmd = op & FUTEX_CMD_MASK;
-       size_t printed = 0;
-
-       switch (cmd) {
-#define        P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
-       P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
-       P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
-       P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
-       P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
-       P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
-       P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
-       P_FUTEX_OP(WAKE_OP);                                                      break;
-       P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
-       P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
-       P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
-       P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
-       P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
-       P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
-       default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
-       }
-
-       if (op & FUTEX_PRIVATE_FLAG)
-               printed += scnprintf(bf + printed, size - printed, "|PRIV");
-
-       if (op & FUTEX_CLOCK_REALTIME)
-               printed += scnprintf(bf + printed, size - printed, "|CLKRT");
-
-       return printed;
-}
-
-#define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
-
 static const char *bpf_cmd[] = {
        "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
        "MAP_GET_NEXT_KEY", "PROG_LOAD",
@@ -652,110 +437,6 @@ static const char *socket_families[] = {
 };
 static DEFINE_STRARRAY(socket_families);
 
-#ifndef SOCK_TYPE_MASK
-#define SOCK_TYPE_MASK 0xf
-#endif
-
-static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
-                                                     struct syscall_arg *arg)
-{
-       size_t printed;
-       int type = arg->val,
-           flags = type & ~SOCK_TYPE_MASK;
-
-       type &= SOCK_TYPE_MASK;
-       /*
-        * Can't use a strarray, MIPS may override for ABI reasons.
-        */
-       switch (type) {
-#define        P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
-       P_SK_TYPE(STREAM);
-       P_SK_TYPE(DGRAM);
-       P_SK_TYPE(RAW);
-       P_SK_TYPE(RDM);
-       P_SK_TYPE(SEQPACKET);
-       P_SK_TYPE(DCCP);
-       P_SK_TYPE(PACKET);
-#undef P_SK_TYPE
-       default:
-               printed = scnprintf(bf, size, "%#x", type);
-       }
-
-#define        P_SK_FLAG(n) \
-       if (flags & SOCK_##n) { \
-               printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
-               flags &= ~SOCK_##n; \
-       }
-
-       P_SK_FLAG(CLOEXEC);
-       P_SK_FLAG(NONBLOCK);
-#undef P_SK_FLAG
-
-       if (flags)
-               printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
-
-       return printed;
-}
-
-#define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
-
-#ifndef MSG_PROBE
-#define MSG_PROBE           0x10
-#endif
-#ifndef MSG_WAITFORONE
-#define MSG_WAITFORONE 0x10000
-#endif
-#ifndef MSG_SENDPAGE_NOTLAST
-#define MSG_SENDPAGE_NOTLAST 0x20000
-#endif
-#ifndef MSG_FASTOPEN
-#define MSG_FASTOPEN        0x20000000
-#endif
-
-static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
-                                              struct syscall_arg *arg)
-{
-       int printed = 0, flags = arg->val;
-
-       if (flags == 0)
-               return scnprintf(bf, size, "NONE");
-#define        P_MSG_FLAG(n) \
-       if (flags & MSG_##n) { \
-               printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
-               flags &= ~MSG_##n; \
-       }
-
-       P_MSG_FLAG(OOB);
-       P_MSG_FLAG(PEEK);
-       P_MSG_FLAG(DONTROUTE);
-       P_MSG_FLAG(TRYHARD);
-       P_MSG_FLAG(CTRUNC);
-       P_MSG_FLAG(PROBE);
-       P_MSG_FLAG(TRUNC);
-       P_MSG_FLAG(DONTWAIT);
-       P_MSG_FLAG(EOR);
-       P_MSG_FLAG(WAITALL);
-       P_MSG_FLAG(FIN);
-       P_MSG_FLAG(SYN);
-       P_MSG_FLAG(CONFIRM);
-       P_MSG_FLAG(RST);
-       P_MSG_FLAG(ERRQUEUE);
-       P_MSG_FLAG(NOSIGNAL);
-       P_MSG_FLAG(MORE);
-       P_MSG_FLAG(WAITFORONE);
-       P_MSG_FLAG(SENDPAGE_NOTLAST);
-       P_MSG_FLAG(FASTOPEN);
-       P_MSG_FLAG(CMSG_CLOEXEC);
-#undef P_MSG_FLAG
-
-       if (flags)
-               printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
-
-       return printed;
-}
-
-#define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
-
 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
                                                 struct syscall_arg *arg)
 {
@@ -788,116 +469,6 @@ static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
 
 #define SCA_FILENAME syscall_arg__scnprintf_filename
 
-static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
-                                              struct syscall_arg *arg)
-{
-       int printed = 0, flags = arg->val;
-
-       if (!(flags & O_CREAT))
-               arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
-
-       if (flags == 0)
-               return scnprintf(bf, size, "RDONLY");
-#define        P_FLAG(n) \
-       if (flags & O_##n) { \
-               printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
-               flags &= ~O_##n; \
-       }
-
-       P_FLAG(APPEND);
-       P_FLAG(ASYNC);
-       P_FLAG(CLOEXEC);
-       P_FLAG(CREAT);
-       P_FLAG(DIRECT);
-       P_FLAG(DIRECTORY);
-       P_FLAG(EXCL);
-       P_FLAG(LARGEFILE);
-       P_FLAG(NOATIME);
-       P_FLAG(NOCTTY);
-#ifdef O_NONBLOCK
-       P_FLAG(NONBLOCK);
-#elif O_NDELAY
-       P_FLAG(NDELAY);
-#endif
-#ifdef O_PATH
-       P_FLAG(PATH);
-#endif
-       P_FLAG(RDWR);
-#ifdef O_DSYNC
-       if ((flags & O_SYNC) == O_SYNC)
-               printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
-       else {
-               P_FLAG(DSYNC);
-       }
-#else
-       P_FLAG(SYNC);
-#endif
-       P_FLAG(TRUNC);
-       P_FLAG(WRONLY);
-#undef P_FLAG
-
-       if (flags)
-               printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
-
-       return printed;
-}
-
-#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
-
-static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
-                                               struct syscall_arg *arg)
-{
-       int printed = 0, flags = arg->val;
-
-       if (flags == 0)
-               return 0;
-
-#define        P_FLAG(n) \
-       if (flags & PERF_FLAG_##n) { \
-               printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
-               flags &= ~PERF_FLAG_##n; \
-       }
-
-       P_FLAG(FD_NO_GROUP);
-       P_FLAG(FD_OUTPUT);
-       P_FLAG(PID_CGROUP);
-       P_FLAG(FD_CLOEXEC);
-#undef P_FLAG
-
-       if (flags)
-               printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
-
-       return printed;
-}
-
-#define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
-
-static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
-                                                  struct syscall_arg *arg)
-{
-       int printed = 0, flags = arg->val;
-
-       if (flags == 0)
-               return scnprintf(bf, size, "NONE");
-#define        P_FLAG(n) \
-       if (flags & EFD_##n) { \
-               printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
-               flags &= ~EFD_##n; \
-       }
-
-       P_FLAG(SEMAPHORE);
-       P_FLAG(CLOEXEC);
-       P_FLAG(NONBLOCK);
-#undef P_FLAG
-
-       if (flags)
-               printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
-
-       return printed;
-}
-
-#define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
-
 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
                                                struct syscall_arg *arg)
 {
@@ -921,59 +492,6 @@ static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
 
 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
 
-static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
-{
-       int sig = arg->val;
-
-       switch (sig) {
-#define        P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
-       P_SIGNUM(HUP);
-       P_SIGNUM(INT);
-       P_SIGNUM(QUIT);
-       P_SIGNUM(ILL);
-       P_SIGNUM(TRAP);
-       P_SIGNUM(ABRT);
-       P_SIGNUM(BUS);
-       P_SIGNUM(FPE);
-       P_SIGNUM(KILL);
-       P_SIGNUM(USR1);
-       P_SIGNUM(SEGV);
-       P_SIGNUM(USR2);
-       P_SIGNUM(PIPE);
-       P_SIGNUM(ALRM);
-       P_SIGNUM(TERM);
-       P_SIGNUM(CHLD);
-       P_SIGNUM(CONT);
-       P_SIGNUM(STOP);
-       P_SIGNUM(TSTP);
-       P_SIGNUM(TTIN);
-       P_SIGNUM(TTOU);
-       P_SIGNUM(URG);
-       P_SIGNUM(XCPU);
-       P_SIGNUM(XFSZ);
-       P_SIGNUM(VTALRM);
-       P_SIGNUM(PROF);
-       P_SIGNUM(WINCH);
-       P_SIGNUM(IO);
-       P_SIGNUM(PWR);
-       P_SIGNUM(SYS);
-#ifdef SIGEMT
-       P_SIGNUM(EMT);
-#endif
-#ifdef SIGSTKFLT
-       P_SIGNUM(STKFLT);
-#endif
-#ifdef SIGSWI
-       P_SIGNUM(SWI);
-#endif
-       default: break;
-       }
-
-       return scnprintf(bf, size, "%#x", sig);
-}
-
-#define SCA_SIGNUM syscall_arg__scnprintf_signum
-
 #if defined(__i386__) || defined(__x86_64__)
 /*
  * FIXME: Make this available to all arches.
@@ -1001,105 +519,125 @@ static const char *tioctls[] = {
 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
 #endif /* defined(__i386__) || defined(__x86_64__) */
 
+#ifndef GRND_NONBLOCK
+#define GRND_NONBLOCK  0x0001 /* fallback value, only used when no earlier header defines it */
+#endif
+#ifndef GRND_RANDOM
+#define GRND_RANDOM    0x0002 /* fallback value, only used when no earlier header defines it */
+#endif
+
+static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size, /* writes arg->val into bf as "|"-separated GRND_* flag names */
+                                                  struct syscall_arg *arg)
+{
+       int printed = 0, flags = arg->val; /* printed: running sum of scnprintf() results; flags: bits not yet shown */
+
+#define        P_FLAG(n) /* when GRND_<n> is set: append its name, "|"-prefixed unless first, then clear that bit */ \
+       if (flags & GRND_##n) { \
+               printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+               flags &= ~GRND_##n; \
+       }
+
+       P_FLAG(RANDOM); /* output order is fixed: RANDOM first, NONBLOCK second */
+       P_FLAG(NONBLOCK);
+#undef P_FLAG
+
+       if (flags) /* bits with no name above are appended as one hex number */
+               printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+       return printed; /* stays 0, with bf never written, when arg->val has no bits set */
+}
+
+#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags /* short alias used in the syscall_fmts[] arg_scnprintf entries */
+
 #define STRARRAY(arg, name, array) \
          .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
          .arg_parm      = { [arg] = &strarray__##array, }
 
+#include "trace/beauty/eventfd.c"
+#include "trace/beauty/flock.c"
+#include "trace/beauty/futex_op.c"
+#include "trace/beauty/mmap.c"
+#include "trace/beauty/mode_t.c"
+#include "trace/beauty/msg_flags.c"
+#include "trace/beauty/open_flags.c"
+#include "trace/beauty/perf_event_open.c"
+#include "trace/beauty/pid.c"
+#include "trace/beauty/sched_policy.c"
+#include "trace/beauty/seccomp.c"
+#include "trace/beauty/signum.c"
+#include "trace/beauty/socket_type.c"
+#include "trace/beauty/waitid_options.c"
+
 static struct syscall_fmt {
        const char *name;
        const char *alias;
        size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
        void       *arg_parm[6];
        bool       errmsg;
+       bool       errpid;
        bool       timeout;
        bool       hexret;
 } syscall_fmts[] = {
        { .name     = "access",     .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
-                            [1] = SCA_ACCMODE,  /* mode */ }, },
+         .arg_scnprintf = { [1] = SCA_ACCMODE,  /* mode */ }, },
        { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
        { .name     = "bpf",        .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
        { .name     = "brk",        .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
-       { .name     = "chdir",      .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
-       { .name     = "chmod",      .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
-       { .name     = "chroot",     .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
+       { .name     = "chdir",      .errmsg = true, },
+       { .name     = "chmod",      .errmsg = true, },
+       { .name     = "chroot",     .errmsg = true, },
        { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
+       { .name     = "clone",      .errpid = true, },
        { .name     = "close",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
        { .name     = "connect",    .errmsg = true, },
-       { .name     = "creat",      .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
-       { .name     = "dup",        .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
-       { .name     = "dup2",       .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
-       { .name     = "dup3",       .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
+       { .name     = "creat",      .errmsg = true, },
+       { .name     = "dup",        .errmsg = true, },
+       { .name     = "dup2",       .errmsg = true, },
+       { .name     = "dup3",       .errmsg = true, },
        { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
        { .name     = "eventfd2",   .errmsg = true,
          .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
-       { .name     = "faccessat",  .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
-                            [1] = SCA_FILENAME, /* filename */ }, },
-       { .name     = "fadvise64",  .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
-       { .name     = "fallocate",  .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
-       { .name     = "fchdir",     .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
-       { .name     = "fchmod",     .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
+       { .name     = "faccessat",  .errmsg = true, },
+       { .name     = "fadvise64",  .errmsg = true, },
+       { .name     = "fallocate",  .errmsg = true, },
+       { .name     = "fchdir",     .errmsg = true, },
+       { .name     = "fchmod",     .errmsg = true, },
        { .name     = "fchmodat",   .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
-                            [1] = SCA_FILENAME, /* filename */ }, },
-       { .name     = "fchown",     .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
+         .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
+       { .name     = "fchown",     .errmsg = true, },
        { .name     = "fchownat",   .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
-                            [1] = SCA_FILENAME, /* filename */ }, },
+         .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
        { .name     = "fcntl",      .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */
-                            [1] = SCA_STRARRAY, /* cmd */ },
+         .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
          .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
-       { .name     = "fdatasync",  .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
+       { .name     = "fdatasync",  .errmsg = true, },
        { .name     = "flock",      .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */
-                            [1] = SCA_FLOCK, /* cmd */ }, },
-       { .name     = "fsetxattr",  .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
-       { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
-       { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
-         .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
-                            [1] = SCA_FILENAME, /* filename */ }, },
-       { .name     = "fstatfs",    .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
-       { .name     = "fsync",    .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
-       { .name     = "ftruncate", .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
+         .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
+       { .name     = "fsetxattr",  .errmsg = true, },
+       { .name     = "fstat",      .errmsg = true, .alias = "newfstat", },
+       { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat", },
+       { .name     = "fstatfs",    .errmsg = true, },
+       { .name     = "fsync",    .errmsg = true, },
+       { .name     = "ftruncate", .errmsg = true, },
        { .name     = "futex",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
        { .name     = "futimesat", .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
-                            [1] = SCA_FILENAME, /* filename */ }, },
-       { .name     = "getdents",   .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
-       { .name     = "getdents64", .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
+         .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
+       { .name     = "getdents",   .errmsg = true, },
+       { .name     = "getdents64", .errmsg = true, },
        { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
+       { .name     = "getpid",     .errpid = true, },
+       { .name     = "getpgid",    .errpid = true, },
+       { .name     = "getppid",    .errpid = true, },
+       { .name     = "getrandom",  .errmsg = true,
+         .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
        { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
-       { .name     = "getxattr",    .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
-       { .name     = "inotify_add_watch",          .errmsg = true,
-         .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
+       { .name     = "getxattr",   .errmsg = true, },
+       { .name     = "inotify_add_watch",          .errmsg = true, },
        { .name     = "ioctl",      .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */
+         .arg_scnprintf = {
 #if defined(__i386__) || defined(__x86_64__)
 /*
  * FIXME: Make this available to all arches.
@@ -1113,41 +651,28 @@ static struct syscall_fmt {
        { .name     = "keyctl",     .errmsg = true, STRARRAY(0, option, keyctl_options), },
        { .name     = "kill",       .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
-       { .name     = "lchown",    .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
-       { .name     = "lgetxattr",  .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
+       { .name     = "lchown",    .errmsg = true, },
+       { .name     = "lgetxattr",  .errmsg = true, },
        { .name     = "linkat",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
-       { .name     = "listxattr",  .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
-       { .name     = "llistxattr", .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
-       { .name     = "lremovexattr",  .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
+       { .name     = "listxattr",  .errmsg = true, },
+       { .name     = "llistxattr", .errmsg = true, },
+       { .name     = "lremovexattr",  .errmsg = true, },
        { .name     = "lseek",      .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */
-                            [2] = SCA_STRARRAY, /* whence */ },
+         .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
          .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
-       { .name     = "lsetxattr",  .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
-       { .name     = "lstat",      .errmsg = true, .alias = "newlstat",
-         .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
-       { .name     = "lsxattr",    .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
+       { .name     = "lsetxattr",  .errmsg = true, },
+       { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
+       { .name     = "lsxattr",    .errmsg = true, },
        { .name     = "madvise",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX,      /* start */
                             [2] = SCA_MADV_BHV, /* behavior */ }, },
-       { .name     = "mkdir",    .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
+       { .name     = "mkdir",    .errmsg = true, },
        { .name     = "mkdirat",    .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
-                            [1] = SCA_FILENAME, /* pathname */ }, },
-       { .name     = "mknod",      .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
+         .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
+       { .name     = "mknod",      .errmsg = true, },
        { .name     = "mknodat",    .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
-                            [1] = SCA_FILENAME, /* filename */ }, },
+         .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
        { .name     = "mlock",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "mlockall",   .errmsg = true,
@@ -1155,8 +680,7 @@ static struct syscall_fmt {
        { .name     = "mmap",       .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
                             [2] = SCA_MMAP_PROT, /* prot */
-                            [3] = SCA_MMAP_FLAGS, /* flags */
-                            [4] = SCA_FD,        /* fd */ }, },
+                            [3] = SCA_MMAP_FLAGS, /* flags */ }, },
        { .name     = "mprotect",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* start */
                             [2] = SCA_MMAP_PROT, /* prot */ }, },
@@ -1173,60 +697,43 @@ static struct syscall_fmt {
        { .name     = "name_to_handle_at", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "newfstatat", .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
-                            [1] = SCA_FILENAME, /* filename */ }, },
+         .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "open",       .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FILENAME,   /* filename */
-                            [1] = SCA_OPEN_FLAGS, /* flags */ }, },
+         .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "open_by_handle_at", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [2] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "openat",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
-                            [1] = SCA_FILENAME, /* filename */
                             [2] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "perf_event_open", .errmsg = true,
-         .arg_scnprintf = { [1] = SCA_INT, /* pid */
-                            [2] = SCA_INT, /* cpu */
+         .arg_scnprintf = { [2] = SCA_INT, /* cpu */
                             [3] = SCA_FD,  /* group_fd */
                             [4] = SCA_PERF_FLAGS,  /* flags */ }, },
        { .name     = "pipe2",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
        { .name     = "poll",       .errmsg = true, .timeout = true, },
        { .name     = "ppoll",      .errmsg = true, .timeout = true, },
-       { .name     = "pread",      .errmsg = true, .alias = "pread64",
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
-       { .name     = "preadv",     .errmsg = true, .alias = "pread",
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
+       { .name     = "pread",      .errmsg = true, .alias = "pread64", },
+       { .name     = "preadv",     .errmsg = true, .alias = "pread", },
        { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
-       { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
-       { .name     = "pwritev",    .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
-       { .name     = "read",       .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
-       { .name     = "readlink",   .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
+       { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64", },
+       { .name     = "pwritev",    .errmsg = true, },
+       { .name     = "read",       .errmsg = true, },
+       { .name     = "readlink",   .errmsg = true, },
        { .name     = "readlinkat", .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
-                            [1] = SCA_FILENAME, /* pathname */ }, },
-       { .name     = "readv",      .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
+         .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
+       { .name     = "readv",      .errmsg = true, },
        { .name     = "recvfrom",   .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */
-                            [3] = SCA_MSG_FLAGS, /* flags */ }, },
+         .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "recvmmsg",   .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */
-                            [3] = SCA_MSG_FLAGS, /* flags */ }, },
+         .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "recvmsg",    .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */
-                            [2] = SCA_MSG_FLAGS, /* flags */ }, },
-       { .name     = "removexattr", .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
+         .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
+       { .name     = "removexattr", .errmsg = true, },
        { .name     = "renameat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
-       { .name     = "rmdir",    .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
+       { .name     = "rmdir",    .errmsg = true, },
        { .name     = "rt_sigaction", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
@@ -1234,22 +741,24 @@ static struct syscall_fmt {
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "rt_tgsigqueueinfo", .errmsg = true,
          .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
+       { .name     = "sched_setscheduler",   .errmsg = true,
+         .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
+       { .name     = "seccomp", .errmsg = true,
+         .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
+                            [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
        { .name     = "select",     .errmsg = true, .timeout = true, },
        { .name     = "sendmmsg",    .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */
-                            [3] = SCA_MSG_FLAGS, /* flags */ }, },
+         .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "sendmsg",    .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */
-                            [2] = SCA_MSG_FLAGS, /* flags */ }, },
+         .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "sendto",     .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */
-                            [3] = SCA_MSG_FLAGS, /* flags */ }, },
+         .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
+       { .name     = "set_tid_address", .errpid = true, },
        { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
+       { .name     = "setpgid",    .errmsg = true, },
        { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
-       { .name     = "setxattr",   .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
-       { .name     = "shutdown",   .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
+       { .name     = "setxattr",   .errmsg = true, },
+       { .name     = "shutdown",   .errmsg = true, },
        { .name     = "socket",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
                             [1] = SCA_SK_TYPE, /* type */ },
@@ -1258,10 +767,8 @@ static struct syscall_fmt {
          .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
                             [1] = SCA_SK_TYPE, /* type */ },
          .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
-       { .name     = "stat",       .errmsg = true, .alias = "newstat",
-         .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
-       { .name     = "statfs",     .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
+       { .name     = "stat",       .errmsg = true, .alias = "newstat", },
+       { .name     = "statfs",     .errmsg = true, },
        { .name     = "swapoff",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
        { .name     = "swapon",     .errmsg = true,
@@ -1272,25 +779,21 @@ static struct syscall_fmt {
          .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "tkill",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
-       { .name     = "truncate",   .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
+       { .name     = "truncate",   .errmsg = true, },
        { .name     = "uname",      .errmsg = true, .alias = "newuname", },
        { .name     = "unlinkat",   .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
-                            [1] = SCA_FILENAME, /* pathname */ }, },
-       { .name     = "utime",  .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
+         .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
+       { .name     = "utime",  .errmsg = true, },
        { .name     = "utimensat",  .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
-                            [1] = SCA_FILENAME, /* filename */ }, },
-       { .name     = "utimes",  .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
-       { .name     = "vmsplice",  .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
-       { .name     = "write",      .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
-       { .name     = "writev",     .errmsg = true,
-         .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
+         .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
+       { .name     = "utimes",  .errmsg = true, },
+       { .name     = "vmsplice",  .errmsg = true, },
+       { .name     = "wait4",      .errpid = true,
+         .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
+       { .name     = "waitid",     .errpid = true,
+         .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
+       { .name     = "write",      .errmsg = true, },
+       { .name     = "writev",     .errmsg = true, },
 };
 
 static int syscall_fmt__cmp(const void *name, const void *fmtp)
@@ -1398,59 +901,6 @@ fail:
 
 static const size_t trace__entry_str_size = 2048;
 
-struct trace {
-       struct perf_tool        tool;
-       struct {
-               int             machine;
-               int             open_id;
-       }                       audit;
-       struct {
-               int             max;
-               struct syscall  *table;
-               struct {
-                       struct perf_evsel *sys_enter,
-                                         *sys_exit;
-               }               events;
-       } syscalls;
-       struct record_opts      opts;
-       struct perf_evlist      *evlist;
-       struct machine          *host;
-       struct thread           *current;
-       u64                     base_time;
-       FILE                    *output;
-       unsigned long           nr_events;
-       struct strlist          *ev_qualifier;
-       struct {
-               size_t          nr;
-               int             *entries;
-       }                       ev_qualifier_ids;
-       struct intlist          *tid_list;
-       struct intlist          *pid_list;
-       struct {
-               size_t          nr;
-               pid_t           *entries;
-       }                       filter_pids;
-       double                  duration_filter;
-       double                  runtime_ms;
-       struct {
-               u64             vfs_getname,
-                               proc_getname;
-       } stats;
-       bool                    not_ev_qualifier;
-       bool                    live;
-       bool                    full_time;
-       bool                    sched;
-       bool                    multiple_threads;
-       bool                    summary;
-       bool                    summary_only;
-       bool                    show_comm;
-       bool                    show_tool_stats;
-       bool                    trace_syscalls;
-       bool                    force;
-       bool                    vfs_getname;
-       int                     trace_pgfaults;
-};
-
 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
 {
        struct thread_trace *ttrace = thread__priv(thread);
@@ -1618,6 +1068,7 @@ static int trace__process_event(struct trace *trace, struct machine *machine,
                color_fprintf(trace->output, PERF_COLOR_RED,
                              "LOST %" PRIu64 " events!\n", event->lost.lost);
                ret = machine__process_lost_event(machine, event, sample);
+               break;
        default:
                ret = machine__process_event(machine, event, sample);
                break;
@@ -1635,6 +1086,24 @@ static int trace__tool_process(struct perf_tool *tool,
        return trace__process_event(trace, machine, event, sample);
 }
 
+/*
+ * Kernel address resolver handed to trace_event__register_resolver().
+ *
+ * Defers to machine__resolve_kernel_addr() unless symbol_conf.kptr_restrict is
+ * set; in that case a warning is printed once per machine and NULL is returned
+ * for this and every later call on that machine.
+ */
+static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
+{
+       struct machine *host = vmachine;
+
+       /* Common case: nothing restricted and nothing warned about so far. */
+       if (!host->kptr_restrict_warned && !symbol_conf.kptr_restrict)
+               return machine__resolve_kernel_addr(vmachine, addrp, modp);
+
+       /* First restricted lookup on this machine: tell the user, but only once. */
+       if (!host->kptr_restrict_warned) {
+               pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
+                          "Check /proc/sys/kernel/kptr_restrict.\n\n"
+                          "Kernel samples will not be resolved.\n");
+               host->kptr_restrict_warned = true;
+       }
+
+       return NULL;
+}
+
 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
 {
        int err = symbol__init(NULL);
@@ -1646,7 +1115,7 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
        if (trace->host == NULL)
                return -ENOMEM;
 
-       if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
+       if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
                return -errno;
 
        err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
@@ -1661,7 +1130,7 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
 static int syscall__set_arg_fmts(struct syscall *sc)
 {
        struct format_field *field;
-       int idx = 0;
+       int idx = 0, len;
 
        sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
        if (sc->arg_scnprintf == NULL)
@@ -1673,8 +1142,31 @@ static int syscall__set_arg_fmts(struct syscall *sc)
        for (field = sc->args; field; field = field->next) {
                if (sc->fmt && sc->fmt->arg_scnprintf[idx])
                        sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
+               else if (strcmp(field->type, "const char *") == 0 &&
+                        (strcmp(field->name, "filename") == 0 ||
+                         strcmp(field->name, "path") == 0 ||
+                         strcmp(field->name, "pathname") == 0))
+                       sc->arg_scnprintf[idx] = SCA_FILENAME;
                else if (field->flags & FIELD_IS_POINTER)
                        sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
+               else if (strcmp(field->type, "pid_t") == 0)
+                       sc->arg_scnprintf[idx] = SCA_PID;
+               else if (strcmp(field->type, "umode_t") == 0)
+                       sc->arg_scnprintf[idx] = SCA_MODE_T;
+               else if ((strcmp(field->type, "int") == 0 ||
+                         strcmp(field->type, "unsigned int") == 0 ||
+                         strcmp(field->type, "long") == 0) &&
+                        (len = strlen(field->name)) >= 2 &&
+                        strcmp(field->name + len - 2, "fd") == 0) {
+                       /*
+                        * /sys/kernel/tracing/events/syscalls/sys_enter*
+                        * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
+                        * 65 int
+                        * 23 unsigned int
+                        * 7 unsigned long
+                        */
+                       sc->arg_scnprintf[idx] = SCA_FD;
+               }
                ++idx;
        }
 
@@ -1685,7 +1177,7 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 {
        char tp_name[128];
        struct syscall *sc;
-       const char *name = audit_syscall_to_name(id, trace->audit.machine);
+       const char *name = syscalltbl__name(trace->sctbl, id);
 
        if (name == NULL)
                return -1;
@@ -1758,9 +1250,9 @@ static int trace__validate_ev_qualifier(struct trace *trace)
 
        i = 0;
 
-       strlist__for_each(pos, trace->ev_qualifier) {
+       strlist__for_each_entry(pos, trace->ev_qualifier) {
                const char *sc = pos->s;
-               int id = audit_name_to_syscall(sc, trace->audit.machine);
+               int id = syscalltbl__id(trace->sctbl, sc);
 
                if (id < 0) {
                        if (err == 0) {
@@ -1846,7 +1338,12 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
                                                     "%ld", val);
                        }
                }
-       } else {
+       } else if (IS_ERR(sc->tp_format)) {
+               /*
+                * If we managed to read the tracepoint /format file, then we
+                * may end up not having any args, like with gettid(), so only
+                * print the raw args when we didn't manage to read it.
+                */
                int i = 0;
 
                while (i < 6) {
@@ -1987,7 +1484,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
                        goto out_put;
        }
 
-       if (!trace->summary_only)
+       if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
                trace__printf_interrupted_entry(trace, sample);
 
        ttrace->entry_time = sample->time;
@@ -1998,9 +1495,9 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
                                           args, trace, thread);
 
        if (sc->is_exit) {
-               if (!trace->duration_filter && !trace->summary_only) {
+               if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
                        trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
-                       fprintf(trace->output, "%-70s\n", ttrace->entry_str);
+                       fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
                }
        } else {
                ttrace->entry_pending = true;
@@ -2018,6 +1515,29 @@ out_put:
        return err;
 }
 
+/*
+ * Resolve the callchain carried by @sample into @cursor (trace->max_stack is
+ * passed through to thread__resolve_callchain()).
+ *
+ * Returns 0 on success, -1 if the sample could not be resolved or the
+ * callchain could not be built.
+ */
+static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
+                                   struct perf_sample *sample,
+                                   struct callchain_cursor *cursor)
+{
+       struct addr_location al;
+       int err;
+
+       if (machine__resolve(trace->host, &al, sample) < 0)
+               return -1;
+
+       err = thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack);
+
+       /*
+        * machine__resolve() hands back al.thread with a reference held; drop
+        * it now that the cursor is filled, otherwise every sample with a
+        * callchain leaks one thread reference.
+        */
+       addr_location__put(&al);
+
+       return err ? -1 : 0;
+}
+
+/*
+ * Print the callchain currently held in the shared callchain_cursor to
+ * trace->output, returning whatever sample__fprintf_callchain() returns.
+ */
+static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
+{
+       /* TODO: user-configurable print_opts */
+       const unsigned int flags = EVSEL__PRINT_SYM | EVSEL__PRINT_DSO | EVSEL__PRINT_UNKNOWN_AS_ADDR;
+       FILE *out = trace->output;
+
+       return sample__fprintf_callchain(sample, 38, flags, &callchain_cursor, out);
+}
+
 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
                           union perf_event *event __maybe_unused,
                           struct perf_sample *sample)
@@ -2025,7 +1545,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
        long ret;
        u64 duration = 0;
        struct thread *thread;
-       int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
+       int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
        struct syscall *sc = trace__syscall_info(trace, evsel, id);
        struct thread_trace *ttrace;
 
@@ -2042,7 +1562,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 
        ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
 
-       if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) {
+       if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
                trace__set_fd_pathname(thread, ret, ttrace->filename.name);
                ttrace->filename.pending_open = false;
                ++trace->stats.vfs_getname;
@@ -2057,6 +1577,15 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
        } else if (trace->duration_filter)
                goto out;
 
+       if (sample->callchain) {
+               callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
+               if (callchain_ret == 0) {
+                       if (callchain_cursor.nr < trace->min_stack)
+                               goto out;
+                       callchain_ret = 1;
+               }
+       }
+
        if (trace->summary_only)
                goto out;
 
@@ -2073,9 +1602,9 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
        if (sc->fmt == NULL) {
 signed_print:
                fprintf(trace->output, ") = %ld", ret);
-       } else if (ret < 0 && sc->fmt->errmsg) {
+       } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
                char bf[STRERR_BUFSIZE];
-               const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
+               const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
                           *e = audit_errno_to_name(-ret);
 
                fprintf(trace->output, ") = -1 %s %s", e, emsg);
@@ -2083,10 +1612,24 @@ signed_print:
                fprintf(trace->output, ") = 0 Timeout");
        else if (sc->fmt->hexret)
                fprintf(trace->output, ") = %#lx", ret);
-       else
+       else if (sc->fmt->errpid) {
+               struct thread *child = machine__find_thread(trace->host, ret, ret);
+
+               if (child != NULL) {
+                       fprintf(trace->output, ") = %ld", ret);
+                       if (child->comm_set)
+                               fprintf(trace->output, " (%s)", thread__comm_str(child));
+                       thread__put(child);
+               }
+       } else
                goto signed_print;
 
        fputc('\n', trace->output);
+
+       if (callchain_ret > 0)
+               trace__fprintf_callchain(trace, sample);
+       else if (callchain_ret < 0)
+               pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
 out:
        ttrace->entry_pending = false;
        err = 0;
@@ -2217,6 +1760,17 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
                                union perf_event *event __maybe_unused,
                                struct perf_sample *sample)
 {
+       int callchain_ret = 0;
+
+       if (sample->callchain) {
+               callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
+               if (callchain_ret == 0) {
+                       if (callchain_cursor.nr < trace->min_stack)
+                               goto out;
+                       callchain_ret = 1;
+               }
+       }
+
        trace__printf_interrupted_entry(trace, sample);
        trace__fprintf_tstamp(trace, sample->time, trace->output);
 
@@ -2234,6 +1788,12 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
        }
 
        fprintf(trace->output, ")\n");
+
+       if (callchain_ret > 0)
+               trace__fprintf_callchain(trace, sample);
+       else if (callchain_ret < 0)
+               pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
+out:
        return 0;
 }
 
@@ -2264,8 +1824,19 @@ static int trace__pgfault(struct trace *trace,
        char map_type = 'd';
        struct thread_trace *ttrace;
        int err = -1;
+       int callchain_ret = 0;
 
        thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+
+       if (sample->callchain) {
+               callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
+               if (callchain_ret == 0) {
+                       if (callchain_cursor.nr < trace->min_stack)
+                               goto out_put;
+                       callchain_ret = 1;
+               }
+       }
+
        ttrace = thread__trace(thread, trace->output);
        if (ttrace == NULL)
                goto out_put;
@@ -2307,6 +1878,11 @@ static int trace__pgfault(struct trace *trace,
        print_location(trace->output, sample, &al, true, false);
 
        fprintf(trace->output, " (%c%c)\n", map_type, al.level);
+
+       if (callchain_ret > 0)
+               trace__fprintf_callchain(trace, sample);
+       else if (callchain_ret < 0)
+               pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
 out:
        err = 0;
 out_put:
@@ -2326,6 +1902,23 @@ static bool skip_sample(struct trace *trace, struct perf_sample *sample)
        return false;
 }
 
+/*
+ * Latch the first usable sample timestamp as trace->base_time.
+ *
+ * sample->time is only trusted when the evsel actually asked for
+ * PERF_SAMPLE_TIME: BPF events were not setting it, and other events may
+ * legitimately go without timestamps in the future when we only care that
+ * they happened and about the information they carry (vfs_getname comes to
+ * mind), not about when they happened.
+ */
+static void trace__set_base_time(struct trace *trace,
+                                struct perf_evsel *evsel,
+                                struct perf_sample *sample)
+{
+       /* Nothing to do with full timestamps or once a base time is set. */
+       if (trace->full_time || trace->base_time != 0)
+               return;
+
+       if (evsel->attr.sample_type & PERF_SAMPLE_TIME)
+               trace->base_time = sample->time;
+}
+
 static int trace__process_sample(struct perf_tool *tool,
                                 union perf_event *event,
                                 struct perf_sample *sample,
@@ -2340,8 +1933,7 @@ static int trace__process_sample(struct perf_tool *tool,
        if (skip_sample(trace, sample))
                return 0;
 
-       if (!trace->full_time && trace->base_time == 0)
-               trace->base_time = sample->time;
+       trace__set_base_time(trace, evsel, sample);
 
        if (handler) {
                ++trace->nr_events;
@@ -2450,8 +2042,7 @@ static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
        return true;
 }
 
-static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
-                                   u64 config)
+static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
 {
        struct perf_evsel *evsel;
        struct perf_event_attr attr = {
@@ -2465,13 +2056,10 @@ static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
        event_attr_init(&attr);
 
        evsel = perf_evsel__new(&attr);
-       if (!evsel)
-               return -ENOMEM;
-
-       evsel->handler = trace__pgfault;
-       perf_evlist__add(evlist, evsel);
+       if (evsel)
+               evsel->handler = trace__pgfault;
 
-       return 0;
+       return evsel;
 }
 
 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
@@ -2479,9 +2067,6 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st
        const u32 type = event->header.type;
        struct perf_evsel *evsel;
 
-       if (!trace->full_time && trace->base_time == 0)
-               trace->base_time = sample->time;
-
        if (type != PERF_RECORD_SAMPLE) {
                trace__process_event(trace, trace->host, event, sample);
                return;
@@ -2493,6 +2078,8 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st
                return;
        }
 
+       trace__set_base_time(trace, evsel, sample);
+
        if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
            sample->raw_data == NULL) {
                fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
@@ -2527,6 +2114,15 @@ static int trace__add_syscall_newtp(struct trace *trace)
        perf_evlist__add(evlist, sys_enter);
        perf_evlist__add(evlist, sys_exit);
 
+       if (callchain_param.enabled && !trace->kernel_syscallchains) {
+               /*
+                * We're interested only in the user space callchain
+                * leading to the syscall, allow overriding that for
+                * debugging reasons using --kernel_syscall_callchains
+                */
+               sys_exit->attr.exclude_callchain_kernel = 1;
+       }
+
        trace->syscalls.events.sys_enter = sys_enter;
        trace->syscalls.events.sys_exit  = sys_exit;
 
@@ -2565,7 +2161,7 @@ out_enomem:
 static int trace__run(struct trace *trace, int argc, const char **argv)
 {
        struct perf_evlist *evlist = trace->evlist;
-       struct perf_evsel *evsel;
+       struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
        int err = -1, i;
        unsigned long before;
        const bool forks = argc > 0;
@@ -2579,14 +2175,19 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
        if (trace->trace_syscalls)
                trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
 
-       if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
-           perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
-               goto out_error_mem;
+       if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
+               pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
+               if (pgfault_maj == NULL)
+                       goto out_error_mem;
+               perf_evlist__add(evlist, pgfault_maj);
        }
 
-       if ((trace->trace_pgfaults & TRACE_PFMIN) &&
-           perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
-               goto out_error_mem;
+       if ((trace->trace_pgfaults & TRACE_PFMIN)) {
+               pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
+               if (pgfault_min == NULL)
+                       goto out_error_mem;
+               perf_evlist__add(evlist, pgfault_min);
+       }
 
        if (trace->sched &&
            perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
@@ -2605,7 +2206,45 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
                goto out_delete_evlist;
        }
 
-       perf_evlist__config(evlist, &trace->opts);
+       perf_evlist__config(evlist, &trace->opts, NULL);
+
+       if (callchain_param.enabled) {
+               bool use_identifier = false;
+
+               if (trace->syscalls.events.sys_exit) {
+                       perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
+                                                    &trace->opts, &callchain_param);
+                       use_identifier = true;
+               }
+
+               if (pgfault_maj) {
+                       perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
+                       use_identifier = true;
+               }
+
+               if (pgfault_min) {
+                       perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
+                       use_identifier = true;
+               }
+
+               if (use_identifier) {
+                      /*
+                       * Now we have evsels with different sample_ids, use
+                       * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
+                       * from a fixed position in each ring buffer record.
+                       *
+                       * As of the changeset introducing this comment, this
+                       * isn't strictly needed, as the fields that can come before
+                       * PERF_SAMPLE_ID are all used, but we'll probably disable
+                       * some of those for things like copying the payload of
+                       * pointer syscall arguments, and for vfs_getname we don't
+                       * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
+                       * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
+                       */
+                       perf_evlist__set_sample_bit(evlist, IDENTIFIER);
+                       perf_evlist__reset_sample_bit(evlist, ID);
+               }
+       }
 
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);
@@ -2766,7 +2405,7 @@ out_error_apply_filters:
        fprintf(trace->output,
                "Failed to set filter \"%s\" on event %s with %d (%s)\n",
                evsel->filter, perf_evsel__name(evsel), errno,
-               strerror_r(errno, errbuf, sizeof(errbuf)));
+               str_error_r(errno, errbuf, sizeof(errbuf)));
        goto out_delete_evlist;
 }
 out_error_mem:
@@ -2847,7 +2486,7 @@ static int trace__replay(struct trace *trace)
                goto out;
        }
 
-       evlist__for_each(session->evlist, evsel) {
+       evlist__for_each_entry(session->evlist, evsel) {
                if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
                    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
                     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
@@ -2883,15 +2522,29 @@ static size_t trace__fprintf_threads_header(FILE *fp)
        return printed;
 }
 
+DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
+       struct stats    *stats;
+       double          msecs;
+       int             syscall;
+)
+{
+       struct int_node *source = rb_entry(nd, struct int_node, rb_node);
+       struct stats *stats = source->priv;
+
+       entry->syscall = source->i;
+       entry->stats   = stats;
+       entry->msecs   = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
+}
+
 static size_t thread__dump_stats(struct thread_trace *ttrace,
                                 struct trace *trace, FILE *fp)
 {
-       struct stats *stats;
        size_t printed = 0;
        struct syscall *sc;
-       struct int_node *inode = intlist__first(ttrace->syscall_stats);
+       struct rb_node *nd;
+       DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
 
-       if (inode == NULL)
+       if (syscall_stats == NULL)
                return 0;
 
        printed += fprintf(fp, "\n");
@@ -2900,9 +2553,8 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
        printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
        printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
 
-       /* each int_node is a syscall */
-       while (inode) {
-               stats = inode->priv;
+       resort_rb__for_each_entry(nd, syscall_stats) {
+               struct stats *stats = syscall_stats_entry->stats;
                if (stats) {
                        double min = (double)(stats->min) / NSEC_PER_MSEC;
                        double max = (double)(stats->max) / NSEC_PER_MSEC;
@@ -2913,34 +2565,23 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
                        pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
                        avg /= NSEC_PER_MSEC;
 
-                       sc = &trace->syscalls.table[inode->i];
+                       sc = &trace->syscalls.table[syscall_stats_entry->syscall];
                        printed += fprintf(fp, "   %-15s", sc->name);
                        printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
-                                          n, avg * n, min, avg);
+                                          n, syscall_stats_entry->msecs, min, avg);
                        printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
                }
-
-               inode = intlist__next(inode);
        }
 
+       resort_rb__delete(syscall_stats);
        printed += fprintf(fp, "\n\n");
 
        return printed;
 }
 
-/* struct used to pass data to per-thread function */
-struct summary_data {
-       FILE *fp;
-       struct trace *trace;
-       size_t printed;
-};
-
-static int trace__fprintf_one_thread(struct thread *thread, void *priv)
+static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
 {
-       struct summary_data *data = priv;
-       FILE *fp = data->fp;
-       size_t printed = data->printed;
-       struct trace *trace = data->trace;
+       size_t printed = 0;
        struct thread_trace *ttrace = thread__priv(thread);
        double ratio;
 
@@ -2956,25 +2597,45 @@ static int trace__fprintf_one_thread(struct thread *thread, void *priv)
                printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
        if (ttrace->pfmin)
                printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
-       printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
+       if (trace->sched)
+               printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
+       else if (fputc('\n', fp) != EOF)
+               ++printed;
+
        printed += thread__dump_stats(ttrace, trace, fp);
 
-       data->printed += printed;
+       return printed;
+}
 
-       return 0;
+static unsigned long thread__nr_events(struct thread_trace *ttrace)
+{
+       return ttrace ? ttrace->nr_events : 0;
+}
+
+DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
+       struct thread *thread;
+)
+{
+       entry->thread = rb_entry(nd, struct thread, rb_node);
 }
 
 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
 {
-       struct summary_data data = {
-               .fp = fp,
-               .trace = trace
-       };
-       data.printed = trace__fprintf_threads_header(fp);
+       DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
+       size_t printed = trace__fprintf_threads_header(fp);
+       struct rb_node *nd;
 
-       machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
+       if (threads == NULL) {
+               fprintf(fp, "%s", "Error sorting output by nr_events!\n");
+               return 0;
+       }
 
-       return data.printed;
+       resort_rb__for_each_entry(nd, threads)
+               printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
+
+       resort_rb__delete(threads);
+
+       return printed;
 }
 
 static int trace__set_duration(const struct option *opt, const char *str,
@@ -3056,7 +2717,7 @@ static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
 {
        struct perf_evsel *evsel;
 
-       evlist__for_each(evlist, evsel)
+       evlist__for_each_entry(evlist, evsel)
                evsel->handler = handler;
 }
 
@@ -3070,10 +2731,6 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
                NULL
        };
        struct trace trace = {
-               .audit = {
-                       .machine = audit_detect_machine(),
-                       .open_id = audit_name_to_syscall("open", trace.audit.machine),
-               },
                .syscalls = {
                        . max = -1,
                },
@@ -3091,6 +2748,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
                .output = stderr,
                .show_comm = true,
                .trace_syscalls = true,
+               .kernel_syscallchains = false,
+               .max_stack = UINT_MAX,
        };
        const char *output_name = NULL;
        const char *ev_qualifier_str = NULL;
@@ -3136,10 +2795,24 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
                     "Trace pagefaults", parse_pagefaults, "maj"),
        OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
        OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
+       OPT_CALLBACK(0, "call-graph", &trace.opts,
+                    "record_mode[,record_size]", record_callchain_help,
+                    &record_parse_callchain_opt),
+       OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
+                   "Show the kernel callchains on the syscall exit path"),
+       OPT_UINTEGER(0, "min-stack", &trace.min_stack,
+                    "Set the minimum stack depth when parsing the callchain, "
+                    "anything below the specified depth will be ignored."),
+       OPT_UINTEGER(0, "max-stack", &trace.max_stack,
+                    "Set the maximum stack depth when parsing the callchain, "
+                    "anything beyond the specified depth will be ignored. "
+                    "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
        OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
                        "per thread proc mmap processing timeout in ms"),
        OPT_END()
        };
+       bool __maybe_unused max_stack_user_set = true;
+       bool mmap_pages_user_set = true;
        const char * const trace_subcommands[] = { "record", NULL };
        int err;
        char bf[BUFSIZ];
@@ -3148,8 +2821,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
        signal(SIGFPE, sighandler_dump_stack);
 
        trace.evlist = perf_evlist__new();
+       trace.sctbl = syscalltbl__new();
 
-       if (trace.evlist == NULL) {
+       if (trace.evlist == NULL || trace.sctbl == NULL) {
                pr_err("Not enough memory to run!\n");
                err = -ENOMEM;
                goto out;
@@ -3158,11 +2832,40 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
        argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
                                 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
 
+       err = bpf__setup_stdout(trace.evlist);
+       if (err) {
+               bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
+               pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
+               goto out;
+       }
+
+       err = -1;
+
        if (trace.trace_pgfaults) {
                trace.opts.sample_address = true;
                trace.opts.sample_time = true;
        }
 
+       if (trace.opts.mmap_pages == UINT_MAX)
+               mmap_pages_user_set = false;
+
+       if (trace.max_stack == UINT_MAX) {
+               trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
+               max_stack_user_set = false;
+       }
+
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+       if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
+               record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
+#endif
+
+       if (callchain_param.enabled) {
+               if (!mmap_pages_user_set && geteuid() == 0)
+                       trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
+
+               symbol_conf.use_callchain = true;
+       }
+
        if (trace.evlist->nr_entries > 0)
                evlist__set_evsel_handler(trace.evlist, trace__event_handler);
 
@@ -3179,6 +2882,11 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
                return -1;
        }
 
+       if (!trace.trace_syscalls && ev_qualifier_str) {
+               pr_err("The -e option can't be used with --no-syscalls.\n");
+               goto out;
+       }
+
        if (output_name != NULL) {
                err = trace__open_output(&trace, output_name);
                if (err < 0) {
@@ -3187,6 +2895,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
                }
        }
 
+       trace.open_id = syscalltbl__id(trace.sctbl, "open");
+
        if (ev_qualifier_str != NULL) {
                const char *s = ev_qualifier_str;
                struct strlist_config slist_config = {