fs: Avoid premature clearing of capabilities
[cascardo/linux.git] / tools / perf / builtin-stat.c
1 /*
2  * builtin-stat.c
3  *
4  * Builtin stat command: Give a precise performance counters summary
5  * overview about any workload, CPU or specific PID.
6  *
7  * Sample output:
8
9    $ perf stat ./hackbench 10
10
11   Time: 0.118
12
13   Performance counter stats for './hackbench 10':
14
15        1708.761321 task-clock                #   11.037 CPUs utilized
16             41,190 context-switches          #    0.024 M/sec
17              6,735 CPU-migrations            #    0.004 M/sec
18             17,318 page-faults               #    0.010 M/sec
19      5,205,202,243 cycles                    #    3.046 GHz
20      3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
21      1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
22      2,603,501,247 instructions              #    0.50  insns per cycle
23                                              #    1.48  stalled cycles per insn
24        484,357,498 branches                  #  283.455 M/sec
25          6,388,934 branch-misses             #    1.32% of all branches
26
27         0.154822978  seconds time elapsed
28
29  *
30  * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
31  *
32  * Improvements and fixes by:
33  *
34  *   Arjan van de Ven <arjan@linux.intel.com>
35  *   Yanmin Zhang <yanmin.zhang@intel.com>
36  *   Wu Fengguang <fengguang.wu@intel.com>
37  *   Mike Galbraith <efault@gmx.de>
38  *   Paul Mackerras <paulus@samba.org>
39  *   Jaswinder Singh Rajput <jaswinder@kernel.org>
40  *
41  * Released under the GPL v2. (and only v2, not any later version)
42  */
43
44 #include "perf.h"
45 #include "builtin.h"
46 #include "util/cgroup.h"
47 #include "util/util.h"
48 #include <subcmd/parse-options.h>
49 #include "util/parse-events.h"
50 #include "util/pmu.h"
51 #include "util/event.h"
52 #include "util/evlist.h"
53 #include "util/evsel.h"
54 #include "util/debug.h"
55 #include "util/color.h"
56 #include "util/stat.h"
57 #include "util/header.h"
58 #include "util/cpumap.h"
59 #include "util/thread.h"
60 #include "util/thread_map.h"
61 #include "util/counts.h"
62 #include "util/group.h"
63 #include "util/session.h"
64 #include "util/tool.h"
65 #include "util/group.h"
66 #include "asm/bug.h"
67
68 #include <api/fs/fs.h>
69 #include <stdlib.h>
70 #include <sys/prctl.h>
71 #include <locale.h>
72 #include <math.h>
73
/* Single-space separator string; NOTE(review): presumably the fallback for csv_sep - confirm at the use site. */
#define DEFAULT_SEPARATOR       " "
/* Placeholder strings; NOTE(review): presumably printed instead of a value for unusable counters - confirm. */
#define CNTR_NOT_SUPPORTED      "<not supported>"
#define CNTR_NOT_COUNTED        "<not counted>"

/* Forward declaration: process_interval() below calls this before its definition. */
static void print_counters(struct timespec *ts, int argc, const char **argv);
79
/*
 * Default events used for perf stat -T.
 *
 * The adjacent literals concatenate into one comma-separated event string:
 * "task-clock,{instructions,cycles,cpu/cycles-t/,cpu/tx-start/,cpu/el-start/,cpu/cycles-ct/}".
 * NOTE(review): the "{...}" part is presumably parsed as one event group.
 */
static const char *transaction_attrs = {
        "task-clock,"
        "{"
        "instructions,"
        "cycles,"
        "cpu/cycles-t/,"
        "cpu/tx-start/,"
        "cpu/el-start/,"
        "cpu/cycles-ct/"
        "}"
};
92
/*
 * More limited version when the CPU does not have all events.
 *
 * Same layout as transaction_attrs, but without the cpu/el-start/ and
 * cpu/cycles-ct/ entries.
 */
static const char * transaction_limited_attrs = {
        "task-clock,"
        "{"
        "instructions,"
        "cycles,"
        "cpu/cycles-t/,"
        "cpu/tx-start/"
        "}"
};
103
/*
 * NULL-terminated list of topdown event names.
 * NOTE(review): presumably the default event set when topdown_run is
 * requested - confirm against the option handling.
 */
static const char * topdown_attrs[] = {
        "topdown-total-slots",
        "topdown-slots-retired",
        "topdown-recovery-bubbles",
        "topdown-fetch-bubbles",
        "topdown-slots-issued",
        NULL,
};
112
/* The event list that every counter operation in this file iterates over. */
static struct perf_evlist       *evsel_list;

/* What is being measured; queried via target__none() / target__has_cpu(). */
static struct target target = {
        .uid    = UINT_MAX,
};

/* Callback type mapping a CPU index within a cpu_map to an aggregation id. */
typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu);
120
/*
 * File-wide options and run state.
 * NOTE(review): most of these are presumably filled in by command-line
 * parsing outside the portion of the file visible here.
 */
static int                      run_count                       =  1;
static bool                     no_inherit                      = false;
/* pid of the forked workload (set in __run_perf_stat()); -1 when none. */
static volatile pid_t           child_pid                       = -1;
static bool                     null_run                        =  false;
static int                      detailed_run                    =  0;
static bool                     transaction_run;
static bool                     topdown_run                     = false;
static bool                     big_num                         =  true;
static int                      big_num_opt                     =  -1;
static const char               *csv_sep                        = NULL;
static bool                     csv_output                      = false;
static bool                     group                           = false;
/* Shell commands run through system() before/after the measurement. */
static const char               *pre_cmd                        = NULL;
static const char               *post_cmd                       = NULL;
/* When set, run_perf_stat() calls sync() before measuring. */
static bool                     sync_run                        = false;
/* Delay before enabling counters; multiplied by 1000 for usleep(), i.e. milliseconds. */
static unsigned int             initial_delay                   = 0;
static unsigned int             unit_width                      = 4; /* strlen("unit") */
static bool                     forever                         = false;
static bool                     metric_only                     = false;
static bool                     force_metric_only               = false;
/* CLOCK_MONOTONIC timestamp taken just before the workload starts. */
static struct timespec          ref_time;
static struct cpu_map           *aggr_map;
static aggr_get_id_t            aggr_get_id;
static bool                     append_file;
static const char               *output_name;
static int                      output_fd;
147
/*
 * State used when stat data is being recorded to a perf data file/pipe.
 * NOTE(review): the roles of session, tool, maps_allocated, cpus, threads
 * and aggr_mode are not exercised in the visible portion of the file.
 */
struct perf_stat {
        bool                     record;        /* tested through STAT_RECORD */
        struct perf_data_file    file;          /* destination of synthesized events */
        struct perf_session     *session;
        u64                      bytes_written; /* sum of written event header sizes */
        struct perf_tool         tool;
        bool                     maps_allocated;
        struct cpu_map          *cpus;
        struct thread_map       *threads;
        enum aggr_mode           aggr_mode;
};

static struct perf_stat         perf_stat;
/* True when recording is active. */
#define STAT_RECORD             perf_stat.record
162
/*
 * Polled by the wait loop in __run_perf_stat() when no workload is forked.
 * NOTE(review): presumably set from a signal handler elsewhere in the file.
 */
static volatile int done = 0;

/* Shared stat configuration: global aggregation and scaling enabled by default. */
static struct perf_stat_config stat_config = {
        .aggr_mode      = AGGR_GLOBAL,
        .scale          = true,
};
169
/*
 * Store a - b in *r.
 *
 * When the nanosecond part of @a is smaller than that of @b, one second is
 * borrowed so that r->tv_nsec stays non-negative for normalized inputs.
 */
static inline void diff_timespec(struct timespec *r, struct timespec *a,
                                 struct timespec *b)
{
        long nsecs = a->tv_nsec - b->tv_nsec;

        r->tv_sec = a->tv_sec - b->tv_sec;
        if (nsecs < 0) {
                /* borrow one second */
                nsecs += 1000000000L;
                r->tv_sec--;
        }
        r->tv_nsec = nsecs;
}
181
/*
 * Reset the statistics kept for evsel_list and the shadow stats.
 * NOTE(review): presumably used between repeated runs - no caller is
 * visible in this portion of the file.
 */
static void perf_stat__reset_stats(void)
{
        perf_evlist__reset_stats(evsel_list);
        perf_stat__reset_shadow_stats();
}
187
188 static int create_perf_stat_counter(struct perf_evsel *evsel)
189 {
190         struct perf_event_attr *attr = &evsel->attr;
191
192         if (stat_config.scale)
193                 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
194                                     PERF_FORMAT_TOTAL_TIME_RUNNING;
195
196         attr->inherit = !no_inherit;
197
198         /*
199          * Some events get initialized with sample_(period/type) set,
200          * like tracepoints. Clear it up for counting.
201          */
202         attr->sample_period = 0;
203
204         /*
205          * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless
206          * while avoiding that older tools show confusing messages.
207          *
208          * However for pipe sessions we need to keep it zero,
209          * because script's perf_evsel__check_attr is triggered
210          * by attr->sample_type != 0, and we can't run it on
211          * stat sessions.
212          */
213         if (!(STAT_RECORD && perf_stat.file.is_pipe))
214                 attr->sample_type = PERF_SAMPLE_IDENTIFIER;
215
216         /*
217          * Disabling all counters initially, they will be enabled
218          * either manually by us or by kernel via enable_on_exec
219          * set later.
220          */
221         if (perf_evsel__is_group_leader(evsel)) {
222                 attr->disabled = 1;
223
224                 /*
225                  * In case of initial_delay we enable tracee
226                  * events manually.
227                  */
228                 if (target__none(&target) && !initial_delay)
229                         attr->enable_on_exec = 1;
230         }
231
232         if (target__has_cpu(&target))
233                 return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
234
235         return perf_evsel__open_per_thread(evsel, evsel_list->threads);
236 }
237
238 /*
239  * Does the counter have nsecs as a unit?
240  */
241 static inline int nsec_counter(struct perf_evsel *evsel)
242 {
243         if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
244             perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
245                 return 1;
246
247         return 0;
248 }
249
250 static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
251                                      union perf_event *event,
252                                      struct perf_sample *sample __maybe_unused,
253                                      struct machine *machine __maybe_unused)
254 {
255         if (perf_data_file__write(&perf_stat.file, event, event->header.size) < 0) {
256                 pr_err("failed to write perf data, error: %m\n");
257                 return -1;
258         }
259
260         perf_stat.bytes_written += event->header.size;
261         return 0;
262 }
263
/*
 * Synthesize a stat-round event carrying @tm and @type and write it out
 * through process_synthesized_event().
 *
 * Returns the result of perf_event__synthesize_stat_round().
 */
static int write_stat_round_event(u64 tm, u64 type)
{
        return perf_event__synthesize_stat_round(NULL, tm, type,
                                                 process_synthesized_event,
                                                 NULL);
}
270
/* Write a stat-round event; @interval is pasted onto PERF_STAT_ROUND_TYPE__ to form the type. */
#define WRITE_STAT_ROUND_EVENT(time, interval) \
        write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval)

/* Entry of evsel @e's sample_id xyarray at position (x, y). */
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
275
/*
 * Synthesize a stat event for @counter at (@cpu, @thread) holding @count,
 * tagged with the sample id stored for that position, and write it out
 * through process_synthesized_event().
 *
 * Returns the result of perf_event__synthesize_stat().
 */
static int
perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread,
                             struct perf_counts_values *count)
{
        struct perf_sample_id *sid = SID(counter, cpu, thread);

        return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count,
                                           process_synthesized_event, NULL);
}
285
286 /*
287  * Read out the results of a single counter:
288  * do not aggregate counts across CPUs in system-wide mode
289  */
290 static int read_counter(struct perf_evsel *counter)
291 {
292         int nthreads = thread_map__nr(evsel_list->threads);
293         int ncpus, cpu, thread;
294
295         if (target__has_cpu(&target))
296                 ncpus = perf_evsel__nr_cpus(counter);
297         else
298                 ncpus = 1;
299
300         if (!counter->supported)
301                 return -ENOENT;
302
303         if (counter->system_wide)
304                 nthreads = 1;
305
306         for (thread = 0; thread < nthreads; thread++) {
307                 for (cpu = 0; cpu < ncpus; cpu++) {
308                         struct perf_counts_values *count;
309
310                         count = perf_counts(counter->counts, cpu, thread);
311                         if (perf_evsel__read(counter, cpu, thread, count))
312                                 return -1;
313
314                         if (STAT_RECORD) {
315                                 if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
316                                         pr_err("failed to write stat event\n");
317                                         return -1;
318                                 }
319                         }
320
321                         if (verbose > 1) {
322                                 fprintf(stat_config.output,
323                                         "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
324                                                 perf_evsel__name(counter),
325                                                 cpu,
326                                                 count->val, count->ena, count->run);
327                         }
328                 }
329         }
330
331         return 0;
332 }
333
334 static void read_counters(bool close_counters)
335 {
336         struct perf_evsel *counter;
337
338         evlist__for_each_entry(evsel_list, counter) {
339                 if (read_counter(counter))
340                         pr_debug("failed to read counter %s\n", counter->name);
341
342                 if (perf_stat_process_counter(&stat_config, counter))
343                         pr_warning("failed to process counter %s\n", counter->name);
344
345                 if (close_counters) {
346                         perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
347                                              thread_map__nr(evsel_list->threads));
348                 }
349         }
350 }
351
352 static void process_interval(void)
353 {
354         struct timespec ts, rs;
355
356         read_counters(false);
357
358         clock_gettime(CLOCK_MONOTONIC, &ts);
359         diff_timespec(&rs, &ts, &ref_time);
360
361         if (STAT_RECORD) {
362                 if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSECS_PER_SEC + rs.tv_nsec, INTERVAL))
363                         pr_err("failed to write stat round event\n");
364         }
365
366         print_counters(&rs, 0, NULL);
367 }
368
369 static void enable_counters(void)
370 {
371         if (initial_delay)
372                 usleep(initial_delay * 1000);
373
374         /*
375          * We need to enable counters only if:
376          * - we don't have tracee (attaching to task or cpu)
377          * - we have initial delay configured
378          */
379         if (!target__none(&target) || initial_delay)
380                 perf_evlist__enable(evsel_list);
381 }
382
383 static volatile int workload_exec_errno;
384
385 /*
386  * perf_evlist__prepare_workload will send a SIGUSR1
387  * if the fork fails, since we asked by setting its
388  * want_signal to true.
389  */
390 static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
391                                         void *ucontext __maybe_unused)
392 {
393         workload_exec_errno = info->si_value.sival_int;
394 }
395
396 static bool has_unit(struct perf_evsel *counter)
397 {
398         return counter->unit && *counter->unit;
399 }
400
401 static bool has_scale(struct perf_evsel *counter)
402 {
403         return counter->scale != 1;
404 }
405
406 static int perf_stat_synthesize_config(bool is_pipe)
407 {
408         struct perf_evsel *counter;
409         int err;
410
411         if (is_pipe) {
412                 err = perf_event__synthesize_attrs(NULL, perf_stat.session,
413                                                    process_synthesized_event);
414                 if (err < 0) {
415                         pr_err("Couldn't synthesize attrs.\n");
416                         return err;
417                 }
418         }
419
420         /*
421          * Synthesize other events stuff not carried within
422          * attr event - unit, scale, name
423          */
424         evlist__for_each_entry(evsel_list, counter) {
425                 if (!counter->supported)
426                         continue;
427
428                 /*
429                  * Synthesize unit and scale only if it's defined.
430                  */
431                 if (has_unit(counter)) {
432                         err = perf_event__synthesize_event_update_unit(NULL, counter, process_synthesized_event);
433                         if (err < 0) {
434                                 pr_err("Couldn't synthesize evsel unit.\n");
435                                 return err;
436                         }
437                 }
438
439                 if (has_scale(counter)) {
440                         err = perf_event__synthesize_event_update_scale(NULL, counter, process_synthesized_event);
441                         if (err < 0) {
442                                 pr_err("Couldn't synthesize evsel scale.\n");
443                                 return err;
444                         }
445                 }
446
447                 if (counter->own_cpus) {
448                         err = perf_event__synthesize_event_update_cpus(NULL, counter, process_synthesized_event);
449                         if (err < 0) {
450                                 pr_err("Couldn't synthesize evsel scale.\n");
451                                 return err;
452                         }
453                 }
454
455                 /*
456                  * Name is needed only for pipe output,
457                  * perf.data carries event names.
458                  */
459                 if (is_pipe) {
460                         err = perf_event__synthesize_event_update_name(NULL, counter, process_synthesized_event);
461                         if (err < 0) {
462                                 pr_err("Couldn't synthesize evsel name.\n");
463                                 return err;
464                         }
465                 }
466         }
467
468         err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads,
469                                                 process_synthesized_event,
470                                                 NULL);
471         if (err < 0) {
472                 pr_err("Couldn't synthesize thread map.\n");
473                 return err;
474         }
475
476         err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus,
477                                              process_synthesized_event, NULL);
478         if (err < 0) {
479                 pr_err("Couldn't synthesize thread map.\n");
480                 return err;
481         }
482
483         err = perf_event__synthesize_stat_config(NULL, &stat_config,
484                                                  process_synthesized_event, NULL);
485         if (err < 0) {
486                 pr_err("Couldn't synthesize config.\n");
487                 return err;
488         }
489
490         return 0;
491 }
492
/* The int stored in evsel @e's fd xyarray at position (x, y). */
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
494
495 static int __store_counter_ids(struct perf_evsel *counter,
496                                struct cpu_map *cpus,
497                                struct thread_map *threads)
498 {
499         int cpu, thread;
500
501         for (cpu = 0; cpu < cpus->nr; cpu++) {
502                 for (thread = 0; thread < threads->nr; thread++) {
503                         int fd = FD(counter, cpu, thread);
504
505                         if (perf_evlist__id_add_fd(evsel_list, counter,
506                                                    cpu, thread, fd) < 0)
507                                 return -1;
508                 }
509         }
510
511         return 0;
512 }
513
514 static int store_counter_ids(struct perf_evsel *counter)
515 {
516         struct cpu_map *cpus = counter->cpus;
517         struct thread_map *threads = counter->threads;
518
519         if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr))
520                 return -ENOMEM;
521
522         return __store_counter_ids(counter, cpus, threads);
523 }
524
/*
 * Perform one measurement run.
 *
 * @argc/@argv: the workload command line; with argc > 0 a child is
 *              prepared and later started, otherwise we count until
 *              'done' becomes non-zero.
 *
 * Opens every counter in evsel_list, optionally writes the recording
 * header and config events, runs/waits for the workload (calling
 * process_interval() periodically when an interval is configured), then
 * reads and closes all counters.
 *
 * Returns -1 on setup or workload-exec failure, a negative error from
 * header writing / config synthesis, and otherwise WEXITSTATUS(status)
 * (status stays 0 when no workload was forked).
 */
static int __run_perf_stat(int argc, const char **argv)
{
        int interval = stat_config.interval;
        char msg[512];
        unsigned long long t0, t1;
        struct perf_evsel *counter;
        struct timespec ts;
        size_t l;
        int status = 0;
        const bool forks = (argc > 0);
        bool is_pipe = STAT_RECORD ? perf_stat.file.is_pipe : false;

        /* interval is in milliseconds; without one, wake up once per second */
        if (interval) {
                ts.tv_sec  = interval / 1000;
                ts.tv_nsec = (interval % 1000) * 1000000;
        } else {
                ts.tv_sec  = 1;
                ts.tv_nsec = 0;
        }

        if (forks) {
                if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe,
                                                  workload_exec_failed_signal) < 0) {
                        perror("failed to prepare workload");
                        return -1;
                }
                child_pid = evsel_list->workload.pid;
        }

        if (group)
                perf_evlist__set_leader(evsel_list);

        evlist__for_each_entry(evsel_list, counter) {
try_again:
                if (create_perf_stat_counter(counter) < 0) {
                        /*
                         * PPC returns ENXIO for HW counters until 2.6.37
                         * (behavior changed with commit b0a873e).
                         */
                        if (errno == EINVAL || errno == ENOSYS ||
                            errno == ENOENT || errno == EOPNOTSUPP ||
                            errno == ENXIO) {
                                if (verbose)
                                        ui__warning("%s event is not supported by the kernel.\n",
                                                    perf_evsel__name(counter));
                                counter->supported = false;

                                /*
                                 * Skip the unsupported event unless it is the
                                 * leader of a group with more than one member;
                                 * that case falls through to the error below.
                                 */
                                if ((counter->leader != counter) ||
                                    !(counter->leader->nr_members > 1))
                                        continue;
                        } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
                                /* a fallback was set up on the evsel: retry the open */
                                if (verbose)
                                        ui__warning("%s\n", msg);
                                goto try_again;
                        }

                        perf_evsel__open_strerror(counter, &target,
                                                  errno, msg, sizeof(msg));
                        ui__error("%s\n", msg);

                        /* do not leave the prepared workload child behind */
                        if (child_pid != -1)
                                kill(child_pid, SIGTERM);

                        return -1;
                }
                counter->supported = true;

                /* track the widest unit string seen so far */
                l = strlen(counter->unit);
                if (l > unit_width)
                        unit_width = l;

                if (STAT_RECORD && store_counter_ids(counter))
                        return -1;
        }

        /* on failure 'counter' is set to the evsel whose filter failed */
        if (perf_evlist__apply_filters(evsel_list, &counter)) {
                error("failed to set filter \"%s\" on event %s with %d (%s)\n",
                        counter->filter, perf_evsel__name(counter), errno,
                        str_error_r(errno, msg, sizeof(msg)));
                return -1;
        }

        if (STAT_RECORD) {
                int err, fd = perf_data_file__fd(&perf_stat.file);

                if (is_pipe) {
                        err = perf_header__write_pipe(perf_data_file__fd(&perf_stat.file));
                } else {
                        err = perf_session__write_header(perf_stat.session, evsel_list,
                                                         fd, false);
                }

                if (err < 0)
                        return err;

                err = perf_stat_synthesize_config(is_pipe);
                if (err < 0)
                        return err;
        }

        /*
         * Enable counters and exec the command:
         */
        t0 = rdclock();
        clock_gettime(CLOCK_MONOTONIC, &ref_time);

        if (forks) {
                perf_evlist__start_workload(evsel_list);
                enable_counters();

                if (interval) {
                        /* waitpid() returns 0 while the child is still running */
                        while (!waitpid(child_pid, &status, WNOHANG)) {
                                nanosleep(&ts, NULL);
                                process_interval();
                        }
                }
                wait(&status);

                if (workload_exec_errno) {
                        const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
                        pr_err("Workload failed: %s\n", emsg);
                        return -1;
                }

                if (WIFSIGNALED(status))
                        psignal(WTERMSIG(status), argv[0]);
        } else {
                enable_counters();
                while (!done) {
                        nanosleep(&ts, NULL);
                        if (interval)
                                process_interval();
                }
        }

        t1 = rdclock();

        update_stats(&walltime_nsecs_stats, t1 - t0);

        /* final read; also closes the counters */
        read_counters(true);

        return WEXITSTATUS(status);
}
668
669 static int run_perf_stat(int argc, const char **argv)
670 {
671         int ret;
672
673         if (pre_cmd) {
674                 ret = system(pre_cmd);
675                 if (ret)
676                         return ret;
677         }
678
679         if (sync_run)
680                 sync();
681
682         ret = __run_perf_stat(argc, argv);
683         if (ret)
684                 return ret;
685
686         if (post_cmd) {
687                 ret = system(post_cmd);
688                 if (ret)
689                         return ret;
690         }
691
692         return ret;
693 }
694
695 static void print_running(u64 run, u64 ena)
696 {
697         if (csv_output) {
698                 fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f",
699                                         csv_sep,
700                                         run,
701                                         csv_sep,
702                                         ena ? 100.0 * run / ena : 100.0);
703         } else if (run != ena) {
704                 fprintf(stat_config.output, "  (%.2f%%)", 100.0 * run / ena);
705         }
706 }
707
708 static void print_noise_pct(double total, double avg)
709 {
710         double pct = rel_stddev_stats(total, avg);
711
712         if (csv_output)
713                 fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct);
714         else if (pct)
715                 fprintf(stat_config.output, "  ( +-%6.2f%% )", pct);
716 }
717
718 static void print_noise(struct perf_evsel *evsel, double avg)
719 {
720         struct perf_stat_evsel *ps;
721
722         if (run_count == 1)
723                 return;
724
725         ps = evsel->priv;
726         print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
727 }
728
729 static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
730 {
731         switch (stat_config.aggr_mode) {
732         case AGGR_CORE:
733                 fprintf(stat_config.output, "S%d-C%*d%s%*d%s",
734                         cpu_map__id_to_socket(id),
735                         csv_output ? 0 : -8,
736                         cpu_map__id_to_cpu(id),
737                         csv_sep,
738                         csv_output ? 0 : 4,
739                         nr,
740                         csv_sep);
741                 break;
742         case AGGR_SOCKET:
743                 fprintf(stat_config.output, "S%*d%s%*d%s",
744                         csv_output ? 0 : -5,
745                         id,
746                         csv_sep,
747                         csv_output ? 0 : 4,
748                         nr,
749                         csv_sep);
750                         break;
751         case AGGR_NONE:
752                 fprintf(stat_config.output, "CPU%*d%s",
753                         csv_output ? 0 : -4,
754                         perf_evsel__cpus(evsel)->map[id], csv_sep);
755                 break;
756         case AGGR_THREAD:
757                 fprintf(stat_config.output, "%*s-%*d%s",
758                         csv_output ? 0 : 16,
759                         thread_map__comm(evsel->threads, id),
760                         csv_output ? 0 : -8,
761                         thread_map__pid(evsel->threads, id),
762                         csv_sep);
763                 break;
764         case AGGR_GLOBAL:
765         case AGGR_UNSET:
766         default:
767                 break;
768         }
769 }
770
/* Context handed (as void *ctx) to the new-line / print-metric callbacks. */
struct outstate {
        FILE *fh;               /* stream the callbacks write to */
        bool newline;           /* set by new_line_std(), consumed by print_metric_std() */
        const char *prefix;     /* emitted at the start of each new output line */
        int  nfields;           /* number of csv_sep written by new_line_csv() after the prefix */
        int  id, nr;            /* forwarded to aggr_printout() */
        struct perf_evsel *evsel;       /* counter being printed */
};

/* Width of the metric column produced by print_metric_std(). */
#define METRIC_LEN  35
781
782 static void new_line_std(void *ctx)
783 {
784         struct outstate *os = ctx;
785
786         os->newline = true;
787 }
788
789 static void do_new_line_std(struct outstate *os)
790 {
791         fputc('\n', os->fh);
792         fputs(os->prefix, os->fh);
793         aggr_printout(os->evsel, os->id, os->nr);
794         if (stat_config.aggr_mode == AGGR_NONE)
795                 fprintf(os->fh, "        ");
796         fprintf(os->fh, "                                                 ");
797 }
798
799 static void print_metric_std(void *ctx, const char *color, const char *fmt,
800                              const char *unit, double val)
801 {
802         struct outstate *os = ctx;
803         FILE *out = os->fh;
804         int n;
805         bool newline = os->newline;
806
807         os->newline = false;
808
809         if (unit == NULL || fmt == NULL) {
810                 fprintf(out, "%-*s", METRIC_LEN, "");
811                 return;
812         }
813
814         if (newline)
815                 do_new_line_std(os);
816
817         n = fprintf(out, " # ");
818         if (color)
819                 n += color_fprintf(out, color, fmt, val);
820         else
821                 n += fprintf(out, fmt, val);
822         fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
823 }
824
825 static void new_line_csv(void *ctx)
826 {
827         struct outstate *os = ctx;
828         int i;
829
830         fputc('\n', os->fh);
831         if (os->prefix)
832                 fprintf(os->fh, "%s%s", os->prefix, csv_sep);
833         aggr_printout(os->evsel, os->id, os->nr);
834         for (i = 0; i < os->nfields; i++)
835                 fputs(csv_sep, os->fh);
836 }
837
838 static void print_metric_csv(void *ctx,
839                              const char *color __maybe_unused,
840                              const char *fmt, const char *unit, double val)
841 {
842         struct outstate *os = ctx;
843         FILE *out = os->fh;
844         char buf[64], *vals, *ends;
845
846         if (unit == NULL || fmt == NULL) {
847                 fprintf(out, "%s%s%s%s", csv_sep, csv_sep, csv_sep, csv_sep);
848                 return;
849         }
850         snprintf(buf, sizeof(buf), fmt, val);
851         vals = buf;
852         while (isspace(*vals))
853                 vals++;
854         ends = vals;
855         while (isdigit(*ends) || *ends == '.')
856                 ends++;
857         *ends = 0;
858         while (isspace(*unit))
859                 unit++;
860         fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit);
861 }
862
/* Minimum column width print_metric_only() pads each metric to. */
#define METRIC_ONLY_LEN 20
864
/*
 * Filter out some columns that don't work well in metrics only mode.
 *
 * Returns false for a NULL unit and for any unit containing "/sec", "hz",
 * "Hz" or "CPUs utilized"; true otherwise.
 */
static bool valid_only_metric(const char *unit)
{
        static const char * const rejected[] = {
                "/sec", "hz", "Hz", "CPUs utilized",
        };
        size_t i;

        if (unit == NULL)
                return false;

        for (i = 0; i < sizeof(rejected) / sizeof(rejected[0]); i++) {
                if (strstr(unit, rejected[i]) != NULL)
                        return false;
        }

        return true;
}
878
/*
 * Make "of all ..." units self-describing by prepending the event name.
 *
 * @buf:   scratch buffer; written with a limit of 1024 bytes, so it
 *         must be at least that large.
 * @evsel: event whose name is prepended.
 * @unit:  unit string to inspect (must not be NULL).
 *
 * Returns @buf holding "<event name> <unit>" when @unit starts with
 * "of all", otherwise @unit unchanged.
 */
static const char *fixunit(char *buf, struct perf_evsel *evsel,
                           const char *unit)
{
        if (strncmp(unit, "of all", 6) != 0)
                return unit;

        snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel), unit);
        return buf;
}
889
890 static void print_metric_only(void *ctx, const char *color, const char *fmt,
891                               const char *unit, double val)
892 {
893         struct outstate *os = ctx;
894         FILE *out = os->fh;
895         int n;
896         char buf[1024];
897         unsigned mlen = METRIC_ONLY_LEN;
898
899         if (!valid_only_metric(unit))
900                 return;
901         unit = fixunit(buf, os->evsel, unit);
902         if (color)
903                 n = color_fprintf(out, color, fmt, val);
904         else
905                 n = fprintf(out, fmt, val);
906         if (n > METRIC_ONLY_LEN)
907                 n = METRIC_ONLY_LEN;
908         if (mlen < strlen(unit))
909                 mlen = strlen(unit) + 1;
910         fprintf(out, "%*s", mlen - n, "");
911 }
912
913 static void print_metric_only_csv(void *ctx, const char *color __maybe_unused,
914                                   const char *fmt,
915                                   const char *unit, double val)
916 {
917         struct outstate *os = ctx;
918         FILE *out = os->fh;
919         char buf[64], *vals, *ends;
920         char tbuf[1024];
921
922         if (!valid_only_metric(unit))
923                 return;
924         unit = fixunit(tbuf, os->evsel, unit);
925         snprintf(buf, sizeof buf, fmt, val);
926         vals = buf;
927         while (isspace(*vals))
928                 vals++;
929         ends = vals;
930         while (isdigit(*ends) || *ends == '.')
931                 ends++;
932         *ends = 0;
933         fprintf(out, "%s%s", vals, csv_sep);
934 }
935
/*
 * new_line callback used when only metrics are printed: intentionally
 * does nothing, so no line break is emitted by this callback.
 */
static void new_line_metric(void *ctx __maybe_unused)
{
}
939
940 static void print_metric_header(void *ctx, const char *color __maybe_unused,
941                                 const char *fmt __maybe_unused,
942                                 const char *unit, double val __maybe_unused)
943 {
944         struct outstate *os = ctx;
945         char tbuf[1024];
946
947         if (!valid_only_metric(unit))
948                 return;
949         unit = fixunit(tbuf, os->evsel, unit);
950         if (csv_output)
951                 fprintf(os->fh, "%s%s", unit, csv_sep);
952         else
953                 fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit);
954 }
955
956 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
957 {
958         FILE *output = stat_config.output;
959         double msecs = avg / 1e6;
960         const char *fmt_v, *fmt_n;
961         char name[25];
962
963         fmt_v = csv_output ? "%.6f%s" : "%18.6f%s";
964         fmt_n = csv_output ? "%s" : "%-25s";
965
966         aggr_printout(evsel, id, nr);
967
968         scnprintf(name, sizeof(name), "%s%s",
969                   perf_evsel__name(evsel), csv_output ? "" : " (msec)");
970
971         fprintf(output, fmt_v, msecs, csv_sep);
972
973         if (csv_output)
974                 fprintf(output, "%s%s", evsel->unit, csv_sep);
975         else
976                 fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep);
977
978         fprintf(output, fmt_n, name);
979
980         if (evsel->cgrp)
981                 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
982 }
983
984 static int first_shadow_cpu(struct perf_evsel *evsel, int id)
985 {
986         int i;
987
988         if (!aggr_get_id)
989                 return 0;
990
991         if (stat_config.aggr_mode == AGGR_NONE)
992                 return id;
993
994         if (stat_config.aggr_mode == AGGR_GLOBAL)
995                 return 0;
996
997         for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) {
998                 int cpu2 = perf_evsel__cpus(evsel)->map[i];
999
1000                 if (aggr_get_id(evsel_list->cpus, cpu2) == id)
1001                         return cpu2;
1002         }
1003         return 0;
1004 }
1005
1006 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
1007 {
1008         FILE *output = stat_config.output;
1009         double sc =  evsel->scale;
1010         const char *fmt;
1011
1012         if (csv_output) {
1013                 fmt = floor(sc) != sc ?  "%.2f%s" : "%.0f%s";
1014         } else {
1015                 if (big_num)
1016                         fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s";
1017                 else
1018                         fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s";
1019         }
1020
1021         aggr_printout(evsel, id, nr);
1022
1023         fprintf(output, fmt, avg, csv_sep);
1024
1025         if (evsel->unit)
1026                 fprintf(output, "%-*s%s",
1027                         csv_output ? 0 : unit_width,
1028                         evsel->unit, csv_sep);
1029
1030         fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel));
1031
1032         if (evsel->cgrp)
1033                 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
1034 }
1035
/*
 * Print one counter result: its value columns followed by any derived
 * ("shadow") metrics, in the output format selected by csv_output and
 * metric_only.
 *
 * @id, @nr:  aggregation id and count, forwarded to aggr_printout().
 * @counter:  event being printed.
 * @uval:     already scaled counter value.
 * @prefix:   per-line prefix stored in the output state ("" if NULL).
 * @run, @ena: running/enabled times; a zero in either (or a scaled
 *            value of -1) selects the "not counted/not supported" path.
 * @noise:    value forwarded to print_noise().
 */
static void printout(int id, int nr, struct perf_evsel *counter, double uval,
                     char *prefix, u64 run, u64 ena, double noise)
{
        struct perf_stat_output_ctx out;
        struct outstate os = {
                .fh = stat_config.output,
                .prefix = prefix ? prefix : "",
                .id = id,
                .nr = nr,
                .evsel = counter,
        };
        print_metric_t pm = print_metric_std;
        void (*nl)(void *);

        /* Select the metric/new-line callbacks for the output mode. */
        if (metric_only) {
                nl = new_line_metric;
                if (csv_output)
                        pm = print_metric_only_csv;
                else
                        pm = print_metric_only;
        } else
                nl = new_line_std;

        if (csv_output && !metric_only) {
                /* Number of aggregation columns per aggregation mode. */
                static int aggr_fields[] = {
                        [AGGR_GLOBAL] = 0,
                        [AGGR_THREAD] = 1,
                        [AGGR_NONE] = 1,
                        [AGGR_SOCKET] = 2,
                        [AGGR_CORE] = 2,
                };

                pm = print_metric_csv;
                nl = new_line_csv;
                /* Columns new_line_csv() must skip to reach the metrics. */
                os.nfields = 3;
                os.nfields += aggr_fields[stat_config.aggr_mode];
                if (counter->cgrp)
                        os.nfields++;
        }
        /* Counter did not run: print a placeholder instead of a value. */
        if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
                if (metric_only) {
                        pm(&os, NULL, "", "", 0);
                        return;
                }
                aggr_printout(counter, id, nr);

                fprintf(stat_config.output, "%*s%s",
                        csv_output ? 0 : 18,
                        counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
                        csv_sep);

                fprintf(stat_config.output, "%-*s%s",
                        csv_output ? 0 : unit_width,
                        counter->unit, csv_sep);

                fprintf(stat_config.output, "%*s",
                        csv_output ? 0 : -25,
                        perf_evsel__name(counter));

                if (counter->cgrp)
                        fprintf(stat_config.output, "%s%s",
                                csv_sep, counter->cgrp->name);

                /*
                 * The empty metric goes before noise/running in text
                 * mode and after them in CSV mode.
                 */
                if (!csv_output)
                        pm(&os, NULL, NULL, "", 0);
                print_noise(counter, noise);
                print_running(run, ena);
                if (csv_output)
                        pm(&os, NULL, NULL, "", 0);
                return;
        }

        if (metric_only)
                /* nothing */;
        else if (nsec_counter(counter))
                nsec_printout(id, nr, counter, uval);
        else
                abs_printout(id, nr, counter, uval);

        out.print_metric = pm;
        out.new_line = nl;
        out.ctx = &os;

        /* CSV prints noise/running before the metrics ... */
        if (csv_output && !metric_only) {
                print_noise(counter, noise);
                print_running(run, ena);
        }

        perf_stat__print_shadow_stats(counter, uval,
                                first_shadow_cpu(counter, id),
                                &out);
        /* ... text output prints them after. */
        if (!csv_output && !metric_only) {
                print_noise(counter, noise);
                print_running(run, ena);
        }
}
1132
1133 static void aggr_update_shadow(void)
1134 {
1135         int cpu, s2, id, s;
1136         u64 val;
1137         struct perf_evsel *counter;
1138
1139         for (s = 0; s < aggr_map->nr; s++) {
1140                 id = aggr_map->map[s];
1141                 evlist__for_each_entry(evsel_list, counter) {
1142                         val = 0;
1143                         for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
1144                                 s2 = aggr_get_id(evsel_list->cpus, cpu);
1145                                 if (s2 != id)
1146                                         continue;
1147                                 val += perf_counts(counter->counts, cpu, 0)->val;
1148                         }
1149                         val = val * counter->scale;
1150                         perf_stat__update_shadow_stats(counter, &val,
1151                                                        first_shadow_cpu(counter, id));
1152                 }
1153         }
1154 }
1155
1156 static void print_aggr(char *prefix)
1157 {
1158         FILE *output = stat_config.output;
1159         struct perf_evsel *counter;
1160         int cpu, s, s2, id, nr;
1161         double uval;
1162         u64 ena, run, val;
1163         bool first;
1164
1165         if (!(aggr_map || aggr_get_id))
1166                 return;
1167
1168         aggr_update_shadow();
1169
1170         /*
1171          * With metric_only everything is on a single line.
1172          * Without each counter has its own line.
1173          */
1174         for (s = 0; s < aggr_map->nr; s++) {
1175                 if (prefix && metric_only)
1176                         fprintf(output, "%s", prefix);
1177
1178                 id = aggr_map->map[s];
1179                 first = true;
1180                 evlist__for_each_entry(evsel_list, counter) {
1181                         val = ena = run = 0;
1182                         nr = 0;
1183                         for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
1184                                 s2 = aggr_get_id(perf_evsel__cpus(counter), cpu);
1185                                 if (s2 != id)
1186                                         continue;
1187                                 val += perf_counts(counter->counts, cpu, 0)->val;
1188                                 ena += perf_counts(counter->counts, cpu, 0)->ena;
1189                                 run += perf_counts(counter->counts, cpu, 0)->run;
1190                                 nr++;
1191                         }
1192                         if (first && metric_only) {
1193                                 first = false;
1194                                 aggr_printout(counter, id, nr);
1195                         }
1196                         if (prefix && !metric_only)
1197                                 fprintf(output, "%s", prefix);
1198
1199                         uval = val * counter->scale;
1200                         printout(id, nr, counter, uval, prefix, run, ena, 1.0);
1201                         if (!metric_only)
1202                                 fputc('\n', output);
1203                 }
1204                 if (metric_only)
1205                         fputc('\n', output);
1206         }
1207 }
1208
1209 static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
1210 {
1211         FILE *output = stat_config.output;
1212         int nthreads = thread_map__nr(counter->threads);
1213         int ncpus = cpu_map__nr(counter->cpus);
1214         int cpu, thread;
1215         double uval;
1216
1217         for (thread = 0; thread < nthreads; thread++) {
1218                 u64 ena = 0, run = 0, val = 0;
1219
1220                 for (cpu = 0; cpu < ncpus; cpu++) {
1221                         val += perf_counts(counter->counts, cpu, thread)->val;
1222                         ena += perf_counts(counter->counts, cpu, thread)->ena;
1223                         run += perf_counts(counter->counts, cpu, thread)->run;
1224                 }
1225
1226                 if (prefix)
1227                         fprintf(output, "%s", prefix);
1228
1229                 uval = val * counter->scale;
1230                 printout(thread, 0, counter, uval, prefix, run, ena, 1.0);
1231                 fputc('\n', output);
1232         }
1233 }
1234
1235 /*
1236  * Print out the results of a single counter:
1237  * aggregated counts in system-wide mode
1238  */
1239 static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
1240 {
1241         FILE *output = stat_config.output;
1242         struct perf_stat_evsel *ps = counter->priv;
1243         double avg = avg_stats(&ps->res_stats[0]);
1244         double uval;
1245         double avg_enabled, avg_running;
1246
1247         avg_enabled = avg_stats(&ps->res_stats[1]);
1248         avg_running = avg_stats(&ps->res_stats[2]);
1249
1250         if (prefix && !metric_only)
1251                 fprintf(output, "%s", prefix);
1252
1253         uval = avg * counter->scale;
1254         printout(-1, 0, counter, uval, prefix, avg_running, avg_enabled, avg);
1255         if (!metric_only)
1256                 fprintf(output, "\n");
1257 }
1258
1259 /*
1260  * Print out the results of a single counter:
1261  * does not use aggregated count in system-wide
1262  */
1263 static void print_counter(struct perf_evsel *counter, char *prefix)
1264 {
1265         FILE *output = stat_config.output;
1266         u64 ena, run, val;
1267         double uval;
1268         int cpu;
1269
1270         for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
1271                 val = perf_counts(counter->counts, cpu, 0)->val;
1272                 ena = perf_counts(counter->counts, cpu, 0)->ena;
1273                 run = perf_counts(counter->counts, cpu, 0)->run;
1274
1275                 if (prefix)
1276                         fprintf(output, "%s", prefix);
1277
1278                 uval = val * counter->scale;
1279                 printout(cpu, 0, counter, uval, prefix, run, ena, 1.0);
1280
1281                 fputc('\n', output);
1282         }
1283 }
1284
1285 static void print_no_aggr_metric(char *prefix)
1286 {
1287         int cpu;
1288         int nrcpus = 0;
1289         struct perf_evsel *counter;
1290         u64 ena, run, val;
1291         double uval;
1292
1293         nrcpus = evsel_list->cpus->nr;
1294         for (cpu = 0; cpu < nrcpus; cpu++) {
1295                 bool first = true;
1296
1297                 if (prefix)
1298                         fputs(prefix, stat_config.output);
1299                 evlist__for_each_entry(evsel_list, counter) {
1300                         if (first) {
1301                                 aggr_printout(counter, cpu, 0);
1302                                 first = false;
1303                         }
1304                         val = perf_counts(counter->counts, cpu, 0)->val;
1305                         ena = perf_counts(counter->counts, cpu, 0)->ena;
1306                         run = perf_counts(counter->counts, cpu, 0)->run;
1307
1308                         uval = val * counter->scale;
1309                         printout(cpu, 0, counter, uval, prefix, run, ena, 1.0);
1310                 }
1311                 fputc('\n', stat_config.output);
1312         }
1313 }
1314
1315 static int aggr_header_lens[] = {
1316         [AGGR_CORE] = 18,
1317         [AGGR_SOCKET] = 12,
1318         [AGGR_NONE] = 6,
1319         [AGGR_THREAD] = 24,
1320         [AGGR_GLOBAL] = 0,
1321 };
1322
1323 static const char *aggr_header_csv[] = {
1324         [AGGR_CORE]     =       "core,cpus,",
1325         [AGGR_SOCKET]   =       "socket,cpus",
1326         [AGGR_NONE]     =       "cpu,",
1327         [AGGR_THREAD]   =       "comm-pid,",
1328         [AGGR_GLOBAL]   =       ""
1329 };
1330
1331 static void print_metric_headers(const char *prefix, bool no_indent)
1332 {
1333         struct perf_stat_output_ctx out;
1334         struct perf_evsel *counter;
1335         struct outstate os = {
1336                 .fh = stat_config.output
1337         };
1338
1339         if (prefix)
1340                 fprintf(stat_config.output, "%s", prefix);
1341
1342         if (!csv_output && !no_indent)
1343                 fprintf(stat_config.output, "%*s",
1344                         aggr_header_lens[stat_config.aggr_mode], "");
1345         if (csv_output) {
1346                 if (stat_config.interval)
1347                         fputs("time,", stat_config.output);
1348                 fputs(aggr_header_csv[stat_config.aggr_mode],
1349                         stat_config.output);
1350         }
1351
1352         /* Print metrics headers only */
1353         evlist__for_each_entry(evsel_list, counter) {
1354                 os.evsel = counter;
1355                 out.ctx = &os;
1356                 out.print_metric = print_metric_header;
1357                 out.new_line = new_line_metric;
1358                 os.evsel = counter;
1359                 perf_stat__print_shadow_stats(counter, 0,
1360                                               0,
1361                                               &out);
1362         }
1363         fputc('\n', stat_config.output);
1364 }
1365
/*
 * Prepare interval-mode output: build the timestamp prefix and, every
 * 25 calls, print the column header.
 *
 * @prefix: caller-provided buffer that receives "<sec>.<nsec><sep>".
 *          NOTE(review): written with an unbounded sprintf(); the only
 *          caller visible here passes a 64-byte buffer, so a long
 *          csv_sep could overflow it - confirm csv_sep is bounded.
 * @ts:     timestamp to format.
 */
static void print_interval(char *prefix, struct timespec *ts)
{
        FILE *output = stat_config.output;
        /* Calls since the header was last printed; wraps at 25. */
        static int num_print_interval;

        sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);

        /* Text mode: column titles depend on the aggregation mode. */
        if (num_print_interval == 0 && !csv_output) {
                switch (stat_config.aggr_mode) {
                case AGGR_SOCKET:
                        fprintf(output, "#           time socket cpus");
                        if (!metric_only)
                                fprintf(output, "             counts %*s events\n", unit_width, "unit");
                        break;
                case AGGR_CORE:
                        fprintf(output, "#           time core         cpus");
                        if (!metric_only)
                                fprintf(output, "             counts %*s events\n", unit_width, "unit");
                        break;
                case AGGR_NONE:
                        fprintf(output, "#           time CPU");
                        if (!metric_only)
                                fprintf(output, "                counts %*s events\n", unit_width, "unit");
                        break;
                case AGGR_THREAD:
                        fprintf(output, "#           time             comm-pid");
                        if (!metric_only)
                                fprintf(output, "                  counts %*s events\n", unit_width, "unit");
                        break;
                case AGGR_GLOBAL:
                default:
                        fprintf(output, "#           time");
                        if (!metric_only)
                                fprintf(output, "             counts %*s events\n", unit_width, "unit");
                        /* falls through to the shared break below */
                case AGGR_UNSET:
                        break;
                }
        }

        /* Metric-only mode finishes the header line with the metric names. */
        if (num_print_interval == 0 && metric_only)
                print_metric_headers(" ", true);
        if (++num_print_interval == 25)
                num_print_interval = 0;
}
1410
1411 static void print_header(int argc, const char **argv)
1412 {
1413         FILE *output = stat_config.output;
1414         int i;
1415
1416         fflush(stdout);
1417
1418         if (!csv_output) {
1419                 fprintf(output, "\n");
1420                 fprintf(output, " Performance counter stats for ");
1421                 if (target.system_wide)
1422                         fprintf(output, "\'system wide");
1423                 else if (target.cpu_list)
1424                         fprintf(output, "\'CPU(s) %s", target.cpu_list);
1425                 else if (!target__has_task(&target)) {
1426                         fprintf(output, "\'%s", argv ? argv[0] : "pipe");
1427                         for (i = 1; argv && (i < argc); i++)
1428                                 fprintf(output, " %s", argv[i]);
1429                 } else if (target.pid)
1430                         fprintf(output, "process id \'%s", target.pid);
1431                 else
1432                         fprintf(output, "thread id \'%s", target.tid);
1433
1434                 fprintf(output, "\'");
1435                 if (run_count > 1)
1436                         fprintf(output, " (%d runs)", run_count);
1437                 fprintf(output, ":\n\n");
1438         }
1439 }
1440
1441 static void print_footer(void)
1442 {
1443         FILE *output = stat_config.output;
1444
1445         if (!null_run)
1446                 fprintf(output, "\n");
1447         fprintf(output, " %17.9f seconds time elapsed",
1448                         avg_stats(&walltime_nsecs_stats)/1e9);
1449         if (run_count > 1) {
1450                 fprintf(output, "                                        ");
1451                 print_noise_pct(stddev_stats(&walltime_nsecs_stats),
1452                                 avg_stats(&walltime_nsecs_stats));
1453         }
1454         fprintf(output, "\n\n");
1455 }
1456
1457 static void print_counters(struct timespec *ts, int argc, const char **argv)
1458 {
1459         int interval = stat_config.interval;
1460         struct perf_evsel *counter;
1461         char buf[64], *prefix = NULL;
1462
1463         /* Do not print anything if we record to the pipe. */
1464         if (STAT_RECORD && perf_stat.file.is_pipe)
1465                 return;
1466
1467         if (interval)
1468                 print_interval(prefix = buf, ts);
1469         else
1470                 print_header(argc, argv);
1471
1472         if (metric_only) {
1473                 static int num_print_iv;
1474
1475                 if (num_print_iv == 0 && !interval)
1476                         print_metric_headers(prefix, false);
1477                 if (num_print_iv++ == 25)
1478                         num_print_iv = 0;
1479                 if (stat_config.aggr_mode == AGGR_GLOBAL && prefix)
1480                         fprintf(stat_config.output, "%s", prefix);
1481         }
1482
1483         switch (stat_config.aggr_mode) {
1484         case AGGR_CORE:
1485         case AGGR_SOCKET:
1486                 print_aggr(prefix);
1487                 break;
1488         case AGGR_THREAD:
1489                 evlist__for_each_entry(evsel_list, counter)
1490                         print_aggr_thread(counter, prefix);
1491                 break;
1492         case AGGR_GLOBAL:
1493                 evlist__for_each_entry(evsel_list, counter)
1494                         print_counter_aggr(counter, prefix);
1495                 if (metric_only)
1496                         fputc('\n', stat_config.output);
1497                 break;
1498         case AGGR_NONE:
1499                 if (metric_only)
1500                         print_no_aggr_metric(prefix);
1501                 else {
1502                         evlist__for_each_entry(evsel_list, counter)
1503                                 print_counter(counter, prefix);
1504                 }
1505                 break;
1506         case AGGR_UNSET:
1507         default:
1508                 break;
1509         }
1510
1511         if (!interval && !csv_output)
1512                 print_footer();
1513
1514         fflush(stat_config.output);
1515 }
1516
/* Last signal seen by skip_signal(), or -1 when none was received. */
static volatile int signr = -1;

/*
 * Signal handler: records the signal number in signr and requests
 * termination (done = 1) when there is no child or interval printing
 * is active.
 */
static void skip_signal(int signo)
{
        if ((child_pid == -1) || stat_config.interval)
                done = 1;

        signr = signo;
        /*
         * render child_pid harmless
         * won't send SIGTERM to a random
         * process in case of race condition
         * and fast PID recycling
         */
        child_pid = -1;
}
1533
1534 static void sig_atexit(void)
1535 {
1536         sigset_t set, oset;
1537
1538         /*
1539          * avoid race condition with SIGCHLD handler
1540          * in skip_signal() which is modifying child_pid
1541          * goal is to avoid send SIGTERM to a random
1542          * process
1543          */
1544         sigemptyset(&set);
1545         sigaddset(&set, SIGCHLD);
1546         sigprocmask(SIG_BLOCK, &set, &oset);
1547
1548         if (child_pid != -1)
1549                 kill(child_pid, SIGTERM);
1550
1551         sigprocmask(SIG_SETMASK, &oset, NULL);
1552
1553         if (signr == -1)
1554                 return;
1555
1556         signal(signr, SIG_DFL);
1557         kill(getpid(), signr);
1558 }
1559
1560 static int stat__set_big_num(const struct option *opt __maybe_unused,
1561                              const char *s __maybe_unused, int unset)
1562 {
1563         big_num_opt = unset ? 0 : 1;
1564         return 0;
1565 }
1566
1567 static int enable_metric_only(const struct option *opt __maybe_unused,
1568                               const char *s __maybe_unused, int unset)
1569 {
1570         force_metric_only = true;
1571         metric_only = !unset;
1572         return 0;
1573 }
1574
/*
 * Command-line option table for 'perf stat'.  Each entry binds a
 * short/long option name to the variable it sets or to a callback;
 * the table is terminated by OPT_END().
 */
static const struct option stat_options[] = {
        OPT_BOOLEAN('T', "transaction", &transaction_run,
                    "hardware transaction statistics"),
        OPT_CALLBACK('e', "event", &evsel_list, "event",
                     "event selector. use 'perf list' to list available events",
                     parse_events_option),
        OPT_CALLBACK(0, "filter", &evsel_list, "filter",
                     "event filter", parse_filter),
        OPT_BOOLEAN('i', "no-inherit", &no_inherit,
                    "child tasks do not inherit counters"),
        OPT_STRING('p', "pid", &target.pid, "pid",
                   "stat events on existing process id"),
        OPT_STRING('t', "tid", &target.tid, "tid",
                   "stat events on existing thread id"),
        OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
                    "system-wide collection from all CPUs"),
        OPT_BOOLEAN('g', "group", &group,
                    "put the counters into a counter group"),
        OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
        OPT_INCR('v', "verbose", &verbose,
                    "be more verbose (show counter open errors, etc)"),
        OPT_INTEGER('r', "repeat", &run_count,
                    "repeat command and print average + stddev (max: 100, forever: 0)"),
        OPT_BOOLEAN('n', "null", &null_run,
                    "null run - dont start any counters"),
        OPT_INCR('d', "detailed", &detailed_run,
                    "detailed run - start a lot of events"),
        OPT_BOOLEAN('S', "sync", &sync_run,
                    "call sync() before starting a run"),
        OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
                           "print large numbers with thousands\' separators",
                           stat__set_big_num),
        OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
                    "list of cpus to monitor in system-wide"),
        /* The aggregation options all write stat_config.aggr_mode. */
        OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
                    "disable CPU count aggregation", AGGR_NONE),
        OPT_STRING('x', "field-separator", &csv_sep, "separator",
                   "print counts with custom separator"),
        OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
                     "monitor event in cgroup name only", parse_cgroups),
        OPT_STRING('o', "output", &output_name, "file", "output file name"),
        OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
        OPT_INTEGER(0, "log-fd", &output_fd,
                    "log output to fd, instead of stderr"),
        OPT_STRING(0, "pre", &pre_cmd, "command",
                        "command to run prior to the measured command"),
        OPT_STRING(0, "post", &post_cmd, "command",
                        "command to run after to the measured command"),
        OPT_UINTEGER('I', "interval-print", &stat_config.interval,
                    "print counts at regular interval in ms (>= 10)"),
        OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
                     "aggregate counts per processor socket", AGGR_SOCKET),
        OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
                     "aggregate counts per physical processor core", AGGR_CORE),
        OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
                     "aggregate counts per thread", AGGR_THREAD),
        OPT_UINTEGER('D', "delay", &initial_delay,
                     "ms to wait before starting measurement after program start"),
        OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL,
                        "Only print computed metrics. No raw values", enable_metric_only),
        OPT_BOOLEAN(0, "topdown", &topdown_run,
                        "measure topdown level 1 statistics"),
        OPT_END()
};
1639
/*
 * Two-argument adapter around cpu_map__get_socket() that passes NULL
 * as its third argument.
 */
static int perf_stat__get_socket(struct cpu_map *map, int cpu)
{
        return cpu_map__get_socket(map, cpu, NULL);
}
1644
/*
 * Two-argument adapter around cpu_map__get_core() that passes NULL
 * as its third argument.
 */
static int perf_stat__get_core(struct cpu_map *map, int cpu)
{
        return cpu_map__get_core(map, cpu, NULL);
}
1649
1650 static int cpu_map__get_max(struct cpu_map *map)
1651 {
1652         int i, max = -1;
1653
1654         for (i = 0; i < map->nr; i++) {
1655                 if (map->map[i] > max)
1656                         max = map->map[i];
1657         }
1658
1659         return max;
1660 }
1661
1662 static struct cpu_map *cpus_aggr_map;
1663
1664 static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx)
1665 {
1666         int cpu;
1667
1668         if (idx >= map->nr)
1669                 return -1;
1670
1671         cpu = map->map[idx];
1672
1673         if (cpus_aggr_map->map[cpu] == -1)
1674                 cpus_aggr_map->map[cpu] = get_id(map, idx);
1675
1676         return cpus_aggr_map->map[cpu];
1677 }
1678
/* Socket lookup for (@map, @idx) routed through the perf_stat__get_aggr() cache. */
static int perf_stat__get_socket_cached(struct cpu_map *map, int idx)
{
        return perf_stat__get_aggr(perf_stat__get_socket, map, idx);
}
1683
/* Core lookup for (@map, @idx) routed through the perf_stat__get_aggr() cache. */
static int perf_stat__get_core_cached(struct cpu_map *map, int idx)
{
        return perf_stat__get_aggr(perf_stat__get_core, map, idx);
}
1688
1689 static int perf_stat_init_aggr_mode(void)
1690 {
1691         int nr;
1692
1693         switch (stat_config.aggr_mode) {
1694         case AGGR_SOCKET:
1695                 if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
1696                         perror("cannot build socket map");
1697                         return -1;
1698                 }
1699                 aggr_get_id = perf_stat__get_socket_cached;
1700                 break;
1701         case AGGR_CORE:
1702                 if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
1703                         perror("cannot build core map");
1704                         return -1;
1705                 }
1706                 aggr_get_id = perf_stat__get_core_cached;
1707                 break;
1708         case AGGR_NONE:
1709         case AGGR_GLOBAL:
1710         case AGGR_THREAD:
1711         case AGGR_UNSET:
1712         default:
1713                 break;
1714         }
1715
1716         /*
1717          * The evsel_list->cpus is the base we operate on,
1718          * taking the highest cpu number to be the size of
1719          * the aggregation translate cpumap.
1720          */
1721         nr = cpu_map__get_max(evsel_list->cpus);
1722         cpus_aggr_map = cpu_map__empty_new(nr + 1);
1723         return cpus_aggr_map ? 0 : -ENOMEM;
1724 }
1725
1726 static void perf_stat__exit_aggr_mode(void)
1727 {
1728         cpu_map__put(aggr_map);
1729         cpu_map__put(cpus_aggr_map);
1730         aggr_map = NULL;
1731         cpus_aggr_map = NULL;
1732 }
1733
1734 static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx)
1735 {
1736         int cpu;
1737
1738         if (idx > map->nr)
1739                 return -1;
1740
1741         cpu = map->map[idx];
1742
1743         if (cpu >= env->nr_cpus_online)
1744                 return -1;
1745
1746         return cpu;
1747 }
1748
1749 static int perf_env__get_socket(struct cpu_map *map, int idx, void *data)
1750 {
1751         struct perf_env *env = data;
1752         int cpu = perf_env__get_cpu(env, map, idx);
1753
1754         return cpu == -1 ? -1 : env->cpu[cpu].socket_id;
1755 }
1756
1757 static int perf_env__get_core(struct cpu_map *map, int idx, void *data)
1758 {
1759         struct perf_env *env = data;
1760         int core = -1, cpu = perf_env__get_cpu(env, map, idx);
1761
1762         if (cpu != -1) {
1763                 int socket_id = env->cpu[cpu].socket_id;
1764
1765                 /*
1766                  * Encode socket in upper 16 bits
1767                  * core_id is relative to socket, and
1768                  * we need a global id. So we combine
1769                  * socket + core id.
1770                  */
1771                 core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff);
1772         }
1773
1774         return core;
1775 }
1776
1777 static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus,
1778                                       struct cpu_map **sockp)
1779 {
1780         return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
1781 }
1782
1783 static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus,
1784                                     struct cpu_map **corep)
1785 {
1786         return cpu_map__build_map(cpus, corep, perf_env__get_core, env);
1787 }
1788
1789 static int perf_stat__get_socket_file(struct cpu_map *map, int idx)
1790 {
1791         return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
1792 }
1793
1794 static int perf_stat__get_core_file(struct cpu_map *map, int idx)
1795 {
1796         return perf_env__get_core(map, idx, &perf_stat.session->header.env);
1797 }
1798
1799 static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
1800 {
1801         struct perf_env *env = &st->session->header.env;
1802
1803         switch (stat_config.aggr_mode) {
1804         case AGGR_SOCKET:
1805                 if (perf_env__build_socket_map(env, evsel_list->cpus, &aggr_map)) {
1806                         perror("cannot build socket map");
1807                         return -1;
1808                 }
1809                 aggr_get_id = perf_stat__get_socket_file;
1810                 break;
1811         case AGGR_CORE:
1812                 if (perf_env__build_core_map(env, evsel_list->cpus, &aggr_map)) {
1813                         perror("cannot build core map");
1814                         return -1;
1815                 }
1816                 aggr_get_id = perf_stat__get_core_file;
1817                 break;
1818         case AGGR_NONE:
1819         case AGGR_GLOBAL:
1820         case AGGR_THREAD:
1821         case AGGR_UNSET:
1822         default:
1823                 break;
1824         }
1825
1826         return 0;
1827 }
1828
/*
 * Reduce @attr to the events the "cpu" PMU provides and render them as one
 * comma-separated event string.
 *
 * @attr:      NULL-terminated array of event names; compacted in place so
 *             that only the supported names remain, still NULL-terminated.
 * @str:       receives a malloc()ed string the caller must free.  It is empty
 *             when no event survived the filter.
 * @use_group: wrap the list in '{' ... '}' when true.
 *
 * Returns 0 on success, -1 if the string cannot be allocated.
 */
static int topdown_filter_events(const char **attr, char **str, bool use_group)
{
	const char **ev;
	size_t total = 0;
	int kept = 0;
	char *out;

	/* Pass 1: compact the array and add up name lengths plus separators. */
	for (ev = attr; *ev; ev++) {
		if (!pmu_have_event("cpu", *ev))
			continue;
		total += strlen(*ev) + 1;
		attr[kept++] = *ev;
	}
	attr[kept] = NULL;

	/* Room for the names, their separators, the braces and the NUL. */
	*str = malloc(total + 1 + 2);
	if (!*str)
		return -1;

	out = *str;
	if (kept == 0) {
		*out = 0;
		return 0;
	}

	/* Pass 2: emit "name," for every surviving event. */
	if (use_group)
		*out++ = '{';
	for (ev = attr; *ev; ev++) {
		size_t n = strlen(*ev);

		memcpy(out, *ev, n);
		out += n;
		*out++ = ',';
	}

	/* The trailing comma becomes either the closing brace or the NUL. */
	if (use_group) {
		out[-1] = '}';
		*out = 0;
	} else {
		out[-1] = 0;
	}

	return 0;
}
1867
1868 __weak bool arch_topdown_check_group(bool *warn)
1869 {
1870         *warn = false;
1871         return false;
1872 }
1873
1874 __weak void arch_topdown_group_warn(void)
1875 {
1876 }
1877
1878 /*
1879  * Add default attributes, if there were no attributes specified or
1880  * if -d/--detailed, -d -d or -d -d -d is used:
1881  */
1882 static int add_default_attributes(void)
1883 {
1884         int err;
1885         struct perf_event_attr default_attrs0[] = {
1886
1887   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK              },
1888   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES        },
1889   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS          },
1890   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS             },
1891
1892   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES              },
1893 };
1894         struct perf_event_attr frontend_attrs[] = {
1895   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
1896 };
1897         struct perf_event_attr backend_attrs[] = {
1898   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND  },
1899 };
1900         struct perf_event_attr default_attrs1[] = {
1901   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS            },
1902   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS     },
1903   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES           },
1904
1905 };
1906
1907 /*
1908  * Detailed stats (-d), covering the L1 and last level data caches:
1909  */
1910         struct perf_event_attr detailed_attrs[] = {
1911
1912   { .type = PERF_TYPE_HW_CACHE,
1913     .config =
1914          PERF_COUNT_HW_CACHE_L1D                <<  0  |
1915         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1916         (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1917
1918   { .type = PERF_TYPE_HW_CACHE,
1919     .config =
1920          PERF_COUNT_HW_CACHE_L1D                <<  0  |
1921         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1922         (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1923
1924   { .type = PERF_TYPE_HW_CACHE,
1925     .config =
1926          PERF_COUNT_HW_CACHE_LL                 <<  0  |
1927         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1928         (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1929
1930   { .type = PERF_TYPE_HW_CACHE,
1931     .config =
1932          PERF_COUNT_HW_CACHE_LL                 <<  0  |
1933         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1934         (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1935 };
1936
1937 /*
1938  * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
1939  */
1940         struct perf_event_attr very_detailed_attrs[] = {
1941
1942   { .type = PERF_TYPE_HW_CACHE,
1943     .config =
1944          PERF_COUNT_HW_CACHE_L1I                <<  0  |
1945         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1946         (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1947
1948   { .type = PERF_TYPE_HW_CACHE,
1949     .config =
1950          PERF_COUNT_HW_CACHE_L1I                <<  0  |
1951         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1952         (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1953
1954   { .type = PERF_TYPE_HW_CACHE,
1955     .config =
1956          PERF_COUNT_HW_CACHE_DTLB               <<  0  |
1957         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1958         (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1959
1960   { .type = PERF_TYPE_HW_CACHE,
1961     .config =
1962          PERF_COUNT_HW_CACHE_DTLB               <<  0  |
1963         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1964         (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1965
1966   { .type = PERF_TYPE_HW_CACHE,
1967     .config =
1968          PERF_COUNT_HW_CACHE_ITLB               <<  0  |
1969         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1970         (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1971
1972   { .type = PERF_TYPE_HW_CACHE,
1973     .config =
1974          PERF_COUNT_HW_CACHE_ITLB               <<  0  |
1975         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1976         (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1977
1978 };
1979
1980 /*
1981  * Very, very detailed stats (-d -d -d), adding prefetch events:
1982  */
1983         struct perf_event_attr very_very_detailed_attrs[] = {
1984
1985   { .type = PERF_TYPE_HW_CACHE,
1986     .config =
1987          PERF_COUNT_HW_CACHE_L1D                <<  0  |
1988         (PERF_COUNT_HW_CACHE_OP_PREFETCH        <<  8) |
1989         (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1990
1991   { .type = PERF_TYPE_HW_CACHE,
1992     .config =
1993          PERF_COUNT_HW_CACHE_L1D                <<  0  |
1994         (PERF_COUNT_HW_CACHE_OP_PREFETCH        <<  8) |
1995         (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1996 };
1997
1998         /* Set attrs if no event is selected and !null_run: */
1999         if (null_run)
2000                 return 0;
2001
2002         if (transaction_run) {
2003                 if (pmu_have_event("cpu", "cycles-ct") &&
2004                     pmu_have_event("cpu", "el-start"))
2005                         err = parse_events(evsel_list, transaction_attrs, NULL);
2006                 else
2007                         err = parse_events(evsel_list, transaction_limited_attrs, NULL);
2008                 if (err) {
2009                         fprintf(stderr, "Cannot set up transaction events\n");
2010                         return -1;
2011                 }
2012                 return 0;
2013         }
2014
2015         if (topdown_run) {
2016                 char *str = NULL;
2017                 bool warn = false;
2018
2019                 if (stat_config.aggr_mode != AGGR_GLOBAL &&
2020                     stat_config.aggr_mode != AGGR_CORE) {
2021                         pr_err("top down event configuration requires --per-core mode\n");
2022                         return -1;
2023                 }
2024                 stat_config.aggr_mode = AGGR_CORE;
2025                 if (nr_cgroups || !target__has_cpu(&target)) {
2026                         pr_err("top down event configuration requires system-wide mode (-a)\n");
2027                         return -1;
2028                 }
2029
2030                 if (!force_metric_only)
2031                         metric_only = true;
2032                 if (topdown_filter_events(topdown_attrs, &str,
2033                                 arch_topdown_check_group(&warn)) < 0) {
2034                         pr_err("Out of memory\n");
2035                         return -1;
2036                 }
2037                 if (topdown_attrs[0] && str) {
2038                         if (warn)
2039                                 arch_topdown_group_warn();
2040                         err = parse_events(evsel_list, str, NULL);
2041                         if (err) {
2042                                 fprintf(stderr,
2043                                         "Cannot set up top down events %s: %d\n",
2044                                         str, err);
2045                                 free(str);
2046                                 return -1;
2047                         }
2048                 } else {
2049                         fprintf(stderr, "System does not support topdown\n");
2050                         return -1;
2051                 }
2052                 free(str);
2053         }
2054
2055         if (!evsel_list->nr_entries) {
2056                 if (target__has_cpu(&target))
2057                         default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
2058
2059                 if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
2060                         return -1;
2061                 if (pmu_have_event("cpu", "stalled-cycles-frontend")) {
2062                         if (perf_evlist__add_default_attrs(evsel_list,
2063                                                 frontend_attrs) < 0)
2064                                 return -1;
2065                 }
2066                 if (pmu_have_event("cpu", "stalled-cycles-backend")) {
2067                         if (perf_evlist__add_default_attrs(evsel_list,
2068                                                 backend_attrs) < 0)
2069                                 return -1;
2070                 }
2071                 if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
2072                         return -1;
2073         }
2074
2075         /* Detailed events get appended to the event list: */
2076
2077         if (detailed_run <  1)
2078                 return 0;
2079
2080         /* Append detailed run extra attributes: */
2081         if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
2082                 return -1;
2083
2084         if (detailed_run < 2)
2085                 return 0;
2086
2087         /* Append very detailed run extra attributes: */
2088         if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
2089                 return -1;
2090
2091         if (detailed_run < 3)
2092                 return 0;
2093
2094         /* Append very, very detailed run extra attributes: */
2095         return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
2096 }
2097
/* Usage text handed to parse_options() by __cmd_record(); NULL-terminated. */
static const char * const stat_record_usage[] = {
	"perf stat record [<options>]",
	NULL,
};
2102
2103 static void init_features(struct perf_session *session)
2104 {
2105         int feat;
2106
2107         for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
2108                 perf_header__set_feat(&session->header, feat);
2109
2110         perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
2111         perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
2112         perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
2113         perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
2114 }
2115
2116 static int __cmd_record(int argc, const char **argv)
2117 {
2118         struct perf_session *session;
2119         struct perf_data_file *file = &perf_stat.file;
2120
2121         argc = parse_options(argc, argv, stat_options, stat_record_usage,
2122                              PARSE_OPT_STOP_AT_NON_OPTION);
2123
2124         if (output_name)
2125                 file->path = output_name;
2126
2127         if (run_count != 1 || forever) {
2128                 pr_err("Cannot use -r option with perf stat record.\n");
2129                 return -1;
2130         }
2131
2132         session = perf_session__new(file, false, NULL);
2133         if (session == NULL) {
2134                 pr_err("Perf session creation failed.\n");
2135                 return -1;
2136         }
2137
2138         init_features(session);
2139
2140         session->evlist   = evsel_list;
2141         perf_stat.session = session;
2142         perf_stat.record  = true;
2143         return argc;
2144 }
2145
2146 static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
2147                                     union perf_event *event,
2148                                     struct perf_session *session)
2149 {
2150         struct stat_round_event *stat_round = &event->stat_round;
2151         struct perf_evsel *counter;
2152         struct timespec tsh, *ts = NULL;
2153         const char **argv = session->header.env.cmdline_argv;
2154         int argc = session->header.env.nr_cmdline;
2155
2156         evlist__for_each_entry(evsel_list, counter)
2157                 perf_stat_process_counter(&stat_config, counter);
2158
2159         if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL)
2160                 update_stats(&walltime_nsecs_stats, stat_round->time);
2161
2162         if (stat_config.interval && stat_round->time) {
2163                 tsh.tv_sec  = stat_round->time / NSECS_PER_SEC;
2164                 tsh.tv_nsec = stat_round->time % NSECS_PER_SEC;
2165                 ts = &tsh;
2166         }
2167
2168         print_counters(ts, argc, argv);
2169         return 0;
2170 }
2171
2172 static
2173 int process_stat_config_event(struct perf_tool *tool __maybe_unused,
2174                               union perf_event *event,
2175                               struct perf_session *session __maybe_unused)
2176 {
2177         struct perf_stat *st = container_of(tool, struct perf_stat, tool);
2178
2179         perf_event__read_stat_config(&stat_config, &event->stat_config);
2180
2181         if (cpu_map__empty(st->cpus)) {
2182                 if (st->aggr_mode != AGGR_UNSET)
2183                         pr_warning("warning: processing task data, aggregation mode not set\n");
2184                 return 0;
2185         }
2186
2187         if (st->aggr_mode != AGGR_UNSET)
2188                 stat_config.aggr_mode = st->aggr_mode;
2189
2190         if (perf_stat.file.is_pipe)
2191                 perf_stat_init_aggr_mode();
2192         else
2193                 perf_stat_init_aggr_mode_file(st);
2194
2195         return 0;
2196 }
2197
2198 static int set_maps(struct perf_stat *st)
2199 {
2200         if (!st->cpus || !st->threads)
2201                 return 0;
2202
2203         if (WARN_ONCE(st->maps_allocated, "stats double allocation\n"))
2204                 return -EINVAL;
2205
2206         perf_evlist__set_maps(evsel_list, st->cpus, st->threads);
2207
2208         if (perf_evlist__alloc_stats(evsel_list, true))
2209                 return -ENOMEM;
2210
2211         st->maps_allocated = true;
2212         return 0;
2213 }
2214
2215 static
2216 int process_thread_map_event(struct perf_tool *tool __maybe_unused,
2217                              union perf_event *event,
2218                              struct perf_session *session __maybe_unused)
2219 {
2220         struct perf_stat *st = container_of(tool, struct perf_stat, tool);
2221
2222         if (st->threads) {
2223                 pr_warning("Extra thread map event, ignoring.\n");
2224                 return 0;
2225         }
2226
2227         st->threads = thread_map__new_event(&event->thread_map);
2228         if (!st->threads)
2229                 return -ENOMEM;
2230
2231         return set_maps(st);
2232 }
2233
2234 static
2235 int process_cpu_map_event(struct perf_tool *tool __maybe_unused,
2236                           union perf_event *event,
2237                           struct perf_session *session __maybe_unused)
2238 {
2239         struct perf_stat *st = container_of(tool, struct perf_stat, tool);
2240         struct cpu_map *cpus;
2241
2242         if (st->cpus) {
2243                 pr_warning("Extra cpu map event, ignoring.\n");
2244                 return 0;
2245         }
2246
2247         cpus = cpu_map__new_data(&event->cpu_map.data);
2248         if (!cpus)
2249                 return -ENOMEM;
2250
2251         st->cpus = cpus;
2252         return set_maps(st);
2253 }
2254
/* Usage text handed to parse_options() by __cmd_report(); NULL-terminated. */
static const char * const stat_report_usage[] = {
	"perf stat report [<options>]",
	NULL,
};
2259
2260 static struct perf_stat perf_stat = {
2261         .tool = {
2262                 .attr           = perf_event__process_attr,
2263                 .event_update   = perf_event__process_event_update,
2264                 .thread_map     = process_thread_map_event,
2265                 .cpu_map        = process_cpu_map_event,
2266                 .stat_config    = process_stat_config_event,
2267                 .stat           = perf_event__process_stat_event,
2268                 .stat_round     = process_stat_round_event,
2269         },
2270         .aggr_mode = AGGR_UNSET,
2271 };
2272
2273 static int __cmd_report(int argc, const char **argv)
2274 {
2275         struct perf_session *session;
2276         const struct option options[] = {
2277         OPT_STRING('i', "input", &input_name, "file", "input file name"),
2278         OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
2279                      "aggregate counts per processor socket", AGGR_SOCKET),
2280         OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
2281                      "aggregate counts per physical processor core", AGGR_CORE),
2282         OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
2283                      "disable CPU count aggregation", AGGR_NONE),
2284         OPT_END()
2285         };
2286         struct stat st;
2287         int ret;
2288
2289         argc = parse_options(argc, argv, options, stat_report_usage, 0);
2290
2291         if (!input_name || !strlen(input_name)) {
2292                 if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
2293                         input_name = "-";
2294                 else
2295                         input_name = "perf.data";
2296         }
2297
2298         perf_stat.file.path = input_name;
2299         perf_stat.file.mode = PERF_DATA_MODE_READ;
2300
2301         session = perf_session__new(&perf_stat.file, false, &perf_stat.tool);
2302         if (session == NULL)
2303                 return -1;
2304
2305         perf_stat.session  = session;
2306         stat_config.output = stderr;
2307         evsel_list         = session->evlist;
2308
2309         ret = perf_session__process_events(session);
2310         if (ret)
2311                 return ret;
2312
2313         perf_session__delete(session);
2314         return 0;
2315 }
2316
2317 int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
2318 {
2319         const char * const stat_usage[] = {
2320                 "perf stat [<options>] [<command>]",
2321                 NULL
2322         };
2323         int status = -EINVAL, run_idx;
2324         const char *mode;
2325         FILE *output = stderr;
2326         unsigned int interval;
2327         const char * const stat_subcommands[] = { "record", "report" };
2328
2329         setlocale(LC_ALL, "");
2330
2331         evsel_list = perf_evlist__new();
2332         if (evsel_list == NULL)
2333                 return -ENOMEM;
2334
2335         parse_events__shrink_config_terms();
2336         argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
2337                                         (const char **) stat_usage,
2338                                         PARSE_OPT_STOP_AT_NON_OPTION);
2339         perf_stat__init_shadow_stats();
2340
2341         if (csv_sep) {
2342                 csv_output = true;
2343                 if (!strcmp(csv_sep, "\\t"))
2344                         csv_sep = "\t";
2345         } else
2346                 csv_sep = DEFAULT_SEPARATOR;
2347
2348         if (argc && !strncmp(argv[0], "rec", 3)) {
2349                 argc = __cmd_record(argc, argv);
2350                 if (argc < 0)
2351                         return -1;
2352         } else if (argc && !strncmp(argv[0], "rep", 3))
2353                 return __cmd_report(argc, argv);
2354
2355         interval = stat_config.interval;
2356
2357         /*
2358          * For record command the -o is already taken care of.
2359          */
2360         if (!STAT_RECORD && output_name && strcmp(output_name, "-"))
2361                 output = NULL;
2362
2363         if (output_name && output_fd) {
2364                 fprintf(stderr, "cannot use both --output and --log-fd\n");
2365                 parse_options_usage(stat_usage, stat_options, "o", 1);
2366                 parse_options_usage(NULL, stat_options, "log-fd", 0);
2367                 goto out;
2368         }
2369
2370         if (metric_only && stat_config.aggr_mode == AGGR_THREAD) {
2371                 fprintf(stderr, "--metric-only is not supported with --per-thread\n");
2372                 goto out;
2373         }
2374
2375         if (metric_only && run_count > 1) {
2376                 fprintf(stderr, "--metric-only is not supported with -r\n");
2377                 goto out;
2378         }
2379
2380         if (output_fd < 0) {
2381                 fprintf(stderr, "argument to --log-fd must be a > 0\n");
2382                 parse_options_usage(stat_usage, stat_options, "log-fd", 0);
2383                 goto out;
2384         }
2385
2386         if (!output) {
2387                 struct timespec tm;
2388                 mode = append_file ? "a" : "w";
2389
2390                 output = fopen(output_name, mode);
2391                 if (!output) {
2392                         perror("failed to create output file");
2393                         return -1;
2394                 }
2395                 clock_gettime(CLOCK_REALTIME, &tm);
2396                 fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
2397         } else if (output_fd > 0) {
2398                 mode = append_file ? "a" : "w";
2399                 output = fdopen(output_fd, mode);
2400                 if (!output) {
2401                         perror("Failed opening logfd");
2402                         return -errno;
2403                 }
2404         }
2405
2406         stat_config.output = output;
2407
2408         /*
2409          * let the spreadsheet do the pretty-printing
2410          */
2411         if (csv_output) {
2412                 /* User explicitly passed -B? */
2413                 if (big_num_opt == 1) {
2414                         fprintf(stderr, "-B option not supported with -x\n");
2415                         parse_options_usage(stat_usage, stat_options, "B", 1);
2416                         parse_options_usage(NULL, stat_options, "x", 1);
2417                         goto out;
2418                 } else /* Nope, so disable big number formatting */
2419                         big_num = false;
2420         } else if (big_num_opt == 0) /* User passed --no-big-num */
2421                 big_num = false;
2422
2423         if (!argc && target__none(&target))
2424                 usage_with_options(stat_usage, stat_options);
2425
2426         if (run_count < 0) {
2427                 pr_err("Run count must be a positive number\n");
2428                 parse_options_usage(stat_usage, stat_options, "r", 1);
2429                 goto out;
2430         } else if (run_count == 0) {
2431                 forever = true;
2432                 run_count = 1;
2433         }
2434
2435         if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
2436                 fprintf(stderr, "The --per-thread option is only available "
2437                         "when monitoring via -p -t options.\n");
2438                 parse_options_usage(NULL, stat_options, "p", 1);
2439                 parse_options_usage(NULL, stat_options, "t", 1);
2440                 goto out;
2441         }
2442
2443         /*
2444          * no_aggr, cgroup are for system-wide only
2445          * --per-thread is aggregated per thread, we dont mix it with cpu mode
2446          */
2447         if (((stat_config.aggr_mode != AGGR_GLOBAL &&
2448               stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) &&
2449             !target__has_cpu(&target)) {
2450                 fprintf(stderr, "both cgroup and no-aggregation "
2451                         "modes only available in system-wide mode\n");
2452
2453                 parse_options_usage(stat_usage, stat_options, "G", 1);
2454                 parse_options_usage(NULL, stat_options, "A", 1);
2455                 parse_options_usage(NULL, stat_options, "a", 1);
2456                 goto out;
2457         }
2458
2459         if (add_default_attributes())
2460                 goto out;
2461
2462         target__validate(&target);
2463
2464         if (perf_evlist__create_maps(evsel_list, &target) < 0) {
2465                 if (target__has_task(&target)) {
2466                         pr_err("Problems finding threads of monitor\n");
2467                         parse_options_usage(stat_usage, stat_options, "p", 1);
2468                         parse_options_usage(NULL, stat_options, "t", 1);
2469                 } else if (target__has_cpu(&target)) {
2470                         perror("failed to parse CPUs map");
2471                         parse_options_usage(stat_usage, stat_options, "C", 1);
2472                         parse_options_usage(NULL, stat_options, "a", 1);
2473                 }
2474                 goto out;
2475         }
2476
2477         /*
2478          * Initialize thread_map with comm names,
2479          * so we could print it out on output.
2480          */
2481         if (stat_config.aggr_mode == AGGR_THREAD)
2482                 thread_map__read_comms(evsel_list->threads);
2483
2484         if (interval && interval < 100) {
2485                 if (interval < 10) {
2486                         pr_err("print interval must be >= 10ms\n");
2487                         parse_options_usage(stat_usage, stat_options, "I", 1);
2488                         goto out;
2489                 } else
2490                         pr_warning("print interval < 100ms. "
2491                                    "The overhead percentage could be high in some cases. "
2492                                    "Please proceed with caution.\n");
2493         }
2494
2495         if (perf_evlist__alloc_stats(evsel_list, interval))
2496                 goto out;
2497
2498         if (perf_stat_init_aggr_mode())
2499                 goto out;
2500
2501         /*
2502          * We dont want to block the signals - that would cause
2503          * child tasks to inherit that and Ctrl-C would not work.
2504          * What we want is for Ctrl-C to work in the exec()-ed
2505          * task, but being ignored by perf stat itself:
2506          */
2507         atexit(sig_atexit);
2508         if (!forever)
2509                 signal(SIGINT,  skip_signal);
2510         signal(SIGCHLD, skip_signal);
2511         signal(SIGALRM, skip_signal);
2512         signal(SIGABRT, skip_signal);
2513
2514         status = 0;
2515         for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
2516                 if (run_count != 1 && verbose)
2517                         fprintf(output, "[ perf stat: executing run #%d ... ]\n",
2518                                 run_idx + 1);
2519
2520                 status = run_perf_stat(argc, argv);
2521                 if (forever && status != -1) {
2522                         print_counters(NULL, argc, argv);
2523                         perf_stat__reset_stats();
2524                 }
2525         }
2526
2527         if (!forever && status != -1 && !interval)
2528                 print_counters(NULL, argc, argv);
2529
2530         if (STAT_RECORD) {
2531                 /*
2532                  * We synthesize the kernel mmap record just so that older tools
2533                  * don't emit warnings about not being able to resolve symbols
2534                  * due to /proc/sys/kernel/kptr_restrict settings and instead provide
2535                  * a saner message about no samples being in the perf.data file.
2536                  *
2537                  * This also serves to suppress a warning about f_header.data.size == 0
2538                  * in header.c at the moment 'perf stat record' gets introduced, which
2539                  * is not really needed once we start adding the stat specific PERF_RECORD_
2540                  * records, but the need to suppress the kptr_restrict messages in older
2541                  * tools remain  -acme
2542                  */
2543                 int fd = perf_data_file__fd(&perf_stat.file);
2544                 int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat,
2545                                                              process_synthesized_event,
2546                                                              &perf_stat.session->machines.host);
2547                 if (err) {
2548                         pr_warning("Couldn't synthesize the kernel mmap record, harmless, "
2549                                    "older tools may produce warnings about this file\n.");
2550                 }
2551
2552                 if (!interval) {
2553                         if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL))
2554                                 pr_err("failed to write stat round event\n");
2555                 }
2556
2557                 if (!perf_stat.file.is_pipe) {
2558                         perf_stat.session->header.data_size += perf_stat.bytes_written;
2559                         perf_session__write_header(perf_stat.session, evsel_list, fd, true);
2560                 }
2561
2562                 perf_session__delete(perf_stat.session);
2563         }
2564
2565         perf_stat__exit_aggr_mode();
2566         perf_evlist__free_stats(evsel_list);
2567 out:
2568         perf_evlist__delete(evsel_list);
2569         return status;
2570 }