perf intel-pt: Add a helper function for processing AUXTRACE_INFO
tools/perf/util/intel-pt.c
/*
 * intel_pt.c: Intel Processor Trace support
 * Copyright (c) 2013-2015, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 */

#include <stdio.h>
#include <stdbool.h>
#include <errno.h>
#include <linux/kernel.h>
#include <linux/types.h>

#include "../perf.h"
#include "session.h"
#include "machine.h"
#include "sort.h"
#include "tool.h"
#include "event.h"
#include "evlist.h"
#include "evsel.h"
#include "map.h"
#include "color.h"
#include "util.h"
#include "thread.h"
#include "thread-stack.h"
#include "symbol.h"
#include "callchain.h"
#include "dso.h"
#include "debug.h"
#include "auxtrace.h"
#include "tsc.h"
#include "intel-pt.h"
#include "config.h"

#include "intel-pt-decoder/intel-pt-log.h"
#include "intel-pt-decoder/intel-pt-decoder.h"
#include "intel-pt-decoder/intel-pt-insn-decoder.h"
#include "intel-pt-decoder/intel-pt-pkt-decoder.h"

#define MAX_TIMESTAMP (~0ULL)

struct intel_pt {
        struct auxtrace auxtrace;
        struct auxtrace_queues queues;
        struct auxtrace_heap heap;
        u32 auxtrace_type;
        struct perf_session *session;
        struct machine *machine;
        struct perf_evsel *switch_evsel;
        struct thread *unknown_thread;
        bool timeless_decoding;
        bool sampling_mode;
        bool snapshot_mode;
        bool per_cpu_mmaps;
        bool have_tsc;
        bool data_queued;
        bool est_tsc;
        bool sync_switch;
        bool mispred_all;
        int have_sched_switch;
        u32 pmu_type;
        u64 kernel_start;
        u64 switch_ip;
        u64 ptss_ip;

        struct perf_tsc_conversion tc;
        bool cap_user_time_zero;

        struct itrace_synth_opts synth_opts;

        bool sample_instructions;
        u64 instructions_sample_type;
        u64 instructions_sample_period;
        u64 instructions_id;

        bool sample_branches;
        u32 branches_filter;
        u64 branches_sample_type;
        u64 branches_id;

        bool sample_transactions;
        u64 transactions_sample_type;
        u64 transactions_id;

        bool synth_needs_swap;

        u64 tsc_bit;
        u64 mtc_bit;
        u64 mtc_freq_bits;
        u32 tsc_ctc_ratio_n;
        u32 tsc_ctc_ratio_d;
        u64 cyc_bit;
        u64 noretcomp_bit;
        unsigned max_non_turbo_ratio;

        unsigned long num_events;
};

enum switch_state {
        INTEL_PT_SS_NOT_TRACING,
        INTEL_PT_SS_UNKNOWN,
        INTEL_PT_SS_TRACING,
        INTEL_PT_SS_EXPECTING_SWITCH_EVENT,
        INTEL_PT_SS_EXPECTING_SWITCH_IP,
};

struct intel_pt_queue {
        struct intel_pt *pt;
        unsigned int queue_nr;
        struct auxtrace_buffer *buffer;
        void *decoder;
        const struct intel_pt_state *state;
        struct ip_callchain *chain;
        struct branch_stack *last_branch;
        struct branch_stack *last_branch_rb;
        size_t last_branch_pos;
        union perf_event *event_buf;
        bool on_heap;
        bool stop;
        bool step_through_buffers;
        bool use_buffer_pid_tid;
        pid_t pid, tid;
        int cpu;
        int switch_state;
        pid_t next_tid;
        struct thread *thread;
        bool exclude_kernel;
        bool have_sample;
        u64 time;
        u64 timestamp;
        u32 flags;
        u16 insn_len;
        u64 last_insn_cnt;
};

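/*
 * Print the trace data as a hex dump of Intel PT packets with their decoded
 * descriptions.
 */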
static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
                          unsigned char *buf, size_t len)
{
        struct intel_pt_pkt packet;
        size_t pos = 0;
        int ret, pkt_len, i;
        char desc[INTEL_PT_PKT_DESC_MAX];
        const char *color = PERF_COLOR_BLUE;

        color_fprintf(stdout, color,
                      ". ... Intel Processor Trace data: size %zu bytes\n",
                      len);

        while (len) {
                ret = intel_pt_get_packet(buf, len, &packet);
                if (ret > 0)
                        pkt_len = ret;
                else
                        pkt_len = 1;
                printf(".");
                color_fprintf(stdout, color, "  %08x: ", pos);
                for (i = 0; i < pkt_len; i++)
                        color_fprintf(stdout, color, " %02x", buf[i]);
                for (; i < 16; i++)
                        color_fprintf(stdout, color, "   ");
                if (ret > 0) {
                        ret = intel_pt_pkt_desc(&packet, desc,
                                                INTEL_PT_PKT_DESC_MAX);
                        if (ret > 0)
                                color_fprintf(stdout, color, " %s\n", desc);
                } else {
                        color_fprintf(stdout, color, " Bad packet!\n");
                }
                pos += pkt_len;
                buf += pkt_len;
                len -= pkt_len;
        }
}

static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
                                size_t len)
{
        printf(".\n");
        intel_pt_dump(pt, buf, len);
}

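/*
 * Consecutive snapshot buffers can contain overlapping data.  Find where the
 * new buffer 'b' stops overlapping the old buffer 'a' and adjust 'b' to use
 * only the non-overlapping part.
 */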
static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
                                   struct auxtrace_buffer *b)
{
        void *start;

        start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
                                      pt->have_tsc);
        if (!start)
                return -EINVAL;
        b->use_size = b->data + b->size - start;
        b->use_data = start;
        return 0;
}

static void intel_pt_use_buffer_pid_tid(struct intel_pt_queue *ptq,
                                        struct auxtrace_queue *queue,
                                        struct auxtrace_buffer *buffer)
{
        if (queue->cpu == -1 && buffer->cpu != -1)
                ptq->cpu = buffer->cpu;

        ptq->pid = buffer->pid;
        ptq->tid = buffer->tid;

        intel_pt_log("queue %u cpu %d pid %d tid %d\n",
                     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);

        thread__zput(ptq->thread);

        if (ptq->tid != -1) {
                if (ptq->pid != -1)
                        ptq->thread = machine__findnew_thread(ptq->pt->machine,
                                                              ptq->pid,
                                                              ptq->tid);
                else
                        ptq->thread = machine__find_thread(ptq->pt->machine, -1,
                                                           ptq->tid);
        }
}

/* This function assumes data is processed sequentially only */
static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
{
        struct intel_pt_queue *ptq = data;
        struct auxtrace_buffer *buffer = ptq->buffer, *old_buffer = buffer;
        struct auxtrace_queue *queue;

        if (ptq->stop) {
                b->len = 0;
                return 0;
        }

        queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
next:
        buffer = auxtrace_buffer__next(queue, buffer);
        if (!buffer) {
                if (old_buffer)
                        auxtrace_buffer__drop_data(old_buffer);
                b->len = 0;
                return 0;
        }

        ptq->buffer = buffer;

        if (!buffer->data) {
                int fd = perf_data_file__fd(ptq->pt->session->file);

                buffer->data = auxtrace_buffer__get_data(buffer, fd);
                if (!buffer->data)
                        return -ENOMEM;
        }

        if (ptq->pt->snapshot_mode && !buffer->consecutive && old_buffer &&
            intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
                return -ENOMEM;

        if (buffer->use_data) {
                b->len = buffer->use_size;
                b->buf = buffer->use_data;
        } else {
                b->len = buffer->size;
                b->buf = buffer->data;
        }
        b->ref_timestamp = buffer->reference;

        /*
         * If in snapshot mode and the buffer has no usable data, get next
         * buffer and again check overlap against old_buffer.
         */
        if (ptq->pt->snapshot_mode && !b->len)
                goto next;

        if (old_buffer)
                auxtrace_buffer__drop_data(old_buffer);

        if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode &&
                                                      !buffer->consecutive)) {
                b->consecutive = false;
                b->trace_nr = buffer->buffer_nr + 1;
        } else {
                b->consecutive = true;
        }

        if (ptq->use_buffer_pid_tid && (ptq->pid != buffer->pid ||
                                        ptq->tid != buffer->tid))
                intel_pt_use_buffer_pid_tid(ptq, queue, buffer);

        if (ptq->step_through_buffers)
                ptq->stop = true;

        if (!b->len)
                return intel_pt_get_trace(b, data);

        return 0;
}

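/*
 * Per-dso cache of already-decoded instruction runs, keyed by file offset,
 * used to avoid re-reading and re-decoding object code while walking the
 * trace.
 */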
struct intel_pt_cache_entry {
        struct auxtrace_cache_entry     entry;
        u64                             insn_cnt;
        u64                             byte_cnt;
        enum intel_pt_insn_op           op;
        enum intel_pt_insn_branch       branch;
        int                             length;
        int32_t                         rel;
};

static int intel_pt_config_div(const char *var, const char *value, void *data)
{
        int *d = data;
        long val;

        if (!strcmp(var, "intel-pt.cache-divisor")) {
                val = strtol(value, NULL, 0);
                if (val > 0 && val <= INT_MAX)
                        *d = val;
        }

        return 0;
}

static int intel_pt_cache_divisor(void)
{
        static int d;

        if (d)
                return d;

        perf_config(intel_pt_config_div, &d);

        if (!d)
                d = 64;

        return d;
}

static unsigned int intel_pt_cache_size(struct dso *dso,
                                        struct machine *machine)
{
        off_t size;

        size = dso__data_size(dso, machine);
        size /= intel_pt_cache_divisor();
        if (size < 1000)
                return 10;
        if (size > (1 << 21))
                return 21;
        return 32 - __builtin_clz(size);
}

static struct auxtrace_cache *intel_pt_cache(struct dso *dso,
                                             struct machine *machine)
{
        struct auxtrace_cache *c;
        unsigned int bits;

        if (dso->auxtrace_cache)
                return dso->auxtrace_cache;

        bits = intel_pt_cache_size(dso, machine);

        /* Ignoring cache creation failure */
        c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200);

        dso->auxtrace_cache = c;

        return c;
}

static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
                              u64 offset, u64 insn_cnt, u64 byte_cnt,
                              struct intel_pt_insn *intel_pt_insn)
{
        struct auxtrace_cache *c = intel_pt_cache(dso, machine);
        struct intel_pt_cache_entry *e;
        int err;

        if (!c)
                return -ENOMEM;

        e = auxtrace_cache__alloc_entry(c);
        if (!e)
                return -ENOMEM;

        e->insn_cnt = insn_cnt;
        e->byte_cnt = byte_cnt;
        e->op = intel_pt_insn->op;
        e->branch = intel_pt_insn->branch;
        e->length = intel_pt_insn->length;
        e->rel = intel_pt_insn->rel;

        err = auxtrace_cache__add(c, offset, &e->entry);
        if (err)
                auxtrace_cache__free_entry(c, e);

        return err;
}

static struct intel_pt_cache_entry *
intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
{
        struct auxtrace_cache *c = intel_pt_cache(dso, machine);

        if (!c)
                return NULL;

        return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
}

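/*
 * Walk object code from *ip, decoding one instruction at a time, until a
 * branch instruction, the target 'to_ip', or 'max_insn_cnt' instructions are
 * reached.  Runs that stay within one map are cached per dso.
 */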
static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
                                   uint64_t *insn_cnt_ptr, uint64_t *ip,
                                   uint64_t to_ip, uint64_t max_insn_cnt,
                                   void *data)
{
        struct intel_pt_queue *ptq = data;
        struct machine *machine = ptq->pt->machine;
        struct thread *thread;
        struct addr_location al;
        unsigned char buf[1024];
        size_t bufsz;
        ssize_t len;
        int x86_64;
        u8 cpumode;
        u64 offset, start_offset, start_ip;
        u64 insn_cnt = 0;
        bool one_map = true;

        if (to_ip && *ip == to_ip)
                goto out_no_cache;

        bufsz = intel_pt_insn_max_size();

        if (*ip >= ptq->pt->kernel_start)
                cpumode = PERF_RECORD_MISC_KERNEL;
        else
                cpumode = PERF_RECORD_MISC_USER;

        thread = ptq->thread;
        if (!thread) {
                if (cpumode != PERF_RECORD_MISC_KERNEL)
                        return -EINVAL;
                thread = ptq->pt->unknown_thread;
        }

        while (1) {
                thread__find_addr_map(thread, cpumode, MAP__FUNCTION, *ip, &al);
                if (!al.map || !al.map->dso)
                        return -EINVAL;

                if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
                    dso__data_status_seen(al.map->dso,
                                          DSO_DATA_STATUS_SEEN_ITRACE))
                        return -ENOENT;

                offset = al.map->map_ip(al.map, *ip);

                if (!to_ip && one_map) {
                        struct intel_pt_cache_entry *e;

                        e = intel_pt_cache_lookup(al.map->dso, machine, offset);
                        if (e &&
                            (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) {
                                *insn_cnt_ptr = e->insn_cnt;
                                *ip += e->byte_cnt;
                                intel_pt_insn->op = e->op;
                                intel_pt_insn->branch = e->branch;
                                intel_pt_insn->length = e->length;
                                intel_pt_insn->rel = e->rel;
                                intel_pt_log_insn_no_data(intel_pt_insn, *ip);
                                return 0;
                        }
                }

                start_offset = offset;
                start_ip = *ip;

                /* Load maps to ensure dso->is_64_bit has been updated */
                map__load(al.map);

                x86_64 = al.map->dso->is_64_bit;

                while (1) {
                        len = dso__data_read_offset(al.map->dso, machine,
                                                    offset, buf, bufsz);
                        if (len <= 0)
                                return -EINVAL;

                        if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn))
                                return -EINVAL;

                        intel_pt_log_insn(intel_pt_insn, *ip);

                        insn_cnt += 1;

                        if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH)
                                goto out;

                        if (max_insn_cnt && insn_cnt >= max_insn_cnt)
                                goto out_no_cache;

                        *ip += intel_pt_insn->length;

                        if (to_ip && *ip == to_ip)
                                goto out_no_cache;

                        if (*ip >= al.map->end)
                                break;

                        offset += intel_pt_insn->length;
                }
                one_map = false;
        }
out:
        *insn_cnt_ptr = insn_cnt;

        if (!one_map)
                goto out_no_cache;

        /*
         * Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate
         * entries.
         */
        if (to_ip) {
                struct intel_pt_cache_entry *e;

                e = intel_pt_cache_lookup(al.map->dso, machine, start_offset);
                if (e)
                        return 0;
        }

        /* Ignore cache errors */
        intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt,
                           *ip - start_ip, intel_pt_insn);

        return 0;

out_no_cache:
        *insn_cnt_ptr = insn_cnt;
        return 0;
}

static bool intel_pt_get_config(struct intel_pt *pt,
                                struct perf_event_attr *attr, u64 *config)
{
        if (attr->type == pt->pmu_type) {
                if (config)
                        *config = attr->config;
                return true;
        }

        return false;
}

static bool intel_pt_exclude_kernel(struct intel_pt *pt)
{
        struct perf_evsel *evsel;

        evlist__for_each_entry(pt->session->evlist, evsel) {
                if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
                    !evsel->attr.exclude_kernel)
                        return false;
        }
        return true;
}

static bool intel_pt_return_compression(struct intel_pt *pt)
{
        struct perf_evsel *evsel;
        u64 config;

        if (!pt->noretcomp_bit)
                return true;

        evlist__for_each_entry(pt->session->evlist, evsel) {
                if (intel_pt_get_config(pt, &evsel->attr, &config) &&
                    (config & pt->noretcomp_bit))
                        return false;
        }
        return true;
}

static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
{
        struct perf_evsel *evsel;
        unsigned int shift;
        u64 config;

        if (!pt->mtc_freq_bits)
                return 0;

        for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++)
                config >>= 1;

        evlist__for_each_entry(pt->session->evlist, evsel) {
                if (intel_pt_get_config(pt, &evsel->attr, &config))
                        return (config & pt->mtc_freq_bits) >> shift;
        }
        return 0;
}

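/*
 * Decoding is "timeless" when timestamps cannot be used: TSC was not enabled,
 * the clock cannot be converted (no cap_user_time_zero), or events were not
 * sampled with time.
 */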
static bool intel_pt_timeless_decoding(struct intel_pt *pt)
{
        struct perf_evsel *evsel;
        bool timeless_decoding = true;
        u64 config;

        if (!pt->tsc_bit || !pt->cap_user_time_zero)
                return true;

        evlist__for_each_entry(pt->session->evlist, evsel) {
                if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME))
                        return true;
                if (intel_pt_get_config(pt, &evsel->attr, &config)) {
                        if (config & pt->tsc_bit)
                                timeless_decoding = false;
                        else
                                return true;
                }
        }
        return timeless_decoding;
}

static bool intel_pt_tracing_kernel(struct intel_pt *pt)
{
        struct perf_evsel *evsel;

        evlist__for_each_entry(pt->session->evlist, evsel) {
                if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
                    !evsel->attr.exclude_kernel)
                        return true;
        }
        return false;
}

static bool intel_pt_have_tsc(struct intel_pt *pt)
{
        struct perf_evsel *evsel;
        bool have_tsc = false;
        u64 config;

        if (!pt->tsc_bit)
                return false;

        evlist__for_each_entry(pt->session->evlist, evsel) {
                if (intel_pt_get_config(pt, &evsel->attr, &config)) {
                        if (config & pt->tsc_bit)
                                have_tsc = true;
                        else
                                return false;
                }
        }
        return have_tsc;
}

static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
{
        u64 quot, rem;

        quot = ns / pt->tc.time_mult;
        rem  = ns % pt->tc.time_mult;
        return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) /
                pt->tc.time_mult;
}

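/*
 * Allocate and configure a decoder queue: sample buffers, call chain and
 * last-branch buffers according to the synthesis options, and the decoder
 * parameters (periods, MTC/CTC ratios, return compression).
 */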
static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
                                                   unsigned int queue_nr)
{
        struct intel_pt_params params = { .get_trace = 0, };
        struct intel_pt_queue *ptq;

        ptq = zalloc(sizeof(struct intel_pt_queue));
        if (!ptq)
                return NULL;

        if (pt->synth_opts.callchain) {
                size_t sz = sizeof(struct ip_callchain);

                sz += pt->synth_opts.callchain_sz * sizeof(u64);
                ptq->chain = zalloc(sz);
                if (!ptq->chain)
                        goto out_free;
        }

        if (pt->synth_opts.last_branch) {
                size_t sz = sizeof(struct branch_stack);

                sz += pt->synth_opts.last_branch_sz *
                      sizeof(struct branch_entry);
                ptq->last_branch = zalloc(sz);
                if (!ptq->last_branch)
                        goto out_free;
                ptq->last_branch_rb = zalloc(sz);
                if (!ptq->last_branch_rb)
                        goto out_free;
        }

        ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
        if (!ptq->event_buf)
                goto out_free;

        ptq->pt = pt;
        ptq->queue_nr = queue_nr;
        ptq->exclude_kernel = intel_pt_exclude_kernel(pt);
        ptq->pid = -1;
        ptq->tid = -1;
        ptq->cpu = -1;
        ptq->next_tid = -1;

        params.get_trace = intel_pt_get_trace;
        params.walk_insn = intel_pt_walk_next_insn;
        params.data = ptq;
        params.return_compression = intel_pt_return_compression(pt);
        params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
        params.mtc_period = intel_pt_mtc_period(pt);
        params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
        params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;

        if (pt->synth_opts.instructions) {
                if (pt->synth_opts.period) {
                        switch (pt->synth_opts.period_type) {
                        case PERF_ITRACE_PERIOD_INSTRUCTIONS:
                                params.period_type =
                                                INTEL_PT_PERIOD_INSTRUCTIONS;
                                params.period = pt->synth_opts.period;
                                break;
                        case PERF_ITRACE_PERIOD_TICKS:
                                params.period_type = INTEL_PT_PERIOD_TICKS;
                                params.period = pt->synth_opts.period;
                                break;
                        case PERF_ITRACE_PERIOD_NANOSECS:
                                params.period_type = INTEL_PT_PERIOD_TICKS;
                                params.period = intel_pt_ns_to_ticks(pt,
                                                        pt->synth_opts.period);
                                break;
                        default:
                                break;
                        }
                }

                if (!params.period) {
                        params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS;
                        params.period = 1;
                }
        }

        ptq->decoder = intel_pt_decoder_new(&params);
        if (!ptq->decoder)
                goto out_free;

        return ptq;

out_free:
        zfree(&ptq->event_buf);
        zfree(&ptq->last_branch);
        zfree(&ptq->last_branch_rb);
        zfree(&ptq->chain);
        free(ptq);
        return NULL;
}

static void intel_pt_free_queue(void *priv)
{
        struct intel_pt_queue *ptq = priv;

        if (!ptq)
                return;
        thread__zput(ptq->thread);
        intel_pt_decoder_free(ptq->decoder);
        zfree(&ptq->event_buf);
        zfree(&ptq->last_branch);
        zfree(&ptq->last_branch_rb);
        zfree(&ptq->chain);
        free(ptq);
}

static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
                                     struct auxtrace_queue *queue)
{
        struct intel_pt_queue *ptq = queue->priv;

        if (queue->tid == -1 || pt->have_sched_switch) {
                ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu);
                thread__zput(ptq->thread);
        }

        if (!ptq->thread && ptq->tid != -1)
                ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid);

        if (ptq->thread) {
                ptq->pid = ptq->thread->pid_;
                if (queue->cpu == -1)
                        ptq->cpu = ptq->thread->cpu;
        }
}

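/*
 * Translate the decoder state flags into perf branch sample flags
 * (PERF_IP_FLAG_*) and record the instruction length for the sample.
 */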
static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
{
        if (ptq->state->flags & INTEL_PT_ABORT_TX) {
                ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
        } else if (ptq->state->flags & INTEL_PT_ASYNC) {
                if (ptq->state->to_ip)
                        ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
                                     PERF_IP_FLAG_ASYNC |
                                     PERF_IP_FLAG_INTERRUPT;
                else
                        ptq->flags = PERF_IP_FLAG_BRANCH |
                                     PERF_IP_FLAG_TRACE_END;
                ptq->insn_len = 0;
        } else {
                if (ptq->state->from_ip)
                        ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
                else
                        ptq->flags = PERF_IP_FLAG_BRANCH |
                                     PERF_IP_FLAG_TRACE_BEGIN;
                if (ptq->state->flags & INTEL_PT_IN_TX)
                        ptq->flags |= PERF_IP_FLAG_IN_TX;
                ptq->insn_len = ptq->state->insn_len;
        }
}

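/*
 * Set up one auxtrace queue for decoding.  Unless decoding is timeless,
 * decode far enough to find the first timestamp and add the queue to the
 * heap that orders queues by timestamp.
 */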
static int intel_pt_setup_queue(struct intel_pt *pt,
                                struct auxtrace_queue *queue,
                                unsigned int queue_nr)
{
        struct intel_pt_queue *ptq = queue->priv;

        if (list_empty(&queue->head))
                return 0;

        if (!ptq) {
                ptq = intel_pt_alloc_queue(pt, queue_nr);
                if (!ptq)
                        return -ENOMEM;
                queue->priv = ptq;

                if (queue->cpu != -1)
                        ptq->cpu = queue->cpu;
                ptq->tid = queue->tid;

                if (pt->sampling_mode) {
                        if (pt->timeless_decoding)
                                ptq->step_through_buffers = true;
                        if (pt->timeless_decoding || !pt->have_sched_switch)
                                ptq->use_buffer_pid_tid = true;
                }
        }

        if (!ptq->on_heap &&
            (!pt->sync_switch ||
             ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
                const struct intel_pt_state *state;
                int ret;

                if (pt->timeless_decoding)
                        return 0;

                intel_pt_log("queue %u getting timestamp\n", queue_nr);
                intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
                             queue_nr, ptq->cpu, ptq->pid, ptq->tid);
                while (1) {
                        state = intel_pt_decode(ptq->decoder);
                        if (state->err) {
                                if (state->err == INTEL_PT_ERR_NODATA) {
                                        intel_pt_log("queue %u has no timestamp\n",
                                                     queue_nr);
                                        return 0;
                                }
                                continue;
                        }
                        if (state->timestamp)
                                break;
                }

                ptq->timestamp = state->timestamp;
                intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n",
                             queue_nr, ptq->timestamp);
                ptq->state = state;
                ptq->have_sample = true;
                intel_pt_sample_flags(ptq);
                ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
                if (ret)
                        return ret;
                ptq->on_heap = true;
        }

        return 0;
}

static int intel_pt_setup_queues(struct intel_pt *pt)
{
        unsigned int i;
        int ret;

        for (i = 0; i < pt->queues.nr_queues; i++) {
                ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i);
                if (ret)
                        return ret;
        }
        return 0;
}

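/*
 * The last branches are kept in a ring buffer (last_branch_rb).  Copy them
 * out, most recent first, into the linear last_branch stack used for samples.
 */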
static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq)
{
        struct branch_stack *bs_src = ptq->last_branch_rb;
        struct branch_stack *bs_dst = ptq->last_branch;
        size_t nr = 0;

        bs_dst->nr = bs_src->nr;

        if (!bs_src->nr)
                return;

        nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos;
        memcpy(&bs_dst->entries[0],
               &bs_src->entries[ptq->last_branch_pos],
               sizeof(struct branch_entry) * nr);

        if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) {
                memcpy(&bs_dst->entries[nr],
                       &bs_src->entries[0],
                       sizeof(struct branch_entry) * ptq->last_branch_pos);
        }
}

static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq)
{
        ptq->last_branch_pos = 0;
        ptq->last_branch_rb->nr = 0;
}

static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
{
        const struct intel_pt_state *state = ptq->state;
        struct branch_stack *bs = ptq->last_branch_rb;
        struct branch_entry *be;

        if (!ptq->last_branch_pos)
                ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz;

        ptq->last_branch_pos -= 1;

        be              = &bs->entries[ptq->last_branch_pos];
        be->from        = state->from_ip;
        be->to          = state->to_ip;
        be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX);
        be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX);
        /* No support for mispredict */
        be->flags.mispred = ptq->pt->mispred_all;

        if (bs->nr < ptq->pt->synth_opts.last_branch_sz)
                bs->nr += 1;
}

static int intel_pt_inject_event(union perf_event *event,
                                 struct perf_sample *sample, u64 type,
                                 bool swapped)
{
        event->header.size = perf_event__sample_event_size(sample, type, 0);
        return perf_event__synthesize_sample(event, type, 0, sample, swapped);
}

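/*
 * Synthesize a branch PERF_RECORD_SAMPLE from the current decoder state and
 * deliver it to the session, honouring the branch filter and initial skip
 * options.
 */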
static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
{
        int ret;
        struct intel_pt *pt = ptq->pt;
        union perf_event *event = ptq->event_buf;
        struct perf_sample sample = { .ip = 0, };
        struct dummy_branch_stack {
                u64                     nr;
                struct branch_entry     entries;
        } dummy_bs;

        if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
                return 0;

        if (pt->synth_opts.initial_skip &&
            pt->num_events++ < pt->synth_opts.initial_skip)
                return 0;

        event->sample.header.type = PERF_RECORD_SAMPLE;
        event->sample.header.misc = PERF_RECORD_MISC_USER;
        event->sample.header.size = sizeof(struct perf_event_header);

        if (!pt->timeless_decoding)
                sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);

        sample.cpumode = PERF_RECORD_MISC_USER;
        sample.ip = ptq->state->from_ip;
        sample.pid = ptq->pid;
        sample.tid = ptq->tid;
        sample.addr = ptq->state->to_ip;
        sample.id = ptq->pt->branches_id;
        sample.stream_id = ptq->pt->branches_id;
        sample.period = 1;
        sample.cpu = ptq->cpu;
        sample.flags = ptq->flags;
        sample.insn_len = ptq->insn_len;

        /*
         * perf report cannot handle events without a branch stack when using
         * SORT_MODE__BRANCH so make a dummy one.
         */
        if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
                dummy_bs = (struct dummy_branch_stack){
                        .nr = 1,
                        .entries = {
                                .from = sample.ip,
                                .to = sample.addr,
                        },
                };
                sample.branch_stack = (struct branch_stack *)&dummy_bs;
        }

        if (pt->synth_opts.inject) {
                ret = intel_pt_inject_event(event, &sample,
                                            pt->branches_sample_type,
                                            pt->synth_needs_swap);
                if (ret)
                        return ret;
        }

        ret = perf_session__deliver_synth_event(pt->session, event, &sample);
        if (ret)
                pr_err("Intel Processor Trace: failed to deliver branch event, error %d\n",
                       ret);

        return ret;
}

static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
{
        int ret;
        struct intel_pt *pt = ptq->pt;
        union perf_event *event = ptq->event_buf;
        struct perf_sample sample = { .ip = 0, };

        if (pt->synth_opts.initial_skip &&
            pt->num_events++ < pt->synth_opts.initial_skip)
                return 0;

        event->sample.header.type = PERF_RECORD_SAMPLE;
        event->sample.header.misc = PERF_RECORD_MISC_USER;
        event->sample.header.size = sizeof(struct perf_event_header);

        if (!pt->timeless_decoding)
                sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);

        sample.cpumode = PERF_RECORD_MISC_USER;
        sample.ip = ptq->state->from_ip;
        sample.pid = ptq->pid;
        sample.tid = ptq->tid;
        sample.addr = ptq->state->to_ip;
        sample.id = ptq->pt->instructions_id;
        sample.stream_id = ptq->pt->instructions_id;
        sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
        sample.cpu = ptq->cpu;
        sample.flags = ptq->flags;
        sample.insn_len = ptq->insn_len;

        ptq->last_insn_cnt = ptq->state->tot_insn_cnt;

        if (pt->synth_opts.callchain) {
                thread_stack__sample(ptq->thread, ptq->chain,
                                     pt->synth_opts.callchain_sz, sample.ip);
                sample.callchain = ptq->chain;
        }

        if (pt->synth_opts.last_branch) {
                intel_pt_copy_last_branch_rb(ptq);
                sample.branch_stack = ptq->last_branch;
        }

        if (pt->synth_opts.inject) {
                ret = intel_pt_inject_event(event, &sample,
                                            pt->instructions_sample_type,
                                            pt->synth_needs_swap);
                if (ret)
                        return ret;
        }

        ret = perf_session__deliver_synth_event(pt->session, event, &sample);
        if (ret)
                pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n",
                       ret);

        if (pt->synth_opts.last_branch)
                intel_pt_reset_last_branch_rb(ptq);

        return ret;
}

static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
{
        int ret;
        struct intel_pt *pt = ptq->pt;
        union perf_event *event = ptq->event_buf;
        struct perf_sample sample = { .ip = 0, };

        if (pt->synth_opts.initial_skip &&
            pt->num_events++ < pt->synth_opts.initial_skip)
                return 0;

        event->sample.header.type = PERF_RECORD_SAMPLE;
        event->sample.header.misc = PERF_RECORD_MISC_USER;
        event->sample.header.size = sizeof(struct perf_event_header);

        if (!pt->timeless_decoding)
                sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);

        sample.cpumode = PERF_RECORD_MISC_USER;
        sample.ip = ptq->state->from_ip;
        sample.pid = ptq->pid;
        sample.tid = ptq->tid;
        sample.addr = ptq->state->to_ip;
        sample.id = ptq->pt->transactions_id;
        sample.stream_id = ptq->pt->transactions_id;
        sample.period = 1;
        sample.cpu = ptq->cpu;
        sample.flags = ptq->flags;
        sample.insn_len = ptq->insn_len;

        if (pt->synth_opts.callchain) {
                thread_stack__sample(ptq->thread, ptq->chain,
                                     pt->synth_opts.callchain_sz, sample.ip);
                sample.callchain = ptq->chain;
        }

        if (pt->synth_opts.last_branch) {
                intel_pt_copy_last_branch_rb(ptq);
                sample.branch_stack = ptq->last_branch;
        }

        if (pt->synth_opts.inject) {
                ret = intel_pt_inject_event(event, &sample,
                                            pt->transactions_sample_type,
                                            pt->synth_needs_swap);
                if (ret)
                        return ret;
        }

        ret = perf_session__deliver_synth_event(pt->session, event, &sample);
        if (ret)
                pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n",
                       ret);

        if (pt->synth_opts.last_branch)
                intel_pt_reset_last_branch_rb(ptq);

        return ret;
}

static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
                                pid_t pid, pid_t tid, u64 ip)
{
        union perf_event event;
        char msg[MAX_AUXTRACE_ERROR_MSG];
        int err;

        intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);

        auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
                             code, cpu, pid, tid, ip, msg);

        err = perf_session__deliver_synth_event(pt->session, &event, NULL);
        if (err)
                pr_err("Intel Processor Trace: failed to deliver error event, error %d\n",
                       err);

        return err;
}

static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
{
        struct auxtrace_queue *queue;
        pid_t tid = ptq->next_tid;
        int err;

        if (tid == -1)
                return 0;

        intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid);

        err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid);

        queue = &pt->queues.queue_array[ptq->queue_nr];
        intel_pt_set_pid_tid_cpu(pt, queue);

        ptq->next_tid = -1;

        return err;
}

static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
{
        struct intel_pt *pt = ptq->pt;

        return ip == pt->switch_ip &&
               (ptq->flags & PERF_IP_FLAG_BRANCH) &&
               !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC |
                               PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));
}

static int intel_pt_sample(struct intel_pt_queue *ptq)
{
        const struct intel_pt_state *state = ptq->state;
        struct intel_pt *pt = ptq->pt;
        int err;

        if (!ptq->have_sample)
                return 0;

        ptq->have_sample = false;

        if (pt->sample_instructions &&
            (state->type & INTEL_PT_INSTRUCTION) &&
            (!pt->synth_opts.initial_skip ||
             pt->num_events++ >= pt->synth_opts.initial_skip)) {
                err = intel_pt_synth_instruction_sample(ptq);
                if (err)
                        return err;
        }

        if (pt->sample_transactions &&
            (state->type & INTEL_PT_TRANSACTION) &&
            (!pt->synth_opts.initial_skip ||
             pt->num_events++ >= pt->synth_opts.initial_skip)) {
                err = intel_pt_synth_transaction_sample(ptq);
                if (err)
                        return err;
        }

        if (!(state->type & INTEL_PT_BRANCH))
                return 0;

        if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
                thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
                                    state->to_ip, ptq->insn_len,
                                    state->trace_nr);
        else
                thread_stack__set_trace_nr(ptq->thread, state->trace_nr);

        if (pt->sample_branches) {
                err = intel_pt_synth_branch_sample(ptq);
                if (err)
                        return err;
        }

        if (pt->synth_opts.last_branch)
                intel_pt_update_last_branch_rb(ptq);

        if (!pt->sync_switch)
                return 0;

        if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
                switch (ptq->switch_state) {
                case INTEL_PT_SS_UNKNOWN:
                case INTEL_PT_SS_EXPECTING_SWITCH_IP:
                        err = intel_pt_next_tid(pt, ptq);
                        if (err)
                                return err;
                        ptq->switch_state = INTEL_PT_SS_TRACING;
                        break;
                default:
                        ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT;
                        return 1;
                }
        } else if (!state->to_ip) {
                ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
        } else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) {
                ptq->switch_state = INTEL_PT_SS_UNKNOWN;
        } else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
                   state->to_ip == pt->ptss_ip &&
                   (ptq->flags & PERF_IP_FLAG_CALL)) {
                ptq->switch_state = INTEL_PT_SS_TRACING;
        }

        return 0;
}

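/*
 * Find the kernel address of __switch_to (used to recognize context switches
 * in the trace) and, via ptss_ip, of the sched_switch tracepoint or
 * __perf_event_task_sched_out, depending on how switches were recorded.
 */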
static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
{
        struct machine *machine = pt->machine;
        struct map *map;
        struct symbol *sym, *start;
        u64 ip, switch_ip = 0;
        const char *ptss;

        if (ptss_ip)
                *ptss_ip = 0;

        map = machine__kernel_map(machine);
        if (!map)
                return 0;

        if (map__load(map))
                return 0;

        start = dso__first_symbol(map->dso, MAP__FUNCTION);

        for (sym = start; sym; sym = dso__next_symbol(sym)) {
                if (sym->binding == STB_GLOBAL &&
                    !strcmp(sym->name, "__switch_to")) {
                        ip = map->unmap_ip(map, sym->start);
                        if (ip >= map->start && ip < map->end) {
                                switch_ip = ip;
                                break;
                        }
                }
        }

        if (!switch_ip || !ptss_ip)
                return 0;

        if (pt->have_sched_switch == 1)
                ptss = "perf_trace_sched_switch";
        else
                ptss = "__perf_event_task_sched_out";

        for (sym = start; sym; sym = dso__next_symbol(sym)) {
                if (!strcmp(sym->name, ptss)) {
                        ip = map->unmap_ip(map, sym->start);
                        if (ip >= map->start && ip < map->end) {
                                *ptss_ip = ip;
                                break;
                        }
                }
        }

        return switch_ip;
}

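/*
 * Decode a queue, synthesizing samples, until the trace data runs out
 * (returns 1), a fatal error occurs, or, for timed decoding, the queue's
 * timestamp reaches *timestamp, which is then updated (returns 0).
 */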
static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
{
        const struct intel_pt_state *state = ptq->state;
        struct intel_pt *pt = ptq->pt;
        int err;

        if (!pt->kernel_start) {
                pt->kernel_start = machine__kernel_start(pt->machine);
                if (pt->per_cpu_mmaps &&
                    (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) &&
                    !pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
                    !pt->sampling_mode) {
                        pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip);
                        if (pt->switch_ip) {
                                intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
                                             pt->switch_ip, pt->ptss_ip);
                                pt->sync_switch = true;
                        }
                }
        }

        intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
                     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
        while (1) {
                err = intel_pt_sample(ptq);
                if (err)
                        return err;

                state = intel_pt_decode(ptq->decoder);
                if (state->err) {
                        if (state->err == INTEL_PT_ERR_NODATA)
                                return 1;
                        if (pt->sync_switch &&
                            state->from_ip >= pt->kernel_start) {
                                pt->sync_switch = false;
                                intel_pt_next_tid(pt, ptq);
                        }
                        if (pt->synth_opts.errors) {
                                err = intel_pt_synth_error(pt, state->err,
                                                           ptq->cpu, ptq->pid,
                                                           ptq->tid,
                                                           state->from_ip);
                                if (err)
                                        return err;
                        }
                        continue;
                }

                ptq->state = state;
                ptq->have_sample = true;
                intel_pt_sample_flags(ptq);

                /* Use estimated TSC upon return to user space */
                if (pt->est_tsc &&
                    (state->from_ip >= pt->kernel_start || !state->from_ip) &&
                    state->to_ip && state->to_ip < pt->kernel_start) {
                        intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
                                     state->timestamp, state->est_timestamp);
                        ptq->timestamp = state->est_timestamp;
                /* Use estimated TSC in unknown switch state */
                } else if (pt->sync_switch &&
                           ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
                           intel_pt_is_switch_ip(ptq, state->to_ip) &&
                           ptq->next_tid == -1) {
                        intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
                                     state->timestamp, state->est_timestamp);
                        ptq->timestamp = state->est_timestamp;
                } else if (state->timestamp > ptq->timestamp) {
                        ptq->timestamp = state->timestamp;
                }

                if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
                        *timestamp = ptq->timestamp;
                        return 0;
                }
        }
        return 0;
}

static inline int intel_pt_update_queues(struct intel_pt *pt)
{
        if (pt->queues.new_data) {
                pt->queues.new_data = false;
                return intel_pt_setup_queues(pt);
        }
        return 0;
}

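/*
 * Repeatedly decode the queue with the earliest timestamp on the heap until
 * all queues have reached the given timestamp.
 */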
static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp)
{
        unsigned int queue_nr;
        u64 ts;
        int ret;

        while (1) {
                struct auxtrace_queue *queue;
                struct intel_pt_queue *ptq;

                if (!pt->heap.heap_cnt)
                        return 0;

                if (pt->heap.heap_array[0].ordinal >= timestamp)
                        return 0;

                queue_nr = pt->heap.heap_array[0].queue_nr;
                queue = &pt->queues.queue_array[queue_nr];
                ptq = queue->priv;

                intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n",
                             queue_nr, pt->heap.heap_array[0].ordinal,
                             timestamp);

                auxtrace_heap__pop(&pt->heap);

                if (pt->heap.heap_cnt) {
                        ts = pt->heap.heap_array[0].ordinal + 1;
                        if (ts > timestamp)
                                ts = timestamp;
                } else {
                        ts = timestamp;
                }

                intel_pt_set_pid_tid_cpu(pt, queue);

                ret = intel_pt_run_decoder(ptq, &ts);

                if (ret < 0) {
                        auxtrace_heap__add(&pt->heap, queue_nr, ts);
                        return ret;
                }

                if (!ret) {
                        ret = auxtrace_heap__add(&pt->heap, queue_nr, ts);
                        if (ret < 0)
                                return ret;
                } else {
                        ptq->on_heap = false;
                }
        }

        return 0;
}

static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid,
                                            u64 time_)
{
        struct auxtrace_queues *queues = &pt->queues;
        unsigned int i;
        u64 ts = 0;

        for (i = 0; i < queues->nr_queues; i++) {
                struct auxtrace_queue *queue = &pt->queues.queue_array[i];
                struct intel_pt_queue *ptq = queue->priv;

                if (ptq && (tid == -1 || ptq->tid == tid)) {
                        ptq->time = time_;
                        intel_pt_set_pid_tid_cpu(pt, queue);
                        intel_pt_run_decoder(ptq, &ts);
                }
        }
        return 0;
}

static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
{
        return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
                                    sample->pid, sample->tid, 0);
}

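/*
 * Map a CPU number to its decoder queue.  Queues are usually indexed by CPU,
 * so try the direct index first, then search the other queues.
 */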
1510 static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
1511 {
1512         unsigned i, j;
1513
1514         if (cpu < 0 || !pt->queues.nr_queues)
1515                 return NULL;
1516
1517         if ((unsigned)cpu >= pt->queues.nr_queues)
1518                 i = pt->queues.nr_queues - 1;
1519         else
1520                 i = cpu;
1521
1522         if (pt->queues.queue_array[i].cpu == cpu)
1523                 return pt->queues.queue_array[i].priv;
1524
1525         for (j = 0; i > 0; j++) {
1526                 if (pt->queues.queue_array[--i].cpu == cpu)
1527                         return pt->queues.queue_array[i].priv;
1528         }
1529
1530         for (; j < pt->queues.nr_queues; j++) {
1531                 if (pt->queues.queue_array[j].cpu == cpu)
1532                         return pt->queues.queue_array[j].priv;
1533         }
1534
1535         return NULL;
1536 }
1537
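/*
 * Keep the decoder in step with a context switch on 'cpu' to 'tid'.  Returns
 * 0 if updating the current tid must wait until the decoder reaches the
 * switch IP, 1 if the caller should update it immediately, or a negative
 * error code.
 */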
1538 static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
1539                                 u64 timestamp)
1540 {
1541         struct intel_pt_queue *ptq;
1542         int err;
1543
1544         if (!pt->sync_switch)
1545                 return 1;
1546
1547         ptq = intel_pt_cpu_to_ptq(pt, cpu);
1548         if (!ptq)
1549                 return 1;
1550
1551         switch (ptq->switch_state) {
1552         case INTEL_PT_SS_NOT_TRACING:
1553                 ptq->next_tid = -1;
1554                 break;
1555         case INTEL_PT_SS_UNKNOWN:
1556         case INTEL_PT_SS_TRACING:
1557                 ptq->next_tid = tid;
1558                 ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP;
1559                 return 0;
1560         case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
1561                 if (!ptq->on_heap) {
1562                         ptq->timestamp = perf_time_to_tsc(timestamp,
1563                                                           &pt->tc);
1564                         err = auxtrace_heap__add(&pt->heap, ptq->queue_nr,
1565                                                  ptq->timestamp);
1566                         if (err)
1567                                 return err;
1568                         ptq->on_heap = true;
1569                 }
1570                 ptq->switch_state = INTEL_PT_SS_TRACING;
1571                 break;
1572         case INTEL_PT_SS_EXPECTING_SWITCH_IP:
1573                 ptq->next_tid = tid;
1574                 intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);
1575                 break;
1576         default:
1577                 break;
1578         }
1579
1580         return 1;
1581 }
1582
1583 static int intel_pt_process_switch(struct intel_pt *pt,
1584                                    struct perf_sample *sample)
1585 {
1586         struct perf_evsel *evsel;
1587         pid_t tid;
1588         int cpu, ret;
1589
1590         evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id);
1591         if (evsel != pt->switch_evsel)
1592                 return 0;
1593
1594         tid = perf_evsel__intval(evsel, sample, "next_pid");
1595         cpu = sample->cpu;
1596
1597         intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
1598                      cpu, tid, sample->time, perf_time_to_tsc(sample->time,
1599                      &pt->tc));
1600
1601         ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
1602         if (ret <= 0)
1603                 return ret;
1604
1605         return machine__set_current_tid(pt->machine, cpu, -1, tid);
1606 }
1607
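/*
 * Handle PERF_RECORD_SWITCH / PERF_RECORD_SWITCH_CPU_WIDE.  With full
 * CPU-wide context switch events (have_sched_switch == 3) only the switch-out
 * event is used, since it carries the next task; otherwise the switch-in
 * event's own pid/tid is used.
 */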
1608 static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
1609                                    struct perf_sample *sample)
1610 {
1611         bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
1612         pid_t pid, tid;
1613         int cpu, ret;
1614
1615         cpu = sample->cpu;
1616
1617         if (pt->have_sched_switch == 3) {
1618                 if (!out)
1619                         return 0;
1620                 if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) {
1621                         pr_err("Expecting CPU-wide context switch event\n");
1622                         return -EINVAL;
1623                 }
1624                 pid = event->context_switch.next_prev_pid;
1625                 tid = event->context_switch.next_prev_tid;
1626         } else {
1627                 if (out)
1628                         return 0;
1629                 pid = sample->pid;
1630                 tid = sample->tid;
1631         }
1632
1633         if (tid == -1) {
1634                 pr_err("context_switch event has no tid\n");
1635                 return -EINVAL;
1636         }
1637
1638         intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
1639                      cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time,
1640                      &pt->tc));
1641
1642         ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
1643         if (ret <= 0)
1644                 return ret;
1645
1646         return machine__set_current_tid(pt->machine, cpu, pid, tid);
1647 }
1648
1649 static int intel_pt_process_itrace_start(struct intel_pt *pt,
1650                                          union perf_event *event,
1651                                          struct perf_sample *sample)
1652 {
1653         if (!pt->per_cpu_mmaps)
1654                 return 0;
1655
1656         intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
1657                      sample->cpu, event->itrace_start.pid,
1658                      event->itrace_start.tid, sample->time,
1659                      perf_time_to_tsc(sample->time, &pt->tc));
1660
1661         return machine__set_current_tid(pt->machine, sample->cpu,
1662                                         event->itrace_start.pid,
1663                                         event->itrace_start.tid);
1664 }
1665
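/*
 * Main per-event hook: decode any queued trace up to this event's timestamp,
 * then handle exit, AUX data loss, sched_switch samples, itrace_start and
 * context switch events.
 */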
1666 static int intel_pt_process_event(struct perf_session *session,
1667                                   union perf_event *event,
1668                                   struct perf_sample *sample,
1669                                   struct perf_tool *tool)
1670 {
1671         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1672                                            auxtrace);
1673         u64 timestamp;
1674         int err = 0;
1675
1676         if (dump_trace)
1677                 return 0;
1678
1679         if (!tool->ordered_events) {
1680                 pr_err("Intel Processor Trace requires ordered events\n");
1681                 return -EINVAL;
1682         }
1683
1684         if (sample->time && sample->time != (u64)-1)
1685                 timestamp = perf_time_to_tsc(sample->time, &pt->tc);
1686         else
1687                 timestamp = 0;
1688
1689         if (timestamp || pt->timeless_decoding) {
1690                 err = intel_pt_update_queues(pt);
1691                 if (err)
1692                         return err;
1693         }
1694
1695         if (pt->timeless_decoding) {
1696                 if (event->header.type == PERF_RECORD_EXIT) {
1697                         err = intel_pt_process_timeless_queues(pt,
1698                                                                event->fork.tid,
1699                                                                sample->time);
1700                 }
1701         } else if (timestamp) {
1702                 err = intel_pt_process_queues(pt, timestamp);
1703         }
1704         if (err)
1705                 return err;
1706
1707         if (event->header.type == PERF_RECORD_AUX &&
1708             (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
1709             pt->synth_opts.errors) {
1710                 err = intel_pt_lost(pt, sample);
1711                 if (err)
1712                         return err;
1713         }
1714
1715         if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE)
1716                 err = intel_pt_process_switch(pt, sample);
1717         else if (event->header.type == PERF_RECORD_ITRACE_START)
1718                 err = intel_pt_process_itrace_start(pt, event, sample);
1719         else if (event->header.type == PERF_RECORD_SWITCH ||
1720                  event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
1721                 err = intel_pt_context_switch(pt, event, sample);
1722
1723         intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n",
1724                      perf_event__name(event->header.type), event->header.type,
1725                      sample->cpu, sample->time, timestamp);
1726
1727         return err;
1728 }
1729
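/* Flush at the end of the session: decode whatever trace data remains queued. */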
1730 static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool)
1731 {
1732         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1733                                            auxtrace);
1734         int ret;
1735
1736         if (dump_trace)
1737                 return 0;
1738
1739         if (!tool->ordered_events)
1740                 return -EINVAL;
1741
1742         ret = intel_pt_update_queues(pt);
1743         if (ret < 0)
1744                 return ret;
1745
1746         if (pt->timeless_decoding)
1747                 return intel_pt_process_timeless_queues(pt, -1,
1748                                                         MAX_TIMESTAMP - 1);
1749
1750         return intel_pt_process_queues(pt, MAX_TIMESTAMP);
1751 }
1752
1753 static void intel_pt_free_events(struct perf_session *session)
1754 {
1755         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1756                                            auxtrace);
1757         struct auxtrace_queues *queues = &pt->queues;
1758         unsigned int i;
1759
1760         for (i = 0; i < queues->nr_queues; i++) {
1761                 intel_pt_free_queue(queues->queue_array[i].priv);
1762                 queues->queue_array[i].priv = NULL;
1763         }
1764         intel_pt_log_disable();
1765         auxtrace_queues__free(queues);
1766 }
1767
1768 static void intel_pt_free(struct perf_session *session)
1769 {
1770         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1771                                            auxtrace);
1772
1773         auxtrace_heap__free(&pt->heap);
1774         intel_pt_free_events(session);
1775         session->auxtrace = NULL;
1776         thread__put(pt->unknown_thread);
1777         free(pt);
1778 }
1779
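/*
 * Queue AUX trace data as it is encountered, unless it was already queued
 * from the file's index.  For a pipe the data follows the event itself, so
 * the offset is zero; otherwise the data is at the current file offset.
 */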
1780 static int intel_pt_process_auxtrace_event(struct perf_session *session,
1781                                            union perf_event *event,
1782                                            struct perf_tool *tool __maybe_unused)
1783 {
1784         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1785                                            auxtrace);
1786
1787         if (pt->sampling_mode)
1788                 return 0;
1789
1790         if (!pt->data_queued) {
1791                 struct auxtrace_buffer *buffer;
1792                 off_t data_offset;
1793                 int fd = perf_data_file__fd(session->file);
1794                 int err;
1795
1796                 if (perf_data_file__is_pipe(session->file)) {
1797                         data_offset = 0;
1798                 } else {
1799                         data_offset = lseek(fd, 0, SEEK_CUR);
1800                         if (data_offset == -1)
1801                                 return -errno;
1802                 }
1803
1804                 err = auxtrace_queues__add_event(&pt->queues, session, event,
1805                                                  data_offset, &buffer);
1806                 if (err)
1807                         return err;
1808
1809                 /* Dump here now that we have copied a piped trace out of the pipe */
1810                 if (dump_trace) {
1811                         if (auxtrace_buffer__get_data(buffer, fd)) {
1812                                 intel_pt_dump_event(pt, buffer->data,
1813                                                     buffer->size);
1814                                 auxtrace_buffer__put_data(buffer);
1815                         }
1816                 }
1817         }
1818
1819         return 0;
1820 }
1821
1822 struct intel_pt_synth {
1823         struct perf_tool dummy_tool;
1824         struct perf_session *session;
1825 };
1826
1827 static int intel_pt_event_synth(struct perf_tool *tool,
1828                                 union perf_event *event,
1829                                 struct perf_sample *sample __maybe_unused,
1830                                 struct machine *machine __maybe_unused)
1831 {
1832         struct intel_pt_synth *intel_pt_synth =
1833                         container_of(tool, struct intel_pt_synth, dummy_tool);
1834
1835         return perf_session__deliver_synth_event(intel_pt_synth->session, event,
1836                                                  NULL);
1837 }
1838
1839 static int intel_pt_synth_event(struct perf_session *session,
1840                                 struct perf_event_attr *attr, u64 id)
1841 {
1842         struct intel_pt_synth intel_pt_synth;
1843
1844         memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
1845         intel_pt_synth.session = session;
1846
1847         return perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
1848                                            &id, intel_pt_event_synth);
1849 }
1850
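/*
 * Set up the synthesized 'instructions', 'transactions' and 'branches'
 * events, deriving their attributes from the selected Intel PT evsel and
 * from the itrace options.
 */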
1851 static int intel_pt_synth_events(struct intel_pt *pt,
1852                                  struct perf_session *session)
1853 {
1854         struct perf_evlist *evlist = session->evlist;
1855         struct perf_evsel *evsel;
1856         struct perf_event_attr attr;
1857         bool found = false;
1858         u64 id;
1859         int err;
1860
1861         evlist__for_each_entry(evlist, evsel) {
1862                 if (evsel->attr.type == pt->pmu_type && evsel->ids) {
1863                         found = true;
1864                         break;
1865                 }
1866         }
1867
1868         if (!found) {
1869                 pr_debug("There are no selected events with Intel Processor Trace data\n");
1870                 return 0;
1871         }
1872
1873         memset(&attr, 0, sizeof(struct perf_event_attr));
1874         attr.size = sizeof(struct perf_event_attr);
1875         attr.type = PERF_TYPE_HARDWARE;
1876         attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
1877         attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1878                             PERF_SAMPLE_PERIOD;
1879         if (pt->timeless_decoding)
1880                 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1881         else
1882                 attr.sample_type |= PERF_SAMPLE_TIME;
1883         if (!pt->per_cpu_mmaps)
1884                 attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
1885         attr.exclude_user = evsel->attr.exclude_user;
1886         attr.exclude_kernel = evsel->attr.exclude_kernel;
1887         attr.exclude_hv = evsel->attr.exclude_hv;
1888         attr.exclude_host = evsel->attr.exclude_host;
1889         attr.exclude_guest = evsel->attr.exclude_guest;
1890         attr.sample_id_all = evsel->attr.sample_id_all;
1891         attr.read_format = evsel->attr.read_format;
1892
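        /*
         * Base the synthesized event ids on the Intel PT evsel's first id,
         * offset by a large constant, presumably so that they do not clash
         * with ids already in use.
         */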
1893         id = evsel->id[0] + 1000000000;
1894         if (!id)
1895                 id = 1;
1896
1897         if (pt->synth_opts.instructions) {
1898                 attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1899                 if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
1900                         attr.sample_period =
1901                                 intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
1902                 else
1903                         attr.sample_period = pt->synth_opts.period;
1904                 pt->instructions_sample_period = attr.sample_period;
1905                 if (pt->synth_opts.callchain)
1906                         attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
1907                 if (pt->synth_opts.last_branch)
1908                         attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
1909                 pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
1910                          id, (u64)attr.sample_type);
1911                 err = intel_pt_synth_event(session, &attr, id);
1912                 if (err) {
1913                         pr_err("%s: failed to synthesize 'instructions' event type\n",
1914                                __func__);
1915                         return err;
1916                 }
1917                 pt->sample_instructions = true;
1918                 pt->instructions_sample_type = attr.sample_type;
1919                 pt->instructions_id = id;
1920                 id += 1;
1921         }
1922
1923         if (pt->synth_opts.transactions) {
1924                 attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1925                 attr.sample_period = 1;
1926                 if (pt->synth_opts.callchain)
1927                         attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
1928                 if (pt->synth_opts.last_branch)
1929                         attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
1930                 pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
1931                          id, (u64)attr.sample_type);
1932                 err = intel_pt_synth_event(session, &attr, id);
1933                 if (err) {
1934                         pr_err("%s: failed to synthesize 'transactions' event type\n",
1935                                __func__);
1936                         return err;
1937                 }
1938                 pt->sample_transactions = true;
1939                 pt->transactions_id = id;
1940                 id += 1;
1941                 evlist__for_each_entry(evlist, evsel) {
1942                         if (evsel->id && evsel->id[0] == pt->transactions_id) {
1943                                 if (evsel->name)
1944                                         zfree(&evsel->name);
1945                                 evsel->name = strdup("transactions");
1946                                 break;
1947                         }
1948                 }
1949         }
1950
1951         if (pt->synth_opts.branches) {
1952                 attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
1953                 attr.sample_period = 1;
1954                 attr.sample_type |= PERF_SAMPLE_ADDR;
1955                 attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN;
1956                 attr.sample_type &= ~(u64)PERF_SAMPLE_BRANCH_STACK;
1957                 pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
1958                          id, (u64)attr.sample_type);
1959                 err = intel_pt_synth_event(session, &attr, id);
1960                 if (err) {
1961                         pr_err("%s: failed to synthesize 'branches' event type\n",
1962                                __func__);
1963                         return err;
1964                 }
1965                 pt->sample_branches = true;
1966                 pt->branches_sample_type = attr.sample_type;
1967                 pt->branches_id = id;
1968         }
1969
1970         pt->synth_needs_swap = evsel->needs_swap;
1971
1972         return 0;
1973 }
1974
1975 static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist)
1976 {
1977         struct perf_evsel *evsel;
1978
1979         evlist__for_each_entry_reverse(evlist, evsel) {
1980                 const char *name = perf_evsel__name(evsel);
1981
1982                 if (!strcmp(name, "sched:sched_switch"))
1983                         return evsel;
1984         }
1985
1986         return NULL;
1987 }
1988
1989 static bool intel_pt_find_switch(struct perf_evlist *evlist)
1990 {
1991         struct perf_evsel *evsel;
1992
1993         evlist__for_each_entry(evlist, evsel) {
1994                 if (evsel->attr.context_switch)
1995                         return true;
1996         }
1997
1998         return false;
1999 }
2000
2001 static int intel_pt_perf_config(const char *var, const char *value, void *data)
2002 {
2003         struct intel_pt *pt = data;
2004
2005         if (!strcmp(var, "intel-pt.mispred-all"))
2006                 pt->mispred_all = perf_config_bool(var, value);
2007
2008         return 0;
2009 }
2010
2011 static const char * const intel_pt_info_fmts[] = {
2012         [INTEL_PT_PMU_TYPE]             = "  PMU Type            %"PRId64"\n",
2013         [INTEL_PT_TIME_SHIFT]           = "  Time Shift          %"PRIu64"\n",
2014         [INTEL_PT_TIME_MULT]            = "  Time Multiplier     %"PRIu64"\n",
2015         [INTEL_PT_TIME_ZERO]            = "  Time Zero           %"PRIu64"\n",
2016         [INTEL_PT_CAP_USER_TIME_ZERO]   = "  Cap Time Zero       %"PRId64"\n",
2017         [INTEL_PT_TSC_BIT]              = "  TSC bit             %#"PRIx64"\n",
2018         [INTEL_PT_NORETCOMP_BIT]        = "  NoRETComp bit       %#"PRIx64"\n",
2019         [INTEL_PT_HAVE_SCHED_SWITCH]    = "  Have sched_switch   %"PRId64"\n",
2020         [INTEL_PT_SNAPSHOT_MODE]        = "  Snapshot mode       %"PRId64"\n",
2021         [INTEL_PT_PER_CPU_MMAPS]        = "  Per-cpu maps        %"PRId64"\n",
2022         [INTEL_PT_MTC_BIT]              = "  MTC bit             %#"PRIx64"\n",
2023         [INTEL_PT_TSC_CTC_N]            = "  TSC:CTC numerator   %"PRIu64"\n",
2024         [INTEL_PT_TSC_CTC_D]            = "  TSC:CTC denominator %"PRIu64"\n",
2025         [INTEL_PT_CYC_BIT]              = "  CYC bit             %#"PRIx64"\n",
2026         [INTEL_PT_MAX_NONTURBO_RATIO]   = "  Max non-turbo ratio %"PRIu64"\n",
2027 };
2028
2029 static void intel_pt_print_info(u64 *arr, int start, int finish)
2030 {
2031         int i;
2032
2033         if (!dump_trace)
2034                 return;
2035
2036         for (i = start; i <= finish; i++)
2037                 fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
2038 }
2039
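/*
 * AUXTRACE_INFO can grow over time: return true if the event is large enough
 * to contain the priv[] entry at position 'pos', so that newer fields can be
 * treated as optional.
 */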
2040 static bool intel_pt_has(struct auxtrace_info_event *auxtrace_info, int pos)
2041 {
2042         return auxtrace_info->header.size >=
2043                 sizeof(struct auxtrace_info_event) + (sizeof(u64) * (pos + 1));
2044 }
2045
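/*
 * Process the PERF_RECORD_AUXTRACE_INFO event: validate its size, copy the
 * recorded parameters into a new struct intel_pt and hook the Intel PT
 * decoder into the session.
 */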
2046 int intel_pt_process_auxtrace_info(union perf_event *event,
2047                                    struct perf_session *session)
2048 {
2049         struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
2050         size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS;
2051         struct intel_pt *pt;
2052         int err;
2053
2054         if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
2055                                         min_sz)
2056                 return -EINVAL;
2057
2058         pt = zalloc(sizeof(struct intel_pt));
2059         if (!pt)
2060                 return -ENOMEM;
2061
2062         perf_config(intel_pt_perf_config, pt);
2063
2064         err = auxtrace_queues__init(&pt->queues);
2065         if (err)
2066                 goto err_free;
2067
2068         intel_pt_log_set_name(INTEL_PT_PMU_NAME);
2069
2070         pt->session = session;
2071         pt->machine = &session->machines.host; /* No kvm support */
2072         pt->auxtrace_type = auxtrace_info->type;
2073         pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE];
2074         pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT];
2075         pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT];
2076         pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO];
2077         pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO];
2078         pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT];
2079         pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT];
2080         pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH];
2081         pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE];
2082         pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS];
2083         intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
2084                             INTEL_PT_PER_CPU_MMAPS);
2085
2086         if (intel_pt_has(auxtrace_info, INTEL_PT_CYC_BIT)) {
2087                 pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT];
2088                 pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS];
2089                 pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N];
2090                 pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D];
2091                 pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT];
2092                 intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT,
2093                                     INTEL_PT_CYC_BIT);
2094         }
2095
2096         if (intel_pt_has(auxtrace_info, INTEL_PT_MAX_NONTURBO_RATIO)) {
2097                 pt->max_non_turbo_ratio =
2098                         auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO];
2099                 intel_pt_print_info(&auxtrace_info->priv[0],
2100                                     INTEL_PT_MAX_NONTURBO_RATIO,
2101                                     INTEL_PT_MAX_NONTURBO_RATIO);
2102         }
2103
2104         pt->timeless_decoding = intel_pt_timeless_decoding(pt);
2105         pt->have_tsc = intel_pt_have_tsc(pt);
2106         pt->sampling_mode = false;
2107         pt->est_tsc = !pt->timeless_decoding;
2108
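        /*
         * Create a fallback thread for decoded samples that cannot be
         * attributed to a real task, using a pid/tid that is presumably
         * never in real use.
         */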
2109         pt->unknown_thread = thread__new(999999999, 999999999);
2110         if (!pt->unknown_thread) {
2111                 err = -ENOMEM;
2112                 goto err_free_queues;
2113         }
2114
2115         /*
2116          * Since this thread will not be kept in any rbtree nor in a
2117          * list, initialize its list node so that at thread__put() the
2118          * current thread lifetime assumption is kept and we don't
2119          * segfault at list_del_init().
2120          */
2121         INIT_LIST_HEAD(&pt->unknown_thread->node);
2122
2123         err = thread__set_comm(pt->unknown_thread, "unknown", 0);
2124         if (err)
2125                 goto err_delete_thread;
2126         if (thread__init_map_groups(pt->unknown_thread, pt->machine)) {
2127                 err = -ENOMEM;
2128                 goto err_delete_thread;
2129         }
2130
2131         pt->auxtrace.process_event = intel_pt_process_event;
2132         pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event;
2133         pt->auxtrace.flush_events = intel_pt_flush;
2134         pt->auxtrace.free_events = intel_pt_free_events;
2135         pt->auxtrace.free = intel_pt_free;
2136         session->auxtrace = &pt->auxtrace;
2137
2138         if (dump_trace)
2139                 return 0;
2140
2141         if (pt->have_sched_switch == 1) {
2142                 pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
2143                 if (!pt->switch_evsel) {
2144                         pr_err("%s: missing sched_switch event\n", __func__);
2145                         err = -EINVAL;
2146                         goto err_delete_thread;
2147                 }
2148         } else if (pt->have_sched_switch == 2 &&
2149                    !intel_pt_find_switch(session->evlist)) {
2150                 pr_err("%s: missing context_switch attribute flag\n", __func__);
2151                 err = -EINVAL;
2152                 goto err_delete_thread;
2153         }
2154
2155         if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
2156                 pt->synth_opts = *session->itrace_synth_opts;
2157         } else {
2158                 itrace_synth_opts__set_default(&pt->synth_opts);
2159                 if (use_browser != -1) {
2160                         pt->synth_opts.branches = false;
2161                         pt->synth_opts.callchain = true;
2162                 }
2163                 if (session->itrace_synth_opts)
2164                         pt->synth_opts.thread_stack =
2165                                 session->itrace_synth_opts->thread_stack;
2166         }
2167
2168         if (pt->synth_opts.log)
2169                 intel_pt_log_enable();
2170
2171         /* Maximum non-turbo ratio is TSC freq / 100 MHz */
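        /* e.g. a 3.4 GHz TSC gives (3400000000 + 50000000) / 100000000 = 34 */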
2172         if (pt->tc.time_mult) {
2173                 u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);
2174
2175                 if (!pt->max_non_turbo_ratio)
2176                         pt->max_non_turbo_ratio =
2177                                         (tsc_freq + 50000000) / 100000000;
2178                 intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
2179                 intel_pt_log("Maximum non-turbo ratio %u\n",
2180                              pt->max_non_turbo_ratio);
2181         }
2182
2183         if (pt->synth_opts.calls)
2184                 pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
2185                                        PERF_IP_FLAG_TRACE_END;
2186         if (pt->synth_opts.returns)
2187                 pt->branches_filter |= PERF_IP_FLAG_RETURN |
2188                                        PERF_IP_FLAG_TRACE_BEGIN;
2189
2190         if (pt->synth_opts.callchain && !symbol_conf.use_callchain) {
2191                 symbol_conf.use_callchain = true;
2192                 if (callchain_register_param(&callchain_param) < 0) {
2193                         symbol_conf.use_callchain = false;
2194                         pt->synth_opts.callchain = false;
2195                 }
2196         }
2197
2198         err = intel_pt_synth_events(pt, session);
2199         if (err)
2200                 goto err_delete_thread;
2201
2202         err = auxtrace_queues__process_index(&pt->queues, session);
2203         if (err)
2204                 goto err_delete_thread;
2205
2206         if (pt->queues.populated)
2207                 pt->data_queued = true;
2208
2209         if (pt->timeless_decoding)
2210                 pr_debug2("Intel PT decoding without timestamps\n");
2211
2212         return 0;
2213
2214 err_delete_thread:
2215         thread__zput(pt->unknown_thread);
2216 err_free_queues:
2217         intel_pt_log_disable();
2218         auxtrace_queues__free(&pt->queues);
2219         session->auxtrace = NULL;
2220 err_free:
2221         free(pt);
2222         return err;
2223 }