[cascardo/linux.git] tools/perf/util/evlist.c
1 /*
2  * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
3  *
4  * Parts came from builtin-{top,stat,record}.c, see those files for further
5  * copyright notes.
6  *
7  * Released under the GPL v2. (and only v2, not any later version)
8  */
9 #include "util.h"
10 #include <api/fs/fs.h>
11 #include <poll.h>
12 #include "cpumap.h"
13 #include "thread_map.h"
14 #include "target.h"
15 #include "evlist.h"
16 #include "evsel.h"
17 #include "debug.h"
18 #include <unistd.h>
19
20 #include "parse-events.h"
21 #include "parse-options.h"
22
23 #include <sys/mman.h>
24
25 #include <linux/bitops.h>
26 #include <linux/hash.h>
27 #include <linux/log2.h>
28 #include <linux/err.h>
29
30 static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx);
31 static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx);
32
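/*
 * Per-evsel xyarray accessors: FD() yields the perf event file descriptor
 * and SID() the struct perf_sample_id slot for a given (cpu index,
 * thread index) pair.
 */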
33 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
34 #define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
35
36 void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
37                        struct thread_map *threads)
38 {
39         int i;
40
41         for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
42                 INIT_HLIST_HEAD(&evlist->heads[i]);
43         INIT_LIST_HEAD(&evlist->entries);
44         perf_evlist__set_maps(evlist, cpus, threads);
45         fdarray__init(&evlist->pollfd, 64);
46         evlist->workload.pid = -1;
47 }
48
49 struct perf_evlist *perf_evlist__new(void)
50 {
51         struct perf_evlist *evlist = zalloc(sizeof(*evlist));
52
53         if (evlist != NULL)
54                 perf_evlist__init(evlist, NULL, NULL);
55
56         return evlist;
57 }
58
59 struct perf_evlist *perf_evlist__new_default(void)
60 {
61         struct perf_evlist *evlist = perf_evlist__new();
62
63         if (evlist && perf_evlist__add_default(evlist)) {
64                 perf_evlist__delete(evlist);
65                 evlist = NULL;
66         }
67
68         return evlist;
69 }
70
71 /**
72  * perf_evlist__set_id_pos - set the positions of event ids.
73  * @evlist: selected event list
74  *
75  * Events with compatible sample types all have the same id_pos
76  * and is_pos.  For convenience, put a copy on evlist.
77  */
78 void perf_evlist__set_id_pos(struct perf_evlist *evlist)
79 {
80         struct perf_evsel *first = perf_evlist__first(evlist);
81
82         evlist->id_pos = first->id_pos;
83         evlist->is_pos = first->is_pos;
84 }
85
86 static void perf_evlist__update_id_pos(struct perf_evlist *evlist)
87 {
88         struct perf_evsel *evsel;
89
90         evlist__for_each(evlist, evsel)
91                 perf_evsel__calc_id_pos(evsel);
92
93         perf_evlist__set_id_pos(evlist);
94 }
95
96 static void perf_evlist__purge(struct perf_evlist *evlist)
97 {
98         struct perf_evsel *pos, *n;
99
100         evlist__for_each_safe(evlist, n, pos) {
101                 list_del_init(&pos->node);
102                 pos->evlist = NULL;
103                 perf_evsel__delete(pos);
104         }
105
106         evlist->nr_entries = 0;
107 }
108
109 void perf_evlist__exit(struct perf_evlist *evlist)
110 {
111         zfree(&evlist->mmap);
112         fdarray__exit(&evlist->pollfd);
113 }
114
115 void perf_evlist__delete(struct perf_evlist *evlist)
116 {
117         perf_evlist__munmap(evlist);
118         perf_evlist__close(evlist);
119         cpu_map__put(evlist->cpus);
120         thread_map__put(evlist->threads);
121         evlist->cpus = NULL;
122         evlist->threads = NULL;
123         perf_evlist__purge(evlist);
124         perf_evlist__exit(evlist);
125         free(evlist);
126 }
127
128 static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
129                                           struct perf_evsel *evsel)
130 {
131         /*
132          * We already have cpus for evsel (via PMU sysfs) so
133          * keep it, if there's no target cpu list defined.
134          */
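        /*
         * E.g. an uncore PMU event gets evsel->own_cpus from the PMU's
         * sysfs cpumask; that mask is kept unless the user explicitly
         * asked for a cpu list (evlist->has_user_cpus), in which case
         * the evlist-wide map wins.
         */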
135         if (!evsel->own_cpus || evlist->has_user_cpus) {
136                 cpu_map__put(evsel->cpus);
137                 evsel->cpus = cpu_map__get(evlist->cpus);
138         } else if (evsel->cpus != evsel->own_cpus) {
139                 cpu_map__put(evsel->cpus);
140                 evsel->cpus = cpu_map__get(evsel->own_cpus);
141         }
142
143         thread_map__put(evsel->threads);
144         evsel->threads = thread_map__get(evlist->threads);
145 }
146
147 static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
148 {
149         struct perf_evsel *evsel;
150
151         evlist__for_each(evlist, evsel)
152                 __perf_evlist__propagate_maps(evlist, evsel);
153 }
154
155 void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
156 {
157         entry->evlist = evlist;
158         list_add_tail(&entry->node, &evlist->entries);
159         entry->idx = evlist->nr_entries;
160         entry->tracking = !entry->idx;
161
162         if (!evlist->nr_entries++)
163                 perf_evlist__set_id_pos(evlist);
164
165         __perf_evlist__propagate_maps(evlist, entry);
166 }
167
168 void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
169                                    struct list_head *list)
170 {
171         struct perf_evsel *evsel, *temp;
172
173         __evlist__for_each_safe(list, temp, evsel) {
174                 list_del_init(&evsel->node);
175                 perf_evlist__add(evlist, evsel);
176         }
177 }
178
179 void __perf_evlist__set_leader(struct list_head *list)
180 {
181         struct perf_evsel *evsel, *leader;
182
183         leader = list_entry(list->next, struct perf_evsel, node);
184         evsel = list_entry(list->prev, struct perf_evsel, node);
185
186         leader->nr_members = evsel->idx - leader->idx + 1;
187
188         __evlist__for_each(list, evsel) {
189                 evsel->leader = leader;
190         }
191 }
192
193 void perf_evlist__set_leader(struct perf_evlist *evlist)
194 {
195         if (evlist->nr_entries) {
196                 evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
197                 __perf_evlist__set_leader(&evlist->entries);
198         }
199 }
200
201 int perf_evlist__add_default(struct perf_evlist *evlist)
202 {
203         struct perf_event_attr attr = {
204                 .type = PERF_TYPE_HARDWARE,
205                 .config = PERF_COUNT_HW_CPU_CYCLES,
206         };
207         struct perf_evsel *evsel;
208
209         event_attr_init(&attr);
210
211         evsel = perf_evsel__new(&attr);
212         if (evsel == NULL)
213                 goto error;
214
215         /* use strdup() because perf_evsel__delete() assumes name was allocated */
216         evsel->name = strdup("cycles");
217         if (!evsel->name)
218                 goto error_free;
219
220         perf_evlist__add(evlist, evsel);
221         return 0;
222 error_free:
223         perf_evsel__delete(evsel);
224 error:
225         return -ENOMEM;
226 }
227
228 static int perf_evlist__add_attrs(struct perf_evlist *evlist,
229                                   struct perf_event_attr *attrs, size_t nr_attrs)
230 {
231         struct perf_evsel *evsel, *n;
232         LIST_HEAD(head);
233         size_t i;
234
235         for (i = 0; i < nr_attrs; i++) {
236                 evsel = perf_evsel__new_idx(attrs + i, evlist->nr_entries + i);
237                 if (evsel == NULL)
238                         goto out_delete_partial_list;
239                 list_add_tail(&evsel->node, &head);
240         }
241
242         perf_evlist__splice_list_tail(evlist, &head);
243
244         return 0;
245
246 out_delete_partial_list:
247         __evlist__for_each_safe(&head, n, evsel)
248                 perf_evsel__delete(evsel);
249         return -1;
250 }
251
252 int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
253                                      struct perf_event_attr *attrs, size_t nr_attrs)
254 {
255         size_t i;
256
257         for (i = 0; i < nr_attrs; i++)
258                 event_attr_init(attrs + i);
259
260         return perf_evlist__add_attrs(evlist, attrs, nr_attrs);
261 }
262
263 struct perf_evsel *
264 perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)
265 {
266         struct perf_evsel *evsel;
267
268         evlist__for_each(evlist, evsel) {
269                 if (evsel->attr.type   == PERF_TYPE_TRACEPOINT &&
270                     (int)evsel->attr.config == id)
271                         return evsel;
272         }
273
274         return NULL;
275 }
276
277 struct perf_evsel *
278 perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
279                                      const char *name)
280 {
281         struct perf_evsel *evsel;
282
283         evlist__for_each(evlist, evsel) {
284                 if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) &&
285                     (strcmp(evsel->name, name) == 0))
286                         return evsel;
287         }
288
289         return NULL;
290 }
291
292 int perf_evlist__add_newtp(struct perf_evlist *evlist,
293                            const char *sys, const char *name, void *handler)
294 {
295         struct perf_evsel *evsel = perf_evsel__newtp(sys, name);
296
297         if (IS_ERR(evsel))
298                 return -1;
299
300         evsel->handler = handler;
301         perf_evlist__add(evlist, evsel);
302         return 0;
303 }
304
305 static int perf_evlist__nr_threads(struct perf_evlist *evlist,
306                                    struct perf_evsel *evsel)
307 {
308         if (evsel->system_wide)
309                 return 1;
310         else
311                 return thread_map__nr(evlist->threads);
312 }
313
314 void perf_evlist__disable(struct perf_evlist *evlist)
315 {
316         int cpu, thread;
317         struct perf_evsel *pos;
318         int nr_cpus = cpu_map__nr(evlist->cpus);
319         int nr_threads;
320
321         for (cpu = 0; cpu < nr_cpus; cpu++) {
322                 evlist__for_each(evlist, pos) {
323                         if (!perf_evsel__is_group_leader(pos) || !pos->fd)
324                                 continue;
325                         nr_threads = perf_evlist__nr_threads(evlist, pos);
326                         for (thread = 0; thread < nr_threads; thread++)
327                                 ioctl(FD(pos, cpu, thread),
328                                       PERF_EVENT_IOC_DISABLE, 0);
329                 }
330         }
331
332         evlist->enabled = false;
333 }
334
335 void perf_evlist__enable(struct perf_evlist *evlist)
336 {
337         int cpu, thread;
338         struct perf_evsel *pos;
339         int nr_cpus = cpu_map__nr(evlist->cpus);
340         int nr_threads;
341
342         for (cpu = 0; cpu < nr_cpus; cpu++) {
343                 evlist__for_each(evlist, pos) {
344                         if (!perf_evsel__is_group_leader(pos) || !pos->fd)
345                                 continue;
346                         nr_threads = perf_evlist__nr_threads(evlist, pos);
347                         for (thread = 0; thread < nr_threads; thread++)
348                                 ioctl(FD(pos, cpu, thread),
349                                       PERF_EVENT_IOC_ENABLE, 0);
350                 }
351         }
352
353         evlist->enabled = true;
354 }
355
356 void perf_evlist__toggle_enable(struct perf_evlist *evlist)
357 {
358         (evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist);
359 }
360
361 int perf_evlist__disable_event(struct perf_evlist *evlist,
362                                struct perf_evsel *evsel)
363 {
364         int cpu, thread, err;
365         int nr_cpus = cpu_map__nr(evlist->cpus);
366         int nr_threads = perf_evlist__nr_threads(evlist, evsel);
367
368         if (!evsel->fd)
369                 return 0;
370
371         for (cpu = 0; cpu < nr_cpus; cpu++) {
372                 for (thread = 0; thread < nr_threads; thread++) {
373                         err = ioctl(FD(evsel, cpu, thread),
374                                     PERF_EVENT_IOC_DISABLE, 0);
375                         if (err)
376                                 return err;
377                 }
378         }
379         return 0;
380 }
381
382 int perf_evlist__enable_event(struct perf_evlist *evlist,
383                               struct perf_evsel *evsel)
384 {
385         int cpu, thread, err;
386         int nr_cpus = cpu_map__nr(evlist->cpus);
387         int nr_threads = perf_evlist__nr_threads(evlist, evsel);
388
389         if (!evsel->fd)
390                 return -EINVAL;
391
392         for (cpu = 0; cpu < nr_cpus; cpu++) {
393                 for (thread = 0; thread < nr_threads; thread++) {
394                         err = ioctl(FD(evsel, cpu, thread),
395                                     PERF_EVENT_IOC_ENABLE, 0);
396                         if (err)
397                                 return err;
398                 }
399         }
400         return 0;
401 }
402
403 static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist,
404                                          struct perf_evsel *evsel, int cpu)
405 {
406         int thread, err;
407         int nr_threads = perf_evlist__nr_threads(evlist, evsel);
408
409         if (!evsel->fd)
410                 return -EINVAL;
411
412         for (thread = 0; thread < nr_threads; thread++) {
413                 err = ioctl(FD(evsel, cpu, thread),
414                             PERF_EVENT_IOC_ENABLE, 0);
415                 if (err)
416                         return err;
417         }
418         return 0;
419 }
420
421 static int perf_evlist__enable_event_thread(struct perf_evlist *evlist,
422                                             struct perf_evsel *evsel,
423                                             int thread)
424 {
425         int cpu, err;
426         int nr_cpus = cpu_map__nr(evlist->cpus);
427
428         if (!evsel->fd)
429                 return -EINVAL;
430
431         for (cpu = 0; cpu < nr_cpus; cpu++) {
432                 err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
433                 if (err)
434                         return err;
435         }
436         return 0;
437 }
438
439 int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
440                                   struct perf_evsel *evsel, int idx)
441 {
442         bool per_cpu_mmaps = !cpu_map__empty(evlist->cpus);
443
444         if (per_cpu_mmaps)
445                 return perf_evlist__enable_event_cpu(evlist, evsel, idx);
446         else
447                 return perf_evlist__enable_event_thread(evlist, evsel, idx);
448 }
449
450 int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
451 {
452         int nr_cpus = cpu_map__nr(evlist->cpus);
453         int nr_threads = thread_map__nr(evlist->threads);
454         int nfds = 0;
455         struct perf_evsel *evsel;
456
457         evlist__for_each(evlist, evsel) {
458                 if (evsel->system_wide)
459                         nfds += nr_cpus;
460                 else
461                         nfds += nr_cpus * nr_threads;
462         }
463
464         if (fdarray__available_entries(&evlist->pollfd) < nfds &&
465             fdarray__grow(&evlist->pollfd, nfds) < 0)
466                 return -ENOMEM;
467
468         return 0;
469 }
470
471 static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx)
472 {
473         int pos = fdarray__add(&evlist->pollfd, fd, POLLIN | POLLERR | POLLHUP);
474         /*
475          * Save the idx so that when we filter out fds POLLHUP'ed we can
476          * close the associated evlist->mmap[] entry.
477          */
478         if (pos >= 0) {
479                 evlist->pollfd.priv[pos].idx = idx;
480
481                 fcntl(fd, F_SETFL, O_NONBLOCK);
482         }
483
484         return pos;
485 }
486
487 int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
488 {
489         return __perf_evlist__add_pollfd(evlist, fd, -1);
490 }
491
492 static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd)
493 {
494         struct perf_evlist *evlist = container_of(fda, struct perf_evlist, pollfd);
495
496         perf_evlist__mmap_put(evlist, fda->priv[fd].idx);
497 }
498
499 int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
500 {
501         return fdarray__filter(&evlist->pollfd, revents_and_mask,
502                                perf_evlist__munmap_filtered);
503 }
504
505 int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
506 {
507         return fdarray__poll(&evlist->pollfd, timeout);
508 }
509
510 static void perf_evlist__id_hash(struct perf_evlist *evlist,
511                                  struct perf_evsel *evsel,
512                                  int cpu, int thread, u64 id)
513 {
514         int hash;
515         struct perf_sample_id *sid = SID(evsel, cpu, thread);
516
517         sid->id = id;
518         sid->evsel = evsel;
519         hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
520         hlist_add_head(&sid->node, &evlist->heads[hash]);
521 }
522
523 void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
524                          int cpu, int thread, u64 id)
525 {
526         perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
527         evsel->id[evsel->ids++] = id;
528 }
529
530 static int perf_evlist__id_add_fd(struct perf_evlist *evlist,
531                                   struct perf_evsel *evsel,
532                                   int cpu, int thread, int fd)
533 {
534         u64 read_data[4] = { 0, };
535         int id_idx = 1; /* The first entry is the counter value */
536         u64 id;
537         int ret;
538
539         ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
540         if (!ret)
541                 goto add;
542
543         if (errno != ENOTTY)
544                 return -1;
545
546         /* Legacy way to get the event id. All hail to old kernels! */
547
548         /*
549          * This way does not work with group format read, so bail
550          * out in that case.
551          */
552         if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
553                 return -1;
554
555         if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
556             read(fd, &read_data, sizeof(read_data)) == -1)
557                 return -1;
558
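        /*
         * Without PERF_FORMAT_GROUP the read() layout is, per
         * perf_event_open(2):
         *
         *	{ u64 value;
         *	  { u64 time_enabled; }	&& PERF_FORMAT_TOTAL_TIME_ENABLED
         *	  { u64 time_running; }	&& PERF_FORMAT_TOTAL_TIME_RUNNING
         *	  { u64 id; }		&& PERF_FORMAT_ID
         *	}
         *
         * so id_idx starts at 1 and is bumped past the optional time
         * fields below.
         */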
559         if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
560                 ++id_idx;
561         if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
562                 ++id_idx;
563
564         id = read_data[id_idx];
565
566  add:
567         perf_evlist__id_add(evlist, evsel, cpu, thread, id);
568         return 0;
569 }
570
571 static void perf_evlist__set_sid_idx(struct perf_evlist *evlist,
572                                      struct perf_evsel *evsel, int idx, int cpu,
573                                      int thread)
574 {
575         struct perf_sample_id *sid = SID(evsel, cpu, thread);
576         sid->idx = idx;
577         if (evlist->cpus && cpu >= 0)
578                 sid->cpu = evlist->cpus->map[cpu];
579         else
580                 sid->cpu = -1;
581         if (!evsel->system_wide && evlist->threads && thread >= 0)
582                 sid->tid = thread_map__pid(evlist->threads, thread);
583         else
584                 sid->tid = -1;
585 }
586
587 struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id)
588 {
589         struct hlist_head *head;
590         struct perf_sample_id *sid;
591         int hash;
592
593         hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
594         head = &evlist->heads[hash];
595
596         hlist_for_each_entry(sid, head, node)
597                 if (sid->id == id)
598                         return sid;
599
600         return NULL;
601 }
602
603 struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
604 {
605         struct perf_sample_id *sid;
606
607         if (evlist->nr_entries == 1 || !id)
608                 return perf_evlist__first(evlist);
609
610         sid = perf_evlist__id2sid(evlist, id);
611         if (sid)
612                 return sid->evsel;
613
614         if (!perf_evlist__sample_id_all(evlist))
615                 return perf_evlist__first(evlist);
616
617         return NULL;
618 }
619
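/*
 * id_pos counts u64 words from the start of the sample array for
 * PERF_RECORD_SAMPLE records; for every other record type the id lives in
 * the sample_id_all trailer, so is_pos counts u64 words back from the end
 * of the event.
 */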
620 static int perf_evlist__event2id(struct perf_evlist *evlist,
621                                  union perf_event *event, u64 *id)
622 {
623         const u64 *array = event->sample.array;
624         ssize_t n;
625
626         n = (event->header.size - sizeof(event->header)) >> 3;
627
628         if (event->header.type == PERF_RECORD_SAMPLE) {
629                 if (evlist->id_pos >= n)
630                         return -1;
631                 *id = array[evlist->id_pos];
632         } else {
633                 if (evlist->is_pos > n)
634                         return -1;
635                 n -= evlist->is_pos;
636                 *id = array[n];
637         }
638         return 0;
639 }
640
641 static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
642                                                    union perf_event *event)
643 {
644         struct perf_evsel *first = perf_evlist__first(evlist);
645         struct hlist_head *head;
646         struct perf_sample_id *sid;
647         int hash;
648         u64 id;
649
650         if (evlist->nr_entries == 1)
651                 return first;
652
653         if (!first->attr.sample_id_all &&
654             event->header.type != PERF_RECORD_SAMPLE)
655                 return first;
656
657         if (perf_evlist__event2id(evlist, event, &id))
658                 return NULL;
659
660         /* Synthesized events have an id of zero */
661         if (!id)
662                 return first;
663
664         hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
665         head = &evlist->heads[hash];
666
667         hlist_for_each_entry(sid, head, node) {
668                 if (sid->id == id)
669                         return sid->evsel;
670         }
671         return NULL;
672 }
673
674 union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
675 {
676         struct perf_mmap *md = &evlist->mmap[idx];
677         u64 head;
678         u64 old = md->prev;
679         unsigned char *data = md->base + page_size;
680         union perf_event *event = NULL;
681
682         /*
683          * Check if event was unmapped due to a POLLHUP/POLLERR.
684          */
685         if (!atomic_read(&md->refcnt))
686                 return NULL;
687
688         head = perf_mmap__read_head(md);
689         if (evlist->overwrite) {
690                 /*
691                  * If we're further behind than half the buffer, there's a chance
692                  * the writer will bite our tail and mess up the samples under us.
693                  *
694                  * If we somehow ended up ahead of the head, we got messed up.
695                  *
696                  * In either case, truncate and restart at head.
697                  */
698                 int diff = head - old;
699                 if (diff > md->mask / 2 || diff < 0) {
700                         fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
701
702                         /*
703                          * head points to a known good entry, start there.
704                          */
705                         old = head;
706                 }
707         }
708
709         if (old != head) {
710                 size_t size;
711
712                 event = (union perf_event *)&data[old & md->mask];
713                 size = event->header.size;
714
715                 /*
716                  * Event straddles the mmap boundary -- header should always
717                  * be inside due to u64 alignment of output.
718                  */
719                 if ((old & md->mask) + size != ((old + size) & md->mask)) {
720                         unsigned int offset = old;
721                         unsigned int len = min(sizeof(*event), size), cpy;
722                         void *dst = md->event_copy;
723
724                         do {
725                                 cpy = min(md->mask + 1 - (offset & md->mask), len);
726                                 memcpy(dst, &data[offset & md->mask], cpy);
727                                 offset += cpy;
728                                 dst += cpy;
729                                 len -= cpy;
730                         } while (len);
731
732                         event = (union perf_event *) md->event_copy;
733                 }
734
735                 old += size;
736         }
737
738         md->prev = old;
739
740         return event;
741 }
742
743 static bool perf_mmap__empty(struct perf_mmap *md)
744 {
745         return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base;
746 }
747
748 static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx)
749 {
750         atomic_inc(&evlist->mmap[idx].refcnt);
751 }
752
753 static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx)
754 {
755         BUG_ON(atomic_read(&evlist->mmap[idx].refcnt) == 0);
756
757         if (atomic_dec_and_test(&evlist->mmap[idx].refcnt))
758                 __perf_evlist__munmap(evlist, idx);
759 }
760
761 void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
762 {
763         struct perf_mmap *md = &evlist->mmap[idx];
764
765         if (!evlist->overwrite) {
766                 u64 old = md->prev;
767
768                 perf_mmap__write_tail(md, old);
769         }
770
771         if (atomic_read(&md->refcnt) == 1 && perf_mmap__empty(md))
772                 perf_evlist__mmap_put(evlist, idx);
773 }
774
775 int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
776                                struct auxtrace_mmap_params *mp __maybe_unused,
777                                void *userpg __maybe_unused,
778                                int fd __maybe_unused)
779 {
780         return 0;
781 }
782
783 void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused)
784 {
785 }
786
787 void __weak auxtrace_mmap_params__init(
788                         struct auxtrace_mmap_params *mp __maybe_unused,
789                         off_t auxtrace_offset __maybe_unused,
790                         unsigned int auxtrace_pages __maybe_unused,
791                         bool auxtrace_overwrite __maybe_unused)
792 {
793 }
794
795 void __weak auxtrace_mmap_params__set_idx(
796                         struct auxtrace_mmap_params *mp __maybe_unused,
797                         struct perf_evlist *evlist __maybe_unused,
798                         int idx __maybe_unused,
799                         bool per_cpu __maybe_unused)
800 {
801 }
802
803 static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
804 {
805         if (evlist->mmap[idx].base != NULL) {
806                 munmap(evlist->mmap[idx].base, evlist->mmap_len);
807                 evlist->mmap[idx].base = NULL;
808                 atomic_set(&evlist->mmap[idx].refcnt, 0);
809         }
810         auxtrace_mmap__munmap(&evlist->mmap[idx].auxtrace_mmap);
811 }
812
813 void perf_evlist__munmap(struct perf_evlist *evlist)
814 {
815         int i;
816
817         if (evlist->mmap == NULL)
818                 return;
819
820         for (i = 0; i < evlist->nr_mmaps; i++)
821                 __perf_evlist__munmap(evlist, i);
822
823         zfree(&evlist->mmap);
824 }
825
826 static int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
827 {
828         evlist->nr_mmaps = cpu_map__nr(evlist->cpus);
829         if (cpu_map__empty(evlist->cpus))
830                 evlist->nr_mmaps = thread_map__nr(evlist->threads);
831         evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
832         return evlist->mmap != NULL ? 0 : -ENOMEM;
833 }
834
835 struct mmap_params {
836         int prot;
837         int mask;
838         struct auxtrace_mmap_params auxtrace_mp;
839 };
840
841 static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
842                                struct mmap_params *mp, int fd)
843 {
844         /*
845          * The last one will be done at perf_evlist__mmap_consume(), so that we
846          * make sure we don't prevent tools from consuming every last event in
847          * the ring buffer.
848          *
849          * I.e. we can get the POLLHUP meaning that the fd doesn't exist
850          * anymore, but the last events for it are still in the ring buffer,
851          * waiting to be consumed.
852          *
853          * Tools can choose to ignore this at their own discretion, but the
854          * evlist layer can't just drop it when filtering events in
855          * perf_evlist__filter_pollfd().
856          */
857         atomic_set(&evlist->mmap[idx].refcnt, 2);
858         evlist->mmap[idx].prev = 0;
859         evlist->mmap[idx].mask = mp->mask;
860         evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, mp->prot,
861                                       MAP_SHARED, fd, 0);
862         if (evlist->mmap[idx].base == MAP_FAILED) {
863                 pr_debug2("failed to mmap perf event ring buffer, error %d\n",
864                           errno);
865                 evlist->mmap[idx].base = NULL;
866                 return -1;
867         }
868
869         if (auxtrace_mmap__mmap(&evlist->mmap[idx].auxtrace_mmap,
870                                 &mp->auxtrace_mp, evlist->mmap[idx].base, fd))
871                 return -1;
872
873         return 0;
874 }
875
876 static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
877                                        struct mmap_params *mp, int cpu,
878                                        int thread, int *output)
879 {
880         struct perf_evsel *evsel;
881
882         evlist__for_each(evlist, evsel) {
883                 int fd;
884
885                 if (evsel->system_wide && thread)
886                         continue;
887
888                 fd = FD(evsel, cpu, thread);
889
890                 if (*output == -1) {
891                         *output = fd;
892                         if (__perf_evlist__mmap(evlist, idx, mp, *output) < 0)
893                                 return -1;
894                 } else {
895                         if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
896                                 return -1;
897
898                         perf_evlist__mmap_get(evlist, idx);
899                 }
900
901                 /*
902                  * The system_wide flag causes a selected event to be opened
903                  * always without a pid.  Consequently it will never get a
904                  * POLLHUP, but it is used for tracking in combination with
905                  * other events, so it should not need to be polled anyway.
906                  * Therefore don't add it for polling.
907                  */
908                 if (!evsel->system_wide &&
909                     __perf_evlist__add_pollfd(evlist, fd, idx) < 0) {
910                         perf_evlist__mmap_put(evlist, idx);
911                         return -1;
912                 }
913
914                 if (evsel->attr.read_format & PERF_FORMAT_ID) {
915                         if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
916                                                    fd) < 0)
917                                 return -1;
918                         perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
919                                                  thread);
920                 }
921         }
922
923         return 0;
924 }
925
926 static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
927                                      struct mmap_params *mp)
928 {
929         int cpu, thread;
930         int nr_cpus = cpu_map__nr(evlist->cpus);
931         int nr_threads = thread_map__nr(evlist->threads);
932
933         pr_debug2("perf event ring buffer mmapped per cpu\n");
934         for (cpu = 0; cpu < nr_cpus; cpu++) {
935                 int output = -1;
936
937                 auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
938                                               true);
939
940                 for (thread = 0; thread < nr_threads; thread++) {
941                         if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
942                                                         thread, &output))
943                                 goto out_unmap;
944                 }
945         }
946
947         return 0;
948
949 out_unmap:
950         for (cpu = 0; cpu < nr_cpus; cpu++)
951                 __perf_evlist__munmap(evlist, cpu);
952         return -1;
953 }
954
955 static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
956                                         struct mmap_params *mp)
957 {
958         int thread;
959         int nr_threads = thread_map__nr(evlist->threads);
960
961         pr_debug2("perf event ring buffer mmapped per thread\n");
962         for (thread = 0; thread < nr_threads; thread++) {
963                 int output = -1;
964
965                 auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
966                                               false);
967
968                 if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
969                                                 &output))
970                         goto out_unmap;
971         }
972
973         return 0;
974
975 out_unmap:
976         for (thread = 0; thread < nr_threads; thread++)
977                 __perf_evlist__munmap(evlist, thread);
978         return -1;
979 }
980
981 static size_t perf_evlist__mmap_size(unsigned long pages)
982 {
983         if (pages == UINT_MAX) {
984                 int max;
985
986                 if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
987                         /*
988                          * Pick a once upon a time good value, i.e. things look
989          * strange since we can't read a sysctl value, but let's not
990                          * die yet...
991                          */
992                         max = 512;
993                 } else {
994                         max -= (page_size / 1024);
995                 }
996
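                /*
                 * Worked example, assuming the default 516 kB
                 * perf_event_mlock_kb and 4 kB pages: max becomes 512,
                 * giving 128 data pages, i.e. a 516 kB mapping once the
                 * control page is added at the end of this function.
                 */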
997                 pages = (max * 1024) / page_size;
998                 if (!is_power_of_2(pages))
999                         pages = rounddown_pow_of_two(pages);
1000         } else if (!is_power_of_2(pages))
1001                 return 0;
1002
1003         return (pages + 1) * page_size;
1004 }
1005
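/*
 * -m/--mmap-pages accepts either a plain page count or a size with a
 * B/K/M/G suffix; e.g. "512K" becomes 128 pages with 4 kB pages, and a
 * value that is not a power of two is rounded up to the next one.
 */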
1006 static long parse_pages_arg(const char *str, unsigned long min,
1007                             unsigned long max)
1008 {
1009         unsigned long pages, val;
1010         static struct parse_tag tags[] = {
1011                 { .tag  = 'B', .mult = 1       },
1012                 { .tag  = 'K', .mult = 1 << 10 },
1013                 { .tag  = 'M', .mult = 1 << 20 },
1014                 { .tag  = 'G', .mult = 1 << 30 },
1015                 { .tag  = 0 },
1016         };
1017
1018         if (str == NULL)
1019                 return -EINVAL;
1020
1021         val = parse_tag_value(str, tags);
1022         if (val != (unsigned long) -1) {
1023                 /* we got file size value */
1024                 pages = PERF_ALIGN(val, page_size) / page_size;
1025         } else {
1026                 /* we got pages count value */
1027                 char *eptr;
1028                 pages = strtoul(str, &eptr, 10);
1029                 if (*eptr != '\0')
1030                         return -EINVAL;
1031         }
1032
1033         if (pages == 0 && min == 0) {
1034                 /* leave number of pages at 0 */
1035         } else if (!is_power_of_2(pages)) {
1036                 /* round pages up to next power of 2 */
1037                 pages = roundup_pow_of_two(pages);
1038                 if (!pages)
1039                         return -EINVAL;
1040                 pr_info("rounding mmap pages size to %lu bytes (%lu pages)\n",
1041                         pages * page_size, pages);
1042         }
1043
1044         if (pages > max)
1045                 return -EINVAL;
1046
1047         return pages;
1048 }
1049
1050 int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
1051 {
1052         unsigned long max = UINT_MAX;
1053         long pages;
1054
1055         if (max > SIZE_MAX / page_size)
1056                 max = SIZE_MAX / page_size;
1057
1058         pages = parse_pages_arg(str, 1, max);
1059         if (pages < 0) {
1060                 pr_err("Invalid argument for --mmap_pages/-m\n");
1061                 return -1;
1062         }
1063
1064         *mmap_pages = pages;
1065         return 0;
1066 }
1067
1068 int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
1069                                   int unset __maybe_unused)
1070 {
1071         return __perf_evlist__parse_mmap_pages(opt->value, str);
1072 }
1073
1074 /**
1075  * perf_evlist__mmap_ex - Create mmaps to receive events.
1076  * @evlist: list of events
1077  * @pages: map length in pages
1078  * @overwrite: overwrite older events?
1079  * @auxtrace_pages: auxtrace map length in pages
1080  * @auxtrace_overwrite: overwrite older auxtrace data?
1081  *
1082  * If @overwrite is %false the user needs to signal event consumption using
1083  * perf_mmap__write_tail().  Using perf_evlist__mmap_read() does this
1084  * automatically.
1085  *
1086  * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
1087  * consumption using auxtrace_mmap__write_tail().
1088  *
1089  * Return: %0 on success, negative error code otherwise.
1090  */
1091 int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
1092                          bool overwrite, unsigned int auxtrace_pages,
1093                          bool auxtrace_overwrite)
1094 {
1095         struct perf_evsel *evsel;
1096         const struct cpu_map *cpus = evlist->cpus;
1097         const struct thread_map *threads = evlist->threads;
1098         struct mmap_params mp = {
1099                 .prot = PROT_READ | (overwrite ? 0 : PROT_WRITE),
1100         };
1101
1102         if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0)
1103                 return -ENOMEM;
1104
1105         if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
1106                 return -ENOMEM;
1107
1108         evlist->overwrite = overwrite;
1109         evlist->mmap_len = perf_evlist__mmap_size(pages);
1110         pr_debug("mmap size %zuB\n", evlist->mmap_len);
1111         mp.mask = evlist->mmap_len - page_size - 1;
1112
1113         auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
1114                                    auxtrace_pages, auxtrace_overwrite);
1115
1116         evlist__for_each(evlist, evsel) {
1117                 if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
1118                     evsel->sample_id == NULL &&
1119                     perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0)
1120                         return -ENOMEM;
1121         }
1122
1123         if (cpu_map__empty(cpus))
1124                 return perf_evlist__mmap_per_thread(evlist, &mp);
1125
1126         return perf_evlist__mmap_per_cpu(evlist, &mp);
1127 }
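
/*
 * Illustrative call sequence (a sketch, not taken from this file):
 *
 *	evlist = perf_evlist__new_default();
 *	perf_evlist__create_maps(evlist, &target);
 *	perf_evlist__open(evlist);
 *	perf_evlist__mmap(evlist, UINT_MAX, false);
 *	...
 *	while ((event = perf_evlist__mmap_read(evlist, idx)) != NULL) {
 *		perf_evlist__parse_sample(evlist, event, &sample);
 *		perf_evlist__mmap_consume(evlist, idx);
 *	}
 *
 * where target, idx, event and sample are the caller's own variables.
 */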
1128
1129 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
1130                       bool overwrite)
1131 {
1132         return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false);
1133 }
1134
1135 int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
1136 {
1137         struct cpu_map *cpus;
1138         struct thread_map *threads;
1139
1140         threads = thread_map__new_str(target->pid, target->tid, target->uid);
1141
1142         if (!threads)
1143                 return -1;
1144
1145         if (target__uses_dummy_map(target))
1146                 cpus = cpu_map__dummy_new();
1147         else
1148                 cpus = cpu_map__new(target->cpu_list);
1149
1150         if (!cpus)
1151                 goto out_delete_threads;
1152
1153         evlist->has_user_cpus = !!target->cpu_list;
1154
1155         perf_evlist__set_maps(evlist, cpus, threads);
1156
1157         return 0;
1158
1159 out_delete_threads:
1160         thread_map__put(threads);
1161         return -1;
1162 }
1163
1164 void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
1165                            struct thread_map *threads)
1166 {
1167         /*
1168          * Allow for the possibility that one or another of the maps isn't being
1169          * changed i.e. don't put it.  Note we are assuming the maps that are
1170          * being applied are brand new and evlist is taking ownership of the
1171          * original reference count of 1.  If that is not the case it is up to
1172          * the caller to increase the reference count.
1173          */
1174         if (cpus != evlist->cpus) {
1175                 cpu_map__put(evlist->cpus);
1176                 evlist->cpus = cpus;
1177         }
1178
1179         if (threads != evlist->threads) {
1180                 thread_map__put(evlist->threads);
1181                 evlist->threads = threads;
1182         }
1183
1184         perf_evlist__propagate_maps(evlist);
1185 }
1186
1187 int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel)
1188 {
1189         struct perf_evsel *evsel;
1190         int err = 0;
1191         const int ncpus = cpu_map__nr(evlist->cpus),
1192                   nthreads = thread_map__nr(evlist->threads);
1193
1194         evlist__for_each(evlist, evsel) {
1195                 if (evsel->filter == NULL)
1196                         continue;
1197
1198                 /*
1199                  * Filters only work for tracepoint events, which don't have a CPU limit,
1200                  * so the evlist and evsel maps should always be the same.
1201                  */
1202                 err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter);
1203                 if (err) {
1204                         *err_evsel = evsel;
1205                         break;
1206                 }
1207         }
1208
1209         return err;
1210 }
1211
1212 int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter)
1213 {
1214         struct perf_evsel *evsel;
1215         int err = 0;
1216
1217         evlist__for_each(evlist, evsel) {
1218                 err = perf_evsel__set_filter(evsel, filter);
1219                 if (err)
1220                         break;
1221         }
1222
1223         return err;
1224 }
1225
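/*
 * Builds a single tracepoint filter excluding every pid in @pids, e.g.
 * for two made-up pids 1017 and 2042:
 *
 *	"common_pid != 1017 && common_pid != 2042"
 */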
1226 int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids)
1227 {
1228         char *filter;
1229         int ret = -1;
1230         size_t i;
1231
1232         for (i = 0; i < npids; ++i) {
1233                 if (i == 0) {
1234                         if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
1235                                 return -1;
1236                 } else {
1237                         char *tmp;
1238
1239                         if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
1240                                 goto out_free;
1241
1242                         free(filter);
1243                         filter = tmp;
1244                 }
1245         }
1246
1247         ret = perf_evlist__set_filter(evlist, filter);
1248 out_free:
1249         free(filter);
1250         return ret;
1251 }
1252
1253 int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid)
1254 {
1255         return perf_evlist__set_filter_pids(evlist, 1, &pid);
1256 }
1257
1258 bool perf_evlist__valid_sample_type(struct perf_evlist *evlist)
1259 {
1260         struct perf_evsel *pos;
1261
1262         if (evlist->nr_entries == 1)
1263                 return true;
1264
1265         if (evlist->id_pos < 0 || evlist->is_pos < 0)
1266                 return false;
1267
1268         evlist__for_each(evlist, pos) {
1269                 if (pos->id_pos != evlist->id_pos ||
1270                     pos->is_pos != evlist->is_pos)
1271                         return false;
1272         }
1273
1274         return true;
1275 }
1276
1277 u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist)
1278 {
1279         struct perf_evsel *evsel;
1280
1281         if (evlist->combined_sample_type)
1282                 return evlist->combined_sample_type;
1283
1284         evlist__for_each(evlist, evsel)
1285                 evlist->combined_sample_type |= evsel->attr.sample_type;
1286
1287         return evlist->combined_sample_type;
1288 }
1289
1290 u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist)
1291 {
1292         evlist->combined_sample_type = 0;
1293         return __perf_evlist__combined_sample_type(evlist);
1294 }
1295
1296 u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist)
1297 {
1298         struct perf_evsel *evsel;
1299         u64 branch_type = 0;
1300
1301         evlist__for_each(evlist, evsel)
1302                 branch_type |= evsel->attr.branch_sample_type;
1303         return branch_type;
1304 }
1305
1306 bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
1307 {
1308         struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
1309         u64 read_format = first->attr.read_format;
1310         u64 sample_type = first->attr.sample_type;
1311
1312         evlist__for_each(evlist, pos) {
1313                 if (read_format != pos->attr.read_format)
1314                         return false;
1315         }
1316
1317         /* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
1318         if ((sample_type & PERF_SAMPLE_READ) &&
1319             !(read_format & PERF_FORMAT_ID)) {
1320                 return false;
1321         }
1322
1323         return true;
1324 }
1325
1326 u64 perf_evlist__read_format(struct perf_evlist *evlist)
1327 {
1328         struct perf_evsel *first = perf_evlist__first(evlist);
1329         return first->attr.read_format;
1330 }
1331
1332 u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist)
1333 {
1334         struct perf_evsel *first = perf_evlist__first(evlist);
1335         struct perf_sample *data;
1336         u64 sample_type;
1337         u16 size = 0;
1338
1339         if (!first->attr.sample_id_all)
1340                 goto out;
1341
1342         sample_type = first->attr.sample_type;
1343
1344         if (sample_type & PERF_SAMPLE_TID)
1345                 size += sizeof(data->tid) * 2;
1346
1347         if (sample_type & PERF_SAMPLE_TIME)
1348                 size += sizeof(data->time);
1349
1350         if (sample_type & PERF_SAMPLE_ID)
1351                 size += sizeof(data->id);
1352
1353         if (sample_type & PERF_SAMPLE_STREAM_ID)
1354                 size += sizeof(data->stream_id);
1355
1356         if (sample_type & PERF_SAMPLE_CPU)
1357                 size += sizeof(data->cpu) * 2;
1358
1359         if (sample_type & PERF_SAMPLE_IDENTIFIER)
1360                 size += sizeof(data->id);
1361 out:
1362         return size;
1363 }
1364
1365 bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist)
1366 {
1367         struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
1368
1369         evlist__for_each_continue(evlist, pos) {
1370                 if (first->attr.sample_id_all != pos->attr.sample_id_all)
1371                         return false;
1372         }
1373
1374         return true;
1375 }
1376
1377 bool perf_evlist__sample_id_all(struct perf_evlist *evlist)
1378 {
1379         struct perf_evsel *first = perf_evlist__first(evlist);
1380         return first->attr.sample_id_all;
1381 }
1382
1383 void perf_evlist__set_selected(struct perf_evlist *evlist,
1384                                struct perf_evsel *evsel)
1385 {
1386         evlist->selected = evsel;
1387 }
1388
1389 void perf_evlist__close(struct perf_evlist *evlist)
1390 {
1391         struct perf_evsel *evsel;
1392         int ncpus = cpu_map__nr(evlist->cpus);
1393         int nthreads = thread_map__nr(evlist->threads);
1394         int n;
1395
1396         evlist__for_each_reverse(evlist, evsel) {
1397                 n = evsel->cpus ? evsel->cpus->nr : ncpus;
1398                 perf_evsel__close(evsel, n, nthreads);
1399         }
1400 }
1401
1402 static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist)
1403 {
1404         struct cpu_map    *cpus;
1405         struct thread_map *threads;
1406         int err = -ENOMEM;
1407
1408         /*
1409          * Try reading /sys/devices/system/cpu/online to get
1410          * an all cpus map.
1411          *
1412          * FIXME: -ENOMEM is the best we can do here, the cpu_map
1413          * code needs an overhaul to properly forward the
1414          * error, and we may not want to do that fallback to a
1415          * default cpu identity map :-\
1416          */
1417         cpus = cpu_map__new(NULL);
1418         if (!cpus)
1419                 goto out;
1420
1421         threads = thread_map__new_dummy();
1422         if (!threads)
1423                 goto out_put;
1424
1425         perf_evlist__set_maps(evlist, cpus, threads);
1426 out:
1427         return err;
1428 out_put:
1429         cpu_map__put(cpus);
1430         goto out;
1431 }
1432
1433 int perf_evlist__open(struct perf_evlist *evlist)
1434 {
1435         struct perf_evsel *evsel;
1436         int err;
1437
1438         /*
1439          * Default: one fd per CPU, all threads, aka systemwide
1440          * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
1441          */
1442         if (evlist->threads == NULL && evlist->cpus == NULL) {
1443                 err = perf_evlist__create_syswide_maps(evlist);
1444                 if (err < 0)
1445                         goto out_err;
1446         }
1447
1448         perf_evlist__update_id_pos(evlist);
1449
1450         evlist__for_each(evlist, evsel) {
1451                 err = perf_evsel__open(evsel, evlist->cpus, evlist->threads);
1452                 if (err < 0)
1453                         goto out_err;
1454         }
1455
1456         return 0;
1457 out_err:
1458         perf_evlist__close(evlist);
1459         errno = -err;
1460         return err;
1461 }
1462
1463 int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *target,
1464                                   const char *argv[], bool pipe_output,
1465                                   void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
1466 {
1467         int child_ready_pipe[2], go_pipe[2];
1468         char bf;
1469
1470         if (pipe(child_ready_pipe) < 0) {
1471                 perror("failed to create 'ready' pipe");
1472                 return -1;
1473         }
1474
1475         if (pipe(go_pipe) < 0) {
1476                 perror("failed to create 'go' pipe");
1477                 goto out_close_ready_pipe;
1478         }
1479
1480         evlist->workload.pid = fork();
1481         if (evlist->workload.pid < 0) {
1482                 perror("failed to fork");
1483                 goto out_close_pipes;
1484         }
1485
1486         if (!evlist->workload.pid) {
1487                 int ret;
1488
1489                 if (pipe_output)
1490                         dup2(2, 1);
1491
1492                 signal(SIGTERM, SIG_DFL);
1493
1494                 close(child_ready_pipe[0]);
1495                 close(go_pipe[1]);
1496                 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
1497
1498                 /*
1499                  * Tell the parent we're ready to go
1500                  */
1501                 close(child_ready_pipe[1]);
1502
1503                 /*
1504                  * Wait until the parent tells us to go.
1505                  */
1506                 ret = read(go_pipe[0], &bf, 1);
1507                 /*
1508                  * The parent will ask for the execvp() to be performed by
1509                  * writing exactly one byte, in workload.cork_fd, usually via
1510                  * perf_evlist__start_workload().
1511                  *
1512                  * For cancelling the workload without actually running it,
1513                  * the parent will just close workload.cork_fd, without writing
1514                  * anything, i.e. read will return zero and we just exit()
1515                  * here.
1516                  */
1517                 if (ret != 1) {
1518                         if (ret == -1)
1519                                 perror("unable to read pipe");
1520                         exit(ret);
1521                 }
1522
1523                 execvp(argv[0], (char **)argv);
1524
1525                 if (exec_error) {
1526                         union sigval val;
1527
1528                         val.sival_int = errno;
1529                         if (sigqueue(getppid(), SIGUSR1, val))
1530                                 perror(argv[0]);
1531                 } else
1532                         perror(argv[0]);
1533                 exit(-1);
1534         }
1535
1536         if (exec_error) {
1537                 struct sigaction act = {
1538                         .sa_flags     = SA_SIGINFO,
1539                         .sa_sigaction = exec_error,
1540                 };
1541                 sigaction(SIGUSR1, &act, NULL);
1542         }
1543
1544         if (target__none(target)) {
1545                 if (evlist->threads == NULL) {
1546                         fprintf(stderr, "FATAL: evlist->threads needs to be set at this point (%s:%d).\n",
1547                                 __func__, __LINE__);
1548                         goto out_close_pipes;
1549                 }
1550                 thread_map__set_pid(evlist->threads, 0, evlist->workload.pid);
1551         }
1552
1553         close(child_ready_pipe[1]);
1554         close(go_pipe[0]);
1555         /*
1556          * wait for child to settle
1557          */
1558         if (read(child_ready_pipe[0], &bf, 1) == -1) {
1559                 perror("unable to read pipe");
1560                 goto out_close_pipes;
1561         }
1562
1563         fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
1564         evlist->workload.cork_fd = go_pipe[1];
1565         close(child_ready_pipe[0]);
1566         return 0;
1567
1568 out_close_pipes:
1569         close(go_pipe[0]);
1570         close(go_pipe[1]);
1571 out_close_ready_pipe:
1572         close(child_ready_pipe[0]);
1573         close(child_ready_pipe[1]);
1574         return -1;
1575 }
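
/*
 * Illustrative pairing with perf_evlist__start_workload() (a sketch, not
 * taken from this file): the caller prepares the workload, sets up the
 * counters while the forked child sits blocked on the "go" pipe, and only
 * then uncorks it:
 *
 *	perf_evlist__prepare_workload(evlist, &target, argv, false, NULL);
 *	perf_evlist__open(evlist);
 *	perf_evlist__mmap(evlist, UINT_MAX, false);
 *	perf_evlist__enable(evlist);
 *	perf_evlist__start_workload(evlist);
 */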
1576
1577 int perf_evlist__start_workload(struct perf_evlist *evlist)
1578 {
1579         if (evlist->workload.cork_fd > 0) {
1580                 char bf = 0;
1581                 int ret;
1582                 /*
1583                  * Remove the cork, let it rip!
1584                  */
1585                 ret = write(evlist->workload.cork_fd, &bf, 1);
1586                 if (ret < 0)
1587                         perror("unable to write to pipe");
1588
1589                 close(evlist->workload.cork_fd);
1590                 return ret;
1591         }
1592
1593         return 0;
1594 }
1595
1596 int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event,
1597                               struct perf_sample *sample)
1598 {
1599         struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);
1600
1601         if (!evsel)
1602                 return -EFAULT;
1603         return perf_evsel__parse_sample(evsel, event, sample);
1604 }
1605
1606 size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp)
1607 {
1608         struct perf_evsel *evsel;
1609         size_t printed = 0;
1610
1611         evlist__for_each(evlist, evsel) {
1612                 printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
1613                                    perf_evsel__name(evsel));
1614         }
1615
1616         return printed + fprintf(fp, "\n");
1617 }
1618
1619 int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused,
1620                                int err, char *buf, size_t size)
1621 {
1622         int printed, value;
1623         char sbuf[STRERR_BUFSIZE], *emsg = strerror_r(err, sbuf, sizeof(sbuf));
1624
1625         switch (err) {
1626         case EACCES:
1627         case EPERM:
1628                 printed = scnprintf(buf, size,
1629                                     "Error:\t%s.\n"
1630                                     "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);
1631
1632                 value = perf_event_paranoid();
1633
1634                 printed += scnprintf(buf + printed, size - printed, "\nHint:\t");
1635
1636                 if (value >= 2) {
1637                         printed += scnprintf(buf + printed, size - printed,
1638                                              "For your workloads it needs to be <= 1\nHint:\t");
1639                 }
1640                 printed += scnprintf(buf + printed, size - printed,
1641                                      "For system wide tracing it needs to be set to -1.\n");
1642
1643                 printed += scnprintf(buf + printed, size - printed,
1644                                     "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
1645                                     "Hint:\tThe current value is %d.", value);
1646                 break;
1647         default:
1648                 scnprintf(buf, size, "%s", emsg);
1649                 break;
1650         }
1651
1652         return 0;
1653 }
1654
1655 int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size)
1656 {
1657         char sbuf[STRERR_BUFSIZE], *emsg = strerror_r(err, sbuf, sizeof(sbuf));
1658         int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0;
1659
1660         switch (err) {
1661         case EPERM:
1662                 sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
1663                 printed += scnprintf(buf + printed, size - printed,
1664                                      "Error:\t%s.\n"
1665                                      "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
1666                                      "Hint:\tTried using %zd kB.\n",
1667                                      emsg, pages_max_per_user, pages_attempted);
1668
1669                 if (pages_attempted >= pages_max_per_user) {
1670                         printed += scnprintf(buf + printed, size - printed,
1671                                              "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
1672                                              pages_max_per_user + pages_attempted);
1673                 }
1674
1675                 printed += scnprintf(buf + printed, size - printed,
1676                                      "Hint:\tTry using a smaller -m/--mmap-pages value.");
1677                 break;
1678         default:
1679                 scnprintf(buf, size, "%s", emsg);
1680                 break;
1681         }
1682
1683         return 0;
1684 }
1685
1686 void perf_evlist__to_front(struct perf_evlist *evlist,
1687                            struct perf_evsel *move_evsel)
1688 {
1689         struct perf_evsel *evsel, *n;
1690         LIST_HEAD(move);
1691
1692         if (move_evsel == perf_evlist__first(evlist))
1693                 return;
1694
1695         evlist__for_each_safe(evlist, n, evsel) {
1696                 if (evsel->leader == move_evsel->leader)
1697                         list_move_tail(&evsel->node, &move);
1698         }
1699
1700         list_splice(&move, &evlist->entries);
1701 }
1702
1703 void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
1704                                      struct perf_evsel *tracking_evsel)
1705 {
1706         struct perf_evsel *evsel;
1707
1708         if (tracking_evsel->tracking)
1709                 return;
1710
1711         evlist__for_each(evlist, evsel) {
1712                 if (evsel != tracking_evsel)
1713                         evsel->tracking = false;
1714         }
1715
1716         tracking_evsel->tracking = true;
1717 }