arch/x86/events/amd/uncore.c
/*
 * Copyright (C) 2013 Advanced Micro Devices, Inc.
 *
 * Author: Jacob Shin <jacob.shin@amd.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>

#include <asm/cpufeature.h>
#include <asm/perf_event.h>
#include <asm/msr.h>

#define NUM_COUNTERS_NB         4
#define NUM_COUNTERS_L2         4
#define MAX_COUNTERS            NUM_COUNTERS_NB

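/*
 * Per-block rdpmc index bases: amd_uncore_add() programs counter slot i at
 * msr_base + 2 * i (control) and msr_base + 1 + 2 * i (count), and reads it
 * back via rdpmc at rdpmc_base + i.
 */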
#define RDPMC_BASE_NB           6
#define RDPMC_BASE_L2           10

#define COUNTER_SHIFT           16

static HLIST_HEAD(uncore_unused_list);

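/*
 * One amd_uncore instance describes a shared NB or L2I counter block.
 * CPUs that share the same hardware unit (same ->id) end up pointing at a
 * single instance; ->refcnt counts how many CPUs reference it, and ->cpu
 * is the CPU that carries the events for the whole unit.
 */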
struct amd_uncore {
        int id;
        int refcnt;
        int cpu;
        int num_counters;
        int rdpmc_base;
        u32 msr_base;
        cpumask_t *active_mask;
        struct pmu *pmu;
        struct perf_event *events[MAX_COUNTERS];
        struct hlist_node node;
};

static struct amd_uncore * __percpu *amd_uncore_nb;
static struct amd_uncore * __percpu *amd_uncore_l2;

static struct pmu amd_nb_pmu;
static struct pmu amd_l2_pmu;

static cpumask_t amd_nb_active_mask;
static cpumask_t amd_l2_active_mask;

static bool is_nb_event(struct perf_event *event)
{
        return event->pmu->type == amd_nb_pmu.type;
}

static bool is_l2_event(struct perf_event *event)
{
        return event->pmu->type == amd_l2_pmu.type;
}

static struct amd_uncore *event_to_amd_uncore(struct perf_event *event)
{
        if (is_nb_event(event) && amd_uncore_nb)
                return *per_cpu_ptr(amd_uncore_nb, event->cpu);
        else if (is_l2_event(event) && amd_uncore_l2)
                return *per_cpu_ptr(amd_uncore_l2, event->cpu);

        return NULL;
}

static void amd_uncore_read(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        u64 prev, new;
        s64 delta;

        /*
         * since we do not enable counter overflow interrupts,
         * we do not have to worry about prev_count changing on us
         */

        prev = local64_read(&hwc->prev_count);
        rdpmcl(hwc->event_base_rdpmc, new);
        local64_set(&hwc->prev_count, new);
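        /*
         * Shifting the raw values up by COUNTER_SHIFT and the difference
         * back down sign-extends the 48-bit counter delta, so a wrap of
         * the hardware counter still yields the correct signed delta.
         */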
        delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
        delta >>= COUNTER_SHIFT;
        local64_add(delta, &event->count);
}

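/*
 * With PERF_EF_RELOAD the hardware counter is first re-seeded from the
 * cached prev_count, then the event select MSR is written with the enable
 * bit set.
 */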
static void amd_uncore_start(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;

        if (flags & PERF_EF_RELOAD)
                wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));

        hwc->state = 0;
        wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
        perf_event_update_userpage(event);
}

static void amd_uncore_stop(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;

        wrmsrl(hwc->config_base, hwc->config);
        hwc->state |= PERF_HES_STOPPED;

        if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
                amd_uncore_read(event);
                hwc->state |= PERF_HES_UPTODATE;
        }
}

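/*
 * Claim a counter slot for the event.  Slots are shared by all CPUs on the
 * same NB / L2I unit, so a free slot is taken atomically with cmpxchg() on
 * the events[] array.
 */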
static int amd_uncore_add(struct perf_event *event, int flags)
{
        int i;
        struct amd_uncore *uncore = event_to_amd_uncore(event);
        struct hw_perf_event *hwc = &event->hw;

        /* are we already assigned? */
        if (hwc->idx != -1 && uncore->events[hwc->idx] == event)
                goto out;

        for (i = 0; i < uncore->num_counters; i++) {
                if (uncore->events[i] == event) {
                        hwc->idx = i;
                        goto out;
                }
        }

        /* if not, take the first available counter */
        hwc->idx = -1;
        for (i = 0; i < uncore->num_counters; i++) {
                if (cmpxchg(&uncore->events[i], NULL, event) == NULL) {
                        hwc->idx = i;
                        break;
                }
        }

out:
        if (hwc->idx == -1)
                return -EBUSY;

        hwc->config_base = uncore->msr_base + (2 * hwc->idx);
        hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx);
        hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
        hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

        if (flags & PERF_EF_START)
                amd_uncore_start(event, PERF_EF_RELOAD);

        return 0;
}

static void amd_uncore_del(struct perf_event *event, int flags)
{
        int i;
        struct amd_uncore *uncore = event_to_amd_uncore(event);
        struct hw_perf_event *hwc = &event->hw;

        amd_uncore_stop(event, PERF_EF_UPDATE);

        for (i = 0; i < uncore->num_counters; i++) {
                if (cmpxchg(&uncore->events[i], event, NULL) == event)
                        break;
        }

        hwc->idx = -1;
}

static int amd_uncore_event_init(struct perf_event *event)
{
        struct amd_uncore *uncore;
        struct hw_perf_event *hwc = &event->hw;

        if (event->attr.type != event->pmu->type)
                return -ENOENT;

        /*
         * NB and L2 counters (MSRs) are shared across all cores that share the
         * same NB / L2 cache. Interrupts can be directed to a single target
         * core, however, event counts generated by processes running on other
         * cores cannot be masked out. So we do not support sampling and
         * per-thread events.
         */
        if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
                return -EINVAL;

        /* NB and L2 counters do not have usr/os/guest/host bits */
        if (event->attr.exclude_user || event->attr.exclude_kernel ||
            event->attr.exclude_host || event->attr.exclude_guest)
                return -EINVAL;

        /* and we do not enable counter overflow interrupts */
        hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
        hwc->idx = -1;

        if (event->cpu < 0)
                return -EINVAL;

        uncore = event_to_amd_uncore(event);
        if (!uncore)
                return -ENODEV;

        /*
         * since request can come in to any of the shared cores, we will remap
         * to a single common cpu.
         */
        event->cpu = uncore->cpu;

        return 0;
}

static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
                                            struct device_attribute *attr,
                                            char *buf)
{
        cpumask_t *active_mask;
        struct pmu *pmu = dev_get_drvdata(dev);

        if (pmu->type == amd_nb_pmu.type)
                active_mask = &amd_nb_active_mask;
        else if (pmu->type == amd_l2_pmu.type)
                active_mask = &amd_l2_active_mask;
        else
                return 0;

        return cpumap_print_to_pagebuf(true, buf, active_mask);
}
static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);

static struct attribute *amd_uncore_attrs[] = {
        &dev_attr_cpumask.attr,
        NULL,
};

static struct attribute_group amd_uncore_attr_group = {
        .attrs = amd_uncore_attrs,
};

PMU_FORMAT_ATTR(event, "config:0-7,32-35");
PMU_FORMAT_ATTR(umask, "config:8-15");

static struct attribute *amd_uncore_format_attr[] = {
        &format_attr_event.attr,
        &format_attr_umask.attr,
        NULL,
};

static struct attribute_group amd_uncore_format_group = {
        .name = "format",
        .attrs = amd_uncore_format_attr,
};

static const struct attribute_group *amd_uncore_attr_groups[] = {
        &amd_uncore_attr_group,
        &amd_uncore_format_group,
        NULL,
};

static struct pmu amd_nb_pmu = {
        .task_ctx_nr    = perf_invalid_context,
        .attr_groups    = amd_uncore_attr_groups,
        .name           = "amd_nb",
        .event_init     = amd_uncore_event_init,
        .add            = amd_uncore_add,
        .del            = amd_uncore_del,
        .start          = amd_uncore_start,
        .stop           = amd_uncore_stop,
        .read           = amd_uncore_read,
};

static struct pmu amd_l2_pmu = {
        .task_ctx_nr    = perf_invalid_context,
        .attr_groups    = amd_uncore_attr_groups,
        .name           = "amd_l2",
        .event_init     = amd_uncore_event_init,
        .add            = amd_uncore_add,
        .del            = amd_uncore_del,
        .start          = amd_uncore_start,
        .stop           = amd_uncore_stop,
        .read           = amd_uncore_read,
};

static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
{
        return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL,
                        cpu_to_node(cpu));
}

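/*
 * CPU hotplug "prepare" callback: allocate a per-CPU amd_uncore for each
 * enabled block on the node of the incoming CPU.  The structures are merged
 * with an online sibling's later, in amd_uncore_cpu_starting().
 */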
static int amd_uncore_cpu_up_prepare(unsigned int cpu)
{
        struct amd_uncore *uncore_nb = NULL, *uncore_l2;

        if (amd_uncore_nb) {
                uncore_nb = amd_uncore_alloc(cpu);
                if (!uncore_nb)
                        goto fail;
                uncore_nb->cpu = cpu;
                uncore_nb->num_counters = NUM_COUNTERS_NB;
                uncore_nb->rdpmc_base = RDPMC_BASE_NB;
                uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
                uncore_nb->active_mask = &amd_nb_active_mask;
                uncore_nb->pmu = &amd_nb_pmu;
                uncore_nb->id = -1;
                *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
        }

        if (amd_uncore_l2) {
                uncore_l2 = amd_uncore_alloc(cpu);
                if (!uncore_l2)
                        goto fail;
                uncore_l2->cpu = cpu;
                uncore_l2->num_counters = NUM_COUNTERS_L2;
                uncore_l2->rdpmc_base = RDPMC_BASE_L2;
                uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL;
                uncore_l2->active_mask = &amd_l2_active_mask;
                uncore_l2->pmu = &amd_l2_pmu;
                uncore_l2->id = -1;
                *per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2;
        }

        return 0;

fail:
        if (amd_uncore_nb)
                *per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
        kfree(uncore_nb);
        return -ENOMEM;
}

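/*
 * If another online CPU already carries an uncore with the same id, park
 * this CPU's copy on uncore_unused_list (freed later from the online
 * callback) and share the sibling's instance instead, bumping its refcnt.
 */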
static struct amd_uncore *
amd_uncore_find_online_sibling(struct amd_uncore *this,
                               struct amd_uncore * __percpu *uncores)
{
        unsigned int cpu;
        struct amd_uncore *that;

        for_each_online_cpu(cpu) {
                that = *per_cpu_ptr(uncores, cpu);

                if (!that)
                        continue;

                if (this == that)
                        continue;

                if (this->id == that->id) {
                        hlist_add_head(&this->node, &uncore_unused_list);
                        this = that;
                        break;
                }
        }

        this->refcnt++;
        return this;
}

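/*
 * CPU hotplug "starting" callback, run on the incoming CPU: derive the id of
 * the shared unit (the node id from CPUID leaf 0x8000001e for the NB; the
 * APIC id rounded down to the L2 sharing-group size reported by leaf
 * 0x8000001d for L2I) and fold this CPU's uncore into an already-online
 * sibling if one exists.
 */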
static int amd_uncore_cpu_starting(unsigned int cpu)
{
        unsigned int eax, ebx, ecx, edx;
        struct amd_uncore *uncore;

        if (amd_uncore_nb) {
                uncore = *per_cpu_ptr(amd_uncore_nb, cpu);
                cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
                uncore->id = ecx & 0xff;

                uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb);
                *per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
        }

        if (amd_uncore_l2) {
                unsigned int apicid = cpu_data(cpu).apicid;
                unsigned int nshared;

                uncore = *per_cpu_ptr(amd_uncore_l2, cpu);
                cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx);
                nshared = ((eax >> 14) & 0xfff) + 1;
                uncore->id = apicid - (apicid % nshared);

                uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_l2);
                *per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
        }

        return 0;
}

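/*
 * Free the duplicate uncore structures that amd_uncore_find_online_sibling()
 * parked on uncore_unused_list.  Called from the online callback rather than
 * from the starting callback.
 */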
static void uncore_clean_online(void)
{
        struct amd_uncore *uncore;
        struct hlist_node *n;

        hlist_for_each_entry_safe(uncore, n, &uncore_unused_list, node) {
                hlist_del(&uncore->node);
                kfree(uncore);
        }
}

static void uncore_online(unsigned int cpu,
                          struct amd_uncore * __percpu *uncores)
{
        struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);

        uncore_clean_online();

        if (cpu == uncore->cpu)
                cpumask_set_cpu(cpu, uncore->active_mask);
}

static int amd_uncore_cpu_online(unsigned int cpu)
{
        if (amd_uncore_nb)
                uncore_online(cpu, amd_uncore_nb);

        if (amd_uncore_l2)
                uncore_online(cpu, amd_uncore_l2);

        return 0;
}

static void uncore_down_prepare(unsigned int cpu,
                                struct amd_uncore * __percpu *uncores)
{
        unsigned int i;
        struct amd_uncore *this = *per_cpu_ptr(uncores, cpu);

        if (this->cpu != cpu)
                return;

        /* this cpu is going down, migrate to a shared sibling if possible */
        for_each_online_cpu(i) {
                struct amd_uncore *that = *per_cpu_ptr(uncores, i);

                if (cpu == i)
                        continue;

                if (this == that) {
                        perf_pmu_migrate_context(this->pmu, cpu, i);
                        cpumask_clear_cpu(cpu, that->active_mask);
                        cpumask_set_cpu(i, that->active_mask);
                        that->cpu = i;
                        break;
                }
        }
}

static int amd_uncore_cpu_down_prepare(unsigned int cpu)
{
        if (amd_uncore_nb)
                uncore_down_prepare(cpu, amd_uncore_nb);

        if (amd_uncore_l2)
                uncore_down_prepare(cpu, amd_uncore_l2);

        return 0;
}

static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
{
        struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);

        if (cpu == uncore->cpu)
                cpumask_clear_cpu(cpu, uncore->active_mask);

        if (!--uncore->refcnt)
                kfree(uncore);
        *per_cpu_ptr(uncores, cpu) = NULL;
}

static int amd_uncore_cpu_dead(unsigned int cpu)
{
        if (amd_uncore_nb)
                uncore_dead(cpu, amd_uncore_nb);

        if (amd_uncore_l2)
                uncore_dead(cpu, amd_uncore_l2);

        return 0;
}

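/*
 * Detect the NB and L2I counter blocks from CPUID feature flags, register
 * one PMU per block, and install the CPU hotplug callbacks; the hotplug core
 * invokes the prepare/starting/online callbacks for every CPU already up.
 */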
static int __init amd_uncore_init(void)
{
        int ret = -ENODEV;

        if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
                goto fail_nodev;

        if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
                goto fail_nodev;

        if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
                amd_uncore_nb = alloc_percpu(struct amd_uncore *);
                if (!amd_uncore_nb) {
                        ret = -ENOMEM;
                        goto fail_nb;
                }
                ret = perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);
                if (ret)
                        goto fail_nb;

                pr_info("perf: AMD NB counters detected\n");
                ret = 0;
        }

        if (boot_cpu_has(X86_FEATURE_PERFCTR_L2)) {
                amd_uncore_l2 = alloc_percpu(struct amd_uncore *);
                if (!amd_uncore_l2) {
                        ret = -ENOMEM;
                        goto fail_l2;
                }
                ret = perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1);
                if (ret)
                        goto fail_l2;

                pr_info("perf: AMD L2I counters detected\n");
                ret = 0;
        }

        /*
         * Install callbacks. Core will call them for each online cpu.
         */
        if (cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP,
                              "PERF_X86_AMD_UNCORE_PREP",
                              amd_uncore_cpu_up_prepare, amd_uncore_cpu_dead))
                goto fail_l2;

        if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
                              "AP_PERF_X86_AMD_UNCORE_STARTING",
                              amd_uncore_cpu_starting, NULL))
                goto fail_prep;
        if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
                              "AP_PERF_X86_AMD_UNCORE_ONLINE",
                              amd_uncore_cpu_online,
                              amd_uncore_cpu_down_prepare))
                goto fail_start;
        return 0;

fail_start:
        cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
fail_prep:
        cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);
fail_l2:
        if (boot_cpu_has(X86_FEATURE_PERFCTR_NB))
                perf_pmu_unregister(&amd_nb_pmu);
        if (amd_uncore_l2)
                free_percpu(amd_uncore_l2);
fail_nb:
        if (amd_uncore_nb)
                free_percpu(amd_uncore_nb);

fail_nodev:
        return ret;
}
device_initcall(amd_uncore_init);