3b09ecfd0aee20861ae7c2d5f48a19965a21953f
[cascardo/linux.git] / arch / powerpc / perf / hv-24x7.c
1 /*
2  * Hypervisor supplied "24x7" performance counter support
3  *
4  * Author: Cody P Schafer <cody@linux.vnet.ibm.com>
5  * Copyright 2014 IBM Corporation.
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; either version
10  * 2 of the License, or (at your option) any later version.
11  */
12
13 #define pr_fmt(fmt) "hv-24x7: " fmt
14
15 #include <linux/perf_event.h>
16 #include <linux/rbtree.h>
17 #include <linux/module.h>
18 #include <linux/slab.h>
19 #include <linux/vmalloc.h>
20
21 #include <asm/firmware.h>
22 #include <asm/hvcall.h>
23 #include <asm/io.h>
24 #include <linux/byteorder/generic.h>
25
26 #include "hv-24x7.h"
27 #include "hv-24x7-catalog.h"
28 #include "hv-common.h"
29
30 static const char *event_domain_suffix(unsigned domain)
31 {
32         switch (domain) {
33 #define DOMAIN(n, v, x, c)              \
34         case HV_PERF_DOMAIN_##n:        \
35                 return "__" #n;
36 #include "hv-24x7-domains.h"
37 #undef DOMAIN
38         default:
39                 WARN(1, "unknown domain %d\n", domain);
40                 return "__UNKNOWN_DOMAIN_SUFFIX";
41         }
42 }
43
44 static bool domain_is_valid(unsigned domain)
45 {
46         switch (domain) {
47 #define DOMAIN(n, v, x, c)              \
48         case HV_PERF_DOMAIN_##n:        \
49                 /* fall through */
50 #include "hv-24x7-domains.h"
51 #undef DOMAIN
52                 return true;
53         default:
54                 return false;
55         }
56 }
57
58 static bool is_physical_domain(unsigned domain)
59 {
60         switch (domain) {
61 #define DOMAIN(n, v, x, c)              \
62         case HV_PERF_DOMAIN_##n:        \
63                 return c;
64 #include "hv-24x7-domains.h"
65 #undef DOMAIN
66         default:
67                 return false;
68         }
69 }
70
71 static bool catalog_entry_domain_is_valid(unsigned domain)
72 {
73         return is_physical_domain(domain);
74 }
75
76 /*
77  * TODO: Merging events:
78  * - Think of the hcall as an interface to a 4d array of counters:
79  *   - x = domains
80  *   - y = indexes in the domain (core, chip, vcpu, node, etc)
81  *   - z = offset into the counter space
82  *   - w = lpars (guest vms, "logical partitions")
83  * - A single request is: x,y,y_last,z,z_last,w,w_last
84  *   - this means we can retrieve a rectangle of counters in y,z for a single x.
85  *
86  * - Things to consider (ignoring w):
87  *   - input  cost_per_request = 16
88  *   - output cost_per_result(ys,zs)  = 8 + 8 * ys + ys * zs
89  *   - limited number of requests per hcall (must fit into 4K bytes)
 *     - 4k = 16 [buffer header] + 16 [request size] * request_count
91  *     - 255 requests per hcall
92  *   - sometimes it will be more efficient to read extra data and discard
93  */
94
95 /*
96  * Example usage:
97  *  perf stat -e 'hv_24x7/domain=2,offset=8,vcpu=0,lpar=0xffffffff/'
98  */
99
/*
 * Bit ranges of the perf event config words used by this PMU.
 * NOTE(review): the macros come from hv-common.h; the arguments look like
 * (name, config word, first bit, last bit) - confirm there.
 */

/* u3 0-6, one of HV_24X7_PERF_DOMAIN */
EVENT_DEFINE_RANGE_FORMAT(domain, config, 0, 3);
/* u16; core and vcpu share the same bits of config */
EVENT_DEFINE_RANGE_FORMAT(core, config, 16, 31);
EVENT_DEFINE_RANGE_FORMAT(vcpu, config, 16, 31);
/* u32, see "data_offset" */
EVENT_DEFINE_RANGE_FORMAT(offset, config, 32, 63);
/* u16 */
EVENT_DEFINE_RANGE_FORMAT(lpar, config1, 0, 15);

/* ranges not covered by any of the format fields above */
EVENT_DEFINE_RANGE(reserved1, config,   4, 15);
EVENT_DEFINE_RANGE(reserved2, config1, 16, 63);
EVENT_DEFINE_RANGE(reserved3, config2,  0, 63);
112 EVENT_DEFINE_RANGE(reserved3, config2,  0, 63);
113
/* NULL-terminated list of the format attributes defined above */
static struct attribute *format_attrs[] = {
	&format_attr_domain.attr,
	&format_attr_offset.attr,
	&format_attr_core.attr,
	&format_attr_vcpu.attr,
	&format_attr_lpar.attr,
	NULL,
};

/* "format" group: static, backed by format_attrs */
static struct attribute_group format_group = {
	.name = "format",
	.attrs = format_attrs,
};

/* "events" group: attribute vector is filled in at init time */
static struct attribute_group event_group = {
	.name = "events",
	/* .attrs is set in init */
};

/* "event_descs" group: attribute vector is filled in at init time */
static struct attribute_group event_desc_group = {
	.name = "event_descs",
	/* .attrs is set in init */
};

/* "event_long_descs" group: attribute vector is filled in at init time */
static struct attribute_group event_long_desc_group = {
	.name = "event_long_descs",
	/* .attrs is set in init */
};
142
/* slab cache used for the pages handed to the catalog hcall */
static struct kmem_cache *hv_page_cache;

/* per-cpu transaction flags and error state */
DEFINE_PER_CPU(int, hv_24x7_txn_flags);
DEFINE_PER_CPU(int, hv_24x7_txn_err);

/*
 * Per-cpu table of events.
 * NOTE(review): 255 matches the "255 requests per hcall" limit mentioned in
 * the TODO comment earlier in this file - confirm that is the intent.
 */
struct hv_24x7_hw {
	struct perf_event *events[255];
};

DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw);

/*
 * request_buffer and result_buffer are not required to be 4k aligned,
 * but are not allowed to cross any 4k boundary. Aligning them to 4k is
 * the simplest way to ensure that.
 */
#define H24x7_DATA_BUFFER_SIZE  4096
DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
162
163 static char *event_name(struct hv_24x7_event_data *ev, int *len)
164 {
165         *len = be16_to_cpu(ev->event_name_len) - 2;
166         return (char *)ev->remainder;
167 }
168
169 static char *event_desc(struct hv_24x7_event_data *ev, int *len)
170 {
171         unsigned nl = be16_to_cpu(ev->event_name_len);
172         __be16 *desc_len = (__be16 *)(ev->remainder + nl - 2);
173
174         *len = be16_to_cpu(*desc_len) - 2;
175         return (char *)ev->remainder + nl;
176 }
177
178 static char *event_long_desc(struct hv_24x7_event_data *ev, int *len)
179 {
180         unsigned nl = be16_to_cpu(ev->event_name_len);
181         __be16 *desc_len_ = (__be16 *)(ev->remainder + nl - 2);
182         unsigned desc_len = be16_to_cpu(*desc_len_);
183         __be16 *long_desc_len = (__be16 *)(ev->remainder + nl + desc_len - 2);
184
185         *len = be16_to_cpu(*long_desc_len) - 2;
186         return (char *)ev->remainder + nl + desc_len;
187 }
188
189 static bool event_fixed_portion_is_within(struct hv_24x7_event_data *ev,
190                                           void *end)
191 {
192         void *start = ev;
193
194         return (start + offsetof(struct hv_24x7_event_data, remainder)) < end;
195 }
196
197 /*
198  * Things we don't check:
199  *  - padding for desc, name, and long/detailed desc is required to be '\0'
200  *    bytes.
201  *
202  *  Return NULL if we pass end,
203  *  Otherwise return the address of the byte just following the event.
204  */
205 static void *event_end(struct hv_24x7_event_data *ev, void *end)
206 {
207         void *start = ev;
208         __be16 *dl_, *ldl_;
209         unsigned dl, ldl;
210         unsigned nl = be16_to_cpu(ev->event_name_len);
211
212         if (nl < 2) {
213                 pr_debug("%s: name length too short: %d", __func__, nl);
214                 return NULL;
215         }
216
217         if (start + nl > end) {
218                 pr_debug("%s: start=%p + nl=%u > end=%p",
219                                 __func__, start, nl, end);
220                 return NULL;
221         }
222
223         dl_ = (__be16 *)(ev->remainder + nl - 2);
224         if (!IS_ALIGNED((uintptr_t)dl_, 2))
225                 pr_warn("desc len not aligned %p", dl_);
226         dl = be16_to_cpu(*dl_);
227         if (dl < 2) {
228                 pr_debug("%s: desc len too short: %d", __func__, dl);
229                 return NULL;
230         }
231
232         if (start + nl + dl > end) {
233                 pr_debug("%s: (start=%p + nl=%u + dl=%u)=%p > end=%p",
234                                 __func__, start, nl, dl, start + nl + dl, end);
235                 return NULL;
236         }
237
238         ldl_ = (__be16 *)(ev->remainder + nl + dl - 2);
239         if (!IS_ALIGNED((uintptr_t)ldl_, 2))
240                 pr_warn("long desc len not aligned %p", ldl_);
241         ldl = be16_to_cpu(*ldl_);
242         if (ldl < 2) {
243                 pr_debug("%s: long desc len too short (ldl=%u)",
244                                 __func__, ldl);
245                 return NULL;
246         }
247
248         if (start + nl + dl + ldl > end) {
249                 pr_debug("%s: start=%p + nl=%u + dl=%u + ldl=%u > end=%p",
250                                 __func__, start, nl, dl, ldl, end);
251                 return NULL;
252         }
253
254         return start + nl + dl + ldl;
255 }
256
257 static unsigned long h_get_24x7_catalog_page_(unsigned long phys_4096,
258                                               unsigned long version,
259                                               unsigned long index)
260 {
261         pr_devel("h_get_24x7_catalog_page(0x%lx, %lu, %lu)",
262                         phys_4096, version, index);
263
264         WARN_ON(!IS_ALIGNED(phys_4096, 4096));
265
266         return plpar_hcall_norets(H_GET_24X7_CATALOG_PAGE,
267                         phys_4096, version, index);
268 }
269
270 static unsigned long h_get_24x7_catalog_page(char page[],
271                                              u64 version, u32 index)
272 {
273         return h_get_24x7_catalog_page_(virt_to_phys(page),
274                                         version, index);
275 }
276
/*
 * Domains for which an attribute is generated from a single PHYS_CORE
 * catalog entry (see event_data_to_attrs()).
 */
static unsigned core_domains[] = {
	HV_PERF_DOMAIN_PHYS_CORE,
	HV_PERF_DOMAIN_VCPU_HOME_CORE,
	HV_PERF_DOMAIN_VCPU_HOME_CHIP,
	HV_PERF_DOMAIN_VCPU_HOME_NODE,
	HV_PERF_DOMAIN_VCPU_REMOTE_NODE,
};
/* chip event data always yields a single event, core yields multiple */
#define MAX_EVENTS_PER_EVENT_DATA ARRAY_SIZE(core_domains)
286
287 static char *event_fmt(struct hv_24x7_event_data *event, unsigned domain)
288 {
289         const char *sindex;
290         const char *lpar;
291
292         if (is_physical_domain(domain)) {
293                 lpar = "0x0";
294                 sindex = "core";
295         } else {
296                 lpar = "?";
297                 sindex = "vcpu";
298         }
299
300         return kasprintf(GFP_KERNEL,
301                         "domain=0x%x,offset=0x%x,%s=?,lpar=%s",
302                         domain,
303                         be16_to_cpu(event->event_counter_offs) +
304                                 be16_to_cpu(event->event_group_record_offs),
305                         sindex,
306                         lpar);
307 }
308
309 /* Avoid trusting fw to NUL terminate strings */
310 static char *memdup_to_str(char *maybe_str, int max_len, gfp_t gfp)
311 {
312         return kasprintf(gfp, "%.*s", max_len, maybe_str);
313 }
314
315 static ssize_t device_show_string(struct device *dev,
316                 struct device_attribute *attr, char *buf)
317 {
318         struct dev_ext_attribute *d;
319
320         d = container_of(attr, struct dev_ext_attribute, attr);
321
322         return sprintf(buf, "%s\n", (char *)d->var);
323 }
324
325 static struct attribute *device_str_attr_create_(char *name, char *str)
326 {
327         struct dev_ext_attribute *attr = kzalloc(sizeof(*attr), GFP_KERNEL);
328
329         if (!attr)
330                 return NULL;
331
332         sysfs_attr_init(&attr->attr.attr);
333
334         attr->var = str;
335         attr->attr.attr.name = name;
336         attr->attr.attr.mode = 0444;
337         attr->attr.show = device_show_string;
338
339         return &attr->attr.attr;
340 }
341
342 static struct attribute *device_str_attr_create(char *name, int name_max,
343                                                 int name_nonce,
344                                                 char *str, size_t str_max)
345 {
346         char *n;
347         char *s = memdup_to_str(str, str_max, GFP_KERNEL);
348         struct attribute *a;
349
350         if (!s)
351                 return NULL;
352
353         if (!name_nonce)
354                 n = kasprintf(GFP_KERNEL, "%.*s", name_max, name);
355         else
356                 n = kasprintf(GFP_KERNEL, "%.*s__%d", name_max, name,
357                                         name_nonce);
358         if (!n)
359                 goto out_s;
360
361         a = device_str_attr_create_(n, s);
362         if (!a)
363                 goto out_n;
364
365         return a;
366 out_n:
367         kfree(n);
368 out_s:
369         kfree(s);
370         return NULL;
371 }
372
373 static void device_str_attr_destroy(struct attribute *attr)
374 {
375         struct dev_ext_attribute *d;
376
377         d = container_of(attr, struct dev_ext_attribute, attr.attr);
378         kfree(d->var);
379         kfree(d->attr.attr.name);
380         kfree(d);
381 }
382
383 static struct attribute *event_to_attr(unsigned ix,
384                                        struct hv_24x7_event_data *event,
385                                        unsigned domain,
386                                        int nonce)
387 {
388         int event_name_len;
389         char *ev_name, *a_ev_name, *val;
390         const char *ev_suffix;
391         struct attribute *attr;
392
393         if (!domain_is_valid(domain)) {
394                 pr_warn("catalog event %u has invalid domain %u\n",
395                                 ix, domain);
396                 return NULL;
397         }
398
399         val = event_fmt(event, domain);
400         if (!val)
401                 return NULL;
402
403         ev_suffix = event_domain_suffix(domain);
404         ev_name = event_name(event, &event_name_len);
405         if (!nonce)
406                 a_ev_name = kasprintf(GFP_KERNEL, "%.*s%s",
407                                 (int)event_name_len, ev_name, ev_suffix);
408         else
409                 a_ev_name = kasprintf(GFP_KERNEL, "%.*s%s__%d",
410                                 (int)event_name_len, ev_name, ev_suffix, nonce);
411
412         if (!a_ev_name)
413                 goto out_val;
414
415         attr = device_str_attr_create_(a_ev_name, val);
416         if (!attr)
417                 goto out_name;
418
419         return attr;
420 out_name:
421         kfree(a_ev_name);
422 out_val:
423         kfree(val);
424         return NULL;
425 }
426
427 static struct attribute *event_to_desc_attr(struct hv_24x7_event_data *event,
428                                             int nonce)
429 {
430         int nl, dl;
431         char *name = event_name(event, &nl);
432         char *desc = event_desc(event, &dl);
433
434         /* If there isn't a description, don't create the sysfs file */
435         if (!dl)
436                 return NULL;
437
438         return device_str_attr_create(name, nl, nonce, desc, dl);
439 }
440
441 static struct attribute *
442 event_to_long_desc_attr(struct hv_24x7_event_data *event, int nonce)
443 {
444         int nl, dl;
445         char *name = event_name(event, &nl);
446         char *desc = event_long_desc(event, &dl);
447
448         /* If there isn't a description, don't create the sysfs file */
449         if (!dl)
450                 return NULL;
451
452         return device_str_attr_create(name, nl, nonce, desc, dl);
453 }
454
455 static ssize_t event_data_to_attrs(unsigned ix, struct attribute **attrs,
456                                    struct hv_24x7_event_data *event, int nonce)
457 {
458         unsigned i;
459
460         switch (event->domain) {
461         case HV_PERF_DOMAIN_PHYS_CHIP:
462                 *attrs = event_to_attr(ix, event, event->domain, nonce);
463                 return 1;
464         case HV_PERF_DOMAIN_PHYS_CORE:
465                 for (i = 0; i < ARRAY_SIZE(core_domains); i++) {
466                         attrs[i] = event_to_attr(ix, event, core_domains[i],
467                                                 nonce);
468                         if (!attrs[i]) {
469                                 pr_warn("catalog event %u: individual attr %u "
470                                         "creation failure\n", ix, i);
471                                 for (; i; i--)
472                                         device_str_attr_destroy(attrs[i - 1]);
473                                 return -1;
474                         }
475                 }
476                 return i;
477         default:
478                 pr_warn("catalog event %u: domain %u is not allowed in the "
479                                 "catalog\n", ix, event->domain);
480                 return -1;
481         }
482 }
483
484 static size_t event_to_attr_ct(struct hv_24x7_event_data *event)
485 {
486         switch (event->domain) {
487         case HV_PERF_DOMAIN_PHYS_CHIP:
488                 return 1;
489         case HV_PERF_DOMAIN_PHYS_CORE:
490                 return ARRAY_SIZE(core_domains);
491         default:
492                 return 0;
493         }
494 }
495
496 /* */
497 struct event_uniq {
498         struct rb_node node;
499         const char *name;
500         int nl;
501         unsigned ct;
502         unsigned domain;
503 };
504
/*
 * Total order over byte buffers: buffers of different length are ordered
 * by length alone (the shorter one sorts after the longer one); buffers of
 * equal length are ordered by memcmp().
 *
 * Returns a negative, zero or positive value.
 */
static int memord(const void *d1, size_t s1, const void *d2, size_t s2)
{
	if (s1 < s2)
		return 1;
	/*
	 * This must be the mirror image of the test above; otherwise a
	 * longer d1 falls through to memcmp() and reads past the end of d2.
	 */
	if (s1 > s2)
		return -1;

	return memcmp(d1, d2, s1);
}
514
515 static int ev_uniq_ord(const void *v1, size_t s1, unsigned d1, const void *v2,
516                        size_t s2, unsigned d2)
517 {
518         int r = memord(v1, s1, v2, s2);
519
520         if (r)
521                 return r;
522         if (d1 > d2)
523                 return 1;
524         if (d2 > d1)
525                 return -1;
526         return 0;
527 }
528
529 static int event_uniq_add(struct rb_root *root, const char *name, int nl,
530                           unsigned domain)
531 {
532         struct rb_node **new = &(root->rb_node), *parent = NULL;
533         struct event_uniq *data;
534
535         /* Figure out where to put new node */
536         while (*new) {
537                 struct event_uniq *it;
538                 int result;
539
540                 it = container_of(*new, struct event_uniq, node);
541                 result = ev_uniq_ord(name, nl, domain, it->name, it->nl,
542                                         it->domain);
543
544                 parent = *new;
545                 if (result < 0)
546                         new = &((*new)->rb_left);
547                 else if (result > 0)
548                         new = &((*new)->rb_right);
549                 else {
550                         it->ct++;
551                         pr_info("found a duplicate event %.*s, ct=%u\n", nl,
552                                                 name, it->ct);
553                         return it->ct;
554                 }
555         }
556
557         data = kmalloc(sizeof(*data), GFP_KERNEL);
558         if (!data)
559                 return -ENOMEM;
560
561         *data = (struct event_uniq) {
562                 .name = name,
563                 .nl = nl,
564                 .ct = 0,
565                 .domain = domain,
566         };
567
568         /* Add new node and rebalance tree. */
569         rb_link_node(&data->node, parent, new);
570         rb_insert_color(&data->node, root);
571
572         /* data->ct */
573         return 0;
574 }
575
576 static void event_uniq_destroy(struct rb_root *root)
577 {
578         /*
579          * the strings we point to are in the giant block of memory filled by
580          * the catalog, and are freed separately.
581          */
582         struct event_uniq *pos, *n;
583
584         rbtree_postorder_for_each_entry_safe(pos, n, root, node)
585                 kfree(pos);
586 }
587
588
589 /*
590  * ensure the event structure's sizes are self consistent and don't cause us to
591  * read outside of the event
592  *
593  * On success, return the event length in bytes.
594  * Otherwise, return -1 (and print as appropriate).
595  */
596 static ssize_t catalog_event_len_validate(struct hv_24x7_event_data *event,
597                                           size_t event_idx,
598                                           size_t event_data_bytes,
599                                           size_t event_entry_count,
600                                           size_t offset, void *end)
601 {
602         ssize_t ev_len;
603         void *ev_end, *calc_ev_end;
604
605         if (offset >= event_data_bytes)
606                 return -1;
607
608         if (event_idx >= event_entry_count) {
609                 pr_devel("catalog event data has %zu bytes of padding after last event\n",
610                                 event_data_bytes - offset);
611                 return -1;
612         }
613
614         if (!event_fixed_portion_is_within(event, end)) {
615                 pr_warn("event %zu fixed portion is not within range\n",
616                                 event_idx);
617                 return -1;
618         }
619
620         ev_len = be16_to_cpu(event->length);
621
622         if (ev_len % 16)
623                 pr_info("event %zu has length %zu not divisible by 16: event=%pK\n",
624                                 event_idx, ev_len, event);
625
626         ev_end = (__u8 *)event + ev_len;
627         if (ev_end > end) {
628                 pr_warn("event %zu has .length=%zu, ends after buffer end: ev_end=%pK > end=%pK, offset=%zu\n",
629                                 event_idx, ev_len, ev_end, end,
630                                 offset);
631                 return -1;
632         }
633
634         calc_ev_end = event_end(event, end);
635         if (!calc_ev_end) {
636                 pr_warn("event %zu has a calculated length which exceeds buffer length %zu: event=%pK end=%pK, offset=%zu\n",
637                         event_idx, event_data_bytes, event, end,
638                         offset);
639                 return -1;
640         }
641
642         if (calc_ev_end > ev_end) {
643                 pr_warn("event %zu exceeds it's own length: event=%pK, end=%pK, offset=%zu, calc_ev_end=%pK\n",
644                         event_idx, event, ev_end, offset, calc_ev_end);
645                 return -1;
646         }
647
648         return ev_len;
649 }
650
651 #define MAX_4K (SIZE_MAX / 4096)
652
653 static int create_events_from_catalog(struct attribute ***events_,
654                                       struct attribute ***event_descs_,
655                                       struct attribute ***event_long_descs_)
656 {
657         unsigned long hret;
658         size_t catalog_len, catalog_page_len, event_entry_count,
659                event_data_len, event_data_offs,
660                event_data_bytes, junk_events, event_idx, event_attr_ct, i,
661                attr_max, event_idx_last, desc_ct, long_desc_ct;
662         ssize_t ct, ev_len;
663         uint32_t catalog_version_num;
664         struct attribute **events, **event_descs, **event_long_descs;
665         struct hv_24x7_catalog_page_0 *page_0 =
666                 kmem_cache_alloc(hv_page_cache, GFP_KERNEL);
667         void *page = page_0;
668         void *event_data, *end;
669         struct hv_24x7_event_data *event;
670         struct rb_root ev_uniq = RB_ROOT;
671         int ret = 0;
672
673         if (!page) {
674                 ret = -ENOMEM;
675                 goto e_out;
676         }
677
678         hret = h_get_24x7_catalog_page(page, 0, 0);
679         if (hret) {
680                 ret = -EIO;
681                 goto e_free;
682         }
683
684         catalog_version_num = be64_to_cpu(page_0->version);
685         catalog_page_len = be32_to_cpu(page_0->length);
686
687         if (MAX_4K < catalog_page_len) {
688                 pr_err("invalid page count: %zu\n", catalog_page_len);
689                 ret = -EIO;
690                 goto e_free;
691         }
692
693         catalog_len = catalog_page_len * 4096;
694
695         event_entry_count = be16_to_cpu(page_0->event_entry_count);
696         event_data_offs   = be16_to_cpu(page_0->event_data_offs);
697         event_data_len    = be16_to_cpu(page_0->event_data_len);
698
699         pr_devel("cv %zu cl %zu eec %zu edo %zu edl %zu\n",
700                         (size_t)catalog_version_num, catalog_len,
701                         event_entry_count, event_data_offs, event_data_len);
702
703         if ((MAX_4K < event_data_len)
704                         || (MAX_4K < event_data_offs)
705                         || (MAX_4K - event_data_offs < event_data_len)) {
706                 pr_err("invalid event data offs %zu and/or len %zu\n",
707                                 event_data_offs, event_data_len);
708                 ret = -EIO;
709                 goto e_free;
710         }
711
712         if ((event_data_offs + event_data_len) > catalog_page_len) {
713                 pr_err("event data %zu-%zu does not fit inside catalog 0-%zu\n",
714                                 event_data_offs,
715                                 event_data_offs + event_data_len,
716                                 catalog_page_len);
717                 ret = -EIO;
718                 goto e_free;
719         }
720
721         if (SIZE_MAX / MAX_EVENTS_PER_EVENT_DATA - 1 < event_entry_count) {
722                 pr_err("event_entry_count %zu is invalid\n",
723                                 event_entry_count);
724                 ret = -EIO;
725                 goto e_free;
726         }
727
728         event_data_bytes = event_data_len * 4096;
729
730         /*
731          * event data can span several pages, events can cross between these
732          * pages. Use vmalloc to make this easier.
733          */
734         event_data = vmalloc(event_data_bytes);
735         if (!event_data) {
736                 pr_err("could not allocate event data\n");
737                 ret = -ENOMEM;
738                 goto e_free;
739         }
740
741         end = event_data + event_data_bytes;
742
743         /*
744          * using vmalloc_to_phys() like this only works if PAGE_SIZE is
745          * divisible by 4096
746          */
747         BUILD_BUG_ON(PAGE_SIZE % 4096);
748
749         for (i = 0; i < event_data_len; i++) {
750                 hret = h_get_24x7_catalog_page_(
751                                 vmalloc_to_phys(event_data + i * 4096),
752                                 catalog_version_num,
753                                 i + event_data_offs);
754                 if (hret) {
755                         pr_err("failed to get event data in page %zu\n",
756                                         i + event_data_offs);
757                         ret = -EIO;
758                         goto e_event_data;
759                 }
760         }
761
762         /*
763          * scan the catalog to determine the number of attributes we need, and
764          * verify it at the same time.
765          */
766         for (junk_events = 0, event = event_data, event_idx = 0, attr_max = 0;
767              ;
768              event_idx++, event = (void *)event + ev_len) {
769                 size_t offset = (void *)event - (void *)event_data;
770                 char *name;
771                 int nl;
772
773                 ev_len = catalog_event_len_validate(event, event_idx,
774                                                     event_data_bytes,
775                                                     event_entry_count,
776                                                     offset, end);
777                 if (ev_len < 0)
778                         break;
779
780                 name = event_name(event, &nl);
781
782                 if (event->event_group_record_len == 0) {
783                         pr_devel("invalid event %zu (%.*s): group_record_len == 0, skipping\n",
784                                         event_idx, nl, name);
785                         junk_events++;
786                         continue;
787                 }
788
789                 if (!catalog_entry_domain_is_valid(event->domain)) {
790                         pr_info("event %zu (%.*s) has invalid domain %d\n",
791                                         event_idx, nl, name, event->domain);
792                         junk_events++;
793                         continue;
794                 }
795
796                 attr_max += event_to_attr_ct(event);
797         }
798
799         event_idx_last = event_idx;
800         if (event_idx_last != event_entry_count)
801                 pr_warn("event buffer ended before listed # of events were parsed (got %zu, wanted %zu, junk %zu)\n",
802                                 event_idx_last, event_entry_count, junk_events);
803
804         events = kmalloc_array(attr_max + 1, sizeof(*events), GFP_KERNEL);
805         if (!events) {
806                 ret = -ENOMEM;
807                 goto e_event_data;
808         }
809
810         event_descs = kmalloc_array(event_idx + 1, sizeof(*event_descs),
811                                 GFP_KERNEL);
812         if (!event_descs) {
813                 ret = -ENOMEM;
814                 goto e_event_attrs;
815         }
816
817         event_long_descs = kmalloc_array(event_idx + 1,
818                         sizeof(*event_long_descs), GFP_KERNEL);
819         if (!event_long_descs) {
820                 ret = -ENOMEM;
821                 goto e_event_descs;
822         }
823
824         /* Iterate over the catalog filling in the attribute vector */
825         for (junk_events = 0, event_attr_ct = 0, desc_ct = 0, long_desc_ct = 0,
826                                 event = event_data, event_idx = 0;
827                         event_idx < event_idx_last;
828                         event_idx++, ev_len = be16_to_cpu(event->length),
829                                 event = (void *)event + ev_len) {
830                 char *name;
831                 int nl;
832                 int nonce;
833                 /*
834                  * these are the only "bad" events that are intermixed and that
835                  * we can ignore without issue. make sure to skip them here
836                  */
837                 if (event->event_group_record_len == 0)
838                         continue;
839                 if (!catalog_entry_domain_is_valid(event->domain))
840                         continue;
841
842                 name  = event_name(event, &nl);
843                 nonce = event_uniq_add(&ev_uniq, name, nl, event->domain);
844                 ct    = event_data_to_attrs(event_idx, events + event_attr_ct,
845                                             event, nonce);
846                 if (ct <= 0) {
847                         pr_warn("event %zu (%.*s) creation failure, skipping\n",
848                                 event_idx, nl, name);
849                         junk_events++;
850                 } else {
851                         event_attr_ct += ct;
852                         event_descs[desc_ct] = event_to_desc_attr(event, nonce);
853                         if (event_descs[desc_ct])
854                                 desc_ct++;
855                         event_long_descs[long_desc_ct] =
856                                         event_to_long_desc_attr(event, nonce);
857                         if (event_long_descs[long_desc_ct])
858                                 long_desc_ct++;
859                 }
860         }
861
862         pr_info("read %zu catalog entries, created %zu event attrs (%zu failures), %zu descs\n",
863                         event_idx, event_attr_ct, junk_events, desc_ct);
864
865         events[event_attr_ct] = NULL;
866         event_descs[desc_ct] = NULL;
867         event_long_descs[long_desc_ct] = NULL;
868
869         event_uniq_destroy(&ev_uniq);
870         vfree(event_data);
871         kmem_cache_free(hv_page_cache, page);
872
873         *events_ = events;
874         *event_descs_ = event_descs;
875         *event_long_descs_ = event_long_descs;
876         return 0;
877
878 e_event_descs:
879         kfree(event_descs);
880 e_event_attrs:
881         kfree(events);
882 e_event_data:
883         vfree(event_data);
884 e_free:
885         kmem_cache_free(hv_page_cache, page);
886 e_out:
887         *events_ = NULL;
888         *event_descs_ = NULL;
889         *event_long_descs_ = NULL;
890         return ret;
891 }
892
893 static ssize_t catalog_read(struct file *filp, struct kobject *kobj,
894                             struct bin_attribute *bin_attr, char *buf,
895                             loff_t offset, size_t count)
896 {
897         unsigned long hret;
898         ssize_t ret = 0;
899         size_t catalog_len = 0, catalog_page_len = 0;
900         loff_t page_offset = 0;
901         loff_t offset_in_page;
902         size_t copy_len;
903         uint64_t catalog_version_num = 0;
904         void *page = kmem_cache_alloc(hv_page_cache, GFP_USER);
905         struct hv_24x7_catalog_page_0 *page_0 = page;
906
907         if (!page)
908                 return -ENOMEM;
909
910         hret = h_get_24x7_catalog_page(page, 0, 0);
911         if (hret) {
912                 ret = -EIO;
913                 goto e_free;
914         }
915
916         catalog_version_num = be64_to_cpu(page_0->version);
917         catalog_page_len = be32_to_cpu(page_0->length);
918         catalog_len = catalog_page_len * 4096;
919
920         page_offset = offset / 4096;
921         offset_in_page = offset % 4096;
922
923         if (page_offset >= catalog_page_len)
924                 goto e_free;
925
926         if (page_offset != 0) {
927                 hret = h_get_24x7_catalog_page(page, catalog_version_num,
928                                                page_offset);
929                 if (hret) {
930                         ret = -EIO;
931                         goto e_free;
932                 }
933         }
934
935         copy_len = 4096 - offset_in_page;
936         if (copy_len > count)
937                 copy_len = count;
938
939         memcpy(buf, page+offset_in_page, copy_len);
940         ret = copy_len;
941
942 e_free:
943         if (hret)
944                 pr_err("h_get_24x7_catalog_page(ver=%lld, page=%lld) failed:"
945                        " rc=%ld\n",
946                        catalog_version_num, page_offset, hret);
947         kmem_cache_free(hv_page_cache, page);
948
949         pr_devel("catalog_read: offset=%lld(%lld) count=%zu "
950                         "catalog_len=%zu(%zu) => %zd\n", offset, page_offset,
951                         count, catalog_len, catalog_page_len, ret);
952
953         return ret;
954 }
955
/*
 * PAGE_0_ATTR(_name, _fmt, _expr) - define a read-only device attribute whose
 * value is derived from page 0 of the catalog.
 *
 * Generates _name##_show(), which reads catalog page 0 into a temporary
 * hv_page_cache buffer, formats @_expr with @_fmt into the sysfs buffer and
 * frees the page again. @_expr is evaluated inside the generated function and
 * may use the local 'page_0', which points at the page just read.
 *
 * The show function returns the sprintf() length, -ENOMEM if the page could
 * not be allocated, or -EIO if the hypervisor call failed.
 */
#define PAGE_0_ATTR(_name, _fmt, _expr)                         \
static ssize_t _name##_show(struct device *dev,                 \
                            struct device_attribute *dev_attr,  \
                            char *buf)                          \
{                                                               \
        struct hv_24x7_catalog_page_0 *page_0;                  \
        ssize_t len;                                            \
        void *page;                                             \
                                                                \
        page = kmem_cache_alloc(hv_page_cache, GFP_USER);       \
        if (!page)                                              \
                return -ENOMEM;                                 \
        page_0 = page;                                          \
        if (h_get_24x7_catalog_page(page, 0, 0))                \
                len = -EIO;                                     \
        else                                                    \
                len = sprintf(buf, _fmt, _expr);                \
        kmem_cache_free(hv_page_cache, page);                   \
        return len;                                             \
}                                                               \
static DEVICE_ATTR_RO(_name)
978
979 PAGE_0_ATTR(catalog_version, "%lld\n",
980                 (unsigned long long)be64_to_cpu(page_0->version));
981 PAGE_0_ATTR(catalog_len, "%lld\n",
982                 (unsigned long long)be32_to_cpu(page_0->length) * 4096);
983 static BIN_ATTR_RO(catalog, 0/* real length varies */);
984
/* Binary attributes of the "interface" group: the raw catalog only. */
static struct bin_attribute *if_bin_attrs[] = {
        &bin_attr_catalog,
        NULL,
};

/* Text attributes of the "interface" group (defined via PAGE_0_ATTR()). */
static struct attribute *if_attrs[] = {
        &dev_attr_catalog_len.attr,
        &dev_attr_catalog_version.attr,
        NULL,
};

/* Attribute group named "interface" bundling the catalog attributes above. */
static struct attribute_group if_group = {
        .name = "interface",
        .bin_attrs = if_bin_attrs,
        .attrs = if_attrs,
};
1001
/*
 * NULL-terminated list of all attribute groups of this PMU; installed via
 * h_24x7_pmu.attr_groups.
 */
static const struct attribute_group *attr_groups[] = {
        &format_group,
        &event_group,
        &event_desc_group,
        &event_long_desc_group,
        &if_group,
        NULL,
};
1010
1011 static void log_24x7_hcall(struct hv_24x7_request_buffer *request_buffer,
1012                            struct hv_24x7_data_result_buffer *result_buffer,
1013                            unsigned long ret)
1014 {
1015         struct hv_24x7_request *req;
1016
1017         req = &request_buffer->requests[0];
1018         pr_notice_ratelimited("hcall failed: [%d %#x %#x %d] => "
1019                         "ret 0x%lx (%ld) detail=0x%x failing ix=%x\n",
1020                         req->performance_domain, req->data_offset,
1021                         req->starting_ix, req->starting_lpar_ix, ret, ret,
1022                         result_buffer->detailed_rc,
1023                         result_buffer->failing_request_ix);
1024 }
1025
1026 /*
1027  * Start the process for a new H_GET_24x7_DATA hcall.
1028  */
1029 static void init_24x7_request(struct hv_24x7_request_buffer *request_buffer,
1030                               struct hv_24x7_data_result_buffer *result_buffer)
1031 {
1032
1033         memset(request_buffer, 0, 4096);
1034         memset(result_buffer, 0, 4096);
1035
1036         request_buffer->interface_version = HV_24X7_IF_VERSION_CURRENT;
1037         /* memset above set request_buffer->num_requests to 0 */
1038 }
1039
1040 /*
1041  * Commit (i.e perform) the H_GET_24x7_DATA hcall using the data collected
1042  * by 'init_24x7_request()' and 'add_event_to_24x7_request()'.
1043  */
1044 static int make_24x7_request(struct hv_24x7_request_buffer *request_buffer,
1045                              struct hv_24x7_data_result_buffer *result_buffer)
1046 {
1047         unsigned long ret;
1048
1049         /*
1050          * NOTE: Due to variable number of array elements in request and
1051          *       result buffer(s), sizeof() is not reliable. Use the actual
1052          *       allocated buffer size, H24x7_DATA_BUFFER_SIZE.
1053          */
1054         ret = plpar_hcall_norets(H_GET_24X7_DATA,
1055                         virt_to_phys(request_buffer), H24x7_DATA_BUFFER_SIZE,
1056                         virt_to_phys(result_buffer),  H24x7_DATA_BUFFER_SIZE);
1057
1058         if (ret)
1059                 log_24x7_hcall(request_buffer, result_buffer, ret);
1060
1061         return ret;
1062 }
1063
1064 /*
1065  * Add the given @event to the next slot in the 24x7 request_buffer.
1066  *
1067  * Note that H_GET_24X7_DATA hcall allows reading several counters'
1068  * values in a single HCALL. We expect the caller to add events to the
1069  * request buffer one by one, make the HCALL and process the results.
1070  */
1071 static int add_event_to_24x7_request(struct perf_event *event,
1072                                 struct hv_24x7_request_buffer *request_buffer)
1073 {
1074         u16 idx;
1075         int i;
1076         struct hv_24x7_request *req;
1077
1078         if (request_buffer->num_requests > 254) {
1079                 pr_devel("Too many requests for 24x7 HCALL %d\n",
1080                                 request_buffer->num_requests);
1081                 return -EINVAL;
1082         }
1083
1084         if (is_physical_domain(event_get_domain(event)))
1085                 idx = event_get_core(event);
1086         else
1087                 idx = event_get_vcpu(event);
1088
1089         i = request_buffer->num_requests++;
1090         req = &request_buffer->requests[i];
1091
1092         req->performance_domain = event_get_domain(event);
1093         req->data_size = cpu_to_be16(8);
1094         req->data_offset = cpu_to_be32(event_get_offset(event));
1095         req->starting_lpar_ix = cpu_to_be16(event_get_lpar(event)),
1096         req->max_num_lpars = cpu_to_be16(1);
1097         req->starting_ix = cpu_to_be16(idx);
1098         req->max_ix = cpu_to_be16(1);
1099
1100         return 0;
1101 }
1102
1103 static unsigned long single_24x7_request(struct perf_event *event, u64 *count)
1104 {
1105         unsigned long ret;
1106         struct hv_24x7_request_buffer *request_buffer;
1107         struct hv_24x7_data_result_buffer *result_buffer;
1108
1109         BUILD_BUG_ON(sizeof(*request_buffer) > 4096);
1110         BUILD_BUG_ON(sizeof(*result_buffer) > 4096);
1111
1112         request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
1113         result_buffer = (void *)get_cpu_var(hv_24x7_resb);
1114
1115         init_24x7_request(request_buffer, result_buffer);
1116
1117         ret = add_event_to_24x7_request(event, request_buffer);
1118         if (ret)
1119                 goto out;
1120
1121         ret = make_24x7_request(request_buffer, result_buffer);
1122         if (ret) {
1123                 log_24x7_hcall(request_buffer, result_buffer, ret);
1124                 goto out;
1125         }
1126
1127         /* process result from hcall */
1128         *count = be64_to_cpu(result_buffer->results[0].elements[0].element_data[0]);
1129
1130 out:
1131         put_cpu_var(hv_24x7_reqb);
1132         put_cpu_var(hv_24x7_resb);
1133         return ret;
1134 }
1135
1136
1137 static int h_24x7_event_init(struct perf_event *event)
1138 {
1139         struct hv_perf_caps caps;
1140         unsigned domain;
1141         unsigned long hret;
1142         u64 ct;
1143
1144         /* Not our event */
1145         if (event->attr.type != event->pmu->type)
1146                 return -ENOENT;
1147
1148         /* Unused areas must be 0 */
1149         if (event_get_reserved1(event) ||
1150             event_get_reserved2(event) ||
1151             event_get_reserved3(event)) {
1152                 pr_devel("reserved set when forbidden 0x%llx(0x%llx) 0x%llx(0x%llx) 0x%llx(0x%llx)\n",
1153                                 event->attr.config,
1154                                 event_get_reserved1(event),
1155                                 event->attr.config1,
1156                                 event_get_reserved2(event),
1157                                 event->attr.config2,
1158                                 event_get_reserved3(event));
1159                 return -EINVAL;
1160         }
1161
1162         /* unsupported modes and filters */
1163         if (event->attr.exclude_user   ||
1164             event->attr.exclude_kernel ||
1165             event->attr.exclude_hv     ||
1166             event->attr.exclude_idle   ||
1167             event->attr.exclude_host   ||
1168             event->attr.exclude_guest)
1169                 return -EINVAL;
1170
1171         /* no branch sampling */
1172         if (has_branch_stack(event))
1173                 return -EOPNOTSUPP;
1174
1175         /* offset must be 8 byte aligned */
1176         if (event_get_offset(event) % 8) {
1177                 pr_devel("bad alignment\n");
1178                 return -EINVAL;
1179         }
1180
1181         /* Domains above 6 are invalid */
1182         domain = event_get_domain(event);
1183         if (domain > 6) {
1184                 pr_devel("invalid domain %d\n", domain);
1185                 return -EINVAL;
1186         }
1187
1188         hret = hv_perf_caps_get(&caps);
1189         if (hret) {
1190                 pr_devel("could not get capabilities: rc=%ld\n", hret);
1191                 return -EIO;
1192         }
1193
1194         /* Physical domains & other lpars require extra capabilities */
1195         if (!caps.collect_privileged && (is_physical_domain(domain) ||
1196                 (event_get_lpar(event) != event_get_lpar_max()))) {
1197                 pr_devel("hv permissions disallow: is_physical_domain:%d, lpar=0x%llx\n",
1198                                 is_physical_domain(domain),
1199                                 event_get_lpar(event));
1200                 return -EACCES;
1201         }
1202
1203         /* see if the event complains */
1204         if (single_24x7_request(event, &ct)) {
1205                 pr_devel("test hcall failed\n");
1206                 return -EIO;
1207         }
1208
1209         return 0;
1210 }
1211
1212 static u64 h_24x7_get_value(struct perf_event *event)
1213 {
1214         unsigned long ret;
1215         u64 ct;
1216         ret = single_24x7_request(event, &ct);
1217         if (ret)
1218                 /* We checked this in event init, shouldn't fail here... */
1219                 return 0;
1220
1221         return ct;
1222 }
1223
1224 static void update_event_count(struct perf_event *event, u64 now)
1225 {
1226         s64 prev;
1227
1228         prev = local64_xchg(&event->hw.prev_count, now);
1229         local64_add(now - prev, &event->count);
1230 }
1231
1232 static void h_24x7_event_read(struct perf_event *event)
1233 {
1234         u64 now;
1235         struct hv_24x7_request_buffer *request_buffer;
1236         struct hv_24x7_hw *h24x7hw;
1237         int txn_flags;
1238
1239         txn_flags = __this_cpu_read(hv_24x7_txn_flags);
1240
1241         /*
1242          * If in a READ transaction, add this counter to the list of
1243          * counters to read during the next HCALL (i.e commit_txn()).
1244          * If not in a READ transaction, go ahead and make the HCALL
1245          * to read this counter by itself.
1246          */
1247
1248         if (txn_flags & PERF_PMU_TXN_READ) {
1249                 int i;
1250                 int ret;
1251
1252                 if (__this_cpu_read(hv_24x7_txn_err))
1253                         return;
1254
1255                 request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
1256
1257                 ret = add_event_to_24x7_request(event, request_buffer);
1258                 if (ret) {
1259                         __this_cpu_write(hv_24x7_txn_err, ret);
1260                 } else {
1261                         /*
1262                          * Assoicate the event with the HCALL request index,
1263                          * so ->commit_txn() can quickly find/update count.
1264                          */
1265                         i = request_buffer->num_requests - 1;
1266
1267                         h24x7hw = &get_cpu_var(hv_24x7_hw);
1268                         h24x7hw->events[i] = event;
1269                         put_cpu_var(h24x7hw);
1270                 }
1271
1272                 put_cpu_var(hv_24x7_reqb);
1273         } else {
1274                 now = h_24x7_get_value(event);
1275                 update_event_count(event, now);
1276         }
1277 }
1278
1279 static void h_24x7_event_start(struct perf_event *event, int flags)
1280 {
1281         if (flags & PERF_EF_RELOAD)
1282                 local64_set(&event->hw.prev_count, h_24x7_get_value(event));
1283 }
1284
/*
 * ->stop() callback (also installed as ->del() in h_24x7_pmu): bring the
 * event count up to date by reading the counter. @flags is not used.
 */
static void h_24x7_event_stop(struct perf_event *event, int flags)
{
        h_24x7_event_read(event);
}
1289
1290 static int h_24x7_event_add(struct perf_event *event, int flags)
1291 {
1292         if (flags & PERF_EF_START)
1293                 h_24x7_event_start(event, flags);
1294
1295         return 0;
1296 }
1297
1298 /*
1299  * 24x7 counters only support READ transactions. They are
1300  * always counting and dont need/support ADD transactions.
1301  * Cache the flags, but otherwise ignore transactions that
1302  * are not PERF_PMU_TXN_READ.
1303  */
1304 static void h_24x7_event_start_txn(struct pmu *pmu, unsigned int flags)
1305 {
1306         struct hv_24x7_request_buffer *request_buffer;
1307         struct hv_24x7_data_result_buffer *result_buffer;
1308
1309         /* We should not be called if we are already in a txn */
1310         WARN_ON_ONCE(__this_cpu_read(hv_24x7_txn_flags));
1311
1312         __this_cpu_write(hv_24x7_txn_flags, flags);
1313         if (flags & ~PERF_PMU_TXN_READ)
1314                 return;
1315
1316         request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
1317         result_buffer = (void *)get_cpu_var(hv_24x7_resb);
1318
1319         init_24x7_request(request_buffer, result_buffer);
1320
1321         put_cpu_var(hv_24x7_resb);
1322         put_cpu_var(hv_24x7_reqb);
1323 }
1324
/*
 * Clean up transaction state: clear this CPU's cached transaction flags
 * and any recorded transaction error.
 *
 * NOTE: Ignore state of request and result buffers for now.
 *       We will initialize them during the next read/txn.
 */
static void reset_txn(void)
{
        __this_cpu_write(hv_24x7_txn_flags, 0);
        __this_cpu_write(hv_24x7_txn_err, 0);
}
1336
1337 /*
1338  * 24x7 counters only support READ transactions. They are always counting
1339  * and dont need/support ADD transactions. Clear ->txn_flags but otherwise
1340  * ignore transactions that are not of type PERF_PMU_TXN_READ.
1341  *
1342  * For READ transactions, submit all pending 24x7 requests (i.e requests
1343  * that were queued by h_24x7_event_read()), to the hypervisor and update
1344  * the event counts.
1345  */
1346 static int h_24x7_event_commit_txn(struct pmu *pmu)
1347 {
1348         struct hv_24x7_request_buffer *request_buffer;
1349         struct hv_24x7_data_result_buffer *result_buffer;
1350         struct hv_24x7_result *resb;
1351         struct perf_event *event;
1352         u64 count;
1353         int i, ret, txn_flags;
1354         struct hv_24x7_hw *h24x7hw;
1355
1356         txn_flags = __this_cpu_read(hv_24x7_txn_flags);
1357         WARN_ON_ONCE(!txn_flags);
1358
1359         ret = 0;
1360         if (txn_flags & ~PERF_PMU_TXN_READ)
1361                 goto out;
1362
1363         ret = __this_cpu_read(hv_24x7_txn_err);
1364         if (ret)
1365                 goto out;
1366
1367         request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
1368         result_buffer = (void *)get_cpu_var(hv_24x7_resb);
1369
1370         ret = make_24x7_request(request_buffer, result_buffer);
1371         if (ret) {
1372                 log_24x7_hcall(request_buffer, result_buffer, ret);
1373                 goto put_reqb;
1374         }
1375
1376         h24x7hw = &get_cpu_var(hv_24x7_hw);
1377
1378         /* Update event counts from hcall */
1379         for (i = 0; i < request_buffer->num_requests; i++) {
1380                 resb = &result_buffer->results[i];
1381                 count = be64_to_cpu(resb->elements[0].element_data[0]);
1382                 event = h24x7hw->events[i];
1383                 h24x7hw->events[i] = NULL;
1384                 update_event_count(event, count);
1385         }
1386
1387         put_cpu_var(hv_24x7_hw);
1388
1389 put_reqb:
1390         put_cpu_var(hv_24x7_resb);
1391         put_cpu_var(hv_24x7_reqb);
1392 out:
1393         reset_txn();
1394         return ret;
1395 }
1396
/*
 * 24x7 counters only support READ transactions. They are always counting
 * and don't need/support ADD transactions. However, regardless of type
 * of transaction, all we need to do is cleanup, so we don't have to check
 * the type of transaction.
 *
 * Warns (once) if no transaction is in progress on this CPU.
 */
static void h_24x7_event_cancel_txn(struct pmu *pmu)
{
        WARN_ON_ONCE(!__this_cpu_read(hv_24x7_txn_flags));
        reset_txn();
}
1408
/*
 * PMU descriptor for the "hv_24x7" PMU; registered with the perf core by
 * hv_24x7_init(). Note that ->del and ->stop share one implementation.
 */
static struct pmu h_24x7_pmu = {
        .task_ctx_nr = perf_invalid_context,

        .name = "hv_24x7",
        .attr_groups = attr_groups,
        .event_init  = h_24x7_event_init,
        .add         = h_24x7_event_add,
        .del         = h_24x7_event_stop,
        .start       = h_24x7_event_start,
        .stop        = h_24x7_event_stop,
        .read        = h_24x7_event_read,
        .start_txn   = h_24x7_event_start_txn,
        .commit_txn  = h_24x7_event_commit_txn,
        .cancel_txn  = h_24x7_event_cancel_txn,
};
1424
1425 static int hv_24x7_init(void)
1426 {
1427         int r;
1428         unsigned long hret;
1429         struct hv_perf_caps caps;
1430
1431         if (!firmware_has_feature(FW_FEATURE_LPAR)) {
1432                 pr_debug("not a virtualized system, not enabling\n");
1433                 return -ENODEV;
1434         }
1435
1436         hret = hv_perf_caps_get(&caps);
1437         if (hret) {
1438                 pr_debug("could not obtain capabilities, not enabling, rc=%ld\n",
1439                                 hret);
1440                 return -ENODEV;
1441         }
1442
1443         hv_page_cache = kmem_cache_create("hv-page-4096", 4096, 4096, 0, NULL);
1444         if (!hv_page_cache)
1445                 return -ENOMEM;
1446
1447         /* sampling not supported */
1448         h_24x7_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
1449
1450         r = create_events_from_catalog(&event_group.attrs,
1451                                    &event_desc_group.attrs,
1452                                    &event_long_desc_group.attrs);
1453
1454         if (r)
1455                 return r;
1456
1457         r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1);
1458         if (r)
1459                 return r;
1460
1461         return 0;
1462 }
1463
1464 device_initcall(hv_24x7_init);