mm/page_ext: rename offset to index
mm/page_owner.c
#include <linux/debugfs.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/bootmem.h>
#include <linux/stacktrace.h>
#include <linux/page_owner.h>
#include <linux/jump_label.h>
#include <linux/migrate.h>
#include <linux/stackdepot.h>
#include <linux/seq_file.h>

#include "internal.h"

/*
 * TODO: teach PAGE_OWNER_STACK_DEPTH (__dump_page_owner and save_stack)
 * to use off-stack temporary storage
 */
#define PAGE_OWNER_STACK_DEPTH (16)

static bool page_owner_disabled = true;
DEFINE_STATIC_KEY_FALSE(page_owner_inited);

static depot_stack_handle_t dummy_handle;
static depot_stack_handle_t failure_handle;

static void init_early_allocated_pages(void);

static int early_page_owner_param(char *buf)
{
        if (!buf)
                return -EINVAL;

        if (strcmp(buf, "on") == 0)
                page_owner_disabled = false;

        return 0;
}
early_param("page_owner", early_page_owner_param);

static bool need_page_owner(void)
{
        if (page_owner_disabled)
                return false;

        return true;
}

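/*
 * Pre-registered fallback handles: dummy_handle is returned when saving a
 * stack would recurse into the allocator, failure_handle when the stack
 * depot itself cannot store a trace.
 */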
static noinline void register_dummy_stack(void)
{
        unsigned long entries[4];
        struct stack_trace dummy;

        dummy.nr_entries = 0;
        dummy.max_entries = ARRAY_SIZE(entries);
        dummy.entries = &entries[0];
        dummy.skip = 0;

        save_stack_trace(&dummy);
        dummy_handle = depot_save_stack(&dummy, GFP_KERNEL);
}

static noinline void register_failure_stack(void)
{
        unsigned long entries[4];
        struct stack_trace failure;

        failure.nr_entries = 0;
        failure.max_entries = ARRAY_SIZE(entries);
        failure.entries = &entries[0];
        failure.skip = 0;

        save_stack_trace(&failure);
        failure_handle = depot_save_stack(&failure, GFP_KERNEL);
}

static void init_page_owner(void)
{
        if (page_owner_disabled)
                return;

        register_dummy_stack();
        register_failure_stack();
        static_branch_enable(&page_owner_inited);
        init_early_allocated_pages();
}

struct page_ext_operations page_owner_ops = {
        .need = need_page_owner,
        .init = init_page_owner,
};

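/* Clear the owner bit on every page of a block that is being freed. */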
void __reset_page_owner(struct page *page, unsigned int order)
{
        int i;
        struct page_ext *page_ext;

        for (i = 0; i < (1 << order); i++) {
                page_ext = lookup_page_ext(page + i);
                if (unlikely(!page_ext))
                        continue;
                __clear_bit(PAGE_EXT_OWNER, &page_ext->flags);
        }
}

static inline bool check_recursive_alloc(struct stack_trace *trace,
                                        unsigned long ip)
{
        int i, count;

        if (!trace->nr_entries)
                return false;

        for (i = 0, count = 0; i < trace->nr_entries; i++) {
                if (trace->entries[i] == ip && ++count == 2)
                        return true;
        }

        return false;
}

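/*
 * Capture the current call stack and hand it to the stack depot, which
 * deduplicates traces and returns a compact handle.
 */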
static noinline depot_stack_handle_t save_stack(gfp_t flags)
{
        unsigned long entries[PAGE_OWNER_STACK_DEPTH];
        struct stack_trace trace = {
                .nr_entries = 0,
                .entries = entries,
                .max_entries = PAGE_OWNER_STACK_DEPTH,
                .skip = 0
        };
        depot_stack_handle_t handle;

        save_stack_trace(&trace);
        if (trace.nr_entries != 0 &&
            trace.entries[trace.nr_entries-1] == ULONG_MAX)
                trace.nr_entries--;

        /*
         * We need to check for recursion here because our request to
         * stackdepot could trigger a memory allocation to save the new
         * entry. That allocation would reach this point and call
         * depot_save_stack() again if we don't catch it. Since stackdepot
         * would still be short of memory, it would try to allocate again
         * and loop forever.
         */
        if (check_recursive_alloc(&trace, _RET_IP_))
                return dummy_handle;

        handle = depot_save_stack(&trace, flags);
        if (!handle)
                handle = failure_handle;

        return handle;
}

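/* Record the allocation stack, order and gfp mask in the page's page_ext. */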
noinline void __set_page_owner(struct page *page, unsigned int order,
                                        gfp_t gfp_mask)
{
        struct page_ext *page_ext = lookup_page_ext(page);

        if (unlikely(!page_ext))
                return;

        page_ext->handle = save_stack(gfp_mask);
        page_ext->order = order;
        page_ext->gfp_mask = gfp_mask;
        page_ext->last_migrate_reason = -1;

        __set_bit(PAGE_EXT_OWNER, &page_ext->flags);
}

void __set_page_owner_migrate_reason(struct page *page, int reason)
{
        struct page_ext *page_ext = lookup_page_ext(page);

        if (unlikely(!page_ext))
                return;

        page_ext->last_migrate_reason = reason;
}

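/*
 * After splitting a high-order page, mark the head as order 0 and copy its
 * owner info to every tail page.
 */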
void __split_page_owner(struct page *page, unsigned int order)
{
        int i;
        struct page_ext *page_ext = lookup_page_ext(page);

        if (unlikely(!page_ext))
                return;

        page_ext->order = 0;
        for (i = 1; i < (1 << order); i++)
                __copy_page_owner(page, page + i);
}

void __copy_page_owner(struct page *oldpage, struct page *newpage)
{
        struct page_ext *old_ext = lookup_page_ext(oldpage);
        struct page_ext *new_ext = lookup_page_ext(newpage);

        if (unlikely(!old_ext || !new_ext))
                return;

        new_ext->order = old_ext->order;
        new_ext->gfp_mask = old_ext->gfp_mask;
        new_ext->last_migrate_reason = old_ext->last_migrate_reason;
        new_ext->handle = old_ext->handle;

        /*
         * We don't clear the bit on the oldpage as it's going to be freed
         * after migration. Until then, the info can be useful in case of
         * a bug, and the overall stats will be off a bit only temporarily.
         * Also, migrate_misplaced_transhuge_page() can still fail the
         * migration and then we want the oldpage to retain the info. But
         * in that case we also don't need to explicitly clear the info from
         * the new page, which will be freed.
         */
        __set_bit(PAGE_EXT_OWNER, &new_ext->flags);
}

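/*
 * Walk a zone and count the pageblocks that contain at least one page whose
 * allocation migratetype differs from the block's migratetype, for the
 * pagetypeinfo mixed-block report.
 */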
void pagetypeinfo_showmixedcount_print(struct seq_file *m,
                                       pg_data_t *pgdat, struct zone *zone)
{
        struct page *page;
        struct page_ext *page_ext;
        unsigned long pfn = zone->zone_start_pfn, block_end_pfn;
        unsigned long end_pfn = pfn + zone->spanned_pages;
        unsigned long count[MIGRATE_TYPES] = { 0, };
        int pageblock_mt, page_mt;
        int i;

        /* Scan block by block. First and last block may be incomplete */
        pfn = zone->zone_start_pfn;

        /*
         * Walk the zone in pageblock_nr_pages steps. If a page block spans
         * a zone boundary, it will be double counted between zones. This does
         * not matter as the mixed block count will still be correct
         */
        for (; pfn < end_pfn; ) {
                if (!pfn_valid(pfn)) {
                        pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
                        continue;
                }

                block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
                block_end_pfn = min(block_end_pfn, end_pfn);

                page = pfn_to_page(pfn);
                pageblock_mt = get_pageblock_migratetype(page);

                for (; pfn < block_end_pfn; pfn++) {
                        if (!pfn_valid_within(pfn))
                                continue;

                        page = pfn_to_page(pfn);

                        if (page_zone(page) != zone)
                                continue;

                        if (PageBuddy(page)) {
                                pfn += (1UL << page_order(page)) - 1;
                                continue;
                        }

                        if (PageReserved(page))
                                continue;

                        page_ext = lookup_page_ext(page);
                        if (unlikely(!page_ext))
                                continue;

                        if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
                                continue;

                        page_mt = gfpflags_to_migratetype(page_ext->gfp_mask);
                        if (pageblock_mt != page_mt) {
                                if (is_migrate_cma(pageblock_mt))
                                        count[MIGRATE_MOVABLE]++;
                                else
                                        count[pageblock_mt]++;

                                pfn = block_end_pfn;
                                break;
                        }
                        pfn += (1UL << page_ext->order) - 1;
                }
        }

        /* Print counts */
        seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
        for (i = 0; i < MIGRATE_TYPES; i++)
                seq_printf(m, "%12lu ", count[i]);
        seq_putc(m, '\n');
}

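/*
 * Format one page_owner record into a kernel buffer and copy it to the
 * user buffer supplied to the debugfs read.
 */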
static ssize_t
print_page_owner(char __user *buf, size_t count, unsigned long pfn,
                struct page *page, struct page_ext *page_ext,
                depot_stack_handle_t handle)
{
        int ret;
        int pageblock_mt, page_mt;
        char *kbuf;
        unsigned long entries[PAGE_OWNER_STACK_DEPTH];
        struct stack_trace trace = {
                .nr_entries = 0,
                .entries = entries,
                .max_entries = PAGE_OWNER_STACK_DEPTH,
                .skip = 0
        };

        kbuf = kmalloc(count, GFP_KERNEL);
        if (!kbuf)
                return -ENOMEM;

        ret = snprintf(kbuf, count,
                        "Page allocated via order %u, mask %#x(%pGg)\n",
                        page_ext->order, page_ext->gfp_mask,
                        &page_ext->gfp_mask);

        if (ret >= count)
                goto err;

        /* Print information relevant to grouping pages by mobility */
        pageblock_mt = get_pageblock_migratetype(page);
        page_mt = gfpflags_to_migratetype(page_ext->gfp_mask);
        ret += snprintf(kbuf + ret, count - ret,
                        "PFN %lu type %s Block %lu type %s Flags %#lx(%pGp)\n",
                        pfn,
                        migratetype_names[page_mt],
                        pfn >> pageblock_order,
                        migratetype_names[pageblock_mt],
                        page->flags, &page->flags);

        if (ret >= count)
                goto err;

        depot_fetch_stack(handle, &trace);
        ret += snprint_stack_trace(kbuf + ret, count - ret, &trace, 0);
        if (ret >= count)
                goto err;

        if (page_ext->last_migrate_reason != -1) {
                ret += snprintf(kbuf + ret, count - ret,
                        "Page has been migrated, last migrate reason: %s\n",
                        migrate_reason_names[page_ext->last_migrate_reason]);
                if (ret >= count)
                        goto err;
        }

        ret += snprintf(kbuf + ret, count - ret, "\n");
        if (ret >= count)
                goto err;

        if (copy_to_user(buf, kbuf, ret))
                ret = -EFAULT;

        kfree(kbuf);
        return ret;

err:
        kfree(kbuf);
        return -ENOMEM;
}

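/*
 * Dump a page's owner info (allocation stack, order, gfp mask and last
 * migrate reason) to the kernel log.
 */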
void __dump_page_owner(struct page *page)
{
        struct page_ext *page_ext = lookup_page_ext(page);
        unsigned long entries[PAGE_OWNER_STACK_DEPTH];
        struct stack_trace trace = {
                .nr_entries = 0,
                .entries = entries,
                .max_entries = PAGE_OWNER_STACK_DEPTH,
                .skip = 0
        };
        depot_stack_handle_t handle;
        gfp_t gfp_mask;
        int mt;

        if (unlikely(!page_ext)) {
                pr_alert("There is no page extension available.\n");
                return;
        }
        gfp_mask = page_ext->gfp_mask;
        mt = gfpflags_to_migratetype(gfp_mask);

        if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) {
                pr_alert("page_owner info is not active (free page?)\n");
                return;
        }

        handle = READ_ONCE(page_ext->handle);
        if (!handle) {
                pr_alert("page_owner info is not active (free page?)\n");
                return;
        }

        depot_fetch_stack(handle, &trace);
        pr_alert("page allocated via order %u, migratetype %s, gfp_mask %#x(%pGg)\n",
                 page_ext->order, migratetype_names[mt], gfp_mask, &gfp_mask);
        print_stack_trace(&trace, 0);

        if (page_ext->last_migrate_reason != -1)
                pr_alert("page has been migrated, last migrate reason: %s\n",
                        migrate_reason_names[page_ext->last_migrate_reason]);
}

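/*
 * debugfs read handler: scan PFNs starting from the file offset and emit
 * the owner record of the next allocated page that has one.
 */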
static ssize_t
read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
        unsigned long pfn;
        struct page *page;
        struct page_ext *page_ext;
        depot_stack_handle_t handle;

        if (!static_branch_unlikely(&page_owner_inited))
                return -EINVAL;

        page = NULL;
        pfn = min_low_pfn + *ppos;

        /* Find a valid PFN or the start of a MAX_ORDER_NR_PAGES area */
        while (!pfn_valid(pfn) && (pfn & (MAX_ORDER_NR_PAGES - 1)) != 0)
                pfn++;

        drain_all_pages(NULL);

        /* Find an allocated page */
        for (; pfn < max_pfn; pfn++) {
                /*
                 * If the new page is in a new MAX_ORDER_NR_PAGES area,
                 * validate the area as existing, skip it if not
                 */
                if ((pfn & (MAX_ORDER_NR_PAGES - 1)) == 0 && !pfn_valid(pfn)) {
                        pfn += MAX_ORDER_NR_PAGES - 1;
                        continue;
                }

                /* Check for holes within a MAX_ORDER area */
                if (!pfn_valid_within(pfn))
                        continue;

                page = pfn_to_page(pfn);
                if (PageBuddy(page)) {
                        unsigned long freepage_order = page_order_unsafe(page);

                        if (freepage_order < MAX_ORDER)
                                pfn += (1UL << freepage_order) - 1;
                        continue;
                }

                page_ext = lookup_page_ext(page);
                if (unlikely(!page_ext))
                        continue;

                /*
                 * Some pages could be missed by concurrent allocation or free,
                 * because we don't hold the zone lock.
                 */
                if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
                        continue;

                /*
                 * Access to page_ext->handle isn't synchronized, so be
                 * careful when accessing it.
                 */
                handle = READ_ONCE(page_ext->handle);
                if (!handle)
                        continue;

                /* Record the next PFN to read in the file offset */
                *ppos = (pfn - min_low_pfn) + 1;

                return print_page_owner(buf, count, pfn, page,
                                page_ext, handle);
        }

        return 0;
}

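/*
 * Give pages that were allocated before page_owner was enabled a default
 * owner record (order 0, zero gfp mask) so they are not reported as
 * untracked later.
 */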
static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
{
        struct page *page;
        struct page_ext *page_ext;
        unsigned long pfn = zone->zone_start_pfn, block_end_pfn;
        unsigned long end_pfn = pfn + zone->spanned_pages;
        unsigned long count = 0;

        /* Scan block by block. First and last block may be incomplete */
        pfn = zone->zone_start_pfn;

        /*
         * Walk the zone in pageblock_nr_pages steps. If a page block spans
         * a zone boundary, it will be double counted between zones. This does
         * not matter as the mixed block count will still be correct
         */
        for (; pfn < end_pfn; ) {
                if (!pfn_valid(pfn)) {
                        pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
                        continue;
                }

                block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
                block_end_pfn = min(block_end_pfn, end_pfn);

                page = pfn_to_page(pfn);

                for (; pfn < block_end_pfn; pfn++) {
                        if (!pfn_valid_within(pfn))
                                continue;

                        page = pfn_to_page(pfn);

                        if (page_zone(page) != zone)
                                continue;

                        /*
                         * It is safe to check the buddy flag and order here
                         * because this is the init stage and only a single
                         * thread runs.
                         */
                        if (PageBuddy(page)) {
                                pfn += (1UL << page_order(page)) - 1;
                                continue;
                        }

                        if (PageReserved(page))
                                continue;

                        page_ext = lookup_page_ext(page);
                        if (unlikely(!page_ext))
                                continue;

                        /* Maybe overlapping zone */
                        if (test_bit(PAGE_EXT_OWNER, &page_ext->flags))
                                continue;

                        /* Found early allocated page */
                        set_page_owner(page, 0, 0);
                        count++;
                }
        }

        pr_info("Node %d, zone %8s: page owner found early allocated %lu pages\n",
                pgdat->node_id, zone->name, count);
}

static void init_zones_in_node(pg_data_t *pgdat)
{
        struct zone *zone;
        struct zone *node_zones = pgdat->node_zones;
        unsigned long flags;

        for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
                if (!populated_zone(zone))
                        continue;

                spin_lock_irqsave(&zone->lock, flags);
                init_pages_in_zone(pgdat, zone);
                spin_unlock_irqrestore(&zone->lock, flags);
        }
}

static void init_early_allocated_pages(void)
{
        pg_data_t *pgdat;

        drain_all_pages(NULL);
        for_each_online_pgdat(pgdat)
                init_zones_in_node(pgdat);
}

static const struct file_operations proc_page_owner_operations = {
        .read           = read_page_owner,
};

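/* Register the debugfs "page_owner" file once the feature is enabled. */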
static int __init pageowner_init(void)
{
        struct dentry *dentry;

        if (!static_branch_unlikely(&page_owner_inited)) {
                pr_info("page_owner is disabled\n");
                return 0;
        }

        dentry = debugfs_create_file("page_owner", S_IRUSR, NULL,
                        NULL, &proc_page_owner_operations);
        if (IS_ERR(dentry))
                return PTR_ERR(dentry);

        return 0;
}
late_initcall(pageowner_init)