diff --git a/mm/page_owner.c b/mm/page_owner.c
index c6cda3e..ec6dc18 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -7,11 +7,22 @@
 #include <linux/page_owner.h>
 #include <linux/jump_label.h>
 #include <linux/migrate.h>
+#include <linux/stackdepot.h>
+
 #include "internal.h"
 
+/*
+ * TODO: teach PAGE_OWNER_STACK_DEPTH (__dump_page_owner and save_stack)
+ * to use off-stack temporary storage
+ */
+#define PAGE_OWNER_STACK_DEPTH (16)
+
 static bool page_owner_disabled = true;
 DEFINE_STATIC_KEY_FALSE(page_owner_inited);
 
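+/*
+ * Fallback stack handles: dummy_handle is returned when save_stack()
+ * detects recursion into the allocator, failure_handle when stackdepot
+ * itself fails to store the trace.
+ */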
+static depot_stack_handle_t dummy_handle;
+static depot_stack_handle_t failure_handle;
+
 static void init_early_allocated_pages(void);
 
 static int early_page_owner_param(char *buf)
@@ -34,11 +45,41 @@ static bool need_page_owner(void)
        return true;
 }
 
+static __always_inline depot_stack_handle_t create_dummy_stack(void)
+{
+       unsigned long entries[4];
+       struct stack_trace dummy;
+
+       dummy.nr_entries = 0;
+       dummy.max_entries = ARRAY_SIZE(entries);
+       dummy.entries = &entries[0];
+       dummy.skip = 0;
+
+       save_stack_trace(&dummy);
+       return depot_save_stack(&dummy, GFP_KERNEL);
+}
+
+static noinline void register_dummy_stack(void)
+{
+       dummy_handle = create_dummy_stack();
+}
+
+static noinline void register_failure_stack(void)
+{
+       failure_handle = create_dummy_stack();
+}
+
 static void init_page_owner(void)
 {
        if (page_owner_disabled)
                return;
 
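+       /*
+        * Register the fallback stacks up front so save_stack() can hand
+        * them out once page_owner goes live.
+        */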
+       register_dummy_stack();
+       register_failure_stack();
        static_branch_enable(&page_owner_inited);
        init_early_allocated_pages();
 }
@@ -61,25 +102,66 @@ void __reset_page_owner(struct page *page, unsigned int order)
        }
 }
 
-void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask)
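+/*
+ * Return true if @ip appears at least twice in @trace, which means the
+ * allocation path re-entered itself (e.g. stackdepot allocating memory
+ * triggered page_owner again).
+ */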
+static inline bool check_recursive_alloc(struct stack_trace *trace,
+                                       unsigned long ip)
 {
-       struct page_ext *page_ext = lookup_page_ext(page);
+       int i, count;
+
+       if (!trace->nr_entries)
+               return false;
+
+       for (i = 0, count = 0; i < trace->nr_entries; i++) {
+               if (trace->entries[i] == ip && ++count == 2)
+                       return true;
+       }
+
+       return false;
+}
 
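+/*
+ * Capture the current stack trace and store it in stackdepot, returning
+ * a compact handle.  Falls back to dummy_handle on recursive allocation
+ * and to failure_handle when the depot cannot store the trace.
+ */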
+static noinline depot_stack_handle_t save_stack(gfp_t flags)
+{
+       unsigned long entries[PAGE_OWNER_STACK_DEPTH];
        struct stack_trace trace = {
                .nr_entries = 0,
-               .max_entries = ARRAY_SIZE(page_ext->trace_entries),
-               .entries = &page_ext->trace_entries[0],
-               .skip = 3,
+               .entries = entries,
+               .max_entries = PAGE_OWNER_STACK_DEPTH,
+               .skip = 0
        };
+       depot_stack_handle_t handle;
+
+       save_stack_trace(&trace);
+       if (trace.nr_entries != 0 &&
+           trace.entries[trace.nr_entries-1] == ULONG_MAX)
+               trace.nr_entries--;
+
+       /*
+        * We need to check for recursion here because our request to
+        * stackdepot could itself trigger a memory allocation to store the
+        * new entry. If we didn't catch it, that allocation would reach
+        * this point and call depot_save_stack() again; stackdepot would
+        * still be short of memory, so it would allocate again and loop
+        * forever.
+        */
+       if (check_recursive_alloc(&trace, _RET_IP_))
+               return dummy_handle;
+
+       handle = depot_save_stack(&trace, flags);
+       if (!handle)
+               handle = failure_handle;
+
+       return handle;
+}
+
+noinline void __set_page_owner(struct page *page, unsigned int order,
+                                       gfp_t gfp_mask)
+{
+       struct page_ext *page_ext = lookup_page_ext(page);
 
        if (unlikely(!page_ext))
                return;
 
-       save_stack_trace(&trace);
-
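+       /* Record the allocation stack as a compact stackdepot handle */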
+       page_ext->handle = save_stack(gfp_mask);
        page_ext->order = order;
        page_ext->gfp_mask = gfp_mask;
-       page_ext->nr_entries = trace.nr_entries;
        page_ext->last_migrate_reason = -1;
 
        __set_bit(PAGE_EXT_OWNER, &page_ext->flags);
@@ -94,34 +176,31 @@ void __set_page_owner_migrate_reason(struct page *page, int reason)
        page_ext->last_migrate_reason = reason;
 }
 
-gfp_t __get_page_owner_gfp(struct page *page)
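+/*
+ * The page is being split into order-0 pages: reset the recorded order
+ * on the head and replicate the owner info into every tail page.
+ */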
+void __split_page_owner(struct page *page, unsigned int order)
 {
+       int i;
        struct page_ext *page_ext = lookup_page_ext(page);
+
        if (unlikely(!page_ext))
-               /*
-                * The caller just returns 0 if no valid gfp
-                * So return 0 here too.
-                */
-               return 0;
+               return;
 
-       return page_ext->gfp_mask;
+       page_ext->order = 0;
+       for (i = 1; i < (1 << order); i++)
+               __copy_page_owner(page, page + i);
 }
 
 void __copy_page_owner(struct page *oldpage, struct page *newpage)
 {
        struct page_ext *old_ext = lookup_page_ext(oldpage);
        struct page_ext *new_ext = lookup_page_ext(newpage);
-       int i;
 
        if (unlikely(!old_ext || !new_ext))
                return;
 
        new_ext->order = old_ext->order;
        new_ext->gfp_mask = old_ext->gfp_mask;
-       new_ext->nr_entries = old_ext->nr_entries;
-
-       for (i = 0; i < ARRAY_SIZE(new_ext->trace_entries); i++)
-               new_ext->trace_entries[i] = old_ext->trace_entries[i];
+       new_ext->last_migrate_reason = old_ext->last_migrate_reason;
+       new_ext->handle = old_ext->handle;
 
        /*
         * We don't clear the bit on the oldpage as it's going to be freed
@@ -137,14 +216,18 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage)
 
 static ssize_t
 print_page_owner(char __user *buf, size_t count, unsigned long pfn,
-               struct page *page, struct page_ext *page_ext)
+               struct page *page, struct page_ext *page_ext,
+               depot_stack_handle_t handle)
 {
        int ret;
        int pageblock_mt, page_mt;
        char *kbuf;
+       unsigned long entries[PAGE_OWNER_STACK_DEPTH];
        struct stack_trace trace = {
-               .nr_entries = page_ext->nr_entries,
-               .entries = &page_ext->trace_entries[0],
+               .nr_entries = 0,
+               .entries = entries,
+               .max_entries = PAGE_OWNER_STACK_DEPTH,
+               .skip = 0
        };
 
        kbuf = kmalloc(count, GFP_KERNEL);
@@ -173,6 +256,7 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn,
        if (ret >= count)
                goto err;
 
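+       /* Fetch the saved allocation stack back from stackdepot */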
+       depot_fetch_stack(handle, &trace);
        ret += snprint_stack_trace(kbuf + ret, count - ret, &trace, 0);
        if (ret >= count)
                goto err;
@@ -203,23 +287,36 @@ err:
 void __dump_page_owner(struct page *page)
 {
        struct page_ext *page_ext = lookup_page_ext(page);
+       unsigned long entries[PAGE_OWNER_STACK_DEPTH];
        struct stack_trace trace = {
-               .nr_entries = page_ext->nr_entries,
-               .entries = &page_ext->trace_entries[0],
+               .nr_entries = 0,
+               .entries = entries,
+               .max_entries = PAGE_OWNER_STACK_DEPTH,
+               .skip = 0
        };
-       gfp_t gfp_mask = page_ext->gfp_mask;
-       int mt = gfpflags_to_migratetype(gfp_mask);
+       depot_stack_handle_t handle;
+       gfp_t gfp_mask;
+       int mt;
 
        if (unlikely(!page_ext)) {
        pr_alert("There is no page extension available.\n");
                return;
        }
+       gfp_mask = page_ext->gfp_mask;
+       mt = gfpflags_to_migratetype(gfp_mask);
 
        if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) {
                pr_alert("page_owner info is not active (free page?)\n");
                return;
        }
 
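+       /* The handle may be written concurrently; read it only once */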
+       handle = READ_ONCE(page_ext->handle);
+       if (!handle) {
+               pr_alert("page_owner info is not active (free page?)\n");
+               return;
+       }
+
+       depot_fetch_stack(handle, &trace);
        pr_alert("page allocated via order %u, migratetype %s, gfp_mask %#x(%pGg)\n",
                 page_ext->order, migratetype_names[mt], gfp_mask, &gfp_mask);
        print_stack_trace(&trace, 0);
@@ -235,6 +332,7 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
        unsigned long pfn;
        struct page *page;
        struct page_ext *page_ext;
+       depot_stack_handle_t handle;
 
        if (!static_branch_unlikely(&page_owner_inited))
                return -EINVAL;
@@ -283,10 +381,19 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
                if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
                        continue;
 
+               /*
+                * Access to page_ext->handle isn't synchronized with the
+                * writer, so read it carefully and only once.
+                */
+               handle = READ_ONCE(page_ext->handle);
+               if (!handle)
+                       continue;
+
                /* Record the next PFN to read in the file offset */
                *ppos = (pfn - min_low_pfn) + 1;
 
-               return print_page_owner(buf, count, pfn, page, page_ext);
+               return print_page_owner(buf, count, pfn, page,
+                               page_ext, handle);
        }
 
        return 0;