diff --git a/mm/vmstat.c b/mm/vmstat.c
index e9ab104..1284f89 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -7,6 +7,7 @@
  *  zoned VM statistics
  *  Copyright (C) 2006 Silicon Graphics, Inc.,
  *             Christoph Lameter <christoph@lameter.com>
+ *  Copyright (C) 2008-2014 Christoph Lameter
  */
 #include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/cpu.h>
+#include <linux/cpumask.h>
 #include <linux/vmstat.h>
 #include <linux/sched.h>
 #include <linux/math64.h>
 #include <linux/writeback.h>
 #include <linux/compaction.h>
 #include <linux/mm_inline.h>
+#include <linux/page_ext.h>
+#include <linux/page_owner.h>
 
 #include "internal.h"
 
@@ -419,13 +423,22 @@ void dec_zone_page_state(struct page *page, enum zone_stat_item item)
 EXPORT_SYMBOL(dec_zone_page_state);
 #endif
 
-static inline void fold_diff(int *diff)
+
+/*
+ * Fold a differential into the global counters.
+ * Returns the number of counters updated.
+ */
+static int fold_diff(int *diff)
 {
        int i;
+       int changes = 0;
 
        for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
-               if (diff[i])
+               if (diff[i]) {
                        atomic_long_add(diff[i], &vm_stat[i]);
+                       changes++;
+               }
+       return changes;
 }
 
 /*
@@ -441,12 +454,15 @@ static inline void fold_diff(int *diff)
  * statistics in the remote zone struct as well as the global cachelines
  * with the global counters. These could cause remote node cache line
  * bouncing and will have to be only done when necessary.
+ *
+ * The function returns the number of global counters updated.
  */
-static void refresh_cpu_vm_stats(void)
+static int refresh_cpu_vm_stats(void)
 {
        struct zone *zone;
        int i;
        int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
+       int changes = 0;
 
        for_each_populated_zone(zone) {
                struct per_cpu_pageset __percpu *p = zone->pageset;
@@ -486,15 +502,17 @@ static void refresh_cpu_vm_stats(void)
                        continue;
                }
 
-
                if (__this_cpu_dec_return(p->expire))
                        continue;
 
-               if (__this_cpu_read(p->pcp.count))
+               if (__this_cpu_read(p->pcp.count)) {
                        drain_zone_pages(zone, this_cpu_ptr(&p->pcp));
+                       changes++;
+               }
 #endif
        }
-       fold_diff(global_diff);
+       changes += fold_diff(global_diff);
+       return changes;
 }
 
 /*
@@ -735,7 +753,7 @@ static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
                                        TEXT_FOR_HIGHMEM(xx) xx "_movable",
 
 const char * const vmstat_text[] = {
-       /* Zoned VM counters */
+       /* enum zone_stat_item counters */
        "nr_free_pages",
        "nr_alloc_batch",
        "nr_inactive_anon",
@@ -778,10 +796,13 @@ const char * const vmstat_text[] = {
        "workingset_nodereclaim",
        "nr_anon_transparent_hugepages",
        "nr_free_cma",
+
+       /* enum writeback_stat_item counters */
        "nr_dirty_threshold",
        "nr_dirty_background_threshold",
 
 #ifdef CONFIG_VM_EVENT_COUNTERS
+       /* enum vm_event_item counters */
        "pgpgin",
        "pgpgout",
        "pswpin",
@@ -860,6 +881,13 @@ const char * const vmstat_text[] = {
        "thp_zero_page_alloc",
        "thp_zero_page_alloc_failed",
 #endif
+#ifdef CONFIG_MEMORY_BALLOON
+       "balloon_inflate",
+       "balloon_deflate",
+#ifdef CONFIG_BALLOON_COMPACTION
+       "balloon_migrate",
+#endif
+#endif /* CONFIG_MEMORY_BALLOON */
 #ifdef CONFIG_DEBUG_TLBFLUSH
 #ifdef CONFIG_SMP
        "nr_tlb_remote_flush",
@@ -872,6 +900,7 @@ const char * const vmstat_text[] = {
 #ifdef CONFIG_DEBUG_VM_VMACACHE
        "vmacache_find_calls",
        "vmacache_find_hits",
+       "vmacache_full_flushes",
 #endif
 #endif /* CONFIG_VM_EVENT_COUNTERS */
 };
@@ -991,6 +1020,104 @@ static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
        return 0;
 }
 
+#ifdef CONFIG_PAGE_OWNER
+static void pagetypeinfo_showmixedcount_print(struct seq_file *m,
+                                                       pg_data_t *pgdat,
+                                                       struct zone *zone)
+{
+       struct page *page;
+       struct page_ext *page_ext;
+       unsigned long pfn = zone->zone_start_pfn, block_end_pfn;
+       unsigned long end_pfn = pfn + zone->spanned_pages;
+       unsigned long count[MIGRATE_TYPES] = { 0, };
+       int pageblock_mt, page_mt;
+       int i;
+
+       /* Scan block by block. First and last block may be incomplete */
+       pfn = zone->zone_start_pfn;
+
+       /*
+        * Walk the zone in pageblock_nr_pages steps. If a page block spans
+        * a zone boundary, it will be double counted between zones. This does
+        * not matter as the mixed block count will still be correct
+        */
+       for (; pfn < end_pfn; ) {
+               if (!pfn_valid(pfn)) {
+                       pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
+                       continue;
+               }
+
+               block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
+               block_end_pfn = min(block_end_pfn, end_pfn);
+
+               page = pfn_to_page(pfn);
+               pageblock_mt = get_pfnblock_migratetype(page, pfn);
+
+               for (; pfn < block_end_pfn; pfn++) {
+                       if (!pfn_valid_within(pfn))
+                               continue;
+
+                       page = pfn_to_page(pfn);
+                       if (PageBuddy(page)) {
+                               pfn += (1UL << page_order(page)) - 1;
+                               continue;
+                       }
+
+                       if (PageReserved(page))
+                               continue;
+
+                       page_ext = lookup_page_ext(page);
+
+                       if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
+                               continue;
+
+                       page_mt = gfpflags_to_migratetype(page_ext->gfp_mask);
+                       if (pageblock_mt != page_mt) {
+                               if (is_migrate_cma(pageblock_mt))
+                                       count[MIGRATE_MOVABLE]++;
+                               else
+                                       count[pageblock_mt]++;
+
+                               pfn = block_end_pfn;
+                               break;
+                       }
+                       pfn += (1UL << page_ext->order) - 1;
+               }
+       }
+
+       /* Print counts */
+       seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
+       for (i = 0; i < MIGRATE_TYPES; i++)
+               seq_printf(m, "%12lu ", count[i]);
+       seq_putc(m, '\n');
+}
+#endif /* CONFIG_PAGE_OWNER */
+
+/*
+ * Print out the number of pageblocks for each migratetype that contain pages
+ * of other types. This gives an indication of how well fallbacks are being
+ * contained by rmqueue_fallback(). It requires information from PAGE_OWNER
+ * to determine what is going on.
+ */
+static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat)
+{
+#ifdef CONFIG_PAGE_OWNER
+       int mtype;
+
+       if (!page_owner_inited)
+               return;
+
+       drain_all_pages(NULL);
+
+       seq_printf(m, "\n%-23s", "Number of mixed blocks ");
+       for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
+               seq_printf(m, "%12s ", migratetype_names[mtype]);
+       seq_putc(m, '\n');
+
+       walk_zones_in_node(m, pgdat, pagetypeinfo_showmixedcount_print);
+#endif /* CONFIG_PAGE_OWNER */
+}
+
 /*
  * This prints out statistics in relation to grouping pages by mobility.
  * It is expensive to collect so do not constantly read the file.
@@ -1008,6 +1135,7 @@ static int pagetypeinfo_show(struct seq_file *m, void *arg)
        seq_putc(m, '\n');
        pagetypeinfo_showfree(m, pgdat);
        pagetypeinfo_showblockcount(m, pgdat);
+       pagetypeinfo_showmixedcount(m, pgdat);
 
        return 0;
 }
@@ -1229,20 +1357,108 @@ static const struct file_operations proc_vmstat_file_operations = {
 #ifdef CONFIG_SMP
 static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
 int sysctl_stat_interval __read_mostly = HZ;
+static cpumask_var_t cpu_stat_off;
 
 static void vmstat_update(struct work_struct *w)
 {
-       refresh_cpu_vm_stats();
-       schedule_delayed_work(this_cpu_ptr(&vmstat_work),
+       if (refresh_cpu_vm_stats())
+               /*
+                * Counters were updated so we expect more updates
+                * to occur in the future. Keep on running the
+                * update worker thread.
+                */
+               schedule_delayed_work(this_cpu_ptr(&vmstat_work),
+                       round_jiffies_relative(sysctl_stat_interval));
+       else {
+               /*
+                * We did not update any counters so the app may be in
+                * a mode where it does not cause counter updates.
+                * We may be uselessly running vmstat_update.
+                * Defer the checking for differentials to the
+                * shepherd thread on a different processor.
+                */
+               int r;
+               /*
+                * The shepherd work thread does not race with us,
+                * since it never changes the bit if it is zero, but
+                * the CPU online / offline code may race if worker
+                * threads are still allowed during
+                * shutdown / startup.
+                */
+               r = cpumask_test_and_set_cpu(smp_processor_id(),
+                       cpu_stat_off);
+               VM_BUG_ON(r);
+       }
+}
+
+/*
+ * Check if the diffs for a certain cpu indicate that
+ * an update is needed.
+ */
+static bool need_update(int cpu)
+{
+       struct zone *zone;
+
+       for_each_populated_zone(zone) {
+               struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);
+
+               BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1);
+               /*
+                * The fast way of checking if there are any vmstat diffs.
+                * This works because the diffs are byte sized items.
+                */
+               if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS))
+                       return true;
+
+       }
+       return false;
+}
+
+
+/*
+ * Shepherd worker thread that checks the
+ * differentials of processors that have their worker
+ * threads for vm statistics updates disabled because of
+ * inactivity.
+ */
+static void vmstat_shepherd(struct work_struct *w);
+
+static DECLARE_DELAYED_WORK(shepherd, vmstat_shepherd);
+
+static void vmstat_shepherd(struct work_struct *w)
+{
+       int cpu;
+
+       get_online_cpus();
+       /* Check processors whose vmstat worker threads have been disabled */
+       for_each_cpu(cpu, cpu_stat_off)
+               if (need_update(cpu) &&
+                       cpumask_test_and_clear_cpu(cpu, cpu_stat_off))
+
+                       schedule_delayed_work_on(cpu, &per_cpu(vmstat_work, cpu),
+                               __round_jiffies_relative(sysctl_stat_interval, cpu));
+
+       put_online_cpus();
+
+       schedule_delayed_work(&shepherd,
                round_jiffies_relative(sysctl_stat_interval));
+
 }
 
-static void start_cpu_timer(int cpu)
+static void __init start_shepherd_timer(void)
 {
-       struct delayed_work *work = &per_cpu(vmstat_work, cpu);
+       int cpu;
+
+       for_each_possible_cpu(cpu)
+               INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
+                       vmstat_update);
+
+       if (!alloc_cpumask_var(&cpu_stat_off, GFP_KERNEL))
+               BUG();
+       cpumask_copy(cpu_stat_off, cpu_online_mask);
 
-       INIT_DEFERRABLE_WORK(work, vmstat_update);
-       schedule_delayed_work_on(cpu, work, __round_jiffies_relative(HZ, cpu));
+       schedule_delayed_work(&shepherd,
+               round_jiffies_relative(sysctl_stat_interval));
 }
 
 static void vmstat_cpu_dead(int node)
@@ -1273,17 +1489,17 @@ static int vmstat_cpuup_callback(struct notifier_block *nfb,
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
                refresh_zone_stat_thresholds();
-               start_cpu_timer(cpu);
                node_set_state(cpu_to_node(cpu), N_CPU);
+               cpumask_set_cpu(cpu, cpu_stat_off);
                break;
        case CPU_DOWN_PREPARE:
        case CPU_DOWN_PREPARE_FROZEN:
                cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
-               per_cpu(vmstat_work, cpu).work.func = NULL;
+               cpumask_clear_cpu(cpu, cpu_stat_off);
                break;
        case CPU_DOWN_FAILED:
        case CPU_DOWN_FAILED_FROZEN:
-               start_cpu_timer(cpu);
+               cpumask_set_cpu(cpu, cpu_stat_off);
                break;
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
@@ -1303,15 +1519,10 @@ static struct notifier_block vmstat_notifier =
 static int __init setup_vmstat(void)
 {
 #ifdef CONFIG_SMP
-       int cpu;
-
        cpu_notifier_register_begin();
        __register_cpu_notifier(&vmstat_notifier);
 
-       for_each_online_cpu(cpu) {
-               start_cpu_timer(cpu);
-               node_set_state(cpu_to_node(cpu), N_CPU);
-       }
+       start_shepherd_timer();
        cpu_notifier_register_done();
 #endif
 #ifdef CONFIG_PROC_FS