sched/numa: Fix numa balancing stats in /proc/pid/sched
authorSrikar Dronamraju <srikar@linux.vnet.ibm.com>
Thu, 25 Jun 2015 17:21:43 +0000 (22:51 +0530)
committerIngo Molnar <mingo@kernel.org>
Sat, 4 Jul 2015 08:04:33 +0000 (10:04 +0200)
Commit 44dba3d5d6a1 ("sched: Refactor task_struct to use
numa_faults instead of numa_* pointers") modified the way
tsk->numa_faults stats are accounted.

However that commit never touched sched_show_numa(), whose output is
displayed in /proc/pid/sched, and thus the numbers displayed there
don't match the actual numbers.

Fix it by making sure that /proc/pid/sched reflects the task
fault numbers. Also add group fault stats.

A couple of other modifications are also included here:

1. Format changes:

  - Previously we would list two entries per node, one for private
    and one for shared. Also the home node info was listed in each entry.

  - Now preferred node, total_faults and current node are
    displayed separately.

  - Now there is one entry per node, which lists the private and shared
    task and group faults.

2. Unit changes:

  - p->numa_pages_migrated was getting reset after every read of
    /proc/pid/sched. It's more useful to have absolute numbers since
    differential migrations between two accesses can be more easily
    calculated.

Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Iulia Manda <iulia.manda21@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1435252903-1081-4-git-send-email-srikar@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
kernel/sched/debug.c
kernel/sched/fair.c
kernel/sched/sched.h

index f1dcd1d..4222ec5 100644 (file)
@@ -517,11 +517,21 @@ __initcall(init_sched_debug_procfs);
        SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
 
 
+#ifdef CONFIG_NUMA_BALANCING
+void print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
+               unsigned long tpf, unsigned long gsf, unsigned long gpf)
+{
+       SEQ_printf(m, "numa_faults node=%d ", node); /* starts the single output line for this node */
+       SEQ_printf(m, "task_private=%lu task_shared=%lu ", tpf, tsf); /* labels are private, shared: tpf before tsf */
+       SEQ_printf(m, "group_private=%lu group_shared=%lu\n", gpf, gsf); /* same order for the group counts */
+}
+#endif
+
+
 static void sched_show_numa(struct task_struct *p, struct seq_file *m)
 {
 #ifdef CONFIG_NUMA_BALANCING
        struct mempolicy *pol;
-       int node, i;
 
        if (p->mm)
                P(mm->numa_scan_seq);
@@ -533,26 +543,12 @@ static void sched_show_numa(struct task_struct *p, struct seq_file *m)
        mpol_get(pol);
        task_unlock(p);
 
-       SEQ_printf(m, "numa_migrations, %ld\n", xchg(&p->numa_pages_migrated, 0));
-
-       for_each_online_node(node) {
-               for (i = 0; i < 2; i++) {
-                       unsigned long nr_faults = -1;
-                       int cpu_current, home_node;
-
-                       if (p->numa_faults)
-                               nr_faults = p->numa_faults[2*node + i];
-
-                       cpu_current = !i ? (task_node(p) == node) :
-                               (pol && node_isset(node, pol->v.nodes));
-
-                       home_node = (p->numa_preferred_nid == node);
-
-                       SEQ_printf(m, "numa_faults_memory, %d, %d, %d, %d, %ld\n",
-                               i, node, cpu_current, home_node, nr_faults);
-               }
-       }
-
+       P(numa_pages_migrated);
+       P(numa_preferred_nid);
+       P(total_numa_faults);
+       SEQ_printf(m, "current_node=%d, numa_group_id=%d\n",
+                       task_node(p), task_numa_group_id(p));
+       show_numa_stats(p, m);
        mpol_put(pol);
 #endif
 }
index 40a7fcb..7245039 100644 (file)
@@ -8468,7 +8468,27 @@ void print_cfs_stats(struct seq_file *m, int cpu)
                print_cfs_rq(m, cpu, cfs_rq);
        rcu_read_unlock();
 }
-#endif
+
+#ifdef CONFIG_NUMA_BALANCING
+void show_numa_stats(struct task_struct *p, struct seq_file *m)
+{
+       int node;
+       unsigned long tsf = 0, tpf = 0, gsf = 0, gpf = 0; /* stay 0 when the matching fault array is absent */
+
+       for_each_online_node(node) { /* one print_numa_stats() line per online node */
+               if (p->numa_faults) {
+                       tsf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 0)];
+                       tpf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 1)];
+               }
+               if (p->numa_group) { /* NOTE(review): read without visible locking here - confirm lifetime */
+                       gsf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 0)];
+                       gpf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 1)];
+               }
+               print_numa_stats(m, node, tsf, tpf, gsf, gpf);
+       }
+}
+#endif /* CONFIG_NUMA_BALANCING */
+#endif /* CONFIG_SCHED_DEBUG */
 
 __init void init_sched_fair_class(void)
 {
index 7d58952..7ef5968 100644 (file)
@@ -1675,7 +1675,15 @@ extern void print_rt_stats(struct seq_file *m, int cpu);
 extern void print_dl_stats(struct seq_file *m, int cpu);
 extern void
 print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
-#endif
+
+#ifdef CONFIG_NUMA_BALANCING
+extern void
+show_numa_stats(struct task_struct *p, struct seq_file *m); /* prints one numa_faults line per online node for p */
+extern void
+print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
+       unsigned long tpf, unsigned long gsf, unsigned long gpf); /* formats one node's task and group fault counts */
+#endif /* CONFIG_NUMA_BALANCING */
+#endif /* CONFIG_SCHED_DEBUG */
 
 extern void init_cfs_rq(struct cfs_rq *cfs_rq);
 extern void init_rt_rq(struct rt_rq *rt_rq);