mm: memcontrol: introduce CONFIG_MEMCG_LEGACY_KMEM
[cascardo/linux.git] / mm / vmpressure.c
index c5afd57..89b1d44 100644 (file)
@@ -137,14 +137,11 @@ struct vmpressure_event {
 };
 
 static bool vmpressure_event(struct vmpressure *vmpr,
-                            unsigned long scanned, unsigned long reclaimed)
+                            enum vmpressure_levels level)
 {
        struct vmpressure_event *ev;
-       enum vmpressure_levels level;
        bool signalled = false;
 
-       level = vmpressure_calc_level(scanned, reclaimed);
-
        mutex_lock(&vmpr->events_lock);
 
        list_for_each_entry(ev, &vmpr->events, node) {
@@ -164,6 +161,7 @@ static void vmpressure_work_fn(struct work_struct *work)
        struct vmpressure *vmpr = work_to_vmpressure(work);
        unsigned long scanned;
        unsigned long reclaimed;
+       enum vmpressure_levels level;
 
        spin_lock(&vmpr->sr_lock);
        /*
@@ -174,19 +172,21 @@ static void vmpressure_work_fn(struct work_struct *work)
         * here. No need for any locks here since we don't care if
         * vmpr->reclaimed is in sync.
         */
-       scanned = vmpr->scanned;
+       scanned = vmpr->tree_scanned;
        if (!scanned) {
                spin_unlock(&vmpr->sr_lock);
                return;
        }
 
-       reclaimed = vmpr->reclaimed;
-       vmpr->scanned = 0;
-       vmpr->reclaimed = 0;
+       reclaimed = vmpr->tree_reclaimed;
+       vmpr->tree_scanned = 0;
+       vmpr->tree_reclaimed = 0;
        spin_unlock(&vmpr->sr_lock);
 
+       level = vmpressure_calc_level(scanned, reclaimed);
+
        do {
-               if (vmpressure_event(vmpr, scanned, reclaimed))
+               if (vmpressure_event(vmpr, level))
                        break;
                /*
                 * If not handled, propagate the event upward into the
@@ -199,6 +199,7 @@ static void vmpressure_work_fn(struct work_struct *work)
  * vmpressure() - Account memory pressure through scanned/reclaimed ratio
  * @gfp:       reclaimer's gfp mask
  * @memcg:     cgroup memory controller handle
+ * @tree:      legacy subtree mode
  * @scanned:   number of pages scanned
  * @reclaimed: number of pages reclaimed
  *
@@ -206,9 +207,16 @@ static void vmpressure_work_fn(struct work_struct *work)
  * "instantaneous" memory pressure (scanned/reclaimed ratio). The raw
  * pressure index is then further refined and averaged over time.
  *
+ * If @tree is set, vmpressure is in traditional userspace reporting
+ * mode: @memcg is considered the pressure root and userspace is
+ * notified of the entire subtree's reclaim efficiency.
+ *
+ * If @tree is not set, reclaim efficiency is recorded for @memcg, and
+ * only in-kernel users are notified.
+ *
  * This function does not return any value.
  */
-void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
+void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
                unsigned long scanned, unsigned long reclaimed)
 {
        struct vmpressure *vmpr = memcg_to_vmpressure(memcg);
@@ -238,15 +246,49 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
        if (!scanned)
                return;
 
-       spin_lock(&vmpr->sr_lock);
-       vmpr->scanned += scanned;
-       vmpr->reclaimed += reclaimed;
-       scanned = vmpr->scanned;
-       spin_unlock(&vmpr->sr_lock);
+       if (tree) {
+               spin_lock(&vmpr->sr_lock);
+               vmpr->tree_scanned += scanned;
+               vmpr->tree_reclaimed += reclaimed;
+               scanned = vmpr->tree_scanned;
+               spin_unlock(&vmpr->sr_lock);
 
-       if (scanned < vmpressure_win)
-               return;
-       schedule_work(&vmpr->work);
+               if (scanned < vmpressure_win)
+                       return;
+               schedule_work(&vmpr->work);
+       } else {
+               enum vmpressure_levels level;
+
+               /* For now, no users for root-level efficiency */
+               if (!memcg || memcg == root_mem_cgroup)
+                       return;
+
+               spin_lock(&vmpr->sr_lock);
+               scanned = vmpr->scanned += scanned;
+               reclaimed = vmpr->reclaimed += reclaimed;
+               if (scanned < vmpressure_win) {
+                       spin_unlock(&vmpr->sr_lock);
+                       return;
+               }
+               vmpr->scanned = vmpr->reclaimed = 0;
+               spin_unlock(&vmpr->sr_lock);
+
+               level = vmpressure_calc_level(scanned, reclaimed);
+
+#ifdef CONFIG_INET
+               if (level > VMPRESSURE_LOW) {
+                       /*
+                        * Let the socket buffer allocator know that
+                        * we are having trouble reclaiming LRU pages.
+                        *
+                        * For hysteresis keep the pressure state
+                        * asserted for a second in which subsequent
+                        * pressure events can occur.
+                        */
+                       memcg->socket_pressure = jiffies + HZ;
+               }
+#endif
+       }
 }
 
 /**
@@ -276,7 +318,7 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio)
         * to the vmpressure() basically means that we signal 'critical'
         * level.
         */
-       vmpressure(gfp, memcg, vmpressure_win, 0);
+       vmpressure(gfp, memcg, true, vmpressure_win, 0);
 }
 
 /**