tmpfs: security xattr setting on inode creation
[cascardo/linux.git] / mm / vmscan.c
index c52b235..57d8ef6 100644 (file)
@@ -1138,7 +1138,7 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
  * @mz:                The mem_cgroup_zone to pull pages from.
  * @dst:       The temp list to put pages on to.
  * @nr_scanned:        The number of pages that were scanned.
- * @order:     The caller's attempted allocation order
+ * @sc:                The scan_control struct for this reclaim session
  * @mode:      One of the LRU isolation modes
  * @active:    True [1] if isolating active pages
  * @file:      True [1] if isolating file [!anon] pages
@@ -1147,8 +1147,8 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
  */
 static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                struct mem_cgroup_zone *mz, struct list_head *dst,
-               unsigned long *nr_scanned, int order, isolate_mode_t mode,
-               int active, int file)
+               unsigned long *nr_scanned, struct scan_control *sc,
+               isolate_mode_t mode, int active, int file)
 {
        struct lruvec *lruvec;
        struct list_head *src;
@@ -1194,7 +1194,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                        BUG();
                }
 
-               if (!order)
+               if (!sc->order || !(sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM))
                        continue;
 
                /*
@@ -1208,8 +1208,8 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                 */
                zone_id = page_zone_id(page);
                page_pfn = page_to_pfn(page);
-               pfn = page_pfn & ~((1 << order) - 1);
-               end_pfn = pfn + (1 << order);
+               pfn = page_pfn & ~((1 << sc->order) - 1);
+               end_pfn = pfn + (1 << sc->order);
                for (; pfn < end_pfn; pfn++) {
                        struct page *cursor_page;
 
@@ -1275,7 +1275,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 
        *nr_scanned = scan;
 
-       trace_mm_vmscan_lru_isolate(order,
+       trace_mm_vmscan_lru_isolate(sc->order,
                        nr_to_scan, scan,
                        nr_taken,
                        nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed,
@@ -1413,7 +1413,6 @@ update_isolated_counts(struct mem_cgroup_zone *mz,
                       unsigned long *nr_anon,
                       unsigned long *nr_file)
 {
-       struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
        struct zone *zone = mz->zone;
        unsigned int count[NR_LRU_LISTS] = { 0, };
        unsigned long nr_active = 0;
@@ -1434,6 +1433,7 @@ update_isolated_counts(struct mem_cgroup_zone *mz,
                count[lru] += numpages;
        }
 
+       preempt_disable();
        __count_vm_events(PGDEACTIVATE, nr_active);
 
        __mod_zone_page_state(zone, NR_ACTIVE_FILE,
@@ -1448,8 +1448,9 @@ update_isolated_counts(struct mem_cgroup_zone *mz,
        *nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
        *nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
 
-       reclaim_stat->recent_scanned[0] += *nr_anon;
-       reclaim_stat->recent_scanned[1] += *nr_file;
+       __mod_zone_page_state(zone, NR_ISOLATED_ANON, *nr_anon);
+       __mod_zone_page_state(zone, NR_ISOLATED_FILE, *nr_file);
+       preempt_enable();
 }
 
 /*
@@ -1509,8 +1510,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
        unsigned long nr_file;
        unsigned long nr_dirty = 0;
        unsigned long nr_writeback = 0;
-       isolate_mode_t reclaim_mode = ISOLATE_INACTIVE;
+       isolate_mode_t isolate_mode = ISOLATE_INACTIVE;
        struct zone *zone = mz->zone;
+       struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
 
        while (unlikely(too_many_isolated(zone, file, sc))) {
                congestion_wait(BLK_RW_ASYNC, HZ/10);
@@ -1522,20 +1524,19 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
 
        set_reclaim_mode(priority, sc, false);
        if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM)
-               reclaim_mode |= ISOLATE_ACTIVE;
+               isolate_mode |= ISOLATE_ACTIVE;
 
        lru_add_drain();
 
        if (!sc->may_unmap)
-               reclaim_mode |= ISOLATE_UNMAPPED;
+               isolate_mode |= ISOLATE_UNMAPPED;
        if (!sc->may_writepage)
-               reclaim_mode |= ISOLATE_CLEAN;
+               isolate_mode |= ISOLATE_CLEAN;
 
        spin_lock_irq(&zone->lru_lock);
 
-       nr_taken = isolate_lru_pages(nr_to_scan, mz, &page_list,
-                                    &nr_scanned, sc->order,
-                                    reclaim_mode, 0, file);
+       nr_taken = isolate_lru_pages(nr_to_scan, mz, &page_list, &nr_scanned,
+                                    sc, isolate_mode, 0, file);
        if (global_reclaim(sc)) {
                zone->pages_scanned += nr_scanned;
                if (current_is_kswapd())
@@ -1545,19 +1546,13 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
                        __count_zone_vm_events(PGSCAN_DIRECT, zone,
                                               nr_scanned);
        }
+       spin_unlock_irq(&zone->lru_lock);
 
-       if (nr_taken == 0) {
-               spin_unlock_irq(&zone->lru_lock);
+       if (nr_taken == 0)
                return 0;
-       }
 
        update_isolated_counts(mz, &page_list, &nr_anon, &nr_file);
 
-       __mod_zone_page_state(zone, NR_ISOLATED_ANON, nr_anon);
-       __mod_zone_page_state(zone, NR_ISOLATED_FILE, nr_file);
-
-       spin_unlock_irq(&zone->lru_lock);
-
        nr_reclaimed = shrink_page_list(&page_list, mz, sc, priority,
                                                &nr_dirty, &nr_writeback);
 
@@ -1570,6 +1565,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
 
        spin_lock_irq(&zone->lru_lock);
 
+       reclaim_stat->recent_scanned[0] += nr_anon;
+       reclaim_stat->recent_scanned[1] += nr_file;
+
        if (current_is_kswapd())
                __count_vm_events(KSWAPD_STEAL, nr_reclaimed);
        __count_zone_vm_events(PGSTEAL, zone, nr_reclaimed);
@@ -1643,18 +1641,6 @@ static void move_active_pages_to_lru(struct zone *zone,
        unsigned long pgmoved = 0;
        struct page *page;
 
-       if (buffer_heads_over_limit) {
-               spin_unlock_irq(&zone->lru_lock);
-               list_for_each_entry(page, list, lru) {
-                       if (page_has_private(page) && trylock_page(page)) {
-                               if (page_has_private(page))
-                                       try_to_release_page(page, 0);
-                               unlock_page(page);
-                       }
-               }
-               spin_lock_irq(&zone->lru_lock);
-       }
-
        while (!list_empty(list)) {
                struct lruvec *lruvec;
 
@@ -1699,21 +1685,20 @@ static void shrink_active_list(unsigned long nr_to_scan,
        struct page *page;
        struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
        unsigned long nr_rotated = 0;
-       isolate_mode_t reclaim_mode = ISOLATE_ACTIVE;
+       isolate_mode_t isolate_mode = ISOLATE_ACTIVE;
        struct zone *zone = mz->zone;
 
        lru_add_drain();
 
        if (!sc->may_unmap)
-               reclaim_mode |= ISOLATE_UNMAPPED;
+               isolate_mode |= ISOLATE_UNMAPPED;
        if (!sc->may_writepage)
-               reclaim_mode |= ISOLATE_CLEAN;
+               isolate_mode |= ISOLATE_CLEAN;
 
        spin_lock_irq(&zone->lru_lock);
 
-       nr_taken = isolate_lru_pages(nr_to_scan, mz, &l_hold,
-                                    &nr_scanned, sc->order,
-                                    reclaim_mode, 1, file);
+       nr_taken = isolate_lru_pages(nr_to_scan, mz, &l_hold, &nr_scanned, sc,
+                                    isolate_mode, 1, file);
        if (global_reclaim(sc))
                zone->pages_scanned += nr_scanned;
 
@@ -1737,6 +1722,14 @@ static void shrink_active_list(unsigned long nr_to_scan,
                        continue;
                }
 
+               if (unlikely(buffer_heads_over_limit)) {
+                       if (page_has_private(page) && trylock_page(page)) {
+                               if (page_has_private(page))
+                                       try_to_release_page(page, 0);
+                               unlock_page(page);
+                       }
+               }
+
                if (page_referenced(page, 0, mz->mem_cgroup, &vm_flags)) {
                        nr_rotated += hpage_nr_pages(page);
                        /*
@@ -2112,7 +2105,12 @@ restart:
                 * with multiple processes reclaiming pages, the total
                 * freeing target can get unreasonably large.
                 */
-               if (nr_reclaimed >= nr_to_reclaim && priority < DEF_PRIORITY)
+               if (nr_reclaimed >= nr_to_reclaim)
+                       nr_to_reclaim = 0;
+               else
+                       nr_to_reclaim -= nr_reclaimed;
+
+               if (!nr_to_reclaim && priority < DEF_PRIORITY)
                        break;
        }
        blk_finish_plug(&plug);
@@ -2195,7 +2193,7 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
         * If compaction is deferred, reclaim up to a point where
         * compaction will have a chance of success when re-enabled
         */
-       if (compaction_deferred(zone))
+       if (compaction_deferred(zone, sc->order))
                return watermark_ok;
 
        /* If compaction is not ready to start, keep reclaiming */
@@ -2235,6 +2233,14 @@ static bool shrink_zones(int priority, struct zonelist *zonelist,
        unsigned long nr_soft_scanned;
        bool aborted_reclaim = false;
 
+       /*
+        * If the number of buffer_heads in the machine exceeds the maximum
+        * allowed level, force direct reclaim to scan the highmem zone as
+        * highmem pages could be pinning lowmem pages storing buffer_heads
+        */
+       if (buffer_heads_over_limit)
+               sc->gfp_mask |= __GFP_HIGHMEM;
+
        for_each_zone_zonelist_nodemask(zone, z, zonelist,
                                        gfp_zone(sc->gfp_mask), sc->nodemask) {
                if (!populated_zone(zone))
@@ -2724,6 +2730,17 @@ loop_again:
                         */
                        age_active_anon(zone, &sc, priority);
 
+                       /*
+                        * If the number of buffer_heads in the machine
+                        * exceeds the maximum allowed level and this node
+                        * has a highmem zone, force kswapd to reclaim from
+                        * it to relieve lowmem pressure.
+                        */
+                       if (buffer_heads_over_limit && is_highmem_idx(i)) {
+                               end_zone = i;
+                               break;
+                       }
+
                        if (!zone_watermark_ok_safe(zone, order,
                                        high_wmark_pages(zone), 0, 0)) {
                                end_zone = i;
@@ -2753,7 +2770,7 @@ loop_again:
                 */
                for (i = 0; i <= end_zone; i++) {
                        struct zone *zone = pgdat->node_zones + i;
-                       int nr_slab;
+                       int nr_slab, testorder;
                        unsigned long balance_gap;
 
                        if (!populated_zone(zone))
@@ -2786,7 +2803,21 @@ loop_again:
                                (zone->present_pages +
                                        KSWAPD_ZONE_BALANCE_GAP_RATIO-1) /
                                KSWAPD_ZONE_BALANCE_GAP_RATIO);
-                       if (!zone_watermark_ok_safe(zone, order,
+                       /*
+                        * Kswapd reclaims only single pages with compaction
+                        * enabled. Trying too hard to reclaim until contiguous
+                        * free pages have become available can hurt performance
+                        * by evicting too much useful data from memory.
+                        * Do not reclaim more than needed for compaction.
+                        */
+                       testorder = order;
+                       if (COMPACTION_BUILD && order &&
+                                       compaction_suitable(zone, order) !=
+                                               COMPACT_SKIPPED)
+                               testorder = 0;
+
+                       if ((buffer_heads_over_limit && is_highmem_idx(i)) ||
+                                   !zone_watermark_ok_safe(zone, testorder,
                                        high_wmark_pages(zone) + balance_gap,
                                        end_zone, 0)) {
                                shrink_zone(priority, zone, &sc);
@@ -2815,7 +2846,7 @@ loop_again:
                                continue;
                        }
 
-                       if (!zone_watermark_ok_safe(zone, order,
+                       if (!zone_watermark_ok_safe(zone, testorder,
                                        high_wmark_pages(zone), end_zone, 0)) {
                                all_zones_ok = 0;
                                /*
@@ -2903,6 +2934,8 @@ out:
         * and it is potentially going to sleep here.
         */
        if (order) {
+               int zones_need_compaction = 1;
+
                for (i = 0; i <= end_zone; i++) {
                        struct zone *zone = pgdat->node_zones + i;
 
@@ -2912,6 +2945,10 @@ out:
                        if (zone->all_unreclaimable && priority != DEF_PRIORITY)
                                continue;
 
+                       /* Would compaction fail due to lack of free memory? */
+                       if (compaction_suitable(zone, order) == COMPACT_SKIPPED)
+                               goto loop_again;
+
                        /* Confirm the zone is balanced for order-0 */
                        if (!zone_watermark_ok(zone, 0,
                                        high_wmark_pages(zone), 0, 0)) {
@@ -2919,11 +2956,17 @@ out:
                                goto loop_again;
                        }
 
+                       /* Check if the memory needs to be defragmented. */
+                       if (zone_watermark_ok(zone, order,
+                                   low_wmark_pages(zone), *classzone_idx, 0))
+                               zones_need_compaction = 0;
+
                        /* If balanced, clear the congested flag */
                        zone_clear_flag(zone, ZONE_CONGESTED);
-                       if (i <= *classzone_idx)
-                               balanced += zone->present_pages;
                }
+
+               if (zones_need_compaction)
+                       compact_pgdat(pgdat, order);
        }
 
        /*