diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index e248194..f4cd7d8 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -267,26 +267,35 @@ static void wb_min_max_ratio(struct bdi_writeback *wb,
  */
 
 /**
- * zone_dirtyable_memory - number of dirtyable pages in a zone
- * @zone: the zone
+ * node_dirtyable_memory - number of dirtyable pages in a node
+ * @pgdat: the node
  *
- * Returns the zone's number of pages potentially available for dirty
- * page cache.  This is the base value for the per-zone dirty limits.
+ * Returns the node's number of pages potentially available for dirty
+ * page cache.  This is the base value for the per-node dirty limits.
  */
-static unsigned long zone_dirtyable_memory(struct zone *zone)
+static unsigned long node_dirtyable_memory(struct pglist_data *pgdat)
 {
-       unsigned long nr_pages;
+       unsigned long nr_pages = 0;
+       int z;
+
+       for (z = 0; z < MAX_NR_ZONES; z++) {
+               struct zone *zone = pgdat->node_zones + z;
+
+               if (!populated_zone(zone))
+                       continue;
+
+               nr_pages += zone_page_state(zone, NR_FREE_PAGES);
+       }
 
-       nr_pages = zone_page_state(zone, NR_FREE_PAGES);
        /*
         * Pages reserved for the kernel should not be considered
         * dirtyable, to prevent a situation where reclaim has to
         * clean pages in order to balance the zones.
         */
-       nr_pages -= min(nr_pages, zone->totalreserve_pages);
+       nr_pages -= min(nr_pages, pgdat->totalreserve_pages);
 
-       nr_pages += zone_page_state(zone, NR_INACTIVE_FILE);
-       nr_pages += zone_page_state(zone, NR_ACTIVE_FILE);
+       nr_pages += node_page_state(pgdat, NR_INACTIVE_FILE);
+       nr_pages += node_page_state(pgdat, NR_ACTIVE_FILE);
 
        return nr_pages;
 }
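
Two things worth calling out in the new node_dirtyable_memory(): NR_FREE_PAGES
is still a per-zone counter, so it is summed across the node's populated zones,
while NR_INACTIVE_FILE/NR_ACTIVE_FILE have moved to node-level counters and are
read via node_page_state(). Also, nr_pages is unsigned, so subtracting
totalreserve_pages directly could wrap; the min() clamps the result at zero
instead. A minimal userspace sketch of that idiom (illustrative names, not
kernel API):

#include <stdio.h>

/* clamp-to-zero subtraction, mirroring nr_pages -= min(nr_pages, reserve) */
static unsigned long sub_clamped(unsigned long total, unsigned long reserve)
{
        return total - (total < reserve ? total : reserve);
}

int main(void)
{
        printf("%lu\n", sub_clamped(100, 30)); /* 70 */
        printf("%lu\n", sub_clamped(10, 30));  /* 0, not a wrapped huge value */
        return 0;
}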
@@ -299,13 +308,26 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
        int i;
 
        for_each_node_state(node, N_HIGH_MEMORY) {
-               for (i = 0; i < MAX_NR_ZONES; i++) {
-                       struct zone *z = &NODE_DATA(node)->node_zones[i];
+               for (i = ZONE_NORMAL + 1; i < MAX_NR_ZONES; i++) {
+                       struct zone *z;
+                       unsigned long nr_pages;
 
-                       if (is_highmem(z))
-                               x += zone_dirtyable_memory(z);
+                       if (!is_highmem_idx(i))
+                               continue;
+
+                       z = &NODE_DATA(node)->node_zones[i];
+                       if (!populated_zone(z))
+                               continue;
+
+                       nr_pages = zone_page_state(z, NR_FREE_PAGES);
+                       /* watch for underflows */
+                       nr_pages -= min(nr_pages, high_wmark_pages(z));
+                       nr_pages += zone_page_state(z, NR_ZONE_INACTIVE_FILE);
+                       nr_pages += zone_page_state(z, NR_ZONE_ACTIVE_FILE);
+                       x += nr_pages;
                }
        }
+
        /*
         * Unreclaimable memory (kernel memory or anonymous memory
         * without swap) can bring down the dirtyable pages below
@@ -348,8 +370,8 @@ static unsigned long global_dirtyable_memory(void)
         */
        x -= min(x, totalreserve_pages);
 
-       x += global_page_state(NR_INACTIVE_FILE);
-       x += global_page_state(NR_ACTIVE_FILE);
+       x += global_node_page_state(NR_INACTIVE_FILE);
+       x += global_node_page_state(NR_ACTIVE_FILE);
 
        if (!vm_highmem_is_dirtyable)
                x -= highmem_dirtyable_memory(x);
@@ -445,23 +467,23 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
 }
 
 /**
- * zone_dirty_limit - maximum number of dirty pages allowed in a zone
- * @zone: the zone
+ * node_dirty_limit - maximum number of dirty pages allowed in a node
+ * @pgdat: the node
  *
- * Returns the maximum number of dirty pages allowed in a zone, based
- * on the zone's dirtyable memory.
+ * Returns the maximum number of dirty pages allowed in a node, based
+ * on the node's dirtyable memory.
  */
-static unsigned long zone_dirty_limit(struct zone *zone)
+static unsigned long node_dirty_limit(struct pglist_data *pgdat)
 {
-       unsigned long zone_memory = zone_dirtyable_memory(zone);
+       unsigned long node_memory = node_dirtyable_memory(pgdat);
        struct task_struct *tsk = current;
        unsigned long dirty;
 
        if (vm_dirty_bytes)
                dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) *
-                       zone_memory / global_dirtyable_memory();
+                       node_memory / global_dirtyable_memory();
        else
-               dirty = vm_dirty_ratio * zone_memory / 100;
+               dirty = vm_dirty_ratio * node_memory / 100;
 
        if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk))
                dirty += dirty / 4;
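
To make the scaling concrete, here is the ratio branch of node_dirty_limit()
worked through in userspace, assuming the default vm_dirty_ratio of 20 and a
node with 1048576 dirtyable pages (both values are assumptions for
illustration):

#include <stdio.h>

int main(void)
{
        unsigned long node_memory = 1048576;    /* assumed dirtyable pages */
        unsigned long dirty_ratio = 20;         /* default vm_dirty_ratio */
        unsigned long dirty = dirty_ratio * node_memory / 100;

        printf("node limit: %lu pages\n", dirty);             /* 209715 */
        printf("with bonus: %lu pages\n", dirty + dirty / 4); /* +25%: 262143 */
        return 0;
}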
@@ -470,19 +492,22 @@ static unsigned long zone_dirty_limit(struct zone *zone)
 }
 
 /**
- * zone_dirty_ok - tells whether a zone is within its dirty limits
- * @zone: the zone to check
+ * node_dirty_ok - tells whether a node is within its dirty limits
+ * @pgdat: the node to check
  *
- * Returns %true when the dirty pages in @zone are within the zone's
+ * Returns %true when the dirty pages in @pgdat are within the node's
  * dirty limit, %false if the limit is exceeded.
  */
-bool zone_dirty_ok(struct zone *zone)
+bool node_dirty_ok(struct pglist_data *pgdat)
 {
-       unsigned long limit = zone_dirty_limit(zone);
+       unsigned long limit = node_dirty_limit(pgdat);
+       unsigned long nr_pages = 0;
 
-       return zone_page_state(zone, NR_FILE_DIRTY) +
-              zone_page_state(zone, NR_UNSTABLE_NFS) +
-              zone_page_state(zone, NR_WRITEBACK) <= limit;
+       nr_pages += node_page_state(pgdat, NR_FILE_DIRTY);
+       nr_pages += node_page_state(pgdat, NR_UNSTABLE_NFS);
+       nr_pages += node_page_state(pgdat, NR_WRITEBACK);
+
+       return nr_pages <= limit;
 }
 
 int dirty_background_ratio_handler(struct ctl_table *table, int write,
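
node_dirty_ok() keeps the old semantics (dirty + unstable NFS + writeback
compared against the limit), just accumulated into a local instead of one
chained expression; the caller, presumably the page allocator's placement
logic as with zone_dirty_ok() before, is untouched by this hunk. A toy model
of the check with stand-in counters (none of these names are kernel symbols):

#include <stdbool.h>
#include <stdio.h>

/* stand-in stats; the kernel reads these via node_page_state() */
struct node_stats {
        unsigned long dirty, unstable_nfs, writeback;
};

static bool node_dirty_ok_model(const struct node_stats *s, unsigned long limit)
{
        return s->dirty + s->unstable_nfs + s->writeback <= limit;
}

int main(void)
{
        struct node_stats s = { .dirty = 1500, .unstable_nfs = 50, .writeback = 300 };

        printf("%s\n", node_dirty_ok_model(&s, 2000) ? "ok" : "over"); /* ok */
        return 0;
}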
@@ -1570,10 +1595,10 @@ static void balance_dirty_pages(struct address_space *mapping,
                 * written to the server's write cache, but has not yet
                 * been flushed to permanent storage.
                 */
-               nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
-                                       global_page_state(NR_UNSTABLE_NFS);
+               nr_reclaimable = global_node_page_state(NR_FILE_DIRTY) +
+                                       global_node_page_state(NR_UNSTABLE_NFS);
                gdtc->avail = global_dirtyable_memory();
-               gdtc->dirty = nr_reclaimable + global_page_state(NR_WRITEBACK);
+               gdtc->dirty = nr_reclaimable + global_node_page_state(NR_WRITEBACK);
 
                domain_dirty_limits(gdtc);
 
@@ -1910,8 +1935,8 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb)
         * as we're trying to decide whether to put more under writeback.
         */
        gdtc->avail = global_dirtyable_memory();
-       gdtc->dirty = global_page_state(NR_FILE_DIRTY) +
-                     global_page_state(NR_UNSTABLE_NFS);
+       gdtc->dirty = global_node_page_state(NR_FILE_DIRTY) +
+                     global_node_page_state(NR_UNSTABLE_NFS);
        domain_dirty_limits(gdtc);
 
        if (gdtc->dirty > gdtc->bg_thresh)
@@ -1955,8 +1980,8 @@ void throttle_vm_writeout(gfp_t gfp_mask)
                  */
                 dirty_thresh += dirty_thresh / 10;      /* wheeee... */
 
-                if (global_page_state(NR_UNSTABLE_NFS) +
-                       global_page_state(NR_WRITEBACK) <= dirty_thresh)
+                if (global_node_page_state(NR_UNSTABLE_NFS) +
+                       global_node_page_state(NR_WRITEBACK) <= dirty_thresh)
                                break;
                 congestion_wait(BLK_RW_ASYNC, HZ/10);
 
@@ -1984,8 +2009,8 @@ int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
 void laptop_mode_timer_fn(unsigned long data)
 {
        struct request_queue *q = (struct request_queue *)data;
-       int nr_pages = global_page_state(NR_FILE_DIRTY) +
-               global_page_state(NR_UNSTABLE_NFS);
+       int nr_pages = global_node_page_state(NR_FILE_DIRTY) +
+               global_node_page_state(NR_UNSTABLE_NFS);
        struct bdi_writeback *wb;
 
        /*
@@ -2436,8 +2461,9 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
                wb = inode_to_wb(inode);
 
                mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_DIRTY);
-               __inc_zone_page_state(page, NR_FILE_DIRTY);
-               __inc_zone_page_state(page, NR_DIRTIED);
+               __inc_node_page_state(page, NR_FILE_DIRTY);
+               __inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
+               __inc_node_page_state(page, NR_DIRTIED);
                __inc_wb_stat(wb, WB_RECLAIMABLE);
                __inc_wb_stat(wb, WB_DIRTIED);
                task_io_account_write(PAGE_SIZE);
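
This hunk sets the pattern for the accounting changes below: the node-level
counters (NR_FILE_DIRTY, NR_DIRTIED, NR_WRITEBACK) feed the dirty-limit
machinery, while the new per-zone NR_ZONE_WRITE_PENDING mirrors
dirty-or-writeback state at zone granularity, so every path that touches one
must touch the other in step. A toy model of the paired bookkeeping
(illustrative names only):

#include <stdio.h>

/* paired counters: node-wide for dirty limits, per-zone for reclaim */
struct counters {
        unsigned long node_file_dirty;
        unsigned long zone_write_pending;
};

static void account_dirtied(struct counters *c)
{
        c->node_file_dirty++;           /* like NR_FILE_DIRTY (node) */
        c->zone_write_pending++;        /* like NR_ZONE_WRITE_PENDING (zone) */
}

static void account_cleaned(struct counters *c)
{
        c->node_file_dirty--;
        c->zone_write_pending--;
}

int main(void)
{
        struct counters c = { 0, 0 };

        account_dirtied(&c);
        account_cleaned(&c);
        printf("%lu %lu\n", c.node_file_dirty, c.zone_write_pending); /* 0 0 */
        return 0;
}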
@@ -2457,7 +2483,8 @@ void account_page_cleaned(struct page *page, struct address_space *mapping,
 {
        if (mapping_cap_account_dirty(mapping)) {
                mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY);
-               dec_zone_page_state(page, NR_FILE_DIRTY);
+               dec_node_page_state(page, NR_FILE_DIRTY);
+               dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
                dec_wb_stat(wb, WB_RECLAIMABLE);
                task_io_account_cancelled_write(PAGE_SIZE);
        }
@@ -2525,7 +2552,7 @@ void account_page_redirty(struct page *page)
 
                wb = unlocked_inode_to_wb_begin(inode, &locked);
                current->nr_dirtied--;
-               dec_zone_page_state(page, NR_DIRTIED);
+               dec_node_page_state(page, NR_DIRTIED);
                dec_wb_stat(wb, WB_DIRTIED);
                unlocked_inode_to_wb_end(inode, locked);
        }
@@ -2563,6 +2590,7 @@ int set_page_dirty(struct page *page)
 {
        struct address_space *mapping = page_mapping(page);
 
+       page = compound_head(page);
        if (likely(mapping)) {
                int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
                /*
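
The compound_head() hop is new here: for a compound (huge) page, dirty state
lives on the head page, so dirtying through a tail page must be redirected
before the flag and counters are updated. A toy model of that redirection
(the real kernel encodes the head pointer inside struct page; this is purely
illustrative):

#include <stdio.h>

struct page_model {
        struct page_model *head;        /* NULL for a head page */
        int dirty;
};

static struct page_model *compound_head_model(struct page_model *p)
{
        return p->head ? p->head : p;   /* tail pages resolve to their head */
}

int main(void)
{
        struct page_model head = { NULL, 0 };
        struct page_model tail = { &head, 0 };

        compound_head_model(&tail)->dirty = 1;  /* dirty the tail... */
        printf("head dirty: %d\n", head.dirty); /* ...lands on the head: 1 */
        return 0;
}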
@@ -2712,7 +2740,8 @@ int clear_page_dirty_for_io(struct page *page)
                wb = unlocked_inode_to_wb_begin(inode, &locked);
                if (TestClearPageDirty(page)) {
                        mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY);
-                       dec_zone_page_state(page, NR_FILE_DIRTY);
+                       dec_node_page_state(page, NR_FILE_DIRTY);
+                       dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
                        dec_wb_stat(wb, WB_RECLAIMABLE);
                        ret = 1;
                }
@@ -2747,14 +2776,20 @@ int test_clear_page_writeback(struct page *page)
                                __wb_writeout_inc(wb);
                        }
                }
+
+               if (mapping->host && !mapping_tagged(mapping,
+                                                    PAGECACHE_TAG_WRITEBACK))
+                       sb_clear_inode_writeback(mapping->host);
+
                spin_unlock_irqrestore(&mapping->tree_lock, flags);
        } else {
                ret = TestClearPageWriteback(page);
        }
        if (ret) {
                mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
-               dec_zone_page_state(page, NR_WRITEBACK);
-               inc_zone_page_state(page, NR_WRITTEN);
+               dec_node_page_state(page, NR_WRITEBACK);
+               dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
+               inc_node_page_state(page, NR_WRITTEN);
        }
        unlock_page_memcg(page);
        return ret;
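
This hunk pairs with the one below: __test_set_page_writeback() puts the inode
on its superblock's under-writeback list when the mapping's first page goes
under writeback (on_wblist is sampled before the tag is set, so the 0 -> 1
transition is still observable), and test_clear_page_writeback() takes it back
off once no tagged pages remain, which lets sync wait on exactly the inodes
that still have writeback outstanding. A toy model of the edge-triggered
bookkeeping, using a counter in place of the radix-tree tag (names are
stand-ins, not kernel API):

#include <stdbool.h>
#include <stdio.h>

struct inode_model {
        unsigned long nr_writeback;     /* pages under writeback */
        bool on_sb_list;                /* on the superblock's list? */
};

static void writeback_start(struct inode_model *i)
{
        if (i->nr_writeback++ == 0)
                i->on_sb_list = true;   /* like sb_mark_inode_writeback() */
}

static void writeback_end(struct inode_model *i)
{
        if (--i->nr_writeback == 0)
                i->on_sb_list = false;  /* like sb_clear_inode_writeback() */
}

int main(void)
{
        struct inode_model ino = { 0, false };

        writeback_start(&ino);
        writeback_start(&ino);
        writeback_end(&ino);
        printf("on list: %d\n", ino.on_sb_list); /* 1: one page pending */
        writeback_end(&ino);
        printf("on list: %d\n", ino.on_sb_list); /* 0 */
        return 0;
}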
@@ -2774,11 +2809,24 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
                spin_lock_irqsave(&mapping->tree_lock, flags);
                ret = TestSetPageWriteback(page);
                if (!ret) {
+                       bool on_wblist;
+
+                       on_wblist = mapping_tagged(mapping,
+                                                  PAGECACHE_TAG_WRITEBACK);
+
                        radix_tree_tag_set(&mapping->page_tree,
                                                page_index(page),
                                                PAGECACHE_TAG_WRITEBACK);
                        if (bdi_cap_account_writeback(bdi))
                                __inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
+
+                       /*
+                        * We can come through here when swapping anonymous
+                        * pages, so we don't necessarily have an inode to track
+                        * for sync.
+                        */
+                       if (mapping->host && !on_wblist)
+                               sb_mark_inode_writeback(mapping->host);
                }
                if (!PageDirty(page))
                        radix_tree_tag_clear(&mapping->page_tree,
@@ -2794,7 +2842,8 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
        }
        if (!ret) {
                mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
-               inc_zone_page_state(page, NR_WRITEBACK);
+               inc_node_page_state(page, NR_WRITEBACK);
+               inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
        }
        unlock_page_memcg(page);
        return ret;