NFS: nfs_migrate_page() does not wait for FS-Cache to finish with a page
authorDavid Howells <dhowells@redhat.com>
Wed, 5 Dec 2012 13:34:49 +0000 (13:34 +0000)
committerDavid Howells <dhowells@redhat.com>
Thu, 20 Dec 2012 22:12:03 +0000 (22:12 +0000)
nfs_migrate_page() does not wait for FS-Cache to finish with a page, probably
leading to the following bad-page-state:

 BUG: Bad page state in process python-bin  pfn:17d39b
 page:ffffea00053649e8 flags:004000000000100c count:0 mapcount:0 mapping:(null)
index:38686 (Tainted: G    B      ---------------- )
 Pid: 31053, comm: python-bin Tainted: G    B      ----------------
2.6.32-71.24.1.el6.x86_64 #1
 Call Trace:
 [<ffffffff8111bfe7>] bad_page+0x107/0x160
 [<ffffffff8111ee69>] free_hot_cold_page+0x1c9/0x220
 [<ffffffff8111ef19>] __pagevec_free+0x59/0xb0
 [<ffffffff8104b988>] ? flush_tlb_others_ipi+0x128/0x130
 [<ffffffff8112230c>] release_pages+0x21c/0x250
 [<ffffffff8115b92a>] ? remove_migration_pte+0x28a/0x2b0
 [<ffffffff8115f3f8>] ? mem_cgroup_get_reclaim_stat_from_page+0x18/0x70
 [<ffffffff81122687>] ____pagevec_lru_add+0x167/0x180
 [<ffffffff811226f8>] __lru_cache_add+0x58/0x70
 [<ffffffff81122731>] lru_cache_add_lru+0x21/0x40
 [<ffffffff81123f49>] putback_lru_page+0x69/0x100
 [<ffffffff8115c0bd>] migrate_pages+0x13d/0x5d0
 [<ffffffff81122687>] ? ____pagevec_lru_add+0x167/0x180
 [<ffffffff81152ab0>] ? compaction_alloc+0x0/0x370
 [<ffffffff8115255c>] compact_zone+0x4cc/0x600
 [<ffffffff8111cfac>] ? get_page_from_freelist+0x15c/0x820
 [<ffffffff810672f4>] ? check_preempt_wakeup+0x1c4/0x3c0
 [<ffffffff8115290e>] compact_zone_order+0x7e/0xb0
 [<ffffffff81152a49>] try_to_compact_pages+0x109/0x170
 [<ffffffff8111e94d>] __alloc_pages_nodemask+0x5ed/0x850
 [<ffffffff814c9136>] ? thread_return+0x4e/0x778
 [<ffffffff81150d43>] alloc_pages_vma+0x93/0x150
 [<ffffffff81167ea5>] do_huge_pmd_anonymous_page+0x135/0x340
 [<ffffffff814cb6f6>] ? rwsem_down_read_failed+0x26/0x30
 [<ffffffff81136755>] handle_mm_fault+0x245/0x2b0
 [<ffffffff814ce383>] do_page_fault+0x123/0x3a0
 [<ffffffff814cbdf5>] page_fault+0x25/0x30

nfs_migrate_page() calls nfs_fscache_release_page() which doesn't actually wait
- even if __GFP_WAIT is set.  The reason that doesn't wait is that
fscache_maybe_release_page() might deadlock the allocator as the work threads
writing to the cache may all end up sleeping on memory allocation.

However, I wonder if that is actually a problem.  There are a number of things
I can do to deal with this:

 (1) Make nfs_migrate_page() wait.

 (2) Make fscache_maybe_release_page() honour the __GFP_WAIT flag.

 (3) Set a timeout around the wait.

 (4) Make nfs_migrate_page() return an error if the page is still busy.

For the moment, I'll select (2) and (4).

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
fs/fscache/internal.h
fs/fscache/page.c
fs/fscache/stats.c
fs/nfs/write.c

index dcb3e1d..88a48cc 100644 (file)
@@ -200,6 +200,7 @@ extern atomic_t fscache_n_store_vmscan_not_storing;
 extern atomic_t fscache_n_store_vmscan_gone;
 extern atomic_t fscache_n_store_vmscan_busy;
 extern atomic_t fscache_n_store_vmscan_cancelled;
+extern atomic_t fscache_n_store_vmscan_wait;
 
 extern atomic_t fscache_n_marks;
 extern atomic_t fscache_n_uncaches;
index 4dbbca1..f9b2fb3 100644 (file)
@@ -56,6 +56,7 @@ bool __fscache_maybe_release_page(struct fscache_cookie *cookie,
 
        _enter("%p,%p,%x", cookie, page, gfp);
 
+try_again:
        rcu_read_lock();
        val = radix_tree_lookup(&cookie->stores, page->index);
        if (!val) {
@@ -104,11 +105,19 @@ bool __fscache_maybe_release_page(struct fscache_cookie *cookie,
        return true;
 
 page_busy:
-       /* we might want to wait here, but that could deadlock the allocator as
-        * the work threads writing to the cache may all end up sleeping
-        * on memory allocation */
-       fscache_stat(&fscache_n_store_vmscan_busy);
-       return false;
+       /* We will wait here if we're allowed to, but that could deadlock the
+        * allocator as the work threads writing to the cache may all end up
+        * sleeping on memory allocation, so we may need to impose a timeout
+        * too. */
+       if (!(gfp & __GFP_WAIT)) {
+               fscache_stat(&fscache_n_store_vmscan_busy);
+               return false;
+       }
+
+       fscache_stat(&fscache_n_store_vmscan_wait);
+       __fscache_wait_on_page_write(cookie, page);
+       gfp &= ~__GFP_WAIT;
+       goto try_again;
 }
 EXPORT_SYMBOL(__fscache_maybe_release_page);
 
index 51cdaee..8179e8b 100644 (file)
@@ -69,6 +69,7 @@ atomic_t fscache_n_store_vmscan_not_storing;
 atomic_t fscache_n_store_vmscan_gone;
 atomic_t fscache_n_store_vmscan_busy;
 atomic_t fscache_n_store_vmscan_cancelled;
+atomic_t fscache_n_store_vmscan_wait;
 
 atomic_t fscache_n_marks;
 atomic_t fscache_n_uncaches;
@@ -232,11 +233,12 @@ static int fscache_stats_show(struct seq_file *m, void *v)
                   atomic_read(&fscache_n_store_radix_deletes),
                   atomic_read(&fscache_n_store_pages_over_limit));
 
-       seq_printf(m, "VmScan : nos=%u gon=%u bsy=%u can=%u\n",
+       seq_printf(m, "VmScan : nos=%u gon=%u bsy=%u can=%u wt=%u\n",
                   atomic_read(&fscache_n_store_vmscan_not_storing),
                   atomic_read(&fscache_n_store_vmscan_gone),
                   atomic_read(&fscache_n_store_vmscan_busy),
-                  atomic_read(&fscache_n_store_vmscan_cancelled));
+                  atomic_read(&fscache_n_store_vmscan_cancelled),
+                  atomic_read(&fscache_n_store_vmscan_wait));
 
        seq_printf(m, "Ops    : pend=%u run=%u enq=%u can=%u rej=%u\n",
                   atomic_read(&fscache_n_op_pend),
index 5209916..b673be3 100644 (file)
@@ -1794,7 +1794,8 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
        if (PagePrivate(page))
                return -EBUSY;
 
-       nfs_fscache_release_page(page, GFP_KERNEL);
+       if (!nfs_fscache_release_page(page, GFP_KERNEL))
+               return -EBUSY;
 
        return migrate_page(mapping, newpage, page, mode);
 }