mm: memcontrol: rewrite charge API
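
The page cache charge path below moves from the single-shot
mem_cgroup_charge_file()/mem_cgroup_uncharge_cache_page() calls to a
three-step transaction: mem_cgroup_try_charge() reserves the charge,
mem_cgroup_commit_charge() makes it permanent once the page is safely
in the radix tree, and mem_cgroup_cancel_charge() hands the reservation
back on any failure in between. A minimal sketch of that sequence, with
the locking and radix-tree insertion condensed into a hypothetical
insert_into_cache() helper (the real flow, including the PageHuge()
exemption, is in the __add_to_page_cache_locked() hunks below):

	struct mem_cgroup *memcg;
	int error;

	/* reserve the charge against the current task's memcg */
	error = mem_cgroup_try_charge(page, current->mm, gfp_mask, &memcg);
	if (error)
		return error;

	error = insert_into_cache(page);	/* hypothetical stand-in */
	if (error) {
		/* insertion failed: hand the reservation back */
		mem_cgroup_cancel_charge(page, memcg);
		return error;
	}

	/* page is visible in the cache: finalize the charge */
	mem_cgroup_commit_charge(page, memcg, false);
	return 0;
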
diff --git a/mm/filemap.c b/mm/filemap.c
index 900edfa..349a40e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -31,6 +31,7 @@
 #include <linux/security.h>
 #include <linux/cpuset.h>
 #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
+#include <linux/hugetlb.h>
 #include <linux/memcontrol.h>
 #include <linux/cleancache.h>
 #include <linux/rmap.h>
@@ -241,18 +242,6 @@ void delete_from_page_cache(struct page *page)
 }
 EXPORT_SYMBOL(delete_from_page_cache);
 
-static int sleep_on_page(void *word)
-{
-       io_schedule();
-       return 0;
-}
-
-static int sleep_on_page_killable(void *word)
-{
-       sleep_on_page(word);
-       return fatal_signal_pending(current) ? -EINTR : 0;
-}
-
 static int filemap_check_errors(struct address_space *mapping)
 {
        int ret = 0;
@@ -560,19 +549,24 @@ static int __add_to_page_cache_locked(struct page *page,
                                      pgoff_t offset, gfp_t gfp_mask,
                                      void **shadowp)
 {
+       int huge = PageHuge(page);
+       struct mem_cgroup *memcg;
        int error;
 
        VM_BUG_ON_PAGE(!PageLocked(page), page);
        VM_BUG_ON_PAGE(PageSwapBacked(page), page);
 
-       error = mem_cgroup_charge_file(page, current->mm,
-                                       gfp_mask & GFP_RECLAIM_MASK);
-       if (error)
-               return error;
+       if (!huge) {
+               error = mem_cgroup_try_charge(page, current->mm,
+                                             gfp_mask, &memcg);
+               if (error)
+                       return error;
+       }
 
        error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM);
        if (error) {
-               mem_cgroup_uncharge_cache_page(page);
+               if (!huge)
+                       mem_cgroup_cancel_charge(page, memcg);
                return error;
        }
 
@@ -587,13 +581,16 @@ static int __add_to_page_cache_locked(struct page *page,
                goto err_insert;
        __inc_zone_page_state(page, NR_FILE_PAGES);
        spin_unlock_irq(&mapping->tree_lock);
+       if (!huge)
+               mem_cgroup_commit_charge(page, memcg, false);
        trace_mm_filemap_add_to_page_cache(page);
        return 0;
 err_insert:
        page->mapping = NULL;
        /* Leave page->index set: truncation relies upon it */
        spin_unlock_irq(&mapping->tree_lock);
-       mem_cgroup_uncharge_cache_page(page);
+       if (!huge)
+               mem_cgroup_cancel_charge(page, memcg);
        page_cache_release(page);
        return error;
 }
@@ -692,7 +689,7 @@ void wait_on_page_bit(struct page *page, int bit_nr)
        DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
 
        if (test_bit(bit_nr, &page->flags))
-               __wait_on_bit(page_waitqueue(page), &wait, sleep_on_page,
+               __wait_on_bit(page_waitqueue(page), &wait, bit_wait_io,
                                                        TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(wait_on_page_bit);
@@ -705,7 +702,7 @@ int wait_on_page_bit_killable(struct page *page, int bit_nr)
                return 0;
 
        return __wait_on_bit(page_waitqueue(page), &wait,
-                            sleep_on_page_killable, TASK_KILLABLE);
+                            bit_wait_io, TASK_KILLABLE);
 }
 
 /**
@@ -806,7 +803,7 @@ void __lock_page(struct page *page)
 {
        DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
 
-       __wait_on_bit_lock(page_waitqueue(page), &wait, sleep_on_page,
+       __wait_on_bit_lock(page_waitqueue(page), &wait, bit_wait_io,
                                                        TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(__lock_page);
@@ -816,10 +813,21 @@ int __lock_page_killable(struct page *page)
        DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
 
        return __wait_on_bit_lock(page_waitqueue(page), &wait,
-                                       sleep_on_page_killable, TASK_KILLABLE);
+                                       bit_wait_io, TASK_KILLABLE);
 }
 EXPORT_SYMBOL_GPL(__lock_page_killable);
 
+/*
+ * Return values:
+ * 1 - page is locked; mmap_sem is still held.
+ * 0 - page is not locked.
+ *     mmap_sem has been released (up_read()), unless flags had both
+ *     FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in
+ *     which case mmap_sem is still held.
+ *
+ * If neither ALLOW_RETRY nor KILLABLE are set, will always return 1
+ * with the page locked and the mmap_sem unperturbed.
+ */
 int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
                         unsigned int flags)
 {
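
A caller-side sketch of the contract documented above (filemap_fault()
restates the same rules in the hunk further down): a zero return means
the page was not locked and, unless both FAULT_FLAG_ALLOW_RETRY and
FAULT_FLAG_RETRY_NOWAIT were passed, mmap_sem has already been dropped,
so the only safe response is to report VM_FAULT_RETRY. This assumes the
usual fault-path locals page, mm and flags; handle_locked_page() is
illustrative only:

	/* mmap_sem is held on entry */
	if (!lock_page_or_retry(page, mm, flags)) {
		/*
		 * Page not locked.  mmap_sem was released for us unless
		 * ALLOW_RETRY and RETRY_NOWAIT were both set, so do not
		 * touch the mapping again; just ask for a retry.
		 */
		return VM_FAULT_RETRY;
	}

	/* Page locked, mmap_sem still held. */
	handle_locked_page(page);	/* illustrative only */
	unlock_page(page);
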
@@ -1103,9 +1111,9 @@ no_page:
                if (WARN_ON_ONCE(!(fgp_flags & FGP_LOCK)))
                        fgp_flags |= FGP_LOCK;
 
-               /* Init accessed so avoit atomic mark_page_accessed later */
+               /* Init accessed so avoid atomic mark_page_accessed later */
                if (fgp_flags & FGP_ACCESSED)
-                       init_page_accessed(page);
+                       __SetPageReferenced(page);
 
                err = add_to_page_cache_lru(page, mapping, offset, radix_gfp_mask);
                if (unlikely(err)) {
@@ -1839,6 +1847,18 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma,
  * The goto's are kind of ugly, but this streamlines the normal case of having
  * it in the page cache, and handles the special cases reasonably without
  * having a lot of duplicated code.
+ *
+ * vma->vm_mm->mmap_sem must be held on entry.
+ *
+ * If our return value has VM_FAULT_RETRY set, it's because
+ * lock_page_or_retry() returned 0.
+ * The mmap_sem has usually been released in this case.
+ * See __lock_page_or_retry() for the exception.
+ *
+ * If our return value does not have VM_FAULT_RETRY set, the mmap_sem
+ * has not been released.
+ *
+ * We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set.
  */
 int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {