Btrfs: fix deadlock between direct IO reads and buffered writes
[cascardo/linux.git] / mm / shmem.c
index 642471b..440e2a7 100644 (file)
@@ -359,6 +359,87 @@ static int shmem_free_swap(struct address_space *mapping,
        return 0;
 }
 
+/*
+ * Determine (in bytes) how many of the shmem object's pages mapped by the
+ * given offsets are swapped out.
+ *
+ * Walks the mapping's radix tree over [start, end) and counts exceptional
+ * entries, which in shmem's page tree stand for swapped-out pages (the
+ * swp_entry_t is encoded in the slot; see shmem_free_swap() above).
+ *
+ * This is safe to call without i_mutex or mapping->tree_lock thanks to RCU,
+ * as long as the inode doesn't go away and racy results are not a problem.
+ */
+unsigned long shmem_partial_swap_usage(struct address_space *mapping,
+                                               pgoff_t start, pgoff_t end)
+{
+       struct radix_tree_iter iter;
+       void **slot;
+       struct page *page;
+       unsigned long swapped = 0;      /* pages found swapped out so far */
+
+       rcu_read_lock();
+
+restart:
+       radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+               /* [start, end) is half-open; stop at the first out-of-range index */
+               if (iter.index >= end)
+                       break;
+
+               page = radix_tree_deref_slot(slot);
+
+               /*
+                * This should only be possible to happen at index 0, so we
+                * don't need to reset the counter, nor do we risk infinite
+                * restarts.
+                *
+                * NOTE(review): the "only at index 0" claim is the author's;
+                * if a deref retry could trigger mid-walk, restarting from
+                * 'start' without resetting 'swapped' would double-count —
+                * confirm against the radix-tree iteration contract.
+                */
+               if (radix_tree_deref_retry(page))
+                       goto restart;
+
+               /* Exceptional entries in a shmem mapping are swap entries. */
+               if (radix_tree_exceptional_entry(page))
+                       swapped++;
+
+               if (need_resched()) {
+                       /*
+                        * Briefly drop RCU so we can be preempted, then
+                        * resume the walk just past the last index seen.
+                        */
+                       cond_resched_rcu();
+                       start = iter.index + 1;
+                       goto restart;
+               }
+       }
+
+       rcu_read_unlock();
+
+       /* Convert a page count into bytes for the caller. */
+       return swapped << PAGE_SHIFT;
+}
+
+/*
+ * Determine (in bytes) how many of the shmem object's pages mapped by the
+ * given vma is swapped out.
+ *
+ * This is safe to call without i_mutex or mapping->tree_lock thanks to RCU,
+ * as long as the inode doesn't go away and racy results are not a problem.
+ */
+unsigned long shmem_swap_usage(struct vm_area_struct *vma)
+{
+       struct inode *inode = file_inode(vma->vm_file);
+       struct shmem_inode_info *info = SHMEM_I(inode);
+       struct address_space *mapping = inode->i_mapping;
+       unsigned long swapped;
+
+       /* Be careful as we don't hold info->lock */
+       swapped = READ_ONCE(info->swapped);
+
+       /*
+        * The easier cases are when the shmem object has nothing in swap, or
+        * the vma maps it whole. Then we can simply use the stats that we
+        * already track.
+        */
+       if (!swapped)
+               return 0;
+
+       /*
+        * vma starts at offset 0 and spans at least the whole file, so the
+        * per-inode counter is exactly the answer (counter is in pages).
+        */
+       if (!vma->vm_pgoff && vma->vm_end - vma->vm_start >= inode->i_size)
+               return swapped << PAGE_SHIFT;
+
+       /* Here comes the more involved part: count only the mapped subrange. */
+       return shmem_partial_swap_usage(mapping,
+                       linear_page_index(vma, vma->vm_start),
+                       linear_page_index(vma, vma->vm_end));
+}
+
 /*
  * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists.
  */
@@ -620,8 +701,7 @@ static void shmem_evict_inode(struct inode *inode)
                        list_del_init(&info->swaplist);
                        mutex_unlock(&shmem_swaplist_mutex);
                }
-       } else
-               kfree(info->symlink);
+       }
 
        simple_xattrs_free(&info->xattrs);
        WARN_ON(inode->i_blocks);
@@ -729,7 +809,8 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
         * the shmem_swaplist_mutex which might hold up shmem_writepage().
         * Charged back to the user (not to caller) when swap account is used.
         */
-       error = mem_cgroup_try_charge(page, current->mm, GFP_KERNEL, &memcg);
+       error = mem_cgroup_try_charge(page, current->mm, GFP_KERNEL, &memcg,
+                       false);
        if (error)
                goto out;
        /* No radix_tree_preload: swap entry keeps a place for page in tree */
@@ -752,9 +833,9 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
        if (error) {
                if (error != -ENOMEM)
                        error = 0;
-               mem_cgroup_cancel_charge(page, memcg);
+               mem_cgroup_cancel_charge(page, memcg, false);
        } else
-               mem_cgroup_commit_charge(page, memcg, true);
+               mem_cgroup_commit_charge(page, memcg, true, false);
 out:
        unlock_page(page);
        page_cache_release(page);
@@ -830,6 +911,9 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
        if (!swap.val)
                goto redirty;
 
+       if (mem_cgroup_try_charge_swap(page, swap))
+               goto free_swap;
+
        /*
         * Add inode to shmem_unuse()'s list of swapped-out inodes,
         * if it's not already there.  Do it now before the page is
@@ -858,6 +942,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
        }
 
        mutex_unlock(&shmem_swaplist_mutex);
+free_swap:
        swapcache_free(swap);
 redirty:
        set_page_dirty(page);
@@ -1004,7 +1089,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
        copy_highpage(newpage, oldpage);
        flush_dcache_page(newpage);
 
-       __set_page_locked(newpage);
+       __SetPageLocked(newpage);
        SetPageUptodate(newpage);
        SetPageSwapBacked(newpage);
        set_page_private(newpage, swap_index);
@@ -1137,7 +1222,8 @@ repeat:
                                goto failed;
                }
 
-               error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg);
+               error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg,
+                               false);
                if (!error) {
                        error = shmem_add_to_page_cache(page, mapping, index,
                                                swp_to_radix_entry(swap));
@@ -1154,14 +1240,14 @@ repeat:
                         * "repeat": reading a hole and writing should succeed.
                         */
                        if (error) {
-                               mem_cgroup_cancel_charge(page, memcg);
+                               mem_cgroup_cancel_charge(page, memcg, false);
                                delete_from_swap_cache(page);
                        }
                }
                if (error)
                        goto failed;
 
-               mem_cgroup_commit_charge(page, memcg, true);
+               mem_cgroup_commit_charge(page, memcg, true, false);
 
                spin_lock(&info->lock);
                info->swapped--;
@@ -1196,11 +1282,12 @@ repeat:
                }
 
                __SetPageSwapBacked(page);
-               __set_page_locked(page);
+               __SetPageLocked(page);
                if (sgp == SGP_WRITE)
                        __SetPageReferenced(page);
 
-               error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg);
+               error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg,
+                               false);
                if (error)
                        goto decused;
                error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
@@ -1210,10 +1297,10 @@ repeat:
                        radix_tree_preload_end();
                }
                if (error) {
-                       mem_cgroup_cancel_charge(page, memcg);
+                       mem_cgroup_cancel_charge(page, memcg, false);
                        goto decused;
                }
-               mem_cgroup_commit_charge(page, memcg, false);
+               mem_cgroup_commit_charge(page, memcg, false, false);
                lru_cache_add_anon(page);
 
                spin_lock(&info->lock);
@@ -1814,7 +1901,7 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
        if (whence != SEEK_DATA && whence != SEEK_HOLE)
                return generic_file_llseek_size(file, offset, whence,
                                        MAX_LFS_FILESIZE, i_size_read(inode));
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
        /* We're holding i_mutex so we can access i_size directly */
 
        if (offset < 0)
@@ -1838,7 +1925,7 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
 
        if (offset >= 0)
                offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE);
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return offset;
 }
 
@@ -2003,7 +2090,7 @@ int shmem_add_seals(struct file *file, unsigned int seals)
        if (seals & ~(unsigned int)F_ALL_SEALS)
                return -EINVAL;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        if (info->seals & F_SEAL_SEAL) {
                error = -EPERM;
@@ -2026,7 +2113,7 @@ int shmem_add_seals(struct file *file, unsigned int seals)
        error = 0;
 
 unlock:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return error;
 }
 EXPORT_SYMBOL_GPL(shmem_add_seals);
@@ -2076,7 +2163,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
        if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
                return -EOPNOTSUPP;
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
        if (mode & FALLOC_FL_PUNCH_HOLE) {
                struct address_space *mapping = file->f_mapping;
@@ -2189,7 +2276,7 @@ undone:
        inode->i_private = NULL;
        spin_unlock(&inode->i_lock);
 out:
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return error;
 }
 
@@ -2461,13 +2548,12 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
        info = SHMEM_I(inode);
        inode->i_size = len-1;
        if (len <= SHORT_SYMLINK_LEN) {
-               info->symlink = kmemdup(symname, len, GFP_KERNEL);
-               if (!info->symlink) {
+               inode->i_link = kmemdup(symname, len, GFP_KERNEL);
+               if (!inode->i_link) {
                        iput(inode);
                        return -ENOMEM;
                }
                inode->i_op = &shmem_short_symlink_operations;
-               inode->i_link = info->symlink;
        } else {
                inode_nohighmem(inode);
                error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
@@ -3044,6 +3130,7 @@ static struct inode *shmem_alloc_inode(struct super_block *sb)
 static void shmem_destroy_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
+       kfree(inode->i_link);
        kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
 }
 
@@ -3064,7 +3151,7 @@ static int shmem_init_inodecache(void)
 {
        shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
                                sizeof(struct shmem_inode_info),
-                               0, SLAB_PANIC, shmem_init_inode);
+                               0, SLAB_PANIC|SLAB_ACCOUNT, shmem_init_inode);
        return 0;
 }