Merge branch 'for-linus-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/mason...
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 12 Feb 2016 17:21:28 +0000 (09:21 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 12 Feb 2016 17:21:28 +0000 (09:21 -0800)
Pull btrfs fixes from Chris Mason:
 "This has a few fixes from Filipe, along with a readdir fix from Dave
  that we've been testing for some time"

* 'for-linus-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  btrfs: properly set the termination value of ctx->pos in readdir
  Btrfs: fix hang on extent buffer lock caused by the inode_paths ioctl
  Btrfs: remove no longer used function extent_read_full_page_nolock()
  Btrfs: fix page reading in extent_same ioctl leading to csum errors
  Btrfs: fix invalid page accesses in extent_same (dedup) ioctl

fs/btrfs/backref.c
fs/btrfs/compression.c
fs/btrfs/delayed-inode.c
fs/btrfs/delayed-inode.h
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/inode.c
fs/btrfs/ioctl.c

index b90cd37..f6dac40 100644 (file)
@@ -1406,7 +1406,8 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
                        read_extent_buffer(eb, dest + bytes_left,
                                           name_off, name_len);
                if (eb != eb_in) {
-                       btrfs_tree_read_unlock_blocking(eb);
+                       if (!path->skip_locking)
+                               btrfs_tree_read_unlock_blocking(eb);
                        free_extent_buffer(eb);
                }
                ret = btrfs_find_item(fs_root, path, parent, 0,
@@ -1426,9 +1427,10 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
                eb = path->nodes[0];
                /* make sure we can use eb after releasing the path */
                if (eb != eb_in) {
-                       atomic_inc(&eb->refs);
-                       btrfs_tree_read_lock(eb);
-                       btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+                       if (!path->skip_locking)
+                               btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+                       path->nodes[0] = NULL;
+                       path->locks[0] = 0;
                }
                btrfs_release_path(path);
                iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
index c473c42..3346cd8 100644 (file)
@@ -637,11 +637,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
        faili = nr_pages - 1;
        cb->nr_pages = nr_pages;
 
-       /* In the parent-locked case, we only locked the range we are
-        * interested in.  In all other cases, we can opportunistically
-        * cache decompressed data that goes beyond the requested range. */
-       if (!(bio_flags & EXTENT_BIO_PARENT_LOCKED))
-               add_ra_bio_pages(inode, em_start + em_len, cb);
+       add_ra_bio_pages(inode, em_start + em_len, cb);
 
        /* include any pages we added in add_ra-bio_pages */
        uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
index 0be47e4..b57daa8 100644 (file)
@@ -1689,7 +1689,7 @@ int btrfs_should_delete_dir_index(struct list_head *del_list,
  *
  */
 int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
-                                   struct list_head *ins_list)
+                                   struct list_head *ins_list, bool *emitted)
 {
        struct btrfs_dir_item *di;
        struct btrfs_delayed_item *curr, *next;
@@ -1733,6 +1733,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
 
                if (over)
                        return 1;
+               *emitted = true;
        }
        return 0;
 }
index f70119f..0167853 100644 (file)
@@ -144,7 +144,7 @@ void btrfs_put_delayed_items(struct list_head *ins_list,
 int btrfs_should_delete_dir_index(struct list_head *del_list,
                                  u64 index);
 int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
-                                   struct list_head *ins_list);
+                                   struct list_head *ins_list, bool *emitted);
 
 /* for init */
 int __init btrfs_delayed_inode_init(void);
index 2e7c97a..392592d 100644 (file)
@@ -2897,12 +2897,11 @@ static int __do_readpage(struct extent_io_tree *tree,
        struct block_device *bdev;
        int ret;
        int nr = 0;
-       int parent_locked = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
        size_t pg_offset = 0;
        size_t iosize;
        size_t disk_io_size;
        size_t blocksize = inode->i_sb->s_blocksize;
-       unsigned long this_bio_flag = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
+       unsigned long this_bio_flag = 0;
 
        set_page_extent_mapped(page);
 
@@ -2942,18 +2941,16 @@ static int __do_readpage(struct extent_io_tree *tree,
                        kunmap_atomic(userpage);
                        set_extent_uptodate(tree, cur, cur + iosize - 1,
                                            &cached, GFP_NOFS);
-                       if (!parent_locked)
-                               unlock_extent_cached(tree, cur,
-                                                    cur + iosize - 1,
-                                                    &cached, GFP_NOFS);
+                       unlock_extent_cached(tree, cur,
+                                            cur + iosize - 1,
+                                            &cached, GFP_NOFS);
                        break;
                }
                em = __get_extent_map(inode, page, pg_offset, cur,
                                      end - cur + 1, get_extent, em_cached);
                if (IS_ERR_OR_NULL(em)) {
                        SetPageError(page);
-                       if (!parent_locked)
-                               unlock_extent(tree, cur, end);
+                       unlock_extent(tree, cur, end);
                        break;
                }
                extent_offset = cur - em->start;
@@ -3038,12 +3035,9 @@ static int __do_readpage(struct extent_io_tree *tree,
 
                        set_extent_uptodate(tree, cur, cur + iosize - 1,
                                            &cached, GFP_NOFS);
-                       if (parent_locked)
-                               free_extent_state(cached);
-                       else
-                               unlock_extent_cached(tree, cur,
-                                                    cur + iosize - 1,
-                                                    &cached, GFP_NOFS);
+                       unlock_extent_cached(tree, cur,
+                                            cur + iosize - 1,
+                                            &cached, GFP_NOFS);
                        cur = cur + iosize;
                        pg_offset += iosize;
                        continue;
@@ -3052,8 +3046,7 @@ static int __do_readpage(struct extent_io_tree *tree,
                if (test_range_bit(tree, cur, cur_end,
                                   EXTENT_UPTODATE, 1, NULL)) {
                        check_page_uptodate(tree, page);
-                       if (!parent_locked)
-                               unlock_extent(tree, cur, cur + iosize - 1);
+                       unlock_extent(tree, cur, cur + iosize - 1);
                        cur = cur + iosize;
                        pg_offset += iosize;
                        continue;
@@ -3063,8 +3056,7 @@ static int __do_readpage(struct extent_io_tree *tree,
                 */
                if (block_start == EXTENT_MAP_INLINE) {
                        SetPageError(page);
-                       if (!parent_locked)
-                               unlock_extent(tree, cur, cur + iosize - 1);
+                       unlock_extent(tree, cur, cur + iosize - 1);
                        cur = cur + iosize;
                        pg_offset += iosize;
                        continue;
@@ -3083,8 +3075,7 @@ static int __do_readpage(struct extent_io_tree *tree,
                        *bio_flags = this_bio_flag;
                } else {
                        SetPageError(page);
-                       if (!parent_locked)
-                               unlock_extent(tree, cur, cur + iosize - 1);
+                       unlock_extent(tree, cur, cur + iosize - 1);
                }
                cur = cur + iosize;
                pg_offset += iosize;
@@ -3213,20 +3204,6 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
        return ret;
 }
 
-int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
-                                get_extent_t *get_extent, int mirror_num)
-{
-       struct bio *bio = NULL;
-       unsigned long bio_flags = EXTENT_BIO_PARENT_LOCKED;
-       int ret;
-
-       ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num,
-                           &bio_flags, READ, NULL);
-       if (bio)
-               ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
-       return ret;
-}
-
 static noinline void update_nr_written(struct page *page,
                                      struct writeback_control *wbc,
                                      unsigned long nr_written)
index 0377413..880d529 100644 (file)
@@ -29,7 +29,6 @@
  */
 #define EXTENT_BIO_COMPRESSED 1
 #define EXTENT_BIO_TREE_LOG 2
-#define EXTENT_BIO_PARENT_LOCKED 4
 #define EXTENT_BIO_FLAG_SHIFT 16
 
 /* these are bit numbers for test/set bit */
@@ -210,8 +209,6 @@ static inline int lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
 int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
 int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
                          get_extent_t *get_extent, int mirror_num);
-int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
-                                get_extent_t *get_extent, int mirror_num);
 int __init extent_io_init(void);
 void extent_io_exit(void);
 
index 5f06eb1..151b7c7 100644 (file)
@@ -5717,6 +5717,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
        char *name_ptr;
        int name_len;
        int is_curr = 0;        /* ctx->pos points to the current index? */
+       bool emitted;
 
        /* FIXME, use a real flag for deciding about the key type */
        if (root->fs_info->tree_root == root)
@@ -5745,6 +5746,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
        if (ret < 0)
                goto err;
 
+       emitted = false;
        while (1) {
                leaf = path->nodes[0];
                slot = path->slots[0];
@@ -5824,6 +5826,7 @@ skip:
 
                        if (over)
                                goto nopos;
+                       emitted = true;
                        di_len = btrfs_dir_name_len(leaf, di) +
                                 btrfs_dir_data_len(leaf, di) + sizeof(*di);
                        di_cur += di_len;
@@ -5836,11 +5839,20 @@ next:
        if (key_type == BTRFS_DIR_INDEX_KEY) {
                if (is_curr)
                        ctx->pos++;
-               ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
+               ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list, &emitted);
                if (ret)
                        goto nopos;
        }
 
+       /*
+        * If we haven't emitted any dir entry, we must not touch ctx->pos as
+        * it was was set to the termination value in previous call. We assume
+        * that "." and ".." were emitted if we reach this point and set the
+        * termination value as well for an empty directory.
+        */
+       if (ctx->pos > 2 && !emitted)
+               goto nopos;
+
        /* Reached end of directory/root. Bump pos past the last item. */
        ctx->pos++;
 
index 952172c..48aee98 100644 (file)
@@ -2794,24 +2794,29 @@ out:
 static struct page *extent_same_get_page(struct inode *inode, pgoff_t index)
 {
        struct page *page;
-       struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
 
        page = grab_cache_page(inode->i_mapping, index);
        if (!page)
-               return NULL;
+               return ERR_PTR(-ENOMEM);
 
        if (!PageUptodate(page)) {
-               if (extent_read_full_page_nolock(tree, page, btrfs_get_extent,
-                                                0))
-                       return NULL;
+               int ret;
+
+               ret = btrfs_readpage(NULL, page);
+               if (ret)
+                       return ERR_PTR(ret);
                lock_page(page);
                if (!PageUptodate(page)) {
                        unlock_page(page);
                        page_cache_release(page);
-                       return NULL;
+                       return ERR_PTR(-EIO);
+               }
+               if (page->mapping != inode->i_mapping) {
+                       unlock_page(page);
+                       page_cache_release(page);
+                       return ERR_PTR(-EAGAIN);
                }
        }
-       unlock_page(page);
 
        return page;
 }
@@ -2823,17 +2828,31 @@ static int gather_extent_pages(struct inode *inode, struct page **pages,
        pgoff_t index = off >> PAGE_CACHE_SHIFT;
 
        for (i = 0; i < num_pages; i++) {
+again:
                pages[i] = extent_same_get_page(inode, index + i);
-               if (!pages[i])
-                       return -ENOMEM;
+               if (IS_ERR(pages[i])) {
+                       int err = PTR_ERR(pages[i]);
+
+                       if (err == -EAGAIN)
+                               goto again;
+                       pages[i] = NULL;
+                       return err;
+               }
        }
        return 0;
 }
 
-static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
+static int lock_extent_range(struct inode *inode, u64 off, u64 len,
+                            bool retry_range_locking)
 {
-       /* do any pending delalloc/csum calc on src, one way or
-          another, and lock file content */
+       /*
+        * Do any pending delalloc/csum calculations on inode, one way or
+        * another, and lock file content.
+        * The locking order is:
+        *
+        *   1) pages
+        *   2) range in the inode's io tree
+        */
        while (1) {
                struct btrfs_ordered_extent *ordered;
                lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
@@ -2851,8 +2870,11 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
                unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
                if (ordered)
                        btrfs_put_ordered_extent(ordered);
+               if (!retry_range_locking)
+                       return -EAGAIN;
                btrfs_wait_ordered_range(inode, off, len);
        }
+       return 0;
 }
 
 static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2)
@@ -2877,15 +2899,24 @@ static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
        unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
 }
 
-static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
-                                    struct inode *inode2, u64 loff2, u64 len)
+static int btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
+                                   struct inode *inode2, u64 loff2, u64 len,
+                                   bool retry_range_locking)
 {
+       int ret;
+
        if (inode1 < inode2) {
                swap(inode1, inode2);
                swap(loff1, loff2);
        }
-       lock_extent_range(inode1, loff1, len);
-       lock_extent_range(inode2, loff2, len);
+       ret = lock_extent_range(inode1, loff1, len, retry_range_locking);
+       if (ret)
+               return ret;
+       ret = lock_extent_range(inode2, loff2, len, retry_range_locking);
+       if (ret)
+               unlock_extent(&BTRFS_I(inode1)->io_tree, loff1,
+                             loff1 + len - 1);
+       return ret;
 }
 
 struct cmp_pages {
@@ -2901,11 +2932,15 @@ static void btrfs_cmp_data_free(struct cmp_pages *cmp)
 
        for (i = 0; i < cmp->num_pages; i++) {
                pg = cmp->src_pages[i];
-               if (pg)
+               if (pg) {
+                       unlock_page(pg);
                        page_cache_release(pg);
+               }
                pg = cmp->dst_pages[i];
-               if (pg)
+               if (pg) {
+                       unlock_page(pg);
                        page_cache_release(pg);
+               }
        }
        kfree(cmp->src_pages);
        kfree(cmp->dst_pages);
@@ -2966,6 +3001,8 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
 
                src_page = cmp->src_pages[i];
                dst_page = cmp->dst_pages[i];
+               ASSERT(PageLocked(src_page));
+               ASSERT(PageLocked(dst_page));
 
                addr = kmap_atomic(src_page);
                dst_addr = kmap_atomic(dst_page);
@@ -3078,14 +3115,46 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
                goto out_unlock;
        }
 
+again:
        ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp);
        if (ret)
                goto out_unlock;
 
        if (same_inode)
-               lock_extent_range(src, same_lock_start, same_lock_len);
+               ret = lock_extent_range(src, same_lock_start, same_lock_len,
+                                       false);
        else
-               btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
+               ret = btrfs_double_extent_lock(src, loff, dst, dst_loff, len,
+                                              false);
+       /*
+        * If one of the inodes has dirty pages in the respective range or
+        * ordered extents, we need to flush dellaloc and wait for all ordered
+        * extents in the range. We must unlock the pages and the ranges in the
+        * io trees to avoid deadlocks when flushing delalloc (requires locking
+        * pages) and when waiting for ordered extents to complete (they require
+        * range locking).
+        */
+       if (ret == -EAGAIN) {
+               /*
+                * Ranges in the io trees already unlocked. Now unlock all
+                * pages before waiting for all IO to complete.
+                */
+               btrfs_cmp_data_free(&cmp);
+               if (same_inode) {
+                       btrfs_wait_ordered_range(src, same_lock_start,
+                                                same_lock_len);
+               } else {
+                       btrfs_wait_ordered_range(src, loff, len);
+                       btrfs_wait_ordered_range(dst, dst_loff, len);
+               }
+               goto again;
+       }
+       ASSERT(ret == 0);
+       if (WARN_ON(ret)) {
+               /* ranges in the io trees already unlocked */
+               btrfs_cmp_data_free(&cmp);
+               return ret;
+       }
 
        /* pass original length for comparison so we stay within i_size */
        ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp);
@@ -3795,9 +3864,15 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
                u64 lock_start = min_t(u64, off, destoff);
                u64 lock_len = max_t(u64, off, destoff) + len - lock_start;
 
-               lock_extent_range(src, lock_start, lock_len);
+               ret = lock_extent_range(src, lock_start, lock_len, true);
        } else {
-               btrfs_double_extent_lock(src, off, inode, destoff, len);
+               ret = btrfs_double_extent_lock(src, off, inode, destoff, len,
+                                              true);
+       }
+       ASSERT(ret == 0);
+       if (WARN_ON(ret)) {
+               /* ranges in the io trees already unlocked */
+               goto out_unlock;
        }
 
        ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);