Merge branch 'fst-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux...
[cascardo/linux.git] / fs / btrfs / extent_io.c
index c3ec30d..66a7551 100644 (file)
@@ -20,6 +20,7 @@
 #include "locking.h"
 #include "rcu-string.h"
 #include "backref.h"
+#include "transaction.h"
 
 static struct kmem_cache *extent_state_cache;
 static struct kmem_cache *extent_buffer_cache;
@@ -74,8 +75,7 @@ void btrfs_leak_debug_check(void)
 
        while (!list_empty(&buffers)) {
                eb = list_entry(buffers.next, struct extent_buffer, leak_list);
-               printk(KERN_ERR "BTRFS: buffer leak start %llu len %lu "
-                      "refs %d\n",
+               pr_err("BTRFS: buffer leak start %llu len %lu refs %d\n",
                       eb->start, eb->len, atomic_read(&eb->refs));
                list_del(&eb->leak_list);
                kmem_cache_free(extent_buffer_cache, eb);
@@ -460,8 +460,7 @@ static int insert_state(struct extent_io_tree *tree,
        if (node) {
                struct extent_state *found;
                found = rb_entry(node, struct extent_state, rb_node);
-               printk(KERN_ERR "BTRFS: found node %llu %llu on insert of "
-                      "%llu %llu\n",
+               pr_err("BTRFS: found node %llu %llu on insert of %llu %llu\n",
                       found->start, found->end, start, end);
                return -EEXIST;
        }
@@ -572,9 +571,8 @@ alloc_extent_state_atomic(struct extent_state *prealloc)
 
 static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
 {
-       btrfs_panic(tree_fs_info(tree), err, "Locking error: "
-                   "Extent tree was modified by another "
-                   "thread while locked.");
+       btrfs_panic(tree_fs_info(tree), err,
+                   "Locking error: Extent tree was modified by another thread while locked.");
 }
 
 /*
@@ -1729,7 +1727,7 @@ out_failed:
 }
 
 void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
-                                struct page *locked_page,
+                                u64 delalloc_end, struct page *locked_page,
                                 unsigned clear_bits,
                                 unsigned long page_ops)
 {
@@ -2122,8 +2120,9 @@ int clean_io_failure(struct inode *inode, u64 start, struct page *page,
 
        if (failrec->in_validation) {
                /* there was no real error, just free the record */
-               pr_debug("clean_io_failure: freeing dummy error at %llu\n",
-                        failrec->start);
+               btrfs_debug(fs_info,
+                       "clean_io_failure: freeing dummy error at %llu",
+                       failrec->start);
                goto out;
        }
        if (fs_info->sb->s_flags & MS_RDONLY)
@@ -2189,6 +2188,7 @@ void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end)
 int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
                struct io_failure_record **failrec_ret)
 {
+       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct io_failure_record *failrec;
        struct extent_map *em;
        struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
@@ -2236,8 +2236,9 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
                                                 em->compress_type);
                }
 
-               pr_debug("Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu\n",
-                        logical, start, failrec->len);
+               btrfs_debug(fs_info,
+                       "Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu",
+                       logical, start, failrec->len);
 
                failrec->logical = logical;
                free_extent_map(em);
@@ -2255,9 +2256,10 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
                        return ret;
                }
        } else {
-               pr_debug("Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d\n",
-                        failrec->logical, failrec->start, failrec->len,
-                        failrec->in_validation);
+               btrfs_debug(fs_info,
+                       "Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d",
+                       failrec->logical, failrec->start, failrec->len,
+                       failrec->in_validation);
                /*
                 * when data can be on disk more than twice, add to failrec here
                 * (e.g. with a list for failed_mirror) to make
@@ -2273,18 +2275,19 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
 int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
                           struct io_failure_record *failrec, int failed_mirror)
 {
+       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        int num_copies;
 
-       num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info,
-                                     failrec->logical, failrec->len);
+       num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len);
        if (num_copies == 1) {
                /*
                 * we only have a single copy of the data, so don't bother with
                 * all the retry and error correction code that follows. no
                 * matter what the error is, it is very likely to persist.
                 */
-               pr_debug("Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n",
-                        num_copies, failrec->this_mirror, failed_mirror);
+               btrfs_debug(fs_info,
+                       "Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d",
+                       num_copies, failrec->this_mirror, failed_mirror);
                return 0;
        }
 
@@ -2323,8 +2326,9 @@ int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
        }
 
        if (failrec->this_mirror > num_copies) {
-               pr_debug("Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
-                        num_copies, failrec->this_mirror, failed_mirror);
+               btrfs_debug(fs_info,
+                       "Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d",
+                       num_copies, failrec->this_mirror, failed_mirror);
                return 0;
        }
 
@@ -2415,8 +2419,9 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
        }
        bio_set_op_attrs(bio, REQ_OP_READ, read_mode);
 
-       pr_debug("Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d\n",
-                read_mode, failrec->this_mirror, failrec->in_validation);
+       btrfs_debug(btrfs_sb(inode->i_sb),
+               "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
+               read_mode, failrec->this_mirror, failrec->in_validation);
 
        ret = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror,
                                         failrec->bio_flags, 0);
@@ -2484,8 +2489,7 @@ static void end_bio_extent_writepage(struct bio *bio)
                                        bvec->bv_offset, bvec->bv_len);
                        else
                                btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info,
-                                  "incomplete page write in btrfs with offset %u and "
-                                  "length %u",
+                                  "incomplete page write in btrfs with offset %u and length %u",
                                        bvec->bv_offset, bvec->bv_len);
                }
 
@@ -2541,10 +2545,12 @@ static void end_bio_extent_readpage(struct bio *bio)
        bio_for_each_segment_all(bvec, bio, i) {
                struct page *page = bvec->bv_page;
                struct inode *inode = page->mapping->host;
+               struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 
-               pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
-                        "mirror=%u\n", (u64)bio->bi_iter.bi_sector,
-                        bio->bi_error, io_bio->mirror_num);
+               btrfs_debug(fs_info,
+                       "end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
+                       (u64)bio->bi_iter.bi_sector, bio->bi_error,
+                       io_bio->mirror_num);
                tree = &BTRFS_I(inode)->io_tree;
 
                /* We always issue full-page reads, but if some block
@@ -2554,13 +2560,12 @@ static void end_bio_extent_readpage(struct bio *bio)
                 * if they don't add up to a full page.  */
                if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
                        if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
-                               btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info,
-                                  "partial page read in btrfs with offset %u and length %u",
+                               btrfs_err(fs_info,
+                                       "partial page read in btrfs with offset %u and length %u",
                                        bvec->bv_offset, bvec->bv_len);
                        else
-                               btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info,
-                                  "incomplete page read in btrfs with offset %u and "
-                                  "length %u",
+                               btrfs_info(fs_info,
+                                       "incomplete page read in btrfs with offset %u and length %u",
                                        bvec->bv_offset, bvec->bv_len);
                }
 
@@ -3624,7 +3629,6 @@ static void end_extent_buffer_writeback(struct extent_buffer *eb)
 static void set_btree_ioerr(struct page *page)
 {
        struct extent_buffer *eb = (struct extent_buffer *)page->private;
-       struct btrfs_inode *btree_ino = BTRFS_I(eb->fs_info->btree_inode);
 
        SetPageError(page);
        if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
@@ -3670,13 +3674,13 @@ static void set_btree_ioerr(struct page *page)
         */
        switch (eb->log_index) {
        case -1:
-               set_bit(BTRFS_INODE_BTREE_ERR, &btree_ino->runtime_flags);
+               set_bit(BTRFS_FS_BTREE_ERR, &eb->fs_info->flags);
                break;
        case 0:
-               set_bit(BTRFS_INODE_BTREE_LOG1_ERR, &btree_ino->runtime_flags);
+               set_bit(BTRFS_FS_LOG1_ERR, &eb->fs_info->flags);
                break;
        case 1:
-               set_bit(BTRFS_INODE_BTREE_LOG2_ERR, &btree_ino->runtime_flags);
+               set_bit(BTRFS_FS_LOG2_ERR, &eb->fs_info->flags);
                break;
        default:
                BUG(); /* unexpected, logic error */
@@ -3721,8 +3725,10 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
        struct block_device *bdev = fs_info->fs_devices->latest_bdev;
        struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
        u64 offset = eb->start;
+       u32 nritems;
        unsigned long i, num_pages;
        unsigned long bio_flags = 0;
+       unsigned long start, end;
        int write_flags = (epd->sync_io ? WRITE_SYNC : 0) | REQ_META;
        int ret = 0;
 
@@ -3732,6 +3738,23 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
        if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID)
                bio_flags = EXTENT_BIO_TREE_LOG;
 
+       /* set btree blocks beyond nritems with 0 to avoid stale content. */
+       nritems = btrfs_header_nritems(eb);
+       if (btrfs_header_level(eb) > 0) {
+               end = btrfs_node_key_ptr_offset(nritems);
+
+               memset_extent_buffer(eb, 0, end, eb->len - end);
+       } else {
+               /*
+                * leaf:
+                * header 0 1 2 .. N ... data_N .. data_2 data_1 data_0
+                */
+               start = btrfs_item_nr_offset(nritems);
+               end = btrfs_leaf_data(eb) +
+                     leaf_data_end(fs_info->tree_root, eb);
+               memset_extent_buffer(eb, 0, start, end - start);
+       }
+
        for (i = 0; i < num_pages; i++) {
                struct page *p = eb->pages[i];
 
@@ -4487,11 +4510,24 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                        flags |= (FIEMAP_EXTENT_DELALLOC |
                                  FIEMAP_EXTENT_UNKNOWN);
                } else if (fieinfo->fi_extents_max) {
+                       struct btrfs_trans_handle *trans;
+
                        u64 bytenr = em->block_start -
                                (em->start - em->orig_start);
 
                        disko = em->block_start + offset_in_extent;
 
+                       /*
+                        * We need a trans handle to get delayed refs
+                        */
+                       trans = btrfs_join_transaction(root);
+                       /*
+                        * It's OK if we can't start a trans we can still check
+                        * from commit_root
+                        */
+                       if (IS_ERR(trans))
+                               trans = NULL;
+
                        /*
                         * As btrfs supports shared space, this information
                         * can be exported to userspace tools via
@@ -4499,9 +4535,11 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                         * then we're just getting a count and we can skip the
                         * lookup stuff.
                         */
-                       ret = btrfs_check_shared(NULL, root->fs_info,
+                       ret = btrfs_check_shared(trans, root->fs_info,
                                                 root->objectid,
                                                 btrfs_ino(inode), bytenr);
+                       if (trans)
+                               btrfs_end_transaction(trans, root);
                        if (ret < 0)
                                goto out_free;
                        if (ret)
@@ -5173,11 +5211,10 @@ int extent_buffer_uptodate(struct extent_buffer *eb)
 }
 
 int read_extent_buffer_pages(struct extent_io_tree *tree,
-                            struct extent_buffer *eb, u64 start, int wait,
+                            struct extent_buffer *eb, int wait,
                             get_extent_t *get_extent, int mirror_num)
 {
        unsigned long i;
-       unsigned long start_i;
        struct page *page;
        int err;
        int ret = 0;
@@ -5191,16 +5228,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
        if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
                return 0;
 
-       if (start) {
-               WARN_ON(start < eb->start);
-               start_i = (start >> PAGE_SHIFT) -
-                       (eb->start >> PAGE_SHIFT);
-       } else {
-               start_i = 0;
-       }
-
        num_pages = num_extent_pages(eb->start, eb->len);
-       for (i = start_i; i < num_pages; i++) {
+       for (i = 0; i < num_pages; i++) {
                page = eb->pages[i];
                if (wait == WAIT_NONE) {
                        if (!trylock_page(page))
@@ -5209,21 +5238,29 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
                        lock_page(page);
                }
                locked_pages++;
+       }
+       /*
+        * We need to firstly lock all pages to make sure that
+        * the uptodate bit of our pages won't be affected by
+        * clear_extent_buffer_uptodate().
+        */
+       for (i = 0; i < num_pages; i++) {
+               page = eb->pages[i];
                if (!PageUptodate(page)) {
                        num_reads++;
                        all_uptodate = 0;
                }
        }
+
        if (all_uptodate) {
-               if (start_i == 0)
-                       set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
+               set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
                goto unlock_exit;
        }
 
        clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
        eb->read_mirror = 0;
        atomic_set(&eb->io_pages, num_reads);
-       for (i = start_i; i < num_pages; i++) {
+       for (i = 0; i < num_pages; i++) {
                page = eb->pages[i];
 
                if (!PageUptodate(page)) {
@@ -5264,7 +5301,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
        if (ret || wait != WAIT_COMPLETE)
                return ret;
 
-       for (i = start_i; i < num_pages; i++) {
+       for (i = 0; i < num_pages; i++) {
                page = eb->pages[i];
                wait_on_page_locked(page);
                if (!PageUptodate(page))
@@ -5274,12 +5311,10 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
        return ret;
 
 unlock_exit:
-       i = start_i;
        while (locked_pages > 0) {
-               page = eb->pages[i];
-               i++;
-               unlock_page(page);
                locked_pages--;
+               page = eb->pages[locked_pages];
+               unlock_page(page);
        }
        return ret;
 }
@@ -5382,8 +5417,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
        }
 
        if (start + min_len > eb->len) {
-               WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
-                      "wanted %lu %lu\n",
+               WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n",
                       eb->start, eb->len, start, min_len);
                return -EINVAL;
        }
@@ -5741,14 +5775,14 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
 
        if (src_offset + len > dst->len) {
                btrfs_err(dst->fs_info,
-                       "memmove bogus src_offset %lu move "
-                      "len %lu dst len %lu", src_offset, len, dst->len);
+                       "memmove bogus src_offset %lu move len %lu dst len %lu",
+                        src_offset, len, dst->len);
                BUG_ON(1);
        }
        if (dst_offset + len > dst->len) {
                btrfs_err(dst->fs_info,
-                       "memmove bogus dst_offset %lu move "
-                      "len %lu dst len %lu", dst_offset, len, dst->len);
+                       "memmove bogus dst_offset %lu move len %lu dst len %lu",
+                        dst_offset, len, dst->len);
                BUG_ON(1);
        }
 
@@ -5788,13 +5822,15 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
        unsigned long src_i;
 
        if (src_offset + len > dst->len) {
-               btrfs_err(dst->fs_info, "memmove bogus src_offset %lu move "
-                      "len %lu len %lu", src_offset, len, dst->len);
+               btrfs_err(dst->fs_info,
+                         "memmove bogus src_offset %lu move len %lu len %lu",
+                         src_offset, len, dst->len);
                BUG_ON(1);
        }
        if (dst_offset + len > dst->len) {
-               btrfs_err(dst->fs_info, "memmove bogus dst_offset %lu move "
-                      "len %lu len %lu", dst_offset, len, dst->len);
+               btrfs_err(dst->fs_info,
+                         "memmove bogus dst_offset %lu move len %lu len %lu",
+                         dst_offset, len, dst->len);
                BUG_ON(1);
        }
        if (dst_offset < src_offset) {