Merge branch 'irq-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[cascardo/linux.git] / fs / btrfs / extent_io.c
index 75533ad..66a7551 100644 (file)
@@ -20,6 +20,7 @@
 #include "locking.h"
 #include "rcu-string.h"
 #include "backref.h"
+#include "transaction.h"
 
 static struct kmem_cache *extent_state_cache;
 static struct kmem_cache *extent_buffer_cache;
@@ -74,8 +75,7 @@ void btrfs_leak_debug_check(void)
 
        while (!list_empty(&buffers)) {
                eb = list_entry(buffers.next, struct extent_buffer, leak_list);
-               printk(KERN_ERR "BTRFS: buffer leak start %llu len %lu "
-                      "refs %d\n",
+               pr_err("BTRFS: buffer leak start %llu len %lu refs %d\n",
                       eb->start, eb->len, atomic_read(&eb->refs));
                list_del(&eb->leak_list);
                kmem_cache_free(extent_buffer_cache, eb);
@@ -163,13 +163,13 @@ int __init extent_io_init(void)
 {
        extent_state_cache = kmem_cache_create("btrfs_extent_state",
                        sizeof(struct extent_state), 0,
-                       SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+                       SLAB_MEM_SPREAD, NULL);
        if (!extent_state_cache)
                return -ENOMEM;
 
        extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer",
                        sizeof(struct extent_buffer), 0,
-                       SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+                       SLAB_MEM_SPREAD, NULL);
        if (!extent_buffer_cache)
                goto free_state_cache;
 
@@ -460,8 +460,7 @@ static int insert_state(struct extent_io_tree *tree,
        if (node) {
                struct extent_state *found;
                found = rb_entry(node, struct extent_state, rb_node);
-               printk(KERN_ERR "BTRFS: found node %llu %llu on insert of "
-                      "%llu %llu\n",
+               pr_err("BTRFS: found node %llu %llu on insert of %llu %llu\n",
                       found->start, found->end, start, end);
                return -EEXIST;
        }
@@ -572,9 +571,8 @@ alloc_extent_state_atomic(struct extent_state *prealloc)
 
 static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
 {
-       btrfs_panic(tree_fs_info(tree), err, "Locking error: "
-                   "Extent tree was modified by another "
-                   "thread while locked.");
+       btrfs_panic(tree_fs_info(tree), err,
+                   "Locking error: Extent tree was modified by another thread while locked.");
 }
 
 /*
@@ -1729,7 +1727,7 @@ out_failed:
 }
 
 void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
-                                struct page *locked_page,
+                                u64 delalloc_end, struct page *locked_page,
                                 unsigned clear_bits,
                                 unsigned long page_ops)
 {
@@ -2049,9 +2047,10 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
                return -EIO;
        }
        bio->bi_bdev = dev->bdev;
+       bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_SYNC);
        bio_add_page(bio, page, length, pg_offset);
 
-       if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) {
+       if (btrfsic_submit_bio_wait(bio)) {
                /* try to remap that extent elsewhere? */
                btrfs_bio_counter_dec(fs_info);
                bio_put(bio);
@@ -2121,8 +2120,9 @@ int clean_io_failure(struct inode *inode, u64 start, struct page *page,
 
        if (failrec->in_validation) {
                /* there was no real error, just free the record */
-               pr_debug("clean_io_failure: freeing dummy error at %llu\n",
-                        failrec->start);
+               btrfs_debug(fs_info,
+                       "clean_io_failure: freeing dummy error at %llu",
+                       failrec->start);
                goto out;
        }
        if (fs_info->sb->s_flags & MS_RDONLY)
@@ -2188,6 +2188,7 @@ void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end)
 int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
                struct io_failure_record **failrec_ret)
 {
+       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct io_failure_record *failrec;
        struct extent_map *em;
        struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
@@ -2235,8 +2236,9 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
                                                 em->compress_type);
                }
 
-               pr_debug("Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu\n",
-                        logical, start, failrec->len);
+               btrfs_debug(fs_info,
+                       "Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu",
+                       logical, start, failrec->len);
 
                failrec->logical = logical;
                free_extent_map(em);
@@ -2254,9 +2256,10 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
                        return ret;
                }
        } else {
-               pr_debug("Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d\n",
-                        failrec->logical, failrec->start, failrec->len,
-                        failrec->in_validation);
+               btrfs_debug(fs_info,
+                       "Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d",
+                       failrec->logical, failrec->start, failrec->len,
+                       failrec->in_validation);
                /*
                 * when data can be on disk more than twice, add to failrec here
                 * (e.g. with a list for failed_mirror) to make
@@ -2272,18 +2275,19 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
 int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
                           struct io_failure_record *failrec, int failed_mirror)
 {
+       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        int num_copies;
 
-       num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info,
-                                     failrec->logical, failrec->len);
+       num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len);
        if (num_copies == 1) {
                /*
                 * we only have a single copy of the data, so don't bother with
                 * all the retry and error correction code that follows. no
                 * matter what the error is, it is very likely to persist.
                 */
-               pr_debug("Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n",
-                        num_copies, failrec->this_mirror, failed_mirror);
+               btrfs_debug(fs_info,
+                       "Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d",
+                       num_copies, failrec->this_mirror, failed_mirror);
                return 0;
        }
 
@@ -2322,8 +2326,9 @@ int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
        }
 
        if (failrec->this_mirror > num_copies) {
-               pr_debug("Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
-                        num_copies, failrec->this_mirror, failed_mirror);
+               btrfs_debug(fs_info,
+                       "Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d",
+                       num_copies, failrec->this_mirror, failed_mirror);
                return 0;
        }
 
@@ -2386,7 +2391,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
        int read_mode;
        int ret;
 
-       BUG_ON(failed_bio->bi_rw & REQ_WRITE);
+       BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
 
        ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
        if (ret)
@@ -2412,12 +2417,13 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
                free_io_failure(inode, failrec);
                return -EIO;
        }
+       bio_set_op_attrs(bio, REQ_OP_READ, read_mode);
 
-       pr_debug("Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d\n",
-                read_mode, failrec->this_mirror, failrec->in_validation);
+       btrfs_debug(btrfs_sb(inode->i_sb),
+               "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
+               read_mode, failrec->this_mirror, failrec->in_validation);
 
-       ret = tree->ops->submit_bio_hook(inode, read_mode, bio,
-                                        failrec->this_mirror,
+       ret = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror,
                                         failrec->bio_flags, 0);
        if (ret) {
                free_io_failure(inode, failrec);
@@ -2483,8 +2489,7 @@ static void end_bio_extent_writepage(struct bio *bio)
                                        bvec->bv_offset, bvec->bv_len);
                        else
                                btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info,
-                                  "incomplete page write in btrfs with offset %u and "
-                                  "length %u",
+                                  "incomplete page write in btrfs with offset %u and length %u",
                                        bvec->bv_offset, bvec->bv_len);
                }
 
@@ -2540,10 +2545,12 @@ static void end_bio_extent_readpage(struct bio *bio)
        bio_for_each_segment_all(bvec, bio, i) {
                struct page *page = bvec->bv_page;
                struct inode *inode = page->mapping->host;
+               struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 
-               pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
-                        "mirror=%u\n", (u64)bio->bi_iter.bi_sector,
-                        bio->bi_error, io_bio->mirror_num);
+               btrfs_debug(fs_info,
+                       "end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
+                       (u64)bio->bi_iter.bi_sector, bio->bi_error,
+                       io_bio->mirror_num);
                tree = &BTRFS_I(inode)->io_tree;
 
                /* We always issue full-page reads, but if some block
@@ -2553,13 +2560,12 @@ static void end_bio_extent_readpage(struct bio *bio)
                 * if they don't add up to a full page.  */
                if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
                        if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
-                               btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info,
-                                  "partial page read in btrfs with offset %u and length %u",
+                               btrfs_err(fs_info,
+                                       "partial page read in btrfs with offset %u and length %u",
                                        bvec->bv_offset, bvec->bv_len);
                        else
-                               btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info,
-                                  "incomplete page read in btrfs with offset %u and "
-                                  "length %u",
+                               btrfs_info(fs_info,
+                                       "incomplete page read in btrfs with offset %u and length %u",
                                        bvec->bv_offset, bvec->bv_len);
                }
 
@@ -2696,12 +2702,6 @@ struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
                btrfs_bio->csum = NULL;
                btrfs_bio->csum_allocated = NULL;
                btrfs_bio->end_io = NULL;
-
-#ifdef CONFIG_BLK_CGROUP
-               /* FIXME, put this into bio_clone_bioset */
-               if (bio->bi_css)
-                       bio_associate_blkcg(new, bio->bi_css);
-#endif
        }
        return new;
 }
@@ -2723,8 +2723,8 @@ struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
 }
 
 
-static int __must_check submit_one_bio(int rw, struct bio *bio,
-                                      int mirror_num, unsigned long bio_flags)
+static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
+                                      unsigned long bio_flags)
 {
        int ret = 0;
        struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
@@ -2735,33 +2735,31 @@ static int __must_check submit_one_bio(int rw, struct bio *bio,
        start = page_offset(page) + bvec->bv_offset;
 
        bio->bi_private = NULL;
-
        bio_get(bio);
 
        if (tree->ops && tree->ops->submit_bio_hook)
-               ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
+               ret = tree->ops->submit_bio_hook(page->mapping->host, bio,
                                           mirror_num, bio_flags, start);
        else
-               btrfsic_submit_bio(rw, bio);
+               btrfsic_submit_bio(bio);
 
        bio_put(bio);
        return ret;
 }
 
-static int merge_bio(int rw, struct extent_io_tree *tree, struct page *page,
+static int merge_bio(struct extent_io_tree *tree, struct page *page,
                     unsigned long offset, size_t size, struct bio *bio,
                     unsigned long bio_flags)
 {
        int ret = 0;
        if (tree->ops && tree->ops->merge_bio_hook)
-               ret = tree->ops->merge_bio_hook(rw, page, offset, size, bio,
+               ret = tree->ops->merge_bio_hook(page, offset, size, bio,
                                                bio_flags);
-       BUG_ON(ret < 0);
        return ret;
 
 }
 
-static int submit_extent_page(int rw, struct extent_io_tree *tree,
+static int submit_extent_page(int op, int op_flags, struct extent_io_tree *tree,
                              struct writeback_control *wbc,
                              struct page *page, sector_t sector,
                              size_t size, unsigned long offset,
@@ -2789,10 +2787,9 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
 
                if (prev_bio_flags != bio_flags || !contig ||
                    force_bio_submit ||
-                   merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
+                   merge_bio(tree, page, offset, page_size, bio, bio_flags) ||
                    bio_add_page(bio, page, page_size, offset) < page_size) {
-                       ret = submit_one_bio(rw, bio, mirror_num,
-                                            prev_bio_flags);
+                       ret = submit_one_bio(bio, mirror_num, prev_bio_flags);
                        if (ret < 0) {
                                *bio_ret = NULL;
                                return ret;
@@ -2813,6 +2810,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
        bio_add_page(bio, page, page_size, offset);
        bio->bi_end_io = end_io_func;
        bio->bi_private = tree;
+       bio_set_op_attrs(bio, op, op_flags);
        if (wbc) {
                wbc_init_bio(wbc, bio);
                wbc_account_io(wbc, page, page_size);
@@ -2821,7 +2819,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
        if (bio_ret)
                *bio_ret = bio;
        else
-               ret = submit_one_bio(rw, bio, mirror_num, bio_flags);
+               ret = submit_one_bio(bio, mirror_num, bio_flags);
 
        return ret;
 }
@@ -2879,13 +2877,14 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
  * into the tree that are removed when the IO is done (by the end_io
  * handlers)
  * XXX JDM: This needs looking at to ensure proper page locking
+ * return 0 on success, otherwise return error
  */
 static int __do_readpage(struct extent_io_tree *tree,
                         struct page *page,
                         get_extent_t *get_extent,
                         struct extent_map **em_cached,
                         struct bio **bio, int mirror_num,
-                        unsigned long *bio_flags, int rw,
+                        unsigned long *bio_flags, int read_flags,
                         u64 *prev_em_start)
 {
        struct inode *inode = page->mapping->host;
@@ -2900,7 +2899,7 @@ static int __do_readpage(struct extent_io_tree *tree,
        sector_t sector;
        struct extent_map *em;
        struct block_device *bdev;
-       int ret;
+       int ret = 0;
        int nr = 0;
        size_t pg_offset = 0;
        size_t iosize;
@@ -3068,8 +3067,8 @@ static int __do_readpage(struct extent_io_tree *tree,
                }
 
                pnr -= page->index;
-               ret = submit_extent_page(rw, tree, NULL, page,
-                                        sector, disk_io_size, pg_offset,
+               ret = submit_extent_page(REQ_OP_READ, read_flags, tree, NULL,
+                                        page, sector, disk_io_size, pg_offset,
                                         bdev, bio, pnr,
                                         end_bio_extent_readpage, mirror_num,
                                         *bio_flags,
@@ -3081,6 +3080,7 @@ static int __do_readpage(struct extent_io_tree *tree,
                } else {
                        SetPageError(page);
                        unlock_extent(tree, cur, cur + iosize - 1);
+                       goto out;
                }
                cur = cur + iosize;
                pg_offset += iosize;
@@ -3091,7 +3091,7 @@ out:
                        SetPageUptodate(page);
                unlock_page(page);
        }
-       return 0;
+       return ret;
 }
 
 static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
@@ -3100,7 +3100,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
                                             get_extent_t *get_extent,
                                             struct extent_map **em_cached,
                                             struct bio **bio, int mirror_num,
-                                            unsigned long *bio_flags, int rw,
+                                            unsigned long *bio_flags,
                                             u64 *prev_em_start)
 {
        struct inode *inode;
@@ -3121,7 +3121,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
 
        for (index = 0; index < nr_pages; index++) {
                __do_readpage(tree, pages[index], get_extent, em_cached, bio,
-                             mirror_num, bio_flags, rw, prev_em_start);
+                             mirror_num, bio_flags, 0, prev_em_start);
                put_page(pages[index]);
        }
 }
@@ -3131,7 +3131,7 @@ static void __extent_readpages(struct extent_io_tree *tree,
                               int nr_pages, get_extent_t *get_extent,
                               struct extent_map **em_cached,
                               struct bio **bio, int mirror_num,
-                              unsigned long *bio_flags, int rw,
+                              unsigned long *bio_flags,
                               u64 *prev_em_start)
 {
        u64 start = 0;
@@ -3153,7 +3153,7 @@ static void __extent_readpages(struct extent_io_tree *tree,
                                                  index - first_index, start,
                                                  end, get_extent, em_cached,
                                                  bio, mirror_num, bio_flags,
-                                                 rw, prev_em_start);
+                                                 prev_em_start);
                        start = page_start;
                        end = start + PAGE_SIZE - 1;
                        first_index = index;
@@ -3164,7 +3164,7 @@ static void __extent_readpages(struct extent_io_tree *tree,
                __do_contiguous_readpages(tree, &pages[first_index],
                                          index - first_index, start,
                                          end, get_extent, em_cached, bio,
-                                         mirror_num, bio_flags, rw,
+                                         mirror_num, bio_flags,
                                          prev_em_start);
 }
 
@@ -3172,7 +3172,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
                                   struct page *page,
                                   get_extent_t *get_extent,
                                   struct bio **bio, int mirror_num,
-                                  unsigned long *bio_flags, int rw)
+                                  unsigned long *bio_flags, int read_flags)
 {
        struct inode *inode = page->mapping->host;
        struct btrfs_ordered_extent *ordered;
@@ -3192,7 +3192,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
        }
 
        ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
-                           bio_flags, rw, NULL);
+                           bio_flags, read_flags, NULL);
        return ret;
 }
 
@@ -3204,9 +3204,9 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
        int ret;
 
        ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num,
-                                     &bio_flags, READ);
+                                     &bio_flags, 0);
        if (bio)
-               ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
+               ret = submit_one_bio(bio, mirror_num, bio_flags);
        return ret;
 }
 
@@ -3440,8 +3440,8 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
                               page->index, cur, end);
                }
 
-               ret = submit_extent_page(write_flags, tree, wbc, page,
-                                        sector, iosize, pg_offset,
+               ret = submit_extent_page(REQ_OP_WRITE, write_flags, tree, wbc,
+                                        page, sector, iosize, pg_offset,
                                         bdev, &epd->bio, max_nr,
                                         end_bio_extent_writepage,
                                         0, 0, 0, false);
@@ -3480,13 +3480,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
        size_t pg_offset = 0;
        loff_t i_size = i_size_read(inode);
        unsigned long end_index = i_size >> PAGE_SHIFT;
-       int write_flags;
+       int write_flags = 0;
        unsigned long nr_written = 0;
 
        if (wbc->sync_mode == WB_SYNC_ALL)
                write_flags = WRITE_SYNC;
-       else
-               write_flags = WRITE;
 
        trace___extent_writepage(page, inode, wbc);
 
@@ -3631,7 +3629,6 @@ static void end_extent_buffer_writeback(struct extent_buffer *eb)
 static void set_btree_ioerr(struct page *page)
 {
        struct extent_buffer *eb = (struct extent_buffer *)page->private;
-       struct btrfs_inode *btree_ino = BTRFS_I(eb->fs_info->btree_inode);
 
        SetPageError(page);
        if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
@@ -3677,13 +3674,13 @@ static void set_btree_ioerr(struct page *page)
         */
        switch (eb->log_index) {
        case -1:
-               set_bit(BTRFS_INODE_BTREE_ERR, &btree_ino->runtime_flags);
+               set_bit(BTRFS_FS_BTREE_ERR, &eb->fs_info->flags);
                break;
        case 0:
-               set_bit(BTRFS_INODE_BTREE_LOG1_ERR, &btree_ino->runtime_flags);
+               set_bit(BTRFS_FS_LOG1_ERR, &eb->fs_info->flags);
                break;
        case 1:
-               set_bit(BTRFS_INODE_BTREE_LOG2_ERR, &btree_ino->runtime_flags);
+               set_bit(BTRFS_FS_LOG2_ERR, &eb->fs_info->flags);
                break;
        default:
                BUG(); /* unexpected, logic error */
@@ -3728,9 +3725,11 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
        struct block_device *bdev = fs_info->fs_devices->latest_bdev;
        struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
        u64 offset = eb->start;
+       u32 nritems;
        unsigned long i, num_pages;
        unsigned long bio_flags = 0;
-       int rw = (epd->sync_io ? WRITE_SYNC : WRITE) | REQ_META;
+       unsigned long start, end;
+       int write_flags = (epd->sync_io ? WRITE_SYNC : 0) | REQ_META;
        int ret = 0;
 
        clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
@@ -3739,14 +3738,32 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
        if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID)
                bio_flags = EXTENT_BIO_TREE_LOG;
 
+       /* set btree blocks beyond nritems with 0 to avoid stale content. */
+       nritems = btrfs_header_nritems(eb);
+       if (btrfs_header_level(eb) > 0) {
+               end = btrfs_node_key_ptr_offset(nritems);
+
+               memset_extent_buffer(eb, 0, end, eb->len - end);
+       } else {
+               /*
+                * leaf:
+                * header 0 1 2 .. N ... data_N .. data_2 data_1 data_0
+                */
+               start = btrfs_item_nr_offset(nritems);
+               end = btrfs_leaf_data(eb) +
+                     leaf_data_end(fs_info->tree_root, eb);
+               memset_extent_buffer(eb, 0, start, end - start);
+       }
+
        for (i = 0; i < num_pages; i++) {
                struct page *p = eb->pages[i];
 
                clear_page_dirty_for_io(p);
                set_page_writeback(p);
-               ret = submit_extent_page(rw, tree, wbc, p, offset >> 9,
-                                        PAGE_SIZE, 0, bdev, &epd->bio,
-                                        -1, end_bio_extent_buffer_writepage,
+               ret = submit_extent_page(REQ_OP_WRITE, write_flags, tree, wbc,
+                                        p, offset >> 9, PAGE_SIZE, 0, bdev,
+                                        &epd->bio, -1,
+                                        end_bio_extent_buffer_writepage,
                                         0, epd->bio_flags, bio_flags, false);
                epd->bio_flags = bio_flags;
                if (ret) {
@@ -4056,13 +4073,12 @@ retry:
 static void flush_epd_write_bio(struct extent_page_data *epd)
 {
        if (epd->bio) {
-               int rw = WRITE;
                int ret;
 
-               if (epd->sync_io)
-                       rw = WRITE_SYNC;
+               bio_set_op_attrs(epd->bio, REQ_OP_WRITE,
+                                epd->sync_io ? WRITE_SYNC : 0);
 
-               ret = submit_one_bio(rw, epd->bio, 0, epd->bio_flags);
+               ret = submit_one_bio(epd->bio, 0, epd->bio_flags);
                BUG_ON(ret < 0); /* -ENOMEM */
                epd->bio = NULL;
        }
@@ -4180,7 +4196,8 @@ int extent_readpages(struct extent_io_tree *tree,
                prefetchw(&page->flags);
                list_del(&page->lru);
                if (add_to_page_cache_lru(page, mapping,
-                                       page->index, GFP_NOFS)) {
+                                       page->index,
+                                       readahead_gfp_mask(mapping))) {
                        put_page(page);
                        continue;
                }
@@ -4189,19 +4206,19 @@ int extent_readpages(struct extent_io_tree *tree,
                if (nr < ARRAY_SIZE(pagepool))
                        continue;
                __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
-                                  &bio, 0, &bio_flags, READ, &prev_em_start);
+                                  &bio, 0, &bio_flags, &prev_em_start);
                nr = 0;
        }
        if (nr)
                __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
-                                  &bio, 0, &bio_flags, READ, &prev_em_start);
+                                  &bio, 0, &bio_flags, &prev_em_start);
 
        if (em_cached)
                free_extent_map(em_cached);
 
        BUG_ON(!list_empty(pages));
        if (bio)
-               return submit_one_bio(READ, bio, 0, bio_flags);
+               return submit_one_bio(bio, 0, bio_flags);
        return 0;
 }
 
@@ -4493,11 +4510,24 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                        flags |= (FIEMAP_EXTENT_DELALLOC |
                                  FIEMAP_EXTENT_UNKNOWN);
                } else if (fieinfo->fi_extents_max) {
+                       struct btrfs_trans_handle *trans;
+
                        u64 bytenr = em->block_start -
                                (em->start - em->orig_start);
 
                        disko = em->block_start + offset_in_extent;
 
+                       /*
+                        * We need a trans handle to get delayed refs
+                        */
+                       trans = btrfs_join_transaction(root);
+                       /*
+                        * It's OK if we can't start a trans we can still check
+                        * from commit_root
+                        */
+                       if (IS_ERR(trans))
+                               trans = NULL;
+
                        /*
                         * As btrfs supports shared space, this information
                         * can be exported to userspace tools via
@@ -4505,9 +4535,11 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                         * then we're just getting a count and we can skip the
                         * lookup stuff.
                         */
-                       ret = btrfs_check_shared(NULL, root->fs_info,
+                       ret = btrfs_check_shared(trans, root->fs_info,
                                                 root->objectid,
                                                 btrfs_ino(inode), bytenr);
+                       if (trans)
+                               btrfs_end_transaction(trans, root);
                        if (ret < 0)
                                goto out_free;
                        if (ret)
@@ -5179,11 +5211,10 @@ int extent_buffer_uptodate(struct extent_buffer *eb)
 }
 
 int read_extent_buffer_pages(struct extent_io_tree *tree,
-                            struct extent_buffer *eb, u64 start, int wait,
+                            struct extent_buffer *eb, int wait,
                             get_extent_t *get_extent, int mirror_num)
 {
        unsigned long i;
-       unsigned long start_i;
        struct page *page;
        int err;
        int ret = 0;
@@ -5197,16 +5228,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
        if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
                return 0;
 
-       if (start) {
-               WARN_ON(start < eb->start);
-               start_i = (start >> PAGE_SHIFT) -
-                       (eb->start >> PAGE_SHIFT);
-       } else {
-               start_i = 0;
-       }
-
        num_pages = num_extent_pages(eb->start, eb->len);
-       for (i = start_i; i < num_pages; i++) {
+       for (i = 0; i < num_pages; i++) {
                page = eb->pages[i];
                if (wait == WAIT_NONE) {
                        if (!trylock_page(page))
@@ -5215,38 +5238,62 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
                        lock_page(page);
                }
                locked_pages++;
+       }
+       /*
+        * We need to first lock all pages to make sure that
+        * the uptodate bit of our pages won't be affected by
+        * clear_extent_buffer_uptodate().
+        */
+       for (i = 0; i < num_pages; i++) {
+               page = eb->pages[i];
                if (!PageUptodate(page)) {
                        num_reads++;
                        all_uptodate = 0;
                }
        }
+
        if (all_uptodate) {
-               if (start_i == 0)
-                       set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
+               set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
                goto unlock_exit;
        }
 
        clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
        eb->read_mirror = 0;
        atomic_set(&eb->io_pages, num_reads);
-       for (i = start_i; i < num_pages; i++) {
+       for (i = 0; i < num_pages; i++) {
                page = eb->pages[i];
+
                if (!PageUptodate(page)) {
+                       if (ret) {
+                               atomic_dec(&eb->io_pages);
+                               unlock_page(page);
+                               continue;
+                       }
+
                        ClearPageError(page);
                        err = __extent_read_full_page(tree, page,
                                                      get_extent, &bio,
                                                      mirror_num, &bio_flags,
-                                                     READ | REQ_META);
-                       if (err)
+                                                     REQ_META);
+                       if (err) {
                                ret = err;
+                               /*
+                                * We passed &bio to __extent_read_full_page
+                                * above, so if it returns an error the current
+                                * page failed to be added to the bio and has
+                                * already been unlocked.
+                                *
+                                * We must dec io_pages by ourselves.
+                                */
+                                */
+                               atomic_dec(&eb->io_pages);
+                       }
                } else {
                        unlock_page(page);
                }
        }
 
        if (bio) {
-               err = submit_one_bio(READ | REQ_META, bio, mirror_num,
-                                    bio_flags);
+               err = submit_one_bio(bio, mirror_num, bio_flags);
                if (err)
                        return err;
        }
@@ -5254,7 +5301,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
        if (ret || wait != WAIT_COMPLETE)
                return ret;
 
-       for (i = start_i; i < num_pages; i++) {
+       for (i = 0; i < num_pages; i++) {
                page = eb->pages[i];
                wait_on_page_locked(page);
                if (!PageUptodate(page))
@@ -5264,12 +5311,10 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
        return ret;
 
 unlock_exit:
-       i = start_i;
        while (locked_pages > 0) {
-               page = eb->pages[i];
-               i++;
-               unlock_page(page);
                locked_pages--;
+               page = eb->pages[locked_pages];
+               unlock_page(page);
        }
        return ret;
 }
@@ -5372,8 +5417,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
        }
 
        if (start + min_len > eb->len) {
-               WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
-                      "wanted %lu %lu\n",
+               WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n",
                       eb->start, eb->len, start, min_len);
                return -EINVAL;
        }
@@ -5514,17 +5558,45 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
        }
 }
 
-/*
- * The extent buffer bitmap operations are done with byte granularity because
- * bitmap items are not guaranteed to be aligned to a word and therefore a
- * single word in a bitmap may straddle two pages in the extent buffer.
- */
-#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
-#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1)
-#define BITMAP_FIRST_BYTE_MASK(start) \
-       ((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK)
-#define BITMAP_LAST_BYTE_MASK(nbits) \
-       (BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
+void le_bitmap_set(u8 *map, unsigned int start, int len)
+{
+       u8 *p = map + BIT_BYTE(start);
+       const unsigned int size = start + len;
+       int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
+       u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
+
+       while (len - bits_to_set >= 0) {
+               *p |= mask_to_set;
+               len -= bits_to_set;
+               bits_to_set = BITS_PER_BYTE;
+               mask_to_set = ~(u8)0;
+               p++;
+       }
+       if (len) {
+               mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
+               *p |= mask_to_set;
+       }
+}
+
+void le_bitmap_clear(u8 *map, unsigned int start, int len)
+{
+       u8 *p = map + BIT_BYTE(start);
+       const unsigned int size = start + len;
+       int bits_to_clear = BITS_PER_BYTE - (start % BITS_PER_BYTE);
+       u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(start);
+
+       while (len - bits_to_clear >= 0) {
+               *p &= ~mask_to_clear;
+               len -= bits_to_clear;
+               bits_to_clear = BITS_PER_BYTE;
+               mask_to_clear = ~(u8)0;
+               p++;
+       }
+       if (len) {
+               mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
+               *p &= ~mask_to_clear;
+       }
+}
 
 /*
  * eb_bitmap_offset() - calculate the page and offset of the byte containing the
@@ -5568,7 +5640,7 @@ static inline void eb_bitmap_offset(struct extent_buffer *eb,
 int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
                           unsigned long nr)
 {
-       char *kaddr;
+       u8 *kaddr;
        struct page *page;
        unsigned long i;
        size_t offset;
@@ -5590,13 +5662,13 @@ int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
 void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
                              unsigned long pos, unsigned long len)
 {
-       char *kaddr;
+       u8 *kaddr;
        struct page *page;
        unsigned long i;
        size_t offset;
        const unsigned int size = pos + len;
        int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
-       unsigned int mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
+       u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
 
        eb_bitmap_offset(eb, start, pos, &i, &offset);
        page = eb->pages[i];
@@ -5607,7 +5679,7 @@ void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
                kaddr[offset] |= mask_to_set;
                len -= bits_to_set;
                bits_to_set = BITS_PER_BYTE;
-               mask_to_set = ~0U;
+               mask_to_set = ~(u8)0;
                if (++offset >= PAGE_SIZE && len > 0) {
                        offset = 0;
                        page = eb->pages[++i];
@@ -5632,13 +5704,13 @@ void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
 void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
                                unsigned long pos, unsigned long len)
 {
-       char *kaddr;
+       u8 *kaddr;
        struct page *page;
        unsigned long i;
        size_t offset;
        const unsigned int size = pos + len;
        int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
-       unsigned int mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
+       u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
 
        eb_bitmap_offset(eb, start, pos, &i, &offset);
        page = eb->pages[i];
@@ -5649,7 +5721,7 @@ void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
                kaddr[offset] &= ~mask_to_clear;
                len -= bits_to_clear;
                bits_to_clear = BITS_PER_BYTE;
-               mask_to_clear = ~0U;
+               mask_to_clear = ~(u8)0;
                if (++offset >= PAGE_SIZE && len > 0) {
                        offset = 0;
                        page = eb->pages[++i];
@@ -5703,14 +5775,14 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
 
        if (src_offset + len > dst->len) {
                btrfs_err(dst->fs_info,
-                       "memmove bogus src_offset %lu move "
-                      "len %lu dst len %lu", src_offset, len, dst->len);
+                       "memmove bogus src_offset %lu move len %lu dst len %lu",
+                        src_offset, len, dst->len);
                BUG_ON(1);
        }
        if (dst_offset + len > dst->len) {
                btrfs_err(dst->fs_info,
-                       "memmove bogus dst_offset %lu move "
-                      "len %lu dst len %lu", dst_offset, len, dst->len);
+                       "memmove bogus dst_offset %lu move len %lu dst len %lu",
+                        dst_offset, len, dst->len);
                BUG_ON(1);
        }
 
@@ -5750,13 +5822,15 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
        unsigned long src_i;
 
        if (src_offset + len > dst->len) {
-               btrfs_err(dst->fs_info, "memmove bogus src_offset %lu move "
-                      "len %lu len %lu", src_offset, len, dst->len);
+               btrfs_err(dst->fs_info,
+                         "memmove bogus src_offset %lu move len %lu len %lu",
+                         src_offset, len, dst->len);
                BUG_ON(1);
        }
        if (dst_offset + len > dst->len) {
-               btrfs_err(dst->fs_info, "memmove bogus dst_offset %lu move "
-                      "len %lu len %lu", dst_offset, len, dst->len);
+               btrfs_err(dst->fs_info,
+                         "memmove bogus dst_offset %lu move len %lu len %lu",
+                         dst_offset, len, dst->len);
                BUG_ON(1);
        }
        if (dst_offset < src_offset) {