Merge branch 'raid56-experimental' into for-linus-3.9
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 492ee0e..4e6a11c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
 #include <linux/slab.h>
 #include <linux/ratelimit.h>
 #include <linux/mount.h>
+#include <linux/btrfs.h>
 #include <linux/blkdev.h>
 #include "compat.h"
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
 #include "btrfs_inode.h"
-#include "ioctl.h"
 #include "print-tree.h"
 #include "ordered-data.h"
 #include "xattr.h"
@@ -609,7 +609,7 @@ static noinline int submit_compressed_extents(struct inode *inode,
        if (list_empty(&async_cow->extents))
                return 0;
 
-
+again:
        while (!list_empty(&async_cow->extents)) {
                async_extent = list_entry(async_cow->extents.next,
                                          struct async_extent, list);
@@ -649,6 +649,8 @@ retry:
                                                  async_extent->ram_size - 1,
                                                  btrfs_get_extent,
                                                  WB_SYNC_ALL);
+                       else if (ret)
+                               unlock_page(async_cow->locked_page);
                        kfree(async_extent);
                        cond_resched();
                        continue;
@@ -673,6 +675,7 @@ retry:
 
                if (ret) {
                        int i;
+
                        for (i = 0; i < async_extent->nr_pages; i++) {
                                WARN_ON(async_extent->pages[i]->mapping);
                                page_cache_release(async_extent->pages[i]);
@@ -680,12 +683,10 @@ retry:
                        kfree(async_extent->pages);
                        async_extent->nr_pages = 0;
                        async_extent->pages = NULL;
-                       unlock_extent(io_tree, async_extent->start,
-                                     async_extent->start +
-                                     async_extent->ram_size - 1);
+
                        if (ret == -ENOSPC)
                                goto retry;
-                       goto out_free; /* JDM: Requeue? */
+                       goto out_free;
                }
 
                /*
@@ -697,10 +698,13 @@ retry:
                                        async_extent->ram_size - 1, 0);
 
                em = alloc_extent_map();
-               BUG_ON(!em); /* -ENOMEM */
+               if (!em)
+                       goto out_free_reserve;
                em->start = async_extent->start;
                em->len = async_extent->ram_size;
                em->orig_start = em->start;
+               em->mod_start = em->start;
+               em->mod_len = em->len;
 
                em->block_start = ins.objectid;
                em->block_len = ins.offset;
@@ -727,6 +731,9 @@ retry:
                                                async_extent->ram_size - 1, 0);
                }
 
+               if (ret)
+                       goto out_free_reserve;
+
                ret = btrfs_add_ordered_extent_compress(inode,
                                                async_extent->start,
                                                ins.objectid,
@@ -734,7 +741,8 @@ retry:
                                                ins.offset,
                                                BTRFS_ORDERED_COMPRESSED,
                                                async_extent->compress_type);
-               BUG_ON(ret); /* -ENOMEM */
+               if (ret)
+                       goto out_free_reserve;
 
                /*
                 * clear dirty, set writeback and unlock the pages.
@@ -755,18 +763,30 @@ retry:
                                    ins.objectid,
                                    ins.offset, async_extent->pages,
                                    async_extent->nr_pages);
-
-               BUG_ON(ret); /* -ENOMEM */
                alloc_hint = ins.objectid + ins.offset;
                kfree(async_extent);
+               if (ret)
+                       goto out;
                cond_resched();
        }
        ret = 0;
 out:
        return ret;
+out_free_reserve:
+       btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
 out_free:
+       extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
+                                    async_extent->start,
+                                    async_extent->start +
+                                    async_extent->ram_size - 1,
+                                    NULL, EXTENT_CLEAR_UNLOCK_PAGE |
+                                    EXTENT_CLEAR_UNLOCK |
+                                    EXTENT_CLEAR_DELALLOC |
+                                    EXTENT_CLEAR_DIRTY |
+                                    EXTENT_SET_WRITEBACK |
+                                    EXTENT_END_WRITEBACK);
        kfree(async_extent);
-       goto out;
+       goto again;
 }
 
 static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
@@ -893,6 +913,8 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
                em->orig_start = em->start;
                ram_size = ins.offset;
                em->len = ins.offset;
+               em->mod_start = em->start;
+               em->mod_len = em->len;
 
                em->block_start = ins.objectid;
                em->block_len = ins.offset;
@@ -1339,6 +1361,8 @@ out_check:
                        em->block_start = disk_bytenr;
                        em->orig_block_len = disk_num_bytes;
                        em->bdev = root->fs_info->fs_devices->latest_bdev;
+                       em->mod_start = em->start;
+                       em->mod_len = em->len;
                        set_bit(EXTENT_FLAG_PINNED, &em->flags);
                        set_bit(EXTENT_FLAG_FILLING, &em->flags);
                        em->generation = -1;
@@ -1509,14 +1533,22 @@ static void btrfs_set_bit_hook(struct inode *inode,
                        spin_unlock(&BTRFS_I(inode)->lock);
                }
 
-               spin_lock(&root->fs_info->delalloc_lock);
+               __percpu_counter_add(&root->fs_info->delalloc_bytes, len,
+                                    root->fs_info->delalloc_batch);
+               spin_lock(&BTRFS_I(inode)->lock);
                BTRFS_I(inode)->delalloc_bytes += len;
-               root->fs_info->delalloc_bytes += len;
-               if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
-                       list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
-                                     &root->fs_info->delalloc_inodes);
+               if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+                                        &BTRFS_I(inode)->runtime_flags)) {
+                       spin_lock(&root->fs_info->delalloc_lock);
+                       if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
+                               list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
+                                             &root->fs_info->delalloc_inodes);
+                               set_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+                                       &BTRFS_I(inode)->runtime_flags);
+                       }
+                       spin_unlock(&root->fs_info->delalloc_lock);
                }
-               spin_unlock(&root->fs_info->delalloc_lock);
+               spin_unlock(&BTRFS_I(inode)->lock);
        }
 }
 
@@ -1551,15 +1583,22 @@ static void btrfs_clear_bit_hook(struct inode *inode,
                    && do_list)
                        btrfs_free_reserved_data_space(inode, len);
 
-               spin_lock(&root->fs_info->delalloc_lock);
-               root->fs_info->delalloc_bytes -= len;
+               __percpu_counter_add(&root->fs_info->delalloc_bytes, -len,
+                                    root->fs_info->delalloc_batch);
+               spin_lock(&BTRFS_I(inode)->lock);
                BTRFS_I(inode)->delalloc_bytes -= len;
-
                if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
-                   !list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
-                       list_del_init(&BTRFS_I(inode)->delalloc_inodes);
+                   test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+                            &BTRFS_I(inode)->runtime_flags)) {
+                       spin_lock(&root->fs_info->delalloc_lock);
+                       if (!list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
+                               list_del_init(&BTRFS_I(inode)->delalloc_inodes);
+                               clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+                                         &BTRFS_I(inode)->runtime_flags);
+                       }
+                       spin_unlock(&root->fs_info->delalloc_lock);
                }
-               spin_unlock(&root->fs_info->delalloc_lock);
+               spin_unlock(&BTRFS_I(inode)->lock);
        }
 }
 
@@ -2002,11 +2041,23 @@ out:
        if (trans)
                btrfs_end_transaction(trans, root);
 
-       if (ret)
+       if (ret) {
                clear_extent_uptodate(io_tree, ordered_extent->file_offset,
                                      ordered_extent->file_offset +
                                      ordered_extent->len - 1, NULL, GFP_NOFS);
 
+               /*
+                * If the ordered extent had an IOERR or something else went
+                * wrong, we need to return the space for this ordered extent
+                * back to the allocator.
+                */
+               if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
+                   !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags))
+                       btrfs_free_reserved_extent(root, ordered_extent->start,
+                                                  ordered_extent->disk_len);
+       }
+
+
        /*
         * This needs to be done to make sure anybody waiting knows we are done
         * updating everything for this ordered extent.
@@ -2063,7 +2114,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
 static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
                               struct extent_state *state, int mirror)
 {
-       size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
+       size_t offset = start - page_offset(page);
        struct inode *inode = page->mapping->host;
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        char *kaddr;
@@ -2168,11 +2219,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
        }
 }
 
-enum btrfs_orphan_cleanup_state {
-       ORPHAN_CLEANUP_STARTED  = 1,
-       ORPHAN_CLEANUP_DONE     = 2,
-};
-
 /*
  * This is called in transaction commit time. If there are no orphan
  * files in the subvolume, it removes orphan item and frees block_rsv
@@ -2470,6 +2516,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                 */
                set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
                        &BTRFS_I(inode)->runtime_flags);
+               atomic_inc(&root->orphan_inodes);
 
                /* if we have links, this was a truncate, lets do that */
                if (inode->i_nlink) {
@@ -2492,6 +2539,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                                goto out;
 
                        ret = btrfs_truncate(inode);
+                       if (ret)
+                               btrfs_orphan_del(NULL, inode);
                } else {
                        nr_unlink++;
                }
@@ -2710,34 +2759,41 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
                            struct btrfs_inode_item *item,
                            struct inode *inode)
 {
-       btrfs_set_inode_uid(leaf, item, i_uid_read(inode));
-       btrfs_set_inode_gid(leaf, item, i_gid_read(inode));
-       btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
-       btrfs_set_inode_mode(leaf, item, inode->i_mode);
-       btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
+       struct btrfs_map_token token;
 
-       btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item),
-                              inode->i_atime.tv_sec);
-       btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item),
-                               inode->i_atime.tv_nsec);
+       btrfs_init_map_token(&token);
 
-       btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item),
-                              inode->i_mtime.tv_sec);
-       btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item),
-                               inode->i_mtime.tv_nsec);
+       btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
+       btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
+       btrfs_set_token_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size,
+                                  &token);
+       btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
+       btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
 
-       btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item),
-                              inode->i_ctime.tv_sec);
-       btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item),
-                               inode->i_ctime.tv_nsec);
+       btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item),
+                                    inode->i_atime.tv_sec, &token);
+       btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item),
+                                     inode->i_atime.tv_nsec, &token);
 
-       btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode));
-       btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation);
-       btrfs_set_inode_sequence(leaf, item, inode->i_version);
-       btrfs_set_inode_transid(leaf, item, trans->transid);
-       btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
-       btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
-       btrfs_set_inode_block_group(leaf, item, 0);
+       btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item),
+                                    inode->i_mtime.tv_sec, &token);
+       btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item),
+                                     inode->i_mtime.tv_nsec, &token);
+
+       btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item),
+                                    inode->i_ctime.tv_sec, &token);
+       btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item),
+                                     inode->i_ctime.tv_nsec, &token);
+
+       btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
+                                    &token);
+       btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation,
+                                        &token);
+       btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token);
+       btrfs_set_token_inode_transid(leaf, item, trans->transid, &token);
+       btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token);
+       btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token);
+       btrfs_set_token_inode_block_group(leaf, item, 0, &token);
 }
 
 /*
@@ -3833,6 +3889,12 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
 
                /* we don't support swapfiles, so vmtruncate shouldn't fail */
                truncate_setsize(inode, newsize);
+
+               /* Disable nonlocked read DIO to avoid the endless truncate */
+               btrfs_inode_block_unlocked_dio(inode);
+               inode_dio_wait(inode);
+               btrfs_inode_resume_unlocked_dio(inode);
+
                ret = btrfs_truncate(inode);
                if (ret && inode->i_nlink)
                        btrfs_orphan_del(NULL, inode);
@@ -3905,6 +3967,12 @@ void btrfs_evict_inode(struct inode *inode)
                goto no_delete;
        }
 
+       ret = btrfs_commit_inode_delayed_inode(inode);
+       if (ret) {
+               btrfs_orphan_del(NULL, inode);
+               goto no_delete;
+       }
+
        rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
        if (!rsv) {
                btrfs_orphan_del(NULL, inode);
@@ -3942,7 +4010,7 @@ void btrfs_evict_inode(struct inode *inode)
                        goto no_delete;
                }
 
-               trans = btrfs_start_transaction_lflush(root, 1);
+               trans = btrfs_join_transaction(root);
                if (IS_ERR(trans)) {
                        btrfs_orphan_del(NULL, inode);
                        btrfs_free_block_rsv(root, rsv);
@@ -3956,9 +4024,6 @@ void btrfs_evict_inode(struct inode *inode)
                        break;
 
                trans->block_rsv = &root->fs_info->trans_block_rsv;
-               ret = btrfs_update_inode(trans, root, inode);
-               BUG_ON(ret);
-
                btrfs_end_transaction(trans, root);
                trans = NULL;
                btrfs_btree_balance_dirty(root);
@@ -4312,16 +4377,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
        if (dentry->d_name.len > BTRFS_NAME_LEN)
                return ERR_PTR(-ENAMETOOLONG);
 
-       if (unlikely(d_need_lookup(dentry))) {
-               memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key));
-               kfree(dentry->d_fsdata);
-               dentry->d_fsdata = NULL;
-               /* This thing is hashed, drop it for now */
-               d_drop(dentry);
-       } else {
-               ret = btrfs_inode_by_name(dir, dentry, &location);
-       }
-
+       ret = btrfs_inode_by_name(dir, dentry, &location);
        if (ret < 0)
                return ERR_PTR(ret);
 
@@ -4391,11 +4447,6 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
        struct dentry *ret;
 
        ret = d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry);
-       if (unlikely(d_need_lookup(dentry))) {
-               spin_lock(&dentry->d_lock);
-               dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
-               spin_unlock(&dentry->d_lock);
-       }
        return ret;
 }
 
@@ -5021,12 +5072,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
                goto out_unlock;
        }
 
-       err = btrfs_update_inode(trans, root, inode);
-       if (err) {
-               drop_inode = 1;
-               goto out_unlock;
-       }
-
        /*
        * If the active LSM wants to access the inode during
        * d_instantiate it needs these. Smack checks to see
@@ -5964,6 +6009,8 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
 
        em->start = start;
        em->orig_start = orig_start;
+       em->mod_start = start;
+       em->mod_len = len;
        em->len = len;
        em->block_len = block_len;
        em->block_start = block_start;
@@ -6005,16 +6052,15 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
        u64 len = bh_result->b_size;
        struct btrfs_trans_handle *trans;
        int unlock_bits = EXTENT_LOCKED;
-       int ret;
+       int ret = 0;
 
        if (create) {
-               ret = btrfs_delalloc_reserve_space(inode, len);
-               if (ret)
-                       return ret;
+               spin_lock(&BTRFS_I(inode)->lock);
+               BTRFS_I(inode)->outstanding_extents++;
+               spin_unlock(&BTRFS_I(inode)->lock);
                unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
-       } else {
+       } else
                len = min_t(u64, len, root->sectorsize);
-       }
 
        lockstart = start;
        lockend = start + len - 1;
@@ -6026,14 +6072,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
        if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create))
                return -ENOTBLK;
 
-       if (create) {
-               ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
-                                    lockend, EXTENT_DELALLOC, NULL,
-                                    &cached_state, GFP_NOFS);
-               if (ret)
-                       goto unlock_err;
-       }
-
        em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
        if (IS_ERR(em)) {
                ret = PTR_ERR(em);
@@ -6065,7 +6103,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
        if (!create && (em->block_start == EXTENT_MAP_HOLE ||
                        test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
                free_extent_map(em);
-               ret = 0;
                goto unlock_err;
        }
 
@@ -6163,6 +6200,11 @@ unlock:
                 */
                if (start + len > i_size_read(inode))
                        i_size_write(inode, start + len);
+
+               ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+                                    lockstart + len - 1, EXTENT_DELALLOC, NULL,
+                                    &cached_state, GFP_NOFS);
+               BUG_ON(ret);
        }
 
        /*
@@ -6171,24 +6213,9 @@ unlock:
         * aren't using if there is any left over space.
         */
        if (lockstart < lockend) {
-               if (create && len < lockend - lockstart) {
-                       clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
-                                        lockstart + len - 1,
-                                        unlock_bits | EXTENT_DEFRAG, 1, 0,
-                                        &cached_state, GFP_NOFS);
-                       /*
-                        * Beside unlock, we also need to cleanup reserved space
-                        * for the left range by attaching EXTENT_DO_ACCOUNTING.
-                        */
-                       clear_extent_bit(&BTRFS_I(inode)->io_tree,
-                                        lockstart + len, lockend,
-                                        unlock_bits | EXTENT_DO_ACCOUNTING |
-                                        EXTENT_DEFRAG, 1, 0, NULL, GFP_NOFS);
-               } else {
-                       clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
-                                        lockend, unlock_bits, 1, 0,
-                                        &cached_state, GFP_NOFS);
-               }
+               clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+                                lockend, unlock_bits, 1, 0,
+                                &cached_state, GFP_NOFS);
        } else {
                free_extent_state(cached_state);
        }
@@ -6198,9 +6225,6 @@ unlock:
        return 0;
 
 unlock_err:
-       if (create)
-               unlock_bits |= EXTENT_DO_ACCOUNTING;
-
        clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
                         unlock_bits, 1, 0, &cached_state, GFP_NOFS);
        return ret;
@@ -6643,16 +6667,62 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
+       size_t count = 0;
+       int flags = 0;
+       bool wakeup = true;
+       bool relock = false;
        ssize_t ret;
 
        if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
                            offset, nr_segs))
                return 0;
 
+       atomic_inc(&inode->i_dio_count);
+       smp_mb__after_atomic_inc();
+
+       if (rw & WRITE) {
+               count = iov_length(iov, nr_segs);
+                * If the write DIO is beyond the EOF, we need to update
+                * the isize, but it is protected by i_mutex, so we
+                * cannot unlock the i_mutex in this case.
+                * not unlock the i_mutex at this case.
+                */
+               if (offset + count <= inode->i_size) {
+                       mutex_unlock(&inode->i_mutex);
+                       relock = true;
+               }
+               ret = btrfs_delalloc_reserve_space(inode, count);
+               if (ret)
+                       goto out;
+       } else if (unlikely(test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
+                                    &BTRFS_I(inode)->runtime_flags))) {
+               inode_dio_done(inode);
+               flags = DIO_LOCKING | DIO_SKIP_HOLES;
+               wakeup = false;
+       }
+
        ret = __blockdev_direct_IO(rw, iocb, inode,
-                  BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
-                  iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
-                  btrfs_submit_direct, 0);
+                       BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
+                       iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
+                       btrfs_submit_direct, flags);
+       if (rw & WRITE) {
+               if (ret < 0 && ret != -EIOCBQUEUED)
+                       btrfs_delalloc_release_space(inode, count);
+               else if (ret > 0 && (size_t)ret < count) {
+                       spin_lock(&BTRFS_I(inode)->lock);
+                       BTRFS_I(inode)->outstanding_extents++;
+                       spin_unlock(&BTRFS_I(inode)->lock);
+                       btrfs_delalloc_release_space(inode,
+                                                    count - (size_t)ret);
+               }
+               btrfs_delalloc_release_metadata(inode, 0);
+       }
+out:
+       if (wakeup)
+               inode_dio_done(inode);
+       if (relock)
+               mutex_lock(&inode->i_mutex);
+
        return ret;
 }
 
@@ -6757,8 +6827,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
                return;
        }
        lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
-       ordered = btrfs_lookup_ordered_extent(inode,
-                                          page_offset(page));
+       ordered = btrfs_lookup_ordered_extent(inode, page_offset(page));
        if (ordered) {
                /*
                 * IO on this page will never be started, so we need
@@ -7238,8 +7307,9 @@ int btrfs_drop_inode(struct inode *inode)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
 
+       /* the snap/subvol tree is being deleted */
        if (btrfs_root_refs(&root->root_item) == 0 &&
-           !btrfs_is_free_space_inode(inode))
+           root != root->fs_info->tree_root)
                return 1;
        else
                return generic_drop_inode(inode);
@@ -7321,14 +7391,19 @@ fail:
 static int btrfs_getattr(struct vfsmount *mnt,
                         struct dentry *dentry, struct kstat *stat)
 {
+       u64 delalloc_bytes;
        struct inode *inode = dentry->d_inode;
        u32 blocksize = inode->i_sb->s_blocksize;
 
        generic_fillattr(inode, stat);
        stat->dev = BTRFS_I(inode)->root->anon_dev;
        stat->blksize = PAGE_CACHE_SIZE;
+
+       spin_lock(&BTRFS_I(inode)->lock);
+       delalloc_bytes = BTRFS_I(inode)->delalloc_bytes;
+       spin_unlock(&BTRFS_I(inode)->lock);
        stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) +
-               ALIGN(BTRFS_I(inode)->delalloc_bytes, blocksize)) >> 9;
+                       ALIGN(delalloc_bytes, blocksize)) >> 9;
        return 0;
 }
 
@@ -7605,7 +7680,7 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
 
        INIT_LIST_HEAD(&works);
        INIT_LIST_HEAD(&splice);
-again:
+
        spin_lock(&root->fs_info->delalloc_lock);
        list_splice_init(&root->fs_info->delalloc_inodes, &splice);
        while (!list_empty(&splice)) {
@@ -7615,8 +7690,11 @@ again:
                list_del_init(&binode->delalloc_inodes);
 
                inode = igrab(&binode->vfs_inode);
-               if (!inode)
+               if (!inode) {
+                       clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+                                 &binode->runtime_flags);
                        continue;
+               }
 
                list_add_tail(&binode->delalloc_inodes,
                              &root->fs_info->delalloc_inodes);
@@ -7641,13 +7719,6 @@ again:
                btrfs_wait_and_free_delalloc_work(work);
        }
 
-       spin_lock(&root->fs_info->delalloc_lock);
-       if (!list_empty(&root->fs_info->delalloc_inodes)) {
-               spin_unlock(&root->fs_info->delalloc_lock);
-               goto again;
-       }
-       spin_unlock(&root->fs_info->delalloc_lock);
-
        /* the filemap_flush will queue IO into the worker threads, but
         * we have to make sure the IO is actually started and that
         * ordered extents get created before we return