Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 25 Feb 2011 22:03:39 +0000 (14:03 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 25 Feb 2011 22:03:39 +0000 (14:03 -0800)
* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable:
  Btrfs: fix fiemap bugs with delalloc
  Btrfs: set FMODE_EXCL in btrfs_device->mode
  Btrfs: make btrfs_rm_device() fail gracefully
  Btrfs: Avoid accessing unmapped kernel address
  Btrfs: Fix BTRFS_IOC_SUBVOL_SETFLAGS ioctl
  Btrfs: allow balance to explicitly allocate chunks as it relocates
  Btrfs: put ENOSPC debugging under a mount option

1  2 
fs/btrfs/ctree.h
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/inode.c
fs/btrfs/super.c
fs/btrfs/volumes.c

diff --combined fs/btrfs/ctree.h
@@@ -1254,6 -1254,7 +1254,7 @@@ struct btrfs_root 
  #define BTRFS_MOUNT_SPACE_CACHE               (1 << 12)
  #define BTRFS_MOUNT_CLEAR_CACHE               (1 << 13)
  #define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
+ #define BTRFS_MOUNT_ENOSPC_DEBUG       (1 << 15)
  
  #define btrfs_clear_opt(o, opt)               ((o) &= ~BTRFS_MOUNT_##opt)
  #define btrfs_set_opt(o, opt)         ((o) |= BTRFS_MOUNT_##opt)
@@@ -2218,6 -2219,8 +2219,8 @@@ int btrfs_error_unpin_extent_range(stru
                                   u64 start, u64 end);
  int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
                               u64 num_bytes);
+ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root, u64 type);
  
  /* ctree.c */
  int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
@@@ -2583,7 -2586,7 +2586,7 @@@ do {                                                            
  
  /* acl.c */
  #ifdef CONFIG_BTRFS_FS_POSIX_ACL
 -int btrfs_check_acl(struct inode *inode, int mask);
 +int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags);
  #else
  #define btrfs_check_acl NULL
  #endif
diff --combined fs/btrfs/extent-tree.c
@@@ -1743,7 -1743,8 +1743,7 @@@ static int remove_extent_backref(struc
  static void btrfs_issue_discard(struct block_device *bdev,
                                u64 start, u64 len)
  {
 -      blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
 -                      BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
 +      blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 0);
  }
  
  static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
@@@ -5376,7 -5377,7 +5376,7 @@@ again
                               num_bytes, data, 1);
                goto again;
        }
-       if (ret == -ENOSPC) {
+       if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
                struct btrfs_space_info *sinfo;
  
                sinfo = __find_space_info(root->fs_info, data);
        return ret;
  }
  
+ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root, u64 type)
+ {     /*
+        * Look up the allocation flags for 'type' with get_alloc_profile()
+        * and pass them to do_chunk_alloc() together with a 2MB byte count.
+        * Whatever do_chunk_alloc() returns is returned unchanged.
+        *
+        * NOTE(review): the trailing 1 is presumably do_chunk_alloc()'s
+        * "force" argument, in line with this function's name -- confirm
+        * against the do_chunk_alloc() definition.
+        */
+       u64 alloc_flags = get_alloc_profile(root, type);
+       return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+ }
  /*
   * helper to account the unused space of all the readonly block group in the
   * list. takes mirrors into account.
diff --combined fs/btrfs/extent_io.c
@@@ -1433,12 -1433,13 +1433,13 @@@ int extent_clear_unlock_delalloc(struc
   */
  u64 count_range_bits(struct extent_io_tree *tree,
                     u64 *start, u64 search_end, u64 max_bytes,
-                    unsigned long bits)
+                    unsigned long bits, int contig)
  {
        struct rb_node *node;
        struct extent_state *state;
        u64 cur_start = *start;
        u64 total_bytes = 0;
+       u64 last = 0;
        int found = 0;
  
        if (search_end <= cur_start) {
                state = rb_entry(node, struct extent_state, rb_node);
                if (state->start > search_end)
                        break;
-               if (state->end >= cur_start && (state->state & bits)) {
+               if (contig && found && state->start > last + 1)
+                       break;
+               if (state->end >= cur_start && (state->state & bits) == bits) {
                        total_bytes += min(search_end, state->end) + 1 -
                                       max(cur_start, state->start);
                        if (total_bytes >= max_bytes)
                                *start = state->start;
                                found = 1;
                        }
+                       last = state->end;
+               } else if (contig && found) {
+                       break;
                }
                node = rb_next(node);
                if (!node)
        return sector;
  }
  
+ /*
+  * helper function for fiemap, which doesn't want to see any holes.
+  * This maps until we find something past 'last'
+  *
+  * Starting at 'offset', get_extent() is called repeatedly.  Each call
+  * asks for the distance left to 'last', rounded up to a multiple of
+  * the root's sectorsize (the mask arithmetic assumes sectorsize is a
+  * power of two).  A mapping is skipped when it has EXTENT_FLAG_VACANCY
+  * set or when its block_start is EXTENT_MAP_HOLE; the next lookup then
+  * starts at extent_map_end() of the skipped mapping.
+  *
+  * Returns:
+  *  - the first mapping that is not skipped; it is not released here,
+  *    so the caller has to pass it to free_extent_map() when done
+  *  - whatever get_extent() returned, if that was NULL or an ERR_PTR
+  *  - NULL if 'offset' is already at or past 'last', or if only holes
+  *    were found before reaching 'last'
+  */
+ static struct extent_map *get_extent_skip_holes(struct inode *inode,
+                                               u64 offset,
+                                               u64 last,
+                                               get_extent_t *get_extent)
+ {
+       u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
+       struct extent_map *em;
+       u64 len;
+       if (offset >= last)
+               return NULL;
+       while(1) {
+               len = last - offset;
+               if (len == 0)
+                       break;
+               len = (len + sectorsize - 1) & ~(sectorsize - 1);
+               em = get_extent(inode, NULL, 0, offset, len, 0);
+               if (!em || IS_ERR(em))
+                       return em;
+               /*
+                * if this isn't a hole return it: it must be neither
+                * flagged EXTENT_FLAG_VACANCY nor mapped to
+                * EXTENT_MAP_HOLE
+                */
+               if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
+                   em->block_start != EXTENT_MAP_HOLE) {
+                       return em;
+               }
+               /*
+                * this is a hole, advance to the next extent: remember
+                * where the hole ends, release it, and look again from
+                * there unless that is already at or past 'last'
+                */
+               offset = extent_map_end(em);
+               free_extent_map(em);
+               if (offset >= last)
+                       break;
+       }
+       return NULL;
+ }
  int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                __u64 start, __u64 len, get_extent_t *get_extent)
  {
        u32 flags = 0;
        u32 found_type;
        u64 last;
+       u64 last_for_get_extent = 0;
        u64 disko = 0;
+       u64 isize = i_size_read(inode);
        struct btrfs_key found_key;
        struct extent_map *em = NULL;
        struct extent_state *cached_state = NULL;
        struct btrfs_path *path;
        struct btrfs_file_extent_item *item;
        int end = 0;
-       u64 em_start = 0, em_len = 0;
+       u64 em_start = 0;
+       u64 em_len = 0;
+       u64 em_end = 0;
        unsigned long emflags;
-       int hole = 0;
  
        if (len == 0)
                return -EINVAL;
                return -ENOMEM;
        path->leave_spinning = 1;
  
+       /*
+        * lookup the last file extent.  We're not using i_size here
+        * because there might be preallocation past i_size
+        */
        ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
                                       path, inode->i_ino, -1, 0);
        if (ret < 0) {
        btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
        found_type = btrfs_key_type(&found_key);
  
-       /* No extents, just return */
+       /* No extents, but there might be delalloc bits */
        if (found_key.objectid != inode->i_ino ||
            found_type != BTRFS_EXTENT_DATA_KEY) {
-               btrfs_free_path(path);
-               return 0;
+               /* have to trust i_size as the end */
+               last = (u64)-1;
+               last_for_get_extent = isize;
+       } else {
+               /*
+                * remember the start of the last extent.  There are a
+                * bunch of different factors that go into the length of the
+                * extent, so its much less complex to remember where it started
+                */
+               last = found_key.offset;
+               last_for_get_extent = last + 1;
        }
-       last = found_key.offset;
        btrfs_free_path(path);
  
+       /*
+        * we might have some extents allocated but more delalloc past those
+        * extents.  so, we trust isize unless the start of the last extent is
+        * beyond isize
+        */
+       if (last < isize) {
+               last = (u64)-1;
+               last_for_get_extent = isize;
+       }
        lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
                         &cached_state, GFP_NOFS);
-       em = get_extent(inode, NULL, 0, off, max - off, 0);
+       em = get_extent_skip_holes(inode, off, last_for_get_extent,
+                                  get_extent);
        if (!em)
                goto out;
        if (IS_ERR(em)) {
        }
  
        while (!end) {
-               hole = 0;
-               off = em->start + em->len;
+               off = extent_map_end(em);
                if (off >= max)
                        end = 1;
  
-               if (em->block_start == EXTENT_MAP_HOLE) {
-                       hole = 1;
-                       goto next;
-               }
                em_start = em->start;
                em_len = em->len;
+               em_end = extent_map_end(em);
+               emflags = em->flags;
                disko = 0;
                flags = 0;
  
                if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
                        flags |= FIEMAP_EXTENT_ENCODED;
  
- next:
-               emflags = em->flags;
                free_extent_map(em);
                em = NULL;
-               if (!end) {
-                       em = get_extent(inode, NULL, 0, off, max - off, 0);
-                       if (!em)
-                               goto out;
-                       if (IS_ERR(em)) {
-                               ret = PTR_ERR(em);
-                               goto out;
-                       }
-                       emflags = em->flags;
-               }
-               if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
+               if ((em_start >= last) || em_len == (u64)-1 ||
+                  (last == (u64)-1 && isize <= em_end)) {
                        flags |= FIEMAP_EXTENT_LAST;
                        end = 1;
                }
  
-               if (em_start == last) {
+               /* now scan forward to see if this is really the last extent. */
+               em = get_extent_skip_holes(inode, off, last_for_get_extent,
+                                          get_extent);
+               if (IS_ERR(em)) {
+                       ret = PTR_ERR(em);
+                       goto out;
+               }
+               if (!em) {
                        flags |= FIEMAP_EXTENT_LAST;
                        end = 1;
                }
-               if (!hole) {
-                       ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
-                                               em_len, flags);
-                       if (ret)
-                               goto out_free;
-               }
+               ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
+                                             em_len, flags);
+               if (ret)
+                       goto out_free;
        }
  out_free:
        free_extent_map(em);
@@@ -3092,6 -3152,7 +3152,6 @@@ static struct extent_buffer *__alloc_ex
        eb->len = len;
        spin_lock_init(&eb->lock);
        init_waitqueue_head(&eb->lock_wq);
 -      INIT_RCU_HEAD(&eb->rcu_head);
  
  #if LEAK_DEBUG
        spin_lock_irqsave(&leak_lock, flags);
diff --combined fs/btrfs/inode.c
@@@ -1913,7 -1913,7 +1913,7 @@@ static int btrfs_clean_io_failures(stru
  
        private = 0;
        if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
-                            (u64)-1, 1, EXTENT_DIRTY)) {
+                            (u64)-1, 1, EXTENT_DIRTY, 0)) {
                ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
                                        start, &private_failure);
                if (ret == 0) {
@@@ -3905,7 -3905,7 +3905,7 @@@ again
        p = &root->inode_tree.rb_node;
        parent = NULL;
  
 -      if (hlist_unhashed(&inode->i_hash))
 +      if (inode_unhashed(inode))
                return;
  
        spin_lock(&root->inode_lock);
@@@ -4112,6 -4112,8 +4112,6 @@@ struct inode *btrfs_lookup_dentry(struc
        int index;
        int ret;
  
 -      dentry->d_op = &btrfs_dentry_operations;
 -
        if (dentry->d_name.len > BTRFS_NAME_LEN)
                return ERR_PTR(-ENAMETOOLONG);
  
        return inode;
  }
  
 -static int btrfs_dentry_delete(struct dentry *dentry)
 +static int btrfs_dentry_delete(const struct dentry *dentry)
  {
        struct btrfs_root *root;
  
@@@ -4831,7 -4833,7 +4831,7 @@@ static int btrfs_link(struct dentry *ol
        }
  
        btrfs_set_trans_block_group(trans, dir);
 -      atomic_inc(&inode->i_count);
 +      ihold(inode);
  
        err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index);
  
        return em;
  }
  
+ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
+                                          size_t pg_offset, u64 start, u64 len,
+                                          int create)
+ {     /*
+        * get_extent callback used by btrfs_fiemap().  It takes the same
+        * arguments as btrfs_get_extent() and forwards them to it, but
+        * when that lookup yields a hole (or NULL) it also searches the
+        * inode's io_tree for EXTENT_DELALLOC ranges inside
+        * [start, start + len).
+        *
+        * Returns one of:
+        *  - the mapping from btrfs_get_extent(), if it is not a hole or
+        *    if no usable delalloc range was found
+        *  - a newly allocated mapping covering the part of the hole that
+        *    precedes the delalloc range
+        *  - a newly allocated mapping for the delalloc range itself,
+        *    with block_start set to EXTENT_MAP_DELALLOC
+        *  - an ERR_PTR from btrfs_get_extent(), or ERR_PTR(-ENOMEM) if
+        *    alloc_extent_map() fails
+        */
+       struct extent_map *em;
+       struct extent_map *hole_em = NULL;
+       u64 range_start = start;
+       u64 end;
+       u64 found;
+       u64 found_end;
+       int err = 0;
+       em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
+       if (IS_ERR(em))
+               return em;
+       if (em) {
+               /*
+                * if our em maps to a hole, there might
+                * actually be delalloc bytes behind it, so keep it
+                * around as hole_em and carry on.  Anything that is
+                * not a hole is returned to the caller as is.
+                */
+               if (em->block_start != EXTENT_MAP_HOLE)
+                       return em;
+               else
+                       hole_em = em;
+       }
+       /*
+        * turn start + len into an inclusive end offset, and
+        * check to see if we've wrapped (len == -1 or similar); a
+        * wrapped end is clamped to (u64)-1
+        */
+       end = start + len;
+       if (end < start)
+               end = (u64)-1;
+       else
+               end -= 1;
+       em = NULL;
+       /*
+        * ok, we didn't find anything, lets look for delalloc.
+        * count_range_bits() stores the start of the first matching
+        * state back into range_start and, because contig is 1, stops
+        * counting at the first gap between matching states.
+        */
+       found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start,
+                                end, len, EXTENT_DELALLOC, 1);
+       found_end = range_start + found;
+       if (found_end < range_start)
+               found_end = (u64)-1;
+       /*
+        * we didn't find anything useful, return
+        * the original results from get_extent().  hole_em is cleared
+        * so the cleanup at 'out' does not release what we return.
+        */
+       if (range_start > end || found_end <= start) {
+               em = hole_em;
+               hole_em = NULL;
+               goto out;
+       }
+       /* adjust the range_start to make sure it doesn't
+        * go backwards from the start they passed in, then recompute
+        * the delalloc length from the adjusted start
+        */
+       range_start = max(start,range_start);
+       found = found_end - range_start;
+       if (found > 0) {
+               u64 hole_start = start;
+               u64 hole_len = len;
+               em = alloc_extent_map(GFP_NOFS);
+               if (!em) {
+                       err = -ENOMEM;
+                       goto out;
+               }
+               /*
+                * when btrfs_get_extent can't find anything it
+                * returns one huge hole
+                *
+                * make sure what it found really fits our range, and
+                * adjust to make sure it is based on the start from
+                * the caller.  A hole that lies entirely outside
+                * [start, end] is released and ignored.
+                */
+               if (hole_em) {
+                       u64 calc_end = extent_map_end(hole_em);
+                       if (calc_end <= start || (hole_em->start > end)) {
+                               free_extent_map(hole_em);
+                               hole_em = NULL;
+                       } else {
+                               hole_start = max(hole_em->start, start);
+                               hole_len = calc_end - hole_start;
+                       }
+               }
+               em->bdev = NULL;
+               if (hole_em && range_start > hole_start) {
+                       /* our hole starts before our delalloc, so we
+                        * have to return just the parts of the hole
+                        * that go until the delalloc starts
+                        */
+                       em->len = min(hole_len,
+                                     range_start - hole_start);
+                       em->start = hole_start;
+                       em->orig_start = hole_start;
+                       /*
+                        * don't adjust block start at all,
+                        * it is fixed at EXTENT_MAP_HOLE
+                        */
+                       em->block_start = hole_em->block_start;
+                       em->block_len = hole_len;
+               } else {
+                       em->start = range_start;
+                       em->len = found;
+                       em->orig_start = range_start;
+                       em->block_start = EXTENT_MAP_DELALLOC;
+                       em->block_len = found;
+               }
+       } else if (hole_em) {
+               return hole_em;
+       }
+ out:
+       free_extent_map(hole_em);
+       if (err) {
+               free_extent_map(em);
+               return ERR_PTR(err);
+       }
+       return em;
+ }
  static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
                                                  u64 start, u64 len)
  {
@@@ -6102,7 -6226,7 +6224,7 @@@ out
  static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                __u64 start, __u64 len)
  {
-       return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent);
+       return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
  }
  
  int btrfs_readpage(struct file *file, struct page *page)
@@@ -6531,13 -6655,6 +6653,13 @@@ struct inode *btrfs_alloc_inode(struct 
        return inode;
  }
  
 +static void btrfs_i_callback(struct rcu_head *head)
 +{     /*
 +       * Callback that btrfs_destroy_inode() hands to call_rcu().  It
 +       * recovers the inode from its embedded i_rcu head, reinitialises
 +       * the i_dentry list head, and frees the enclosing btrfs inode
 +       * back to btrfs_inode_cachep.
 +       */
 +      struct inode *inode = container_of(head, struct inode, i_rcu);
 +      INIT_LIST_HEAD(&inode->i_dentry);
 +      kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
 +}
 +
  void btrfs_destroy_inode(struct inode *inode)
  {
        struct btrfs_ordered_extent *ordered;
        inode_tree_del(inode);
        btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
  free:
 -      kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
 +      call_rcu(&inode->i_rcu, btrfs_i_callback);
  }
  
  int btrfs_drop_inode(struct inode *inode)
@@@ -7136,12 -7253,118 +7258,12 @@@ int btrfs_prealloc_file_range_trans(str
                                           min_size, actual_len, alloc_hint, trans);
  }
  
 -static long btrfs_fallocate(struct inode *inode, int mode,
 -                          loff_t offset, loff_t len)
 -{
 -      struct extent_state *cached_state = NULL;
 -      u64 cur_offset;
 -      u64 last_byte;
 -      u64 alloc_start;
 -      u64 alloc_end;
 -      u64 alloc_hint = 0;
 -      u64 locked_end;
 -      u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
 -      struct extent_map *em;
 -      int ret;
 -
 -      alloc_start = offset & ~mask;
 -      alloc_end =  (offset + len + mask) & ~mask;
 -
 -      /*
 -       * wait for ordered IO before we have any locks.  We'll loop again
 -       * below with the locks held.
 -       */
 -      btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
 -
 -      mutex_lock(&inode->i_mutex);
 -      ret = inode_newsize_ok(inode, alloc_end);
 -      if (ret)
 -              goto out;
 -
 -      if (alloc_start > inode->i_size) {
 -              ret = btrfs_cont_expand(inode, alloc_start);
 -              if (ret)
 -                      goto out;
 -      }
 -
 -      ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
 -      if (ret)
 -              goto out;
 -
 -      locked_end = alloc_end - 1;
 -      while (1) {
 -              struct btrfs_ordered_extent *ordered;
 -
 -              /* the extent lock is ordered inside the running
 -               * transaction
 -               */
 -              lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
 -                               locked_end, 0, &cached_state, GFP_NOFS);
 -              ordered = btrfs_lookup_first_ordered_extent(inode,
 -                                                          alloc_end - 1);
 -              if (ordered &&
 -                  ordered->file_offset + ordered->len > alloc_start &&
 -                  ordered->file_offset < alloc_end) {
 -                      btrfs_put_ordered_extent(ordered);
 -                      unlock_extent_cached(&BTRFS_I(inode)->io_tree,
 -                                           alloc_start, locked_end,
 -                                           &cached_state, GFP_NOFS);
 -                      /*
 -                       * we can't wait on the range with the transaction
 -                       * running or with the extent lock held
 -                       */
 -                      btrfs_wait_ordered_range(inode, alloc_start,
 -                                               alloc_end - alloc_start);
 -              } else {
 -                      if (ordered)
 -                              btrfs_put_ordered_extent(ordered);
 -                      break;
 -              }
 -      }
 -
 -      cur_offset = alloc_start;
 -      while (1) {
 -              em = btrfs_get_extent(inode, NULL, 0, cur_offset,
 -                                    alloc_end - cur_offset, 0);
 -              BUG_ON(IS_ERR(em) || !em);
 -              last_byte = min(extent_map_end(em), alloc_end);
 -              last_byte = (last_byte + mask) & ~mask;
 -              if (em->block_start == EXTENT_MAP_HOLE ||
 -                  (cur_offset >= inode->i_size &&
 -                   !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
 -                      ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
 -                                                      last_byte - cur_offset,
 -                                                      1 << inode->i_blkbits,
 -                                                      offset + len,
 -                                                      &alloc_hint);
 -                      if (ret < 0) {
 -                              free_extent_map(em);
 -                              break;
 -                      }
 -              }
 -              free_extent_map(em);
 -
 -              cur_offset = last_byte;
 -              if (cur_offset >= alloc_end) {
 -                      ret = 0;
 -                      break;
 -              }
 -      }
 -      unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
 -                           &cached_state, GFP_NOFS);
 -
 -      btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
 -out:
 -      mutex_unlock(&inode->i_mutex);
 -      return ret;
 -}
 -
  static int btrfs_set_page_dirty(struct page *page)
  {
        return __set_page_dirty_nobuffers(page);
  }
  
 -static int btrfs_permission(struct inode *inode, int mask)
 +static int btrfs_permission(struct inode *inode, int mask, unsigned int flags)
  {
        struct btrfs_root *root = BTRFS_I(inode)->root;
  
                return -EROFS;
        if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE))
                return -EACCES;
 -      return generic_permission(inode, mask, btrfs_check_acl);
 +      return generic_permission(inode, mask, flags, btrfs_check_acl);
  }
  
  static const struct inode_operations btrfs_dir_inode_operations = {
@@@ -7242,6 -7465,7 +7364,6 @@@ static const struct inode_operations bt
        .listxattr      = btrfs_listxattr,
        .removexattr    = btrfs_removexattr,
        .permission     = btrfs_permission,
 -      .fallocate      = btrfs_fallocate,
        .fiemap         = btrfs_fiemap,
  };
  static const struct inode_operations btrfs_special_inode_operations = {
diff --combined fs/btrfs/super.c
@@@ -155,7 -155,8 +155,8 @@@ enum 
        Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
        Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
        Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
-       Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err,
+       Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
+       Opt_enospc_debug, Opt_err,
  };
  
  static match_table_t tokens = {
        {Opt_space_cache, "space_cache"},
        {Opt_clear_cache, "clear_cache"},
        {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
+       {Opt_enospc_debug, "enospc_debug"},
        {Opt_err, NULL},
  };
  
@@@ -358,6 -360,9 +360,9 @@@ int btrfs_parse_options(struct btrfs_ro
                case Opt_user_subvol_rm_allowed:
                        btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
                        break;
+               case Opt_enospc_debug:
+                       btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
+                       break;
                case Opt_err:
                        printk(KERN_INFO "btrfs: unrecognized mount option "
                               "'%s'\n", p);
@@@ -567,7 -572,6 +572,7 @@@ static int btrfs_fill_super(struct supe
        sb->s_maxbytes = MAX_LFS_FILESIZE;
        sb->s_magic = BTRFS_SUPER_MAGIC;
        sb->s_op = &btrfs_super_ops;
 +      sb->s_d_op = &btrfs_dentry_operations;
        sb->s_export_op = &btrfs_export_ops;
        sb->s_xattr = btrfs_xattr_handlers;
        sb->s_time_gran = 1;
@@@ -699,8 -703,8 +704,8 @@@ static int btrfs_set_super(struct super
   * Note:  This is based on get_sb_bdev from fs/super.c with a few additions
   *      for multiple device setup.  Make sure to keep it in sync.
   */
 -static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
 -              const char *dev_name, void *data, struct vfsmount *mnt)
 +static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 +              const char *dev_name, void *data)
  {
        struct block_device *bdev = NULL;
        struct super_block *s;
                                          &subvol_name, &subvol_objectid,
                                          &fs_devices);
        if (error)
 -              return error;
 +              return ERR_PTR(error);
  
        error = btrfs_scan_one_device(dev_name, mode, fs_type, &fs_devices);
        if (error)
                root = new_root;
        }
  
 -      mnt->mnt_sb = s;
 -      mnt->mnt_root = root;
 -
        kfree(subvol_name);
 -      return 0;
 +      return root;
  
  error_s:
        error = PTR_ERR(s);
@@@ -824,7 -831,7 +829,7 @@@ error_close_devices
        kfree(tree_root);
  error_free_subvol_name:
        kfree(subvol_name);
 -      return error;
 +      return ERR_PTR(error);
  }
  
  static int btrfs_remount(struct super_block *sb, int *flags, char *data)
@@@ -1041,7 -1048,7 +1046,7 @@@ static int btrfs_statfs(struct dentry *
  static struct file_system_type btrfs_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "btrfs",
 -      .get_sb         = btrfs_get_sb,
 +      .mount          = btrfs_mount,
        .kill_sb        = kill_anon_super,
        .fs_flags       = FS_REQUIRES_DEV,
  };
@@@ -1110,7 -1117,6 +1115,7 @@@ static const struct file_operations btr
        .unlocked_ioctl  = btrfs_control_ioctl,
        .compat_ioctl = btrfs_control_ioctl,
        .owner   = THIS_MODULE,
 +      .llseek = noop_llseek,
  };
  
  static struct miscdevice btrfs_misc = {
diff --combined fs/btrfs/volumes.c
@@@ -399,6 -399,7 +399,6 @@@ static noinline int device_list_add(con
                device->work.func = pending_bios_fn;
                memcpy(device->uuid, disk_super->dev_item.uuid,
                       BTRFS_UUID_SIZE);
 -              device->barriers = 1;
                spin_lock_init(&device->io_lock);
                device->name = kstrdup(path, GFP_NOFS);
                if (!device->name) {
@@@ -466,6 -467,7 +466,6 @@@ static struct btrfs_fs_devices *clone_f
                device->devid = orig_dev->devid;
                device->work.func = pending_bios_fn;
                memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid));
 -              device->barriers = 1;
                spin_lock_init(&device->io_lock);
                INIT_LIST_HEAD(&device->dev_list);
                INIT_LIST_HEAD(&device->dev_alloc_list);
@@@ -494,7 -496,7 +494,7 @@@ again
                        continue;
  
                if (device->bdev) {
 -                      close_bdev_exclusive(device->bdev, device->mode);
 +                      blkdev_put(device->bdev, device->mode);
                        device->bdev = NULL;
                        fs_devices->open_devices--;
                }
@@@ -528,7 -530,7 +528,7 @@@ static int __btrfs_close_devices(struc
  
        list_for_each_entry(device, &fs_devices->devices, dev_list) {
                if (device->bdev) {
 -                      close_bdev_exclusive(device->bdev, device->mode);
 +                      blkdev_put(device->bdev, device->mode);
                        fs_devices->open_devices--;
                }
                if (device->writeable) {
@@@ -585,15 -587,13 +585,15 @@@ static int __btrfs_open_devices(struct 
        int seeding = 1;
        int ret = 0;
  
 +      flags |= FMODE_EXCL;
 +
        list_for_each_entry(device, head, dev_list) {
                if (device->bdev)
                        continue;
                if (!device->name)
                        continue;
  
 -              bdev = open_bdev_exclusive(device->name, flags, holder);
 +              bdev = blkdev_get_by_path(device->name, flags, holder);
                if (IS_ERR(bdev)) {
                        printk(KERN_INFO "open %s failed\n", device->name);
                        goto error;
  error_brelse:
                brelse(bh);
  error_close:
 -              close_bdev_exclusive(bdev, FMODE_READ);
 +              blkdev_put(bdev, flags);
  error:
                continue;
        }
@@@ -693,8 -693,7 +693,8 @@@ int btrfs_scan_one_device(const char *p
  
        mutex_lock(&uuid_mutex);
  
 -      bdev = open_bdev_exclusive(path, flags, holder);
 +      flags |= FMODE_EXCL;
 +      bdev = blkdev_get_by_path(path, flags, holder);
  
        if (IS_ERR(bdev)) {
                ret = PTR_ERR(bdev);
  
        brelse(bh);
  error_close:
 -      close_bdev_exclusive(bdev, flags);
 +      blkdev_put(bdev, flags);
  error:
        mutex_unlock(&uuid_mutex);
        return ret;
@@@ -1300,8 -1299,8 +1300,8 @@@ int btrfs_rm_device(struct btrfs_root *
                        goto out;
                }
        } else {
 -              bdev = open_bdev_exclusive(device_path, FMODE_READ,
 -                                    root->fs_info->bdev_holder);
 +              bdev = blkdev_get_by_path(device_path, FMODE_READ | FMODE_EXCL,
 +                                        root->fs_info->bdev_holder);
                if (IS_ERR(bdev)) {
                        ret = PTR_ERR(bdev);
                        goto out;
  
        ret = btrfs_shrink_device(device, 0);
        if (ret)
-               goto error_brelse;
+               goto error_undo;
  
        ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
        if (ret)
-               goto error_brelse;
+               goto error_undo;
  
        device->in_fs_metadata = 0;
  
                root->fs_info->fs_devices->latest_bdev = next_device->bdev;
  
        if (device->bdev) {
 -              close_bdev_exclusive(device->bdev, device->mode);
 +              blkdev_put(device->bdev, device->mode);
                device->bdev = NULL;
                device->fs_devices->open_devices--;
        }
@@@ -1411,11 -1410,18 +1411,18 @@@ error_brelse
        brelse(bh);
  error_close:
        if (bdev)
 -              close_bdev_exclusive(bdev, FMODE_READ);
 +              blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
  out:
        mutex_unlock(&root->fs_info->volume_mutex);
        mutex_unlock(&uuid_mutex);
        return ret;
+ error_undo:
+       if (device->writeable) {
+               list_add(&device->dev_alloc_list,
+                        &root->fs_info->fs_devices->alloc_list);
+               root->fs_info->fs_devices->rw_devices++;
+       }
+       goto error_brelse;
  }
  
  /*
@@@ -1563,8 -1569,7 +1570,8 @@@ int btrfs_init_new_device(struct btrfs_
        if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding)
                return -EINVAL;
  
 -      bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder);
 +      bdev = blkdev_get_by_path(device_path, FMODE_EXCL,
 +                                root->fs_info->bdev_holder);
        if (IS_ERR(bdev))
                return PTR_ERR(bdev);
  
  
        lock_chunks(root);
  
 -      device->barriers = 1;
        device->writeable = 1;
        device->work.func = pending_bios_fn;
        generate_random_uuid(device->uuid);
        device->dev_root = root->fs_info->dev_root;
        device->bdev = bdev;
        device->in_fs_metadata = 1;
-       device->mode = 0;
+       device->mode = FMODE_EXCL;
        set_blocksize(device->bdev, 4096);
  
        if (seeding_dev) {
@@@ -1698,7 -1704,7 +1705,7 @@@ out
        mutex_unlock(&root->fs_info->volume_mutex);
        return ret;
  error:
 -      close_bdev_exclusive(bdev, 0);
 +      blkdev_put(bdev, FMODE_EXCL);
        if (seeding_dev) {
                mutex_unlock(&uuid_mutex);
                up_write(&sb->s_umount);
@@@ -3396,6 -3402,7 +3403,6 @@@ static struct btrfs_device *add_missing
                return NULL;
        list_add(&device->dev_list,
                 &fs_devices->devices);
 -      device->barriers = 1;
        device->dev_root = root->fs_info->dev_root;
        device->devid = devid;
        device->work.func = pending_bios_fn;