Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

author Linus Torvalds <torvalds@linux-foundation.org>

Fri, 6 Mar 2015 21:52:54 +0000 (13:52 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 6 Mar 2015 21:52:54 +0000 (13:52 -0800)
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 6 Mar 2015 21:52:54 +0000 (13:52 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 6 Mar 2015 21:52:54 +0000 (13:52 -0800)
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c

index 9936421..6d67f32 100644 (file)
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1645,14 +1645,14 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
  
         parent_nritems = btrfs_header_nritems(parent);
         blocksize = root->nodesize;
-       end_slot = parent_nritems;
+       end_slot = parent_nritems - 1;
  
-       if (parent_nritems == 1)
+       if (parent_nritems <= 1)
                 return 0;
  
         btrfs_set_lock_blocking(parent);
  
-       for (i = start_slot; i < end_slot; i++) {
+       for (i = start_slot; i <= end_slot; i++) {
                 int close = 1;
  
                 btrfs_node_key(parent, &disk_key, i);
@@ -1669,7 +1669,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
                         other = btrfs_node_blockptr(parent, i - 1);
                         close = close_blocks(blocknr, other, blocksize);
                 }
-               if (!close && i < end_slot - 2) {
+               if (!close && i < end_slot) {
                         other = btrfs_node_blockptr(parent, i + 1);
                         close = close_blocks(blocknr, other, blocksize);
                 }
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c

index 571f402..6f08045 100644 (file)
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3208,6 +3208,8 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
                 return 0;
         }
  
+       if (trans->aborted)
+               return 0;
  again:
         inode = lookup_free_space_inode(root, block_group, path);
         if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
@@ -3243,6 +3245,20 @@ again:
          */
         BTRFS_I(inode)->generation = 0;
         ret = btrfs_update_inode(trans, root, inode);
+       if (ret) {
+               /*
+                * Theoretically we could recover from this by simply setting
+                * the super cache generation to 0 so we know to invalidate the
+                * cache, but then we'd have to keep track of the block groups
+                * that fail this way so we know we _have_ to reset this cache
+                * before the next commit or risk reading stale cache.  So to
+                * limit our exposure to horrible edge cases, let's just abort
+                * the transaction; this only happens in really bad situations
+                * anyway.
+                */
+               btrfs_abort_transaction(trans, root, ret);
+               goto out_put;
+       }
         WARN_ON(ret);
  
         if (i_size_read(inode) > 0) {
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c

index b78bbba..30982bb 100644 (file)
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1811,22 +1811,10 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
         mutex_unlock(&inode->i_mutex);
  
         /*
-        * we want to make sure fsync finds this change
-        * but we haven't joined a transaction running right now.
-        *
-        * Later on, someone is sure to update the inode and get the
-        * real transid recorded.
-        *
-        * We set last_trans now to the fs_info generation + 1,
-        * this will either be one more than the running transaction
-        * or the generation used for the next transaction if there isn't
-        * one running right now.
-        *
          * We also have to set last_sub_trans to the current log transid,
          * otherwise subsequent syncs to a file that's been synced in this
          * transaction will appear to have already occured.
          */
-       BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
         BTRFS_I(inode)->last_sub_trans = root->log_transid;
         if (num_written > 0) {
                 err = generic_write_sync(file, pos, num_written);
@@ -1959,25 +1947,37 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
         atomic_inc(&root->log_batch);
  
         /*
-        * check the transaction that last modified this inode
-        * and see if its already been committed
-        */
-       if (!BTRFS_I(inode)->last_trans) {
-               mutex_unlock(&inode->i_mutex);
-               goto out;
-       }
-
-       /*
-        * if the last transaction that changed this file was before
-        * the current transaction, we can bail out now without any
-        * syncing
+        * If the last transaction that changed this file was before the current
+        * transaction and we have the full sync flag set in our inode, we can
+        * bail out now without any syncing.
+        *
+        * Note that we can't bail out if the full sync flag isn't set. This is
+        * because when the full sync flag is set we start all ordered extents
+        * and wait for them to fully complete - when they complete they update
+        * the inode's last_trans field through:
+        *
+        *     btrfs_finish_ordered_io() ->
+        *         btrfs_update_inode_fallback() ->
+        *             btrfs_update_inode() ->
+        *                 btrfs_set_inode_last_trans()
+        *
+        * So we are sure that last_trans is up to date and can do this check to
+        * bail out safely. For the fast path, when the full sync flag is not
+        * set in our inode, we can not do it because we start only our ordered
+        * extents and don't wait for them to complete (that is when
+        * btrfs_finish_ordered_io runs), so here at this point their last_trans
+        * value might be less than or equals to fs_info->last_trans_committed,
+        * and setting a speculative last_trans for an inode when a buffered
+        * write is made (such as fs_info->generation + 1 for example) would not
+        * be reliable since after setting the value and before fsync is called
+        * any number of transactions can start and commit (transaction kthread
+        * commits the current transaction periodically), and a transaction
+        * commit does not start nor waits for ordered extents to complete.
          */
         smp_mb();
         if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
-           BTRFS_I(inode)->last_trans <=
-           root->fs_info->last_trans_committed) {
-               BTRFS_I(inode)->last_trans = 0;
-
+           (full_sync && BTRFS_I(inode)->last_trans <=
+            root->fs_info->last_trans_committed)) {
                 /*
                  * We'v had everything committed since the last time we were
                  * modified so clear this flag in case it was set for whatever
@@ -2275,6 +2275,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
         bool same_page;
         bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
         u64 ino_size;
+       bool truncated_page = false;
+       bool updated_inode = false;
  
         ret = btrfs_wait_ordered_range(inode, offset, len);
         if (ret)
@@ -2306,13 +2308,18 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
          * entire page.
          */
         if (same_page && len < PAGE_CACHE_SIZE) {
-               if (offset < ino_size)
+               if (offset < ino_size) {
+                       truncated_page = true;
                         ret = btrfs_truncate_page(inode, offset, len, 0);
+               } else {
+                       ret = 0;
+               }
                 goto out_only_mutex;
         }
  
         /* zero back part of the first page */
         if (offset < ino_size) {
+               truncated_page = true;
                 ret = btrfs_truncate_page(inode, offset, 0, 0);
                 if (ret) {
                         mutex_unlock(&inode->i_mutex);
@@ -2348,6 +2355,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
                 if (!ret) {
                         /* zero the front end of the last page */
                         if (tail_start + tail_len < ino_size) {
+                               truncated_page = true;
                                 ret = btrfs_truncate_page(inode,
                                                 tail_start + tail_len, 0, 1);
                                 if (ret)
@@ -2357,8 +2365,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
         }
  
         if (lockend < lockstart) {
-               mutex_unlock(&inode->i_mutex);
-               return 0;
+               ret = 0;
+               goto out_only_mutex;
         }
  
         while (1) {
@@ -2506,6 +2514,7 @@ out_trans:
  
         trans->block_rsv = &root->fs_info->trans_block_rsv;
         ret = btrfs_update_inode(trans, root, inode);
+       updated_inode = true;
         btrfs_end_transaction(trans, root);
         btrfs_btree_balance_dirty(root);
  out_free:
@@ -2515,6 +2524,22 @@ out:
         unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
                              &cached_state, GFP_NOFS);
  out_only_mutex:
+       if (!updated_inode && truncated_page && !ret && !err) {
+               /*
+                * If we only end up zeroing part of a page, we still need to
+                * update the inode item, so that all the time fields are
+                * updated as well as the necessary btrfs inode in memory fields
+                * for detecting, at fsync time, if the inode isn't yet in the
+                * log tree or it's there but not up to date.
+                */
+               trans = btrfs_start_transaction(root, 1);
+               if (IS_ERR(trans)) {
+                       err = PTR_ERR(trans);
+               } else {
+                       err = btrfs_update_inode(trans, root, inode);
+                       ret = btrfs_end_transaction(trans, root);
+               }
+       }
         mutex_unlock(&inode->i_mutex);
         if (ret && !err)
                 err = ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c

index a85c23d..da828cf 100644 (file)
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7285,7 +7285,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
             ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
              em->block_start != EXTENT_MAP_HOLE)) {
                 int type;
-               int ret;
                 u64 block_start, orig_start, orig_block_len, ram_bytes;
  
                 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c

index 534544e..157cc54 100644 (file)
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -452,9 +452,7 @@ void btrfs_get_logged_extents(struct inode *inode,
                         continue;
                 if (entry_end(ordered) <= start)
                         break;
-               if (!list_empty(&ordered->log_list))
-                       continue;
-               if (test_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
+               if (test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
                         continue;
                 list_add(&ordered->log_list, logged_list);
                 atomic_inc(&ordered->refs);
@@ -511,8 +509,7 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
                 wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
                                                    &ordered->flags));
  
-               if (!test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
-                       list_add_tail(&ordered->trans_list, &trans->ordered);
+               list_add_tail(&ordered->trans_list, &trans->ordered);
                 spin_lock_irq(&log->log_extents_lock[index]);
         }
         spin_unlock_irq(&log->log_extents_lock[index]);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c

index fe58572..d6033f5 100644 (file)
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -230,6 +230,7 @@ struct pending_dir_move {
         u64 parent_ino;
         u64 ino;
         u64 gen;
+       bool is_orphan;
         struct list_head update_refs;
  };
  
@@ -2984,7 +2985,8 @@ static int add_pending_dir_move(struct send_ctx *sctx,
                                 u64 ino_gen,
                                 u64 parent_ino,
                                 struct list_head *new_refs,
-                               struct list_head *deleted_refs)
+                               struct list_head *deleted_refs,
+                               const bool is_orphan)
  {
         struct rb_node **p = &sctx->pending_dir_moves.rb_node;
         struct rb_node *parent = NULL;
@@ -2999,6 +3001,7 @@ static int add_pending_dir_move(struct send_ctx *sctx,
         pm->parent_ino = parent_ino;
         pm->ino = ino;
         pm->gen = ino_gen;
+       pm->is_orphan = is_orphan;
         INIT_LIST_HEAD(&pm->list);
         INIT_LIST_HEAD(&pm->update_refs);
         RB_CLEAR_NODE(&pm->node);
@@ -3131,16 +3134,20 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
         rmdir_ino = dm->rmdir_ino;
         free_waiting_dir_move(sctx, dm);
  
-       ret = get_first_ref(sctx->parent_root, pm->ino,
-                           &parent_ino, &parent_gen, name);
-       if (ret < 0)
-               goto out;
-
-       ret = get_cur_path(sctx, parent_ino, parent_gen,
-                          from_path);
-       if (ret < 0)
-               goto out;
-       ret = fs_path_add_path(from_path, name);
+       if (pm->is_orphan) {
+               ret = gen_unique_name(sctx, pm->ino,
+                                     pm->gen, from_path);
+       } else {
+               ret = get_first_ref(sctx->parent_root, pm->ino,
+                                   &parent_ino, &parent_gen, name);
+               if (ret < 0)
+                       goto out;
+               ret = get_cur_path(sctx, parent_ino, parent_gen,
+                                  from_path);
+               if (ret < 0)
+                       goto out;
+               ret = fs_path_add_path(from_path, name);
+       }
         if (ret < 0)
                 goto out;
  
@@ -3150,7 +3157,8 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
                 LIST_HEAD(deleted_refs);
                 ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
                 ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
-                                          &pm->update_refs, &deleted_refs);
+                                          &pm->update_refs, &deleted_refs,
+                                          pm->is_orphan);
                 if (ret < 0)
                         goto out;
                 if (rmdir_ino) {
@@ -3283,6 +3291,127 @@ out:
         return ret;
  }
  
+/*
+ * We might need to delay a directory rename even when no ancestor directory
+ * (in the send root) with a higher inode number than ours (sctx->cur_ino) was
+ * renamed. This happens when we rename a directory to the old name (the name
+ * in the parent root) of some other unrelated directory that got its rename
+ * delayed due to some ancestor with higher number that got renamed.
+ *
+ * Example:
+ *
+ * Parent snapshot:
+ * .                                       (ino 256)
+ * |---- a/                                (ino 257)
+ * |     |---- file                        (ino 260)
+ * |
+ * |---- b/                                (ino 258)
+ * |---- c/                                (ino 259)
+ *
+ * Send snapshot:
+ * .                                       (ino 256)
+ * |---- a/                                (ino 258)
+ * |---- x/                                (ino 259)
+ *       |---- y/                          (ino 257)
+ *             |----- file                 (ino 260)
+ *
+ * Here we can not rename 258 from 'b' to 'a' without the rename of inode 257
+ * from 'a' to 'x/y' happening first, which in turn depends on the rename of
+ * inode 259 from 'c' to 'x'. So the order of rename commands the send stream
+ * must issue is:
+ *
+ * 1 - rename 259 from 'c' to 'x'
+ * 2 - rename 257 from 'a' to 'x/y'
+ * 3 - rename 258 from 'b' to 'a'
+ *
+ * Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can
+ * be done right away and < 0 on error.
+ */
+static int wait_for_dest_dir_move(struct send_ctx *sctx,
+                                 struct recorded_ref *parent_ref,
+                                 const bool is_orphan)
+{
+       struct btrfs_path *path;
+       struct btrfs_key key;
+       struct btrfs_key di_key;
+       struct btrfs_dir_item *di;
+       u64 left_gen;
+       u64 right_gen;
+       int ret = 0;
+
+       if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
+               return 0;       /* waiting_dir_moves is empty */
+
+       path = alloc_path_for_send();
+       if (!path)
+               return -ENOMEM;
+
+       key.objectid = parent_ref->dir;
+       key.type = BTRFS_DIR_ITEM_KEY;
+       key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len);
+       /* Search the parent root; ret > 0 is turned into 0 (no delay). */
+       ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
+       if (ret < 0) {
+               goto out;
+       } else if (ret > 0) {
+               ret = 0;
+               goto out;
+       }
+
+       di = btrfs_match_dir_item_name(sctx->parent_root, path,
+                                      parent_ref->name, parent_ref->name_len);
+       if (!di) {
+               ret = 0;
+               goto out;
+       }
+       /*
+        * di_key.objectid has the number of the inode that has a dentry in the
+        * parent directory with the same name that sctx->cur_ino is being
+        * renamed to. We need to check if that inode is in the send root as
+        * well and if it is currently marked as an inode with a pending rename,
+        * if it is, we need to delay the rename of sctx->cur_ino as well, so
+        * that it happens after that other inode is renamed.
+        */
+       btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key);
+       if (di_key.type != BTRFS_INODE_ITEM_KEY) {
+               ret = 0;
+               goto out;
+       }
+       /* Read that inode's generation in both roots so they can be compared. */
+       ret = get_inode_info(sctx->parent_root, di_key.objectid, NULL,
+                            &left_gen, NULL, NULL, NULL, NULL);
+       if (ret < 0)
+               goto out;
+       ret = get_inode_info(sctx->send_root, di_key.objectid, NULL,
+                            &right_gen, NULL, NULL, NULL, NULL);
+       if (ret < 0) {
+               if (ret == -ENOENT)
+                       ret = 0;        /* -ENOENT is not an error: no delay */
+               goto out;
+       }
+
+       /* Different inode, no need to delay the rename of sctx->cur_ino */
+       if (right_gen != left_gen) {
+               ret = 0;
+               goto out;
+       }
+       /* Record sctx->cur_ino as pending on di_key.objectid and return 1. */
+       if (is_waiting_for_move(sctx, di_key.objectid)) {
+               ret = add_pending_dir_move(sctx,
+                                          sctx->cur_ino,
+                                          sctx->cur_inode_gen,
+                                          di_key.objectid,
+                                          &sctx->new_refs,
+                                          &sctx->deleted_refs,
+                                          is_orphan);
+               if (!ret)
+                       ret = 1;
+       }
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
  static int wait_for_parent_move(struct send_ctx *sctx,
                                 struct recorded_ref *parent_ref)
  {
@@ -3349,7 +3478,8 @@ out:
                                            sctx->cur_inode_gen,
                                            ino,
                                            &sctx->new_refs,
-                                          &sctx->deleted_refs);
+                                          &sctx->deleted_refs,
+                                          false);
                 if (!ret)
                         ret = 1;
         }
@@ -3372,6 +3502,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
         int did_overwrite = 0;
         int is_orphan = 0;
         u64 last_dir_ino_rm = 0;
+       bool can_rename = true;
  
  verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
  
@@ -3490,12 +3621,22 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
                         }
                 }
  
+               if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) {
+                       ret = wait_for_dest_dir_move(sctx, cur, is_orphan);
+                       if (ret < 0)
+                               goto out;
+                       if (ret == 1) {
+                               can_rename = false;
+                               *pending_move = 1;
+                       }
+               }
+
                 /*
                  * link/move the ref to the new place. If we have an orphan
                  * inode, move it and update valid_path. If not, link or move
                  * it depending on the inode mode.
                  */
-               if (is_orphan) {
+               if (is_orphan && can_rename) {
                         ret = send_rename(sctx, valid_path, cur->full_path);
                         if (ret < 0)
                                 goto out;
@@ -3503,7 +3644,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
                         ret = fs_path_copy(valid_path, cur->full_path);
                         if (ret < 0)
                                 goto out;
-               } else {
+               } else if (can_rename) {
                         if (S_ISDIR(sctx->cur_inode_mode)) {
                                 /*
                                  * Dirs can't be linked, so move it. For moved
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c

index 7e80f32..88e51ad 100644 (file)
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1052,9 +1052,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
                 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
                 if (ret)
                         return ret;
-               ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
-               if (ret)
-                       return ret;
         }
  
         return 0;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c

index 9a37f8b..c5b8ba3 100644 (file)
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1012,7 +1012,7 @@ again:
                 base = btrfs_item_ptr_offset(leaf, path->slots[0]);
  
                 while (cur_offset < item_size) {
-                       extref = (struct btrfs_inode_extref *)base + cur_offset;
+                       extref = (struct btrfs_inode_extref *)(base + cur_offset);
  
                         victim_name_len = btrfs_inode_extref_name_len(leaf, extref);
  
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c

index 47b1946..883b936 100644 (file)
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -111,6 +111,8 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
                                         name, name_len, -1);
                 if (!di && (flags & XATTR_REPLACE))
                         ret = -ENODATA;
+               else if (IS_ERR(di))
+                       ret = PTR_ERR(di);
                 else if (di)
                         ret = btrfs_delete_one_dir_name(trans, root, path, di);
                 goto out;
@@ -127,10 +129,12 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
                 ASSERT(mutex_is_locked(&inode->i_mutex));
                 di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
                                         name, name_len, 0);
-               if (!di) {
+               if (!di)
                         ret = -ENODATA;
+               else if (IS_ERR(di))
+                       ret = PTR_ERR(di);
+               if (ret)
                         goto out;
-               }
                 btrfs_release_path(path);
                 di = NULL;
         }
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 6 Mar 2015 21:52:54 +0000 (13:52 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 6 Mar 2015 21:52:54 +0000 (13:52 -0800)
fs/btrfs/ctree.c		patch \| blob \| history
fs/btrfs/extent-tree.c		patch \| blob \| history
fs/btrfs/file.c		patch \| blob \| history
fs/btrfs/inode.c		patch \| blob \| history
fs/btrfs/ordered-data.c		patch \| blob \| history
fs/btrfs/send.c		patch \| blob \| history
fs/btrfs/transaction.c		patch \| blob \| history
fs/btrfs/tree-log.c		patch \| blob \| history
fs/btrfs/xattr.c		patch \| blob \| history