Merge branch 'for-linus-4.9' of git://git.kernel.org/pub/scm/linux/kernel/git/mason...
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 28 Oct 2016 17:07:35 +0000 (10:07 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 28 Oct 2016 17:07:35 +0000 (10:07 -0700)
Pull btrfs fixes from Chris Mason:
 "My patch fixes the btrfs list_head abuse that we tracked down during
  Dave Jones' memory corruption investigation. With both Jens and my
  patches in place, I'm no longer able to trigger problems.

  Filipe is fixing a difficult old bug between snapshots, balance and
  send. Dave is cooking a few more for the next rc, but these are tested
  and ready"

* 'for-linus-4.9' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  btrfs: fix races on root_log_ctx lists
  btrfs: fix incremental send failure caused by balance

fs/btrfs/send.c
fs/btrfs/tree-log.c

index 01bc36c..71261b4 100644 (file)
@@ -5805,6 +5805,64 @@ static int changed_extent(struct send_ctx *sctx,
        int ret = 0;
 
        if (sctx->cur_ino != sctx->cmp_key->objectid) {
+
+               if (result == BTRFS_COMPARE_TREE_CHANGED) {
+                       struct extent_buffer *leaf_l;
+                       struct extent_buffer *leaf_r;
+                       struct btrfs_file_extent_item *ei_l;
+                       struct btrfs_file_extent_item *ei_r;
+
+                       leaf_l = sctx->left_path->nodes[0];
+                       leaf_r = sctx->right_path->nodes[0];
+                       ei_l = btrfs_item_ptr(leaf_l,
+                                             sctx->left_path->slots[0],
+                                             struct btrfs_file_extent_item);
+                       ei_r = btrfs_item_ptr(leaf_r,
+                                             sctx->right_path->slots[0],
+                                             struct btrfs_file_extent_item);
+
+                       /*
+                        * We may have found an extent item that has changed
+                        * only its disk_bytenr field and the corresponding
+                        * inode item was not updated. This case happens due to
+                        * very specific timings during relocation when a leaf
+                        * that contains file extent items is COWed while
+                        * relocation is ongoing and its in the stage where it
+                        * updates data pointers. So when this happens we can
+                        * safely ignore it since we know it's the same extent,
+                        * but just at different logical and physical locations
+                        * (when an extent is fully replaced with a new one, we
+                        * know the generation number must have changed too,
+                        * since snapshot creation implies committing the current
+                        * transaction, and the inode item must have been updated
+                        * as well).
+                        * This replacement of the disk_bytenr happens at
+                        * relocation.c:replace_file_extents() through
+                        * relocation.c:btrfs_reloc_cow_block().
+                        */
+                       if (btrfs_file_extent_generation(leaf_l, ei_l) ==
+                           btrfs_file_extent_generation(leaf_r, ei_r) &&
+                           btrfs_file_extent_ram_bytes(leaf_l, ei_l) ==
+                           btrfs_file_extent_ram_bytes(leaf_r, ei_r) &&
+                           btrfs_file_extent_compression(leaf_l, ei_l) ==
+                           btrfs_file_extent_compression(leaf_r, ei_r) &&
+                           btrfs_file_extent_encryption(leaf_l, ei_l) ==
+                           btrfs_file_extent_encryption(leaf_r, ei_r) &&
+                           btrfs_file_extent_other_encoding(leaf_l, ei_l) ==
+                           btrfs_file_extent_other_encoding(leaf_r, ei_r) &&
+                           btrfs_file_extent_type(leaf_l, ei_l) ==
+                           btrfs_file_extent_type(leaf_r, ei_r) &&
+                           btrfs_file_extent_disk_bytenr(leaf_l, ei_l) !=
+                           btrfs_file_extent_disk_bytenr(leaf_r, ei_r) &&
+                           btrfs_file_extent_disk_num_bytes(leaf_l, ei_l) ==
+                           btrfs_file_extent_disk_num_bytes(leaf_r, ei_r) &&
+                           btrfs_file_extent_offset(leaf_l, ei_l) ==
+                           btrfs_file_extent_offset(leaf_r, ei_r) &&
+                           btrfs_file_extent_num_bytes(leaf_l, ei_l) ==
+                           btrfs_file_extent_num_bytes(leaf_r, ei_r))
+                               return 0;
+               }
+
                inconsistent_snapshot_error(sctx, result, "extent");
                return -EIO;
        }
index 528cae1..3d33c4e 100644 (file)
@@ -2713,14 +2713,12 @@ static inline void btrfs_remove_all_log_ctxs(struct btrfs_root *root,
                                             int index, int error)
 {
        struct btrfs_log_ctx *ctx;
+       struct btrfs_log_ctx *safe;
 
-       if (!error) {
-               INIT_LIST_HEAD(&root->log_ctxs[index]);
-               return;
-       }
-
-       list_for_each_entry(ctx, &root->log_ctxs[index], list)
+       list_for_each_entry_safe(ctx, safe, &root->log_ctxs[index], list) {
+               list_del_init(&ctx->list);
                ctx->log_ret = error;
+       }
 
        INIT_LIST_HEAD(&root->log_ctxs[index]);
 }
@@ -2961,13 +2959,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
        mutex_unlock(&root->log_mutex);
 
 out_wake_log_root:
-       /*
-        * We needn't get log_mutex here because we are sure all
-        * the other tasks are blocked.
-        */
+       mutex_lock(&log_root_tree->log_mutex);
        btrfs_remove_all_log_ctxs(log_root_tree, index2, ret);
 
-       mutex_lock(&log_root_tree->log_mutex);
        log_root_tree->log_transid_committed++;
        atomic_set(&log_root_tree->log_commit[index2], 0);
        mutex_unlock(&log_root_tree->log_mutex);
@@ -2978,10 +2972,8 @@ out_wake_log_root:
        if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
                wake_up(&log_root_tree->log_commit_wait[index2]);
 out:
-       /* See above. */
-       btrfs_remove_all_log_ctxs(root, index1, ret);
-
        mutex_lock(&root->log_mutex);
+       btrfs_remove_all_log_ctxs(root, index1, ret);
        root->log_transid_committed++;
        atomic_set(&root->log_commit[index1], 0);
        mutex_unlock(&root->log_mutex);