Merge branch 'sysfs-fsdevices-4.2-part1' of git://git.kernel.org/pub/scm/linux/kernel...
author Chris Mason <clm@fb.com>
Tue, 23 Jun 2015 12:34:39 +0000 (05:34 -0700)
committer Chris Mason <clm@fb.com>
Tue, 23 Jun 2015 12:34:39 +0000 (05:34 -0700)
1  2 
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/volumes.c

diff --combined fs/btrfs/ctree.h
@@@ -174,7 -174,7 +174,7 @@@ struct btrfs_ordered_sum
  /* csum types */
  #define BTRFS_CSUM_TYPE_CRC32 0
  
 -static int btrfs_csum_sizes[] = { 4, 0 };
 +static int btrfs_csum_sizes[] = { 4 };
  
  /* four bytes for CRC32 */
  #define BTRFS_EMPTY_DIR_SIZE 0
@@@ -1619,10 -1619,7 +1619,7 @@@ struct btrfs_fs_info 
        struct task_struct *cleaner_kthread;
        int thread_pool_size;
  
-       struct kobject super_kobj;
        struct kobject *space_info_kobj;
-       struct kobject *device_dir_kobj;
-       struct completion kobj_unregister;
        int do_barriers;
        int closing;
        int log_root_recovering;
        struct btrfs_workqueue *scrub_workers;
        struct btrfs_workqueue *scrub_wr_completion_workers;
        struct btrfs_workqueue *scrub_nocow_workers;
 +      struct btrfs_workqueue *scrub_parity_workers;
  
  #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
        u32 check_integrity_print_mask;
        /* list of dirty qgroups to be written at next commit */
        struct list_head dirty_qgroups;
  
 -      /* used by btrfs_qgroup_record_ref for an efficient tree traversal */
 +      /* used by qgroup for an efficient tree traversal */
        u64 qgroup_seq;
  
        /* qgroup rescan items */
@@@ -3459,7 -3455,6 +3456,7 @@@ int btrfs_check_data_free_space(struct 
  void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
  void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root);
 +void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans);
  int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
                                  struct inode *inode);
  void btrfs_orphan_release_metadata(struct inode *inode);
@@@ -3517,9 -3512,6 +3514,9 @@@ int btrfs_delayed_refs_qgroup_accountin
  int __get_raid_index(u64 flags);
  int btrfs_start_write_no_snapshoting(struct btrfs_root *root);
  void btrfs_end_write_no_snapshoting(struct btrfs_root *root);
 +void check_system_chunk(struct btrfs_trans_handle *trans,
 +                      struct btrfs_root *root,
 +                      const u64 type);
  /* ctree.c */
  int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
                     int level, int *slot);
@@@ -4055,7 -4047,6 +4052,7 @@@ void btrfs_printk(const struct btrfs_fs
  
  #ifdef CONFIG_BTRFS_ASSERT
  
 +__cold
  static inline void assfail(char *expr, char *file, int line)
  {
        pr_err("BTRFS: assertion failed: %s, file: %s, line: %d",
  
  #define btrfs_assert()
  __printf(5, 6)
 +__cold
  void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
                     unsigned int line, int errno, const char *fmt, ...);
  
  
 +__cold
  void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root, const char *function,
                               unsigned int line, int errno);
@@@ -4119,17 -4108,11 +4116,17 @@@ static inline int __btrfs_fs_incompat(s
   * Call btrfs_abort_transaction as early as possible when an error condition is
   * detected, that way the exact line number is reported.
   */
 -
  #define btrfs_abort_transaction(trans, root, errno)           \
  do {                                                          \
 -      __btrfs_abort_transaction(trans, root, __func__,        \
 -                                __LINE__, errno);             \
 +      /* Report first abort since mount */                    \
 +      if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED,     \
 +                      &((root)->fs_info->fs_state))) {        \
 +              WARN(1, KERN_DEBUG                              \
 +              "BTRFS: Transaction aborted (error %d)\n",      \
 +              (errno));                                       \
 +      }                                                       \
 +      __btrfs_abort_transaction((trans), (root), __func__,    \
 +                                __LINE__, (errno));           \
  } while (0)
  
  #define btrfs_std_error(fs_info, errno)                               \
@@@ -4146,7 -4129,6 +4143,7 @@@ do {                                                            
  } while (0)
  
  __printf(5, 6)
 +__cold
  void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
                   unsigned int line, int errno, const char *fmt, ...);
  
diff --combined fs/btrfs/disk-io.c
@@@ -1149,12 -1149,12 +1149,12 @@@ struct extent_buffer *read_tree_block(s
  
        buf = btrfs_find_create_tree_block(root, bytenr);
        if (!buf)
 -              return NULL;
 +              return ERR_PTR(-ENOMEM);
  
        ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
        if (ret) {
                free_extent_buffer(buf);
 -              return NULL;
 +              return ERR_PTR(ret);
        }
        return buf;
  
@@@ -1509,19 -1509,20 +1509,19 @@@ static struct btrfs_root *btrfs_read_tr
        generation = btrfs_root_generation(&root->root_item);
        root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
                                     generation);
 -      if (!root->node) {
 -              ret = -ENOMEM;
 +      if (IS_ERR(root->node)) {
 +              ret = PTR_ERR(root->node);
                goto find_fail;
        } else if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
                ret = -EIO;
 -              goto read_fail;
 +              free_extent_buffer(root->node);
 +              goto find_fail;
        }
        root->commit_root = btrfs_root_node(root);
  out:
        btrfs_free_path(path);
        return root;
  
 -read_fail:
 -      free_extent_buffer(root->node);
  find_fail:
        kfree(root);
  alloc_fail:
@@@ -2319,12 -2320,8 +2319,12 @@@ static int btrfs_replay_log(struct btrf
  
        log_tree_root->node = read_tree_block(tree_root, bytenr,
                        fs_info->generation + 1);
 -      if (!log_tree_root->node ||
 -          !extent_buffer_uptodate(log_tree_root->node)) {
 +      if (IS_ERR(log_tree_root->node)) {
 +              printk(KERN_ERR "BTRFS: failed to read log tree\n");
 +              ret = PTR_ERR(log_tree_root->node);
 +              kfree(log_tree_root);
 +              return ret;
 +      } else if (!extent_buffer_uptodate(log_tree_root->node)) {
                printk(KERN_ERR "BTRFS: failed to read log tree\n");
                free_extent_buffer(log_tree_root->node);
                kfree(log_tree_root);
@@@ -2497,7 -2494,6 +2497,6 @@@ int open_ctree(struct super_block *sb
        seqlock_init(&fs_info->profiles_lock);
        init_rwsem(&fs_info->delayed_iput_sem);
  
-       init_completion(&fs_info->kobj_unregister);
        INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
        INIT_LIST_HEAD(&fs_info->space_info);
        INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
        chunk_root->node = read_tree_block(chunk_root,
                                           btrfs_super_chunk_root(disk_super),
                                           generation);
 -      if (!chunk_root->node ||
 -          !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
 +      if (IS_ERR(chunk_root->node) ||
 +          !extent_buffer_uptodate(chunk_root->node)) {
                printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n",
                       sb->s_id);
                goto fail_tree_roots;
@@@ -2837,8 -2833,8 +2836,8 @@@ retry_root_backup
        tree_root->node = read_tree_block(tree_root,
                                          btrfs_super_root(disk_super),
                                          generation);
 -      if (!tree_root->node ||
 -          !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
 +      if (IS_ERR(tree_root->node) ||
 +          !extent_buffer_uptodate(tree_root->node)) {
                printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n",
                       sb->s_id);
  
  
        btrfs_close_extra_devices(fs_devices, 1);
  
+       ret = btrfs_sysfs_add_fsid(fs_devices, NULL);
+       if (ret) {
+               pr_err("BTRFS: failed to init sysfs fsid interface: %d\n", ret);
+               goto fail_block_groups;
+       }
+       ret = btrfs_sysfs_add_device(fs_devices);
+       if (ret) {
+               pr_err("BTRFS: failed to init sysfs device interface: %d\n", ret);
+               goto fail_fsdev_sysfs;
+       }
        ret = btrfs_sysfs_add_one(fs_info);
        if (ret) {
                pr_err("BTRFS: failed to init sysfs interface: %d\n", ret);
-               goto fail_block_groups;
+               goto fail_fsdev_sysfs;
        }
  
        ret = btrfs_init_space_info(fs_info);
@@@ -3058,6 -3066,9 +3069,9 @@@ fail_cleaner
  fail_sysfs:
        btrfs_sysfs_remove_one(fs_info);
  
+ fail_fsdev_sysfs:
+       btrfs_sysfs_remove_fsid(fs_info->fs_devices);
  fail_block_groups:
        btrfs_put_block_group_cache(fs_info);
        btrfs_free_block_groups(fs_info);
@@@ -3735,6 -3746,7 +3749,7 @@@ void close_ctree(struct btrfs_root *roo
        }
  
        btrfs_sysfs_remove_one(fs_info);
+       btrfs_sysfs_remove_fsid(fs_info->fs_devices);
  
        btrfs_free_fs_roots(fs_info);
  
@@@ -4063,7 -4075,6 +4078,7 @@@ static int btrfs_destroy_delayed_refs(s
  
        while ((node = rb_first(&delayed_refs->href_root)) != NULL) {
                struct btrfs_delayed_ref_head *head;
 +              struct btrfs_delayed_ref_node *tmp;
                bool pin_bytes = false;
  
                head = rb_entry(node, struct btrfs_delayed_ref_head,
                        continue;
                }
                spin_lock(&head->lock);
 -              while ((node = rb_first(&head->ref_root)) != NULL) {
 -                      ref = rb_entry(node, struct btrfs_delayed_ref_node,
 -                                     rb_node);
 +              list_for_each_entry_safe_reverse(ref, tmp, &head->ref_list,
 +                                               list) {
                        ref->in_tree = 0;
 -                      rb_erase(&ref->rb_node, &head->ref_root);
 +                      list_del(&ref->list);
                        atomic_dec(&delayed_refs->num_entries);
                        btrfs_put_delayed_ref(ref);
                }
diff --combined fs/btrfs/volumes.c
@@@ -52,6 -52,10 +52,10 @@@ static void btrfs_dev_stat_print_on_loa
  
  DEFINE_MUTEX(uuid_mutex);
  static LIST_HEAD(fs_uuids);
+ struct list_head *btrfs_get_fs_uuids(void)
+ {
+       return &fs_uuids;
+ }
  
  static struct btrfs_fs_devices *__alloc_fs_devices(void)
  {
@@@ -441,6 -445,61 +445,61 @@@ static void pending_bios_fn(struct btrf
        run_scheduled_bios(device);
  }
  
+ void btrfs_free_stale_device(struct btrfs_device *cur_dev)
+ {
+       struct btrfs_fs_devices *fs_devs;
+       struct btrfs_device *dev;
+       if (!cur_dev->name)
+               return;
+       list_for_each_entry(fs_devs, &fs_uuids, list) {
+               int del = 1;
+               if (fs_devs->opened)
+                       continue;
+               if (fs_devs->seeding)
+                       continue;
+               list_for_each_entry(dev, &fs_devs->devices, dev_list) {
+                       if (dev == cur_dev)
+                               continue;
+                       if (!dev->name)
+                               continue;
+                       /*
+                        * Todo: This won't be enough. What if the same device
+                        * comes back (with new uuid and) with its mapper path?
+                        * But for now, this does help as mostly an admin will
+                        * either use mapper or non mapper path throughout.
+                        */
+                       rcu_read_lock();
+                       del = strcmp(rcu_str_deref(dev->name),
+                                               rcu_str_deref(cur_dev->name));
+                       rcu_read_unlock();
+                       if (!del)
+                               break;
+               }
+               if (!del) {
+                       /* delete the stale device */
+                       if (fs_devs->num_devices == 1) {
+                               btrfs_sysfs_remove_fsid(fs_devs);
+                               list_del(&fs_devs->list);
+                               free_fs_devices(fs_devs);
+                       } else {
+                               fs_devs->num_devices--;
+                               list_del(&dev->dev_list);
+                               rcu_string_free(dev->name);
+                               kfree(dev);
+                       }
+                       break;
+               }
+       }
+ }
  /*
   * Add new device to list of registered devices
   *
@@@ -556,6 -615,12 +615,12 @@@ static noinline int device_list_add(con
        if (!fs_devices->opened)
                device->generation = found_transid;
  
+       /*
+        * if there is new btrfs on an already registered device,
+        * then remove the stale device entry.
+        */
+       btrfs_free_stale_device(device);
        *fs_devices_ret = fs_devices;
  
        return ret;
@@@ -693,13 -758,13 +758,13 @@@ static void free_device(struct rcu_hea
  
  static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
  {
 -      struct btrfs_device *device;
 +      struct btrfs_device *device, *tmp;
  
        if (--fs_devices->opened > 0)
                return 0;
  
        mutex_lock(&fs_devices->device_list_mutex);
 -      list_for_each_entry(device, &fs_devices->devices, dev_list) {
 +      list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list) {
                struct btrfs_device *new_device;
                struct rcu_string *name;
  
@@@ -1067,31 -1132,15 +1132,31 @@@ again
  
                map = (struct map_lookup *)em->bdev;
                for (i = 0; i < map->num_stripes; i++) {
 +                      u64 end;
 +
                        if (map->stripes[i].dev != device)
                                continue;
                        if (map->stripes[i].physical >= physical_start + len ||
                            map->stripes[i].physical + em->orig_block_len <=
                            physical_start)
                                continue;
 -                      *start = map->stripes[i].physical +
 -                              em->orig_block_len;
 -                      ret = 1;
 +                      /*
 +                       * Make sure that while processing the pinned list we do
 +                       * not override our *start with a lower value, because
 +                       * we can have pinned chunks that fall within this
 +                       * device hole and that have lower physical addresses
 +                       * than the pending chunks we processed before. If we
 +                       * do not take this special care we can end up getting
 +                       * 2 pending chunks that start at the same physical
 +                       * device offsets because the end offset of a pinned
 +                       * chunk can be equal to the start offset of some
 +                       * pending chunk.
 +                       */
 +                      end = map->stripes[i].physical + em->orig_block_len;
 +                      if (end > *start) {
 +                              *start = end;
 +                              ret = 1;
 +                      }
                }
        }
        if (search_list == &trans->transaction->pending_chunks) {
@@@ -1722,7 -1771,7 +1787,7 @@@ int btrfs_rm_device(struct btrfs_root *
        if (device->bdev) {
                device->fs_devices->open_devices--;
                /* remove sysfs entry */
-               btrfs_kobj_rm_device(root->fs_info, device);
+               btrfs_kobj_rm_device(root->fs_info->fs_devices, device);
        }
  
        call_rcu(&device->rcu, free_device);
@@@ -1891,6 -1940,9 +1956,9 @@@ void btrfs_destroy_dev_replace_tgtdev(s
        mutex_lock(&uuid_mutex);
        WARN_ON(!tgtdev);
        mutex_lock(&fs_info->fs_devices->device_list_mutex);
+       btrfs_kobj_rm_device(fs_info->fs_devices, tgtdev);
        if (tgtdev->bdev) {
                btrfs_scratch_superblock(tgtdev);
                fs_info->fs_devices->open_devices--;
@@@ -2227,7 -2279,7 +2295,7 @@@ int btrfs_init_new_device(struct btrfs_
                                    tmp + 1);
  
        /* add sysfs device entry */
-       btrfs_kobj_add_device(root->fs_info, device);
+       btrfs_kobj_add_device(root->fs_info->fs_devices, device);
  
        /*
         * we've got more storage, clear any full flags on the space
                 */
                snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU",
                                                root->fs_info->fsid);
-               if (kobject_rename(&root->fs_info->super_kobj, fsid_buf))
-                       goto error_trans;
+               if (kobject_rename(&root->fs_info->fs_devices->super_kobj,
+                                                               fsid_buf))
+                       pr_warn("BTRFS: sysfs: failed to create fsid for sprout\n");
        }
  
        root->fs_info->num_tolerated_disk_barrier_failures =
  error_trans:
        btrfs_end_transaction(trans, root);
        rcu_string_free(device->name);
-       btrfs_kobj_rm_device(root->fs_info, device);
+       btrfs_kobj_rm_device(root->fs_info->fs_devices, device);
        kfree(device);
  error:
        blkdev_put(bdev, FMODE_EXCL);
@@@ -2625,9 -2678,6 +2694,9 @@@ int btrfs_remove_chunk(struct btrfs_tra
                return -EINVAL;
        }
        map = (struct map_lookup *)em->bdev;
 +      lock_chunks(root->fs_info->chunk_root);
 +      check_system_chunk(trans, extent_root, map->type);
 +      unlock_chunks(root->fs_info->chunk_root);
  
        for (i = 0; i < map->num_stripes; i++) {
                struct btrfs_device *device = map->stripes[i].dev;
@@@ -3927,9 -3977,9 +3996,9 @@@ int btrfs_create_uuid_tree(struct btrfs
        uuid_root = btrfs_create_tree(trans, fs_info,
                                      BTRFS_UUID_TREE_OBJECTID);
        if (IS_ERR(uuid_root)) {
 -              btrfs_abort_transaction(trans, tree_root,
 -                                      PTR_ERR(uuid_root));
 -              return PTR_ERR(uuid_root);
 +              ret = PTR_ERR(uuid_root);
 +              btrfs_abort_transaction(trans, tree_root, ret);
 +              return ret;
        }
  
        fs_info->uuid_root = uuid_root;
@@@ -3984,7 -4034,6 +4053,7 @@@ int btrfs_shrink_device(struct btrfs_de
        int slot;
        int failed = 0;
        bool retried = false;
 +      bool checked_pending_chunks = false;
        struct extent_buffer *l;
        struct btrfs_key key;
        struct btrfs_super_block *super_copy = root->fs_info->super_copy;
@@@ -4065,6 -4114,15 +4134,6 @@@ again
                goto again;
        } else if (failed && retried) {
                ret = -ENOSPC;
 -              lock_chunks(root);
 -
 -              btrfs_device_set_total_bytes(device, old_size);
 -              if (device->writeable)
 -                      device->fs_devices->total_rw_bytes += diff;
 -              spin_lock(&root->fs_info->free_chunk_lock);
 -              root->fs_info->free_chunk_space += diff;
 -              spin_unlock(&root->fs_info->free_chunk_lock);
 -              unlock_chunks(root);
                goto done;
        }
  
        }
  
        lock_chunks(root);
 +
 +      /*
 +       * We checked in the above loop all device extents that were already in
 +       * the device tree. However before we have updated the device's
 +       * total_bytes to the new size, we might have had chunk allocations that
 +       * have not completed yet (new block groups attached to transaction
 +       * handles), and therefore their device extents were not yet in the
 +       * device tree and we missed them in the loop above. So if we have any
 +       * pending chunk using a device extent that overlaps the device range
 +       * that we can not use anymore, commit the current transaction and
 +       * repeat the search on the device tree - this way we guarantee we will
 +       * not have chunks using device extents that end beyond 'new_size'.
 +       */
 +      if (!checked_pending_chunks) {
 +              u64 start = new_size;
 +              u64 len = old_size - new_size;
 +
 +              if (contains_pending_extent(trans, device, &start, len)) {
 +                      unlock_chunks(root);
 +                      checked_pending_chunks = true;
 +                      failed = 0;
 +                      retried = false;
 +                      ret = btrfs_commit_transaction(trans, root);
 +                      if (ret)
 +                              goto done;
 +                      goto again;
 +              }
 +      }
 +
        btrfs_device_set_disk_total_bytes(device, new_size);
        if (list_empty(&device->resized_list))
                list_add_tail(&device->resized_list,
        btrfs_end_transaction(trans, root);
  done:
        btrfs_free_path(path);
 +      if (ret) {
 +              lock_chunks(root);
 +              btrfs_device_set_total_bytes(device, old_size);
 +              if (device->writeable)
 +                      device->fs_devices->total_rw_bytes += diff;
 +              spin_lock(&root->fs_info->free_chunk_lock);
 +              root->fs_info->free_chunk_space += diff;
 +              spin_unlock(&root->fs_info->free_chunk_lock);
 +              unlock_chunks(root);
 +      }
        return ret;
  }
  
@@@ -6128,8 -6147,6 +6197,8 @@@ static int read_one_chunk(struct btrfs_
                                free_extent_map(em);
                                return -EIO;
                        }
 +                      btrfs_warn(root->fs_info, "devid %llu uuid %pU is missing",
 +                                              devid, uuid);
                }
                map->stripes[i].dev->in_fs_metadata = 1;
        }
@@@ -6249,11 -6266,10 +6318,11 @@@ static int read_one_dev(struct btrfs_ro
                if (!btrfs_test_opt(root, DEGRADED))
                        return -EIO;
  
 -              btrfs_warn(root->fs_info, "devid %llu missing", devid);
                device = add_missing_dev(root, fs_devices, devid, dev_uuid);
                if (!device)
                        return -ENOMEM;
 +              btrfs_warn(root->fs_info, "devid %llu uuid %pU missing",
 +                              devid, dev_uuid);
        } else {
                if (!device->bdev && !btrfs_test_opt(root, DEGRADED))
                        return -EIO;
@@@ -6781,3 -6797,21 +6850,21 @@@ void btrfs_update_commit_device_bytes_u
        }
        unlock_chunks(root);
  }
+ void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info)
+ {
+       struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+       while (fs_devices) {
+               fs_devices->fs_info = fs_info;
+               fs_devices = fs_devices->seed;
+       }
+ }
+ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info)
+ {
+       struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+       while (fs_devices) {
+               fs_devices->fs_info = NULL;
+               fs_devices = fs_devices->seed;
+       }
+ }