Merge tag 'md/4.8-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 30 Aug 2016 18:24:04 +0000 (11:24 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 30 Aug 2016 18:24:04 +0000 (11:24 -0700)
Pull MD fixes from Shaohua Li:
 "This includes several bug fixes:

   - Alexey Obitotskiy fixed a hang for faulty raid5 array with external
     management

   - Song Liu fixed two raid5 journal related bugs

   - Tomasz Majchrzak fixed a bad block recording issue and an
     accounting issue for raid10

   - ZhengYuan Liu fixed an accounting issue for raid5

   - I fixed a potential race condition and memory leak with DIF/DIX
     enabled

   - other trivial fixes"

* tag 'md/4.8-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
  raid5: avoid unnecessary bio data set
  raid5: fix memory leak of bio integrity data
  raid10: record correct address of bad block
  md-cluster: fix error return code in join()
  r5cache: set MD_JOURNAL_CLEAN correctly
  md: don't print the same repeated messages about delayed sync operation
  md: remove obsolete ret in md_start_sync
  md: do not count journal as spare in GET_ARRAY_INFO
  md: Prevent IO hold during accessing to faulty raid5 array
  MD: hold mddev lock to change bitmap location
  raid5: fix incorrectly counter of conf->empty_inactive_list_nr
  raid10: increment write counter after bio is split

drivers/md/bitmap.c
drivers/md/md-cluster.c
drivers/md/md.c
drivers/md/raid10.c
drivers/md/raid5.c

index 6fff794..13041ee 100644 (file)
@@ -2183,19 +2183,29 @@ location_show(struct mddev *mddev, char *page)
 static ssize_t
 location_store(struct mddev *mddev, const char *buf, size_t len)
 {
+       int rv;
 
+       rv = mddev_lock(mddev);
+       if (rv)
+               return rv;
        if (mddev->pers) {
-               if (!mddev->pers->quiesce)
-                       return -EBUSY;
-               if (mddev->recovery || mddev->sync_thread)
-                       return -EBUSY;
+               if (!mddev->pers->quiesce) {
+                       rv = -EBUSY;
+                       goto out;
+               }
+               if (mddev->recovery || mddev->sync_thread) {
+                       rv = -EBUSY;
+                       goto out;
+               }
        }
 
        if (mddev->bitmap || mddev->bitmap_info.file ||
            mddev->bitmap_info.offset) {
                /* bitmap already configured.  Only option is to clear it */
-               if (strncmp(buf, "none", 4) != 0)
-                       return -EBUSY;
+               if (strncmp(buf, "none", 4) != 0) {
+                       rv = -EBUSY;
+                       goto out;
+               }
                if (mddev->pers) {
                        mddev->pers->quiesce(mddev, 1);
                        bitmap_destroy(mddev);
@@ -2214,21 +2224,25 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
                        /* nothing to be done */;
                else if (strncmp(buf, "file:", 5) == 0) {
                        /* Not supported yet */
-                       return -EINVAL;
+                       rv = -EINVAL;
+                       goto out;
                } else {
-                       int rv;
                        if (buf[0] == '+')
                                rv = kstrtoll(buf+1, 10, &offset);
                        else
                                rv = kstrtoll(buf, 10, &offset);
                        if (rv)
-                               return rv;
-                       if (offset == 0)
-                               return -EINVAL;
+                               goto out;
+                       if (offset == 0) {
+                               rv = -EINVAL;
+                               goto out;
+                       }
                        if (mddev->bitmap_info.external == 0 &&
                            mddev->major_version == 0 &&
-                           offset != mddev->bitmap_info.default_offset)
-                               return -EINVAL;
+                           offset != mddev->bitmap_info.default_offset) {
+                               rv = -EINVAL;
+                               goto out;
+                       }
                        mddev->bitmap_info.offset = offset;
                        if (mddev->pers) {
                                struct bitmap *bitmap;
@@ -2245,7 +2259,7 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
                                mddev->pers->quiesce(mddev, 0);
                                if (rv) {
                                        bitmap_destroy(mddev);
-                                       return rv;
+                                       goto out;
                                }
                        }
                }
@@ -2257,6 +2271,11 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
                set_bit(MD_CHANGE_DEVS, &mddev->flags);
                md_wakeup_thread(mddev->thread);
        }
+       rv = 0;
+out:
+       mddev_unlock(mddev);
+       if (rv)
+               return rv;
        return len;
 }
 
index 41573f1..34a840d 100644 (file)
@@ -834,8 +834,10 @@ static int join(struct mddev *mddev, int nodes)
                goto err;
        }
        cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0);
-       if (!cinfo->ack_lockres)
+       if (!cinfo->ack_lockres) {
+               ret = -ENOMEM;
                goto err;
+       }
        /* get sync CR lock on ACK. */
        if (dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR))
                pr_err("md-cluster: failed to get a sync CR lock on ACK!(%d)\n",
@@ -849,8 +851,10 @@ static int join(struct mddev *mddev, int nodes)
        pr_info("md-cluster: Joined cluster %s slot %d\n", str, cinfo->slot_number);
        snprintf(str, 64, "bitmap%04d", cinfo->slot_number - 1);
        cinfo->bitmap_lockres = lockres_init(mddev, str, NULL, 1);
-       if (!cinfo->bitmap_lockres)
+       if (!cinfo->bitmap_lockres) {
+               ret = -ENOMEM;
                goto err;
+       }
        if (dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW)) {
                pr_err("Failed to get bitmap lock\n");
                ret = -EINVAL;
@@ -858,8 +862,10 @@ static int join(struct mddev *mddev, int nodes)
        }
 
        cinfo->resync_lockres = lockres_init(mddev, "resync", NULL, 0);
-       if (!cinfo->resync_lockres)
+       if (!cinfo->resync_lockres) {
+               ret = -ENOMEM;
                goto err;
+       }
 
        return 0;
 err:
index d646f6e..67642ba 100644 (file)
@@ -1604,11 +1604,8 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
                        mddev->new_chunk_sectors = mddev->chunk_sectors;
                }
 
-               if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL) {
+               if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
                        set_bit(MD_HAS_JOURNAL, &mddev->flags);
-                       if (mddev->recovery_cp == MaxSector)
-                               set_bit(MD_JOURNAL_CLEAN, &mddev->flags);
-               }
        } else if (mddev->pers == NULL) {
                /* Insist of good event counter while assembling, except for
                 * spares (which don't need an event count) */
@@ -5851,6 +5848,9 @@ static int get_array_info(struct mddev *mddev, void __user *arg)
                        working++;
                        if (test_bit(In_sync, &rdev->flags))
                                insync++;
+                       else if (test_bit(Journal, &rdev->flags))
+                               /* TODO: add journal count to md_u.h */
+                               ;
                        else
                                spare++;
                }
@@ -7862,6 +7862,7 @@ void md_do_sync(struct md_thread *thread)
         */
 
        do {
+               int mddev2_minor = -1;
                mddev->curr_resync = 2;
 
        try_again:
@@ -7891,10 +7892,14 @@ void md_do_sync(struct md_thread *thread)
                                prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
                                if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
                                    mddev2->curr_resync >= mddev->curr_resync) {
-                                       printk(KERN_INFO "md: delaying %s of %s"
-                                              " until %s has finished (they"
-                                              " share one or more physical units)\n",
-                                              desc, mdname(mddev), mdname(mddev2));
+                                       if (mddev2_minor != mddev2->md_minor) {
+                                               mddev2_minor = mddev2->md_minor;
+                                               printk(KERN_INFO "md: delaying %s of %s"
+                                                      " until %s has finished (they"
+                                                      " share one or more physical units)\n",
+                                                      desc, mdname(mddev),
+                                                      mdname(mddev2));
+                                       }
                                        mddev_put(mddev2);
                                        if (signal_pending(current))
                                                flush_signals(current);
@@ -8275,16 +8280,13 @@ no_add:
 static void md_start_sync(struct work_struct *ws)
 {
        struct mddev *mddev = container_of(ws, struct mddev, del_work);
-       int ret = 0;
 
        mddev->sync_thread = md_register_thread(md_do_sync,
                                                mddev,
                                                "resync");
        if (!mddev->sync_thread) {
-               if (!(mddev_is_clustered(mddev) && ret == -EAGAIN))
-                       printk(KERN_ERR "%s: could not start resync"
-                              " thread...\n",
-                              mdname(mddev));
+               printk(KERN_ERR "%s: could not start resync thread...\n",
+                      mdname(mddev));
                /* leave the spares where they are, it shouldn't hurt */
                clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
                clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
index 0e4efcd..be1a9fc 100644 (file)
@@ -1064,6 +1064,8 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
        int max_sectors;
        int sectors;
 
+       md_write_start(mddev, bio);
+
        /*
         * Register the new request and wait if the reconstruction
         * thread has put up a bar for new requests.
@@ -1445,8 +1447,6 @@ static void raid10_make_request(struct mddev *mddev, struct bio *bio)
                return;
        }
 
-       md_write_start(mddev, bio);
-
        do {
 
                /*
@@ -2465,20 +2465,21 @@ static int narrow_write_error(struct r10bio *r10_bio, int i)
 
        while (sect_to_write) {
                struct bio *wbio;
+               sector_t wsector;
                if (sectors > sect_to_write)
                        sectors = sect_to_write;
                /* Write at 'sector' for 'sectors' */
                wbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
                bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors);
-               wbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
-                                  choose_data_offset(r10_bio, rdev) +
-                                  (sector - r10_bio->sector));
+               wsector = r10_bio->devs[i].addr + (sector - r10_bio->sector);
+               wbio->bi_iter.bi_sector = wsector +
+                                  choose_data_offset(r10_bio, rdev);
                wbio->bi_bdev = rdev->bdev;
                bio_set_op_attrs(wbio, REQ_OP_WRITE, 0);
 
                if (submit_bio_wait(wbio) < 0)
                        /* Failure! */
-                       ok = rdev_set_badblocks(rdev, sector,
+                       ok = rdev_set_badblocks(rdev, wsector,
                                                sectors, 0)
                                && ok;
 
index 8912407..da583bb 100644 (file)
@@ -659,6 +659,7 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
 {
        struct stripe_head *sh;
        int hash = stripe_hash_locks_hash(sector);
+       int inc_empty_inactive_list_flag;
 
        pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector);
 
@@ -703,7 +704,12 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
                                        atomic_inc(&conf->active_stripes);
                                BUG_ON(list_empty(&sh->lru) &&
                                       !test_bit(STRIPE_EXPANDING, &sh->state));
+                               inc_empty_inactive_list_flag = 0;
+                               if (!list_empty(conf->inactive_list + hash))
+                                       inc_empty_inactive_list_flag = 1;
                                list_del_init(&sh->lru);
+                               if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag)
+                                       atomic_inc(&conf->empty_inactive_list_nr);
                                if (sh->group) {
                                        sh->group->stripes_cnt--;
                                        sh->group = NULL;
@@ -762,6 +768,7 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
        sector_t head_sector, tmp_sec;
        int hash;
        int dd_idx;
+       int inc_empty_inactive_list_flag;
 
        /* Don't cross chunks, so stripe pd_idx/qd_idx is the same */
        tmp_sec = sh->sector;
@@ -779,7 +786,12 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
                                atomic_inc(&conf->active_stripes);
                        BUG_ON(list_empty(&head->lru) &&
                               !test_bit(STRIPE_EXPANDING, &head->state));
+                       inc_empty_inactive_list_flag = 0;
+                       if (!list_empty(conf->inactive_list + hash))
+                               inc_empty_inactive_list_flag = 1;
                        list_del_init(&head->lru);
+                       if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag)
+                               atomic_inc(&conf->empty_inactive_list_nr);
                        if (head->group) {
                                head->group->stripes_cnt--;
                                head->group = NULL;
@@ -993,7 +1005,6 @@ again:
 
                        set_bit(STRIPE_IO_STARTED, &sh->state);
 
-                       bio_reset(bi);
                        bi->bi_bdev = rdev->bdev;
                        bio_set_op_attrs(bi, op, op_flags);
                        bi->bi_end_io = op_is_write(op)
@@ -1045,7 +1056,6 @@ again:
 
                        set_bit(STRIPE_IO_STARTED, &sh->state);
 
-                       bio_reset(rbi);
                        rbi->bi_bdev = rrdev->bdev;
                        bio_set_op_attrs(rbi, op, op_flags);
                        BUG_ON(!op_is_write(op));
@@ -1978,9 +1988,11 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
        put_cpu();
 }
 
-static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp)
+static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
+       int disks)
 {
        struct stripe_head *sh;
+       int i;
 
        sh = kmem_cache_zalloc(sc, gfp);
        if (sh) {
@@ -1989,6 +2001,17 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp)
                INIT_LIST_HEAD(&sh->batch_list);
                INIT_LIST_HEAD(&sh->lru);
                atomic_set(&sh->count, 1);
+               for (i = 0; i < disks; i++) {
+                       struct r5dev *dev = &sh->dev[i];
+
+                       bio_init(&dev->req);
+                       dev->req.bi_io_vec = &dev->vec;
+                       dev->req.bi_max_vecs = 1;
+
+                       bio_init(&dev->rreq);
+                       dev->rreq.bi_io_vec = &dev->rvec;
+                       dev->rreq.bi_max_vecs = 1;
+               }
        }
        return sh;
 }
@@ -1996,7 +2019,7 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
 {
        struct stripe_head *sh;
 
-       sh = alloc_stripe(conf->slab_cache, gfp);
+       sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size);
        if (!sh)
                return 0;
 
@@ -2167,7 +2190,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
        mutex_lock(&conf->cache_size_mutex);
 
        for (i = conf->max_nr_stripes; i; i--) {
-               nsh = alloc_stripe(sc, GFP_KERNEL);
+               nsh = alloc_stripe(sc, GFP_KERNEL, newsize);
                if (!nsh)
                        break;
 
@@ -2299,6 +2322,7 @@ static void raid5_end_read_request(struct bio * bi)
                (unsigned long long)sh->sector, i, atomic_read(&sh->count),
                bi->bi_error);
        if (i == disks) {
+               bio_reset(bi);
                BUG();
                return;
        }
@@ -2402,6 +2426,7 @@ static void raid5_end_read_request(struct bio * bi)
        clear_bit(R5_LOCKED, &sh->dev[i].flags);
        set_bit(STRIPE_HANDLE, &sh->state);
        raid5_release_stripe(sh);
+       bio_reset(bi);
 }
 
 static void raid5_end_write_request(struct bio *bi)
@@ -2436,6 +2461,7 @@ static void raid5_end_write_request(struct bio *bi)
                (unsigned long long)sh->sector, i, atomic_read(&sh->count),
                bi->bi_error);
        if (i == disks) {
+               bio_reset(bi);
                BUG();
                return;
        }
@@ -2479,22 +2505,13 @@ static void raid5_end_write_request(struct bio *bi)
 
        if (sh->batch_head && sh != sh->batch_head)
                raid5_release_stripe(sh->batch_head);
+       bio_reset(bi);
 }
 
 static void raid5_build_block(struct stripe_head *sh, int i, int previous)
 {
        struct r5dev *dev = &sh->dev[i];
 
-       bio_init(&dev->req);
-       dev->req.bi_io_vec = &dev->vec;
-       dev->req.bi_max_vecs = 1;
-       dev->req.bi_private = sh;
-
-       bio_init(&dev->rreq);
-       dev->rreq.bi_io_vec = &dev->rvec;
-       dev->rreq.bi_max_vecs = 1;
-       dev->rreq.bi_private = sh;
-
        dev->flags = 0;
        dev->sector = raid5_compute_blocknr(sh, i, previous);
 }
@@ -4628,7 +4645,9 @@ finish:
        }
 
        if (!bio_list_empty(&s.return_bi)) {
-               if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags)) {
+               if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags) &&
+                               (s.failed <= conf->max_degraded ||
+                                       conf->mddev->external == 0)) {
                        spin_lock_irq(&conf->device_lock);
                        bio_list_merge(&conf->return_bi, &s.return_bi);
                        spin_unlock_irq(&conf->device_lock);
@@ -6826,11 +6845,14 @@ static int raid5_run(struct mddev *mddev)
        if (IS_ERR(conf))
                return PTR_ERR(conf);
 
-       if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !journal_dev) {
-               printk(KERN_ERR "md/raid:%s: journal disk is missing, force array readonly\n",
-                      mdname(mddev));
-               mddev->ro = 1;
-               set_disk_ro(mddev->gendisk, 1);
+       if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
+               if (!journal_dev) {
+                       pr_err("md/raid:%s: journal disk is missing, force array readonly\n",
+                              mdname(mddev));
+                       mddev->ro = 1;
+                       set_disk_ro(mddev->gendisk, 1);
+               } else if (mddev->recovery_cp == MaxSector)
+                       set_bit(MD_JOURNAL_CLEAN, &mddev->flags);
        }
 
        conf->min_offset_diff = min_offset_diff;