Merge tag 'md/4.8-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
[cascardo/linux.git] / drivers / md / raid5.c
index d189e89..da583bb 100644 (file)
@@ -659,6 +659,7 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
 {
        struct stripe_head *sh;
        int hash = stripe_hash_locks_hash(sector);
+       int inc_empty_inactive_list_flag;
 
        pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector);
 
@@ -703,7 +704,12 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
                                        atomic_inc(&conf->active_stripes);
                                BUG_ON(list_empty(&sh->lru) &&
                                       !test_bit(STRIPE_EXPANDING, &sh->state));
+                               inc_empty_inactive_list_flag = 0;
+                               if (!list_empty(conf->inactive_list + hash))
+                                       inc_empty_inactive_list_flag = 1;
                                list_del_init(&sh->lru);
+                               if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag)
+                                       atomic_inc(&conf->empty_inactive_list_nr);
                                if (sh->group) {
                                        sh->group->stripes_cnt--;
                                        sh->group = NULL;
@@ -762,6 +768,7 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
        sector_t head_sector, tmp_sec;
        int hash;
        int dd_idx;
+       int inc_empty_inactive_list_flag;
 
        /* Don't cross chunks, so stripe pd_idx/qd_idx is the same */
        tmp_sec = sh->sector;
@@ -779,7 +786,12 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
                                atomic_inc(&conf->active_stripes);
                        BUG_ON(list_empty(&head->lru) &&
                               !test_bit(STRIPE_EXPANDING, &head->state));
+                       inc_empty_inactive_list_flag = 0;
+                       if (!list_empty(conf->inactive_list + hash))
+                               inc_empty_inactive_list_flag = 1;
                        list_del_init(&head->lru);
+                       if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag)
+                               atomic_inc(&conf->empty_inactive_list_nr);
                        if (head->group) {
                                head->group->stripes_cnt--;
                                head->group = NULL;
@@ -806,7 +818,7 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
        dd_idx = 0;
        while (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx)
                dd_idx++;
-       if (head->dev[dd_idx].towrite->bi_rw != sh->dev[dd_idx].towrite->bi_rw ||
+       if (head->dev[dd_idx].towrite->bi_opf != sh->dev[dd_idx].towrite->bi_opf ||
            bio_op(head->dev[dd_idx].towrite) != bio_op(sh->dev[dd_idx].towrite))
                goto unlock_out;
 
@@ -993,7 +1005,6 @@ again:
 
                        set_bit(STRIPE_IO_STARTED, &sh->state);
 
-                       bio_reset(bi);
                        bi->bi_bdev = rdev->bdev;
                        bio_set_op_attrs(bi, op, op_flags);
                        bi->bi_end_io = op_is_write(op)
@@ -1003,7 +1014,7 @@ again:
 
                        pr_debug("%s: for %llu schedule op %d on disc %d\n",
                                __func__, (unsigned long long)sh->sector,
-                               bi->bi_rw, i);
+                               bi->bi_opf, i);
                        atomic_inc(&sh->count);
                        if (sh != head_sh)
                                atomic_inc(&head_sh->count);
@@ -1014,7 +1025,7 @@ again:
                                bi->bi_iter.bi_sector = (sh->sector
                                                 + rdev->data_offset);
                        if (test_bit(R5_ReadNoMerge, &head_sh->dev[i].flags))
-                               bi->bi_rw |= REQ_NOMERGE;
+                               bi->bi_opf |= REQ_NOMERGE;
 
                        if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
                                WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
@@ -1045,7 +1056,6 @@ again:
 
                        set_bit(STRIPE_IO_STARTED, &sh->state);
 
-                       bio_reset(rbi);
                        rbi->bi_bdev = rrdev->bdev;
                        bio_set_op_attrs(rbi, op, op_flags);
                        BUG_ON(!op_is_write(op));
@@ -1055,7 +1065,7 @@ again:
                        pr_debug("%s: for %llu schedule op %d on "
                                 "replacement disc %d\n",
                                __func__, (unsigned long long)sh->sector,
-                               rbi->bi_rw, i);
+                               rbi->bi_opf, i);
                        atomic_inc(&sh->count);
                        if (sh != head_sh)
                                atomic_inc(&head_sh->count);
@@ -1088,7 +1098,7 @@ again:
                        if (op_is_write(op))
                                set_bit(STRIPE_DEGRADED, &sh->state);
                        pr_debug("skip op %d on disc %d for sector %llu\n",
-                               bi->bi_rw, i, (unsigned long long)sh->sector);
+                               bi->bi_opf, i, (unsigned long long)sh->sector);
                        clear_bit(R5_LOCKED, &sh->dev[i].flags);
                        set_bit(STRIPE_HANDLE, &sh->state);
                }
@@ -1619,9 +1629,9 @@ again:
 
                        while (wbi && wbi->bi_iter.bi_sector <
                                dev->sector + STRIPE_SECTORS) {
-                               if (wbi->bi_rw & REQ_FUA)
+                               if (wbi->bi_opf & REQ_FUA)
                                        set_bit(R5_WantFUA, &dev->flags);
-                               if (wbi->bi_rw & REQ_SYNC)
+                               if (wbi->bi_opf & REQ_SYNC)
                                        set_bit(R5_SyncIO, &dev->flags);
                                if (bio_op(wbi) == REQ_OP_DISCARD)
                                        set_bit(R5_Discard, &dev->flags);
@@ -1978,9 +1988,11 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
        put_cpu();
 }
 
-static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp)
+static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
+       int disks)
 {
        struct stripe_head *sh;
+       int i;
 
        sh = kmem_cache_zalloc(sc, gfp);
        if (sh) {
@@ -1989,6 +2001,17 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp)
                INIT_LIST_HEAD(&sh->batch_list);
                INIT_LIST_HEAD(&sh->lru);
                atomic_set(&sh->count, 1);
+               for (i = 0; i < disks; i++) {
+                       struct r5dev *dev = &sh->dev[i];
+
+                       bio_init(&dev->req);
+                       dev->req.bi_io_vec = &dev->vec;
+                       dev->req.bi_max_vecs = 1;
+
+                       bio_init(&dev->rreq);
+                       dev->rreq.bi_io_vec = &dev->rvec;
+                       dev->rreq.bi_max_vecs = 1;
+               }
        }
        return sh;
 }
@@ -1996,7 +2019,7 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
 {
        struct stripe_head *sh;
 
-       sh = alloc_stripe(conf->slab_cache, gfp);
+       sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size);
        if (!sh)
                return 0;
 
@@ -2167,7 +2190,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
        mutex_lock(&conf->cache_size_mutex);
 
        for (i = conf->max_nr_stripes; i; i--) {
-               nsh = alloc_stripe(sc, GFP_KERNEL);
+               nsh = alloc_stripe(sc, GFP_KERNEL, newsize);
                if (!nsh)
                        break;
 
@@ -2299,6 +2322,7 @@ static void raid5_end_read_request(struct bio * bi)
                (unsigned long long)sh->sector, i, atomic_read(&sh->count),
                bi->bi_error);
        if (i == disks) {
+               bio_reset(bi);
                BUG();
                return;
        }
@@ -2402,6 +2426,7 @@ static void raid5_end_read_request(struct bio * bi)
        clear_bit(R5_LOCKED, &sh->dev[i].flags);
        set_bit(STRIPE_HANDLE, &sh->state);
        raid5_release_stripe(sh);
+       bio_reset(bi);
 }
 
 static void raid5_end_write_request(struct bio *bi)
@@ -2436,6 +2461,7 @@ static void raid5_end_write_request(struct bio *bi)
                (unsigned long long)sh->sector, i, atomic_read(&sh->count),
                bi->bi_error);
        if (i == disks) {
+               bio_reset(bi);
                BUG();
                return;
        }
@@ -2479,22 +2505,13 @@ static void raid5_end_write_request(struct bio *bi)
 
        if (sh->batch_head && sh != sh->batch_head)
                raid5_release_stripe(sh->batch_head);
+       bio_reset(bi);
 }
 
 static void raid5_build_block(struct stripe_head *sh, int i, int previous)
 {
        struct r5dev *dev = &sh->dev[i];
 
-       bio_init(&dev->req);
-       dev->req.bi_io_vec = &dev->vec;
-       dev->req.bi_max_vecs = 1;
-       dev->req.bi_private = sh;
-
-       bio_init(&dev->rreq);
-       dev->rreq.bi_io_vec = &dev->rvec;
-       dev->rreq.bi_max_vecs = 1;
-       dev->rreq.bi_private = sh;
-
        dev->flags = 0;
        dev->sector = raid5_compute_blocknr(sh, i, previous);
 }
@@ -4628,7 +4645,9 @@ finish:
        }
 
        if (!bio_list_empty(&s.return_bi)) {
-               if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags)) {
+               if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags) &&
+                               (s.failed <= conf->max_degraded ||
+                                       conf->mddev->external == 0)) {
                        spin_lock_irq(&conf->device_lock);
                        bio_list_merge(&conf->return_bi, &s.return_bi);
                        spin_unlock_irq(&conf->device_lock);
@@ -5154,7 +5173,7 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi)
        DEFINE_WAIT(w);
        bool do_prepare;
 
-       if (unlikely(bi->bi_rw & REQ_PREFLUSH)) {
+       if (unlikely(bi->bi_opf & REQ_PREFLUSH)) {
                int ret = r5l_handle_flush_request(conf->log, bi);
 
                if (ret == 0)
@@ -5237,7 +5256,7 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi)
                        (unsigned long long)logical_sector);
 
                sh = raid5_get_active_stripe(conf, new_sector, previous,
-                                      (bi->bi_rw & REQ_RAHEAD), 0);
+                                      (bi->bi_opf & REQ_RAHEAD), 0);
                if (sh) {
                        if (unlikely(previous)) {
                                /* expansion might have moved on while waiting for a
@@ -5305,7 +5324,7 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi)
                        set_bit(STRIPE_HANDLE, &sh->state);
                        clear_bit(STRIPE_DELAYED, &sh->state);
                        if ((!sh->batch_head || sh == sh->batch_head) &&
-                           (bi->bi_rw & REQ_SYNC) &&
+                           (bi->bi_opf & REQ_SYNC) &&
                            !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
                                atomic_inc(&conf->preread_active_stripes);
                        release_stripe_plug(mddev, sh);
@@ -6826,11 +6845,14 @@ static int raid5_run(struct mddev *mddev)
        if (IS_ERR(conf))
                return PTR_ERR(conf);
 
-       if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !journal_dev) {
-               printk(KERN_ERR "md/raid:%s: journal disk is missing, force array readonly\n",
-                      mdname(mddev));
-               mddev->ro = 1;
-               set_disk_ro(mddev->gendisk, 1);
+       if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
+               if (!journal_dev) {
+                       pr_err("md/raid:%s: journal disk is missing, force array readonly\n",
+                              mdname(mddev));
+                       mddev->ro = 1;
+                       set_disk_ro(mddev->gendisk, 1);
+               } else if (mddev->recovery_cp == MaxSector)
+                       set_bit(MD_JOURNAL_CLEAN, &mddev->flags);
        }
 
        conf->min_offset_diff = min_offset_diff;