Merge tag 'md/4.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
author	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 21 Mar 2016 21:18:10 +0000 (14:18 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 21 Mar 2016 21:18:10 +0000 (14:18 -0700)
Pull MD updates from Shaohua Li:
 "This update mainly fixes bugs.

   - a raid5 discard-related fix from Jes
   - an MD multipath bio clone fix from Ming
   - a raid1 error-handling deadlock fix from Nate and the
     corresponding raid10 fix from myself
   - a raid5 stripe batch fix from Neil
   - a patch from Sebastian to avoid an unnecessary uevent
   - several cleanup/debug patches"

* tag 'md/4.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
  md/raid5: Cleanup cpu hotplug notifier
  raid10: include bio_end_io_list in nr_queued to prevent freeze_array hang
  raid1: include bio_end_io_list in nr_queued to prevent freeze_array hang
  md: fix typos for stipe
  md/bitmap: remove redundant return in bitmap_checkpage
  md/raid1: remove unnecessary BUG_ON
  md: multipath: don't hardcopy bio in .make_request path
  md/raid5: output stripe state for debug
  md/raid5: preserve STRIPE_PREREAD_ACTIVE in break_stripe_batch_list
  Update MD git tree URL
  md/bitmap: remove redundant check
  MD: warn for potential deadlock
  md: Drop sending a change uevent when stopping
  RAID5: revert e9e4c377e2f563 to fix a livelock
  RAID5: check_reshape() shouldn't call mddev_suspend
  md/raid5: Compare apples to apples (or sectors to sectors)

MAINTAINERS
drivers/md/bitmap.c
drivers/md/bitmap.h
drivers/md/md.c
drivers/md/multipath.c
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5.c
drivers/md/raid5.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 29d9779..ce02830 100644
@@ -10291,7 +10291,7 @@ F:      drivers/media/pci/solo6x10/
 SOFTWARE RAID (Multiple Disks) SUPPORT
 M:     Shaohua Li <shli@kernel.org>
 L:     linux-raid@vger.kernel.org
-T:     git git://neil.brown.name/md
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git
 S:     Supported
 F:     drivers/md/
 F:     include/linux/raid/
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index d80cce4..7df6b4f 100644
@@ -98,7 +98,6 @@ __acquires(bitmap->lock)
                   bitmap->bp[page].hijacked) {
                /* somebody beat us to getting the page */
                kfree(mappage);
-               return 0;
        } else {
 
                /* no page was in place and we have one, so install it */
@@ -510,8 +509,7 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap)
        sb->chunksize = cpu_to_le32(chunksize);
 
        daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep;
-       if (!daemon_sleep ||
-           (daemon_sleep < 1) || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
+       if (!daemon_sleep || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
                printk(KERN_INFO "Choosing daemon_sleep default (5 sec)\n");
                daemon_sleep = 5 * HZ;
        }
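
The dropped clause is redundant rather than a behavior change; a note on why, since the hunk is easy to misread as loosening the check:

	/* bitmap_info.daemon_sleep is an unsigned long, so
	 * "daemon_sleep < 1" can only hold when it is 0 -- exactly what
	 * "!daemon_sleep" already tests.  The surviving condition keeps
	 * the same upper bound against MAX_SCHEDULE_TIMEOUT. */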
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h
index 7d5c3a6..5e3fcd6 100644
@@ -49,8 +49,8 @@
  * When we set a bit, or in the counter (to start a write), if the fields is
  * 0, we first set the disk bit and set the counter to 1.
  *
- * If the counter is 0, the on-disk bit is clear and the stipe is clean
- * Anything that dirties the stipe pushes the counter to 2 (at least)
+ * If the counter is 0, the on-disk bit is clear and the stripe is clean
+ * Anything that dirties the stripe pushes the counter to 2 (at least)
  * and sets the on-disk bit (lazily).
  * If a periodic sweep find the counter at 2, it is decremented to 1.
  * If the sweep find the counter at 1, the on-disk bit is cleared and the
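
The corrected comment describes the bitmap's per-chunk counter protocol. A minimal user-space sketch of that protocol as described (names and widths are illustrative, not the kernel's; locking and the real 14-bit counter field are omitted):

	#include <stdbool.h>

	struct chunk_counter {
		bool on_disk_bit;   /* the lazily-maintained dirty bit */
		unsigned count;     /* 0 clean, 1 cleaning, >= 2 dirty */
	};

	static void start_write(struct chunk_counter *c)
	{
		if (c->count == 0)
			c->on_disk_bit = true;   /* set the disk bit first */
		if (c->count < 2)
			c->count = 2;            /* dirtying pushes it to 2 at least */
		else
			c->count++;              /* one more in-flight write */
	}

	static void end_write(struct chunk_counter *c)
	{
		if (c->count > 2)
			c->count--;              /* stays >= 2 until a sweep sees it */
	}

	static void periodic_sweep(struct chunk_counter *c)
	{
		if (c->count == 2)
			c->count = 1;            /* quiet since the last sweep */
		else if (c->count == 1) {
			c->on_disk_bit = false;  /* clear the on-disk bit ... */
			c->count = 0;            /* ... and the chunk is clean */
		}
	}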
diff --git a/drivers/md/md.c b/drivers/md/md.c
index e55e6cf..c068f17 100644
@@ -305,6 +305,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
  */
 void mddev_suspend(struct mddev *mddev)
 {
+       WARN_ON_ONCE(current == mddev->thread->tsk);
        if (mddev->suspended++)
                return;
        synchronize_rcu();
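
Context for the new WARN_ON_ONCE ("MD: warn for potential deadlock" in the shortlog): mddev_suspend() must never run on the array's own personality thread. Approximately, the function continues:

	wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
	mddev->pers->quiesce(mddev, 1);

Draining active_io can require the personality thread (raid5d and friends) to make progress, so if that thread is the caller it waits on work only it can perform and the array hangs. The raid5 hunks below remove one such caller.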
@@ -5671,7 +5672,6 @@ static int do_md_stop(struct mddev *mddev, int mode,
                export_array(mddev);
 
                md_clean(mddev);
-               kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
                if (mddev->hold_active == UNTIL_STOP)
                        mddev->hold_active = 0;
        }
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 0a72ab6..dd483bb 100644
@@ -129,7 +129,9 @@ static void multipath_make_request(struct mddev *mddev, struct bio * bio)
        }
        multipath = conf->multipaths + mp_bh->path;
 
-       mp_bh->bio = *bio;
+       bio_init(&mp_bh->bio);
+       __bio_clone_fast(&mp_bh->bio, bio);
+
        mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset;
        mp_bh->bio.bi_bdev = multipath->rdev->bdev;
        mp_bh->bio.bi_rw |= REQ_FAILFAST_TRANSPORT;
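
Why the struct assignment had to go, in brief (a summary, not kernel source):

	/* "mp_bh->bio = *bio" byte-copied the whole bio, including its
	 * reference count and completion-accounting state, so the copy
	 * and the original could corrupt each other.  bio_init() plus
	 * __bio_clone_fast() builds a properly initialized clone that
	 * shares the source's bvec table instead of duplicating that
	 * state. */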
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 4e3843f..39fb21e 100644
@@ -2274,6 +2274,7 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
        if (fail) {
                spin_lock_irq(&conf->device_lock);
                list_add(&r1_bio->retry_list, &conf->bio_end_io_list);
+               conf->nr_queued++;
                spin_unlock_irq(&conf->device_lock);
                md_wakeup_thread(conf->mddev->thread);
        } else {
@@ -2391,8 +2392,10 @@ static void raid1d(struct md_thread *thread)
                LIST_HEAD(tmp);
                spin_lock_irqsave(&conf->device_lock, flags);
                if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
-                       list_add(&tmp, &conf->bio_end_io_list);
-                       list_del_init(&conf->bio_end_io_list);
+                       while (!list_empty(&conf->bio_end_io_list)) {
+                               list_move(conf->bio_end_io_list.prev, &tmp);
+                               conf->nr_queued--;
+                       }
                }
                spin_unlock_irqrestore(&conf->device_lock, flags);
                while (!list_empty(&tmp)) {
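
The hang these raid1 hunks (and the matching raid10 ones below) fix: freeze_array() waits for every pending request to be accounted for on a retry queue. At this point raid1.c's version looked approximately like this (abridged):

	static void freeze_array(struct r1conf *conf, int extra)
	{
		/* Wait until every pending request is queued for retry:
		 * nr_pending must equal nr_queued plus this request. */
		spin_lock_irq(&conf->resync_lock);
		conf->array_frozen = 1;
		wait_event_lock_irq_cmd(conf->wait_barrier,
					conf->nr_pending == conf->nr_queued + extra,
					conf->resync_lock,
					flush_pending_writes(conf));
		spin_unlock_irq(&conf->resync_lock);
	}

Bios parked on bio_end_io_list still count toward nr_pending, so without the nr_queued adjustments above they were invisible to the right-hand side of that condition and freeze_array() could wait forever.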
@@ -2695,7 +2698,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
                            !conf->fullsync &&
                            !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
                                break;
-                       BUG_ON(sync_blocks < (PAGE_SIZE>>9));
                        if ((len >> 9) > sync_blocks)
                                len = sync_blocks<<9;
                }
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 1c1447d..e3fd725 100644
@@ -2664,6 +2664,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
                if (fail) {
                        spin_lock_irq(&conf->device_lock);
                        list_add(&r10_bio->retry_list, &conf->bio_end_io_list);
+                       conf->nr_queued++;
                        spin_unlock_irq(&conf->device_lock);
                        md_wakeup_thread(conf->mddev->thread);
                } else {
@@ -2691,8 +2692,10 @@ static void raid10d(struct md_thread *thread)
                LIST_HEAD(tmp);
                spin_lock_irqsave(&conf->device_lock, flags);
                if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
-                       list_add(&tmp, &conf->bio_end_io_list);
-                       list_del_init(&conf->bio_end_io_list);
+                       while (!list_empty(&conf->bio_end_io_list)) {
+                               list_move(conf->bio_end_io_list.prev, &tmp);
+                               conf->nr_queued--;
+                       }
                }
                spin_unlock_irqrestore(&conf->device_lock, flags);
                while (!list_empty(&tmp)) {
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index b4f02c9..8ab8b65 100644
@@ -340,8 +340,7 @@ static void release_inactive_stripe_list(struct r5conf *conf,
                                         int hash)
 {
        int size;
-       unsigned long do_wakeup = 0;
-       int i = 0;
+       bool do_wakeup = false;
        unsigned long flags;
 
        if (hash == NR_STRIPE_HASH_LOCKS) {
@@ -362,19 +361,15 @@ static void release_inactive_stripe_list(struct r5conf *conf,
                            !list_empty(list))
                                atomic_dec(&conf->empty_inactive_list_nr);
                        list_splice_tail_init(list, conf->inactive_list + hash);
-                       do_wakeup |= 1 << hash;
+                       do_wakeup = true;
                        spin_unlock_irqrestore(conf->hash_locks + hash, flags);
                }
                size--;
                hash--;
        }
 
-       for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) {
-               if (do_wakeup & (1 << i))
-                       wake_up(&conf->wait_for_stripe[i]);
-       }
-
        if (do_wakeup) {
+               wake_up(&conf->wait_for_stripe);
                if (atomic_read(&conf->active_stripes) == 0)
                        wake_up(&conf->wait_for_quiescent);
                if (conf->retry_read_aligned)
@@ -687,15 +682,14 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
                        if (!sh) {
                                set_bit(R5_INACTIVE_BLOCKED,
                                        &conf->cache_state);
-                               wait_event_exclusive_cmd(
-                                       conf->wait_for_stripe[hash],
+                               wait_event_lock_irq(
+                                       conf->wait_for_stripe,
                                        !list_empty(conf->inactive_list + hash) &&
                                        (atomic_read(&conf->active_stripes)
                                         < (conf->max_nr_stripes * 3 / 4)
                                         || !test_bit(R5_INACTIVE_BLOCKED,
                                                      &conf->cache_state)),
-                                       spin_unlock_irq(conf->hash_locks + hash),
-                                       spin_lock_irq(conf->hash_locks + hash));
+                                       *(conf->hash_locks + hash));
                                clear_bit(R5_INACTIVE_BLOCKED,
                                          &conf->cache_state);
                        } else {
@@ -720,9 +714,6 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
                }
        } while (sh == NULL);
 
-       if (!list_empty(conf->inactive_list + hash))
-               wake_up(&conf->wait_for_stripe[hash]);
-
        spin_unlock_irq(conf->hash_locks + hash);
        return sh;
 }
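
What the revert ("RAID5: revert e9e4c377e2f563 to fix a livelock") restores: a single shared wait queue in place of one queue per hash lock. The livelock, roughly as reported:

	/* A waiter on wait_for_stripe[h] sleeps until inactive_list[h]
	 * is non-empty AND active_stripes has dropped below 3/4 of the
	 * maximum.  Stripes released under a different hash lower
	 * active_stripes but wake only their own hash's queue, so the
	 * waiter on hash h is never prodded to recheck a condition
	 * that has in fact become true.  One shared, non-exclusive
	 * queue means every release wakes every waiter. */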
@@ -2089,6 +2080,14 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
        unsigned long cpu;
        int err = 0;
 
+       /*
+        * Never shrink. And mddev_suspend() could deadlock if this is called
+        * from raid5d. In that case, scribble_disks and scribble_sectors
+        * should equal to new_disks and new_sectors
+        */
+       if (conf->scribble_disks >= new_disks &&
+           conf->scribble_sectors >= new_sectors)
+               return 0;
        mddev_suspend(conf->mddev);
        get_online_cpus();
        for_each_present_cpu(cpu) {
@@ -2110,6 +2109,10 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
        }
        put_online_cpus();
        mddev_resume(conf->mddev);
+       if (!err) {
+               conf->scribble_disks = new_disks;
+               conf->scribble_sectors = new_sectors;
+       }
        return err;
 }
 
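The new guard makes resize_chunks() grow-only against the cached geometry, so the re-entrant call from raid5d (via check_reshape(); see the companion "check_reshape() shouldn't call mddev_suspend" patch) returns before reaching mddev_suspend(). A stand-alone sketch of the pattern, with hypothetical names:

	#include <stdlib.h>

	struct scratch {
		void *buf;
		size_t disks, sectors;  /* geometry the buffer was sized for */
	};

	/* Grow-only resize: a no-op unless the request exceeds what the
	 * buffer already covers; the new geometry is recorded only on
	 * success, mirroring scribble_disks/scribble_sectors above. */
	static int scratch_resize(struct scratch *s, size_t disks, size_t sectors)
	{
		void *n;

		if (s->disks >= disks && s->sectors >= sectors)
			return 0;                  /* never shrink */
		n = realloc(s->buf, disks * sectors * 512);
		if (!n)
			return -1;
		s->buf = n;
		s->disks = disks;
		s->sectors = sectors;
		return 0;
	}
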
@@ -2190,7 +2193,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
        cnt = 0;
        list_for_each_entry(nsh, &newstripes, lru) {
                lock_device_hash_lock(conf, hash);
-               wait_event_exclusive_cmd(conf->wait_for_stripe[hash],
+               wait_event_cmd(conf->wait_for_stripe,
                                    !list_empty(conf->inactive_list + hash),
                                    unlock_device_hash_lock(conf, hash),
                                    lock_device_hash_lock(conf, hash));
@@ -4233,10 +4236,9 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
 
                list_del_init(&sh->batch_list);
 
-               WARN_ON_ONCE(sh->state & ((1 << STRIPE_ACTIVE) |
+               WARN_ONCE(sh->state & ((1 << STRIPE_ACTIVE) |
                                          (1 << STRIPE_SYNCING) |
                                          (1 << STRIPE_REPLACED) |
-                                         (1 << STRIPE_PREREAD_ACTIVE) |
                                          (1 << STRIPE_DELAYED) |
                                          (1 << STRIPE_BIT_DELAY) |
                                          (1 << STRIPE_FULL_WRITE) |
@@ -4246,11 +4248,14 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
                                          (1 << STRIPE_DISCARD) |
                                          (1 << STRIPE_BATCH_READY) |
                                          (1 << STRIPE_BATCH_ERR) |
-                                         (1 << STRIPE_BITMAP_PENDING)));
-               WARN_ON_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) |
-                                             (1 << STRIPE_REPLACED)));
+                                         (1 << STRIPE_BITMAP_PENDING)),
+                       "stripe state: %lx\n", sh->state);
+               WARN_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) |
+                                             (1 << STRIPE_REPLACED)),
+                       "head stripe state: %lx\n", head_sh->state);
 
                set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS |
+                                           (1 << STRIPE_PREREAD_ACTIVE) |
                                            (1 << STRIPE_DEGRADED)),
                              head_sh->state & (1 << STRIPE_INSYNC));
 
@@ -6376,6 +6381,8 @@ static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
                break;
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
+       case CPU_UP_CANCELED:
+       case CPU_UP_CANCELED_FROZEN:
                free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu));
                break;
        default:
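
Why the two new cases pair with CPU_DEAD: CPU_UP_PREPARE allocates the per-CPU scratch buffers, and CPU_UP_CANCELED (plus its _FROZEN variant) fires when that online attempt is aborted afterwards, so the buffers must be freed there exactly as on hot-unplug or they leak. In summary:

	/* Notifier pairing:
	 *   CPU_UP_PREPARE[_FROZEN]   -> allocate scratch buffers
	 *   CPU_UP_CANCELED[_FROZEN]  -> free_scratch_buffer()   (new)
	 *   CPU_DEAD[_FROZEN]         -> free_scratch_buffer()
	 */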
@@ -6413,6 +6420,12 @@ static int raid5_alloc_percpu(struct r5conf *conf)
        }
        put_online_cpus();
 
+       if (!err) {
+               conf->scribble_disks = max(conf->raid_disks,
+                       conf->previous_raid_disks);
+               conf->scribble_sectors = max(conf->chunk_sectors,
+                       conf->prev_chunk_sectors);
+       }
        return err;
 }
 
@@ -6503,9 +6516,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
        seqcount_init(&conf->gen_lock);
        mutex_init(&conf->cache_size_mutex);
        init_waitqueue_head(&conf->wait_for_quiescent);
-       for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) {
-               init_waitqueue_head(&conf->wait_for_stripe[i]);
-       }
+       init_waitqueue_head(&conf->wait_for_stripe);
        init_waitqueue_head(&conf->wait_for_overlap);
        INIT_LIST_HEAD(&conf->handle_list);
        INIT_LIST_HEAD(&conf->hold_list);
@@ -7014,8 +7025,8 @@ static int raid5_run(struct mddev *mddev)
                }
 
                if (discard_supported &&
-                  mddev->queue->limits.max_discard_sectors >= stripe &&
-                  mddev->queue->limits.discard_granularity >= stripe)
+                   mddev->queue->limits.max_discard_sectors >= (stripe >> 9) &&
+                   mddev->queue->limits.discard_granularity >= stripe)
                        queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
                                                mddev->queue);
                else
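
"Compare apples to apples": limits.max_discard_sectors counts 512-byte sectors while stripe here is in bytes; limits.discard_granularity is in bytes, which is why only the first comparison gains the >> 9. A worked example with a hypothetical 1 MiB full stripe:

	/* stripe       = 1048576 bytes  ->  stripe >> 9 = 2048 sectors
	 * A device advertising max_discard_sectors = 8192 (i.e. 4 MiB)
	 * satisfies the corrected test (8192 >= 2048) but failed the
	 * old bytes-vs-sectors comparison (8192 < 1048576), leaving
	 * QUEUE_FLAG_DISCARD clear on hardware that supported it. */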
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index a415e1c..517d4b6 100644
@@ -510,6 +510,8 @@ struct r5conf {
                                              * conversions
                                              */
        } __percpu *percpu;
+       int scribble_disks;
+       int scribble_sectors;
 #ifdef CONFIG_HOTPLUG_CPU
        struct notifier_block   cpu_notify;
 #endif
@@ -522,7 +524,7 @@ struct r5conf {
        atomic_t                empty_inactive_list_nr;
        struct llist_head       released_stripes;
        wait_queue_head_t       wait_for_quiescent;
-       wait_queue_head_t       wait_for_stripe[NR_STRIPE_HASH_LOCKS];
+       wait_queue_head_t       wait_for_stripe;
        wait_queue_head_t       wait_for_overlap;
        unsigned long           cache_state;
 #define R5_INACTIVE_BLOCKED    1       /* release of inactive stripes blocked,