#include <linux/random.h>
#include <linux/reboot.h>
#include <linux/file.h>
+#include <linux/compat.h>
#include <linux/delay.h>
#include <linux/raid/md_p.h>
#include <linux/raid/md_u.h>
#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
+/*
+ * Default number of read corrections we'll attempt on an rdev
+ * before ejecting it from the array. We divide the read error
+ * count by 2 for every hour elapsed between read errors.
+ */
+#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20
/*
* Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
* is 1000 KB/sec, so the extra system load does not show up that much.
return 0;
}
rcu_read_lock();
- if (mddev->suspended) {
+ if (mddev->suspended || mddev->barrier) {
DEFINE_WAIT(__wait);
for (;;) {
prepare_to_wait(&mddev->sb_wait, &__wait,
TASK_UNINTERRUPTIBLE);
- if (!mddev->suspended)
+ if (!mddev->suspended && !mddev->barrier)
break;
rcu_read_unlock();
schedule();
int mddev_congested(mddev_t *mddev, int bits)
{
+ if (mddev->barrier)
+ return 1;
return mddev->suspended;
}
EXPORT_SYMBOL(mddev_congested);
+/*
+ * Generic barrier handling for md
+ */
+
+#define POST_REQUEST_BARRIER ((void*)1)
+
+static void md_end_barrier(struct bio *bio, int err)
+{
+ mdk_rdev_t *rdev = bio->bi_private;
+ mddev_t *mddev = rdev->mddev;
+ if (err == -EOPNOTSUPP && mddev->barrier != POST_REQUEST_BARRIER)
+ set_bit(BIO_EOPNOTSUPP, &mddev->barrier->bi_flags);
+
+ rdev_dec_pending(rdev, mddev);
+
+ if (atomic_dec_and_test(&mddev->flush_pending)) {
+ if (mddev->barrier == POST_REQUEST_BARRIER) {
+ /* This was a post-request barrier */
+ mddev->barrier = NULL;
+ wake_up(&mddev->sb_wait);
+ } else
+ /* The pre-request barrier has finished */
+ schedule_work(&mddev->barrier_work);
+ }
+ bio_put(bio);
+}
+
+static void submit_barriers(mddev_t *mddev)
+{
+ mdk_rdev_t *rdev;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
+ if (rdev->raid_disk >= 0 &&
+ !test_bit(Faulty, &rdev->flags)) {
+ /* Take two references, one is dropped
+ * when request finishes, one after
+ * we reclaim rcu_read_lock
+ */
+ struct bio *bi;
+ atomic_inc(&rdev->nr_pending);
+ atomic_inc(&rdev->nr_pending);
+ rcu_read_unlock();
+ bi = bio_alloc(GFP_KERNEL, 0);
+ bi->bi_end_io = md_end_barrier;
+ bi->bi_private = rdev;
+ bi->bi_bdev = rdev->bdev;
+ atomic_inc(&mddev->flush_pending);
+ submit_bio(WRITE_BARRIER, bi);
+ rcu_read_lock();
+ rdev_dec_pending(rdev, mddev);
+ }
+ rcu_read_unlock();
+}
+
+static void md_submit_barrier(struct work_struct *ws)
+{
+ mddev_t *mddev = container_of(ws, mddev_t, barrier_work);
+ struct bio *bio = mddev->barrier;
+
+ atomic_set(&mddev->flush_pending, 1);
+
+ if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
+ bio_endio(bio, -EOPNOTSUPP);
+ else if (bio->bi_size == 0)
+ /* an empty barrier - all done */
+ bio_endio(bio, 0);
+ else {
+ bio->bi_rw &= ~(1<<BIO_RW_BARRIER);
+ if (mddev->pers->make_request(mddev->queue, bio))
+ generic_make_request(bio);
+ mddev->barrier = POST_REQUEST_BARRIER;
+ submit_barriers(mddev);
+ }
+ if (atomic_dec_and_test(&mddev->flush_pending)) {
+ mddev->barrier = NULL;
+ wake_up(&mddev->sb_wait);
+ }
+}
+
+void md_barrier_request(mddev_t *mddev, struct bio *bio)
+{
+ spin_lock_irq(&mddev->write_lock);
+ wait_event_lock_irq(mddev->sb_wait,
+ !mddev->barrier,
+ mddev->write_lock, /*nothing*/);
+ mddev->barrier = bio;
+ spin_unlock_irq(&mddev->write_lock);
+
+ atomic_set(&mddev->flush_pending, 1);
+ INIT_WORK(&mddev->barrier_work, md_submit_barrier);
+
+ submit_barriers(mddev);
+
+ if (atomic_dec_and_test(&mddev->flush_pending))
+ schedule_work(&mddev->barrier_work);
+}
+EXPORT_SYMBOL(md_barrier_request);
static inline mddev_t *mddev_get(mddev_t *mddev)
{
mutex_init(&new->open_mutex);
mutex_init(&new->reconfig_mutex);
+ mutex_init(&new->bitmap_info.mutex);
INIT_LIST_HEAD(&new->disks);
INIT_LIST_HEAD(&new->all_mddevs);
init_timer(&new->safemode_timer);
atomic_set(&new->openers, 0);
atomic_set(&new->active_io, 0);
spin_lock_init(&new->write_lock);
+ atomic_set(&new->flush_pending, 0);
init_waitqueue_head(&new->sb_wait);
init_waitqueue_head(&new->recovery_wait);
new->reshape_position = MaxSector;
*/
int md_check_no_bitmap(mddev_t *mddev)
{
- if (!mddev->bitmap_file && !mddev->bitmap_offset)
+ if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
return 0;
printk(KERN_ERR "%s: bitmaps are not supported for %s\n",
mdname(mddev), mddev->pers->name);
mddev->raid_disks = sb->raid_disks;
mddev->dev_sectors = sb->size * 2;
mddev->events = ev1;
- mddev->bitmap_offset = 0;
- mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
+ mddev->bitmap_info.offset = 0;
+ mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
if (mddev->minor_version >= 91) {
mddev->reshape_position = sb->reshape_position;
mddev->max_disks = MD_SB_DISKS;
if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
- mddev->bitmap_file == NULL)
- mddev->bitmap_offset = mddev->default_bitmap_offset;
+ mddev->bitmap_info.file == NULL)
+ mddev->bitmap_info.offset =
+ mddev->bitmap_info.default_offset;
} else if (mddev->pers == NULL) {
/* Insist on good event counter while assembling */
sb->layout = mddev->layout;
sb->chunk_size = mddev->chunk_sectors << 9;
- if (mddev->bitmap && mddev->bitmap_file == NULL)
+ if (mddev->bitmap && mddev->bitmap_info.file == NULL)
sb->state |= (1<<MD_SB_BITMAP_PRESENT);
sb->disks[0].state = (1<<MD_DISK_REMOVED);
{
if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
return 0; /* component must fit device */
- if (rdev->mddev->bitmap_offset)
+ if (rdev->mddev->bitmap_info.offset)
return 0; /* can't move bitmap */
rdev->sb_start = calc_dev_sboffset(rdev->bdev);
if (!num_sectors || num_sectors > rdev->sb_start)
mddev->raid_disks = le32_to_cpu(sb->raid_disks);
mddev->dev_sectors = le64_to_cpu(sb->size);
mddev->events = ev1;
- mddev->bitmap_offset = 0;
- mddev->default_bitmap_offset = 1024 >> 9;
+ mddev->bitmap_info.offset = 0;
+ mddev->bitmap_info.default_offset = 1024 >> 9;
mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
memcpy(mddev->uuid, sb->set_uuid, 16);
mddev->max_disks = (4096-256)/2;
if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
- mddev->bitmap_file == NULL )
- mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset);
+ mddev->bitmap_info.file == NULL )
+ mddev->bitmap_info.offset =
+ (__s32)le32_to_cpu(sb->bitmap_offset);
if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
mddev->reshape_position = le64_to_cpu(sb->reshape_position);
sb->level = cpu_to_le32(mddev->level);
sb->layout = cpu_to_le32(mddev->layout);
- if (mddev->bitmap && mddev->bitmap_file == NULL) {
- sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
+ if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
+ sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
}
max_sectors -= rdev->data_offset;
if (!num_sectors || num_sectors > max_sectors)
num_sectors = max_sectors;
- } else if (rdev->mddev->bitmap_offset) {
+ } else if (rdev->mddev->bitmap_info.offset) {
/* minor version 0 with bitmap we can't move */
return 0;
} else {
rdev->flags = 0;
rdev->data_offset = 0;
rdev->sb_events = 0;
+ rdev->last_read_error.tv_sec = 0;
+ rdev->last_read_error.tv_nsec = 0;
atomic_set(&rdev->nr_pending, 0);
atomic_set(&rdev->read_errors, 0);
atomic_set(&rdev->corrected_errors, 0);
}
}
+/* Read a fixed-point number.
+ * Numbers in sysfs attributes should be in "standard" units where
+ * possible, so time should be in seconds.
+ * However we internally use a a much smaller unit such as
+ * milliseconds or jiffies.
+ * This function takes a decimal number with a possible fractional
+ * component, and produces an integer which is the result of
+ * multiplying that number by 10^'scale'.
+ * all without any floating-point arithmetic.
+ */
+int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
+{
+ unsigned long result = 0;
+ long decimals = -1;
+ while (isdigit(*cp) || (*cp == '.' && decimals < 0)) {
+ if (*cp == '.')
+ decimals = 0;
+ else if (decimals < scale) {
+ unsigned int value;
+ value = *cp - '0';
+ result = result * 10 + value;
+ if (decimals >= 0)
+ decimals++;
+ }
+ cp++;
+ }
+ if (*cp == '\n')
+ cp++;
+ if (*cp)
+ return -EINVAL;
+ if (decimals < 0)
+ decimals = 0;
+ while (decimals < scale) {
+ result *= 10;
+ decimals ++;
+ }
+ *res = result;
+ return 0;
+}
+
+
static void md_safemode_timeout(unsigned long data);
static ssize_t
static ssize_t
safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len)
{
- int scale=1;
- int dot=0;
- int i;
unsigned long msec;
- char buf[30];
- /* remove a period, and count digits after it */
- if (len >= sizeof(buf))
- return -EINVAL;
- strlcpy(buf, cbuf, sizeof(buf));
- for (i=0; i<len; i++) {
- if (dot) {
- if (isdigit(buf[i])) {
- buf[i-1] = buf[i];
- scale *= 10;
- }
- buf[i] = 0;
- } else if (buf[i] == '.') {
- dot=1;
- buf[i] = 0;
- }
- }
- if (strict_strtoul(buf, 10, &msec) < 0)
+ if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
return -EINVAL;
- msec = (msec * 1000) / scale;
if (msec == 0)
mddev->safemode_delay = 0;
else {
static struct md_sysfs_entry md_array_state =
__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
+static ssize_t
+max_corrected_read_errors_show(mddev_t *mddev, char *page) {
+ return sprintf(page, "%d\n",
+ atomic_read(&mddev->max_corr_read_errors));
+}
+
+static ssize_t
+max_corrected_read_errors_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ char *e;
+ unsigned long n = simple_strtoul(buf, &e, 10);
+
+ if (*buf && (*e == 0 || *e == '\n')) {
+ atomic_set(&mddev->max_corr_read_errors, n);
+ return len;
+ }
+ return -EINVAL;
+}
+
+static struct md_sysfs_entry max_corr_read_errors =
+__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
+ max_corrected_read_errors_store);
+
static ssize_t
null_show(mddev_t *mddev, char *page)
{
&md_array_state.attr,
&md_reshape_position.attr,
&md_array_size.attr,
+ &max_corr_read_errors.attr,
NULL,
};
mddev->sysfs_action = NULL;
mddev->private = NULL;
}
+ sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
kobject_del(&mddev->kobj);
kobject_put(&mddev->kobj);
}
disk->disk_name);
error = 0;
}
+ if (sysfs_create_group(&mddev->kobj, &md_bitmap_group))
+ printk(KERN_DEBUG "pointless warning\n");
abort:
mutex_unlock(&disks_mutex);
if (!error) {
mddev->ro = 0;
atomic_set(&mddev->writes_pending,0);
+ atomic_set(&mddev->max_corr_read_errors,
+ MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
mddev->safemode = 0;
mddev->safemode_timer.function = md_safemode_timeout;
mddev->safemode_timer.data = (unsigned long) mddev;
return 0;
}
-static void restore_bitmap_write_access(struct file *file)
+void restore_bitmap_write_access(struct file *file)
{
struct inode *inode = file->f_mapping->host;
printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
bitmap_destroy(mddev);
- if (mddev->bitmap_file) {
- restore_bitmap_write_access(mddev->bitmap_file);
- fput(mddev->bitmap_file);
- mddev->bitmap_file = NULL;
+ if (mddev->bitmap_info.file) {
+ restore_bitmap_write_access(mddev->bitmap_info.file);
+ fput(mddev->bitmap_info.file);
+ mddev->bitmap_info.file = NULL;
}
- mddev->bitmap_offset = 0;
+ mddev->bitmap_info.offset = 0;
/* make sure all md_delayed_delete calls have finished */
flush_scheduled_work();
mddev->degraded = 0;
mddev->barriers_work = 0;
mddev->safemode = 0;
+ mddev->bitmap_info.offset = 0;
+ mddev->bitmap_info.default_offset = 0;
+ mddev->bitmap_info.chunksize = 0;
+ mddev->bitmap_info.daemon_sleep = 0;
+ mddev->bitmap_info.max_write_behind = 0;
kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
if (mddev->hold_active == UNTIL_STOP)
mddev->hold_active = 0;
info.state = 0;
if (mddev->in_sync)
info.state = (1<<MD_SB_CLEAN);
- if (mddev->bitmap && mddev->bitmap_offset)
+ if (mddev->bitmap && mddev->bitmap_info.offset)
info.state = (1<<MD_SB_BITMAP_PRESENT);
info.active_disks = insync;
info.working_disks = working;
if (fd >= 0) {
if (mddev->bitmap)
return -EEXIST; /* cannot add when bitmap is present */
- mddev->bitmap_file = fget(fd);
+ mddev->bitmap_info.file = fget(fd);
- if (mddev->bitmap_file == NULL) {
+ if (mddev->bitmap_info.file == NULL) {
printk(KERN_ERR "%s: error: failed to get bitmap file\n",
mdname(mddev));
return -EBADF;
}
- err = deny_bitmap_write_access(mddev->bitmap_file);
+ err = deny_bitmap_write_access(mddev->bitmap_info.file);
if (err) {
printk(KERN_ERR "%s: error: bitmap file is already in use\n",
mdname(mddev));
- fput(mddev->bitmap_file);
- mddev->bitmap_file = NULL;
+ fput(mddev->bitmap_info.file);
+ mddev->bitmap_info.file = NULL;
return err;
}
- mddev->bitmap_offset = 0; /* file overrides offset */
+ mddev->bitmap_info.offset = 0; /* file overrides offset */
} else if (mddev->bitmap == NULL)
return -ENOENT; /* cannot remove what isn't there */
err = 0;
mddev->pers->quiesce(mddev, 0);
}
if (fd < 0) {
- if (mddev->bitmap_file) {
- restore_bitmap_write_access(mddev->bitmap_file);
- fput(mddev->bitmap_file);
+ if (mddev->bitmap_info.file) {
+ restore_bitmap_write_access(mddev->bitmap_info.file);
+ fput(mddev->bitmap_info.file);
}
- mddev->bitmap_file = NULL;
+ mddev->bitmap_info.file = NULL;
}
return err;
mddev->flags = 0;
set_bit(MD_CHANGE_DEVS, &mddev->flags);
- mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
- mddev->bitmap_offset = 0;
+ mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
+ mddev->bitmap_info.offset = 0;
mddev->reshape_position = MaxSector;
int state = 0;
/* calculate expected state,ignoring low bits */
- if (mddev->bitmap && mddev->bitmap_offset)
+ if (mddev->bitmap && mddev->bitmap_info.offset)
state |= (1 << MD_SB_BITMAP_PRESENT);
if (mddev->major_version != info->major_version ||
/* add the bitmap */
if (mddev->bitmap)
return -EEXIST;
- if (mddev->default_bitmap_offset == 0)
+ if (mddev->bitmap_info.default_offset == 0)
return -EINVAL;
- mddev->bitmap_offset = mddev->default_bitmap_offset;
+ mddev->bitmap_info.offset =
+ mddev->bitmap_info.default_offset;
mddev->pers->quiesce(mddev, 1);
rv = bitmap_create(mddev);
if (rv)
mddev->pers->quiesce(mddev, 1);
bitmap_destroy(mddev);
mddev->pers->quiesce(mddev, 0);
- mddev->bitmap_offset = 0;
+ mddev->bitmap_info.offset = 0;
}
}
md_update_sb(mddev, 1);
abort:
return err;
}
+#ifdef CONFIG_COMPAT
+static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
+{
+ switch (cmd) {
+ case HOT_REMOVE_DISK:
+ case HOT_ADD_DISK:
+ case SET_DISK_FAULTY:
+ case SET_BITMAP_FILE:
+ /* These take in integer arg, do not convert */
+ break;
+ default:
+ arg = (unsigned long)compat_ptr(arg);
+ break;
+ }
+
+ return md_ioctl(bdev, mode, cmd, arg);
+}
+#endif /* CONFIG_COMPAT */
static int md_open(struct block_device *bdev, fmode_t mode)
{
.open = md_open,
.release = md_release,
.ioctl = md_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = md_compat_ioctl,
+#endif
.getgeo = md_getgeo,
.media_changed = md_media_changed,
.revalidate_disk= md_revalidate,
unsigned long chunk_kb;
unsigned long flags;
spin_lock_irqsave(&bitmap->lock, flags);
- chunk_kb = bitmap->chunksize >> 10;
+ chunk_kb = mddev->bitmap_info.chunksize >> 10;
seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], "
"%lu%s chunk",
bitmap->pages - bitmap->missing_pages,
bitmap->pages,
(bitmap->pages - bitmap->missing_pages)
<< (PAGE_SHIFT - 10),
- chunk_kb ? chunk_kb : bitmap->chunksize,
+ chunk_kb ? chunk_kb : mddev->bitmap_info.chunksize,
chunk_kb ? "KB" : "B");
if (bitmap->file) {
seq_printf(seq, ", file: ");
desc, mdname(mddev));
mddev->curr_resync = j;
}
+ mddev->curr_resync_completed = mddev->curr_resync;
while (j < max_sectors) {
sector_t sectors;
set_bit(MD_CHANGE_DEVS, &mddev->flags);
skip:
+ if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
+ /* We completed so min/max setting can be forgotten if used. */
+ if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
+ mddev->resync_min = 0;
+ mddev->resync_max = MaxSector;
+ } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
+ mddev->resync_min = mddev->curr_resync_completed;
mddev->curr_resync = 0;
- mddev->curr_resync_completed = 0;
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
- /* We completed so max setting can be forgotten. */
- mddev->resync_max = MaxSector;
+ mddev->curr_resync_completed = 0;
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
wake_up(&resync_wait);
set_bit(MD_RECOVERY_DONE, &mddev->recovery);
if (mddev->bitmap)
- bitmap_daemon_work(mddev->bitmap);
+ bitmap_daemon_work(mddev);
if (mddev->ro)
return;
EXPORT_SYMBOL(md_wakeup_thread);
EXPORT_SYMBOL(md_check_recovery);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("MD RAID framework");
MODULE_ALIAS("md");
MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);