*/
+ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/in.h>
{
genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
if (genlmsg_reply(skb, info))
- printk(KERN_ERR "drbd: error sending genl reply\n");
+ pr_err("error sending genl reply\n");
}
/* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only
struct task_struct *opa;
kref_get(&connection->kref);
+ /* We may just have force_sig()'ed this thread
+ * to get it out of some blocking network function.
+ * Clear signals; otherwise kthread_run(), which internally uses
+ * wait_for_completion_killable(), will mistake our pending signal
+ * for a new fatal signal and fail. */
+ flush_signals(current);
opa = kthread_run(_try_outdate_peer_async, connection, "drbd_async_h");
if (IS_ERR(opa)) {
drbd_err(connection, "out of mem, failed to invoke fence-peer helper\n");
}
enum drbd_state_rv
- drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
+ drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int force)
{
+ struct drbd_peer_device *const peer_device = first_peer_device(device);
+ struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
const int max_tries = 4;
enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
struct net_conf *nc;
device->state.disk == D_CONSISTENT && mask.pdsk == 0) {
D_ASSERT(device, device->state.pdsk == D_UNKNOWN);
- if (conn_try_outdate_peer(first_peer_device(device)->connection)) {
+ if (conn_try_outdate_peer(connection)) {
val.disk = D_UP_TO_DATE;
mask.disk = D_MASK;
}
if (rv == SS_NOTHING_TO_DO)
goto out;
if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
- if (!conn_try_outdate_peer(first_peer_device(device)->connection) && force) {
+ if (!conn_try_outdate_peer(connection) && force) {
drbd_warn(device, "Forced into split brain situation!\n");
mask.pdsk = D_MASK;
val.pdsk = D_OUTDATED;
retry at most once more in this case. */
int timeo;
rcu_read_lock();
- nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
+ nc = rcu_dereference(connection->net_conf);
timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
rcu_read_unlock();
schedule_timeout_interruptible(timeo);
/* FIXME also wait for all pending P_BARRIER_ACK? */
if (new_role == R_SECONDARY) {
- set_disk_ro(device->vdisk, true);
if (get_ldev(device)) {
device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
put_ldev(device);
}
} else {
- /* Called from drbd_adm_set_role only.
- * We are still holding the conf_update mutex. */
- nc = first_peer_device(device)->connection->net_conf;
+ mutex_lock(&device->resource->conf_update);
+ nc = connection->net_conf;
if (nc)
nc->discard_my_data = 0; /* without copy; single bit op is atomic */
+ mutex_unlock(&device->resource->conf_update);
- set_disk_ro(device->vdisk, false);
if (get_ldev(device)) {
if (((device->state.conn < C_CONNECTED ||
device->state.pdsk <= D_FAILED)
if (device->state.conn >= C_WF_REPORT_PARAMS) {
/* if this was forced, we should consider sync */
if (forced)
- drbd_send_uuids(first_peer_device(device));
- drbd_send_current_state(first_peer_device(device));
+ drbd_send_uuids(peer_device);
+ drbd_send_current_state(peer_device);
}
drbd_md_sync(device);
-
+ set_disk_ro(device->vdisk, new_role == R_SECONDARY);
kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
out:
mutex_unlock(device->state_mutex);
* still lock the act_log to not trigger ASSERTs there.
*/
drbd_suspend_io(device);
- buffer = drbd_md_get_buffer(device); /* Lock meta-data IO */
+ buffer = drbd_md_get_buffer(device, __func__); /* Lock meta-data IO */
if (!buffer) {
drbd_resume_io(device);
return DS_ERROR;
if (la_size_changed || md_moved || rs) {
u32 prev_flags;
+ /* We do some synchronous IO below, which may take some time.
+ * Clear the timer to avoid scary "timer expired!" messages; the
+ * "Superblock" is written out at least twice below anyway. */
+ del_timer(&device->md_sync_timer);
drbd_al_shrink(device); /* All extents inactive. */
prev_flags = md->flags;
return 0;
}
- static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_bio_size)
+ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev,
+ unsigned int max_bio_size)
{
struct request_queue * const q = device->rq_queue;
unsigned int max_hw_sectors = max_bio_size >> 9;
unsigned int max_segments = 0;
struct request_queue *b = NULL;
- if (get_ldev_if_state(device, D_ATTACHING)) {
- b = device->ldev->backing_bdev->bd_disk->queue;
+ if (bdev) {
+ b = bdev->backing_bdev->bd_disk->queue;
max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
rcu_read_lock();
b->backing_dev_info.ra_pages);
q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
}
- put_ldev(device);
}
}
- void drbd_reconsider_max_bio_size(struct drbd_device *device)
+ void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev)
{
unsigned int now, new, local, peer;
local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */
peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */
- if (get_ldev_if_state(device, D_ATTACHING)) {
- local = queue_max_hw_sectors(device->ldev->backing_bdev->bd_disk->queue) << 9;
+ if (bdev) {
+ local = queue_max_hw_sectors(bdev->backing_bdev->bd_disk->queue) << 9;
device->local_max_bio_size = local;
- put_ldev(device);
}
local = min(local, DRBD_MAX_BIO_SIZE);
if (new != now)
drbd_info(device, "max BIO size = %u\n", new);
- drbd_setup_queue_param(device, new);
+ drbd_setup_queue_param(device, bdev, new);
}
/* Starts the worker thread */
return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION;
}
+ /*
+  * Report whether any write-ordering related setting differs between two
+  * disk configurations.
+  *
+  * Compares the disk_barrier, disk_flushes and disk_drain fields of @a and
+  * @b and returns true if at least one of them is not equal, false if all
+  * three match.  Neither pointer is checked for NULL here.
+  */
+ static bool write_ordering_changed(struct disk_conf *a, struct disk_conf *b)
+ {
+ return a->disk_barrier != b->disk_barrier ||
+ a->disk_flushes != b->disk_flushes ||
+ a->disk_drain != b->disk_drain;
+ }
+
int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
{
struct drbd_config_context adm_ctx;
else
set_bit(MD_NO_FUA, &device->flags);
- drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush);
+ if (write_ordering_changed(old_disk_conf, new_disk_conf))
+ drbd_bump_write_ordering(device->resource, NULL, WO_bdev_flush);
drbd_md_sync(device);
{
struct drbd_config_context adm_ctx;
struct drbd_device *device;
+ struct drbd_peer_device *peer_device;
+ struct drbd_connection *connection;
int err;
enum drbd_ret_code retcode;
enum determine_dev_size dd;
device = adm_ctx.device;
mutex_lock(&adm_ctx.resource->adm_mutex);
- conn_reconfig_start(first_peer_device(device)->connection);
+ peer_device = first_peer_device(device);
+ connection = peer_device ? peer_device->connection : NULL;
+ conn_reconfig_start(connection);
/* if you want to reconfigure, please tear down first */
if (device->state.disk > D_DISKLESS) {
* drbd_ldev_destroy is done already, we may end up here very fast,
* e.g. if someone calls attach from the on-io-error handler,
* to realize a "hot spare" feature (not that I'd recommend that) */
- wait_event(device->misc_wait, !atomic_read(&device->local_cnt));
+ wait_event(device->misc_wait, !test_bit(GOING_DISKLESS, &device->flags));
/* make sure there is no leftover from previous force-detach attempts */
clear_bit(FORCE_DETACH, &device->flags);
goto fail;
rcu_read_lock();
- nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
+ nc = rcu_dereference(connection->net_conf);
if (nc) {
if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) {
rcu_read_unlock();
*/
wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device));
/* and for any other previously queued work */
- drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
+ drbd_flush_workqueue(&connection->sender_work);
rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE);
retcode = rv; /* FIXME: Type mismatch. */
new_disk_conf = NULL;
new_plan = NULL;
- drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush);
+ drbd_bump_write_ordering(device->resource, device->ldev, WO_bdev_flush);
if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY))
set_bit(CRASHED_PRIMARY, &device->flags);
device->read_cnt = 0;
device->writ_cnt = 0;
- drbd_reconsider_max_bio_size(device);
+ drbd_reconsider_max_bio_size(device, device->ldev);
/* If I am currently not R_PRIMARY,
* but meta data primary indicator is set,
kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
put_ldev(device);
- conn_reconfig_done(first_peer_device(device)->connection);
+ conn_reconfig_done(connection);
mutex_unlock(&adm_ctx.resource->adm_mutex);
drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
drbd_force_state(device, NS(disk, D_DISKLESS));
drbd_md_sync(device);
fail:
- conn_reconfig_done(first_peer_device(device)->connection);
+ conn_reconfig_done(connection);
if (nbc) {
if (nbc->backing_bdev)
blkdev_put(nbc->backing_bdev,
}
drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
- drbd_md_get_buffer(device); /* make sure there is no in-flight meta-data IO */
+ drbd_md_get_buffer(device, __func__); /* make sure there is no in-flight meta-data IO */
retcode = drbd_request_state(device, NS(disk, D_FAILED));
drbd_md_put_buffer(device);
/* D_FAILED will transition to DISKLESS. */
if (retcode != NO_ERROR)
goto out;
- mutex_lock(&adm_ctx.resource->adm_mutex);
device = adm_ctx.device;
+ if (!get_ldev(device)) {
+ retcode = ERR_NO_DISK;
+ goto out;
+ }
+
+ mutex_lock(&adm_ctx.resource->adm_mutex);
/* If there is still bitmap IO pending, probably because of a previous
* resync just being finished, wait for it before requesting a new resync.
retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T));
drbd_resume_io(device);
mutex_unlock(&adm_ctx.resource->adm_mutex);
+ put_ldev(device);
out:
drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
return 0;
}
- static int drbd_bmio_set_susp_al(struct drbd_device *device)
+ static int drbd_bmio_set_susp_al(struct drbd_device *device) __must_hold(local)
{
int rv;
if (retcode != NO_ERROR)
goto out;
- mutex_lock(&adm_ctx.resource->adm_mutex);
device = adm_ctx.device;
+ if (!get_ldev(device)) {
+ retcode = ERR_NO_DISK;
+ goto out;
+ }
+
+ mutex_lock(&adm_ctx.resource->adm_mutex);
/* If there is still bitmap IO pending, probably because of a previous
* resync just being finished, wait for it before requesting a new resync.
retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S));
drbd_resume_io(device);
mutex_unlock(&adm_ctx.resource->adm_mutex);
+ put_ldev(device);
out:
drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
return list_first_entry(&resource->connections, struct drbd_connection, connections);
}
- int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
+ static int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
const struct sib_info *sib)
{
struct drbd_resource *resource = device->resource;
unsigned seq;
int err = -ENOMEM;
- if (sib->sib_reason == SIB_SYNC_PROGRESS) {
- if (time_after(jiffies, device->rs_last_bcast + HZ))
- device->rs_last_bcast = jiffies;
- else
- return;
- }
-
seq = atomic_inc_return(&drbd_genl_seq);
msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
if (!msg)
#include <linux/compiler.h>
/*
- History:
- Started: Aug 9 by Lawrence Foard (entropy@world.std.com), to allow user
- process control of SCSI devices.
- Development Sponsored by Killy Corp. NY NY
-Original driver (sg.h):
-* Copyright (C) 1992 Lawrence Foard
-Version 2 and 3 extensions to driver:
-* Copyright (C) 1998 - 2006 Douglas Gilbert
-
- Version: 3.5.34 (20060920)
- This version is for 2.6 series kernels.
-
- For a full changelog see http://www.torque.net/sg
-
-Map of SG verions to the Linux kernels in which they appear:
- ---------- ----------------------------------
- original all kernels < 2.2.6
- 2.1.40 2.2.20
- 3.0.x optional version 3 sg driver for 2.2 series
- 3.1.17++ 2.4.0++
- 3.5.30++ 2.6.0++
-
-Major new features in SG 3.x driver (cf SG 2.x drivers)
- - SG_IO ioctl() combines function if write() and read()
- - new interface (sg_io_hdr_t) but still supports old interface
- - scatter/gather in user space, direct IO, and mmap supported
-
- The normal action of this driver is to use the adapter (HBA) driver to DMA
- data into kernel buffers and then use the CPU to copy the data into the
- user space (vice versa for writes). That is called "indirect" IO due to
- the double handling of data. There are two methods offered to remove the
- redundant copy: 1) direct IO and 2) using the mmap() system call to map
- the reserve buffer (this driver has one reserve buffer per fd) into the
- user space. Both have their advantages.
- In terms of absolute speed mmap() is faster. If speed is not a concern,
- indirect IO should be fine. Read the documentation for more information.
-
- ** N.B. To use direct IO 'echo 1 > /proc/scsi/sg/allow_dio' or
- 'echo 1 > /sys/module/sg/parameters/allow_dio' is needed.
- That attribute is 0 by default. **
-
- Historical note: this SCSI pass-through driver has been known as "sg" for
- a decade. In broader kernel discussions "sg" is used to refer to scatter
- gather techniques. The context should clarify which "sg" is referred to.
-
- Documentation
- =============
- A web site for the SG device driver can be found at:
- http://www.torque.net/sg [alternatively check the MAINTAINERS file]
- The documentation for the sg version 3 driver can be found at:
- http://www.torque.net/sg/p/sg_v3_ho.html
- This is a rendering from DocBook source [change the extension to "sgml"
- or "xml"]. There are renderings in "ps", "pdf", "rtf" and "txt" (soon).
- The SG_IO ioctl is now found in other parts kernel (e.g. the block layer).
- For more information see http://www.torque.net/sg/sg_io.html
-
- The older, version 2 documents discuss the original sg interface in detail:
- http://www.torque.net/sg/p/scsi-generic.txt
- http://www.torque.net/sg/p/scsi-generic_long.txt
- Also available: <kernel_source>/Documentation/scsi/scsi-generic.txt
-
- Utility and test programs are available at the sg web site. They are
- packaged as sg3_utils (for the lk 2.4 and 2.6 series) and sg_utils
- (for the lk 2.2 series).
-*/
+ * History:
+ * Started: Aug 9 by Lawrence Foard (entropy@world.std.com), to allow user
+ * process control of SCSI devices.
+ * Development Sponsored by Killy Corp. NY NY
+ *
+ * Original driver (sg.h):
+ * Copyright (C) 1992 Lawrence Foard
+ * Version 2 and 3 extensions to driver:
+ * Copyright (C) 1998 - 2014 Douglas Gilbert
+ *
+ * Version: 3.5.36 (20140603)
+ * This version is for 2.6 and 3 series kernels.
+ *
+ * Documentation
+ * =============
+ * A web site for the SG device driver can be found at:
+ * http://sg.danny.cz/sg [alternatively check the MAINTAINERS file]
+ * The documentation for the sg version 3 driver can be found at:
+ * http://sg.danny.cz/sg/p/sg_v3_ho.html
+ * Also see: <kernel_source>/Documentation/scsi/scsi-generic.txt
+ *
+ * For utility and test programs see: http://sg.danny.cz/sg/sg3_utils.html
+ */
#ifdef __KERNEL__
extern int sg_big_buff; /* for sysctl */
#endif
-/* New interface introduced in the 3.x SG drivers follows */
typedef struct sg_iovec /* same structure as used by readv() Linux system */
{ /* call. It defines one scatter-gather element. */
{
int interface_id; /* [i] 'S' for SCSI generic (required) */
int dxfer_direction; /* [i] data transfer direction */
- unsigned char cmd_len; /* [i] SCSI command length ( <= 16 bytes) */
+ unsigned char cmd_len; /* [i] SCSI command length */
unsigned char mx_sb_len; /* [i] max length to write to sbp */
unsigned short iovec_count; /* [i] 0 implies no scatter gather */
unsigned int dxfer_len; /* [i] byte count of data transfer */
#define SG_FLAG_MMAP_IO 4 /* request memory mapped IO */
#define SG_FLAG_NO_DXFER 0x10000 /* no transfer of kernel buffers to/from */
/* user space (debug indirect IO) */
- #define SG_FLAG_Q_AT_TAIL 0x10 /* default is Q_AT_HEAD */
+ /* defaults: for sg driver: Q_AT_HEAD; for block layer: Q_AT_TAIL */
+ #define SG_FLAG_Q_AT_TAIL 0x10
+ #define SG_FLAG_Q_AT_HEAD 0x20
/* following 'info' values are "or"-ed together */
#define SG_INFO_OK_MASK 0x1