Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/vapier...
[cascardo/linux.git] / drivers / block / drbd / drbd_main.c
index 29cd0dc..dfc85f3 100644 (file)
@@ -85,7 +85,8 @@ MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
 MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION);
 MODULE_VERSION(REL_VERSION);
 MODULE_LICENSE("GPL");
-MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices (1-255)");
+MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices ("
+                __stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")");
 MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR);
 
 #include <linux/moduleparam.h>
@@ -115,7 +116,7 @@ module_param(fault_devs, int, 0644);
 #endif
 
 /* module parameter, defined */
-unsigned int minor_count = 32;
+unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
 int disable_sendpage;
 int allow_oos;
 unsigned int cn_idx = CN_IDX_DRBD;
@@ -335,6 +336,7 @@ bail:
        drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
 }
 
+
 /**
  * _tl_restart() - Walks the transfer log, and applies an action to all requests
  * @mdev:      DRBD device.
@@ -456,7 +458,7 @@ void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
 }
 
 /**
- * cl_wide_st_chg() - TRUE if the state change is a cluster wide one
+ * cl_wide_st_chg() - true if the state change is a cluster wide one
  * @mdev:      DRBD device.
  * @os:                old (current) state.
  * @ns:                new (wanted) state.
@@ -473,12 +475,13 @@ static int cl_wide_st_chg(struct drbd_conf *mdev,
                (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S);
 }
 
-int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
-                     union drbd_state mask, union drbd_state val)
+enum drbd_state_rv
+drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
+                 union drbd_state mask, union drbd_state val)
 {
        unsigned long flags;
        union drbd_state os, ns;
-       int rv;
+       enum drbd_state_rv rv;
 
        spin_lock_irqsave(&mdev->req_lock, flags);
        os = mdev->state;
@@ -502,20 +505,22 @@ void drbd_force_state(struct drbd_conf *mdev,
        drbd_change_state(mdev, CS_HARD, mask, val);
 }
 
-static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns);
-static int is_valid_state_transition(struct drbd_conf *,
-                                    union drbd_state, union drbd_state);
+static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state);
+static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *,
+                                                   union drbd_state,
+                                                   union drbd_state);
 static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
                                       union drbd_state ns, const char **warn_sync_abort);
 int drbd_send_state_req(struct drbd_conf *,
                        union drbd_state, union drbd_state);
 
-static enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev,
-                                   union drbd_state mask, union drbd_state val)
+static enum drbd_state_rv
+_req_st_cond(struct drbd_conf *mdev, union drbd_state mask,
+            union drbd_state val)
 {
        union drbd_state os, ns;
        unsigned long flags;
-       int rv;
+       enum drbd_state_rv rv;
 
        if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags))
                return SS_CW_SUCCESS;
@@ -536,7 +541,7 @@ static enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev,
                if (rv == SS_SUCCESS) {
                        rv = is_valid_state_transition(mdev, ns, os);
                        if (rv == SS_SUCCESS)
-                               rv = 0; /* cont waiting, otherwise fail. */
+                               rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
                }
        }
        spin_unlock_irqrestore(&mdev->req_lock, flags);
@@ -554,14 +559,14 @@ static enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev,
  * Should not be called directly, use drbd_request_state() or
  * _drbd_request_state().
  */
-static int drbd_req_state(struct drbd_conf *mdev,
-                         union drbd_state mask, union drbd_state val,
-                         enum chg_state_flags f)
+static enum drbd_state_rv
+drbd_req_state(struct drbd_conf *mdev, union drbd_state mask,
+              union drbd_state val, enum chg_state_flags f)
 {
        struct completion done;
        unsigned long flags;
        union drbd_state os, ns;
-       int rv;
+       enum drbd_state_rv rv;
 
        init_completion(&done);
 
@@ -636,10 +641,11 @@ abort:
  * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE
  * flag, or when logging of failed state change requests is not desired.
  */
-int _drbd_request_state(struct drbd_conf *mdev,        union drbd_state mask,
-                       union drbd_state val,   enum chg_state_flags f)
+enum drbd_state_rv
+_drbd_request_state(struct drbd_conf *mdev, union drbd_state mask,
+                   union drbd_state val, enum chg_state_flags f)
 {
-       int rv;
+       enum drbd_state_rv rv;
 
        wait_event(mdev->state_wait,
                   (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE);
@@ -663,8 +669,8 @@ static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns)
            );
 }
 
-void print_st_err(struct drbd_conf *mdev,
-       union drbd_state os, union drbd_state ns, int err)
+void print_st_err(struct drbd_conf *mdev, union drbd_state os,
+                 union drbd_state ns, enum drbd_state_rv err)
 {
        if (err == SS_IN_TRANSIENT_STATE)
                return;
@@ -674,32 +680,18 @@ void print_st_err(struct drbd_conf *mdev,
 }
 
 
-#define drbd_peer_str drbd_role_str
-#define drbd_pdsk_str drbd_disk_str
-
-#define drbd_susp_str(A)     ((A) ? "1" : "0")
-#define drbd_aftr_isp_str(A) ((A) ? "1" : "0")
-#define drbd_peer_isp_str(A) ((A) ? "1" : "0")
-#define drbd_user_isp_str(A) ((A) ? "1" : "0")
-
-#define PSC(A) \
-       ({ if (ns.A != os.A) { \
-               pbp += sprintf(pbp, #A "( %s -> %s ) ", \
-                             drbd_##A##_str(os.A), \
-                             drbd_##A##_str(ns.A)); \
-       } })
-
 /**
  * is_valid_state() - Returns an SS_ error code if ns is not valid
  * @mdev:      DRBD device.
  * @ns:                State to consider.
  */
-static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
+static enum drbd_state_rv
+is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
 {
        /* See drbd_state_sw_errors in drbd_strings.c */
 
        enum drbd_fencing_p fp;
-       int rv = SS_SUCCESS;
+       enum drbd_state_rv rv = SS_SUCCESS;
 
        fp = FP_DONT_CARE;
        if (get_ldev(mdev)) {
@@ -762,10 +754,11 @@ static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
  * @ns:                new state.
  * @os:                old state.
  */
-static int is_valid_state_transition(struct drbd_conf *mdev,
-                                    union drbd_state ns, union drbd_state os)
+static enum drbd_state_rv
+is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns,
+                         union drbd_state os)
 {
-       int rv = SS_SUCCESS;
+       enum drbd_state_rv rv = SS_SUCCESS;
 
        if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) &&
            os.conn > C_CONNECTED)
@@ -800,6 +793,10 @@ static int is_valid_state_transition(struct drbd_conf *mdev,
            os.conn < C_CONNECTED)
                rv = SS_NEED_CONNECTION;
 
+       if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)
+           && os.conn < C_WF_REPORT_PARAMS)
+               rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */
+
        return rv;
 }
 
@@ -817,6 +814,7 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
                                       union drbd_state ns, const char **warn_sync_abort)
 {
        enum drbd_fencing_p fp;
+       enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;
 
        fp = FP_DONT_CARE;
        if (get_ldev(mdev)) {
@@ -869,56 +867,6 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
                ns.conn = C_CONNECTED;
        }
 
-       if (ns.conn >= C_CONNECTED &&
-           ((ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED) ||
-            (ns.disk == D_NEGOTIATING && ns.conn == C_WF_BITMAP_T))) {
-               switch (ns.conn) {
-               case C_WF_BITMAP_T:
-               case C_PAUSED_SYNC_T:
-                       ns.disk = D_OUTDATED;
-                       break;
-               case C_CONNECTED:
-               case C_WF_BITMAP_S:
-               case C_SYNC_SOURCE:
-               case C_PAUSED_SYNC_S:
-                       ns.disk = D_UP_TO_DATE;
-                       break;
-               case C_SYNC_TARGET:
-                       ns.disk = D_INCONSISTENT;
-                       dev_warn(DEV, "Implicitly set disk state Inconsistent!\n");
-                       break;
-               }
-               if (os.disk == D_OUTDATED && ns.disk == D_UP_TO_DATE)
-                       dev_warn(DEV, "Implicitly set disk from Outdated to UpToDate\n");
-       }
-
-       if (ns.conn >= C_CONNECTED &&
-           (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)) {
-               switch (ns.conn) {
-               case C_CONNECTED:
-               case C_WF_BITMAP_T:
-               case C_PAUSED_SYNC_T:
-               case C_SYNC_TARGET:
-                       ns.pdsk = D_UP_TO_DATE;
-                       break;
-               case C_WF_BITMAP_S:
-               case C_PAUSED_SYNC_S:
-                       /* remap any consistent state to D_OUTDATED,
-                        * but disallow "upgrade" of not even consistent states.
-                        */
-                       ns.pdsk =
-                               (D_DISKLESS < os.pdsk && os.pdsk < D_OUTDATED)
-                               ? os.pdsk : D_OUTDATED;
-                       break;
-               case C_SYNC_SOURCE:
-                       ns.pdsk = D_INCONSISTENT;
-                       dev_warn(DEV, "Implicitly set pdsk Inconsistent!\n");
-                       break;
-               }
-               if (os.pdsk == D_OUTDATED && ns.pdsk == D_UP_TO_DATE)
-                       dev_warn(DEV, "Implicitly set pdsk from Outdated to UpToDate\n");
-       }
-
        /* Connection breaks down before we finished "Negotiating" */
        if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING &&
            get_ldev_if_state(mdev, D_NEGOTIATING)) {
@@ -933,6 +881,94 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
                put_ldev(mdev);
        }
 
+       /* D_CONSISTENT and D_OUTDATED vanish when we get connected */
+       if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) {
+               if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED)
+                       ns.disk = D_UP_TO_DATE;
+               if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)
+                       ns.pdsk = D_UP_TO_DATE;
+       }
+
+       /* Implications of the connection stat on the disk states */
+       disk_min = D_DISKLESS;
+       disk_max = D_UP_TO_DATE;
+       pdsk_min = D_INCONSISTENT;
+       pdsk_max = D_UNKNOWN;
+       switch ((enum drbd_conns)ns.conn) {
+       case C_WF_BITMAP_T:
+       case C_PAUSED_SYNC_T:
+       case C_STARTING_SYNC_T:
+       case C_WF_SYNC_UUID:
+       case C_BEHIND:
+               disk_min = D_INCONSISTENT;
+               disk_max = D_OUTDATED;
+               pdsk_min = D_UP_TO_DATE;
+               pdsk_max = D_UP_TO_DATE;
+               break;
+       case C_VERIFY_S:
+       case C_VERIFY_T:
+               disk_min = D_UP_TO_DATE;
+               disk_max = D_UP_TO_DATE;
+               pdsk_min = D_UP_TO_DATE;
+               pdsk_max = D_UP_TO_DATE;
+               break;
+       case C_CONNECTED:
+               disk_min = D_DISKLESS;
+               disk_max = D_UP_TO_DATE;
+               pdsk_min = D_DISKLESS;
+               pdsk_max = D_UP_TO_DATE;
+               break;
+       case C_WF_BITMAP_S:
+       case C_PAUSED_SYNC_S:
+       case C_STARTING_SYNC_S:
+       case C_AHEAD:
+               disk_min = D_UP_TO_DATE;
+               disk_max = D_UP_TO_DATE;
+               pdsk_min = D_INCONSISTENT;
+               pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. But explicit outdate necessary*/
+               break;
+       case C_SYNC_TARGET:
+               disk_min = D_INCONSISTENT;
+               disk_max = D_INCONSISTENT;
+               pdsk_min = D_UP_TO_DATE;
+               pdsk_max = D_UP_TO_DATE;
+               break;
+       case C_SYNC_SOURCE:
+               disk_min = D_UP_TO_DATE;
+               disk_max = D_UP_TO_DATE;
+               pdsk_min = D_INCONSISTENT;
+               pdsk_max = D_INCONSISTENT;
+               break;
+       case C_STANDALONE:
+       case C_DISCONNECTING:
+       case C_UNCONNECTED:
+       case C_TIMEOUT:
+       case C_BROKEN_PIPE:
+       case C_NETWORK_FAILURE:
+       case C_PROTOCOL_ERROR:
+       case C_TEAR_DOWN:
+       case C_WF_CONNECTION:
+       case C_WF_REPORT_PARAMS:
+       case C_MASK:
+               break;
+       }
+       if (ns.disk > disk_max)
+               ns.disk = disk_max;
+
+       if (ns.disk < disk_min) {
+               dev_warn(DEV, "Implicitly set disk from %s to %s\n",
+                        drbd_disk_str(ns.disk), drbd_disk_str(disk_min));
+               ns.disk = disk_min;
+       }
+       if (ns.pdsk > pdsk_max)
+               ns.pdsk = pdsk_max;
+
+       if (ns.pdsk < pdsk_min) {
+               dev_warn(DEV, "Implicitly set pdsk from %s to %s\n",
+                        drbd_disk_str(ns.pdsk), drbd_disk_str(pdsk_min));
+               ns.pdsk = pdsk_min;
+       }
+
        if (fp == FP_STONITH &&
            (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) &&
            !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED))
@@ -961,6 +997,10 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
 /* helper for __drbd_set_state */
 static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs)
 {
+       if (mdev->agreed_pro_version < 90)
+               mdev->ov_start_sector = 0;
+       mdev->rs_total = drbd_bm_bits(mdev);
+       mdev->ov_position = 0;
        if (cs == C_VERIFY_T) {
                /* starting online verify from an arbitrary position
                 * does not fit well into the existing protocol.
@@ -970,11 +1010,15 @@ static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs)
                mdev->ov_start_sector = ~(sector_t)0;
        } else {
                unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector);
-               if (bit >= mdev->rs_total)
+               if (bit >= mdev->rs_total) {
                        mdev->ov_start_sector =
                                BM_BIT_TO_SECT(mdev->rs_total - 1);
+                       mdev->rs_total = 1;
+               } else
+                       mdev->rs_total -= bit;
                mdev->ov_position = mdev->ov_start_sector;
        }
+       mdev->ov_left = mdev->rs_total;
 }
 
 static void drbd_resume_al(struct drbd_conf *mdev)
@@ -992,12 +1036,12 @@ static void drbd_resume_al(struct drbd_conf *mdev)
  *
  * Caller needs to hold req_lock, and global_state_lock. Do not call directly.
  */
-int __drbd_set_state(struct drbd_conf *mdev,
-                   union drbd_state ns, enum chg_state_flags flags,
-                   struct completion *done)
+enum drbd_state_rv
+__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
+                enum chg_state_flags flags, struct completion *done)
 {
        union drbd_state os;
-       int rv = SS_SUCCESS;
+       enum drbd_state_rv rv = SS_SUCCESS;
        const char *warn_sync_abort = NULL;
        struct after_state_chg_work *ascw;
 
@@ -1033,22 +1077,46 @@ int __drbd_set_state(struct drbd_conf *mdev,
                dev_warn(DEV, "%s aborted.\n", warn_sync_abort);
 
        {
-               char *pbp, pb[300];
-               pbp = pb;
-               *pbp = 0;
-               PSC(role);
-               PSC(peer);
-               PSC(conn);
-               PSC(disk);
-               PSC(pdsk);
-               if (is_susp(ns) != is_susp(os))
-                       pbp += sprintf(pbp, "susp( %s -> %s ) ",
-                                      drbd_susp_str(is_susp(os)),
-                                      drbd_susp_str(is_susp(ns)));
-               PSC(aftr_isp);
-               PSC(peer_isp);
-               PSC(user_isp);
-               dev_info(DEV, "%s\n", pb);
+       char *pbp, pb[300];
+       pbp = pb;
+       *pbp = 0;
+       if (ns.role != os.role)
+               pbp += sprintf(pbp, "role( %s -> %s ) ",
+                              drbd_role_str(os.role),
+                              drbd_role_str(ns.role));
+       if (ns.peer != os.peer)
+               pbp += sprintf(pbp, "peer( %s -> %s ) ",
+                              drbd_role_str(os.peer),
+                              drbd_role_str(ns.peer));
+       if (ns.conn != os.conn)
+               pbp += sprintf(pbp, "conn( %s -> %s ) ",
+                              drbd_conn_str(os.conn),
+                              drbd_conn_str(ns.conn));
+       if (ns.disk != os.disk)
+               pbp += sprintf(pbp, "disk( %s -> %s ) ",
+                              drbd_disk_str(os.disk),
+                              drbd_disk_str(ns.disk));
+       if (ns.pdsk != os.pdsk)
+               pbp += sprintf(pbp, "pdsk( %s -> %s ) ",
+                              drbd_disk_str(os.pdsk),
+                              drbd_disk_str(ns.pdsk));
+       if (is_susp(ns) != is_susp(os))
+               pbp += sprintf(pbp, "susp( %d -> %d ) ",
+                              is_susp(os),
+                              is_susp(ns));
+       if (ns.aftr_isp != os.aftr_isp)
+               pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ",
+                              os.aftr_isp,
+                              ns.aftr_isp);
+       if (ns.peer_isp != os.peer_isp)
+               pbp += sprintf(pbp, "peer_isp( %d -> %d ) ",
+                              os.peer_isp,
+                              ns.peer_isp);
+       if (ns.user_isp != os.user_isp)
+               pbp += sprintf(pbp, "user_isp( %d -> %d ) ",
+                              os.user_isp,
+                              ns.user_isp);
+       dev_info(DEV, "%s\n", pb);
        }
 
        /* solve the race between becoming unconfigured,
@@ -1074,6 +1142,10 @@ int __drbd_set_state(struct drbd_conf *mdev,
                atomic_inc(&mdev->local_cnt);
 
        mdev->state = ns;
+
+       if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
+               drbd_print_uuids(mdev, "attached to UUIDs");
+
        wake_up(&mdev->misc_wait);
        wake_up(&mdev->state_wait);
 
@@ -1081,7 +1153,7 @@ int __drbd_set_state(struct drbd_conf *mdev,
        if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
            ns.conn < C_CONNECTED) {
                mdev->ov_start_sector =
-                       BM_BIT_TO_SECT(mdev->rs_total - mdev->ov_left);
+                       BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left);
                dev_info(DEV, "Online Verify reached sector %llu\n",
                        (unsigned long long)mdev->ov_start_sector);
        }
@@ -1106,14 +1178,7 @@ int __drbd_set_state(struct drbd_conf *mdev,
                unsigned long now = jiffies;
                int i;
 
-               mdev->ov_position = 0;
-               mdev->rs_total = drbd_bm_bits(mdev);
-               if (mdev->agreed_pro_version >= 90)
-                       set_ov_position(mdev, ns.conn);
-               else
-                       mdev->ov_start_sector = 0;
-               mdev->ov_left = mdev->rs_total
-                             - BM_SECT_TO_BIT(mdev->ov_position);
+               set_ov_position(mdev, ns.conn);
                mdev->rs_start = now;
                mdev->rs_last_events = 0;
                mdev->rs_last_sect_ev = 0;
@@ -1121,10 +1186,12 @@ int __drbd_set_state(struct drbd_conf *mdev,
                mdev->ov_last_oos_start = 0;
 
                for (i = 0; i < DRBD_SYNC_MARKS; i++) {
-                       mdev->rs_mark_left[i] = mdev->rs_total;
+                       mdev->rs_mark_left[i] = mdev->ov_left;
                        mdev->rs_mark_time[i] = now;
                }
 
+               drbd_rs_controller_reset(mdev);
+
                if (ns.conn == C_VERIFY_S) {
                        dev_info(DEV, "Starting Online Verify from sector %llu\n",
                                        (unsigned long long)mdev->ov_position);
@@ -1228,6 +1295,26 @@ static void abw_start_sync(struct drbd_conf *mdev, int rv)
        }
 }
 
+int drbd_bitmap_io_from_worker(struct drbd_conf *mdev,
+               int (*io_fn)(struct drbd_conf *),
+               char *why, enum bm_flag flags)
+{
+       int rv;
+
+       D_ASSERT(current == mdev->worker.task);
+
+       /* open coded non-blocking drbd_suspend_io(mdev); */
+       set_bit(SUSPEND_IO, &mdev->flags);
+
+       drbd_bm_lock(mdev, why, flags);
+       rv = io_fn(mdev);
+       drbd_bm_unlock(mdev);
+
+       drbd_resume_io(mdev);
+
+       return rv;
+}
+
 /**
  * after_state_ch() - Perform after state change actions that may sleep
  * @mdev:      DRBD device.
@@ -1266,16 +1353,14 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 
        nsm.i = -1;
        if (ns.susp_nod) {
-               if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
-                       if (ns.conn == C_CONNECTED)
-                               what = resend, nsm.susp_nod = 0;
-                       else /* ns.conn > C_CONNECTED */
-                               dev_err(DEV, "Unexpected Resynd going on!\n");
-               }
+               if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
+                       what = resend;
 
                if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING)
-                       what = restart_frozen_disk_io, nsm.susp_nod = 0;
+                       what = restart_frozen_disk_io;
 
+               if (what != nothing)
+                       nsm.susp_nod = 0;
        }
 
        if (ns.susp_fen) {
@@ -1306,13 +1391,30 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
                spin_unlock_irq(&mdev->req_lock);
        }
 
+       /* Became sync source.  With protocol >= 96, we still need to send out
+        * the sync uuid now. Need to do that before any drbd_send_state, or
+        * the other side may go "paused sync" before receiving the sync uuids,
+        * which is unexpected. */
+       if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
+           (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
+           mdev->agreed_pro_version >= 96 && get_ldev(mdev)) {
+               drbd_gen_and_send_sync_uuid(mdev);
+               put_ldev(mdev);
+       }
+
        /* Do not change the order of the if above and the two below... */
        if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) {      /* attach on the peer */
                drbd_send_uuids(mdev);
                drbd_send_state(mdev);
        }
-       if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S)
-               drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL, "send_bitmap (WFBitMapS)");
+       /* No point in queuing send_bitmap if we don't have a connection
+        * anymore, so check also the _current_ state, not only the new state
+        * at the time this work was queued. */
+       if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S &&
+           mdev->state.conn == C_WF_BITMAP_S)
+               drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL,
+                               "send_bitmap (WFBitMapS)",
+                               BM_LOCKED_TEST_ALLOWED);
 
        /* Lost contact to peer's copy of the data */
        if ((os.pdsk >= D_INCONSISTENT &&
@@ -1343,7 +1445,23 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 
                /* D_DISKLESS Peer becomes secondary */
                if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
-                       drbd_al_to_on_disk_bm(mdev);
+                       /* We may still be Primary ourselves.
+                        * No harm done if the bitmap still changes,
+                        * redirtied pages will follow later. */
+                       drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
+                               "demote diskless peer", BM_LOCKED_SET_ALLOWED);
+               put_ldev(mdev);
+       }
+
+       /* Write out all changed bits on demote.
+        * Though, no need to da that just yet
+        * if there is a resync going on still */
+       if (os.role == R_PRIMARY && ns.role == R_SECONDARY &&
+               mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) {
+               /* No changes to the bitmap expected this time, so assert that,
+                * even though no harm was done if it did change. */
+               drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
+                               "demote", BM_LOCKED_TEST_ALLOWED);
                put_ldev(mdev);
        }
 
@@ -1371,15 +1489,23 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
        if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
                drbd_send_state(mdev);
 
+       if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
+               drbd_send_state(mdev);
+
        /* We are in the progress to start a full sync... */
        if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
            (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S))
-               drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, &abw_start_sync, "set_n_write from StartingSync");
+               /* no other bitmap changes expected during this phase */
+               drbd_queue_bitmap_io(mdev,
+                       &drbd_bmio_set_n_write, &abw_start_sync,
+                       "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED);
 
        /* We are invalidating our self... */
        if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED &&
            os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
-               drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, "set_n_write from invalidate");
+               /* other bitmap operation expected during this phase */
+               drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL,
+                       "set_n_write from invalidate", BM_LOCKED_MASK);
 
        /* first half of local IO error, failure to attach,
         * or administrative detach */
@@ -1434,8 +1560,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 
                if (drbd_send_state(mdev))
                        dev_warn(DEV, "Notified peer that I'm now diskless.\n");
-               else
-                       dev_err(DEV, "Sending state for being diskless failed\n");
                /* corresponding get_ldev in __drbd_set_state
                 * this may finaly trigger drbd_ldev_destroy. */
                put_ldev(mdev);
@@ -1459,6 +1583,19 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
        if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
                drbd_send_state(mdev);
 
+       /* This triggers bitmap writeout of potentially still unwritten pages
+        * if the resync finished cleanly, or aborted because of peer disk
+        * failure, or because of connection loss.
+        * For resync aborted because of local disk failure, we cannot do
+        * any bitmap writeout anymore.
+        * No harm done if some bits change during this phase.
+        */
+       if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) {
+               drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL,
+                       "write from resync_finished", BM_LOCKED_SET_ALLOWED);
+               put_ldev(mdev);
+       }
+
        /* free tl_hash if we Got thawed and are C_STANDALONE */
        if (ns.conn == C_STANDALONE && !is_susp(ns) && mdev->tl_hash)
                drbd_free_tl_hash(mdev);
@@ -1559,7 +1696,7 @@ int drbd_thread_start(struct drbd_thread *thi)
                if (!try_module_get(THIS_MODULE)) {
                        dev_err(DEV, "Failed to get module reference in drbd_thread_start\n");
                        spin_unlock_irqrestore(&thi->t_lock, flags);
-                       return FALSE;
+                       return false;
                }
 
                init_completion(&thi->stop);
@@ -1576,7 +1713,7 @@ int drbd_thread_start(struct drbd_thread *thi)
                        dev_err(DEV, "Couldn't start thread\n");
 
                        module_put(THIS_MODULE);
-                       return FALSE;
+                       return false;
                }
                spin_lock_irqsave(&thi->t_lock, flags);
                thi->task = nt;
@@ -1596,7 +1733,7 @@ int drbd_thread_start(struct drbd_thread *thi)
                break;
        }
 
-       return TRUE;
+       return true;
 }
 
 
@@ -1694,8 +1831,8 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
 {
        int sent, ok;
 
-       ERR_IF(!h) return FALSE;
-       ERR_IF(!size) return FALSE;
+       ERR_IF(!h) return false;
+       ERR_IF(!size) return false;
 
        h->magic   = BE_DRBD_MAGIC;
        h->command = cpu_to_be16(cmd);
@@ -1704,8 +1841,8 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
        sent = drbd_send(mdev, sock, h, size, msg_flags);
 
        ok = (sent == size);
-       if (!ok)
-               dev_err(DEV, "short sent %s size=%d sent=%d\n",
+       if (!ok && !signal_pending(current))
+               dev_warn(DEV, "short sent %s size=%d sent=%d\n",
                    cmdname(cmd), (int)size, sent);
        return ok;
 }
@@ -1840,7 +1977,7 @@ int drbd_send_protocol(struct drbd_conf *mdev)
                else {
                        dev_err(DEV, "--dry-run is not supported by peer");
                        kfree(p);
-                       return 0;
+                       return -1;
                }
        }
        p->conn_flags    = cpu_to_be32(cf);
@@ -1888,12 +2025,36 @@ int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev)
        return _drbd_send_uuids(mdev, 8);
 }
 
+void drbd_print_uuids(struct drbd_conf *mdev, const char *text)
+{
+       if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
+               u64 *uuid = mdev->ldev->md.uuid;
+               dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX\n",
+                    text,
+                    (unsigned long long)uuid[UI_CURRENT],
+                    (unsigned long long)uuid[UI_BITMAP],
+                    (unsigned long long)uuid[UI_HISTORY_START],
+                    (unsigned long long)uuid[UI_HISTORY_END]);
+               put_ldev(mdev);
+       } else {
+               dev_info(DEV, "%s effective data uuid: %016llX\n",
+                               text,
+                               (unsigned long long)mdev->ed_uuid);
+       }
+}
 
-int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val)
+int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev)
 {
        struct p_rs_uuid p;
+       u64 uuid;
+
+       D_ASSERT(mdev->state.disk == D_UP_TO_DATE);
 
-       p.uuid = cpu_to_be64(val);
+       uuid = mdev->ldev->md.uuid[UI_BITMAP] + UUID_NEW_BM_OFFSET;
+       drbd_uuid_set(mdev, UI_BITMAP, uuid);
+       drbd_print_uuids(mdev, "updated sync UUID");
+       drbd_md_sync(mdev);
+       p.uuid = cpu_to_be64(uuid);
 
        return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID,
                             (struct p_header80 *)&p, sizeof(p));
@@ -1921,7 +2082,7 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl
        p.d_size = cpu_to_be64(d_size);
        p.u_size = cpu_to_be64(u_size);
        p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
-       p.max_segment_size = cpu_to_be32(queue_max_segment_size(mdev->rq_queue));
+       p.max_bio_size = cpu_to_be32(queue_max_hw_sectors(mdev->rq_queue) << 9);
        p.queue_order_type = cpu_to_be16(q_order_type);
        p.dds_flags = cpu_to_be16(flags);
 
@@ -1972,7 +2133,7 @@ int drbd_send_state_req(struct drbd_conf *mdev,
                             (struct p_header80 *)&p, sizeof(p));
 }
 
-int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode)
+int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode)
 {
        struct p_req_state_reply p;
 
@@ -2076,9 +2237,15 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev,
        return len;
 }
 
-enum { OK, FAILED, DONE }
+/**
+ * send_bitmap_rle_or_plain
+ *
+ * Return 0 when done, 1 when another iteration is needed, and a negative error
+ * code upon failure.
+ */
+static int
 send_bitmap_rle_or_plain(struct drbd_conf *mdev,
-       struct p_header80 *h, struct bm_xfer_ctx *c)
+                        struct p_header80 *h, struct bm_xfer_ctx *c)
 {
        struct p_compressed_bm *p = (void*)h;
        unsigned long num_words;
@@ -2088,7 +2255,7 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev,
        len = fill_bitmap_rle_bits(mdev, p, c);
 
        if (len < 0)
-               return FAILED;
+               return -EIO;
 
        if (len) {
                DCBP_set_code(p, RLE_VLI_Bits);
@@ -2118,11 +2285,14 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev,
                if (c->bit_offset > c->bm_bits)
                        c->bit_offset = c->bm_bits;
        }
-       ok = ok ? ((len == 0) ? DONE : OK) : FAILED;
-
-       if (ok == DONE)
-               INFO_bm_xfer_stats(mdev, "send", c);
-       return ok;
+       if (ok) {
+               if (len == 0) {
+                       INFO_bm_xfer_stats(mdev, "send", c);
+                       return 0;
+               } else
+                       return 1;
+       }
+       return -EIO;
 }
 
 /* See the comment at receive_bitmap() */
@@ -2130,16 +2300,16 @@ int _drbd_send_bitmap(struct drbd_conf *mdev)
 {
        struct bm_xfer_ctx c;
        struct p_header80 *p;
-       int ret;
+       int err;
 
-       ERR_IF(!mdev->bitmap) return FALSE;
+       ERR_IF(!mdev->bitmap) return false;
 
        /* maybe we should use some per thread scratch page,
         * and allocate that during initial device creation? */
        p = (struct p_header80 *) __get_free_page(GFP_NOIO);
        if (!p) {
                dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
-               return FALSE;
+               return false;
        }
 
        if (get_ldev(mdev)) {
@@ -2165,11 +2335,11 @@ int _drbd_send_bitmap(struct drbd_conf *mdev)
        };
 
        do {
-               ret = send_bitmap_rle_or_plain(mdev, p, &c);
-       } while (ret == OK);
+               err = send_bitmap_rle_or_plain(mdev, p, &c);
+       } while (err > 0);
 
        free_page((unsigned long) p);
-       return (ret == DONE);
+       return err == 0;
 }
 
 int drbd_send_bitmap(struct drbd_conf *mdev)
@@ -2192,7 +2362,7 @@ int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size)
        p.set_size = cpu_to_be32(set_size);
 
        if (mdev->state.conn < C_CONNECTED)
-               return FALSE;
+               return false;
        ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK,
                        (struct p_header80 *)&p, sizeof(p));
        return ok;
@@ -2220,7 +2390,7 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd,
        p.seq_num  = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq));
 
        if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED)
-               return FALSE;
+               return false;
        ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd,
                                (struct p_header80 *)&p, sizeof(p));
        return ok;
@@ -2326,8 +2496,8 @@ int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size)
 }
 
 /* called on sndtimeo
- * returns FALSE if we should retry,
- * TRUE if we think connection is dead
+ * returns false if we should retry,
+ * true if we think connection is dead
  */
 static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *sock)
 {
@@ -2340,7 +2510,7 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *
                || mdev->state.conn < C_CONNECTED;
 
        if (drop_it)
-               return TRUE;
+               return true;
 
        drop_it = !--mdev->ko_count;
        if (!drop_it) {
@@ -2477,12 +2647,11 @@ static u32 bio_flags_to_wire(struct drbd_conf *mdev, unsigned long bi_rw)
 {
        if (mdev->agreed_pro_version >= 95)
                return  (bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) |
-                       (bi_rw & REQ_UNPLUG ? DP_UNPLUG : 0) |
                        (bi_rw & REQ_FUA ? DP_FUA : 0) |
                        (bi_rw & REQ_FLUSH ? DP_FLUSH : 0) |
                        (bi_rw & REQ_DISCARD ? DP_DISCARD : 0);
        else
-               return bi_rw & (REQ_SYNC | REQ_UNPLUG) ? DP_RW_SYNC : 0;
+               return bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
 }
 
 /* Used to send write requests
@@ -2532,13 +2701,39 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
        if (ok && dgs) {
                dgb = mdev->int_dig_out;
                drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb);
-               ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
+               ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
        }
        if (ok) {
-               if (mdev->net_conf->wire_protocol == DRBD_PROT_A)
+               /* For protocol A, we have to memcpy the payload into
+                * socket buffers, as we may complete right away
+                * as soon as we handed it over to tcp, at which point the data
+                * pages may become invalid.
+                *
+                * For data-integrity enabled, we copy it as well, so we can be
+                * sure that even if the bio pages may still be modified, it
+                * won't change the data on the wire, thus if the digest checks
+                * out ok after sending on this side, but does not fit on the
+                * receiving side, we sure have detected corruption elsewhere.
+                */
+               if (mdev->net_conf->wire_protocol == DRBD_PROT_A || dgs)
                        ok = _drbd_send_bio(mdev, req->master_bio);
                else
                        ok = _drbd_send_zc_bio(mdev, req->master_bio);
+
+               /* double check digest, sometimes buffers have been modified in flight. */
+               if (dgs > 0 && dgs <= 64) {
+                       /* 64 byte, 512 bit, is the larges digest size
+                        * currently supported in kernel crypto. */
+                       unsigned char digest[64];
+                       drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, digest);
+                       if (memcmp(mdev->int_dig_out, digest, dgs)) {
+                               dev_warn(DEV,
+                                       "Digest mismatch, buffer modified by upper layers during write: %llus +%u\n",
+                                       (unsigned long long)req->sector, req->size);
+                       }
+               } /* else if (dgs > 64) {
+                    ... Be noisy about digest too large ...
+               } */
        }
 
        drbd_put_data_sock(mdev);
@@ -2588,7 +2783,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
        if (ok && dgs) {
                dgb = mdev->int_dig_out;
                drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb);
-               ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
+               ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
        }
        if (ok)
                ok = _drbd_send_zc_ee(mdev, e);
@@ -2598,6 +2793,16 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
        return ok;
 }
 
+int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req)
+{
+       struct p_block_desc p;
+
+       p.sector  = cpu_to_be64(req->sector);
+       p.blksize = cpu_to_be32(req->size);
+
+       return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OUT_OF_SYNC, &p.head, sizeof(p));
+}
+
 /*
   drbd_send distinguishes two cases:
 
@@ -2719,35 +2924,6 @@ static int drbd_release(struct gendisk *gd, fmode_t mode)
        return 0;
 }
 
-static void drbd_unplug_fn(struct request_queue *q)
-{
-       struct drbd_conf *mdev = q->queuedata;
-
-       /* unplug FIRST */
-       spin_lock_irq(q->queue_lock);
-       blk_remove_plug(q);
-       spin_unlock_irq(q->queue_lock);
-
-       /* only if connected */
-       spin_lock_irq(&mdev->req_lock);
-       if (mdev->state.pdsk >= D_INCONSISTENT && mdev->state.conn >= C_CONNECTED) {
-               D_ASSERT(mdev->state.role == R_PRIMARY);
-               if (test_and_clear_bit(UNPLUG_REMOTE, &mdev->flags)) {
-                       /* add to the data.work queue,
-                        * unless already queued.
-                        * XXX this might be a good addition to drbd_queue_work
-                        * anyways, to detect "double queuing" ... */
-                       if (list_empty(&mdev->unplug_work.list))
-                               drbd_queue_work(&mdev->data.work,
-                                               &mdev->unplug_work);
-               }
-       }
-       spin_unlock_irq(&mdev->req_lock);
-
-       if (mdev->state.disk >= D_INCONSISTENT)
-               drbd_kick_lo(mdev);
-}
-
 static void drbd_set_defaults(struct drbd_conf *mdev)
 {
        /* This way we get a compile error when sync_conf grows,
@@ -2800,6 +2976,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
        atomic_set(&mdev->pp_in_use_by_net, 0);
        atomic_set(&mdev->rs_sect_in, 0);
        atomic_set(&mdev->rs_sect_ev, 0);
+       atomic_set(&mdev->ap_in_flight, 0);
 
        mutex_init(&mdev->md_io_mutex);
        mutex_init(&mdev->data.mutex);
@@ -2828,19 +3005,27 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
        INIT_LIST_HEAD(&mdev->unplug_work.list);
        INIT_LIST_HEAD(&mdev->go_diskless.list);
        INIT_LIST_HEAD(&mdev->md_sync_work.list);
+       INIT_LIST_HEAD(&mdev->start_resync_work.list);
        INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
 
-       mdev->resync_work.cb  = w_resync_inactive;
+       mdev->resync_work.cb  = w_resync_timer;
        mdev->unplug_work.cb  = w_send_write_hint;
        mdev->go_diskless.cb  = w_go_diskless;
        mdev->md_sync_work.cb = w_md_sync;
        mdev->bm_io_work.w.cb = w_bitmap_io;
+       mdev->start_resync_work.cb = w_start_resync;
        init_timer(&mdev->resync_timer);
        init_timer(&mdev->md_sync_timer);
+       init_timer(&mdev->start_resync_timer);
+       init_timer(&mdev->request_timer);
        mdev->resync_timer.function = resync_timer_fn;
        mdev->resync_timer.data = (unsigned long) mdev;
        mdev->md_sync_timer.function = md_sync_timer_fn;
        mdev->md_sync_timer.data = (unsigned long) mdev;
+       mdev->start_resync_timer.function = start_resync_timer_fn;
+       mdev->start_resync_timer.data = (unsigned long) mdev;
+       mdev->request_timer.function = request_timer_fn;
+       mdev->request_timer.data = (unsigned long) mdev;
 
        init_waitqueue_head(&mdev->misc_wait);
        init_waitqueue_head(&mdev->state_wait);
@@ -2911,6 +3096,8 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev)
        D_ASSERT(list_empty(&mdev->resync_work.list));
        D_ASSERT(list_empty(&mdev->unplug_work.list));
        D_ASSERT(list_empty(&mdev->go_diskless.list));
+
+       drbd_set_defaults(mdev);
 }
 
 
@@ -2953,7 +3140,7 @@ static void drbd_destroy_mempools(void)
 static int drbd_create_mempools(void)
 {
        struct page *page;
-       const int number = (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE) * minor_count;
+       const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count;
        int i;
 
        /* prepare our caches and mempools */
@@ -3117,11 +3304,20 @@ static void drbd_cleanup(void)
 
        unregister_reboot_notifier(&drbd_notifier);
 
+       /* first remove proc,
+        * drbdsetup uses it's presence to detect
+        * whether DRBD is loaded.
+        * If we would get stuck in proc removal,
+        * but have netlink already deregistered,
+        * some drbdsetup commands may wait forever
+        * for an answer.
+        */
+       if (drbd_proc)
+               remove_proc_entry("drbd", NULL);
+
        drbd_nl_cleanup();
 
        if (minor_table) {
-               if (drbd_proc)
-                       remove_proc_entry("drbd", NULL);
                i = minor_count;
                while (i--)
                        drbd_delete_device(i);
@@ -3149,7 +3345,7 @@ static int drbd_congested(void *congested_data, int bdi_bits)
        char reason = '-';
        int r = 0;
 
-       if (!__inc_ap_bio_cond(mdev)) {
+       if (!may_inc_ap_bio(mdev)) {
                /* DRBD has frozen IO */
                r = bdi_bits;
                reason = 'd';
@@ -3202,7 +3398,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor)
                goto out_no_disk;
        mdev->vdisk = disk;
 
-       set_disk_ro(disk, TRUE);
+       set_disk_ro(disk, true);
 
        disk->queue = q;
        disk->major = DRBD_MAJOR;
@@ -3218,13 +3414,11 @@ struct drbd_conf *drbd_new_device(unsigned int minor)
        q->backing_dev_info.congested_fn = drbd_congested;
        q->backing_dev_info.congested_data = mdev;
 
-       blk_queue_make_request(q, drbd_make_request_26);
-       blk_queue_max_segment_size(q, DRBD_MAX_SEGMENT_SIZE);
+       blk_queue_make_request(q, drbd_make_request);
+       blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE >> 9);
        blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
        blk_queue_merge_bvec(q, drbd_merge_bvec);
-       q->queue_lock = &mdev->req_lock; /* needed since we use */
-               /* plugging on a queue, that actually has no requests! */
-       q->unplug_fn = drbd_unplug_fn;
+       q->queue_lock = &mdev->req_lock;
 
        mdev->md_io_page = alloc_page(GFP_KERNEL);
        if (!mdev->md_io_page)
@@ -3283,6 +3477,7 @@ void drbd_free_mdev(struct drbd_conf *mdev)
        put_disk(mdev->vdisk);
        blk_cleanup_queue(mdev->rq_queue);
        free_cpumask_var(mdev->cpu_mask);
+       drbd_free_tl_hash(mdev);
        kfree(mdev);
 }
 
@@ -3298,7 +3493,7 @@ int __init drbd_init(void)
                return -EINVAL;
        }
 
-       if (1 > minor_count || minor_count > 255) {
+       if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) {
                printk(KERN_ERR
                        "drbd: invalid minor_count (%d)\n", minor_count);
 #ifdef MODULE
@@ -3480,7 +3675,7 @@ void drbd_md_sync(struct drbd_conf *mdev)
        if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
                /* this was a try anyways ... */
                dev_err(DEV, "meta data update failed!\n");
-               drbd_chk_io_error(mdev, 1, TRUE);
+               drbd_chk_io_error(mdev, 1, true);
        }
 
        /* Update mdev->ldev->md.la_size_sect,
@@ -3496,7 +3691,7 @@ void drbd_md_sync(struct drbd_conf *mdev)
  * @mdev:      DRBD device.
  * @bdev:      Device from which the meta data should be read in.
  *
- * Return 0 (NO_ERROR) on success, and an enum drbd_ret_codes in case
+ * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case
  * something goes wrong.  Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID.
  */
 int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
@@ -3566,28 +3761,6 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
        return rv;
 }
 
-static void debug_drbd_uuid(struct drbd_conf *mdev, enum drbd_uuid_index index)
-{
-       static char *uuid_str[UI_EXTENDED_SIZE] = {
-               [UI_CURRENT] = "CURRENT",
-               [UI_BITMAP] = "BITMAP",
-               [UI_HISTORY_START] = "HISTORY_START",
-               [UI_HISTORY_END] = "HISTORY_END",
-               [UI_SIZE] = "SIZE",
-               [UI_FLAGS] = "FLAGS",
-       };
-
-       if (index >= UI_EXTENDED_SIZE) {
-               dev_warn(DEV, " uuid_index >= EXTENDED_SIZE\n");
-               return;
-       }
-
-       dynamic_dev_dbg(DEV, " uuid[%s] now %016llX\n",
-                uuid_str[index],
-                (unsigned long long)mdev->ldev->md.uuid[index]);
-}
-
-
 /**
  * drbd_md_mark_dirty() - Mark meta data super block as dirty
  * @mdev:      DRBD device.
@@ -3617,10 +3790,8 @@ static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local)
 {
        int i;
 
-       for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) {
+       for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++)
                mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i];
-               debug_drbd_uuid(mdev, i+1);
-       }
 }
 
 void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
@@ -3635,7 +3806,6 @@ void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
        }
 
        mdev->ldev->md.uuid[idx] = val;
-       debug_drbd_uuid(mdev, idx);
        drbd_md_mark_dirty(mdev);
 }
 
@@ -3645,7 +3815,6 @@ void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
        if (mdev->ldev->md.uuid[idx]) {
                drbd_uuid_move_history(mdev);
                mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx];
-               debug_drbd_uuid(mdev, UI_HISTORY_START);
        }
        _drbd_uuid_set(mdev, idx, val);
 }
@@ -3660,14 +3829,16 @@ void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
 void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local)
 {
        u64 val;
+       unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
+
+       if (bm_uuid)
+               dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
 
-       dev_info(DEV, "Creating new current UUID\n");
-       D_ASSERT(mdev->ldev->md.uuid[UI_BITMAP] == 0);
        mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT];
-       debug_drbd_uuid(mdev, UI_BITMAP);
 
        get_random_bytes(&val, sizeof(u64));
        _drbd_uuid_set(mdev, UI_CURRENT, val);
+       drbd_print_uuids(mdev, "new current UUID");
        /* get it to stable storage _now_ */
        drbd_md_sync(mdev);
 }
@@ -3681,16 +3852,12 @@ void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
                drbd_uuid_move_history(mdev);
                mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
                mdev->ldev->md.uuid[UI_BITMAP] = 0;
-               debug_drbd_uuid(mdev, UI_HISTORY_START);
-               debug_drbd_uuid(mdev, UI_BITMAP);
        } else {
-               if (mdev->ldev->md.uuid[UI_BITMAP])
-                       dev_warn(DEV, "bm UUID already set");
+               unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
+               if (bm_uuid)
+                       dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
 
-               mdev->ldev->md.uuid[UI_BITMAP] = val;
-               mdev->ldev->md.uuid[UI_BITMAP] &= ~((u64)1);
-
-               debug_drbd_uuid(mdev, UI_BITMAP);
+               mdev->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1);
        }
        drbd_md_mark_dirty(mdev);
 }
@@ -3746,15 +3913,19 @@ int drbd_bmio_clear_n_write(struct drbd_conf *mdev)
 static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused)
 {
        struct bm_io_work *work = container_of(w, struct bm_io_work, w);
-       int rv;
+       int rv = -EIO;
 
        D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0);
 
-       drbd_bm_lock(mdev, work->why);
-       rv = work->io_fn(mdev);
-       drbd_bm_unlock(mdev);
+       if (get_ldev(mdev)) {
+               drbd_bm_lock(mdev, work->why, work->flags);
+               rv = work->io_fn(mdev);
+               drbd_bm_unlock(mdev);
+               put_ldev(mdev);
+       }
 
        clear_bit(BITMAP_IO, &mdev->flags);
+       smp_mb__after_clear_bit();
        wake_up(&mdev->misc_wait);
 
        if (work->done)
@@ -3762,6 +3933,7 @@ static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused)
 
        clear_bit(BITMAP_IO_QUEUED, &mdev->flags);
        work->why = NULL;
+       work->flags = 0;
 
        return 1;
 }
@@ -3816,7 +3988,7 @@ void drbd_go_diskless(struct drbd_conf *mdev)
 void drbd_queue_bitmap_io(struct drbd_conf *mdev,
                          int (*io_fn)(struct drbd_conf *),
                          void (*done)(struct drbd_conf *, int),
-                         char *why)
+                         char *why, enum bm_flag flags)
 {
        D_ASSERT(current == mdev->worker.task);
 
@@ -3830,15 +4002,15 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev,
        mdev->bm_io_work.io_fn = io_fn;
        mdev->bm_io_work.done = done;
        mdev->bm_io_work.why = why;
+       mdev->bm_io_work.flags = flags;
 
+       spin_lock_irq(&mdev->req_lock);
        set_bit(BITMAP_IO, &mdev->flags);
        if (atomic_read(&mdev->ap_bio_cnt) == 0) {
-               if (list_empty(&mdev->bm_io_work.w.list)) {
-                       set_bit(BITMAP_IO_QUEUED, &mdev->flags);
+               if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
                        drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w);
-               } else
-                       dev_err(DEV, "FIXME avoided double queuing bm_io_work\n");
        }
+       spin_unlock_irq(&mdev->req_lock);
 }
 
 /**
@@ -3850,19 +4022,22 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev,
  * freezes application IO while that the actual IO operations runs. This
  * functions MAY NOT be called from worker context.
  */
-int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why)
+int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *),
+               char *why, enum bm_flag flags)
 {
        int rv;
 
        D_ASSERT(current != mdev->worker.task);
 
-       drbd_suspend_io(mdev);
+       if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
+               drbd_suspend_io(mdev);
 
-       drbd_bm_lock(mdev, why);
+       drbd_bm_lock(mdev, why, flags);
        rv = io_fn(mdev);
        drbd_bm_unlock(mdev);
 
-       drbd_resume_io(mdev);
+       if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
+               drbd_resume_io(mdev);
 
        return rv;
 }