drbd: No longer answer P_RS_DATA_REQUEST packets when in C_AHEAD mode
author Philipp Reisner <philipp.reisner@linbit.com>
Mon, 27 Dec 2010 09:53:28 +0000 (10:53 +0100)
committer Philipp Reisner <philipp.reisner@linbit.com>
Thu, 10 Mar 2011 10:45:25 +0000 (11:45 +0100)
If the sync source node replies to a P_RS_DATA_REQUEST packet
when it is already in ahead mode, i.e. those two packets
crossed each other on the wire, that may lead to diverging
bitmaps.

  This never happens in a well-tuned system. In a well-tuned
  system the resync controller has reduced the resync speed
  to zero long before we got into ahead mode.

But of course we have to be prepared for the not-well-tuned
system as well, because diverging bitmaps mean a
non-terminating resync.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
drivers/block/drbd/drbd_int.h
drivers/block/drbd/drbd_receiver.c
drivers/block/drbd/drbd_worker.c

index ec06e74..a529285 100644 (file)
@@ -205,8 +205,9 @@ enum drbd_packets {
        /* P_CKPT_DISABLE_REQ    = 0x26, * currently reserved for protocol D */
        P_DELAY_PROBE         = 0x27, /* is used on BOTH sockets */
        P_OUT_OF_SYNC         = 0x28, /* Mark as out of sync (Outrunning), data socket */
+       P_RS_CANCEL           = 0x29, /* meta: Used to cancel RS_DATA_REQUEST packet by SyncSource */
 
-       P_MAX_CMD             = 0x28,
+       P_MAX_CMD             = 0x2A,
        P_MAY_IGNORE          = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */
        P_MAX_OPT_CMD         = 0x101,
 
index f0a0f66..bf865bd 100644 (file)
@@ -4361,7 +4361,16 @@ static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h)
 
        if (get_ldev_if_state(mdev, D_FAILED)) {
                drbd_rs_complete_io(mdev, sector);
-               drbd_rs_failed_io(mdev, sector, size);
+               switch (be16_to_cpu(h->command)) {
+               case P_NEG_RS_DREPLY:
+                       drbd_rs_failed_io(mdev, sector, size);
+               case P_RS_CANCEL:
+                       break;
+               default:
+                       D_ASSERT(0);
+                       put_ldev(mdev);
+                       return false;
+               }
                put_ldev(mdev);
        }
 
@@ -4459,6 +4468,7 @@ static struct asender_cmd *get_asender_cmd(int cmd)
        [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
        [P_RS_IS_IN_SYNC]   = { sizeof(struct p_block_ack), got_IsInSync },
        [P_DELAY_PROBE]     = { sizeof(struct p_delay_probe93), got_skip },
+       [P_RS_CANCEL]       = { sizeof(struct p_block_ack), got_NegRSDReply},
        [P_MAX_CMD]         = { 0, NULL },
        };
        if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL)
index 7bfeb79..1d7510e 100644 (file)
@@ -988,7 +988,9 @@ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
                put_ldev(mdev);
        }
 
-       if (likely((e->flags & EE_WAS_ERROR) == 0)) {
+       if (mdev->state.conn == C_AHEAD) {
+               ok = drbd_send_ack(mdev, P_RS_CANCEL, e);
+       } else if (likely((e->flags & EE_WAS_ERROR) == 0)) {
                if (likely(mdev->state.pdsk >= D_INCONSISTENT)) {
                        inc_rs_pending(mdev);
                        ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e);