From f36af18c7b4ea1ba333c09b606bb4a7e5af66b4d Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 9 Mar 2011 22:44:55 +0100 Subject: drbd: fix disconnect/reconnect loop, if ping-timeout == ping-int If there is no replication traffic within the idle timeout (ping-int seconds), DRBD will send a P_PING, and adjust the timeout to ping-timeout. If there is no P_PING_ACK received within this ping-timeout, DRBD finally drops the connection, and tries to re-establish it. To decide which timeout was active, we compared the current timeout with the ping-timeout, and dropped the connection, if that was the case. By default, ping-int is 10 seconds, ping-timeout is 500 ms. Unfortunately, if you configure ping-timeout to be the same as ping-int, expiry of the idle-timeout had been mistaken for a missing ping ack, and caused an immediate reconnection attempt. Fix: Allow both timeouts to be equal, use a local variable to store which timeout is active. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index fd26666..0b17d42 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4554,6 +4554,7 @@ int drbd_asender(struct drbd_thread *thi) int received = 0; int expect = sizeof(struct p_header80); int empty; + int ping_timeout_active = 0; sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev)); @@ -4566,6 +4567,7 @@ int drbd_asender(struct drbd_thread *thi) ERR_IF(!drbd_send_ping(mdev)) goto reconnect; mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_timeo*HZ/10; + ping_timeout_active = 1; } /* conditionally cork; @@ -4620,8 +4622,7 @@ int drbd_asender(struct drbd_thread *thi) dev_err(DEV, "meta connection shut down by peer.\n"); goto reconnect; } else if (rv == -EAGAIN) { - if (mdev->meta.socket->sk->sk_rcvtimeo == - mdev->net_conf->ping_timeo*HZ/10) { + if (ping_timeout_active) { dev_err(DEV, "PingAck did not arrive in time.\n"); goto reconnect; } @@ -4660,6 +4661,11 @@ int drbd_asender(struct drbd_thread *thi) if (!cmd->process(mdev, h)) goto reconnect; + /* the idle_timeout (ping-int) + * has been restored in got_PingAck() */ + if (cmd == get_asender_cmd(P_PING_ACK)) + ping_timeout_active = 0; + buf = h; received = 0; expect = sizeof(struct p_header80); -- cgit v1.1