drbd: implement csums-after-crash-only Checksum based resync trades CPU cycles for network bandwidth, in situations where we expect much of the to-be-resynced blocks to be actually identical on both sides already. In a "network hickup" scenario, it won't help: all to-be-resynced blocks will typically be different. The use case is for the resync of *potentially* different blocks after crash recovery -- the crash recovery had marked larger areas (those covered by the activity log) as need-to-be-resynced, just in case. Most of those blocks will be identical. This option makes it possible to configure checksum based resync, but only actually use it for the first resync after primary crash. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>

commit: aaaba34576407857f6146ff6c330f06e63fb2bf2 [log] [tgz]
author: Lars Ellenberg <lars.ellenberg@linbit.com> Tue Mar 18 12:30:09 2014 +0100
committer: Philipp Reisner <philipp.reisner@linbit.com> Thu Jul 10 18:35:00 2014 +0200
tree: 6cd468ecb123314d06c641185410f6cbce58c504
parent: 6a8d68b1878e8839b4927150da236c8248703c70 [diff]
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index abf5aef..fe6595a 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h

@@ -738,6 +738,8 @@
 	struct rb_root read_requests;
 	struct rb_root write_requests;
 
+	/* use checksums for *this* resync */
+	bool use_csums;
 	/* blocks to resync in this run [unit BM_BLOCK_SIZE] */
 	unsigned long rs_total;
 	/* number of resync blocks that failed in this run */

diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 5626c5b..d326af6 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c

@@ -2555,6 +2555,8 @@
 			peer_req->w.cb = w_e_end_csum_rs_req;
 			/* used in the sector offset progress display */
 			device->bm_resync_fo = BM_SECT_TO_BIT(sector);
+			/* remember to report stats in drbd_resync_finished */
+			device->use_csums = true;
 		} else if (pi->cmd == P_OV_REPLY) {
 			/* track progress, we may need to throttle */
 			atomic_add(size >> 9, &device->rs_sect_in);

diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 2ff5fd4..6532a69 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c

@@ -698,8 +698,8 @@
 		/* adjust very last sectors, in case we are oddly sized */
 		if (sector + (size>>9) > capacity)
 			size = (capacity-sector)<<9;
-		if (connection->agreed_pro_version >= 89 &&
-		    connection->csums_tfm) {
+
+		if (device->use_csums) {
 			switch (read_for_csum(peer_device, sector, size)) {
 			case -EIO: /* Disk failure */
 				put_ldev(device);
@@ -913,7 +913,7 @@
 		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
 			khelper_cmd = "after-resync-target";
 
-		if (first_peer_device(device)->connection->csums_tfm && device->rs_total) {
+		if (device->use_csums && device->rs_total) {
 			const unsigned long s = device->rs_same_csum;
 			const unsigned long t = device->rs_total;
 			const int ratio =
@@ -1622,6 +1622,18 @@
 	clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags);
 }
 
+static bool use_checksum_based_resync(struct drbd_connection *connection, struct drbd_device *device)
+{
+	bool csums_after_crash_only;
+	rcu_read_lock();
+	csums_after_crash_only = rcu_dereference(connection->net_conf)->csums_after_crash_only;
+	rcu_read_unlock();
+	return connection->agreed_pro_version >= 89 &&		/* supported? */
+		connection->csums_tfm &&			/* configured? */
+		(csums_after_crash_only == 0			/* use for each resync? */
+		 || test_bit(CRASHED_PRIMARY, &device->flags));	/* or only after Primary crash? */
+}
+
 /**
  * drbd_start_resync() - Start the resync process
  * @device:	DRBD device.
@@ -1756,8 +1768,12 @@
 		     drbd_conn_str(ns.conn),
 		     (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10),
 		     (unsigned long) device->rs_total);
-		if (side == C_SYNC_TARGET)
+		if (side == C_SYNC_TARGET) {
 			device->bm_resync_fo = 0;
+			device->use_csums = use_checksum_based_resync(connection, device);
+		} else {
+			device->use_csums = 0;
+		}
 
 		/* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
 		 * with w_send_oos, or the sync target will get confused as to
commit	aaaba34576407857f6146ff6c330f06e63fb2bf2	[log] [tgz]
author	Lars Ellenberg <lars.ellenberg@linbit.com>	Tue Mar 18 12:30:09 2014 +0100
committer	Philipp Reisner <philipp.reisner@linbit.com>	Thu Jul 10 18:35:00 2014 +0200
tree	6cd468ecb123314d06c641185410f6cbce58c504
parent	6a8d68b1878e8839b4927150da236c8248703c70 [diff]