drbd: bitmap: don't count unused bits (fix non-terminating resync)

We trusted the on-disk bitmap to have unused bits cleared.
In case that is not true for whatever reason,
and we take a code path where the unused bits don't get cleared
elsewhere (bm_clear_surplus is not called), we may miscount the bits,
and get confused during resync, waiting for bits to get cleared that we
don't even use: the resync process would not terminate.

Fix this by masking out unused bits in __bm_count_bits.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 9390e95..c536148 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -376,9 +376,16 @@
 	unsigned long *p_addr, *bm, offset = 0;
 	unsigned long bits = 0;
 	unsigned long i, do_now;
+	unsigned long words;
 
-	while (offset < b->bm_words) {
-		i = do_now = min_t(size_t, b->bm_words-offset, LWPP);
+	/* due to 64bit alignment, the last long on a 32bit arch
+	 * may be not used at all. The last used long will likely
+	 * be only partially used, always. Don't count those bits,
+	 * but mask them out. */
+	words = (b->bm_bits + BITS_PER_LONG - 1) >> LN2_BPL;
+
+	while (offset < words) {
+		i = do_now = min_t(size_t, words-offset, LWPP);
 		p_addr = __bm_map_paddr(b, offset, KM_USER0);
 		bm = p_addr + MLPP(offset);
 		while (i--) {
@@ -388,8 +395,20 @@
 #endif
 			bits += hweight_long(*bm++);
 		}
-		__bm_unmap(p_addr, KM_USER0);
 		offset += do_now;
+		if (offset == words) {
+			/* last word may only be partially used,
+			 * see also bm_clear_surplus. */
+			i = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) -1;
+			if (i) {
+				bits -= hweight_long(p_addr[do_now-1] & ~i);
+				p_addr[do_now-1] &= i;
+			}
+			/* 32bit arch, may have an unused padding long */
+			if (words != b->bm_words)
+				p_addr[do_now] = 0;
+		}
+		__bm_unmap(p_addr, KM_USER0);
 		cond_resched();
 	}