ext4: Add the journal checksum feature

The journal checksum feature adds two new flags i.e
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT and JBD2_FEATURE_COMPAT_CHECKSUM.

JBD2_FEATURE_CHECKSUM flag indicates that the commit block contains the
checksum for the blocks described by the descriptor blocks.
Due to checksums, writing of the commit record no longer needs to be
synchronous. Now commit record can be sent to disk without waiting for
descriptor blocks to be written to disk. This behavior is controlled
using JBD2_FEATURE_ASYNC_COMMIT flag. Older kernels/e2fsck should not be
able to recover the journal with _ASYNC_COMMIT hence it is made
incompat.
The commit header has been extended to hold the checksum along with the
type of the checksum.

For recovery in pass scan checksums are verified to ensure the sanity
and completeness(in case of _ASYNC_COMMIT) of every transaction.

Signed-off-by: Andreas Dilger <adilger@clusterfs.com>
Signed-off-by: Girish Shilamkar <girish@clusterfs.com>
Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index d0ce627..92168066 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -21,6 +21,7 @@
 #include <linux/jbd2.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
+#include <linux/crc32.h>
 #endif
 
 /*
@@ -316,6 +317,37 @@
 	return block;
 }
 
+/*
+ * calc_chksums calculates the checksums for the blocks described in the
+ * descriptor block.
+ */
+static int calc_chksums(journal_t *journal, struct buffer_head *bh,
+			unsigned long *next_log_block, __u32 *crc32_sum)
+{
+	int i, num_blks, err;
+	unsigned long io_block;
+	struct buffer_head *obh;
+
+	num_blks = count_tags(journal, bh);
+	/* Calculate checksum of the descriptor block. */
+	*crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);
+
+	for (i = 0; i < num_blks; i++) {
+		io_block = (*next_log_block)++;
+		wrap(journal, *next_log_block);
+		err = jread(&obh, journal, io_block);
+		if (err) {
+			printk(KERN_ERR "JBD: IO error %d recovering block "
+				"%lu in log\n", err, io_block);
+			return 1;
+		} else {
+			*crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
+				     obh->b_size);
+		}
+	}
+	return 0;
+}
+
 static int do_one_pass(journal_t *journal,
 			struct recovery_info *info, enum passtype pass)
 {
@@ -328,6 +360,7 @@
 	unsigned int		sequence;
 	int			blocktype;
 	int			tag_bytes = journal_tag_bytes(journal);
+	__u32			crc32_sum = ~0; /* Transactional Checksums */
 
 	/* Precompute the maximum metadata descriptors in a descriptor block */
 	int			MAX_BLOCKS_PER_DESC;
@@ -419,12 +452,26 @@
 		switch(blocktype) {
 		case JBD2_DESCRIPTOR_BLOCK:
 			/* If it is a valid descriptor block, replay it
-			 * in pass REPLAY; otherwise, just skip over the
-			 * blocks it describes. */
+			 * in pass REPLAY; if journal_checksums enabled, then
+			 * calculate checksums in PASS_SCAN, otherwise,
+			 * just skip over the blocks it describes. */
 			if (pass != PASS_REPLAY) {
+				if (pass == PASS_SCAN &&
+				    JBD2_HAS_COMPAT_FEATURE(journal,
+					    JBD2_FEATURE_COMPAT_CHECKSUM) &&
+				    !info->end_transaction) {
+					if (calc_chksums(journal, bh,
+							&next_log_block,
+							&crc32_sum)) {
+						put_bh(bh);
+						break;
+					}
+					put_bh(bh);
+					continue;
+				}
 				next_log_block += count_tags(journal, bh);
 				wrap(journal, next_log_block);
-				brelse(bh);
+				put_bh(bh);
 				continue;
 			}
 
@@ -516,9 +563,96 @@
 			continue;
 
 		case JBD2_COMMIT_BLOCK:
-			/* Found an expected commit block: not much to
-			 * do other than move on to the next sequence
+			/*     How to differentiate between interrupted commit
+			 *               and journal corruption ?
+			 *
+			 * {nth transaction}
+			 *        Checksum Verification Failed
+			 *			 |
+			 *		 ____________________
+			 *		|		     |
+			 * 	async_commit             sync_commit
+			 *     		|                    |
+			 *		| GO TO NEXT    "Journal Corruption"
+			 *		| TRANSACTION
+			 *		|
+			 * {(n+1)th transanction}
+			 *		|
+			 * 	 _______|______________
+			 * 	|	 	      |
+			 * Commit block found	Commit block not found
+			 *      |		      |
+			 * "Journal Corruption"       |
+			 *		 _____________|_________
+			 *     		|	           	|
+			 *	nth trans corrupt	OR   nth trans
+			 *	and (n+1)th interrupted     interrupted
+			 *	before commit block
+			 *      could reach the disk.
+			 *	(Cannot find the difference in above
+			 *	 mentioned conditions. Hence assume
+			 *	 "Interrupted Commit".)
+			 */
+
+			/* Found an expected commit block: if checksums
+			 * are present verify them in PASS_SCAN; else not
+			 * much to do other than move on to the next sequence
 			 * number. */
+			if (pass == PASS_SCAN &&
+			    JBD2_HAS_COMPAT_FEATURE(journal,
+				    JBD2_FEATURE_COMPAT_CHECKSUM)) {
+				int chksum_err, chksum_seen;
+				struct commit_header *cbh =
+					(struct commit_header *)bh->b_data;
+				unsigned found_chksum =
+					be32_to_cpu(cbh->h_chksum[0]);
+
+				chksum_err = chksum_seen = 0;
+
+				if (info->end_transaction) {
+					printk(KERN_ERR "JBD: Transaction %u "
+						"found to be corrupt.\n",
+						next_commit_ID - 1);
+					brelse(bh);
+					break;
+				}
+
+				if (crc32_sum == found_chksum &&
+				    cbh->h_chksum_type == JBD2_CRC32_CHKSUM &&
+				    cbh->h_chksum_size ==
+						JBD2_CRC32_CHKSUM_SIZE)
+				       chksum_seen = 1;
+				else if (!(cbh->h_chksum_type == 0 &&
+					     cbh->h_chksum_size == 0 &&
+					     found_chksum == 0 &&
+					     !chksum_seen))
+				/*
+				 * If fs is mounted using an old kernel and then
+				 * kernel with journal_chksum is used then we
+				 * get a situation where the journal flag has
+				 * checksum flag set but checksums are not
+				 * present i.e chksum = 0, in the individual
+				 * commit blocks.
+				 * Hence to avoid checksum failures, in this
+				 * situation, this extra check is added.
+				 */
+						chksum_err = 1;
+
+				if (chksum_err) {
+					info->end_transaction = next_commit_ID;
+
+					if (!JBD2_HAS_COMPAT_FEATURE(journal,
+					   JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){
+						printk(KERN_ERR
+						       "JBD: Transaction %u "
+						       "found to be corrupt.\n",
+						       next_commit_ID);
+						brelse(bh);
+						break;
+					}
+				}
+				crc32_sum = ~0;
+			}
 			brelse(bh);
 			next_commit_ID++;
 			continue;
@@ -554,9 +688,10 @@
 	 * transaction marks the end of the valid log.
 	 */
 
-	if (pass == PASS_SCAN)
-		info->end_transaction = next_commit_ID;
-	else {
+	if (pass == PASS_SCAN) {
+		if (!info->end_transaction)
+			info->end_transaction = next_commit_ID;
+	} else {
 		/* It's really bad news if different passes end up at
 		 * different places (but possible due to IO errors). */
 		if (info->end_transaction != next_commit_ID) {