[PATCH] blktrace: add timestamp message

This adds a new timestamp message to blktrace, giving the time of day when
we start tracing. This helps user space correlate block trace events
with, e.g., an application strace.

This requires a (compatible) update to blkparse. The changed blkparse
is still able to process traces generated by older kernels, and older
versions of blkparse should silently ignore the new records (because
they have a pid of 0).

Signed-off-by: Olaf Kirch <okir@suse.de>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/block/blktrace.c b/block/blktrace.c
index 135593c..562ca7c 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -22,30 +22,61 @@
 #include <linux/init.h>
 #include <linux/mutex.h>
 #include <linux/debugfs.h>
+#include <linux/time.h>
 #include <asm/uaccess.h>
 
 static DEFINE_PER_CPU(unsigned long long, blk_trace_cpu_offset) = { 0, };
 static unsigned int blktrace_seq __read_mostly = 1;
 
 /*
+ * Send a notify message; returns blktrace_seq, or 0 if the reserve fails.
+ */
+static inline unsigned int trace_note(struct blk_trace *bt,
+		pid_t pid, int action,
+		const void *data, size_t len)
+{
+	struct blk_io_trace *t;
+	int cpu = smp_processor_id();
+
+	t = relay_reserve(bt->rchan, sizeof(*t) + len);
+	if (t == NULL)
+		return 0;
+
+	t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
+	t->time = sched_clock() - per_cpu(blk_trace_cpu_offset, cpu);
+	t->device = bt->dev;
+	t->action = action;
+	t->pid = pid;
+	t->cpu = cpu;
+	t->pdu_len = len;
+	memcpy((void *) t + sizeof(*t), data, len);
+	return blktrace_seq;
+}
+
+/*
  * Send out a notify for this process, if we haven't done so since a trace
  * started
  */
 static void trace_note_tsk(struct blk_trace *bt, struct task_struct *tsk)
 {
-	struct blk_io_trace *t;
+	tsk->btrace_seq = trace_note(bt, tsk->pid,
+			BLK_TN_PROCESS,
+			tsk->comm, sizeof(tsk->comm));
+}
 
-	t = relay_reserve(bt->rchan, sizeof(*t) + sizeof(tsk->comm));
-	if (t) {
-		t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
-		t->device = bt->dev;
-		t->action = BLK_TC_ACT(BLK_TC_NOTIFY);
-		t->pid = tsk->pid;
-		t->cpu = smp_processor_id();
-		t->pdu_len = sizeof(tsk->comm);
-		memcpy((void *) t + sizeof(*t), tsk->comm, t->pdu_len);
-		tsk->btrace_seq = blktrace_seq;
-	}
+static void trace_note_time(struct blk_trace *bt)
+{
+	struct timespec now;
+	unsigned long flags;
+	u32 words[2];
+
+	getnstimeofday(&now);
+	words[0] = now.tv_sec;
+	words[1] = now.tv_nsec;
+
+	local_irq_save(flags);
+	trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words));
+	local_irq_restore(flags);
 }
 
 static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
@@ -394,6 +425,8 @@
 			blktrace_seq++;
 			smp_mb();
 			bt->trace_state = Blktrace_running;
+
+			trace_note_time(bt);
 			ret = 0;
 		}
 	} else {
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index b99a714..3680ff9a 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -50,6 +50,15 @@
 };
 
 /*
+ * Notify events.
+ */
+enum blktrace_notify {
+	__BLK_TN_PROCESS = 0,		/* establish pid/name mapping */
+	__BLK_TN_TIMESTAMP,		/* include system clock */
+};
+
+
+/*
  * Trace actions in full. Additionally, read or write is masked
  */
 #define BLK_TA_QUEUE		(__BLK_TA_QUEUE | BLK_TC_ACT(BLK_TC_QUEUE))
@@ -68,6 +77,9 @@
 #define BLK_TA_BOUNCE		(__BLK_TA_BOUNCE)
 #define BLK_TA_REMAP		(__BLK_TA_REMAP | BLK_TC_ACT(BLK_TC_QUEUE))
 
+#define BLK_TN_PROCESS		(__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY))
+#define BLK_TN_TIMESTAMP	(__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY))
+
 #define BLK_IO_TRACE_MAGIC	0x65617400
 #define BLK_IO_TRACE_VERSION	0x07