[PATCH] fix and optimize clock source update

This fixes the clock source updates in update_wall_time() to correctly
track the time coming in via current_tick_length().  The fast paths
are optimized to be as short as possible to keep the overhead low.

Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Acked-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
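---

Not part of the patch itself: a minimal user-space model of the new
error bookkeeping, showing how the accumulation in update_wall_time()
and a single +-1 step of clocksource_adjust() converge the multiplier.
SHIFT and TICK_LENGTH_SHIFT are assumed values here, the names mirror
the new clocksource fields, and the clocksource_bigadjust() look-ahead
as well as the xtime_nsec/offset handling are deliberately left out.

	#include <stdio.h>
	#include <stdint.h>

	#define SHIFT			22	/* assumed clocksource shift */
	#define TICK_LENGTH_SHIFT	32	/* fixed point of the NTP tick length */

	int main(void)
	{
		uint64_t cycle_interval = 1000000;	/* cycles per tick (1ms @ 1GHz) */
		uint32_t mult = (1u << SHIFT) - 2;	/* starts slightly slow */
		/* ns << SHIFT accumulated per interval, like clock->xtime_interval */
		int64_t xtime_interval = (int64_t)cycle_interval * mult;
		/* NTP tick length: exactly 1000000 ns, in ns << TICK_LENGTH_SHIFT */
		int64_t tick_length = 1000000LL << TICK_LENGTH_SHIFT;
		int64_t error = 0;
		int tick;

		for (tick = 0; tick < 8; tick++) {
			int64_t interval = cycle_interval, err;

			/* accumulate the NTP/clocksource difference, as in
			 * the update_wall_time() loop */
			error += tick_length -
				 (xtime_interval << (TICK_LENGTH_SHIFT - SHIFT));

			/* single-step version of clocksource_adjust() */
			err = error >> (TICK_LENGTH_SHIFT - SHIFT - 1);
			if (err > interval) {
				mult++;
				xtime_interval += interval;
				error -= interval << (TICK_LENGTH_SHIFT - SHIFT);
			} else if (err < -interval) {
				mult--;
				xtime_interval -= interval;
				error += interval << (TICK_LENGTH_SHIFT - SHIFT);
			}
			printf("tick %d: mult-nominal=%ld error=%lld\n", tick,
			       (long)mult - (1L << SHIFT), (long long)error);
		}
		return 0;
	}

Starting two steps slow, the model reaches the nominal multiplier
within a few ticks and briefly overshoots by one step; for much larger
errors this +-1 stepping would be too slow, which is what the
clocksource_bigadjust() look-ahead in the patch is for.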
diff --git a/kernel/timer.c b/kernel/timer.c
index 890a56937..5bb6b79 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -770,8 +770,8 @@
- * specified number of bits to the right of the binary point.
+ * number of bits given by TICK_LENGTH_SHIFT to the right of the
+ * binary point.  This function has no side-effects.
- * This function has no side-effects.
- */
+ */
-u64 current_tick_length(long shift)
+u64 current_tick_length(void)
 {
 	long delta_nsec;
 	u64 ret;
@@ -779,14 +779,8 @@
 	 *    ie: nanosecond value shifted by (SHIFT_SCALE - 10)
 	 */
 	delta_nsec = tick_nsec + adjtime_adjustment() * 1000;
-	ret = ((u64) delta_nsec << (SHIFT_SCALE - 10)) + time_adj;
-
-	/* convert from (SHIFT_SCALE - 10) to specified shift scale: */
-	shift = shift - (SHIFT_SCALE - 10);
-	if (shift < 0)
-		ret >>= -shift;
-	else
-		ret <<= shift;
+	ret = (u64)delta_nsec << TICK_LENGTH_SHIFT;
+	ret += (s64)time_adj << (TICK_LENGTH_SHIFT - (SHIFT_SCALE - 10));
 
 	return ret;
 }
@@ -794,7 +788,6 @@
 /* XXX - all of this timekeeping code should be later moved to time.c */
 #include <linux/clocksource.h>
 static struct clocksource *clock; /* pointer to current clocksource */
-static cycle_t last_clock_cycle;  /* cycle value at last update_wall_time */
 
 #ifdef CONFIG_GENERIC_TIME
 /**
@@ -813,7 +806,7 @@
 	cycle_now = clocksource_read(clock);
 
 	/* calculate the delta since the last update_wall_time: */
-	cycle_delta = (cycle_now - last_clock_cycle) & clock->mask;
+	cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
 
 	/* convert to nanoseconds: */
 	ns_offset = cyc2ns(clock, cycle_delta);
@@ -927,7 +920,7 @@
 		timespec_add_ns(&xtime, nsec);
 
 		clock = new;
-		last_clock_cycle = now;
+		clock->cycle_last = now;
 		printk(KERN_INFO "Time: %s clocksource has been installed.\n",
 					clock->name);
 		return 1;
@@ -968,7 +961,7 @@
 	write_seqlock_irqsave(&xtime_lock, flags);
 	clock = clocksource_get_next();
 	clocksource_calculate_interval(clock, tick_nsec);
-	last_clock_cycle = clocksource_read(clock);
+	clock->cycle_last = clocksource_read(clock);
 	ntp_clear();
 	write_sequnlock_irqrestore(&xtime_lock, flags);
 }
@@ -988,7 +981,7 @@
 
 	write_seqlock_irqsave(&xtime_lock, flags);
 	/* restart the last cycle value */
-	last_clock_cycle = clocksource_read(clock);
+	clock->cycle_last = clocksource_read(clock);
 	write_sequnlock_irqrestore(&xtime_lock, flags);
 	return 0;
 }
@@ -1015,60 +1008,134 @@
 device_initcall(timekeeping_init_device);
 
 /*
+ * If the error is already large, we look ahead one more tick
+ * to compensate for late or lost adjustments.
+ */
+static __always_inline int clocksource_bigadjust(int sign, s64 error, s64 *interval, s64 *offset)
+{
+	int adj;
+
+	/*
+	 * As soon as the machine is synchronized to the external time
+	 * source, this should be the common case.
+	 */
+	error >>= 2;
+	if (likely(sign > 0 ? error <= *interval : error >= *interval))
+		return sign;
+
+	/*
+	 * An extra look ahead dampens the effect of the current error,
+	 * which can grow quite large with continuously late updates, as
+	 * it would otherwise dominate the adjustment value and could
+	 * lead to oscillation.
+	 */
+	error += current_tick_length() >> (TICK_LENGTH_SHIFT - clock->shift + 1);
+	error -= clock->xtime_interval >> 1;
+
+	adj = 0;
+	while (1) {
+		error >>= 1;
+		if (sign > 0 ? error <= *interval : error >= *interval)
+			break;
+		adj++;
+	}
+
+	/*
+	 * Add the current adjustments to the error and take the offset
+	 * into account: the latter can leave the error barely reduced
+	 * at the next tick.  Check the error again if there's room for
+	 * another adjustment, thus further reducing the error which
+	 * would otherwise have to be corrected at the next update.
+	 */
+	error = (error << 1) - *interval + *offset;
+	if (sign > 0 ? error > *interval : error < *interval)
+		adj++;
+
+	*interval <<= adj;
+	*offset <<= adj;
+	return sign << adj;
+}
+
+/*
+ * Adjust the multiplier to reduce the error value.  This is
+ * optimized for the most common adjustments of -1, 0 and 1;
+ * for other values we do a bit more work.
+ */
+static void clocksource_adjust(struct clocksource *clock, s64 offset)
+{
+	s64 error, interval = clock->cycle_interval;
+	int adj;
+
+	error = clock->error >> (TICK_LENGTH_SHIFT - clock->shift - 1);
+	if (error > interval) {
+		adj = clocksource_bigadjust(1, error, &interval, &offset);
+	} else if (error < -interval) {
+		interval = -interval;
+		offset = -offset;
+		adj = clocksource_bigadjust(-1, error, &interval, &offset);
+	} else
+		return;
+
+	clock->mult += adj;
+	clock->xtime_interval += interval;
+	clock->xtime_nsec -= offset;
+	clock->error -= (interval - offset) << (TICK_LENGTH_SHIFT - clock->shift);
+}
+
+/*
  * update_wall_time - Uses the current clocksource to increment the wall time
  *
  * Called from the timer interrupt, must hold a write on xtime_lock.
  */
 static void update_wall_time(void)
 {
-	static s64 remainder_snsecs, error;
-	s64 snsecs_per_sec;
-	cycle_t now, offset;
+	cycle_t offset;
 
-	snsecs_per_sec = (s64)NSEC_PER_SEC << clock->shift;
-	remainder_snsecs += (s64)xtime.tv_nsec << clock->shift;
+	clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift;
 
-	now = clocksource_read(clock);
-	offset = (now - last_clock_cycle)&clock->mask;
+#ifdef CONFIG_GENERIC_TIME
+	offset = (clocksource_read(clock) - clock->cycle_last) & clock->mask;
+#else
+	offset = clock->cycle_interval;
+#endif
 
 	/* normally this loop will run just once, however in the
 	 * case of lost or late ticks, it will accumulate correctly.
 	 */
-	while (offset > clock->interval_cycles) {
-		/* get the ntp interval in clock shifted nanoseconds */
-		s64 ntp_snsecs	= current_tick_length(clock->shift);
-
+	while (offset >= clock->cycle_interval) {
 		/* accumulate one interval */
-		remainder_snsecs += clock->interval_snsecs;
-		last_clock_cycle += clock->interval_cycles;
-		offset -= clock->interval_cycles;
+		clock->xtime_nsec += clock->xtime_interval;
+		clock->cycle_last += clock->cycle_interval;
+		offset -= clock->cycle_interval;
+
+		if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) {
+			clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift;
+			xtime.tv_sec++;
+			second_overflow();
+		}
 
 		/* interpolator bits */
-		time_interpolator_update(clock->interval_snsecs
+		time_interpolator_update(clock->xtime_interval
 						>> clock->shift);
 		/* increment the NTP state machine */
 		update_ntp_one_tick();
 
 		/* accumulate error between NTP and clock interval */
-		error += (ntp_snsecs - (s64)clock->interval_snsecs);
-
-		/* correct the clock when NTP error is too big */
-		remainder_snsecs += make_ntp_adj(clock, offset, &error);
-
-		if (remainder_snsecs >= snsecs_per_sec) {
-			remainder_snsecs -= snsecs_per_sec;
-			xtime.tv_sec++;
-			second_overflow();
-		}
+		clock->error += current_tick_length();
+		clock->error -= clock->xtime_interval << (TICK_LENGTH_SHIFT - clock->shift);
 	}
+
+	/* correct the clock when NTP error is too big */
+	clocksource_adjust(clock, offset);
+
 	/* store full nanoseconds into xtime */
-	xtime.tv_nsec = remainder_snsecs >> clock->shift;
-	remainder_snsecs -= (s64)xtime.tv_nsec << clock->shift;
+	xtime.tv_nsec = clock->xtime_nsec >> clock->shift;
+	clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift;
 
 	/* check to see if there is a new clocksource to use */
 	if (change_clocksource()) {
-		error = 0;
-		remainder_snsecs = 0;
+		clock->error = 0;
+		clock->xtime_nsec = 0;
 		clocksource_calculate_interval(clock, tick_nsec);
 	}
 }
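
Also not part of the patch: the power-of-two search at the heart of
clocksource_bigadjust(), stripped of the sign handling and of the
dampening terms, as a hypothetical standalone helper.  The kernel
shifts the error once before the first comparison, then keeps halving
it until a single interval covers it; clock->mult then moves by
sign << adj while interval and offset are scaled up to match.

	/* how many halvings until one interval covers the error? */
	static int toy_adjust_shift(long long error, long long interval)
	{
		int adj = 0;

		error >>= 1;		/* the kernel pre-shifts once as well */
		while (error > interval) {
			error >>= 1;
			adj++;
		}
		return adj;
	}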