/* blob: f190048c9bdedc63f9ef2a6a8be7184ffbe224f4 [file] [log] [blame] */
#include "libcflat.h"
#include "smp.h"
#include "atomic.h"
#include "processor.h"
#include "kvmclock.h"
#include "asm/barrier.h"
/*
 * Branch-prediction hints built on __builtin_expect(). The argument is
 * normalised to 0/1 with !! and the compiler is told which of the two
 * values to expect: 0 for unlikely(), 1 for likely().
 */
#define unlikely(x) __builtin_expect(!!(x), 0)
#define likely(x) __builtin_expect(!!(x), 1)
/*
 * Time-info areas, one slot per CPU (indexed by smp_id() in this file).
 * kvm_clock_init() writes a slot's address to MSR_KVM_SYSTEM_TIME_NEW.
 * The array is 4-byte aligned.
 */
struct pvclock_vcpu_time_info __attribute__((aligned(4))) hv_clock[MAX_CPU];
/* Wall-clock area; kvm_get_wallclock() writes its address to MSR_KVM_WALL_CLOCK_NEW. */
struct pvclock_wall_clock wall_clock;
/* Flag mask set via pvclock_set_flags() and consulted by pvclock_clocksource_read(). */
static unsigned char valid_flags = 0;
/* Largest clock value returned so far by the monotonicity fix-up in pvclock_clocksource_read(). */
static atomic64_t last_value = ATOMIC64_INIT(0);
/*
 * Scale a 64-bit delta by shifting and then multiplying by a 32-bit
 * fraction, yielding a 64-bit result.
 *
 * @delta:    value to scale
 * @mul_frac: 32-bit fixed-point multiplier; the result is
 *            (shifted delta * mul_frac) >> 32
 * @shift:    pre-shift applied to @delta: left when >= 0, right by
 *            -@shift when negative. The magnitude must be smaller than
 *            64, otherwise the C shift is undefined.
 *
 * Returns the scaled value.
 */
static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
{
	u64 product;
#ifdef __i386__
	u32 tmp1, tmp2;
#elif defined(__x86_64__)
	u64 tmp;
#endif

	if (shift < 0)
		delta >>= -shift;
	else
		delta <<= shift;

#ifdef __i386__
	/*
	 * 64x32 multiply built from two 32x32 multiplies: low word first,
	 * then the high word, adding the carry from the low product.
	 */
	__asm__ (
		"mul  %5       ; "
		"mov  %4,%%eax ; "
		"mov  %%edx,%4 ; "
		"mul  %5       ; "
		"xor  %5,%5    ; "
		"add  %4,%%eax ; "
		"adc  %5,%%edx ; "
		: "=A" (product), "=r" (tmp1), "=r" (tmp2)
		: "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
#elif defined(__x86_64__)
	/*
	 * mul leaves the 128-bit product in RDX:RAX, so RDX is overwritten.
	 * It is therefore declared as an output (tmp) tied to the mul_frac
	 * input; listing it only as an input would let the compiler assume
	 * RDX still holds mul_frac after the asm statement.
	 */
	__asm__ (
		"mul %%rdx ; shrd $32,%%rdx,%%rax"
		: "=a" (product), "=d" (tmp)
		: "0" (delta), "1" ((u64)mul_frac) );
#else
#error implement me!
#endif

	return product;
}
/*
 * do_div(n, base): divide the 64-bit lvalue n by the 32-bit base in place
 * and evaluate to the 32-bit remainder. Note that n is expanded more than
 * once, so it must not have side effects.
 */
#ifdef __i386__
/* This variant uses the plain 64-bit % and / operators. */
# define do_div(n,base) ({ \
u32 __base = (base); \
u32 __rem; \
__rem = ((u64)(n)) % __base; \
(n) = ((u64)(n)) / __base; \
__rem; \
})
#else
/*
 * __div64_32 - shift-and-subtract division of a 64-bit value by a 32-bit one
 *
 * @n:    in: dividend; out: quotient
 * @base: divisor
 *
 * Returns the remainder. Declared weak, so another definition of the same
 * symbol may replace this one at link time.
 */
u32 __attribute__((weak)) __div64_32(u64 *n, u32 base);
u32 __attribute__((weak)) __div64_32(u64 *n, u32 base)
{
u64 rem = *n;
u64 b = base;
u64 res, d = 1;
u32 high = rem >> 32;
/* Reduce the thing a bit first */
/*
 * If the upper 32 bits alone are at least base, divide them directly:
 * that yields the upper half of the quotient and shrinks rem.
 */
res = 0;
if (high >= base) {
high /= base;
res = (u64) high << 32;
rem -= (u64) (high*base) << 32;
}
/*
 * Double b (and the matching quotient bit d) until b reaches rem or
 * b's top bit becomes set.
 */
while ((s64)b > 0 && b < rem) {
b = b+b;
d = d+d;
}
/*
 * Walk back down one bit at a time, subtracting b wherever it fits and
 * recording the corresponding quotient bit.
 */
do {
if (rem >= b) {
rem -= b;
res += d;
}
b >>= 1;
d >>= 1;
} while (d);
*n = res;
return rem;
}
/*
 * This variant takes a 32-bit fast path when the upper half of n is zero
 * and otherwise calls __div64_32(). The pointer comparison on the first
 * statement line has no runtime effect; it makes the compiler diagnose an
 * n whose type is not u64.
 */
# define do_div(n,base) ({ \
u32 __base = (base); \
u32 __rem; \
(void)(((typeof((n)) *)0) == ((u64 *)0)); \
if (likely(((n) >> 32) == 0)) { \
__rem = (u32)(n) % __base; \
(n) = (u32)(n) / __base; \
} else \
__rem = __div64_32(&(n), __base); \
__rem; \
})
#endif
/**
 * set_normalized_timespec - store a (sec, nsec) pair in normalized form
 *
 * @ts:   timespec to fill in
 * @sec:  seconds part
 * @nsec: nanoseconds part; may be negative or exceed one second
 *
 * Whole seconds are moved between @nsec and @sec until the nanosecond
 * part lies in the range 0 <= tv_nsec < NSEC_PER_SEC, then both fields
 * are written to @ts. For negative times only tv_sec ends up negative.
 */
static void set_normalized_timespec(struct timespec *ts, long sec, s64 nsec)
{
	/* Too many nanoseconds: carry whole seconds upwards. */
	for (; nsec >= NSEC_PER_SEC; ++sec) {
		/*
		 * The empty asm makes nsec opaque to the optimizer so this
		 * loop is not turned into a modulo operation (the same trick
		 * as __iter_div_u64_rem() in Linux).
		 */
		asm("" : "+rm"(nsec));
		nsec -= NSEC_PER_SEC;
	}

	/* Negative nanoseconds: borrow whole seconds. */
	for (; nsec < 0; --sec) {
		asm("" : "+rm"(nsec));
		nsec += NSEC_PER_SEC;
	}

	ts->tv_sec = sec;
	ts->tv_nsec = nsec;
}
/*
 * Open a read section on a time-info area.
 *
 * Returns src->version with bit 0 cleared. If the sampled version was odd,
 * the later comparison in pvclock_read_retry() cannot match, so the
 * caller retries.
 */
static inline
unsigned pvclock_read_begin(const struct pvclock_vcpu_time_info *src)
{
	unsigned even_version = src->version;

	even_version &= ~1u;
	/* The version must be sampled before any of the payload fields. */
	smp_rmb();
	return even_version;
}
/*
 * Close a read section opened with pvclock_read_begin().
 *
 * Returns true when src->version no longer equals @version, meaning the
 * data read in between must be discarded and read again.
 */
static inline
bool pvclock_read_retry(const struct pvclock_vcpu_time_info *src,
			unsigned version)
{
	bool changed;

	/* The payload reads must complete before the version is re-checked. */
	smp_rmb();
	changed = (src->version != version);
	return changed;
}
/*
 * Read the TSC after a full memory barrier.
 *
 * FIXME: on Intel CPUs rmb() (lfence) would be sufficient and up to 2x
 * faster than the mb() used here.
 */
static inline u64 rdtsc_ordered(void)
{
	u64 tsc;

	mb();
	tsc = rdtsc();
	return tsc;
}
/*
 * Compute the clock value from one snapshot of a time-info area:
 * system_time plus the TSC ticks elapsed since tsc_timestamp, scaled with
 * tsc_to_system_mul and tsc_shift.
 *
 * No consistency check is done here; the caller brackets this with
 * pvclock_read_begin()/pvclock_read_retry().
 */
static inline
cycle_t __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src)
{
	u64 tsc_now = rdtsc_ordered();
	u64 elapsed = tsc_now - src->tsc_timestamp;

	return src->system_time +
	       scale_delta(elapsed, src->tsc_to_system_mul, src->tsc_shift);
}
/*
 * Read the clock from @src and, unless the flags say the raw value can be
 * trusted, clamp it so that it never goes backwards relative to the
 * values previously returned through last_value.
 */
static cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
{
	unsigned seq;
	cycle_t now;
	u64 prev;
	u8 src_flags;

	/* Repeat until a consistent snapshot of the area has been read. */
	do {
		seq = pvclock_read_begin(src);
		now = __pvclock_read_cycles(src);
		src_flags = src->flags;
	} while (pvclock_read_retry(src, seq));

	/* Raw cycles were requested: no monotonicity fix-up. */
	if (valid_flags & PVCLOCK_RAW_CYCLE_BIT)
		return now;

	/* Stable-TSC bit is both accepted by us and set in the area. */
	if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) &&
	    (src_flags & PVCLOCK_TSC_STABLE_BIT))
		return now;

	/*
	 * last_value is a global accumulator that must only move forward.
	 * If our reading is behind it we are assumed to be within a small
	 * error margin, so returning the accumulator instead does not
	 * sacrifice accuracy.
	 *
	 * Reads: the global may change between the test and the return, but
	 * that only means another vcpu read the clock at a later time; all
	 * we need is to never report a backwards step.
	 *
	 * Updates: a plain "last_value = now" is not enough, because two
	 * vcpus could update at the same time with one slightly behind,
	 * which would move last_value backwards. Hence the cmpxchg loop.
	 */
	prev = atomic64_read(&last_value);
	for (;;) {
		if (now < prev)
			return prev;
		prev = atomic64_cmpxchg(&last_value, prev, now);
		if (!unlikely(prev != now))
			break;
	}

	return now;
}
/*
 * Read the clock using the calling CPU's time-info slot.
 *
 * The slot is selected with smp_id(); the value comes from
 * pvclock_clocksource_read().
 */
cycle_t kvm_clock_read(void)
{
	return pvclock_clocksource_read(&hv_clock[smp_id()]);
}
/*
 * Register the calling CPU's time-info slot.
 *
 * Prints the CPU index and slot address, then writes the slot address
 * with bit 0 set to MSR_KVM_SYSTEM_TIME_NEW (bit 0 is presumably the
 * enable bit; see the KVM MSR documentation).
 *
 * @data: unused
 */
void kvm_clock_init(void *data)
{
	int cpu = smp_id();
	struct pvclock_vcpu_time_info *area = hv_clock + cpu;
	unsigned long msr_val = (unsigned long)area | 1;

	printf("kvm-clock: cpu %d, msr %p\n", cpu, area);
	wrmsr(MSR_KVM_SYSTEM_TIME_NEW, msr_val);
}
/*
 * Write 0 to MSR_KVM_SYSTEM_TIME_NEW, replacing the address-with-bit-0
 * value written by kvm_clock_init().
 *
 * @data: unused
 */
void kvm_clock_clear(void *data)
{
wrmsr(MSR_KVM_SYSTEM_TIME_NEW, 0LL);
}
static void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
struct pvclock_vcpu_time_info *vcpu_time,
struct timespec *ts)
{
u32 version;
u64 delta;
struct timespec now;
/* get wallclock at system boot */
do {
version = wall_clock->version;
rmb(); /* fetch version before time */
now.tv_sec = wall_clock->sec;
now.tv_nsec = wall_clock->nsec;
rmb(); /* fetch time before checking version */
} while ((wall_clock->version & 1) || (version != wall_clock->version));
delta = pvclock_clocksource_read(vcpu_time); /* time since system boot */
delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;
now.tv_nsec = do_div(delta, NSEC_PER_SEC);
now.tv_sec = delta;
set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
}
/*
 * Fetch the current wall-clock time into @ts.
 *
 * Writes the address of the global wall_clock area to
 * MSR_KVM_WALL_CLOCK_NEW, then combines that area with the calling CPU's
 * time-info slot via pvclock_read_wallclock().
 */
void kvm_get_wallclock(struct timespec *ts)
{
	int cpu = smp_id();

	wrmsr(MSR_KVM_WALL_CLOCK_NEW, (unsigned long)&wall_clock);
	pvclock_read_wallclock(&wall_clock, &hv_clock[cpu], ts);
}
/*
 * Replace the flag mask consulted by pvclock_clocksource_read().
 *
 * @flags: new mask. pvclock_clocksource_read() tests it for
 *         PVCLOCK_RAW_CYCLE_BIT and PVCLOCK_TSC_STABLE_BIT to decide
 *         whether to return the raw reading without the monotonicity
 *         fix-up.
 */
void pvclock_set_flags(unsigned char flags)
{
valid_flags = flags;
}