/* Source: arch/mips/cavium-octeon/csrc-octeon.c (Linux kernel, Git at Google) */

 /*
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
  * Copyright (C) 2007 by Ralf Baechle
  * Copyright (C) 2009, 2012 Cavium, Inc.
  */
 #include <linux/clocksource.h>
 #include <linux/sched/clock.h>
 #include <linux/export.h>
 #include <linux/init.h>
 #include <linux/smp.h>

 #include <asm/cpu-info.h>
 #include <asm/cpu-type.h>
 #include <asm/time.h>

 #include <asm/octeon/octeon.h>
 #include <asm/octeon/cvmx-ipd-defs.h>
 #include <asm/octeon/cvmx-mio-defs.h>
 #include <asm/octeon/cvmx-rst-defs.h>
 #include <asm/octeon/cvmx-fpa-defs.h>

 /*
  * Clock-ratio state.  All of these are written by octeon_setup_delays().
  *
  * f is computed as (2^63 / sdiv) * 2, i.e. a 64-bit fixed-point
  * reciprocal of sdiv: the high 64 bits of (x * f) approximate x / sdiv.
  * For sdiv == 1 the expression wraps to 0; users test f != 0 before
  * multiplying by it.
  */
 static u64 f;
 /* CPU clock multiplier (c_mul); 0 means no ratio has been recorded. */
 static u64 rdiv;
 /* I/O clock multiplier (pnr_mul). */
 static u64 sdiv;
 /* Clock rate divided by 1000000: clock cycles per microsecond. */
 static u64 octeon_udelay_factor;
 /* Per-nanosecond factor, pre-multiplied by 2^16 (see __ndelay()). */
 static u64 octeon_ndelay_factor;

 /*
  * octeon_setup_delays - derive the delay factors and the clock ratio.
  *
  * Computes octeon_udelay_factor and octeon_ndelay_factor from the clock
  * rate, presets preset_lpj, and on OCTEON2/OCTEON3 records the CPU and
  * I/O clock multipliers (rdiv, sdiv) read from the reset/boot CSR together
  * with f, the fixed-point reciprocal of sdiv.
  *
  * If the I/O multiplier reads as zero, rdiv, sdiv and f are left
  * untouched instead of dividing by zero; the users of the ratio then
  * take their unscaled path (they test rdiv != 0).
  */
 void __init octeon_setup_delays(void)
 {
 	u64 cpu_mul = 0;
 	u64 io_mul = 0;

 	octeon_udelay_factor = octeon_get_clock_rate() / 1000000;
 	/*
 	 * For __ndelay we divide by 2^16, so the factor is multiplied
 	 * by the same amount.
 	 */
 	octeon_ndelay_factor = (octeon_udelay_factor * 0x10000ull) / 1000ull;

 	preset_lpj = octeon_get_clock_rate() / HZ;

 	if (current_cpu_type() == CPU_CAVIUM_OCTEON2) {
 		union cvmx_mio_rst_boot rst_boot;

 		rst_boot.u64 = cvmx_read_csr(CVMX_MIO_RST_BOOT);
 		cpu_mul = rst_boot.s.c_mul;	/* CPU clock */
 		io_mul = rst_boot.s.pnr_mul;	/* I/O clock */
 	} else if (current_cpu_type() == CPU_CAVIUM_OCTEON3) {
 		union cvmx_rst_boot rst_boot;

 		rst_boot.u64 = cvmx_read_csr(CVMX_RST_BOOT);
 		cpu_mul = rst_boot.s.c_mul;	/* CPU clock */
 		io_mul = rst_boot.s.pnr_mul;	/* I/O clock */
 	}

 	/*
 	 * Other CPU types, or a zero I/O multiplier: record nothing.  A
 	 * zero divisor below would be a division by zero.
 	 */
 	if (io_mul == 0)
 		return;

 	rdiv = cpu_mul;
 	sdiv = io_mul;
 	/*
 	 * 2^64 / sdiv, built as (2^63 / sdiv) * 2 to stay within 64 bits.
 	 * Wraps to 0 for sdiv == 1, which users treat as "skip the
 	 * multiply by f".
 	 */
 	f = (0x8000000000000000ull / sdiv) * 2;
 }

 /*
  * Set the current core's cvmcount counter to the value of the shared
  * clock counter: FPA_CLK_COUNT when the chip has the FPA3 feature,
  * IPD_CLK_COUNT otherwise.  We do this on all cores as they are brought
  * on-line.  This allows for a read from a local cpu register to
  * access a synchronized counter.
  *
  * When a clock ratio has been recorded (rdiv != 0), the value read is
  * scaled by rdiv/sdiv before it is written to cvmcount.
  */
 void octeon_init_cvmcount(void)
 {
 	u64 clk_reg;
 	unsigned long flags;
 	unsigned loops = 2;

 	clk_reg = octeon_has_feature(OCTEON_FEATURE_FPA3) ?
 		CVMX_FPA_CLK_COUNT : CVMX_IPD_CLK_COUNT;

 	/* Clobber loops so GCC will not unroll the following while loop. */
 	asm("" : "+r" (loops));

 	/* Keep interrupts off between reading the CSR and writing cvmcount. */
 	local_irq_save(flags);
 	/*
 	 * Loop several times so we are executing from the cache,
 	 * which should give more deterministic timing.
 	 */
 	while (loops--) {
 		u64 clk_count = cvmx_read_csr(clk_reg);
 		if (rdiv != 0) {
 			clk_count *= rdiv;
 			if (f != 0) {
 				/*
 				 * clk_count = high 64 bits of (clk_count * f),
 				 * which divides by sdiv since f is the
 				 * fixed-point reciprocal of sdiv.
 				 */
 				asm("dmultu\t%[cnt],%[f]\n\t"
 				    "mfhi\t%[cnt]"
 				    : [cnt] "+r" (clk_count)
 				    : [f] "r" (f)
 				    : "hi", "lo");
 			}
 		}
 		write_c0_cvmcount(clk_count);
 	}
 	local_irq_restore(flags);
 }

 /* Clocksource read callback: return the current cvmcount value. */
 static u64 octeon_cvmcount_read(struct clocksource *cs)
 {
 	const u64 cycles = read_c0_cvmcount();

 	return cycles;
 }

 /*
  * Clocksource backed by the cvmcount register.  The rating is filled in
  * by plat_time_init() before registration.
  */
 static struct clocksource clocksource_mips = {
 	.name	= "OCTEON_CVMCOUNT",
 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 	.mask	= CLOCKSOURCE_MASK(64),
 	.read	= octeon_cvmcount_read,
 };

 /*
  * sched_clock - scale the current cvmcount value by the clocksource's
  * mult and shift.
  *
  * Returns the low 64 bits of (cnt * mult) >> shift, where the product is
  * formed at 128-bit width.
  */
 unsigned long long notrace sched_clock(void)
 {
 	/* 64-bit arithmetic can overflow, so use 128-bit.  */
 	u64 t1, t2, t3;
 	unsigned long long rv;
 	u64 mult = clocksource_mips.mult;
 	u64 shift = clocksource_mips.shift;
 	u64 cnt = read_c0_cvmcount();

 	/*
 	 * HI:LO = cnt * mult
 	 * t1    = ~shift
 	 * t2    = HI, t3 = LO
 	 * t2    = HI << 1
 	 * rv    = LO >> shift
 	 * t1    = t2 << t1, which is HI << (64 - shift) because the
 	 *         variable shifts use only the low six bits of the count
 	 *         (NOTE(review): per the MIPS64 ISA; valid for shift 0..63)
 	 * rv    = t1 | rv
 	 */
 	asm (
 		"dmultu\t%[cnt],%[mult]\n\t"
 		"nor\t%[t1],$0,%[shift]\n\t"
 		"mfhi\t%[t2]\n\t"
 		"mflo\t%[t3]\n\t"
 		"dsll\t%[t2],%[t2],1\n\t"
 		"dsrlv\t%[rv],%[t3],%[shift]\n\t"
 		"dsllv\t%[t1],%[t2],%[t1]\n\t"
 		"or\t%[rv],%[t1],%[rv]\n\t"
 		: [rv] "=&r" (rv), [t1] "=&r" (t1), [t2] "=&r" (t2), [t3] "=&r" (t3)
 		: [cnt] "r" (cnt), [mult] "r" (mult), [shift] "r" (shift)
 		: "hi", "lo");
 	return rv;
 }

 /*
  * plat_time_init - register the cvmcount clocksource.
  *
  * Gives clocksource_mips a rating of 300 and registers it at the rate
  * reported by octeon_get_clock_rate().
  */
 void __init plat_time_init(void)
 {
 	struct clocksource *cs = &clocksource_mips;

 	cs->rating = 300;
 	clocksource_register_hz(cs, octeon_get_clock_rate());
 }

 /*
  * __udelay - spin on cvmcount for a number of microseconds.
  *
  * @us is converted to counter ticks with octeon_udelay_factor; the loop
  * re-reads cvmcount until it reaches the starting value plus that many
  * ticks.
  */
 void __udelay(unsigned long us)
 {
 	u64 now = read_c0_cvmcount();
 	const u64 deadline = now + us * octeon_udelay_factor;

 	while (now < deadline)
 		now = read_c0_cvmcount();
 }
 EXPORT_SYMBOL(__udelay);

 /*
  * __ndelay - spin on cvmcount for a number of nanoseconds.
  *
  * octeon_ndelay_factor carries a 2^16 scale, so the product with @ns is
  * shifted right by 16 to obtain the tick count to wait for.
  */
 void __ndelay(unsigned long ns)
 {
 	u64 now = read_c0_cvmcount();
 	const u64 ticks = (ns * octeon_ndelay_factor) >> 16;
 	const u64 deadline = now + ticks;

 	while (now < deadline)
 		now = read_c0_cvmcount();
 }
 EXPORT_SYMBOL(__ndelay);

 /*
  * __delay - spin until cvmcount has advanced by @loops ticks.
  */
 void __delay(unsigned long loops)
 {
 	u64 now = read_c0_cvmcount();
 	const u64 deadline = now + loops;

 	while (now < deadline)
 		now = read_c0_cvmcount();
 }
 EXPORT_SYMBOL(__delay);


 /**
  * octeon_io_clk_delay - wait for a given number of io clock cycles to pass.
  * @count: The number of io clocks to wait.
  *
  * The count is converted to cvmcount ticks by multiplying by rdiv and,
  * when f is non-zero, dividing by sdiv (via the fixed-point reciprocal
  * f).  With no ratio recorded (rdiv == 0) the count is used unscaled.
  * The function then spins until cvmcount has advanced by that amount.
  */
 void octeon_io_clk_delay(unsigned long count)
 {
 	u64 now = read_c0_cvmcount();
 	u64 ticks = count;
 	u64 deadline;

 	if (rdiv != 0) {
 		ticks *= rdiv;
 		if (f != 0) {
 			/* ticks = high 64 bits of (ticks * f) */
 			asm("dmultu\t%[cnt],%[f]\n\t"
 				"mfhi\t%[cnt]"
 				: [cnt] "+r" (ticks)
 				: [f] "r" (f)
 				: "hi", "lo");
 		}
 	}

 	deadline = now + ticks;
 	while (now < deadline)
 		now = read_c0_cvmcount();
 }
 EXPORT_SYMBOL(octeon_io_clk_delay);
	/*
	* This file is subject to the terms and conditions of the GNU General Public
	* License. See the file "COPYING" in the main directory of this archive
	* for more details.
	*
	* Copyright (C) 2007 by Ralf Baechle
	* Copyright (C) 2009, 2012 Cavium, Inc.
	*/
	#include <linux/clocksource.h>
	#include <linux/sched/clock.h>
	#include <linux/export.h>
	#include <linux/init.h>
	#include <linux/smp.h>

	#include <asm/cpu-info.h>
	#include <asm/cpu-type.h>
	#include <asm/time.h>

	#include <asm/octeon/octeon.h>
	#include <asm/octeon/cvmx-ipd-defs.h>
	#include <asm/octeon/cvmx-mio-defs.h>
	#include <asm/octeon/cvmx-rst-defs.h>
	#include <asm/octeon/cvmx-fpa-defs.h>

	/*
	 * Clock-ratio state.  All of these are written by octeon_setup_delays().
	 *
	 * f is computed as (2^63 / sdiv) * 2, i.e. a 64-bit fixed-point
	 * reciprocal of sdiv: the high 64 bits of (x * f) approximate x / sdiv.
	 * For sdiv == 1 the expression wraps to 0; users test f != 0 before
	 * multiplying by it.
	 */
	static u64 f;
	/* CPU clock multiplier (c_mul); 0 means no ratio has been recorded. */
	static u64 rdiv;
	/* I/O clock multiplier (pnr_mul). */
	static u64 sdiv;
	/* Clock rate divided by 1000000: clock cycles per microsecond. */
	static u64 octeon_udelay_factor;
	/* Per-nanosecond factor, pre-multiplied by 2^16 (see __ndelay()). */
	static u64 octeon_ndelay_factor;

	/*
	 * octeon_setup_delays - derive the delay factors and the clock ratio.
	 *
	 * Computes octeon_udelay_factor and octeon_ndelay_factor from the clock
	 * rate, presets preset_lpj, and on OCTEON2/OCTEON3 records the CPU and
	 * I/O clock multipliers (rdiv, sdiv) read from the reset/boot CSR together
	 * with f, the fixed-point reciprocal of sdiv.
	 *
	 * If the I/O multiplier reads as zero, rdiv, sdiv and f are left
	 * untouched instead of dividing by zero; the users of the ratio then
	 * take their unscaled path (they test rdiv != 0).
	 */
	void __init octeon_setup_delays(void)
	{
		u64 cpu_mul = 0;
		u64 io_mul = 0;

		octeon_udelay_factor = octeon_get_clock_rate() / 1000000;
		/*
		 * For __ndelay we divide by 2^16, so the factor is multiplied
		 * by the same amount.
		 */
		octeon_ndelay_factor = (octeon_udelay_factor * 0x10000ull) / 1000ull;

		preset_lpj = octeon_get_clock_rate() / HZ;

		if (current_cpu_type() == CPU_CAVIUM_OCTEON2) {
			union cvmx_mio_rst_boot rst_boot;

			rst_boot.u64 = cvmx_read_csr(CVMX_MIO_RST_BOOT);
			cpu_mul = rst_boot.s.c_mul;	/* CPU clock */
			io_mul = rst_boot.s.pnr_mul;	/* I/O clock */
		} else if (current_cpu_type() == CPU_CAVIUM_OCTEON3) {
			union cvmx_rst_boot rst_boot;

			rst_boot.u64 = cvmx_read_csr(CVMX_RST_BOOT);
			cpu_mul = rst_boot.s.c_mul;	/* CPU clock */
			io_mul = rst_boot.s.pnr_mul;	/* I/O clock */
		}

		/*
		 * Other CPU types, or a zero I/O multiplier: record nothing.  A
		 * zero divisor below would be a division by zero.
		 */
		if (io_mul == 0)
			return;

		rdiv = cpu_mul;
		sdiv = io_mul;
		/*
		 * 2^64 / sdiv, built as (2^63 / sdiv) * 2 to stay within 64 bits.
		 * Wraps to 0 for sdiv == 1, which users treat as "skip the
		 * multiply by f".
		 */
		f = (0x8000000000000000ull / sdiv) * 2;
	}

	/*
	* Set the current core's cvmcount counter to the value of the shared
	* clock counter: FPA_CLK_COUNT when the chip has the FPA3 feature,
	* IPD_CLK_COUNT otherwise. We do this on all cores as they are brought
	* on-line. This allows for a read from a local cpu register to
	* access a synchronized counter.
	*
	* When a clock ratio has been recorded (rdiv != 0), the value read is
	* scaled by rdiv/sdiv before it is written to cvmcount.
	*/
	void octeon_init_cvmcount(void)
	{
	u64 clk_reg;
	unsigned long flags;
	unsigned loops = 2;

	clk_reg = octeon_has_feature(OCTEON_FEATURE_FPA3) ?
	CVMX_FPA_CLK_COUNT : CVMX_IPD_CLK_COUNT;

	/* Clobber loops so GCC will not unroll the following while loop. */
	asm("" : "+r" (loops));

	/* Keep interrupts off between reading the CSR and writing cvmcount. */
	local_irq_save(flags);
	/*
	* Loop several times so we are executing from the cache,
	* which should give more deterministic timing.
	*/
	while (loops--) {
	u64 clk_count = cvmx_read_csr(clk_reg);
	if (rdiv != 0) {
	clk_count *= rdiv;
	if (f != 0) {
	/*
	* clk_count = high 64 bits of (clk_count * f), which divides
	* by sdiv since f is the fixed-point reciprocal of sdiv.
	*/
	asm("dmultu\t%[cnt],%[f]\n\t"
	"mfhi\t%[cnt]"
	: [cnt] "+r" (clk_count)
	: [f] "r" (f)
	: "hi", "lo");
	}
	}
	write_c0_cvmcount(clk_count);
	}
	local_irq_restore(flags);
	}

	/* Clocksource read callback: return the current cvmcount value. */
	static u64 octeon_cvmcount_read(struct clocksource *cs)
	{
		const u64 cycles = read_c0_cvmcount();

		return cycles;
	}

	/*
	 * Clocksource backed by the cvmcount register.  The rating is filled in
	 * by plat_time_init() before registration.
	 */
	static struct clocksource clocksource_mips = {
		.name	= "OCTEON_CVMCOUNT",
		.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
		.mask	= CLOCKSOURCE_MASK(64),
		.read	= octeon_cvmcount_read,
	};

	/*
	* sched_clock - scale the current cvmcount value by the clocksource's
	* mult and shift.
	*
	* Returns the low 64 bits of (cnt * mult) >> shift, where the product is
	* formed at 128-bit width.
	*/
	unsigned long long notrace sched_clock(void)
	{
	/* 64-bit arithmetic can overflow, so use 128-bit. */
	u64 t1, t2, t3;
	unsigned long long rv;
	u64 mult = clocksource_mips.mult;
	u64 shift = clocksource_mips.shift;
	u64 cnt = read_c0_cvmcount();

	/*
	* HI:LO = cnt * mult
	* t1    = ~shift
	* t2    = HI, t3 = LO
	* t2    = HI << 1
	* rv    = LO >> shift
	* t1    = t2 << t1, which is HI << (64 - shift) because the
	*         variable shifts use only the low six bits of the count
	*         (NOTE(review): per the MIPS64 ISA; valid for shift 0..63)
	* rv    = t1 | rv
	*/
	asm (
	"dmultu\t%[cnt],%[mult]\n\t"
	"nor\t%[t1],$0,%[shift]\n\t"
	"mfhi\t%[t2]\n\t"
	"mflo\t%[t3]\n\t"
	"dsll\t%[t2],%[t2],1\n\t"
	"dsrlv\t%[rv],%[t3],%[shift]\n\t"
	"dsllv\t%[t1],%[t2],%[t1]\n\t"
	"or\t%[rv],%[t1],%[rv]\n\t"
	: [rv] "=&r" (rv), [t1] "=&r" (t1), [t2] "=&r" (t2), [t3] "=&r" (t3)
	: [cnt] "r" (cnt), [mult] "r" (mult), [shift] "r" (shift)
	: "hi", "lo");
	return rv;
	}

	/*
	 * plat_time_init - register the cvmcount clocksource.
	 *
	 * Gives clocksource_mips a rating of 300 and registers it at the rate
	 * reported by octeon_get_clock_rate().
	 */
	void __init plat_time_init(void)
	{
		struct clocksource *cs = &clocksource_mips;

		cs->rating = 300;
		clocksource_register_hz(cs, octeon_get_clock_rate());
	}

	/*
	 * __udelay - spin on cvmcount for a number of microseconds.
	 *
	 * @us is converted to counter ticks with octeon_udelay_factor; the loop
	 * re-reads cvmcount until it reaches the starting value plus that many
	 * ticks.
	 */
	void __udelay(unsigned long us)
	{
		u64 now = read_c0_cvmcount();
		const u64 deadline = now + us * octeon_udelay_factor;

		while (now < deadline)
			now = read_c0_cvmcount();
	}
	EXPORT_SYMBOL(__udelay);

	/*
	 * __ndelay - spin on cvmcount for a number of nanoseconds.
	 *
	 * octeon_ndelay_factor carries a 2^16 scale, so the product with @ns is
	 * shifted right by 16 to obtain the tick count to wait for.
	 */
	void __ndelay(unsigned long ns)
	{
		u64 now = read_c0_cvmcount();
		const u64 ticks = (ns * octeon_ndelay_factor) >> 16;
		const u64 deadline = now + ticks;

		while (now < deadline)
			now = read_c0_cvmcount();
	}
	EXPORT_SYMBOL(__ndelay);

	/*
	 * __delay - spin until cvmcount has advanced by @loops ticks.
	 */
	void __delay(unsigned long loops)
	{
		u64 now = read_c0_cvmcount();
		const u64 deadline = now + loops;

		while (now < deadline)
			now = read_c0_cvmcount();
	}
	EXPORT_SYMBOL(__delay);


	/**
	 * octeon_io_clk_delay - wait for a given number of io clock cycles to pass.
	 * @count: The number of io clocks to wait.
	 *
	 * The count is converted to cvmcount ticks by multiplying by rdiv and,
	 * when f is non-zero, dividing by sdiv (via the fixed-point reciprocal
	 * f).  With no ratio recorded (rdiv == 0) the count is used unscaled.
	 * The function then spins until cvmcount has advanced by that amount.
	 */
	void octeon_io_clk_delay(unsigned long count)
	{
		u64 now = read_c0_cvmcount();
		u64 ticks = count;
		u64 deadline;

		if (rdiv != 0) {
			ticks *= rdiv;
			if (f != 0) {
				/* ticks = high 64 bits of (ticks * f) */
				asm("dmultu\t%[cnt],%[f]\n\t"
					"mfhi\t%[cnt]"
					: [cnt] "+r" (ticks)
					: [f] "r" (f)
					: "hi", "lo");
			}
		}

		deadline = now + ticks;
		while (now < deadline)
			now = read_c0_cvmcount();
	}
	EXPORT_SYMBOL(octeon_io_clk_delay);