/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 2006 by Ralf Baechle (ralf@linux-mips.org)
 */
#ifndef __ASM_BARRIER_H
#define __ASM_BARRIER_H

#include <asm/addrspace.h>

/*
 * Sync types defined by the MIPS architecture (document MD00087 table 6.5)
 * These values are used with the sync instruction to perform memory barriers.
 * Types of ordering guarantees available through the SYNC instruction:
 * - Completion Barriers
 * - Ordering Barriers
 * As compared to the completion barrier, the ordering barrier is a
 * lighter-weight operation: it does not require the specified instructions
 * before the SYNC to have already completed. It only requires that specified
 * instructions which are subsequent to the SYNC in the instruction stream are
 * never re-ordered for processing ahead of specified instructions which are
 * before the SYNC in the instruction stream. This potentially reduces the
 * number of cycles the barrier instruction must stall before it completes.
 * Implementations that do not use any of the non-zero values of stype to
 * define different barriers, such as ordering barriers, must make those
 * stype values act the same as stype zero.
 */

/*
 * Completion barriers:
 * - Every synchronizable specified memory instruction (loads or stores or both)
 *   that occurs in the instruction stream before the SYNC instruction must be
 *   already globally performed before any synchronizable specified memory
 *   instructions that occur after the SYNC are allowed to be performed, with
 *   respect to any other processor or coherent I/O module.
 *
 * - The barrier does not guarantee the order in which instruction fetches are
 *   performed.
 *
 * - A stype value of zero will always be defined such that it performs the most
 *   complete set of synchronization operations that are defined. This means
 *   stype zero always does a completion barrier that affects both loads and
 *   stores preceding the SYNC instruction and both loads and stores that are
 *   subsequent to the SYNC instruction. Non-zero values of stype may be defined
 *   by the architecture or specific implementations to perform synchronization
 *   behaviors that are less complete than that of stype zero. If an
 *   implementation does not use one of these non-zero values to define a
 *   different synchronization behavior, then that non-zero value of stype must
 *   act the same as the stype zero completion barrier. This allows software
 *   written for an implementation with a lighter-weight barrier to work on
 *   another implementation which only implements the stype zero completion
 *   barrier.
 *
 * - A completion barrier is required, potentially in conjunction with SSNOP (in
 *   Release 1 of the Architecture) or EHB (in Release 2 of the Architecture),
 *   to guarantee that memory reference results are visible across operating
 *   mode changes. For example, a completion barrier is required on some
 *   implementations on entry to and exit from Debug Mode to guarantee that
 *   memory effects are handled correctly.
 */

/*
 * stype 0 - A completion barrier that affects preceding loads and stores and
 * subsequent loads and stores.
 * Older instructions which must reach the load/store ordering point before the
 * SYNC instruction completes: Loads, Stores
 * Younger instructions which must reach the load/store ordering point only
 * after the SYNC instruction completes: Loads, Stores
 * Older instructions which must be globally performed when the SYNC instruction
 * completes: Loads, Stores
 */
#define STYPE_SYNC 0x0
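
/*
 * Illustrative only: a stype 0 completion barrier could be emitted directly
 * as below, mirroring sync_ginv() at the bottom of this file. Kernel code
 * should normally go through __sync() and the mb()/wmb()/rmb() wrappers
 * defined further down rather than open-coding this:
 *
 *	__asm__ __volatile__("sync\t%0" : : "i" (STYPE_SYNC) : "memory");
 */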

/*
 * Ordering barriers:
 * - Every synchronizable specified memory instruction (loads or stores or both)
 *   that occurs in the instruction stream before the SYNC instruction must
 *   reach a stage in the load/store datapath after which no instruction
 *   re-ordering is possible before any synchronizable specified memory
 *   instruction which occurs after the SYNC instruction in the instruction
 *   stream reaches the same stage in the load/store datapath.
 *
 * - If any memory instruction before the SYNC instruction in program order
 *   generates a memory request to external memory and any memory instruction
 *   after the SYNC instruction in program order also generates a memory
 *   request to external memory, the memory request belonging to the older
 *   instruction must be globally performed before the memory request
 *   belonging to the younger instruction is globally performed.
 *
 * - The barrier does not guarantee the order in which instruction fetches are
 *   performed.
 */

/*
 * stype 0x10 - An ordering barrier that affects preceding loads and stores and
 * subsequent loads and stores.
 * Older instructions which must reach the load/store ordering point before the
 * SYNC instruction completes: Loads, Stores
 * Younger instructions which must reach the load/store ordering point only
 * after the SYNC instruction completes: Loads, Stores
 * Older instructions which must be globally performed when the SYNC instruction
 * completes: N/A
 */
#define STYPE_SYNC_MB 0x10
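
/*
 * Illustrative only: the lighter-weight ordering barrier is emitted with a
 * non-zero stype operand. On implementations that do not define stype 0x10
 * it degrades safely to a full stype 0 completion barrier:
 *
 *	__asm__ __volatile__("sync\t%0" : : "i" (STYPE_SYNC_MB) : "memory");
 */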

/*
 * stype 0x14 - A completion barrier specific to global invalidations
 *
 * When a sync instruction of this type completes, any preceding GINVI or GINVT
 * operation has been globalized & completed on all coherent CPUs. Anything
 * that the GINV* instruction should invalidate will have been invalidated on
 * all coherent CPUs when this instruction completes. It is implementation
 * specific whether the GINV* instructions themselves will ensure completion,
 * or whether this sync type will.
 *
 * In systems implementing global invalidates (ie. with Config5.GI == 2 or 3)
 * this sync type also requires that previous SYNCI operations have completed.
 */
#define STYPE_GINV 0x14
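
/*
 * Illustrative flow only, using a hypothetical ginvt_full() helper standing
 * in for whatever emits the GINVT instruction; sync_ginv() is defined at the
 * bottom of this file:
 *
 *	ginvt_full();	hypothetical: broadcast the TLB invalidate
 *	sync_ginv();	wait until it has completed on all coherent CPUs
 */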

#ifdef CONFIG_CPU_HAS_SYNC
#define __sync()				\
	__asm__ __volatile__(			\
		".set push\n\t"			\
		".set noreorder\n\t"		\
		".set mips2\n\t"		\
		"sync\n\t"			\
		".set pop"			\
		: /* no output */		\
		: /* no input */		\
		: "memory")
#else
#define __sync()	do { } while (0)
#endif

#define __fast_iob()				\
	__asm__ __volatile__(			\
		".set push\n\t"			\
		".set noreorder\n\t"		\
		"lw $0,%0\n\t"			\
		"nop\n\t"			\
		".set pop"			\
		: /* no output */		\
		: "m" (*(int *)CKSEG1)		\
		: "memory")
#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define OCTEON_SYNCW_STR	".set push\n.set arch=octeon\nsyncw\nsyncw\n.set pop\n"
# define __syncw()	__asm__ __volatile__(OCTEON_SYNCW_STR : : : "memory")

# define fast_wmb()	__syncw()
# define fast_rmb()	barrier()
# define fast_mb()	__sync()
# define fast_iob()	do { } while (0)
#else /* ! CONFIG_CPU_CAVIUM_OCTEON */
# define fast_wmb()	__sync()
# define fast_rmb()	__sync()
# define fast_mb()	__sync()
# ifdef CONFIG_SGI_IP28
#  define fast_iob()				\
	__asm__ __volatile__(			\
		".set push\n\t"			\
		".set noreorder\n\t"		\
		"lw $0,%0\n\t"			\
		"sync\n\t"			\
		"lw $0,%0\n\t"			\
		".set pop"			\
		: /* no output */		\
		: "m" (*(int *)CKSEG1ADDR(0x1fa00004)) \
		: "memory")
# else
#  define fast_iob()				\
	do {					\
		__sync();			\
		__fast_iob();			\
	} while (0)
# endif
#endif /* CONFIG_CPU_CAVIUM_OCTEON */

#ifdef CONFIG_CPU_HAS_WB

#include <asm/wbflush.h>

#define mb()		wbflush()
#define iob()		wbflush()

#else /* !CONFIG_CPU_HAS_WB */

#define mb()		fast_mb()
#define iob()		fast_iob()

#endif /* !CONFIG_CPU_HAS_WB */
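
/*
 * Illustrative only (names hypothetical): iob() is the classic MIPS way to
 * flush posted writes to I/O before depending on their effect:
 *
 *	writel(val, dev->ctrl_reg);
 *	iob();		wait until the write has reached the bus
 */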

#define wmb()		fast_wmb()
#define rmb()		fast_rmb()

#if defined(CONFIG_WEAK_ORDERING)
# ifdef CONFIG_CPU_CAVIUM_OCTEON
#  define __smp_mb()	__sync()
#  define __smp_rmb()	barrier()
#  define __smp_wmb()	__syncw()
# else
#  define __smp_mb()	__asm__ __volatile__("sync" : : :"memory")
#  define __smp_rmb()	__asm__ __volatile__("sync" : : :"memory")
#  define __smp_wmb()	__asm__ __volatile__("sync" : : :"memory")
# endif
#else
#define __smp_mb()	barrier()
#define __smp_rmb()	barrier()
#define __smp_wmb()	barrier()
#endif
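
/*
 * Illustrative pairing only; the smp_mb()/smp_rmb()/smp_wmb() wrappers
 * around these come from asm-generic/barrier.h:
 *
 *	CPU 0				CPU 1
 *	WRITE_ONCE(data, 42);		while (!READ_ONCE(flag))
 *	smp_wmb();				;
 *	WRITE_ONCE(flag, 1);		smp_rmb();
 *					r = READ_ONCE(data);	r == 42
 */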

#if defined(CONFIG_WEAK_REORDERING_BEYOND_LLSC) && defined(CONFIG_SMP)
#define __WEAK_LLSC_MB		"	sync	\n"
#else
#define __WEAK_LLSC_MB		"		\n"
#endif

#define smp_llsc_mb()	__asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
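
/*
 * Illustrative only: smp_llsc_mb() is placed after an ll/sc sequence so
 * that, on CPUs which re-order accesses beyond ll/sc, the sequence still
 * acts as a full barrier:
 *
 *	...ll/sc retry loop...
 *	smp_llsc_mb();		order the loop before later accesses
 */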

#ifdef CONFIG_CPU_CAVIUM_OCTEON
#define smp_mb__before_llsc() smp_wmb()
#define __smp_mb__before_llsc() __smp_wmb()
/* Cause previous writes to become visible on all CPUs as soon as possible */
#define nudge_writes() __asm__ __volatile__(".set push\n\t"		\
					    ".set arch=octeon\n\t"	\
					    "syncw\n\t"			\
					    ".set pop" : : : "memory")
#else
#define smp_mb__before_llsc() smp_llsc_mb()
#define __smp_mb__before_llsc() smp_llsc_mb()
#define nudge_writes() mb()
#endif
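
/*
 * Illustrative only (names hypothetical): nudge_writes() is useful after
 * posting writes that another CPU or coherent agent polls for:
 *
 *	WRITE_ONCE(ring->tail, tail);
 *	nudge_writes();		push the update out as soon as possible
 */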

#define __smp_mb__before_atomic()	__smp_mb__before_llsc()
#define __smp_mb__after_atomic()	smp_llsc_mb()
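
/*
 * Illustrative only, via the generic smp_mb__{before,after}_atomic()
 * wrappers: upgrade a non-value-returning atomic to be fully ordered:
 *
 *	smp_mb__before_atomic();
 *	atomic_inc(&v);
 *	smp_mb__after_atomic();
 */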

/*
 * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
 * store or pref) in between an ll & sc can cause the sc instruction to
 * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
 * containing such sequences, this bug bites harder than we might otherwise
 * expect due to reordering & speculation:
 *
 * 1) A memory access appearing prior to the ll in program order may actually
 *    be executed after the ll - this is the reordering case.
 *
 *    In order to avoid this we need to place a memory barrier (ie. a sync
 *    instruction) prior to every ll instruction, in between it & any earlier
 *    memory access instructions. Many of these cases are already covered by
 *    smp_mb__before_llsc() but for the remaining cases, typically ones in
 *    which multiple CPUs may operate on a memory location but ordering is not
 *    usually guaranteed, we use loongson_llsc_mb() below.
 *
 *    This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
 *
 * 2) If a conditional branch exists between an ll & sc with a target outside
 *    of the ll-sc loop, for example an exit upon value mismatch in cmpxchg()
 *    or similar, then misprediction of the branch may allow speculative
 *    execution of memory accesses from outside of the ll-sc loop.
 *
 *    In order to avoid this we need a memory barrier (ie. a sync instruction)
 *    at each affected branch target, for which we also use loongson_llsc_mb()
 *    defined below.
 *
 *    This case affects all current Loongson 3 CPUs.
 */
#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */
#define loongson_llsc_mb()	__asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
#else
#define loongson_llsc_mb()	do { } while (0)
#endif
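
/*
 * Illustrative placement only, sketching a cmpxchg-style loop (the assembly
 * is schematic, not exact kernel code):
 *
 *	loongson_llsc_mb();		case 1: barrier before the ll
 *	1:	ll	%0, mem
 *		bne	%0, old, 2f	branch out on value mismatch
 *		move	%1, new
 *		sc	%1, mem
 *		beqz	%1, 1b		retry if the sc failed
 *	2:
 *	loongson_llsc_mb();		case 2: barrier at the branch target
 */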

static inline void sync_ginv(void)
{
	asm volatile("sync\t%0" :: "i"(STYPE_GINV));
}

#include <asm-generic/barrier.h>

#endif /* __ASM_BARRIER_H */