/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
 */
#ifndef _ASM_LOONGARCH_XOR_H
#define _ASM_LOONGARCH_XOR_H

#include <asm/cpu-features.h>
#include <asm/xor_simd.h>

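/*
 * XOR block templates backed by 128-bit LSX vectors. The xor_lsx_* routines
 * are declared in <asm/xor_simd.h>; XOR_SPEED_LSX() submits this template to
 * the generic speed test only when the running CPU actually reports LSX.
 */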
#ifdef CONFIG_CPU_HAS_LSX
static struct xor_block_template xor_block_lsx = {
	.name = "lsx",
	.do_2 = xor_lsx_2,
	.do_3 = xor_lsx_3,
	.do_4 = xor_lsx_4,
	.do_5 = xor_lsx_5,
};

#define XOR_SPEED_LSX()					\
	do {						\
		if (cpu_has_lsx)			\
			xor_speed(&xor_block_lsx);	\
	} while (0)
#else /* CONFIG_CPU_HAS_LSX */
#define XOR_SPEED_LSX()
#endif /* CONFIG_CPU_HAS_LSX */

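/*
 * Same pattern with 256-bit LASX vectors; the template is benchmarked only
 * when the kernel is built with LASX support and cpu_has_lasx is set at
 * runtime.
 */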
#ifdef CONFIG_CPU_HAS_LASX
static struct xor_block_template xor_block_lasx = {
	.name = "lasx",
	.do_2 = xor_lasx_2,
	.do_3 = xor_lasx_3,
	.do_4 = xor_lasx_4,
	.do_5 = xor_lasx_5,
};

#define XOR_SPEED_LASX()				\
	do {						\
		if (cpu_has_lasx)			\
			xor_speed(&xor_block_lasx);	\
	} while (0)
#else /* CONFIG_CPU_HAS_LASX */
#define XOR_SPEED_LASX()
#endif /* CONFIG_CPU_HAS_LASX */

/*
 * For grins, also test the generic routines.
 *
 * More importantly: it cannot be ruled out at this point in time that some
 * future (maybe reduced) models could run the vector algorithms slower than
 * the scalar ones, whether for errata or micro-op reasons. It may be
 * appropriate to revisit this after one or two more uarch generations.
 */
#include <asm-generic/xor.h>

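/*
 * XOR_TRY_TEMPLATES is expanded by the generic calibration pass
 * (calibrate_xor_blocks() in crypto/xor.c): each xor_speed() invocation
 * benchmarks one candidate, and the fastest template ends up backing
 * xor_blocks(). The scalar 8regs/32regs templates referenced here are
 * provided by <asm-generic/xor.h>.
 */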
#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES				\
	do {						\
		xor_speed(&xor_block_8regs);		\
		xor_speed(&xor_block_8regs_p);		\
		xor_speed(&xor_block_32regs);		\
		xor_speed(&xor_block_32regs_p);		\
		XOR_SPEED_LSX();			\
		XOR_SPEED_LASX();			\
	} while (0)

#endif /* _ASM_LOONGARCH_XOR_H */