// tools/testing/selftests/arm64/fp/fpsimd-test.S - linux - Git at Google

 // SPDX-License-Identifier: GPL-2.0-only
 // Copyright (C) 2015-2019 ARM Limited.
 // Original author: Dave Martin <Dave.Martin@arm.com>
 //
 // Simple FPSIMD context switch test
 // Repeatedly writes unique test patterns into each FPSIMD register
 // and reads them back to verify integrity.
 //
 // for x in `seq 1 NR_CPUS`; do fpsimd-test & pids=$pids\ $! ; done
 // (leave it running for as long as you want...)
 // kill $pids

 #include <asm/unistd.h>
 #include "assembler.h"
 #include "asm-offsets.h"

 // Number of V-registers: passed to define_accessor and used as the loop
 // bound when _start sets up and checks the registers.
 #define NVR	32
 // Size in bytes of one register image: 128 bits / 8 = 16.
 #define MAXVL_B	(128 / 8)

 // Load V-register v<Vn>, as two 64-bit lanes, from the address in x<Xt>.
 // Vn and Xt are bare register numbers, not register names.
 .macro _vldr Vn:req, Xt:req
 	ld1	{v\Vn\().2d}, [x\Xt]
 .endm

 // Store V-register v<Vn>, as two 64-bit lanes, to the address in x<Xt>.
 // Vn and Xt are bare register numbers, not register names.
 .macro _vstr Vn:req, Xt:req
 	st1	{v\Vn\().2d}, [x\Xt]
 .endm

 // Generate accessor functions to read/write programmatically selected
 // FPSIMD registers.
 // x0 is the register index to access
 // x1 is the memory address to read from (setv) or store to (getv)
 // All clobber x0-x2
 // setv is built from _vldr (memory -> register); getv is built from _vstr
 // (register -> memory).
 define_accessor setv, NVR, _vldr
 define_accessor getv, NVR, _vstr

 // Write one character to stdout.
 // x0: character; the whole register is pushed and the first byte of the
 //     pushed slot is what gets written.
 // Clobbers x0-x2,x8
 function putc
 	str	x0, [sp, #-16]!		// push the character into a 16-byte slot

 	mov	x8, #__NR_write
 	mov	x2, #1			// length: one byte
 	mov	x1, sp			// buffer: the slot just pushed
 	mov	x0, #1			// STDOUT_FILENO
 	svc	#0

 	add	sp, sp, #16		// drop the slot
 	ret
 endfunction

 // Write the NUL-terminated string at address x0 to stdout.
 // The terminator itself is not written.
 // Clobbers x0-x3,x8
 function puts
 	mov	x1, x0			// x1 = start of string (write buffer)
 	mov	x2, xzr			// x2 = number of bytes found so far

 .Lputs_scan:
 	ldrb	w3, [x0], #1
 	cbz	w3, .Lputs_emit
 	add	x2, x2, #1
 	b	.Lputs_scan

 .Lputs_emit:
 	mov	x8, #__NR_write
 	mov	w0, #1			// STDOUT_FILENO
 	svc	#0

 	ret
 endfunction

 // Utility macro to print a literal string
 // The text is placed in a mergeable string section and its address is
 // handed to the puts function in x0.
 // Clobbers x0-x4,x8 (and x30, because of the bl)
 .macro puts string
 	.pushsection .rodata.str1.1, "aMS", 1
 .L__puts_literal\@: .string "\string"
 	.popsection

 	ldr	x0, =.L__puts_literal\@
 	bl	puts
 .endm

 // Print an unsigned decimal number x0 to stdout
 // The digits are built backwards, ending with a NUL, in the 32-byte frame
 // allocated here: 8 bytes hold x30, which leaves room for the at most 20
 // digits of a 64-bit value plus the terminator.
 // The divide loop always runs at least once, so 0 is printed as "0" without
 // needing a special case.
 // Clobbers x0-x4,x8
 function putdec
 	mov	x1, sp			// x1 = one past the end of the digit buffer
 	str	x30, [sp, #-32]!	// Result can't be > 20 digits

 	strb	wzr, [x1, #-1]!		// Write the NUL terminator

 	mov	x2, #10
 .Lputdec_digit:
 	udiv	x3, x0, x2		// x3 = quotient
 	msub	x0, x3, x2, x0		// x0 = remainder
 	add	w0, w0, #'0'
 	strb	w0, [x1, #-1]!		// store digits from least significant up
 	mov	x0, x3
 	cbnz	x3, .Lputdec_digit

 	mov	x0, x1			// x1 now points at the first digit
 	bl	puts

 	ldr	x30, [sp], #32
 	ret
 endfunction

 // Print an unsigned decimal number x0 to stdout, followed by a newline
 // Clobbers x0-x5,x8
 function putdecn
 	mov	x5, x30			// x5 is not touched by putdec

 	bl	putdec
 	mov	x30, x5			// put our return address back ...
 	mov	x0, #'\n'
 	b	putc			// ... so putc returns straight to our caller
 endfunction


 // Print the low byte of w0 to stdout as two hex digits, high nibble first.
 // The low nibble is printed by falling through into puthexnibble, which
 // must therefore directly follow this function.
 // Clobbers x0-x3,x8
 function puthexb
 	str	x30, [sp, #-0x10]!

 	mov	w3, w0			// keep the byte; puthexnibble leaves w3 alone
 	lsr	w0, w0, #4
 	bl	puthexnibble		// high nibble
 	mov	w0, w3

 	ldr	x30, [sp], #0x10
 	// fall through to puthexnibble
 endfunction
 // Print the low 4 bits of w0 to stdout as one lowercase hex digit.
 // Clobbers x0-x2,x8
 function puthexnibble
 	and	w0, w0, #0xf
 	add	w1, w0, #'0'		// candidate digit for 0-9
 	add	w2, w0, #'a' - 10	// candidate digit for a-f
 	cmp	w0, #10
 	csel	w0, w1, w2, lo
 	b	putc			// tail call; putc returns to our caller
 endfunction

 // Print x1 bytes starting at address x0 to stdout as hex digits.
 // x0=data in, x1=size in, clobbers x0-x5,x8
 function dumphex
 	str	x30, [sp, #-0x10]!

 	mov	x4, x0			// x4 = read cursor
 	mov	x5, x1			// x5 = bytes left
 	cbz	x5, .Ldumphex_done

 .Ldumphex_next:
 	ldrb	w0, [x4], #1
 	bl	puthexb			// preserves x4 and x5
 	subs	x5, x5, #1
 	b.ne	.Ldumphex_next

 .Ldumphex_done:
 	ldr	x30, [sp], #0x10
 	ret
 endfunction

 // Storage that shadows the FPSIMD register contents:
 //   vref    - NVR slots of MAXVL_B bytes, one per V-register
 //   scratch - one MAXVL_B-byte staging buffer
 // Both live in .data and start on a 16-byte boundary.
 .pushsection .data
 .p2align 4
 vref:
 	.space	MAXVL_B * NVR
 scratch:
 	.space	MAXVL_B
 .popsection

 // Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
 // On return x0 and x1 point just past the copied bytes.
 // Clobbers x0-x3
 function memcpy
 	cbz	x2, .Lmemcpy_done
 .Lmemcpy_byte:
 	ldrb	w3, [x1], #1
 	strb	w3, [x0], #1
 	subs	x2, x2, #1
 	b.ne	.Lmemcpy_byte
 .Lmemcpy_done:
 	ret
 endfunction

 // Generate a test pattern for one FPSIMD register and leave it in scratch
 // x0: pid	(16 bits)
 // x1: register number (6 bits)
 // x2: generation (4 bits)
 // The first word is pid | regnum << 16 | generation << 28; every following
 // word adds 1 << 22.  MAXVL_B / 4 words are written.
 // Clobbers x0-x2
 function pattern
 	orr	w1, w0, w1, lsl #16
 	orr	w2, w1, w2, lsl #28	// w2 = first pattern word

 	mov	w1, #MAXVL_B / 4	// w1 = words left to write
 	ldr	x0, =scratch

 .Lpattern_word:
 	str	w2, [x0], #4
 	subs	w1, w1, #1
 	add	w2, w2, #(1 << 22)	// plain add: flags from subs are kept
 	b.ne	.Lpattern_word

 	ret
 endfunction

 // Get the address of shadow data for FPSIMD V-register V<xn>
 // xd:    destination register, receives vref + 16 * xn; it is loaded
 //        first, so it must not be the same register as xn
 // xn:    register holding the V-register number
 // nrtmp: number of a scratch X register; it is left holding 16, and
 //        setup_vreg and check_vreg reuse that value as a byte count
 .macro _adrv xd, xn, nrtmp
 	ldr	\xd, =vref
 	mov	x\nrtmp, #16
 	madd	\xd, x\nrtmp, \xn, \xd
 .endm

 // Set up test pattern in a FPSIMD V-register
 // x0: pid
 // x1: register number
 // x2: generation
 // The pattern is written to the register's slot in vref and then loaded
 // into the register itself.
 // Clobbers x0-x6
 function setup_vreg
 	mov	x4, x30			// save return address across the calls below

 	mov	x6, x1			// keep the register number
 	bl	pattern			// pattern -> scratch
 	_adrv	x0, x6, 2		// x0 = shadow slot; also leaves x2 = 16
 	mov	x5, x0
 	ldr	x1, =scratch
 	bl	memcpy			// copy x2 (16) bytes from scratch to the slot

 	mov	x0, x6
 	mov	x1, x5
 	bl	setv			// load the register from its shadow slot

 	ret	x4
 endfunction

 // Fill x1 bytes starting at x0 with 0xae (for canary purposes)
 // Clobbers x1, x2; x0 is also advanced past the filled area by memfill.
 function memfill_ae
 	mov	w2, #0xae
 	b	memfill			// tail call; memfill returns to our caller
 endfunction

 // Fill x1 bytes starting at x0 with 0.
 // Clobbers x1, x2; x0 is also advanced past the filled area by memfill.
 // There is no branch or ret here: memfill must directly follow this function.
 function memclr
 	mov	w2, #0
 endfunction
 	// fall through to memfill

 // Trivial memory fill: fill x1 bytes starting at address x0 with byte w2
 // On return x0 points just past the filled area.
 // Clobbers x0, x1
 function memfill
 	cbz	x1, .Lmemfill_done

 .Lmemfill_byte:
 	strb	w2, [x0], #1
 	subs	x1, x1, #1
 	b.ne	.Lmemfill_byte

 .Lmemfill_done:
 	ret
 endfunction

 // Trivial memory compare: compare x2 bytes starting at address x0 with
 // bytes starting at address x1.
 // Returns only if all bytes match; otherwise, the program is aborted.
 // x0, x1 and x2 are left unmodified, so that on a mismatch barf receives
 // both buffers and their full size.  (Counting x2 down would make barf dump
 // only the bytes that were still left to compare.)
 // Clobbers x3-x5.
 function memcmp
 	cbz	x2, 1f

 	mov	x5, #0			// x5 = index of the next byte
 0:	ldrb	w3, [x0, x5]
 	ldrb	w4, [x1, x5]
 	add	x5, x5, #1
 	cmp	w3, w4
 	b.ne	barf			// reports the mismatch and exits
 	cmp	x5, x2
 	b.ne	0b

 1:	ret
 endfunction

 // Verify that a FPSIMD V-register matches its shadow in memory, else abort
 // x0: reg number
 // Clobbers x0-x7.
 function check_vreg
 	mov	x3, x30			// save return address across the calls below

 	_adrv	x5, x0, 6		// x5 = shadow slot; also leaves x6 = 16
 	mov	x4, x0
 	ldr	x7, =scratch

 	mov	x0, x7
 	mov	x1, x6
 	bl	memfill_ae		// fill scratch with the 0xae canary first

 	mov	x0, x4
 	mov	x1, x7
 	bl	getv			// store the live register into scratch

 	mov	x0, x5			// expected: shadow slot
 	mov	x1, x7			// actual: scratch
 	mov	x2, x6			// size: 16 bytes
 	mov	x30, x3
 	b	memcmp			// tail call; memcmp returns to our caller
 endfunction

 // Handler installed by _start for SIGUSR1.
 // x2 is used as the base for the ucontext_regs offsets, i.e. the saved
 // registers of the interrupted code.
 // Any FPSIMD register modified here can cause corruption in the main
 // thread -- but *only* the registers modified here.
 function irritator_handler
 	// Increment the irritation signal count (x23):
 	ldr	x0, [x2, #ucontext_regs + 8 * 23]
 	add	x0, x0, #1
 	str	x0, [x2, #ucontext_regs + 8 * 23]

 	// Corrupt some random V-regs
 	// NOTE(review): the adr result in x0 is not read again in this handler;
 	// presumably a leftover from a sibling test -- confirm before removing.
 	adr	x0, .text + (irritator_handler - .text) / 16 * 16
 	movi	v0.8b, #7
 	movi	v9.16b, #9
 	movi	v31.8b, #31

 	ret
 endfunction

 // Handler installed by _start for SIGINT and SIGTERM: print a summary line
 // and exit with status 0.
 // w0: signal number
 // x2: base for the ucontext_regs offsets; the saved x22 and x23 are read as
 //     the iteration count and the irritation signal count
 // Does not return.
 function terminate_handler
 	mov	w21, w0			// signal number, kept across the print helpers
 	mov	x20, x2			// context pointer, kept across the print helpers

 	puts	"Terminated by signal "
 	mov	w0, w21
 	bl	putdec
 	puts	", no error, iterations="
 	ldr	x0, [x20, #ucontext_regs + 8 * 22]
 	bl	putdec
 	puts	", signals="
 	ldr	x0, [x20, #ucontext_regs + 8 * 23]
 	bl	putdecn

 	mov	x0, #0			// exit status
 	mov	x8, #__NR_exit
 	svc	#0
 endfunction

 // Install a signal handler with rt_sigaction; abort the program on failure.
 // w0: signal number
 // x1: sa_action
 // w2: sa_flags
 // The stack frame holds x30 at [sp] and a zeroed sigaction structure of
 // sa_sz bytes at [sp, #16].
 // Clobbers x0-x6,x8
 function setsignal
 	sub	sp, sp, #((sa_sz + 15) / 16 * 16 + 16)
 	str	x30, [sp]

 	mov	w6, w2			// flags
 	mov	x5, x1			// handler
 	mov	w4, w0			// signal number

 	mov	x1, #sa_sz
 	add	x0, sp, #16
 	bl	memclr			// preserves x4-x6

 	add	x1, sp, #16		// x1 = new action
 	str	x5, [x1, #sa_handler]
 	str	w6, [x1, #sa_flags]
 	mov	w0, w4
 	mov	x2, xzr			// no old action wanted
 	mov	x3, #sa_mask_sz
 	mov	x8, #__NR_rt_sigaction
 	svc	#0

 	cbnz	w0, .Lsetsignal_fail

 	ldr	x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
 	ret

 .Lsetsignal_fail:
 	puts	"sigaction failure\n"
 	b	.Labort
 endfunction

 // Main program entry point
 // Register use:
 //   x19 vector length in bits (fixed at 128 here)
 //   x20 PID
 //   x21 index of the V-register being set up or checked
 //   x22 generation number (iteration count)
 //   x23 irritation signal count (incremented by irritator_handler)
 // The test loop never exits by itself; the program ends through
 // terminate_handler, barf or .Labort.
 .globl _start
 function _start
 _start:
 	// Sanity-check and report the vector length

 	mov	x19, #128
 	cmp	x19, #128
 	b.lo	1f
 	cmp	x19, #2048
 	b.hi	1f
 	tst	x19, #(8 - 1)
 	b.eq	2f

 1:	puts	"Bad vector length: "
 	mov	x0, x19
 	bl	putdecn
 	b	.Labort

 2:	puts	"Vector length:\t"
 	mov	x0, x19
 	bl	putdec
 	puts	" bits\n"

 	// Obtain our PID, to ensure test pattern uniqueness between processes

 	mov	x8, #__NR_getpid
 	svc	#0
 	mov	x20, x0

 	puts	"PID:\t"
 	mov	x0, x20
 	bl	putdecn

 	mov	x23, #0		// Irritation signal count

 	mov	w0, #SIGINT
 	adr	x1, terminate_handler
 	mov	w2, #SA_SIGINFO
 	bl	setsignal

 	mov	w0, #SIGTERM
 	adr	x1, terminate_handler
 	mov	w2, #SA_SIGINFO
 	bl	setsignal

 	mov	w0, #SIGUSR1
 	adr	x1, irritator_handler
 	mov	w2, #SA_SIGINFO
 	orr	w2, w2, #SA_NODEFER
 	bl	setsignal

 	mov	x22, #0		// generation number, increments per iteration
 .Ltest_loop:

 	mov	x21, #0		// Set up V-regs & shadow with test pattern
 0:	mov	x0, x20
 	mov	x1, x21
 	and	x2, x22, #0xf
 	bl	setup_vreg
 	add	x21, x21, #1
 	cmp	x21, #NVR
 	b.lo	0b

 	// Give up the CPU between setting up and checking the registers:
 	mov	x8, #__NR_sched_yield	// Encourage preemption
 	svc	#0

 	mov	x21, #0
 0:	mov	x0, x21
 	bl	check_vreg
 	add	x21, x21, #1
 	cmp	x21, #NVR
 	b.lo	0b

 	add	x22, x22, #1
 	b	.Ltest_loop

 	// Abort this process only.  The PID is fetched again because x20 is not
 	// yet set on the "Bad vector length" path; passing pid 0 to kill would
 	// signal every process in the process group.
 .Labort:
 	mov	x8, #__NR_getpid
 	svc	#0			// x0 = our PID
 	mov	x1, #SIGABRT
 	mov	x8, #__NR_kill
 	svc	#0

 	// Not expected to be reached; exit rather than run into the next function.
 	mov	x0, #1
 	mov	x8, #__NR_exit
 	svc	#0
 endfunction

 // Report a register/shadow mismatch and exit with status 1.
 // x0: expected data
 // x1: actual data
 // x2: data size in bytes
 // Also prints x20 (PID), x22 (iteration) and x21 (register number) as left
 // by _start.  Does not return.
 function barf
 	mov	x10, x0	// expected data
 	mov	x11, x1	// actual data
 	mov	x12, x2	// data size

 	puts	"Mismatch: PID="
 	mov	x0, x20
 	bl	putdec
 	puts	", iteration="
 	mov	x0, x22
 	bl	putdec
 	puts	", reg="
 	mov	x0, x21
 	bl	putdecn
 	puts	"\tExpected ["
 	mov	x0, x10
 	mov	x1, x12
 	bl	dumphex
 	puts	"]\n\tGot      ["
 	mov	x0, x11
 	mov	x1, x12
 	bl	dumphex
 	puts	"]\n"

 	mov	x8, #__NR_exit
 	mov	x0, #1			// exit status is the first syscall argument
 	svc	#0
 endfunction
	// SPDX-License-Identifier: GPL-2.0-only
	// Copyright (C) 2015-2019 ARM Limited.
	// Original author: Dave Martin <Dave.Martin@arm.com>
	//
	// Simple FPSIMD context switch test
	// Repeatedly writes unique test patterns into each FPSIMD register
	// and reads them back to verify integrity.
	//
	// for x in `seq 1 NR_CPUS`; do fpsimd-test & pids=$pids\ $! ; done
	// (leave it running for as long as you want...)
	// kill $pids

	#include <asm/unistd.h>
	#include "assembler.h"
	#include "asm-offsets.h"

	// Number of V-registers: passed to define_accessor and used as the loop
	// bound when _start sets up and checks the registers.
	#define NVR 32
	// Size in bytes of one register image: 128 bits / 8 = 16.
	#define MAXVL_B (128 / 8)

	// Load V-register v<Vn>, as two 64-bit lanes, from the address in x<Xt>.
	// Vn and Xt are bare register numbers, not register names.
	.macro _vldr Vn:req, Xt:req
	ld1 {v\Vn\().2d}, [x\Xt]
	.endm

	// Store V-register v<Vn>, as two 64-bit lanes, to the address in x<Xt>.
	// Vn and Xt are bare register numbers, not register names.
	.macro _vstr Vn:req, Xt:req
	st1 {v\Vn\().2d}, [x\Xt]
	.endm

	// Generate accessor functions to read/write programmatically selected
	// FPSIMD registers.
	// x0 is the register index to access
	// x1 is the memory address to read from (setv) or store to (getv)
	// All clobber x0-x2
	// setv is built from _vldr (memory -> register); getv is built from _vstr
	// (register -> memory).
	define_accessor setv, NVR, _vldr
	define_accessor getv, NVR, _vstr

	// Write one character to stdout.
	// x0: character; the whole register is pushed and the first byte of the
	//     pushed slot is what gets written.
	// Clobbers x0-x2,x8
	function putc
	str x0, [sp, #-16]! // push the character into a 16-byte slot

	mov x8, #__NR_write
	mov x2, #1 // length: one byte
	mov x1, sp // buffer: the slot just pushed
	mov x0, #1 // STDOUT_FILENO
	svc #0

	add sp, sp, #16 // drop the slot
	ret
	endfunction

	// Write the NUL-terminated string at address x0 to stdout.
	// The terminator itself is not written.
	// Clobbers x0-x3,x8
	function puts
	mov x2, xzr // x2 = number of bytes found so far
	mov x1, x0 // x1 = start of string (write buffer)

	0: ldrb w3, [x0], #1
	cbz w3, 1f
	add x2, x2, #1
	b 0b

	1: mov x8, #__NR_write
	mov w0, #1 // STDOUT_FILENO
	svc #0

	ret
	endfunction

	// Utility macro to print a literal string
	// The text is placed in a mergeable string section and its address is
	// handed to the puts function in x0.
	// Clobbers x0-x4,x8 (and x30, because of the bl)
	.macro puts string
	.pushsection .rodata.str1.1, "aMS", 1
	.L__puts_literal\@: .string "\string"
	.popsection

	ldr x0, =.L__puts_literal\@
	bl puts
	.endm

	// Print an unsigned decimal number x0 to stdout
	// The digits are built backwards, ending with a NUL, in the 32-byte frame
	// allocated here: 8 bytes hold x30, which leaves room for the at most 20
	// digits of a 64-bit value plus the terminator.
	// The divide loop always runs at least once, so 0 is printed as "0" without
	// needing a special case.
	// Clobbers x0-x4,x8
	function putdec
	mov x1, sp // x1 = one past the end of the digit buffer
	str x30, [sp, #-32]! // Result can't be > 20 digits

	strb wzr, [x1, #-1]! // Write the NUL terminator

	mov x2, #10
	0: udiv x3, x0, x2 // x3 = quotient
	msub x0, x3, x2, x0 // x0 = remainder
	add w0, w0, #'0'
	strb w0, [x1, #-1]! // store digits from least significant up
	mov x0, x3
	cbnz x3, 0b

	mov x0, x1 // x1 now points at the first digit
	bl puts

	ldr x30, [sp], #32
	ret
	endfunction

	// Print an unsigned decimal number x0 to stdout, followed by a newline
	// Clobbers x0-x5,x8
	function putdecn
	mov x5, x30 // x5 is not touched by putdec

	bl putdec
	mov x30, x5 // put our return address back ...
	mov x0, #'\n'
	b putc // ... so putc returns straight to our caller
	endfunction


	// Print the low byte of w0 to stdout as two hex digits, high nibble first.
	// The low nibble is printed by falling through into puthexnibble, which
	// must therefore directly follow this function.
	// Clobbers x0-x3,x8
	function puthexb
	str x30, [sp, #-0x10]!

	mov w3, w0 // keep the byte; puthexnibble leaves w3 alone
	lsr w0, w0, #4
	bl puthexnibble // high nibble
	mov w0, w3

	ldr x30, [sp], #0x10
	// fall through to puthexnibble
	endfunction
	// Print the low 4 bits of w0 to stdout as one lowercase hex digit.
	// Clobbers x0-x2,x8
	function puthexnibble
	and w0, w0, #0xf
	add w1, w0, #'0' // candidate digit for 0-9
	add w2, w0, #'a' - 10 // candidate digit for a-f
	cmp w0, #10
	csel w0, w1, w2, lo
	b putc // tail call; putc returns to our caller
	endfunction

	// Print x1 bytes starting at address x0 to stdout as hex digits.
	// x0=data in, x1=size in, clobbers x0-x5,x8
	function dumphex
	str x30, [sp, #-0x10]!

	mov x5, x1 // x5 = bytes left
	mov x4, x0 // x4 = read cursor
	cbz x5, 1f

	0: ldrb w0, [x4], #1
	bl puthexb // preserves x4 and x5
	subs x5, x5, #1
	b.ne 0b

	1: ldr x30, [sp], #0x10
	ret
	endfunction

	// Storage that shadows the FPSIMD register contents:
	//   vref    - NVR slots of MAXVL_B bytes, one per V-register
	//   scratch - one MAXVL_B-byte staging buffer
	// Both live in .data and start on a 16-byte boundary.
	.pushsection .data
	.p2align 4
	vref:
	.space MAXVL_B * NVR
	scratch:
	.space MAXVL_B
	.popsection

	// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
	// On return x0 and x1 point just past the copied bytes.
	// Clobbers x0-x3
	function memcpy
	cbz x2, 1f
	0: ldrb w3, [x1], #1
	subs x2, x2, #1
	strb w3, [x0], #1 // store does not affect the flags from subs
	b.ne 0b
	1: ret
	endfunction

	// Generate a test pattern for one FPSIMD register and leave it in scratch
	// x0: pid (16 bits)
	// x1: register number (6 bits)
	// x2: generation (4 bits)
	// The first word is pid | regnum << 16 | generation << 28; every following
	// word adds 1 << 22.  MAXVL_B / 4 words are written.
	// Clobbers x0-x2
	function pattern
	orr w1, w0, w1, lsl #16
	orr w2, w1, w2, lsl #28 // w2 = first pattern word

	mov w1, #MAXVL_B / 4 // w1 = words left to write
	ldr x0, =scratch

	0: str w2, [x0], #4
	subs w1, w1, #1
	add w2, w2, #(1 << 22) // plain add: flags from subs are kept
	b.ne 0b

	ret
	endfunction

	// Get the address of shadow data for FPSIMD V-register V<xn>
	// xd:    destination register, receives vref + 16 * xn; it is loaded
	//        first, so it must not be the same register as xn
	// xn:    register holding the V-register number
	// nrtmp: number of a scratch X register; it is left holding 16, and
	//        setup_vreg and check_vreg reuse that value as a byte count
	.macro _adrv xd, xn, nrtmp
	ldr \xd, =vref
	mov x\nrtmp, #16
	madd \xd, x\nrtmp, \xn, \xd
	.endm

	// Set up test pattern in a FPSIMD V-register
	// x0: pid
	// x1: register number
	// x2: generation
	// The pattern is written to the register's slot in vref and then loaded
	// into the register itself.
	// Clobbers x0-x6
	function setup_vreg
	mov x4, x30 // save return address across the calls below

	mov x6, x1 // keep the register number
	bl pattern // pattern -> scratch
	_adrv x0, x6, 2 // x0 = shadow slot; also leaves x2 = 16
	mov x5, x0
	ldr x1, =scratch
	bl memcpy // copy x2 (16) bytes from scratch to the slot

	mov x0, x6
	mov x1, x5
	bl setv // load the register from its shadow slot

	ret x4
	endfunction

	// Fill x1 bytes starting at x0 with 0xae (for canary purposes)
	// Clobbers x1, x2; x0 is also advanced past the filled area by memfill.
	function memfill_ae
	mov w2, #0xae
	b memfill // tail call; memfill returns to our caller
	endfunction

	// Fill x1 bytes starting at x0 with 0.
	// Clobbers x1, x2; x0 is also advanced past the filled area by memfill.
	// There is no branch or ret here: memfill must directly follow this function.
	function memclr
	mov w2, #0
	endfunction
	// fall through to memfill

	// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2
	// On return x0 points just past the filled area.
	// Clobbers x0, x1
	function memfill
	cbz x1, 1f

	0: subs x1, x1, #1
	strb w2, [x0], #1 // store does not affect the flags from subs
	b.ne 0b

	1: ret
	endfunction

	// Trivial memory compare: compare x2 bytes starting at address x0 with
	// bytes starting at address x1.
	// Returns only if all bytes match; otherwise, the program is aborted.
	// x0, x1 and x2 are left unmodified, so that on a mismatch barf receives
	// both buffers and their full size.  (Counting x2 down would make barf dump
	// only the bytes that were still left to compare.)
	// Clobbers x3-x5.
	function memcmp
	cbz x2, 1f

	mov x5, #0 // x5 = index of the next byte
	0: ldrb w3, [x0, x5]
	ldrb w4, [x1, x5]
	add x5, x5, #1
	cmp w3, w4
	b.ne barf // reports the mismatch and exits
	cmp x5, x2
	b.ne 0b

	1: ret
	endfunction

	// Verify that a FPSIMD V-register matches its shadow in memory, else abort
	// x0: reg number
	// Clobbers x0-x7.
	function check_vreg
	mov x3, x30 // save return address across the calls below

	_adrv x5, x0, 6 // x5 = shadow slot; also leaves x6 = 16
	mov x4, x0
	ldr x7, =scratch

	mov x0, x7
	mov x1, x6
	bl memfill_ae // fill scratch with the 0xae canary first

	mov x0, x4
	mov x1, x7
	bl getv // store the live register into scratch

	mov x0, x5 // expected: shadow slot
	mov x1, x7 // actual: scratch
	mov x2, x6 // size: 16 bytes
	mov x30, x3
	b memcmp // tail call; memcmp returns to our caller
	endfunction

	// Handler installed by _start for SIGUSR1.
	// x2 is used as the base for the ucontext_regs offsets, i.e. the saved
	// registers of the interrupted code.
	// Any FPSIMD register modified here can cause corruption in the main
	// thread -- but only the registers modified here.
	function irritator_handler
	// Increment the irritation signal count (x23):
	ldr x0, [x2, #ucontext_regs + 8 * 23]
	add x0, x0, #1
	str x0, [x2, #ucontext_regs + 8 * 23]

	// Corrupt some random V-regs
	// NOTE(review): the adr result in x0 is not read again in this handler;
	// presumably a leftover from a sibling test -- confirm before removing.
	adr x0, .text + (irritator_handler - .text) / 16 * 16
	movi v0.8b, #7
	movi v9.16b, #9
	movi v31.8b, #31

	ret
	endfunction

	// Handler installed by _start for SIGINT and SIGTERM: print a summary line
	// and exit with status 0.
	// w0: signal number
	// x2: base for the ucontext_regs offsets; the saved x22 and x23 are read as
	//     the iteration count and the irritation signal count
	// Does not return.
	function terminate_handler
	mov w21, w0 // signal number, kept across the print helpers
	mov x20, x2 // context pointer, kept across the print helpers

	puts "Terminated by signal "
	mov w0, w21
	bl putdec
	puts ", no error, iterations="
	ldr x0, [x20, #ucontext_regs + 8 * 22]
	bl putdec
	puts ", signals="
	ldr x0, [x20, #ucontext_regs + 8 * 23]
	bl putdecn

	mov x0, #0 // exit status
	mov x8, #__NR_exit
	svc #0
	endfunction

	// Install a signal handler with rt_sigaction; abort the program on failure.
	// w0: signal number
	// x1: sa_action
	// w2: sa_flags
	// The stack frame holds x30 at [sp] and a zeroed sigaction structure of
	// sa_sz bytes at [sp, #16].
	// Clobbers x0-x6,x8
	function setsignal
	sub sp, sp, #((sa_sz + 15) / 16 * 16 + 16)
	str x30, [sp]

	mov w6, w2 // flags
	mov x5, x1 // handler
	mov w4, w0 // signal number

	mov x1, #sa_sz
	add x0, sp, #16
	bl memclr // preserves x4-x6

	add x1, sp, #16 // x1 = new action
	str x5, [x1, #sa_handler]
	str w6, [x1, #sa_flags]
	mov w0, w4
	mov x2, xzr // no old action wanted
	mov x3, #sa_mask_sz
	mov x8, #__NR_rt_sigaction
	svc #0

	cbnz w0, 1f

	ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
	ret

	1: puts "sigaction failure\n"
	b .Labort
	endfunction

	// Main program entry point
	// Register use:
	//   x19 vector length in bits (fixed at 128 here)
	//   x20 PID
	//   x21 index of the V-register being set up or checked
	//   x22 generation number (iteration count)
	//   x23 irritation signal count (incremented by irritator_handler)
	// The test loop never exits by itself; the program ends through
	// terminate_handler, barf or .Labort.
	.globl _start
	function _start
	_start:
	// Sanity-check and report the vector length

	mov x19, #128
	cmp x19, #128
	b.lo 1f
	cmp x19, #2048
	b.hi 1f
	tst x19, #(8 - 1)
	b.eq 2f

	1: puts "Bad vector length: "
	mov x0, x19
	bl putdecn
	b .Labort

	2: puts "Vector length:\t"
	mov x0, x19
	bl putdec
	puts " bits\n"

	// Obtain our PID, to ensure test pattern uniqueness between processes

	mov x8, #__NR_getpid
	svc #0
	mov x20, x0

	puts "PID:\t"
	mov x0, x20
	bl putdecn

	mov x23, #0 // Irritation signal count

	mov w0, #SIGINT
	adr x1, terminate_handler
	mov w2, #SA_SIGINFO
	bl setsignal

	mov w0, #SIGTERM
	adr x1, terminate_handler
	mov w2, #SA_SIGINFO
	bl setsignal

	mov w0, #SIGUSR1
	adr x1, irritator_handler
	mov w2, #SA_SIGINFO
	orr w2, w2, #SA_NODEFER
	bl setsignal

	mov x22, #0 // generation number, increments per iteration
	.Ltest_loop:

	mov x21, #0 // Set up V-regs & shadow with test pattern
	0: mov x0, x20
	mov x1, x21
	and x2, x22, #0xf
	bl setup_vreg
	add x21, x21, #1
	cmp x21, #NVR
	b.lo 0b

	// Give up the CPU between setting up and checking the registers:
	mov x8, #__NR_sched_yield // Encourage preemption
	svc #0

	mov x21, #0
	0: mov x0, x21
	bl check_vreg
	add x21, x21, #1
	cmp x21, #NVR
	b.lo 0b

	add x22, x22, #1
	b .Ltest_loop

	// Abort this process only.  The PID is fetched again because x20 is not
	// yet set on the "Bad vector length" path; passing pid 0 to kill would
	// signal every process in the process group.
	.Labort:
	mov x8, #__NR_getpid
	svc #0 // x0 = our PID
	mov x1, #SIGABRT
	mov x8, #__NR_kill
	svc #0

	// Not expected to be reached; exit rather than run into the next function.
	mov x0, #1
	mov x8, #__NR_exit
	svc #0
	endfunction

	// Report a register/shadow mismatch and exit with status 1.
	// x0: expected data
	// x1: actual data
	// x2: data size in bytes
	// Also prints x20 (PID), x22 (iteration) and x21 (register number) as left
	// by _start.  Does not return.
	function barf
	mov x10, x0 // expected data
	mov x11, x1 // actual data
	mov x12, x2 // data size

	puts "Mismatch: PID="
	mov x0, x20
	bl putdec
	puts ", iteration="
	mov x0, x22
	bl putdec
	puts ", reg="
	mov x0, x21
	bl putdecn
	puts "\tExpected ["
	mov x0, x10
	mov x1, x12
	bl dumphex
	puts "]\n\tGot ["
	mov x0, x11
	mov x1, x12
	bl dumphex
	puts "]\n"

	mov x8, #__NR_exit
	mov x0, #1 // exit status is the first syscall argument
	svc #0
	endfunction