arch/x86/math-emu/wm_sqrt.S - linux - Git at Google

 /* SPDX-License-Identifier: GPL-2.0 */
 	.file	"wm_sqrt.S"
 /*---------------------------------------------------------------------------+
  |  wm_sqrt.S                                                                |
  |                                                                           |
  | Fixed point arithmetic square root evaluation.                            |
  |                                                                           |
  | Copyright (C) 1992,1993,1995,1997                                         |
  |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
  |                       Australia.  E-mail billm@suburbia.net               |
  |                                                                           |
  | Call from C as:                                                           |
  |    int wm_sqrt(FPU_REG *n, unsigned int control_word)                     |
  |                                                                           |
  +---------------------------------------------------------------------------*/

 /*---------------------------------------------------------------------------+
  |  wm_sqrt(FPU_REG *n, unsigned int control_word)                           |
  |    returns the square root of n in n.                                     |
  |                                                                           |
  |  Use Newton's method to compute the square root of a number, which must   |
  |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     |
  |  Does not check the sign or tag of the argument.                          |
  |  Sets the exponent, but not the sign or tag of the result.                |
  |                                                                           |
  |  The guess is kept in %esi:%edi                                           |
  +---------------------------------------------------------------------------*/

 #include "exception.h"
 #include "fpu_emu.h"


 #ifndef NON_REENTRANT_FPU
 /*	Local storage on the stack: */
 #define FPU_accum_3	-4(%ebp)	/* ms word */
 #define FPU_accum_2	-8(%ebp)
 #define FPU_accum_1	-12(%ebp)
 #define FPU_accum_0	-16(%ebp)

 /*
  * The de-normalised argument:
  *                  sq_2                  sq_1              sq_0
  *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
  *           ^ binary point here
  */
 #define FPU_fsqrt_arg_2	-20(%ebp)	/* ms word */
 #define FPU_fsqrt_arg_1	-24(%ebp)
 #define FPU_fsqrt_arg_0	-28(%ebp)	/* ls word, at most the ms bit is set */

 #else
 /*	Local storage in a static area: */
 .data
 	.align 4,0
 FPU_accum_3:
 	.long	0		/* ms word */
 FPU_accum_2:
 	.long	0
 FPU_accum_1:
 	.long	0
 FPU_accum_0:
 	.long	0

 /* The de-normalised argument:
                     sq_2                  sq_1              sq_0
           b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
              ^ binary point here
  */
 FPU_fsqrt_arg_2:
 	.long	0		/* ms word */
 FPU_fsqrt_arg_1:
 	.long	0
 FPU_fsqrt_arg_0:
 	.long	0		/* ls word, at most the ms bit is set */
 #endif /* NON_REENTRANT_FPU */


 .text
 SYM_FUNC_START(wm_sqrt)
 	pushl	%ebp
 	movl	%esp,%ebp
 #ifndef NON_REENTRANT_FPU
 	subl	$28,%esp
 #endif /* NON_REENTRANT_FPU */
 	pushl	%esi
 	pushl	%edi
 	pushl	%ebx

 	movl	PARAM1,%esi

 	movl	SIGH(%esi),%eax
 	movl	SIGL(%esi),%ecx
 	xorl	%edx,%edx

 /* We use a rough linear estimate for the first guess.. */

 	cmpw	EXP_BIAS,EXP(%esi)
 	jnz	sqrt_arg_ge_2

 	shrl	$1,%eax			/* arg is in the range  [1.0 .. 2.0) */
 	rcrl	$1,%ecx
 	rcrl	$1,%edx

 sqrt_arg_ge_2:
 /* From here on, n is never accessed directly again until it is
    replaced by the answer. */

 	movl	%eax,FPU_fsqrt_arg_2		/* ms word of n */
 	movl	%ecx,FPU_fsqrt_arg_1
 	movl	%edx,FPU_fsqrt_arg_0

 /* Make a linear first estimate */
 	shrl	$1,%eax
 	addl	$0x40000000,%eax
 	movl	$0xaaaaaaaa,%ecx
 	mull	%ecx
 	shll	%edx			/* max result was 7fff... */
 	testl	$0x80000000,%edx	/* but min was 3fff... */
 	jnz	sqrt_prelim_no_adjust

 	movl	$0x80000000,%edx	/* round up */

 sqrt_prelim_no_adjust:
 	movl	%edx,%esi	/* Our first guess */

 /* We have now computed (approx)   (2 + x) / 3, which forms the basis
    for a few iterations of Newton's method */

 	movl	FPU_fsqrt_arg_2,%ecx	/* ms word */

 /*
  * From our initial estimate, three iterations are enough to get us
  * to 30 bits or so. This will then allow two iterations at better
  * precision to complete the process.
  */

 /* Compute  (g + n/g)/2  at each iteration (g is the guess). */
 	shrl	%ecx		/* Doing this first will prevent a divide */
 				/* overflow later. */

 	movl	%ecx,%edx	/* msw of the arg / 2 */
 	divl	%esi		/* current estimate */
 	shrl	%esi		/* divide by 2 */
 	addl	%eax,%esi	/* the new estimate */

 	movl	%ecx,%edx
 	divl	%esi
 	shrl	%esi
 	addl	%eax,%esi

 	movl	%ecx,%edx
 	divl	%esi
 	shrl	%esi
 	addl	%eax,%esi

 /*
  * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
  * we improve it to 60 bits or so.
  *
  * The strategy from now on is to compute new estimates from
  *      guess := guess + (n - guess^2) / (2 * guess)
  */

 /* First, find the square of the guess */
 	movl	%esi,%eax
 	mull	%esi
 /* guess^2 now in %edx:%eax */

 	movl	FPU_fsqrt_arg_1,%ecx
 	subl	%ecx,%eax
 	movl	FPU_fsqrt_arg_2,%ecx	/* ms word of normalized n */
 	sbbl	%ecx,%edx
 	jnc	sqrt_stage_2_positive

 /* Subtraction gives a negative result,
    negate the result before division. */
 	notl	%edx
 	notl	%eax
 	addl	$1,%eax
 	adcl	$0,%edx

 	divl	%esi
 	movl	%eax,%ecx

 	movl	%edx,%eax
 	divl	%esi
 	jmp	sqrt_stage_2_finish

 sqrt_stage_2_positive:
 	divl	%esi
 	movl	%eax,%ecx

 	movl	%edx,%eax
 	divl	%esi

 	notl	%ecx
 	notl	%eax
 	addl	$1,%eax
 	adcl	$0,%ecx

 sqrt_stage_2_finish:
 	sarl	$1,%ecx		/* divide by 2 */
 	rcrl	$1,%eax

 	/* Form the new estimate in %esi:%edi */
 	movl	%eax,%edi
 	addl	%ecx,%esi

 	jnz	sqrt_stage_2_done	/* result should be [1..2) */

 #ifdef PARANOID
 /* It should be possible to get here only if the arg is ffff....ffff */
 	cmpl	$0xffffffff,FPU_fsqrt_arg_1
 	jnz	sqrt_stage_2_error
 #endif /* PARANOID */

 /* The best rounded result. */
 	xorl	%eax,%eax
 	decl	%eax
 	movl	%eax,%edi
 	movl	%eax,%esi
 	movl	$0x7fffffff,%eax
 	jmp	sqrt_round_result

 #ifdef PARANOID
 sqrt_stage_2_error:
 	pushl	EX_INTERNAL|0x213
 	call	EXCEPTION
 #endif /* PARANOID */

 sqrt_stage_2_done:

 /* Now the square root has been computed to better than 60 bits. */

 /* Find the square of the guess. */
 	movl	%edi,%eax		/* ls word of guess */
 	mull	%edi
 	movl	%edx,FPU_accum_1

 	movl	%esi,%eax
 	mull	%esi
 	movl	%edx,FPU_accum_3
 	movl	%eax,FPU_accum_2

 	movl	%edi,%eax
 	mull	%esi
 	addl	%eax,FPU_accum_1
 	adcl	%edx,FPU_accum_2
 	adcl	$0,FPU_accum_3

 /*	movl	%esi,%eax */
 /*	mull	%edi */
 	addl	%eax,FPU_accum_1
 	adcl	%edx,FPU_accum_2
 	adcl	$0,FPU_accum_3

 /* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */

 	movl	FPU_fsqrt_arg_0,%eax		/* get normalized n */
 	subl	%eax,FPU_accum_1
 	movl	FPU_fsqrt_arg_1,%eax
 	sbbl	%eax,FPU_accum_2
 	movl	FPU_fsqrt_arg_2,%eax		/* ms word of normalized n */
 	sbbl	%eax,FPU_accum_3
 	jnc	sqrt_stage_3_positive

 /* Subtraction gives a negative result,
    negate the result before division */
 	notl	FPU_accum_1
 	notl	FPU_accum_2
 	notl	FPU_accum_3
 	addl	$1,FPU_accum_1
 	adcl	$0,FPU_accum_2

 #ifdef PARANOID
 	adcl	$0,FPU_accum_3	/* This must be zero */
 	jz	sqrt_stage_3_no_error

 sqrt_stage_3_error:
 	pushl	EX_INTERNAL|0x207
 	call	EXCEPTION

 sqrt_stage_3_no_error:
 #endif /* PARANOID */

 	movl	FPU_accum_2,%edx
 	movl	FPU_accum_1,%eax
 	divl	%esi
 	movl	%eax,%ecx

 	movl	%edx,%eax
 	divl	%esi

 	sarl	$1,%ecx		/* divide by 2 */
 	rcrl	$1,%eax

 	/* prepare to round the result */

 	addl	%ecx,%edi
 	adcl	$0,%esi

 	jmp	sqrt_stage_3_finished

 sqrt_stage_3_positive:
 	movl	FPU_accum_2,%edx
 	movl	FPU_accum_1,%eax
 	divl	%esi
 	movl	%eax,%ecx

 	movl	%edx,%eax
 	divl	%esi

 	sarl	$1,%ecx		/* divide by 2 */
 	rcrl	$1,%eax

 	/* prepare to round the result */

 	notl	%eax		/* Negate the correction term */
 	notl	%ecx
 	addl	$1,%eax
 	adcl	$0,%ecx		/* carry here ==> correction == 0 */
 	adcl	$0xffffffff,%esi

 	addl	%ecx,%edi
 	adcl	$0,%esi

 sqrt_stage_3_finished:

 /*
  * The result in %esi:%edi:%esi should be good to about 90 bits here,
  * and the rounding information here does not have sufficient accuracy
  * in a few rare cases.
  */
 	cmpl	$0xffffffe0,%eax
 	ja	sqrt_near_exact_x

 	cmpl	$0x00000020,%eax
 	jb	sqrt_near_exact

 	cmpl	$0x7fffffe0,%eax
 	jb	sqrt_round_result

 	cmpl	$0x80000020,%eax
 	jb	sqrt_get_more_precision

 sqrt_round_result:
 /* Set up for rounding operations */
 	movl	%eax,%edx
 	movl	%esi,%eax
 	movl	%edi,%ebx
 	movl	PARAM1,%edi
 	movw	EXP_BIAS,EXP(%edi)	/* Result is in  [1.0 .. 2.0) */
 	jmp	fpu_reg_round


 sqrt_near_exact_x:
 /* First, the estimate must be rounded up. */
 	addl	$1,%edi
 	adcl	$0,%esi

 sqrt_near_exact:
 /*
  * This is an easy case because x^1/2 is monotonic.
  * We need just find the square of our estimate, compare it
  * with the argument, and deduce whether our estimate is
  * above, below, or exact. We use the fact that the estimate
  * is known to be accurate to about 90 bits.
  */
 	movl	%edi,%eax		/* ls word of guess */
 	mull	%edi
 	movl	%edx,%ebx		/* 2nd ls word of square */
 	movl	%eax,%ecx		/* ls word of square */

 	movl	%edi,%eax
 	mull	%esi
 	addl	%eax,%ebx
 	addl	%eax,%ebx

 #ifdef PARANOID
 	cmp	$0xffffffb0,%ebx
 	jb	sqrt_near_exact_ok

 	cmp	$0x00000050,%ebx
 	ja	sqrt_near_exact_ok

 	pushl	EX_INTERNAL|0x214
 	call	EXCEPTION

 sqrt_near_exact_ok:
 #endif /* PARANOID */

 	or	%ebx,%ebx
 	js	sqrt_near_exact_small

 	jnz	sqrt_near_exact_large

 	or	%ebx,%edx
 	jnz	sqrt_near_exact_large

 /* Our estimate is exactly the right answer */
 	xorl	%eax,%eax
 	jmp	sqrt_round_result

 sqrt_near_exact_small:
 /* Our estimate is too small */
 	movl	$0x000000ff,%eax
 	jmp	sqrt_round_result

 sqrt_near_exact_large:
 /* Our estimate is too large, we need to decrement it */
 	subl	$1,%edi
 	sbbl	$0,%esi
 	movl	$0xffffff00,%eax
 	jmp	sqrt_round_result


 sqrt_get_more_precision:
 /* This case is almost the same as the above, except we start
    with an extra bit of precision in the estimate. */
 	stc			/* The extra bit. */
 	rcll	$1,%edi		/* Shift the estimate left one bit */
 	rcll	$1,%esi

 	movl	%edi,%eax		/* ls word of guess */
 	mull	%edi
 	movl	%edx,%ebx		/* 2nd ls word of square */
 	movl	%eax,%ecx		/* ls word of square */

 	movl	%edi,%eax
 	mull	%esi
 	addl	%eax,%ebx
 	addl	%eax,%ebx

 /* Put our estimate back to its original value */
 	stc			/* The ms bit. */
 	rcrl	$1,%esi		/* Shift the estimate left one bit */
 	rcrl	$1,%edi

 #ifdef PARANOID
 	cmp	$0xffffff60,%ebx
 	jb	sqrt_more_prec_ok

 	cmp	$0x000000a0,%ebx
 	ja	sqrt_more_prec_ok

 	pushl	EX_INTERNAL|0x215
 	call	EXCEPTION

 sqrt_more_prec_ok:
 #endif /* PARANOID */

 	or	%ebx,%ebx
 	js	sqrt_more_prec_small

 	jnz	sqrt_more_prec_large

 	or	%ebx,%ecx
 	jnz	sqrt_more_prec_large

 /* Our estimate is exactly the right answer */
 	movl	$0x80000000,%eax
 	jmp	sqrt_round_result

 sqrt_more_prec_small:
 /* Our estimate is too small */
 	movl	$0x800000ff,%eax
 	jmp	sqrt_round_result

 sqrt_more_prec_large:
 /* Our estimate is too large */
 	movl	$0x7fffff00,%eax
 	jmp	sqrt_round_result
 SYM_FUNC_END(wm_sqrt)
	/* SPDX-License-Identifier: GPL-2.0 */
	.file "wm_sqrt.S"
	/*---------------------------------------------------------------------------+
	\| wm_sqrt.S \|
	\| \|
	\| Fixed point arithmetic square root evaluation. \|
	\| \|
	\| Copyright (C) 1992,1993,1995,1997 \|
	\| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, \|
	\| Australia. E-mail billm@suburbia.net \|
	\| \|
	\| Call from C as: \|
	\| int wm_sqrt(FPU_REG *n, unsigned int control_word) \|
	\| \|
	+---------------------------------------------------------------------------*/

	/*---------------------------------------------------------------------------+
	\| wm_sqrt(FPU_REG *n, unsigned int control_word) \|
	\| returns the square root of n in n. \|
	\| \|
	\| Use Newton's method to compute the square root of a number, which must \|
	\| be in the range [1.0 .. 4.0), to 64 bits accuracy. \|
	\| Does not check the sign or tag of the argument. \|
	\| Sets the exponent, but not the sign or tag of the result. \|
	\| \|
	\| The guess is kept in %esi:%edi \|
	+---------------------------------------------------------------------------*/

	#include "exception.h"
	#include "fpu_emu.h"


	#ifndef NON_REENTRANT_FPU
	/* Local storage on the stack: */
	#define FPU_accum_3 -4(%ebp) /* ms word */
	#define FPU_accum_2 -8(%ebp)
	#define FPU_accum_1 -12(%ebp)
	#define FPU_accum_0 -16(%ebp)

	/*
	* The de-normalised argument:
	* sq_2 sq_1 sq_0
	* b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
	* ^ binary point here
	*/
	#define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */
	#define FPU_fsqrt_arg_1 -24(%ebp)
	#define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */

	#else
	/* Local storage in a static area: */
	.data
	.align 4,0
	FPU_accum_3:
	.long 0 /* ms word */
	FPU_accum_2:
	.long 0
	FPU_accum_1:
	.long 0
	FPU_accum_0:
	.long 0

	/* The de-normalised argument:
	sq_2 sq_1 sq_0
	b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
	^ binary point here
	*/
	FPU_fsqrt_arg_2:
	.long 0 /* ms word */
	FPU_fsqrt_arg_1:
	.long 0
	FPU_fsqrt_arg_0:
	.long 0 /* ls word, at most the ms bit is set */
	#endif /* NON_REENTRANT_FPU */


	.text
	SYM_FUNC_START(wm_sqrt)
	pushl %ebp
	movl %esp,%ebp
	#ifndef NON_REENTRANT_FPU
	subl $28,%esp
	#endif /* NON_REENTRANT_FPU */
	pushl %esi
	pushl %edi
	pushl %ebx

	movl PARAM1,%esi

	movl SIGH(%esi),%eax
	movl SIGL(%esi),%ecx
	xorl %edx,%edx

	/* We use a rough linear estimate for the first guess.. */

	cmpw EXP_BIAS,EXP(%esi)
	jnz sqrt_arg_ge_2

	shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */
	rcrl $1,%ecx
	rcrl $1,%edx

	sqrt_arg_ge_2:
	/* From here on, n is never accessed directly again until it is
	replaced by the answer. */

	movl %eax,FPU_fsqrt_arg_2 /* ms word of n */
	movl %ecx,FPU_fsqrt_arg_1
	movl %edx,FPU_fsqrt_arg_0

	/* Make a linear first estimate */
	shrl $1,%eax
	addl $0x40000000,%eax
	movl $0xaaaaaaaa,%ecx
	mull %ecx
	shll %edx /* max result was 7fff... */
	testl $0x80000000,%edx /* but min was 3fff... */
	jnz sqrt_prelim_no_adjust

	movl $0x80000000,%edx /* round up */

	sqrt_prelim_no_adjust:
	movl %edx,%esi /* Our first guess */

	/* We have now computed (approx) (2 + x) / 3, which forms the basis
	for a few iterations of Newton's method */

	movl FPU_fsqrt_arg_2,%ecx /* ms word */

	/*
	* From our initial estimate, three iterations are enough to get us
	* to 30 bits or so. This will then allow two iterations at better
	* precision to complete the process.
	*/

	/* Compute (g + n/g)/2 at each iteration (g is the guess). */
	shrl %ecx /* Doing this first will prevent a divide */
	/* overflow later. */

	movl %ecx,%edx /* msw of the arg / 2 */
	divl %esi /* current estimate */
	shrl %esi /* divide by 2 */
	addl %eax,%esi /* the new estimate */

	movl %ecx,%edx
	divl %esi
	shrl %esi
	addl %eax,%esi

	movl %ecx,%edx
	divl %esi
	shrl %esi
	addl %eax,%esi

	/*
	* Now that an estimate accurate to about 30 bits has been obtained (in %esi),
	* we improve it to 60 bits or so.
	*
	* The strategy from now on is to compute new estimates from
	* guess := guess + (n - guess^2) / (2 * guess)
	*/

	/* First, find the square of the guess */
	movl %esi,%eax
	mull %esi
	/* guess^2 now in %edx:%eax */

	movl FPU_fsqrt_arg_1,%ecx
	subl %ecx,%eax
	movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */
	sbbl %ecx,%edx
	jnc sqrt_stage_2_positive

	/* Subtraction gives a negative result,
	negate the result before division. */
	notl %edx
	notl %eax
	addl $1,%eax
	adcl $0,%edx

	divl %esi
	movl %eax,%ecx

	movl %edx,%eax
	divl %esi
	jmp sqrt_stage_2_finish

	sqrt_stage_2_positive:
	divl %esi
	movl %eax,%ecx

	movl %edx,%eax
	divl %esi

	notl %ecx
	notl %eax
	addl $1,%eax
	adcl $0,%ecx

	sqrt_stage_2_finish:
	sarl $1,%ecx /* divide by 2 */
	rcrl $1,%eax

	/* Form the new estimate in %esi:%edi */
	movl %eax,%edi
	addl %ecx,%esi

	jnz sqrt_stage_2_done /* result should be [1..2) */

	#ifdef PARANOID
	/* It should be possible to get here only if the arg is ffff....ffff */
	cmpl $0xffffffff,FPU_fsqrt_arg_1
	jnz sqrt_stage_2_error
	#endif /* PARANOID */

	/* The best rounded result. */
	xorl %eax,%eax
	decl %eax
	movl %eax,%edi
	movl %eax,%esi
	movl $0x7fffffff,%eax
	jmp sqrt_round_result

	#ifdef PARANOID
	sqrt_stage_2_error:
	pushl EX_INTERNAL\|0x213
	call EXCEPTION
	#endif /* PARANOID */

	sqrt_stage_2_done:

	/* Now the square root has been computed to better than 60 bits. */

	/* Find the square of the guess. */
	movl %edi,%eax /* ls word of guess */
	mull %edi
	movl %edx,FPU_accum_1

	movl %esi,%eax
	mull %esi
	movl %edx,FPU_accum_3
	movl %eax,FPU_accum_2

	movl %edi,%eax
	mull %esi
	addl %eax,FPU_accum_1
	adcl %edx,FPU_accum_2
	adcl $0,FPU_accum_3

	/* movl %esi,%eax */
	/* mull %edi */
	addl %eax,FPU_accum_1
	adcl %edx,FPU_accum_2
	adcl $0,FPU_accum_3

	/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */

	movl FPU_fsqrt_arg_0,%eax /* get normalized n */
	subl %eax,FPU_accum_1
	movl FPU_fsqrt_arg_1,%eax
	sbbl %eax,FPU_accum_2
	movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */
	sbbl %eax,FPU_accum_3
	jnc sqrt_stage_3_positive

	/* Subtraction gives a negative result,
	negate the result before division */
	notl FPU_accum_1
	notl FPU_accum_2
	notl FPU_accum_3
	addl $1,FPU_accum_1
	adcl $0,FPU_accum_2

	#ifdef PARANOID
	adcl $0,FPU_accum_3 /* This must be zero */
	jz sqrt_stage_3_no_error

	sqrt_stage_3_error:
	pushl EX_INTERNAL\|0x207
	call EXCEPTION

	sqrt_stage_3_no_error:
	#endif /* PARANOID */

	movl FPU_accum_2,%edx
	movl FPU_accum_1,%eax
	divl %esi
	movl %eax,%ecx

	movl %edx,%eax
	divl %esi

	sarl $1,%ecx /* divide by 2 */
	rcrl $1,%eax

	/* prepare to round the result */

	addl %ecx,%edi
	adcl $0,%esi

	jmp sqrt_stage_3_finished

	sqrt_stage_3_positive:
	movl FPU_accum_2,%edx
	movl FPU_accum_1,%eax
	divl %esi
	movl %eax,%ecx

	movl %edx,%eax
	divl %esi

	sarl $1,%ecx /* divide by 2 */
	rcrl $1,%eax

	/* prepare to round the result */

	notl %eax /* Negate the correction term */
	notl %ecx
	addl $1,%eax
	adcl $0,%ecx /* carry here ==> correction == 0 */
	adcl $0xffffffff,%esi

	addl %ecx,%edi
	adcl $0,%esi

	sqrt_stage_3_finished:

	/*
	* The result in %esi:%edi:%esi should be good to about 90 bits here,
	* and the rounding information here does not have sufficient accuracy
	* in a few rare cases.
	*/
	cmpl $0xffffffe0,%eax
	ja sqrt_near_exact_x

	cmpl $0x00000020,%eax
	jb sqrt_near_exact

	cmpl $0x7fffffe0,%eax
	jb sqrt_round_result

	cmpl $0x80000020,%eax
	jb sqrt_get_more_precision

	sqrt_round_result:
	/* Set up for rounding operations */
	movl %eax,%edx
	movl %esi,%eax
	movl %edi,%ebx
	movl PARAM1,%edi
	movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */
	jmp fpu_reg_round


	sqrt_near_exact_x:
	/* First, the estimate must be rounded up. */
	addl $1,%edi
	adcl $0,%esi

	sqrt_near_exact:
	/*
	* This is an easy case because x^1/2 is monotonic.
	* We need just find the square of our estimate, compare it
	* with the argument, and deduce whether our estimate is
	* above, below, or exact. We use the fact that the estimate
	* is known to be accurate to about 90 bits.
	*/
	movl %edi,%eax /* ls word of guess */
	mull %edi
	movl %edx,%ebx /* 2nd ls word of square */
	movl %eax,%ecx /* ls word of square */

	movl %edi,%eax
	mull %esi
	addl %eax,%ebx
	addl %eax,%ebx

	#ifdef PARANOID
	cmp $0xffffffb0,%ebx
	jb sqrt_near_exact_ok

	cmp $0x00000050,%ebx
	ja sqrt_near_exact_ok

	pushl EX_INTERNAL\|0x214
	call EXCEPTION

	sqrt_near_exact_ok:
	#endif /* PARANOID */

	or %ebx,%ebx
	js sqrt_near_exact_small

	jnz sqrt_near_exact_large

	or %ebx,%edx
	jnz sqrt_near_exact_large

	/* Our estimate is exactly the right answer */
	xorl %eax,%eax
	jmp sqrt_round_result

	sqrt_near_exact_small:
	/* Our estimate is too small */
	movl $0x000000ff,%eax
	jmp sqrt_round_result

	sqrt_near_exact_large:
	/* Our estimate is too large, we need to decrement it */
	subl $1,%edi
	sbbl $0,%esi
	movl $0xffffff00,%eax
	jmp sqrt_round_result


	sqrt_get_more_precision:
	/* This case is almost the same as the above, except we start
	with an extra bit of precision in the estimate. */
	stc /* The extra bit. */
	rcll $1,%edi /* Shift the estimate left one bit */
	rcll $1,%esi

	movl %edi,%eax /* ls word of guess */
	mull %edi
	movl %edx,%ebx /* 2nd ls word of square */
	movl %eax,%ecx /* ls word of square */

	movl %edi,%eax
	mull %esi
	addl %eax,%ebx
	addl %eax,%ebx

	/* Put our estimate back to its original value */
	stc /* The ms bit. */
	rcrl $1,%esi /* Shift the estimate left one bit */
	rcrl $1,%edi

	#ifdef PARANOID
	cmp $0xffffff60,%ebx
	jb sqrt_more_prec_ok

	cmp $0x000000a0,%ebx
	ja sqrt_more_prec_ok

	pushl EX_INTERNAL\|0x215
	call EXCEPTION

	sqrt_more_prec_ok:
	#endif /* PARANOID */

	or %ebx,%ebx
	js sqrt_more_prec_small

	jnz sqrt_more_prec_large

	or %ebx,%ecx
	jnz sqrt_more_prec_large

	/* Our estimate is exactly the right answer */
	movl $0x80000000,%eax
	jmp sqrt_round_result

	sqrt_more_prec_small:
	/* Our estimate is too small */
	movl $0x800000ff,%eax
	jmp sqrt_round_result

	sqrt_more_prec_large:
	/* Our estimate is too large */
	movl $0x7fffff00,%eax
	jmp sqrt_round_result
	SYM_FUNC_END(wm_sqrt)