| /* SPDX-License-Identifier: GPL-2.0 */ |
| .file "wm_sqrt.S" |
| /*---------------------------------------------------------------------------+ |
| | wm_sqrt.S | |
| | | |
| | Fixed point arithmetic square root evaluation. | |
| | | |
| | Copyright (C) 1992,1993,1995,1997 | |
| | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
| | Australia. E-mail billm@suburbia.net | |
| | | |
| | Call from C as: | |
| | int wm_sqrt(FPU_REG *n, unsigned int control_word) | |
| | | |
| +---------------------------------------------------------------------------*/ |
| |
| /*---------------------------------------------------------------------------+ |
| | wm_sqrt(FPU_REG *n, unsigned int control_word) | |
| | returns the square root of n in n. | |
| | | |
| | Use Newton's method to compute the square root of a number, which must | |
| | be in the range [1.0 .. 4.0), to 64 bits accuracy. | |
| | Does not check the sign or tag of the argument. | |
| | Sets the exponent, but not the sign or tag of the result. | |
| | | |
| | The guess is kept in %esi:%edi | |
| +---------------------------------------------------------------------------*/ |
| |
| #include "exception.h" |
| #include "fpu_emu.h" |
| |
| |
#ifndef NON_REENTRANT_FPU
/*
 * Re-entrant build: all scratch words are slots in the stack frame and
 * are addressed relative to %ebp.  The seven slots below span
 * -4(%ebp) .. -28(%ebp).
 */

/* Accumulator words; FPU_accum_3 is the most significant. */
#define FPU_accum_3	-4(%ebp)
#define FPU_accum_2	-8(%ebp)
#define FPU_accum_1	-12(%ebp)
#define FPU_accum_0	-16(%ebp)

/*
 * Working copy of the argument, de-normalised to fixed point:
 *
 *     FPU_fsqrt_arg_2        FPU_fsqrt_arg_1       FPU_fsqrt_arg_0
 *   b b b b b b ... b b b   b b b ... b b b b   b 0 0 0 ... 0 0 0
 *      ^ binary point here
 *
 * FPU_fsqrt_arg_2 is the most significant word.  FPU_fsqrt_arg_0 is the
 * least significant word and can only ever have its top bit set.
 */
#define FPU_fsqrt_arg_2	-20(%ebp)
#define FPU_fsqrt_arg_1	-24(%ebp)
#define FPU_fsqrt_arg_0	-28(%ebp)

#else
/*
 * Non-re-entrant build: the same scratch words are zero-initialised
 * longs in the data section.
 */
.data
	.align 4,0

/* Accumulator words; FPU_accum_3 is the most significant. */
FPU_accum_3:
	.long	0
FPU_accum_2:
	.long	0
FPU_accum_1:
	.long	0
FPU_accum_0:
	.long	0

/*
 * Working copy of the argument, de-normalised to fixed point:
 *
 *     FPU_fsqrt_arg_2        FPU_fsqrt_arg_1       FPU_fsqrt_arg_0
 *   b b b b b b ... b b b   b b b ... b b b b   b 0 0 0 ... 0 0 0
 *      ^ binary point here
 *
 * FPU_fsqrt_arg_2 is the most significant word.  FPU_fsqrt_arg_0 is the
 * least significant word and can only ever have its top bit set.
 */
FPU_fsqrt_arg_2:
	.long	0
FPU_fsqrt_arg_1:
	.long	0
FPU_fsqrt_arg_0:
	.long	0
#endif /* NON_REENTRANT_FPU */
| |
| |
.text
/*
 * wm_sqrt - fixed-point square root of the significand of an FPU_REG.
 *
 * C prototype, as given in the file header:
 *	int wm_sqrt(FPU_REG *n, unsigned int control_word)
 *
 * Syntax: AT&T / GNU as, 32-bit x86, preprocessed with cpp.
 *
 * In:	PARAM1 = n.  The significand is read through SIGH()/SIGL() and the
 *	exponent through EXP(); an exponent equal to EXP_BIAS is treated as
 *	an argument in [1.0 .. 2.0), anything else as [2.0 .. 4.0).
 *	The sign and tag of n are not examined.
 *
 * Stages:
 *	1. Store the argument as fixed point with two integer bits in
 *	   FPU_fsqrt_arg_2:FPU_fsqrt_arg_1:FPU_fsqrt_arg_0.
 *	2. Form a linear first guess, roughly (2 + x) / 3, in %esi.
 *	3. Three 32-bit Newton steps  g := (g + n/g) / 2.
 *	4. One step  g := g + (n - g^2) / (2 * g)  giving %esi:%edi.
 *	5. A second such step giving %esi:%edi plus an extension in %eax.
 *	6. If the extension is within 0x20 of 0 or of 0x80000000, square
 *	   the estimate to decide exactly which way it must round.
 *	7. Write EXP_BIAS into the exponent of n and tail-jump to
 *	   fpu_reg_round with
 *		%eax = ms word of the result
 *		%ebx = ls word of the result
 *		%edx = rounding extension
 *		%edi = n
 *
 * fpu_reg_round is defined elsewhere; presumably it performs the final
 * rounding, restores %ebx/%edi/%esi/%ebp and returns to the caller --
 * confirm against its definition.  control_word is not read here.
 *
 * NOTE(review): each PARANOID "pushl ...; call EXCEPTION" site falls
 * through without removing the pushed argument from the stack.  Confirm
 * that the common exit path tolerates the extra word.
 */
SYM_FUNC_START(wm_sqrt)
	pushl	%ebp
	movl	%esp,%ebp
#ifndef NON_REENTRANT_FPU
	subl	$28,%esp		/* frame slots down to -28(%ebp) */
#endif /* NON_REENTRANT_FPU */
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM1,%esi		/* %esi = n */

	movl	SIGH(%esi),%eax		/* %eax:%ecx = significand of n */
	movl	SIGL(%esi),%ecx
	xorl	%edx,%edx		/* %edx catches the bit shifted out below */

/* We use a rough linear estimate for the first guess.. */

	cmpw	EXP_BIAS,EXP(%esi)
	jnz	sqrt_arg_ge_2

	shrl	$1,%eax			/* arg is in the range  [1.0 .. 2.0) */
	rcrl	$1,%ecx
	rcrl	$1,%edx

sqrt_arg_ge_2:
/* From here on, n is never accessed directly again until it is
   replaced by the answer. */

	movl	%eax,FPU_fsqrt_arg_2	/* ms word of n */
	movl	%ecx,FPU_fsqrt_arg_1
	movl	%edx,FPU_fsqrt_arg_0

/* Make a linear first estimate */
	shrl	$1,%eax
	addl	$0x40000000,%eax
	movl	$0xaaaaaaaa,%ecx
	mull	%ecx			/* keep only the high word, in %edx */
	shll	%edx			/* max result was 7fff... */
	testl	$0x80000000,%edx	/* but min was 3fff... */
	jnz	sqrt_prelim_no_adjust

	movl	$0x80000000,%edx	/* round up */

sqrt_prelim_no_adjust:
	movl	%edx,%esi		/* Our first guess */

/* We have now computed (approx)   (2 + x) / 3, which forms the basis
   for a few iterations of Newton's method */

	movl	FPU_fsqrt_arg_2,%ecx	/* ms word */

/*
 * From our initial estimate, three iterations are enough to get us
 * to 30 bits or so. This will then allow two iterations at better
 * precision to complete the process.
 */

/* Compute  (g + n/g)/2  at each iteration (g is the guess). */
	shrl	%ecx			/* Doing this first will prevent a divide */
					/* overflow later. */

	/*
	 * Each step divides %edx:%eax by the guess with %edx = arg/2.
	 * %eax still holds whatever the previous mull/divl left in it and
	 * only supplies the low-order dividend bits.
	 */
	movl	%ecx,%edx		/* msw of the arg / 2 */
	divl	%esi			/* current estimate */
	shrl	%esi			/* divide by 2 */
	addl	%eax,%esi		/* the new estimate */

	movl	%ecx,%edx
	divl	%esi
	shrl	%esi
	addl	%eax,%esi

	movl	%ecx,%edx
	divl	%esi
	shrl	%esi
	addl	%eax,%esi

/*
 * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
 * we improve it to 60 bits or so.
 *
 * The strategy from now on is to compute new estimates from
 *      guess := guess + (n - guess^2) / (2 * guess)
 */

/* First, find the square of the guess */
	movl	%esi,%eax
	mull	%esi
/* guess^2 now in %edx:%eax */

	movl	FPU_fsqrt_arg_1,%ecx
	subl	%ecx,%eax
	movl	FPU_fsqrt_arg_2,%ecx	/* ms word of normalized n */
	sbbl	%ecx,%edx		/* %edx:%eax = guess^2 - n */
	jnc	sqrt_stage_2_positive

/* Subtraction gives a negative result,
   negate the result before division. */
	notl	%edx
	notl	%eax
	addl	$1,%eax
	adcl	$0,%edx

	/* Two chained divides: quotient ends up in %ecx:%eax. */
	divl	%esi
	movl	%eax,%ecx

	movl	%edx,%eax
	divl	%esi
	jmp	sqrt_stage_2_finish

sqrt_stage_2_positive:
	divl	%esi
	movl	%eax,%ecx

	movl	%edx,%eax
	divl	%esi

	/* guess^2 > n, so the correction must be subtracted: negate it. */
	notl	%ecx
	notl	%eax
	addl	$1,%eax
	adcl	$0,%ecx

sqrt_stage_2_finish:
	sarl	$1,%ecx			/* divide by 2 */
	rcrl	$1,%eax

	/* Form the new estimate in %esi:%edi */
	movl	%eax,%edi
	addl	%ecx,%esi		/* sets ZF for the jnz below */

	jnz	sqrt_stage_2_done	/* result should be [1..2) */

#ifdef PARANOID
/* It should be possible to get here only if the arg is ffff....ffff */
	cmpl	$0xffffffff,FPU_fsqrt_arg_1
	jnz	sqrt_stage_2_error
#endif /* PARANOID */

/* The best rounded result. */
	xorl	%eax,%eax
	decl	%eax			/* %eax = 0xffffffff */
	movl	%eax,%edi
	movl	%eax,%esi
	movl	$0x7fffffff,%eax	/* rounding extension */
	jmp	sqrt_round_result

#ifdef PARANOID
sqrt_stage_2_error:
	pushl	EX_INTERNAL|0x213
	call	EXCEPTION
#endif /* PARANOID */

sqrt_stage_2_done:

/* Now the square root has been computed to better than 60 bits. */

/* Find the square of the guess. */
	movl	%edi,%eax		/* ls word of guess */
	mull	%edi
	movl	%edx,FPU_accum_1	/* low word of ls^2 is discarded */

	movl	%esi,%eax
	mull	%esi
	movl	%edx,FPU_accum_3
	movl	%eax,FPU_accum_2

	movl	%edi,%eax
	mull	%esi			/* cross product, needed twice */
	addl	%eax,FPU_accum_1
	adcl	%edx,FPU_accum_2
	adcl	$0,FPU_accum_3

/*	movl	%esi,%eax */
/*	mull	%edi */
	/* %edx:%eax still hold the cross product; add it a second time. */
	addl	%eax,FPU_accum_1
	adcl	%edx,FPU_accum_2
	adcl	$0,FPU_accum_3

/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */

	movl	FPU_fsqrt_arg_0,%eax	/* get normalized n */
	subl	%eax,FPU_accum_1
	movl	FPU_fsqrt_arg_1,%eax
	sbbl	%eax,FPU_accum_2
	movl	FPU_fsqrt_arg_2,%eax	/* ms word of normalized n */
	sbbl	%eax,FPU_accum_3
	jnc	sqrt_stage_3_positive

/* Subtraction gives a negative result,
   negate the result before division */
	notl	FPU_accum_1
	notl	FPU_accum_2
	notl	FPU_accum_3
	addl	$1,FPU_accum_1
	adcl	$0,FPU_accum_2

#ifdef PARANOID
	adcl	$0,FPU_accum_3		/* This must be zero */
	jz	sqrt_stage_3_no_error

sqrt_stage_3_error:
	pushl	EX_INTERNAL|0x207
	call	EXCEPTION

sqrt_stage_3_no_error:
#endif /* PARANOID */

	movl	FPU_accum_2,%edx
	movl	FPU_accum_1,%eax
	divl	%esi
	movl	%eax,%ecx

	movl	%edx,%eax
	divl	%esi

	sarl	$1,%ecx			/* divide by 2 */
	rcrl	$1,%eax

	/* prepare to round the result */

	addl	%ecx,%edi		/* guess was low: add the correction */
	adcl	$0,%esi

	jmp	sqrt_stage_3_finished

sqrt_stage_3_positive:
	movl	FPU_accum_2,%edx
	movl	FPU_accum_1,%eax
	divl	%esi
	movl	%eax,%ecx

	movl	%edx,%eax
	divl	%esi

	sarl	$1,%ecx			/* divide by 2 */
	rcrl	$1,%eax

	/* prepare to round the result */

	notl	%eax			/* Negate the correction term */
	notl	%ecx
	addl	$1,%eax
	adcl	$0,%ecx			/* carry here ==> correction == 0 */
	adcl	$0xffffffff,%esi	/* sign-extend the correction into %esi */

	addl	%ecx,%edi
	adcl	$0,%esi

sqrt_stage_3_finished:

/*
 * The result in %esi:%edi:%eax should be good to about 90 bits here,
 * and the rounding information here does not have sufficient accuracy
 * in a few rare cases.
 */
	cmpl	$0xffffffe0,%eax
	ja	sqrt_near_exact_x	/* just below an exact 64-bit value */

	cmpl	$0x00000020,%eax
	jb	sqrt_near_exact		/* just above an exact 64-bit value */

	cmpl	$0x7fffffe0,%eax
	jb	sqrt_round_result

	cmpl	$0x80000020,%eax
	jb	sqrt_get_more_precision	/* close to the half-way point */

sqrt_round_result:
/* Set up for rounding operations */
	movl	%eax,%edx		/* rounding extension */
	movl	%esi,%eax		/* ms word of the result */
	movl	%edi,%ebx		/* ls word of the result */
	movl	PARAM1,%edi		/* n */
	movw	EXP_BIAS,EXP(%edi)	/* Result is in  [1.0 .. 2.0) */
	jmp	fpu_reg_round


sqrt_near_exact_x:
/* First, the estimate must be rounded up. */
	addl	$1,%edi
	adcl	$0,%esi

sqrt_near_exact:
/*
 * This is an easy case because x^1/2 is monotonic.
 * We need just find the square of our estimate, compare it
 * with the argument, and deduce whether our estimate is
 * above, below, or exact. We use the fact that the estimate
 * is known to be accurate to about 90 bits.
 */
	movl	%edi,%eax		/* ls word of guess */
	mull	%edi
	movl	%edx,%ebx		/* 2nd ls word of square */
	movl	%eax,%ecx		/* ls word of square */

	movl	%edi,%eax
	mull	%esi			/* NB: overwrites %edx */
	addl	%eax,%ebx
	addl	%eax,%ebx

#ifdef PARANOID
	cmp	$0xffffffb0,%ebx
	jb	sqrt_near_exact_ok

	cmp	$0x00000050,%ebx
	ja	sqrt_near_exact_ok

	pushl	EX_INTERNAL|0x214
	call	EXCEPTION

sqrt_near_exact_ok:
#endif /* PARANOID */

	or	%ebx,%ebx
	js	sqrt_near_exact_small

	jnz	sqrt_near_exact_large

	/*
	 * %ebx is zero here, so the verdict rests on the ls word of the
	 * square, which was saved in %ecx.  %edx must not be used: it now
	 * holds the high word of the cross product computed above.
	 */
	or	%ebx,%ecx
	jnz	sqrt_near_exact_large

/* Our estimate is exactly the right answer */
	xorl	%eax,%eax
	jmp	sqrt_round_result

sqrt_near_exact_small:
/* Our estimate is too small */
	movl	$0x000000ff,%eax
	jmp	sqrt_round_result

sqrt_near_exact_large:
/* Our estimate is too large, we need to decrement it */
	subl	$1,%edi
	sbbl	$0,%esi
	movl	$0xffffff00,%eax
	jmp	sqrt_round_result


sqrt_get_more_precision:
/* This case is almost the same as the above, except we start
   with an extra bit of precision in the estimate. */
	stc				/* The extra bit. */
	rcll	$1,%edi			/* Shift the estimate left one bit */
	rcll	$1,%esi

	movl	%edi,%eax		/* ls word of guess */
	mull	%edi
	movl	%edx,%ebx		/* 2nd ls word of square */
	movl	%eax,%ecx		/* ls word of square */

	movl	%edi,%eax
	mull	%esi
	addl	%eax,%ebx
	addl	%eax,%ebx

/* Put our estimate back to its original value */
	stc				/* The ms bit. */
	rcrl	$1,%esi			/* Shift the estimate right one bit */
	rcrl	$1,%edi

#ifdef PARANOID
	cmp	$0xffffff60,%ebx
	jb	sqrt_more_prec_ok

	cmp	$0x000000a0,%ebx
	ja	sqrt_more_prec_ok

	pushl	EX_INTERNAL|0x215
	call	EXCEPTION

sqrt_more_prec_ok:
#endif /* PARANOID */

	or	%ebx,%ebx
	js	sqrt_more_prec_small

	jnz	sqrt_more_prec_large

	or	%ebx,%ecx		/* %ebx == 0: test the ls word of the square */
	jnz	sqrt_more_prec_large

/* Our estimate is exactly the right answer */
	movl	$0x80000000,%eax
	jmp	sqrt_round_result

sqrt_more_prec_small:
/* Our estimate is too small */
	movl	$0x800000ff,%eax
	jmp	sqrt_round_result

sqrt_more_prec_large:
/* Our estimate is too large */
	movl	$0x7fffff00,%eax
	jmp	sqrt_round_result
SYM_FUNC_END(wm_sqrt)