| From f3ed8bfe8a82af1870ddc8696ed4cc1d5aa6b441 Mon Sep 17 00:00:00 2001 |
| From: Rich Felker <dalias@aerifal.cx> |
| Date: Mon, 5 Aug 2019 18:41:47 -0400 |
| Subject: [PATCH] fix x87 stack imbalance in corner cases of i386 math asm |
| |
| commit 31c5fb80b9eae86f801be4f46025bc6532a554c5 introduced underflow |
| code paths for the i386 math asm, along with checks on the fpu status |
| word to skip the underflow-generation instructions if the underflow |
| flag was already raised. unfortunately, at least one such path, in |
| log1p, returned with 2 items on the x87 stack rather than just 1 item |
| for the return value. this is a violation of the ABI's calling |
| convention, and could cause subsequent floating point code to produce |
| NANs due to x87 stack overflow. if floating point results are used in |
| flow control, this can lead to runaway wrong code execution. |
| |
| rather than reviewing each "underflow already raised" code path for |
| correctness, remove them all. they're likely slower than just |
| performing the underflow code unconditionally, and significantly more |
| complex. |
| |
| all of this code should be ripped out and replaced by C source files |
| with inline asm. doing so would preclude this kind of error by having |
| the compiler perform all x87 stack register allocation and stack |
| manipulation, and would produce comparable or better code. however |
| such a change is a much larger project. |
| |
| Signed-off-by: Peter Korsgaard <peter@korsgaard.com> |
| --- |
| src/math/i386/asin.s | 10 ++-------- |
| src/math/i386/atan.s | 7 ++----- |
| src/math/i386/atan2.s | 5 +---- |
| src/math/i386/atan2f.s | 5 +---- |
| src/math/i386/atanf.s | 7 ++----- |
| src/math/i386/exp.s | 10 ++-------- |
| src/math/i386/log1p.s | 7 ++----- |
| src/math/i386/log1pf.s | 7 ++----- |
| 8 files changed, 14 insertions(+), 44 deletions(-) |
| |
| diff --git a/src/math/i386/asin.s b/src/math/i386/asin.s |
| index a9f691bf..920d967a 100644 |
| --- a/src/math/i386/asin.s |
| +++ b/src/math/i386/asin.s |
| @@ -7,13 +7,10 @@ asinf: |
| cmp $0x01000000,%eax |
| jae 1f |
| # subnormal x, return x with underflow |
| - fnstsw %ax |
| - and $16,%ax |
| - jnz 2f |
| fld %st(0) |
| fmul %st(1) |
| fstps 4(%esp) |
| -2: ret |
| + ret |
| |
| .global asinl |
| .type asinl,@function |
| @@ -30,11 +27,8 @@ asin: |
| cmp $0x00200000,%eax |
| jae 1f |
| # subnormal x, return x with underflow |
| - fnstsw %ax |
| - and $16,%ax |
| - jnz 2f |
| fsts 4(%esp) |
| -2: ret |
| + ret |
| 1: fld %st(0) |
| fld1 |
| fsub %st(0),%st(1) |
| diff --git a/src/math/i386/atan.s b/src/math/i386/atan.s |
| index d73137b2..a26feae1 100644 |
| --- a/src/math/i386/atan.s |
| +++ b/src/math/i386/atan.s |
| @@ -10,8 +10,5 @@ atan: |
| fpatan |
| ret |
| # subnormal x, return x with underflow |
| -1: fnstsw %ax |
| - and $16,%ax |
| - jnz 2f |
| - fsts 4(%esp) |
| -2: ret |
| +1: fsts 4(%esp) |
| + ret |
| diff --git a/src/math/i386/atan2.s b/src/math/i386/atan2.s |
| index a7d2979b..1fa0524d 100644 |
| --- a/src/math/i386/atan2.s |
| +++ b/src/math/i386/atan2.s |
| @@ -10,8 +10,5 @@ atan2: |
| cmp $0x00200000,%eax |
| jae 1f |
| # subnormal x, return x with underflow |
| - fnstsw %ax |
| - and $16,%ax |
| - jnz 1f |
| fsts 4(%esp) |
| -1: ret |
| + ret |
| diff --git a/src/math/i386/atan2f.s b/src/math/i386/atan2f.s |
| index 14b88ce5..0b264726 100644 |
| --- a/src/math/i386/atan2f.s |
| +++ b/src/math/i386/atan2f.s |
| @@ -10,10 +10,7 @@ atan2f: |
| cmp $0x01000000,%eax |
| jae 1f |
| # subnormal x, return x with underflow |
| - fnstsw %ax |
| - and $16,%ax |
| - jnz 1f |
| fld %st(0) |
| fmul %st(1) |
| fstps 4(%esp) |
| -1: ret |
| + ret |
| diff --git a/src/math/i386/atanf.s b/src/math/i386/atanf.s |
| index 8caddefa..893beac5 100644 |
| --- a/src/math/i386/atanf.s |
| +++ b/src/math/i386/atanf.s |
| @@ -10,10 +10,7 @@ atanf: |
| fpatan |
| ret |
| # subnormal x, return x with underflow |
| -1: fnstsw %ax |
| - and $16,%ax |
| - jnz 2f |
| - fld %st(0) |
| +1: fld %st(0) |
| fmul %st(1) |
| fstps 4(%esp) |
| -2: ret |
| + ret |
| diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s |
| index c7aa5b6e..df87c497 100644 |
| --- a/src/math/i386/exp.s |
| +++ b/src/math/i386/exp.s |
| @@ -7,13 +7,10 @@ expm1f: |
| cmp $0x01000000,%eax |
| jae 1f |
| # subnormal x, return x with underflow |
| - fnstsw %ax |
| - and $16,%ax |
| - jnz 2f |
| fld %st(0) |
| fmul %st(1) |
| fstps 4(%esp) |
| -2: ret |
| + ret |
| |
| .global expm1l |
| .type expm1l,@function |
| @@ -30,11 +27,8 @@ expm1: |
| cmp $0x00200000,%eax |
| jae 1f |
| # subnormal x, return x with underflow |
| - fnstsw %ax |
| - and $16,%ax |
| - jnz 2f |
| fsts 4(%esp) |
| -2: ret |
| + ret |
| 1: fldl2e |
| fmulp |
| mov $0xc2820000,%eax |
| diff --git a/src/math/i386/log1p.s b/src/math/i386/log1p.s |
| index 6b6929c7..354f391a 100644 |
| --- a/src/math/i386/log1p.s |
| +++ b/src/math/i386/log1p.s |
| @@ -16,9 +16,6 @@ log1p: |
| fyl2x |
| ret |
| # subnormal x, return x with underflow |
| -2: fnstsw %ax |
| - and $16,%ax |
| - jnz 1f |
| - fsts 4(%esp) |
| +2: fsts 4(%esp) |
| fstp %st(1) |
| -1: ret |
| + ret |
| diff --git a/src/math/i386/log1pf.s b/src/math/i386/log1pf.s |
| index c0bcd30f..4d3484cd 100644 |
| --- a/src/math/i386/log1pf.s |
| +++ b/src/math/i386/log1pf.s |
| @@ -16,10 +16,7 @@ log1pf: |
| fyl2x |
| ret |
| # subnormal x, return x with underflow |
| -2: fnstsw %ax |
| - and $16,%ax |
| - jnz 1f |
| - fxch |
| +2: fxch |
| fmul %st(1) |
| fstps 4(%esp) |
| -1: ret |
| + ret |
| -- |
| 2.11.0 |
| |