FPU: Improve accuracy in multiply-add almost-cancellation cases

There are two paths for multiply-add instructions; one where the product is larger or nearly the same as the addend, which does the addition/subtraction in the multiplier with 128-bit accuracy; the other is used when the addend is clearly larger, which shifts the product right before doing the addition/subtraction in 64-bit arithmetic. The threshold for the second path is that B_exp has to be greater than A_exp + C_exp + 1, the +1 being because the product mantissa can be greater than 2. This increases the +1 to +2 to make sure that the 128-bit path is used when there is any chance of cancellation of the high-order bits of the sum. With the +1 threshold we could still get close to cancellation when the mantissas of A and C were nearly 2 and the mantissa of B was 1. This improves accuracy and avoids the need to do a 120-bit subtraction in the second path. Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
2026-01-11 23:43:15 +00:00 · 2025-12-12 18:51:13 +11:00 · 2025-12-12 18:51:13 +11:00 · 6fe4b549f5
commit 6fe4b549f5
parent 80c81b58ef
3 changed files with 11 additions and 5 deletions
--- a/fpu.vhdl
+++ b/fpu.vhdl
@ -1786,14 +1786,16 @@ begin
                re_set_result <= '1';
                -- put b.exp into shift
                rs_sel1 <= RSH1_B;
-                if (r.a.exponent + r.c.exponent + 1) < r.b.exponent then
-                    -- addend is bigger, do multiply first
+                if (r.a.exponent + r.c.exponent + 2) < r.b.exponent then
+                    -- addend is definitely bigger, do multiply first
                    -- if subtracting, sign is opposite to initial estimate
                    f_to_multiply.valid <= '1';
                    v.first := '1';
                    v.state := FMADD_0;
                else
-                    -- product is bigger, shift B first
+                    -- product may be bigger, or the answer might be
+                    -- close to 0; shift B first so the multiplier does
+                    -- the add/subtract operation.
                    v.state := FMADD_1;
                end if;

@ -1961,8 +1963,8 @@ begin
                end if;

            when FMADD_1 =>
-                -- shift is b.exp, so new_exp is a.exp + c.exp - b.exp
-                -- product is bigger here
+                -- shift is b.exp, so new_exp is a.exp + c.exp - b.exp (>= -2)
+                -- product may be bigger here
                -- shift B right and use it as the addend to the multiplier
                -- for subtract, multiplier does B - A * C
                re_sel2 <= REXP2_B;
@ -3342,6 +3344,8 @@ begin
        ci := '0';
        case opsel_c is
            when CIN_SUBEXT =>
+                -- Used with opsel_b = BIN_ADDSUBR, which will invert it if
+                -- r.is_subtract = 1, hence we use r.x here, rather than not r.x.
                ci := r.is_subtract and r.x;
            when CIN_ABSEXT =>
                ci := r.r(63) and (s_nz or r.x);
--- a/tests/fpu/fpu.c
+++ b/tests/fpu/fpu.c
@ -1622,6 +1622,8 @@ struct fmavals {
 	  0x014fd79870000001, 0x014fd79870000000, 0x814fd79870000001, 0x814fd79870000000 },
 	{ 0x00000000ffffffff, 0x1fc771af627f62ab, 0x8000000000000000, FPS_RN_ZERO,
 	  0x0000000000000000, 0x0000000000000000, 0x8000000000000000, 0x8000000000000000 },
+	{ 0x41efffffffe00000, 0xc1efffffffe00000, 0x43f0000000000000, FPS_RN_CEIL,
+	  0x41fffffffff00000, 0xc3ffffffffe00000, 0xc1fffffffff00000, 0x43ffffffffe00000 },
 };

 int test23(long arg)
--- a/tests/test_fpu.bin
+++ b/tests/test_fpu.bin