From b122577a4e658fa8332cf6c4a3b6c04d0b3d56a8 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 12 Dec 2025 10:12:10 +1100 Subject: [PATCH] FPU: Be more careful about preserving low-order bits in multiply-add instrs Add code to check whether bits of S which don't get shifted into R are non-zero, and set X if they are, so that rounding in multiply-add instructions works correctly. This needs to be done after normalization in the case of very small results, where potentially all the non-zero bits in S do get shifted into R. Also fix an incorrect test case, and add another multiply-add test case. Signed-off-by: Paul Mackerras --- fpu.vhdl | 22 +++++++++++++++++++--- tests/fpu/fpu.c | 7 +++++-- tests/test_fpu.bin | Bin 33560 -> 33624 bytes 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/fpu.vhdl b/fpu.vhdl index 07617af..7d8060a 100644 --- a/fpu.vhdl +++ b/fpu.vhdl @@ -1003,6 +1003,7 @@ begin variable exp_huge : std_ulogic; variable clz : std_ulogic_vector(5 downto 0); variable set_x : std_ulogic; + variable set_xs : std_ulogic; variable mshift : signed(EXP_BITS-1 downto 0); variable need_check : std_ulogic; variable msb : std_ulogic; @@ -1056,6 +1057,7 @@ begin variable bneg : std_ulogic; variable ci : std_ulogic; variable rormr : std_ulogic_vector(63 downto 0); + variable sorms : std_ulogic_vector(55 downto 0); begin v := r; v.complete := '0'; @@ -1358,6 +1360,7 @@ begin invalid := '0'; zero_divide := '0'; set_x := '0'; + set_xs := '0'; qnan_result := '0'; set_a := '0'; set_a_exp := '0'; @@ -1931,6 +1934,8 @@ begin f_to_multiply.valid <= r.first; opsel_r <= RES_MULT; set_r := '1'; + opsel_s <= S_MULT; + set_s := '1'; if multiply_to_f.valid = '1' then v.state := FINISH; end if; @@ -1971,6 +1976,7 @@ begin when FMADD_2 => -- Product is potentially bigger here -- r.shift = addend exp - product exp + 64, r.r = r.b.mantissa + -- R contains B, S contains 0 set_s := '1'; opsel_s <= S_SHIFT; set_x := '1'; @@ -2408,9 +2414,6 @@ begin when FINISH => -- r.shift = 0 - if r.is_multiply = '1' and px_nz = '1' then - v.x := '1'; - end if; -- set shift to new_exp - min_exp (N.B. rs_norm overrides this) rs_sel1 <= RSH1_NE; rs_con2 <= RSCON2_MINEXP; @@ -2420,6 +2423,7 @@ begin v.state := NORMALIZE; else set_x := '1'; + set_xs := r.is_multiply; if exp_tiny = '1' then v.state := ROUND_UFLOW; elsif exp_huge = '1' and r.fpscr(FPSCR_OE) = '0' then @@ -2441,6 +2445,7 @@ begin rs_con2 <= RSCON2_MINEXP; rs_neg2 <= '1'; set_x := '1'; + set_xs := r.is_multiply; if exp_tiny = '1' then v.state := ROUND_UFLOW; elsif exp_huge = '1' and r.fpscr(FPSCR_OE) = '0' then @@ -2485,6 +2490,7 @@ begin re_sel2 <= REXP2_NE; re_set_result <= '1'; set_x := '1'; + set_xs := r.is_multiply; v.state := ROUNDING; when ROUND_OFLOW_DIS => @@ -3309,6 +3315,16 @@ begin end if; v.x := v.x or rormr(to_integer(unsigned(mshift(5 downto 0)))); end if; + -- Test if there are non-zero bits in S which won't get shifted into R + if set_xs = '1' and not is_X(r.shift) and r.shift < to_signed(56, EXP_BITS) then + if r.shift > to_signed(0, EXP_BITS) then + mshift := to_signed(55, EXP_BITS) - r.shift; + else + mshift := to_signed(55, EXP_BITS); + end if; + sorms := r.s or std_ulogic_vector(- signed(r.s)); + v.x := v.x or sorms(to_integer(unsigned(mshift(5 downto 0)))); + end if; asign := '0'; case opsel_a is when AIN_A => diff --git a/tests/fpu/fpu.c b/tests/fpu/fpu.c index 5f0131c..89fb44f 100644 --- a/tests/fpu/fpu.c +++ b/tests/fpu/fpu.c @@ -1543,9 +1543,9 @@ struct fmavals { /* +(1 + 2^-52) * +(1 + 2^-52) +- +1.0 -> +(2 + 2^-51), +2^-51, -(2 + 2^-51), -2^-51 */ { 0x3ff0000000000001, 0x3ff0000000000001, 0x3ff0000000000000, FPS_RN_NEAR, 0x4000000000000001, 0x3cc0000000000000, 0xc000000000000001, 0xbcc0000000000000 }, - /* +(1 + 3*2^-52) * +(1 + 2^-51) +- +1.0 -> +(2 + 2^-50), +5 * 2^-52 + 2^-101, -, - */ + /* +(1 + 3*2^-52) * +(1 + 2^-51) +- +1.0 -> +(2 + 3*2^-51), +5 * 2^-52 + 2^-101, -, - */ { 0x3ff0000000000003, 0x3ff0000000000002, 0x3ff0000000000000, FPS_RN_NEAR, - 0x4000000000000002, 0x3cd4000000000002, 0xc000000000000002, 0xbcd4000000000002 }, + 0x4000000000000003, 0x3cd4000000000002, 0xc000000000000003, 0xbcd4000000000002 }, /* +2.443e-77 * 2.828 +- 6.909e-77 -> -1.402e-93, +1.382e-76, +1.402e-93, -1.382e-76 */ { 0x3006a09e667f3bcc, 0x4006a09e667f3bcd, 0xb020000000000000, FPS_RN_NEAR, 0xaca765753908cd20, 0x3030000000000000, 0x2ca765753908cd20, 0xb030000000000000 }, @@ -1615,6 +1615,9 @@ struct fmavals { /* 1 * -1 + tiny -> -1 + delta, -1, 1 - delta, 1 */ { 0x3ff0000000000000, 0xbff0000000000000, 0x00000000b2200102, FPS_RN_CEIL, 0xbfefffffffffffff, 0xbff0000000000000, 0x3fefffffffffffff, 0x3ff0000000000000 }, + /* from random exec tests */ + { 0x43eff79000000000, 0x00000000000000ff, 0x0000000000000081, FPS_RN_CEIL, + 0x014fd79870000001, 0x014fd79870000000, 0x814fd79870000001, 0x814fd79870000000 }, }; int test23(long arg) diff --git a/tests/test_fpu.bin b/tests/test_fpu.bin index 229e70f68afa50d2af321912e16afa1f774477ee..e6a21b8d72d9e5f5afd9e0892d5f88ed437c4233 100755 GIT binary patch delta 334 zcmbQy#&n~NX@iFVW58rj0d3iU%}SOXo1H8f7*3vMU^sRA!vFu?3=9kjAnY-DqJT7` z$K2TI=svOP8{iUe~pc1$jnd(Ff#adMLU zJjNZH85MrBf^@2eGcrjmpKPb8wz)?A03(yZ;>~uNWlW3(o40BkgH*gUaA#!FSiRZE z$cm9kVgBYk<2FXd1(Uy;{AN;^viXxK6C-2AGk{Kr+!1NN(1( zy3WX{%wVVU;$M@)jL9D@r6+6H>|j!uy!nyMHYq`7pjUvvfeB1qv0>i4GW;^D&4lmo so&SS{7#JF%G(-sl10w@N!HnzvKng-b`5CNC8>2TI=svN<*@iUe~pdQ2{rd(CuW!sI0R zd5jgC85MrBf^@2eGcw&+HrY;7ZF7zK0Y;_|i#FS7mN79JY~HGE3{vsZz@3rl#j4Fl zMpleW59V#oGj3yKOql%DY=G!!NT=zRD=P J`Fw1rDgYd{U)lfw