From 80c81b58efeb7d77e6b300c813b234d18af2c92a Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 12 Dec 2025 16:44:43 +1100 Subject: [PATCH] FPU: Generate correct result sign when B is denormal If a subtraction A - B is done where A is in normalized form with an exponent of -1022, and B is denormal, an inconsistency arises between the comparison of the raw exponents in the first cycle, which sees A.exp (0x001) > B.exp (0x000), and the comparison in DO_FADD state, which sees r.a.exponent (-1022) = r.b.exponent (-1022). Consequently we get r.add_bsmall = 0 and the subtraction is done the wrong way around, yielding the wrong sign for the result. Fix this by setting r.add_bsmall according to the comparison of raw exponents in the first cycle and then using it in DO_FADD state. Also add a test case for this. Signed-off-by: Paul Mackerras --- fpu.vhdl | 15 ++++---- tests/fpu/fpu.c | 92 +++++++++++++++++++++++---------------------- tests/test_fpu.bin | Bin 33688 -> 34432 bytes 3 files changed, 55 insertions(+), 52 deletions(-) diff --git a/fpu.vhdl b/fpu.vhdl index 48c021d..3bc7b3e 100644 --- a/fpu.vhdl +++ b/fpu.vhdl @@ -1102,6 +1102,7 @@ begin v.is_addition := '0'; v.is_subtract := '0'; v.is_inverse := '0'; + v.add_bsmall := '0'; v.do_renorm_b := '0'; fpin_a := '0'; fpin_b := '0'; @@ -1140,6 +1141,8 @@ begin v.result_sign := e_in.fra(63); if unsigned(e_in.fra(62 downto 52)) <= unsigned(e_in.frb(62 downto 52)) then v.result_sign := e_in.frb(63) xnor e_in.insn(1); + else + v.add_bsmall := '1'; end if; v.is_subtract := not (e_in.fra(63) xor e_in.frb(63) xor e_in.insn(1)); when "11001" => -- fmul @@ -1255,7 +1258,6 @@ begin end case; v.tiny := '0'; v.denorm := '0'; - v.add_bsmall := '0'; v.int_ovf := '0'; v.div_close := '0'; @@ -1705,15 +1707,13 @@ begin rs_sel1 <= RSH1_B; rs_neg1 <= '1'; rs_sel2 <= RSH2_A; - v.add_bsmall := '0'; - if r.a.exponent = r.b.exponent then + if r.add_bsmall = '1' then + v.state := ADD_1; + elsif r.a.exponent = r.b.exponent then v.state := 
ADD_2B; - elsif r.a.exponent < r.b.exponent then + elsif v.add_bsmall = '0' then v.longmask := '0'; v.state := ADD_SHIFT; - else - v.add_bsmall := '1'; - v.state := ADD_1; end if; when DO_FMUL => @@ -1856,6 +1856,7 @@ begin re_sel2 <= REXP2_B; re_set_result <= '1'; -- set shift to b.exp - a.exp + -- (N.B., shift can be 0 if B is denorm and A's exp is -1022) rs_sel1 <= RSH1_B; rs_sel2 <= RSH2_A; rs_neg2 <= '1'; diff --git a/tests/fpu/fpu.c b/tests/fpu/fpu.c index ccf07f8..b72e069 100644 --- a/tests/fpu/fpu.c +++ b/tests/fpu/fpu.c @@ -975,51 +975,53 @@ struct addvals { unsigned long val_b; unsigned long sum; unsigned long diff; + unsigned long fpscr; } addvals[] = { - { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }, - { 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000 }, - { 0x3fdfffffffffffff, 0x0000000000000000, 0x3fdfffffffffffff, 0x3fdfffffffffffff }, - { 0x3ff0000000000000, 0x3ff0000000000000, 0x4000000000000000, 0x0000000000000000 }, - { 0xbff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x0000000000000000 }, - { 0x402123456789abcd, 0x4021000000000000, 0x403111a2b3c4d5e6, 0x3fb1a2b3c4d5e680 }, - { 0x4061200000000000, 0x406123456789abcd, 0x407121a2b3c4d5e6, 0xbfba2b3c4d5e6800 }, - { 0x4061230000000000, 0x3fa4560000000000, 0x4061244560000000, 0x406121baa0000000 }, - { 0xc061230000000000, 0x3fa4560000000000, 0xc06121baa0000000, 0xc061244560000000 }, - { 0x4061230000000000, 0xbfa4560000000000, 0x406121baa0000000, 0x4061244560000000 }, - { 0xc061230000000000, 0xbfa4560000000000, 0xc061244560000000, 0xc06121baa0000000 }, - { 0x3fa1230000000000, 0x4064560000000000, 0x4064571230000000, 0xc06454edd0000000 }, - { 0xbfa1230000000000, 0x4064560000000000, 0x406454edd0000000, 0xc064571230000000 }, - { 0x3fa1230000000000, 0xc064560000000000, 0xc06454edd0000000, 0x4064571230000000 }, - { 0xbfa1230000000000, 0xc064560000000000, 0xc064571230000000, 0x406454edd0000000 }, - { 0x6780000000000001, 
0x6470000000000000, 0x6780000000000009, 0x677ffffffffffff2 }, - { 0x6780000000000001, 0x6460000000000000, 0x6780000000000005, 0x677ffffffffffffa }, - { 0x6780000000000001, 0x6450000000000000, 0x6780000000000003, 0x677ffffffffffffe }, - { 0x6780000000000001, 0x6440000000000000, 0x6780000000000002, 0x6780000000000000 }, - { 0x7ff8888888888888, 0x7ff9999999999999, 0x7ff8888888888888, 0x7ff8888888888888 }, - { 0xfff8888888888888, 0x7ff9999999999999, 0xfff8888888888888, 0xfff8888888888888 }, - { 0x7ff8888888888888, 0x7ff0000000000000, 0x7ff8888888888888, 0x7ff8888888888888 }, - { 0x7ff8888888888888, 0x0000000000000000, 0x7ff8888888888888, 0x7ff8888888888888 }, - { 0x7ff8888888888888, 0x0001111111111111, 0x7ff8888888888888, 0x7ff8888888888888 }, - { 0x7ff8888888888888, 0x3ff0000000000000, 0x7ff8888888888888, 0x7ff8888888888888 }, - { 0x7ff0000000000000, 0x7ff9999999999999, 0x7ff9999999999999, 0x7ff9999999999999 }, - { 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff8000000000000 }, - { 0x7ff0000000000000, 0xfff0000000000000, 0x7ff8000000000000, 0x7ff0000000000000 }, - { 0x7ff0000000000000, 0x0000000000000000, 0x7ff0000000000000, 0x7ff0000000000000 }, - { 0x7ff0000000000000, 0x8000000000000000, 0x7ff0000000000000, 0x7ff0000000000000 }, - { 0x7ff0000000000000, 0x8002222222222222, 0x7ff0000000000000, 0x7ff0000000000000 }, - { 0x7ff0000000000000, 0xc002222222222222, 0x7ff0000000000000, 0x7ff0000000000000 }, - { 0x0000000000000000, 0x7ff9999999999999, 0x7ff9999999999999, 0x7ff9999999999999 }, - { 0x0000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0xfff0000000000000 }, - { 0x8000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0xfff0000000000000 }, - { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }, - { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }, - { 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000 }, - { 0x8000000000000000, 0x8000000000000000, 
0x8000000000000000, 0x0000000000000000 }, - { 0x8002222222222222, 0x0001111111111111, 0x8001111111111111, 0x8003333333333333 }, - { 0x0000022222222222, 0x0000111111111111, 0x0000133333333333, 0x80000eeeeeeeeeef }, - { 0x401ffffffbfffefe, 0x406b8265196bd89e, 0x406c8265194bd896, 0xc06a8265198bd8a6 }, - { 0x4030020000000004, 0xbf110001ffffffff, 0x403001fbbfff8004, 0x4030020440008004 }, - { 0x3fdfffffffffffff, 0x3fe0000000000000, 0x3ff0000000000000, 0xbc90000000000000 }, + { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, FPS_RN_NEAR }, + { 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000, FPS_RN_NEAR }, + { 0x3fdfffffffffffff, 0x0000000000000000, 0x3fdfffffffffffff, 0x3fdfffffffffffff, FPS_RN_NEAR }, + { 0x3ff0000000000000, 0x3ff0000000000000, 0x4000000000000000, 0x0000000000000000, FPS_RN_NEAR }, + { 0xbff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x0000000000000000, FPS_RN_NEAR }, + { 0x402123456789abcd, 0x4021000000000000, 0x403111a2b3c4d5e6, 0x3fb1a2b3c4d5e680, FPS_RN_NEAR }, + { 0x4061200000000000, 0x406123456789abcd, 0x407121a2b3c4d5e6, 0xbfba2b3c4d5e6800, FPS_RN_NEAR }, + { 0x4061230000000000, 0x3fa4560000000000, 0x4061244560000000, 0x406121baa0000000, FPS_RN_NEAR }, + { 0xc061230000000000, 0x3fa4560000000000, 0xc06121baa0000000, 0xc061244560000000, FPS_RN_NEAR }, + { 0x4061230000000000, 0xbfa4560000000000, 0x406121baa0000000, 0x4061244560000000, FPS_RN_NEAR }, + { 0xc061230000000000, 0xbfa4560000000000, 0xc061244560000000, 0xc06121baa0000000, FPS_RN_NEAR }, + { 0x3fa1230000000000, 0x4064560000000000, 0x4064571230000000, 0xc06454edd0000000, FPS_RN_NEAR }, + { 0xbfa1230000000000, 0x4064560000000000, 0x406454edd0000000, 0xc064571230000000, FPS_RN_NEAR }, + { 0x3fa1230000000000, 0xc064560000000000, 0xc06454edd0000000, 0x4064571230000000, FPS_RN_NEAR }, + { 0xbfa1230000000000, 0xc064560000000000, 0xc064571230000000, 0x406454edd0000000, FPS_RN_NEAR }, + { 0x6780000000000001, 
0x6470000000000000, 0x6780000000000009, 0x677ffffffffffff2, FPS_RN_NEAR }, + { 0x6780000000000001, 0x6460000000000000, 0x6780000000000005, 0x677ffffffffffffa, FPS_RN_NEAR }, + { 0x6780000000000001, 0x6450000000000000, 0x6780000000000003, 0x677ffffffffffffe, FPS_RN_NEAR }, + { 0x6780000000000001, 0x6440000000000000, 0x6780000000000002, 0x6780000000000000, FPS_RN_NEAR }, + { 0x7ff8888888888888, 0x7ff9999999999999, 0x7ff8888888888888, 0x7ff8888888888888, FPS_RN_NEAR }, + { 0xfff8888888888888, 0x7ff9999999999999, 0xfff8888888888888, 0xfff8888888888888, FPS_RN_NEAR }, + { 0x7ff8888888888888, 0x7ff0000000000000, 0x7ff8888888888888, 0x7ff8888888888888, FPS_RN_NEAR }, + { 0x7ff8888888888888, 0x0000000000000000, 0x7ff8888888888888, 0x7ff8888888888888, FPS_RN_NEAR }, + { 0x7ff8888888888888, 0x0001111111111111, 0x7ff8888888888888, 0x7ff8888888888888, FPS_RN_NEAR }, + { 0x7ff8888888888888, 0x3ff0000000000000, 0x7ff8888888888888, 0x7ff8888888888888, FPS_RN_NEAR }, + { 0x7ff0000000000000, 0x7ff9999999999999, 0x7ff9999999999999, 0x7ff9999999999999, FPS_RN_NEAR }, + { 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff8000000000000, FPS_RN_NEAR }, + { 0x7ff0000000000000, 0xfff0000000000000, 0x7ff8000000000000, 0x7ff0000000000000, FPS_RN_NEAR }, + { 0x7ff0000000000000, 0x0000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, FPS_RN_NEAR }, + { 0x7ff0000000000000, 0x8000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, FPS_RN_NEAR }, + { 0x7ff0000000000000, 0x8002222222222222, 0x7ff0000000000000, 0x7ff0000000000000, FPS_RN_NEAR }, + { 0x7ff0000000000000, 0xc002222222222222, 0x7ff0000000000000, 0x7ff0000000000000, FPS_RN_NEAR }, + { 0x0000000000000000, 0x7ff9999999999999, 0x7ff9999999999999, 0x7ff9999999999999, FPS_RN_NEAR }, + { 0x0000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0xfff0000000000000, FPS_RN_NEAR }, + { 0x8000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0xfff0000000000000, FPS_RN_NEAR }, + { 0x0000000000000000, 0x0000000000000000, 
0x0000000000000000, 0x0000000000000000, FPS_RN_NEAR }, + { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, FPS_RN_NEAR }, + { 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000, FPS_RN_NEAR }, + { 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000, FPS_RN_NEAR }, + { 0x8002222222222222, 0x0001111111111111, 0x8001111111111111, 0x8003333333333333, FPS_RN_NEAR }, + { 0x0000022222222222, 0x0000111111111111, 0x0000133333333333, 0x80000eeeeeeeeeef, FPS_RN_NEAR }, + { 0x401ffffffbfffefe, 0x406b8265196bd89e, 0x406c8265194bd896, 0xc06a8265198bd8a6, FPS_RN_NEAR }, + { 0x4030020000000004, 0xbf110001ffffffff, 0x403001fbbfff8004, 0x4030020440008004, FPS_RN_NEAR }, + { 0x3fdfffffffffffff, 0x3fe0000000000000, 0x3ff0000000000000, 0xbc90000000000000, FPS_RN_NEAR }, + { 0x001000100010000f, 0x00000000000000ff, 0x001000100010010e, 0x00100010000fff10, FPS_RN_CEIL }, }; int test13(long arg) @@ -1029,8 +1031,8 @@ int test13(long arg) struct addvals *vp = addvals; unsigned long fpscr; - set_fpscr(FPS_RN_NEAR); for (i = 0; i < sizeof(addvals) / sizeof(addvals[0]); ++i, ++vp) { + set_fpscr(vp->fpscr); asm("lfd 5,0(%0); lfd 6,8(%0); fadd 7,5,6; fsub 8,5,6; stfd 7,0(%1); stfd 8,8(%1)" : : "b" (&vp->val_a), "b" (results) : "memory"); fpscr = get_fpscr(); diff --git a/tests/test_fpu.bin b/tests/test_fpu.bin index 2a7845bd7939f18fa5a2444a80996f887ced14ab..ed714b776907036c03b56b2d80d67a521a382909 100755 GIT binary patch delta 1459 zcmbVKT}V@582--Z&wdC!Y3TA|9T%->8Maa~a~&s?tyobYB?PnF&gQ0*gtD+am8b=2 z?Y$ZFx4MXs8p^42X`_pNp`V+&Fl<3cix|=$hf2yMwAlzB`F z*D;9qB0@&xd}iiDF1J;Rr6bjjif&T>kkme!d8X5PRW0dF3?ei!bL$UU*2@#^^h{jS z4Dz5M5uVVi@Z@db8~uVZkBXS=Bh7=^k~TalLVanql}yiIvLV?ZgIcQxUgb`({a@Y= z=E*rYR%)*}KqRubEMHLNd_|rvwVHcH_i?E&X*gHvG!KdUcq!}^1>2&XW@-FE>##vy zV_6%8*$Hq}*@c1SDGKktCb+5&VjRCMh%1~0cyda3U%QFIJCcIaxt_wUSB0K60m`wu z7`0x>RnAL&5lT0*=kGQ$=n6TmOf9PXT#Md37;8e`;Op96$_L$bjV1@0ZLC2XS8jl* 
zx-XPNxLMy~z`Q7YYVu%wLIf9UH^)WWDXHaI5qj7ox={GUwo{m-z3vJe7op2tvFR*v z;6^?gnApx9l{miZIh>EG8uYn5#5OGWEl2+n!y~s<>iFL6EK zh&7iGd)2to-ip;ly6jbiRHEiX&4-%LYQYt~7)vVS_ z376zlf&}%}vm08rWG&|?$_`sR)l1;D#TN*;>>~wf>pNm?m(O&}-l`zU*&DS8V$I(2 znPC+n~247RH?dXqtJ@l&Ijd zQwzp&JiB5b$z_6LE)m+1`^XFAE3&;tgo8*Oa-+roPqEG+{3#LAkut=O^di&9G9tRo z;Bbr3f~1{Pfwo!+{cZtf-FoQ2s;N-Hh(~}p&ko4*B!JsvhAWk72zs-z{0^u&_*xTW)c(24chpREA`~O1>kQC? zbrhLLgnALOkV+(gOw{Z7K=@nkgg0S91DXasq&58iVug+WUhG7Z0sKw)^N?v|xhV