1
0
mirror of https://github.com/antonblanchard/microwatt.git synced 2026-01-13 15:18:09 +00:00

FPU: Generate correct result sign when B is denormal

If a subtraction A - B is done where A is in normalized form with an
exponent of -1022, and B is denormal, an inconsistency arises between
the comparison of the raw exponents in the first cycle, which sees
A.exp (0x001) > B.exp (0x000), and the comparison in DO_FADD state,
which sees r.a.exponent (-1022) = r.b.exponent (-1022).  Conseqently
we get r.add_bsmall = 0 and the subtraction is done the wrong way
around, yielding the wrong sign for the result.

Fix this by setting r.add_bsmall according to the comparison of raw
exponents in the first cycle and then using it in DO_FADD state.
Also add a test case for this.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
Paul Mackerras 2025-12-12 16:44:43 +11:00
parent f631dcd700
commit 80c81b58ef
3 changed files with 55 additions and 52 deletions

View File

@ -1102,6 +1102,7 @@ begin
v.is_addition := '0';
v.is_subtract := '0';
v.is_inverse := '0';
v.add_bsmall := '0';
v.do_renorm_b := '0';
fpin_a := '0';
fpin_b := '0';
@ -1140,6 +1141,8 @@ begin
v.result_sign := e_in.fra(63);
if unsigned(e_in.fra(62 downto 52)) <= unsigned(e_in.frb(62 downto 52)) then
v.result_sign := e_in.frb(63) xnor e_in.insn(1);
else
v.add_bsmall := '1';
end if;
v.is_subtract := not (e_in.fra(63) xor e_in.frb(63) xor e_in.insn(1));
when "11001" => -- fmul
@ -1255,7 +1258,6 @@ begin
end case;
v.tiny := '0';
v.denorm := '0';
v.add_bsmall := '0';
v.int_ovf := '0';
v.div_close := '0';
@ -1705,15 +1707,13 @@ begin
rs_sel1 <= RSH1_B;
rs_neg1 <= '1';
rs_sel2 <= RSH2_A;
v.add_bsmall := '0';
if r.a.exponent = r.b.exponent then
if r.add_bsmall = '1' then
v.state := ADD_1;
elsif r.a.exponent = r.b.exponent then
v.state := ADD_2B;
elsif r.a.exponent < r.b.exponent then
elsif v.add_bsmall = '0' then
v.longmask := '0';
v.state := ADD_SHIFT;
else
v.add_bsmall := '1';
v.state := ADD_1;
end if;
when DO_FMUL =>
@ -1856,6 +1856,7 @@ begin
re_sel2 <= REXP2_B;
re_set_result <= '1';
-- set shift to b.exp - a.exp
-- (N.B., shift can be 0 if B is denorm and A's exp is -1022)
rs_sel1 <= RSH1_B;
rs_sel2 <= RSH2_A;
rs_neg2 <= '1';

View File

@ -975,51 +975,53 @@ struct addvals {
unsigned long val_b;
unsigned long sum;
unsigned long diff;
unsigned long fpscr;
} addvals[] = {
{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 },
{ 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000 },
{ 0x3fdfffffffffffff, 0x0000000000000000, 0x3fdfffffffffffff, 0x3fdfffffffffffff },
{ 0x3ff0000000000000, 0x3ff0000000000000, 0x4000000000000000, 0x0000000000000000 },
{ 0xbff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x0000000000000000 },
{ 0x402123456789abcd, 0x4021000000000000, 0x403111a2b3c4d5e6, 0x3fb1a2b3c4d5e680 },
{ 0x4061200000000000, 0x406123456789abcd, 0x407121a2b3c4d5e6, 0xbfba2b3c4d5e6800 },
{ 0x4061230000000000, 0x3fa4560000000000, 0x4061244560000000, 0x406121baa0000000 },
{ 0xc061230000000000, 0x3fa4560000000000, 0xc06121baa0000000, 0xc061244560000000 },
{ 0x4061230000000000, 0xbfa4560000000000, 0x406121baa0000000, 0x4061244560000000 },
{ 0xc061230000000000, 0xbfa4560000000000, 0xc061244560000000, 0xc06121baa0000000 },
{ 0x3fa1230000000000, 0x4064560000000000, 0x4064571230000000, 0xc06454edd0000000 },
{ 0xbfa1230000000000, 0x4064560000000000, 0x406454edd0000000, 0xc064571230000000 },
{ 0x3fa1230000000000, 0xc064560000000000, 0xc06454edd0000000, 0x4064571230000000 },
{ 0xbfa1230000000000, 0xc064560000000000, 0xc064571230000000, 0x406454edd0000000 },
{ 0x6780000000000001, 0x6470000000000000, 0x6780000000000009, 0x677ffffffffffff2 },
{ 0x6780000000000001, 0x6460000000000000, 0x6780000000000005, 0x677ffffffffffffa },
{ 0x6780000000000001, 0x6450000000000000, 0x6780000000000003, 0x677ffffffffffffe },
{ 0x6780000000000001, 0x6440000000000000, 0x6780000000000002, 0x6780000000000000 },
{ 0x7ff8888888888888, 0x7ff9999999999999, 0x7ff8888888888888, 0x7ff8888888888888 },
{ 0xfff8888888888888, 0x7ff9999999999999, 0xfff8888888888888, 0xfff8888888888888 },
{ 0x7ff8888888888888, 0x7ff0000000000000, 0x7ff8888888888888, 0x7ff8888888888888 },
{ 0x7ff8888888888888, 0x0000000000000000, 0x7ff8888888888888, 0x7ff8888888888888 },
{ 0x7ff8888888888888, 0x0001111111111111, 0x7ff8888888888888, 0x7ff8888888888888 },
{ 0x7ff8888888888888, 0x3ff0000000000000, 0x7ff8888888888888, 0x7ff8888888888888 },
{ 0x7ff0000000000000, 0x7ff9999999999999, 0x7ff9999999999999, 0x7ff9999999999999 },
{ 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff8000000000000 },
{ 0x7ff0000000000000, 0xfff0000000000000, 0x7ff8000000000000, 0x7ff0000000000000 },
{ 0x7ff0000000000000, 0x0000000000000000, 0x7ff0000000000000, 0x7ff0000000000000 },
{ 0x7ff0000000000000, 0x8000000000000000, 0x7ff0000000000000, 0x7ff0000000000000 },
{ 0x7ff0000000000000, 0x8002222222222222, 0x7ff0000000000000, 0x7ff0000000000000 },
{ 0x7ff0000000000000, 0xc002222222222222, 0x7ff0000000000000, 0x7ff0000000000000 },
{ 0x0000000000000000, 0x7ff9999999999999, 0x7ff9999999999999, 0x7ff9999999999999 },
{ 0x0000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0xfff0000000000000 },
{ 0x8000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0xfff0000000000000 },
{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 },
{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 },
{ 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000 },
{ 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000 },
{ 0x8002222222222222, 0x0001111111111111, 0x8001111111111111, 0x8003333333333333 },
{ 0x0000022222222222, 0x0000111111111111, 0x0000133333333333, 0x80000eeeeeeeeeef },
{ 0x401ffffffbfffefe, 0x406b8265196bd89e, 0x406c8265194bd896, 0xc06a8265198bd8a6 },
{ 0x4030020000000004, 0xbf110001ffffffff, 0x403001fbbfff8004, 0x4030020440008004 },
{ 0x3fdfffffffffffff, 0x3fe0000000000000, 0x3ff0000000000000, 0xbc90000000000000 },
{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, FPS_RN_NEAR },
{ 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000, FPS_RN_NEAR },
{ 0x3fdfffffffffffff, 0x0000000000000000, 0x3fdfffffffffffff, 0x3fdfffffffffffff, FPS_RN_NEAR },
{ 0x3ff0000000000000, 0x3ff0000000000000, 0x4000000000000000, 0x0000000000000000, FPS_RN_NEAR },
{ 0xbff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x0000000000000000, FPS_RN_NEAR },
{ 0x402123456789abcd, 0x4021000000000000, 0x403111a2b3c4d5e6, 0x3fb1a2b3c4d5e680, FPS_RN_NEAR },
{ 0x4061200000000000, 0x406123456789abcd, 0x407121a2b3c4d5e6, 0xbfba2b3c4d5e6800, FPS_RN_NEAR },
{ 0x4061230000000000, 0x3fa4560000000000, 0x4061244560000000, 0x406121baa0000000, FPS_RN_NEAR },
{ 0xc061230000000000, 0x3fa4560000000000, 0xc06121baa0000000, 0xc061244560000000, FPS_RN_NEAR },
{ 0x4061230000000000, 0xbfa4560000000000, 0x406121baa0000000, 0x4061244560000000, FPS_RN_NEAR },
{ 0xc061230000000000, 0xbfa4560000000000, 0xc061244560000000, 0xc06121baa0000000, FPS_RN_NEAR },
{ 0x3fa1230000000000, 0x4064560000000000, 0x4064571230000000, 0xc06454edd0000000, FPS_RN_NEAR },
{ 0xbfa1230000000000, 0x4064560000000000, 0x406454edd0000000, 0xc064571230000000, FPS_RN_NEAR },
{ 0x3fa1230000000000, 0xc064560000000000, 0xc06454edd0000000, 0x4064571230000000, FPS_RN_NEAR },
{ 0xbfa1230000000000, 0xc064560000000000, 0xc064571230000000, 0x406454edd0000000, FPS_RN_NEAR },
{ 0x6780000000000001, 0x6470000000000000, 0x6780000000000009, 0x677ffffffffffff2, FPS_RN_NEAR },
{ 0x6780000000000001, 0x6460000000000000, 0x6780000000000005, 0x677ffffffffffffa, FPS_RN_NEAR },
{ 0x6780000000000001, 0x6450000000000000, 0x6780000000000003, 0x677ffffffffffffe, FPS_RN_NEAR },
{ 0x6780000000000001, 0x6440000000000000, 0x6780000000000002, 0x6780000000000000, FPS_RN_NEAR },
{ 0x7ff8888888888888, 0x7ff9999999999999, 0x7ff8888888888888, 0x7ff8888888888888, FPS_RN_NEAR },
{ 0xfff8888888888888, 0x7ff9999999999999, 0xfff8888888888888, 0xfff8888888888888, FPS_RN_NEAR },
{ 0x7ff8888888888888, 0x7ff0000000000000, 0x7ff8888888888888, 0x7ff8888888888888, FPS_RN_NEAR },
{ 0x7ff8888888888888, 0x0000000000000000, 0x7ff8888888888888, 0x7ff8888888888888, FPS_RN_NEAR },
{ 0x7ff8888888888888, 0x0001111111111111, 0x7ff8888888888888, 0x7ff8888888888888, FPS_RN_NEAR },
{ 0x7ff8888888888888, 0x3ff0000000000000, 0x7ff8888888888888, 0x7ff8888888888888, FPS_RN_NEAR },
{ 0x7ff0000000000000, 0x7ff9999999999999, 0x7ff9999999999999, 0x7ff9999999999999, FPS_RN_NEAR },
{ 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff8000000000000, FPS_RN_NEAR },
{ 0x7ff0000000000000, 0xfff0000000000000, 0x7ff8000000000000, 0x7ff0000000000000, FPS_RN_NEAR },
{ 0x7ff0000000000000, 0x0000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, FPS_RN_NEAR },
{ 0x7ff0000000000000, 0x8000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, FPS_RN_NEAR },
{ 0x7ff0000000000000, 0x8002222222222222, 0x7ff0000000000000, 0x7ff0000000000000, FPS_RN_NEAR },
{ 0x7ff0000000000000, 0xc002222222222222, 0x7ff0000000000000, 0x7ff0000000000000, FPS_RN_NEAR },
{ 0x0000000000000000, 0x7ff9999999999999, 0x7ff9999999999999, 0x7ff9999999999999, FPS_RN_NEAR },
{ 0x0000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0xfff0000000000000, FPS_RN_NEAR },
{ 0x8000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0xfff0000000000000, FPS_RN_NEAR },
{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, FPS_RN_NEAR },
{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, FPS_RN_NEAR },
{ 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000, FPS_RN_NEAR },
{ 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000, FPS_RN_NEAR },
{ 0x8002222222222222, 0x0001111111111111, 0x8001111111111111, 0x8003333333333333, FPS_RN_NEAR },
{ 0x0000022222222222, 0x0000111111111111, 0x0000133333333333, 0x80000eeeeeeeeeef, FPS_RN_NEAR },
{ 0x401ffffffbfffefe, 0x406b8265196bd89e, 0x406c8265194bd896, 0xc06a8265198bd8a6, FPS_RN_NEAR },
{ 0x4030020000000004, 0xbf110001ffffffff, 0x403001fbbfff8004, 0x4030020440008004, FPS_RN_NEAR },
{ 0x3fdfffffffffffff, 0x3fe0000000000000, 0x3ff0000000000000, 0xbc90000000000000, FPS_RN_NEAR },
{ 0x001000100010000f, 0x00000000000000ff, 0x001000100010010e, 0x00100010000fff10, FPS_RN_CEIL },
};
int test13(long arg)
@ -1029,8 +1031,8 @@ int test13(long arg)
struct addvals *vp = addvals;
unsigned long fpscr;
set_fpscr(FPS_RN_NEAR);
for (i = 0; i < sizeof(addvals) / sizeof(addvals[0]); ++i, ++vp) {
set_fpscr(vp->fpscr);
asm("lfd 5,0(%0); lfd 6,8(%0); fadd 7,5,6; fsub 8,5,6; stfd 7,0(%1); stfd 8,8(%1)"
: : "b" (&vp->val_a), "b" (results) : "memory");
fpscr = get_fpscr();

Binary file not shown.