mirror of
https://github.com/antonblanchard/microwatt.git
synced 2026-01-11 23:43:15 +00:00
FPU: Improve zero result detection and simplify final states
This improves detection of results that are exactly zero in FINISH state by noting that on entry to FINISH state, if R is zero then X must also be zero, so no rounding needs to be done and no underflow exists. Therefore we can set rcls_op = RCLS_TZERO to test for zero and exit early if R = 0. The RCLS_TZERO test now tests the whole of R just in case.

The remaining final states have been streamlined and simplified. In cases of underflow, we only need to take action before rounding in the UE=0 case (disabled underflow exception), where we need to denormalize before rounding. For enabled underflow cases we just use the existing NORMALIZE state, which lets us remove NORM_UFLOW state.

On entry to ROUNDING state, R can be zero or denorm only for round to integer instructions (fri*) or for disabled underflow exception cases.

Note that in case of underflow with UE=0, the exception is only actually signalled if there is loss of accuracy, i.e. if FPSCR[FI] will be set. This is now done at the end of ROUNDING state. For underflow with UE=1, we go to a new ROUND_UFLOW_EN state to adjust the exponent from ROUNDING, ROUNDING_2 or ROUNDING_3 state.

In the ROUNDING* states, we avoid shifting left to normalize a result with exponent <= -1022, because if we did we would then just need to denormalize again. This lets us get rid of DENORM state.

Finally, noticing that DO_FRSP_2 state does much the same as FINISH state lets us remove DO_FRSP_2 state and go to FINISH state from DO_FRSP.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
parent
f8a11420ca
commit
1ad8848655
136
fpu.vhdl
136
fpu.vhdl
@ -51,7 +51,7 @@ architecture behaviour of fpu is
|
|||||||
DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
|
DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
|
||||||
DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
|
DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
|
||||||
DO_FCFID, DO_FCTI,
|
DO_FCFID, DO_FCTI,
|
||||||
DO_FRSP, DO_FRSP_2, DO_FRI,
|
DO_FRSP, DO_FRI,
|
||||||
DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
|
DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
|
||||||
DO_FRE,
|
DO_FRE,
|
||||||
DO_FSEL,
|
DO_FSEL,
|
||||||
@ -72,9 +72,9 @@ architecture behaviour of fpu is
|
|||||||
INT_SHIFT, INT_ROUND, INT_ISHIFT,
|
INT_SHIFT, INT_ROUND, INT_ISHIFT,
|
||||||
INT_FINAL, INT_CHECK, INT_OFLOW,
|
INT_FINAL, INT_CHECK, INT_OFLOW,
|
||||||
FINISH, NORMALIZE,
|
FINISH, NORMALIZE,
|
||||||
ROUND_UFLOW, NORM_UFLOW, ROUND_OFLOW_DIS, ROUND_OFLOW_EN,
|
ROUND_UFLOW_DIS, ROUND_UFLOW_EN,
|
||||||
|
ROUND_OFLOW_DIS, ROUND_OFLOW_EN,
|
||||||
ROUNDING, ROUND_INC, ROUNDING_2, ROUNDING_3,
|
ROUNDING, ROUND_INC, ROUNDING_2, ROUNDING_3,
|
||||||
DENORM,
|
|
||||||
RENORM_A, RENORM_B, RENORM_C,
|
RENORM_A, RENORM_B, RENORM_C,
|
||||||
RENORM_1, RENORM_2,
|
RENORM_1, RENORM_2,
|
||||||
IDIV_NORMB, IDIV_NORMB2, IDIV_NORMB3,
|
IDIV_NORMB, IDIV_NORMB2, IDIV_NORMB3,
|
||||||
@ -776,6 +776,9 @@ begin
|
|||||||
end if;
|
end if;
|
||||||
else
|
else
|
||||||
assert not (r.state /= IDLE and e_in.valid = '1') severity failure;
|
assert not (r.state /= IDLE and e_in.valid = '1') severity failure;
|
||||||
|
assert not (rin.state = FINISH and rin.r = 64x"0" and rin.x = '1');
|
||||||
|
assert not (rin.state = ROUNDING and rin.r(UNIT_BIT) = '0' and
|
||||||
|
not (rin.tiny = '1' or rin.zero_fri = '1'));
|
||||||
r <= rin;
|
r <= rin;
|
||||||
end if;
|
end if;
|
||||||
end if;
|
end if;
|
||||||
@ -1630,22 +1633,7 @@ begin
|
|||||||
set_r := '1';
|
set_r := '1';
|
||||||
re_sel2 <= REXP2_B;
|
re_sel2 <= REXP2_B;
|
||||||
re_set_result <= '1';
|
re_set_result <= '1';
|
||||||
v.state := DO_FRSP_2;
|
v.state := FINISH;
|
||||||
|
|
||||||
when DO_FRSP_2 =>
|
|
||||||
-- r.shift = 0
|
|
||||||
-- set shift to exponent - -126 (for ROUND_UFLOW state)
|
|
||||||
rs_sel1 <= RSH1_B;
|
|
||||||
rs_con2 <= RSCON2_MINEXP;
|
|
||||||
rs_neg2 <= '1';
|
|
||||||
set_x := '1'; -- uses r.r and r.shift
|
|
||||||
if exp_tiny = '1' then
|
|
||||||
v.state := ROUND_UFLOW;
|
|
||||||
elsif exp_huge = '1' and r.fpscr(FPSCR_OE) = '0' then
|
|
||||||
v.state := ROUND_OFLOW_DIS;
|
|
||||||
else
|
|
||||||
v.state := ROUNDING;
|
|
||||||
end if;
|
|
||||||
|
|
||||||
when DO_FCTI =>
|
when DO_FCTI =>
|
||||||
-- instr bit 9: 1=dword 0=word
|
-- instr bit 9: 1=dword 0=word
|
||||||
@ -2414,17 +2402,20 @@ begin
|
|||||||
when FINISH =>
|
when FINISH =>
|
||||||
-- r.shift = 0
|
-- r.shift = 0
|
||||||
-- set shift to new_exp - min_exp (N.B. rs_norm overrides this)
|
-- set shift to new_exp - min_exp (N.B. rs_norm overrides this)
|
||||||
|
-- assert that if r.r = 0 then r.x = 0 also
|
||||||
rs_sel1 <= RSH1_NE;
|
rs_sel1 <= RSH1_NE;
|
||||||
rs_con2 <= RSCON2_MINEXP;
|
rs_con2 <= RSCON2_MINEXP;
|
||||||
rs_neg2 <= '1';
|
rs_neg2 <= '1';
|
||||||
|
rcls_op <= RCLS_TZERO;
|
||||||
if r.r(63 downto UNIT_BIT) /= std_ulogic_vector(to_unsigned(1, 64 - UNIT_BIT)) then
|
if r.r(63 downto UNIT_BIT) /= std_ulogic_vector(to_unsigned(1, 64 - UNIT_BIT)) then
|
||||||
rs_norm <= '1';
|
rs_norm <= '1';
|
||||||
v.state := NORMALIZE;
|
v.state := NORMALIZE;
|
||||||
else
|
else
|
||||||
set_x := '1';
|
set_x := '1';
|
||||||
set_xs := r.is_multiply;
|
set_xs := r.is_multiply;
|
||||||
if exp_tiny = '1' then
|
v.tiny := exp_tiny;
|
||||||
v.state := ROUND_UFLOW;
|
if exp_tiny = '1' and r.fpscr(FPSCR_UE) = '0' then
|
||||||
|
v.state := ROUND_UFLOW_DIS;
|
||||||
elsif exp_huge = '1' and r.fpscr(FPSCR_OE) = '0' then
|
elsif exp_huge = '1' and r.fpscr(FPSCR_OE) = '0' then
|
||||||
v.state := ROUND_OFLOW_DIS;
|
v.state := ROUND_OFLOW_DIS;
|
||||||
else
|
else
|
||||||
@ -2445,51 +2436,25 @@ begin
|
|||||||
rs_neg2 <= '1';
|
rs_neg2 <= '1';
|
||||||
set_x := '1';
|
set_x := '1';
|
||||||
set_xs := r.is_multiply;
|
set_xs := r.is_multiply;
|
||||||
if exp_tiny = '1' then
|
v.tiny := exp_tiny;
|
||||||
v.state := ROUND_UFLOW;
|
if exp_tiny = '1' and r.fpscr(FPSCR_UE) = '0' then
|
||||||
|
v.state := ROUND_UFLOW_DIS;
|
||||||
elsif exp_huge = '1' and r.fpscr(FPSCR_OE) = '0' then
|
elsif exp_huge = '1' and r.fpscr(FPSCR_OE) = '0' then
|
||||||
v.state := ROUND_OFLOW_DIS;
|
v.state := ROUND_OFLOW_DIS;
|
||||||
else
|
else
|
||||||
v.state := ROUNDING;
|
v.state := ROUNDING;
|
||||||
end if;
|
end if;
|
||||||
|
|
||||||
when ROUND_UFLOW =>
|
when ROUND_UFLOW_DIS =>
|
||||||
-- r.shift = - amount by which exponent underflows
|
-- r.shift = - amount by which exponent underflows
|
||||||
v.tiny := '1';
|
-- disabled underflow exception case
|
||||||
|
-- have to denormalize before rounding
|
||||||
opsel_r <= RES_SHIFT;
|
opsel_r <= RES_SHIFT;
|
||||||
set_r := '0';
|
set_r := '0';
|
||||||
if r.fpscr(FPSCR_UE) = '0' then
|
|
||||||
-- disabled underflow exception case
|
|
||||||
-- have to denormalize before rounding
|
|
||||||
set_r := '1';
|
|
||||||
re_sel2 <= REXP2_NE;
|
|
||||||
re_set_result <= '1';
|
|
||||||
set_x := '1';
|
|
||||||
v.state := ROUNDING;
|
|
||||||
else
|
|
||||||
-- enabled underflow exception case
|
|
||||||
-- if denormalized, have to normalize before rounding
|
|
||||||
v.fpscr(FPSCR_UX) := '1';
|
|
||||||
re_sel1 <= REXP1_R;
|
|
||||||
re_con2 <= RECON2_BIAS;
|
|
||||||
re_set_result <= '1';
|
|
||||||
if r.r(UNIT_BIT) = '0' then
|
|
||||||
rs_norm <= '1';
|
|
||||||
v.state := NORM_UFLOW;
|
|
||||||
else
|
|
||||||
v.state := ROUNDING;
|
|
||||||
end if;
|
|
||||||
end if;
|
|
||||||
|
|
||||||
when NORM_UFLOW =>
|
|
||||||
-- normalize for UE=1 underflow case
|
|
||||||
-- r.shift = clz(r.r) - 7
|
|
||||||
opsel_r <= RES_SHIFT;
|
|
||||||
set_r := '1';
|
set_r := '1';
|
||||||
re_sel2 <= REXP2_NE;
|
re_sel2 <= REXP2_NE;
|
||||||
re_set_result <= '1';
|
re_set_result <= '1';
|
||||||
set_x := '1';
|
set_x := '1';
|
||||||
set_xs := r.is_multiply;
|
|
||||||
v.state := ROUNDING;
|
v.state := ROUNDING;
|
||||||
|
|
||||||
when ROUND_OFLOW_DIS =>
|
when ROUND_OFLOW_DIS =>
|
||||||
@ -2508,6 +2473,8 @@ begin
|
|||||||
arith_done := '1';
|
arith_done := '1';
|
||||||
|
|
||||||
when ROUNDING =>
|
when ROUNDING =>
|
||||||
|
-- r.r can be zero or denorm here for fri* instructions,
|
||||||
|
-- and for disabled underflow exception cases.
|
||||||
opsel_mask <= '1';
|
opsel_mask <= '1';
|
||||||
set_r := '1';
|
set_r := '1';
|
||||||
round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
|
round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
|
||||||
@ -2520,10 +2487,22 @@ begin
|
|||||||
-- increment the LSB for the precision
|
-- increment the LSB for the precision
|
||||||
v.state := ROUND_INC;
|
v.state := ROUND_INC;
|
||||||
elsif r.r(UNIT_BIT) = '0' then
|
elsif r.r(UNIT_BIT) = '0' then
|
||||||
-- result after masking could be zero, or could be a
|
-- Result after masking could be zero, or could be a
|
||||||
-- denormalized result that needs to be renormalized
|
-- denormalized result that needs to be renormalized,
|
||||||
rs_norm <= '1';
|
-- but only for fri* instructions and for disabled
|
||||||
|
-- underflow exception cases.
|
||||||
|
-- For fri* instructions, result_exp is 52.
|
||||||
|
-- For disabled underflow exception cases for DP operations,
|
||||||
|
-- result_exp is -1022 and there is no point renormalizing
|
||||||
|
-- since it will just get denormalized again, but we do need
|
||||||
|
-- to check for a zero result in a subsequent cycle
|
||||||
|
-- after R is masked.
|
||||||
|
if r.result_exp > to_signed(-1022, EXP_BITS) then
|
||||||
|
rs_norm <= '1';
|
||||||
|
end if;
|
||||||
v.state := ROUNDING_3;
|
v.state := ROUNDING_3;
|
||||||
|
elsif r.tiny = '1' and r.fpscr(FPSCR_UE) = '1' then
|
||||||
|
v.state := ROUND_UFLOW_EN;
|
||||||
elsif r.result_exp > max_exp then
|
elsif r.result_exp > max_exp then
|
||||||
v.state := ROUND_OFLOW_EN;
|
v.state := ROUND_OFLOW_EN;
|
||||||
else
|
else
|
||||||
@ -2531,9 +2510,9 @@ begin
|
|||||||
end if;
|
end if;
|
||||||
if round(0) = '1' and r.zero_fri = '0' then
|
if round(0) = '1' and r.zero_fri = '0' then
|
||||||
v.fpscr(FPSCR_XX) := '1';
|
v.fpscr(FPSCR_XX) := '1';
|
||||||
if r.tiny = '1' then
|
end if;
|
||||||
v.fpscr(FPSCR_UX) := '1';
|
if round(0) = '1' and r.tiny = '1' then
|
||||||
end if;
|
v.fpscr(FPSCR_UX) := '1';
|
||||||
end if;
|
end if;
|
||||||
|
|
||||||
when ROUND_INC =>
|
when ROUND_INC =>
|
||||||
@ -2544,18 +2523,30 @@ begin
|
|||||||
when ROUNDING_2 =>
|
when ROUNDING_2 =>
|
||||||
-- Check for overflow during rounding
|
-- Check for overflow during rounding
|
||||||
-- r.shift = 0
|
-- r.shift = 0
|
||||||
if r.r(UNIT_BIT + 1) = '1' or r.r(UNIT_BIT) = '0' then
|
if r.r(UNIT_BIT + 1) = '1' then
|
||||||
-- Do CLZ so we can renormalize the result
|
-- Do CLZ so we can renormalize the result
|
||||||
rs_norm <= '1';
|
rs_norm <= '1';
|
||||||
v.state := ROUNDING_3;
|
v.state := ROUNDING_3;
|
||||||
|
elsif r.r(UNIT_BIT) = '0' then
|
||||||
|
-- R is non-zero (we just incremented it)
|
||||||
|
-- If result_exp is -1022 here, don't normalize since
|
||||||
|
-- we would then need to denormalize again.
|
||||||
|
if r.result_exp > to_signed(-1022, EXP_BITS) then
|
||||||
|
rs_norm <= '1';
|
||||||
|
end if;
|
||||||
|
v.state := ROUNDING_3;
|
||||||
elsif exp_huge = '1' then
|
elsif exp_huge = '1' then
|
||||||
v.state := ROUND_OFLOW_EN;
|
v.state := ROUND_OFLOW_EN;
|
||||||
|
elsif r.tiny = '1' and r.fpscr(FPSCR_UE) = '1' then
|
||||||
|
v.state := ROUND_UFLOW_EN;
|
||||||
else
|
else
|
||||||
arith_done := '1';
|
arith_done := '1';
|
||||||
end if;
|
end if;
|
||||||
|
|
||||||
when ROUNDING_3 =>
|
when ROUNDING_3 =>
|
||||||
-- r.shift = clz(r.r) - 7
|
-- r.shift = clz(r.r) - 7 (or 0, or -7, if r.r is 0)
|
||||||
|
-- Note clz may be done on the value before being masked
|
||||||
|
-- to the result precision.
|
||||||
opsel_r <= RES_SHIFT;
|
opsel_r <= RES_SHIFT;
|
||||||
set_r := '1';
|
set_r := '1';
|
||||||
re_sel2 <= REXP2_NE;
|
re_sel2 <= REXP2_NE;
|
||||||
@ -2572,20 +2563,12 @@ begin
|
|||||||
v.state := ROUND_OFLOW_DIS;
|
v.state := ROUND_OFLOW_DIS;
|
||||||
elsif exp_huge = '1' and r.fpscr(FPSCR_OE) = '1' then
|
elsif exp_huge = '1' and r.fpscr(FPSCR_OE) = '1' then
|
||||||
v.state := ROUND_OFLOW_EN;
|
v.state := ROUND_OFLOW_EN;
|
||||||
elsif new_exp < to_signed(-1022, EXP_BITS) then
|
elsif r.tiny = '1' and r.fpscr(FPSCR_UE) = '1' then
|
||||||
v.state := DENORM;
|
v.state := ROUND_UFLOW_EN;
|
||||||
else
|
else
|
||||||
arith_done := '1';
|
arith_done := '1';
|
||||||
end if;
|
end if;
|
||||||
|
|
||||||
when DENORM =>
|
|
||||||
-- r.shift = result_exp - -1022
|
|
||||||
opsel_r <= RES_SHIFT;
|
|
||||||
set_r := '1';
|
|
||||||
re_sel2 <= REXP2_NE;
|
|
||||||
re_set_result <= '1';
|
|
||||||
arith_done := '1';
|
|
||||||
|
|
||||||
when ROUND_OFLOW_EN =>
|
when ROUND_OFLOW_EN =>
|
||||||
-- enabled overflow exception
|
-- enabled overflow exception
|
||||||
-- rounding and normalization has been done
|
-- rounding and normalization has been done
|
||||||
@ -2596,6 +2579,15 @@ begin
|
|||||||
re_set_result <= '1';
|
re_set_result <= '1';
|
||||||
arith_done := '1';
|
arith_done := '1';
|
||||||
|
|
||||||
|
when ROUND_UFLOW_EN =>
|
||||||
|
-- enabled underflow exception
|
||||||
|
-- rounding and normalization has been done
|
||||||
|
v.fpscr(FPSCR_UX) := '1';
|
||||||
|
re_sel1 <= REXP1_R;
|
||||||
|
re_con2 <= RECON2_BIAS;
|
||||||
|
re_set_result <= '1';
|
||||||
|
arith_done := '1';
|
||||||
|
|
||||||
when DO_IDIVMOD =>
|
when DO_IDIVMOD =>
|
||||||
opsel_a <= AIN_B;
|
opsel_a <= AIN_B;
|
||||||
opsel_aabs <= '1';
|
opsel_aabs <= '1';
|
||||||
@ -3196,7 +3188,7 @@ begin
|
|||||||
when others =>
|
when others =>
|
||||||
end case;
|
end case;
|
||||||
when RCLS_TZERO =>
|
when RCLS_TZERO =>
|
||||||
if or (r.r(UNIT_BIT + 2 downto 0)) = '0' then
|
if or (r.r) = '0' then
|
||||||
v.result_class := ZERO;
|
v.result_class := ZERO;
|
||||||
arith_done := '1';
|
arith_done := '1';
|
||||||
end if;
|
end if;
|
||||||
|
|||||||
@ -1627,6 +1627,8 @@ struct fmavals {
|
|||||||
0x0000000000000000, 0x0000000000000000, 0x8000000000000000, 0x8000000000000000 },
|
0x0000000000000000, 0x0000000000000000, 0x8000000000000000, 0x8000000000000000 },
|
||||||
{ 0x41efffffffe00000, 0xc1efffffffe00000, 0x43f0000000000000, FPS_RN_CEIL,
|
{ 0x41efffffffe00000, 0xc1efffffffe00000, 0x43f0000000000000, FPS_RN_CEIL,
|
||||||
0x41fffffffff00000, 0xc3ffffffffe00000, 0xc1fffffffff00000, 0x43ffffffffe00000 },
|
0x41fffffffff00000, 0xc3ffffffffe00000, 0xc1fffffffff00000, 0x43ffffffffe00000 },
|
||||||
|
{ 0x3ff0000000000000, 0x000060fbffffefc1, 0x000060fbffffefc1, FPS_RN_NEAR,
|
||||||
|
0x0000c1f7ffffdf82, 0x0000000000000000, 0x8000c1f7ffffdf82, 0x8000000000000000 },
|
||||||
};
|
};
|
||||||
|
|
||||||
int test23(long arg)
|
int test23(long arg)
|
||||||
|
|||||||
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user