diff --git a/fpu.vhdl b/fpu.vhdl index 16583cb..190f4a3 100644 --- a/fpu.vhdl +++ b/fpu.vhdl @@ -51,7 +51,7 @@ architecture behaviour of fpu is DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF, DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT, DO_FCFID, DO_FCTI, - DO_FRSP, DO_FRSP_2, DO_FRI, + DO_FRSP, DO_FRI, DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD, DO_FRE, DO_FSEL, @@ -72,9 +72,9 @@ architecture behaviour of fpu is INT_SHIFT, INT_ROUND, INT_ISHIFT, INT_FINAL, INT_CHECK, INT_OFLOW, FINISH, NORMALIZE, - ROUND_UFLOW, ROUND_OFLOW, + ROUND_UFLOW_DIS, ROUND_UFLOW_EN, + ROUND_OFLOW_DIS, ROUND_OFLOW_EN, ROUNDING, ROUND_INC, ROUNDING_2, ROUNDING_3, - DENORM, RENORM_A, RENORM_B, RENORM_C, RENORM_1, RENORM_2, IDIV_NORMB, IDIV_NORMB2, IDIV_NORMB3, @@ -98,6 +98,7 @@ architecture behaviour of fpu is zero_divide : std_ulogic; new_fpscr : std_ulogic_vector(31 downto 0); immed_result : std_ulogic; -- result is an input, zero, infinity or NaN + need_finish : std_ulogic; -- result needs further processing qnan_result : std_ulogic; result_sel : std_ulogic_vector(2 downto 0); result_class : fp_number_class; @@ -144,7 +145,7 @@ architecture behaviour of fpu is int_result : std_ulogic; cr_result : std_ulogic_vector(3 downto 0); cr_mask : std_ulogic_vector(7 downto 0); - old_exc : std_ulogic_vector(4 downto 0); + old_exc : std_ulogic_vector(12 downto 0); update_fprf : std_ulogic; quieten_nan : std_ulogic; nsnan_result : std_ulogic; @@ -158,6 +159,7 @@ architecture behaviour of fpu is is_multiply : std_ulogic; is_inverse : std_ulogic; is_sqrt : std_ulogic; + do_renorm_b : std_ulogic; first : std_ulogic; count : unsigned(1 downto 0); doing_ftdiv : std_ulogic_vector(1 downto 0); @@ -187,6 +189,7 @@ architecture behaviour of fpu is cycle_1_ar : std_ulogic; regsel : std_ulogic_vector(2 downto 0); is_nan_inf : std_ulogic; + zero_fri : std_ulogic; end record; type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0); @@ -312,6 +315,7 @@ architecture behaviour of fpu is constant RSCON2_63 : std_ulogic_vector(3 downto 0) := "0111"; constant RSCON2_64 : std_ulogic_vector(3 downto 0) := "1000"; constant RSCON2_MINEXP : std_ulogic_vector(3 downto 0) := "1001"; + constant RSCON2_DPMINX : std_ulogic_vector(3 downto 0) := "1010"; signal rs_sel1 : std_ulogic_vector(1 downto 0); signal rs_sel2 : std_ulogic; @@ -713,19 +717,24 @@ architecture behaviour of fpu is end; -- Determine result flags to write into the FPSCR - function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic) + function result_flags(sign: std_ulogic; class: fp_number_class; int_result: std_ulogic; + unitbit: std_ulogic) return std_ulogic_vector is begin - case class is - when ZERO => - return sign & "0010"; - when FINITE => - return (not unitbit) & sign & (not sign) & "00"; - when INFINITY => - return '0' & sign & (not sign) & "01"; - when NAN => - return "10001"; - end case; + if int_result = '1' then + return "00000"; + else + case class is + when ZERO => + return sign & "0010"; + when FINITE => + return (not unitbit) & sign & (not sign) & "00"; + when INFINITY => + return '0' & sign & (not sign) & "01"; + when NAN => + return "10001"; + end case; + end if; end; begin @@ -767,6 +776,9 @@ begin end if; else assert not (r.state /= IDLE and e_in.valid = '1') severity failure; + assert not (rin.state = FINISH and rin.r = 64x"0" and rin.x = '1'); + assert not (rin.state = ROUNDING and rin.r(UNIT_BIT) = '0' and + not (rin.tiny = '1' or rin.zero_fri = '1')); r <= rin; end if; end if; @@ -827,6 +839,7 @@ begin e.zero_divide := '0'; e.new_fpscr := (others => '0'); e.immed_result := '0'; + e.need_finish := '0'; e.qnan_result := '0'; e.result_sel := AIN_ZERO; e.result_class := FINITE; @@ -905,6 +918,11 @@ begin -- result is +/- B e.result_sel := AIN_B; e.result_class := r.b.class; + -- r.result_sign is already correct + if r.b.class = FINITE and r.int_result = '0' and + (r.single_prec = '1' or (r.fpscr(FPSCR_UE) = '1' and r.b.denorm = '1')) then + e.need_finish := '1'; + end if; else e.result_class := ZERO; end if; @@ -919,6 +937,10 @@ begin e.immed_result := '1'; e.result_sel := AIN_B; e.result_class := r.b.class; + if r.b.class = FINITE and r.int_result = '0' and + (r.single_prec = '1' or (r.fpscr(FPSCR_UE) = '1' and r.b.denorm = '1')) then + e.need_finish := '1'; + end if; elsif r.use_b = '1' and r.b.class = ZERO and r.is_multiply = '0' then -- B is zero, other operands are finite @@ -931,6 +953,11 @@ begin elsif r.is_addition = '1' then -- fadd, result is A e.result_sel := AIN_A; + e.rsgn_op := RSGN_SEL; + if r.a.class = FINITE and r.int_result = '0' and + (r.single_prec = '1' or (r.fpscr(FPSCR_UE) = '1' and r.a.denorm = '1')) then + e.need_finish := '1'; + end if; else -- other things, result is zero e.result_class := ZERO; @@ -979,6 +1006,7 @@ begin variable exp_huge : std_ulogic; variable clz : std_ulogic_vector(5 downto 0); variable set_x : std_ulogic; + variable set_xs : std_ulogic; variable mshift : signed(EXP_BITS-1 downto 0); variable need_check : std_ulogic; variable msb : std_ulogic; @@ -1032,6 +1060,7 @@ begin variable bneg : std_ulogic; variable ci : std_ulogic; variable rormr : std_ulogic_vector(63 downto 0); + variable sorms : std_ulogic_vector(55 downto 0); begin v := r; v.complete := '0'; @@ -1048,7 +1077,6 @@ begin v.writing_fpr := '0'; v.writing_cr := '0'; v.writing_xer := '0'; - v.comm_fpscr := r.fpscr; v.illegal := '0'; end if; @@ -1076,6 +1104,8 @@ begin v.is_addition := '0'; v.is_subtract := '0'; v.is_inverse := '0'; + v.add_bsmall := '0'; + v.do_renorm_b := '0'; fpin_a := '0'; fpin_b := '0'; fpin_c := '0'; @@ -1088,6 +1118,7 @@ begin v.quieten_nan := '1'; v.int_result := '0'; v.is_arith := '0'; + v.zero_fri := '0'; case e_in.op is when OP_FP_ARITH => fpin_a := e_in.valid_a; @@ -1112,6 +1143,8 @@ begin v.result_sign := e_in.fra(63); if unsigned(e_in.fra(62 downto 52)) <= unsigned(e_in.frb(62 downto 52)) then v.result_sign := e_in.frb(63) xnor e_in.insn(1); + else + v.add_bsmall := '1'; end if; v.is_subtract := not (e_in.fra(63) xor e_in.frb(63) xor e_in.insn(1)); when "11001" => -- fmul @@ -1124,17 +1157,24 @@ begin v.is_subtract := not (e_in.fra(63) xor e_in.frb(63) xor e_in.frc(63) xor e_in.insn(1)); v.negate := e_in.insn(2); + v.do_renorm_b := '1'; when "10010" => -- fdiv v.is_inverse := '1'; v.result_sign := e_in.fra(63) xor e_in.frb(63); + v.do_renorm_b := '1'; when "11000" | "11010" => -- fre and frsqrte v.is_inverse := '1'; v.result_sign := e_in.frb(63); + v.do_renorm_b := '1'; when "01110" | "01111" => -- fcti* v.int_result := '1'; v.result_sign := e_in.frb(63); - when others => -- fri* and frsp + when "01000" => -- fri* + v.zero_fri := '1'; v.result_sign := e_in.frb(63); + when others => -- frsp and fsqrt + v.result_sign := e_in.frb(63); + v.do_renorm_b := '1'; end case; when OP_FP_CMP => fpin_a := e_in.valid_a; @@ -1145,12 +1185,21 @@ begin opcbits := e_in.insn(10) & e_in.insn(8) & e_in.insn(4) & e_in.insn(2) & e_in.insn(1); exec_state := misc_decode(to_integer(unsigned(opcbits))); case opcbits is - when "10010" | "11010" | "10011" => - -- fmrg*, mffs + when "10010" | "11010" => + -- fmrg* v.int_result := '1'; v.result_sign := '0'; + when "10011" => + -- mffs* + v.int_result := '1'; + v.result_sign := '0'; + if e_in.insn(20 downto 16) /= "00000" then + -- mffs* variants other than mffs have bit 0 reserved + v.rc := '0'; + end if; when "10110" => -- fcfid v.result_sign := e_in.frb(63); + v.longmask := e_in.single; when others => v.result_sign := '0'; end case; @@ -1211,7 +1260,6 @@ begin end case; v.tiny := '0'; v.denorm := '0'; - v.add_bsmall := '0'; v.int_ovf := '0'; v.div_close := '0'; @@ -1268,6 +1316,9 @@ begin end if; -- Compare P with zero and with B + -- This has a 2-bit shift in it (p(59..4) compared to b(57..2)) + -- because it's used in the FP division code to determine whether + -- to increment the quotient at bit 2 (DP_RBIT). px_nz := or (r.p(UNIT_BIT + 1 downto 4)); pcmpb_eq := '0'; if r.p(59 downto 4) = r.b.mantissa(UNIT_BIT + 1 downto DP_RBIT) then @@ -1279,6 +1330,9 @@ begin elsif unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(UNIT_BIT + 1 downto DP_RBIT)) then pcmpb_lt := '1'; end if; + -- Compare P with zero and with C + -- This is used in the square root and integer division code + -- to decide whether to increment the result by 1 pcmpc_eq := '0'; if r.p = r.c.mantissa then pcmpc_eq := '1'; @@ -1303,13 +1357,14 @@ begin opsel_s <= S_ZERO; misc_sel <= "000"; opsel_sel <= AIN_ZERO; - fpscr_mask := (others => '1'); + fpscr_mask := x"FFFFFFFF"; cr_op := CROP_NONE; update_fx := '0'; arith_done := '0'; invalid := '0'; zero_divide := '0'; set_x := '0'; + set_xs := '0'; qnan_result := '0'; set_a := '0'; set_a_exp := '0'; @@ -1354,12 +1409,6 @@ begin rsgn_op := RSGN_NOP; rcls_op <= RCLS_NOP; - if r.cycle_1_ar = '1' then - v.fpscr(FPSCR_FR) := '0'; - v.fpscr(FPSCR_FI) := '0'; - v.result_class := FINITE; - end if; - case r.state is when IDLE => v.invalid := '0'; @@ -1374,7 +1423,7 @@ begin end if; end if; v.x := '0'; - v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX); + v.old_exc := r.fpscr(FPSCR_OX downto FPSCR_VXVC) & r.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI); set_s := '1'; v.regsel := AIN_ZERO; @@ -1391,7 +1440,7 @@ begin v.state := RENORM_A; elsif r.c.denorm = '1' then v.state := RENORM_C; - elsif r.b.denorm = '1' and (r.is_inverse = '1' or r.is_sqrt = '1') then + elsif r.b.denorm = '1' and r.do_renorm_b = '1' then v.state := RENORM_B; elsif r.is_multiply = '1' and r.b.class = ZERO then v.state := DO_FMUL; @@ -1410,11 +1459,10 @@ begin for i in 0 to 7 loop if i = j then k := (7 - i) * 4; - v.cr_result := r.fpscr(k + 3 downto k); fpscr_mask(k + 3 downto k) := "0000"; end if; end loop; - v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF"); + v.fpscr := r.fpscr and (fpscr_mask or x"6007F0FF"); v.instr_done := '1'; when DO_FTDIV => @@ -1477,6 +1525,7 @@ begin v.fpscr(31 - i) := r.insn(6); end if; end loop; + update_fx := '1'; v.instr_done := '1'; when DO_MTFSFI => @@ -1583,22 +1632,7 @@ begin set_r := '1'; re_sel2 <= REXP2_B; re_set_result <= '1'; - v.state := DO_FRSP_2; - - when DO_FRSP_2 => - -- r.shift = 0 - -- set shift to exponent - -126 (for ROUND_UFLOW state) - rs_sel1 <= RSH1_B; - rs_con2 <= RSCON2_MINEXP; - rs_neg2 <= '1'; - set_x := '1'; -- uses r.r and r.shift - if r.b.exponent < to_signed(-126, EXP_BITS) then - v.state := ROUND_UFLOW; - elsif r.b.exponent > to_signed(127, EXP_BITS) then - v.state := ROUND_OFLOW; - else - v.state := ROUNDING; - end if; + v.state := FINISH; when DO_FCTI => -- instr bit 9: 1=dword 0=word @@ -1611,6 +1645,7 @@ begin re_set_result <= '1'; rs_sel1 <= RSH1_B; rs_neg2 <= '1'; + v.single_prec := not r.insn(9); if r.b.exponent >= to_signed(64, EXP_BITS) or (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then @@ -1640,6 +1675,8 @@ begin rcls_op <= RCLS_SEL; re_con2 <= RECON2_UNIT; re_set_result <= '1'; + v.fpscr(FPSCR_FR) := '0'; + v.fpscr(FPSCR_FI) := '0'; if r.b.class = ZERO then arith_done := '1'; else @@ -1657,15 +1694,13 @@ begin rs_sel1 <= RSH1_B; rs_neg1 <= '1'; rs_sel2 <= RSH2_A; - v.add_bsmall := '0'; - if r.a.exponent = r.b.exponent then + if r.add_bsmall = '1' then + v.state := ADD_1; + elsif r.a.exponent = r.b.exponent then v.state := ADD_2B; - elsif r.a.exponent < r.b.exponent then + elsif v.add_bsmall = '0' then v.longmask := '0'; v.state := ADD_SHIFT; - else - v.add_bsmall := '1'; - v.state := ADD_1; end if; when DO_FMUL => @@ -1705,7 +1740,8 @@ begin misc_sel <= "111"; set_r := '1'; re_set_result <= '1'; - arith_done := '1'; + v.writing_fpr := '1'; + v.instr_done := '1'; when DO_FSQRT => opsel_a <= AIN_B; @@ -1737,14 +1773,16 @@ begin re_set_result <= '1'; -- put b.exp into shift rs_sel1 <= RSH1_B; - if (r.a.exponent + r.c.exponent + 1) < r.b.exponent then - -- addend is bigger, do multiply first + if (r.a.exponent + r.c.exponent + 2) < r.b.exponent then + -- addend is definitely bigger, do multiply first -- if subtracting, sign is opposite to initial estimate f_to_multiply.valid <= '1'; v.first := '1'; v.state := FMADD_0; else - -- product is bigger, shift B first + -- product may be bigger, or the answer might be + -- close to 0; shift B first so the multiplier does + -- the add/subtract operation. v.state := FMADD_1; end if; @@ -1791,7 +1829,7 @@ begin if r.c.denorm = '1' then -- must be either fmul or fmadd/sub v.state := RENORM_C; - elsif r.b.denorm = '1' and r.is_addition = '0' then + elsif r.b.denorm = '1' and r.do_renorm_b = '1' then v.state := RENORM_B; elsif r.is_multiply = '1' and r.b.class = ZERO then v.state := DO_FMUL; @@ -1807,6 +1845,7 @@ begin re_sel2 <= REXP2_B; re_set_result <= '1'; -- set shift to b.exp - a.exp + -- (N.B., shift can be 0 if B is denorm and A's exp is -1022) rs_sel1 <= RSH1_B; rs_sel2 <= RSH2_A; rs_neg2 <= '1'; @@ -1821,6 +1860,7 @@ begin re_set_result <= '1'; v.x := s_nz; set_x := '1'; + set_s := '1'; v.longmask := r.single_prec; if r.add_bsmall = '1' then v.state := ADD_2; @@ -1859,25 +1899,14 @@ begin -- result is opposite sign to expected rsgn_op := RSGN_INV; set_r := '1'; - v.state := FINISH; elsif r.r(UNIT_BIT + 1) = '1' then -- sum overflowed, shift right opsel_r <= RES_SHIFT; set_r := '1'; re_set_result <= '1'; set_x := '1'; - if exp_huge = '1' then - v.state := ROUND_OFLOW; - else - v.state := ROUNDING; - end if; - elsif r.r(UNIT_BIT) = '1' then - set_x := '1'; - v.state := ROUNDING; - else - rs_norm <= '1'; - v.state := NORMALIZE; end if; + v.state := FINISH; when CMP_1 => opsel_a <= AIN_A; @@ -1892,9 +1921,10 @@ begin v.instr_done := '1'; when MULT_1 => - f_to_multiply.valid <= r.first; opsel_r <= RES_MULT; set_r := '1'; + opsel_s <= S_MULT; + set_s := '1'; if multiply_to_f.valid = '1' then v.state := FINISH; end if; @@ -1920,8 +1950,8 @@ begin end if; when FMADD_1 => - -- shift is b.exp, so new_exp is a.exp + c.exp - b.exp - -- product is bigger here + -- shift is b.exp, so new_exp is a.exp + c.exp - b.exp (>= -2) + -- product may bigger here -- shift B right and use it as the addend to the multiplier -- for subtract, multiplier does B - A * C re_sel2 <= REXP2_B; @@ -1935,8 +1965,10 @@ begin when FMADD_2 => -- Product is potentially bigger here -- r.shift = addend exp - product exp + 64, r.r = r.b.mantissa + -- R contains B, S contains 0 set_s := '1'; opsel_s <= S_SHIFT; + set_x := '1'; -- set shift to r.shift - 64 rs_sel1 <= RSH1_S; rs_con2 <= RSCON2_64; @@ -1979,25 +2011,18 @@ begin v.state := FMADD_6; when FMADD_6 => - -- r.shift = UNIT_BIT (or 0, but only if r is now nonzero) + -- r.shift = UNIT_BIT set_r := '0'; opsel_r <= RES_SHIFT; re_sel2 <= REXP2_NE; - rs_norm <= '1'; - rcls_op <= RCLS_TZERO; if (r.r(UNIT_BIT + 2) or r_hi_nz or r_lo_nz or (or (r.r(DP_LSB - 1 downto 0)))) = '0' then - -- S = 0 case is handled by RCLS_TZERO logic, otherwise... - -- R is all zeroes but there are non-zero bits in S + -- R is all zeroes but there may be non-zero bits in S -- so shift them into R and set S to 0 set_r := '1'; re_set_result <= '1'; set_s := '1'; - v.state := FINISH; - elsif r.r(UNIT_BIT + 2 downto UNIT_BIT) = "001" then - v.state := FINISH; - else - v.state := NORMALIZE; end if; + v.state := FINISH; when DIV_2 => -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y @@ -2248,29 +2273,29 @@ begin when SQRT_11 => -- compute P = A - R * R (remainder) - -- also put 2 * R + 1 into B for comparison with P + -- also put 2 * R + 1 into C for comparison with P msel_1 <= MUL1_R; msel_2 <= MUL2_R; msel_add <= MULADD_A; msel_inv <= '1'; f_to_multiply.valid <= r.first; shiftin := '1'; - set_b := r.first; + set_c := r.first; if multiply_to_f.valid = '1' then v.state := SQRT_12; end if; when SQRT_12 => - -- test if remainder is 0 or >= B = 2*R + 1 + -- test if remainder is 0 or >= C = 2*R + 1 set_r := '0'; opsel_c <= CIN_INC; - if pcmpb_lt = '1' then + if pcmpc_lt = '1' then -- square root is correct, set X if remainder non-zero v.x := r.p(UNIT_BIT + 2) or px_nz; else -- square root needs to be incremented by 1 set_r := '1'; - v.x := not pcmpb_eq; + v.x := not pcmpc_eq; end if; v.state := FINISH; @@ -2318,10 +2343,13 @@ begin -- Check for possible overflows case r.insn(9 downto 8) is when "00" => -- fctiw[z] + -- check bit 32 in case of rounding overflow need_check := r.r(31) or (r.r(30) and not r.result_sign); when "01" => -- fctiwu[z] - need_check := r.r(31); + -- check bit 32 in case of rounding overflow + need_check := r.r(31) or r.r(31); when "10" => -- fctid[z] + -- can't get rounding overflow for 64-bit conversion need_check := r.r(63) or (r.r(62) and not r.result_sign); when others => -- fctidu[z] need_check := r.r(63); @@ -2341,26 +2369,23 @@ begin else msb := r.r(63); end if; - opsel_r <= RES_MISC; - misc_sel <= "110"; if (r.insn(8) = '0' and msb /= r.result_sign) or - (r.insn(8) = '1' and msb /= '1') then - set_r := '1'; - v.fpscr(FPSCR_VXCVI) := '1'; - invalid := '1'; + (r.insn(8) = '1' and msb /= '1') or + (r.insn(9) = '0' and r.r(32) /= r.result_sign) then + v.state := INT_OFLOW; else - set_r := '0'; if r.fpscr(FPSCR_FI) = '1' then v.fpscr(FPSCR_XX) := '1'; end if; + arith_done := '1'; end if; - arith_done := '1'; when INT_OFLOW => opsel_r <= RES_MISC; misc_sel <= "110"; set_r := '1'; v.fpscr(FPSCR_VXCVI) := '1'; + v.fpscr(FPSCR_FR downto FPSCR_FI) := "00"; invalid := '1'; arith_done := '1'; @@ -2374,22 +2399,24 @@ begin v.state := ROUNDING; when FINISH => - if r.is_multiply = '1' and px_nz = '1' then - v.x := '1'; - end if; + -- r.shift = 0 -- set shift to new_exp - min_exp (N.B. rs_norm overrides this) + -- assert that if r.r = 0 then r.x = 0 also rs_sel1 <= RSH1_NE; rs_con2 <= RSCON2_MINEXP; rs_neg2 <= '1'; + rcls_op <= RCLS_TZERO; if r.r(63 downto UNIT_BIT) /= std_ulogic_vector(to_unsigned(1, 64 - UNIT_BIT)) then rs_norm <= '1'; v.state := NORMALIZE; else set_x := '1'; - if exp_tiny = '1' then - v.state := ROUND_UFLOW; - elsif exp_huge = '1' then - v.state := ROUND_OFLOW; + set_xs := r.is_multiply; + v.tiny := exp_tiny; + if exp_tiny = '1' and r.fpscr(FPSCR_UE) = '0' then + v.state := ROUND_UFLOW_DIS; + elsif exp_huge = '1' and r.fpscr(FPSCR_OE) = '0' then + v.state := ROUND_OFLOW_DIS; else v.state := ROUNDING; end if; @@ -2407,146 +2434,156 @@ begin rs_con2 <= RSCON2_MINEXP; rs_neg2 <= '1'; set_x := '1'; - if exp_tiny = '1' then - v.state := ROUND_UFLOW; - elsif exp_huge = '1' then - v.state := ROUND_OFLOW; + set_xs := r.is_multiply; + v.tiny := exp_tiny; + if exp_tiny = '1' and r.fpscr(FPSCR_UE) = '0' then + v.state := ROUND_UFLOW_DIS; + elsif exp_huge = '1' and r.fpscr(FPSCR_OE) = '0' then + v.state := ROUND_OFLOW_DIS; else v.state := ROUNDING; end if; - when ROUND_UFLOW => + when ROUND_UFLOW_DIS => -- r.shift = - amount by which exponent underflows - v.tiny := '1'; + -- disabled underflow exception case + -- have to denormalize before rounding opsel_r <= RES_SHIFT; set_r := '0'; - if r.fpscr(FPSCR_UE) = '0' then - -- disabled underflow exception case - -- have to denormalize before rounding - set_r := '1'; - re_sel2 <= REXP2_NE; - re_set_result <= '1'; - set_x := '1'; - v.state := ROUNDING; - else - -- enabled underflow exception case - -- if denormalized, have to normalize before rounding - v.fpscr(FPSCR_UX) := '1'; - re_sel1 <= REXP1_R; - re_con2 <= RECON2_BIAS; - re_set_result <= '1'; - if r.r(UNIT_BIT) = '0' then - rs_norm <= '1'; - v.state := NORMALIZE; - else - v.state := ROUNDING; - end if; - end if; + set_r := '1'; + re_sel2 <= REXP2_NE; + re_set_result <= '1'; + set_x := '1'; + v.state := ROUNDING; - when ROUND_OFLOW => + when ROUND_OFLOW_DIS => + -- disabled overflow exception + -- result depends on rounding mode rcls_op <= RCLS_TINF; v.fpscr(FPSCR_OX) := '1'; opsel_r <= RES_MISC; misc_sel <= "010"; - set_r := '0'; - if r.fpscr(FPSCR_OE) = '0' then - -- disabled overflow exception - -- result depends on rounding mode - set_r := '1'; - v.fpscr(FPSCR_XX) := '1'; - v.fpscr(FPSCR_FI) := '1'; - -- construct largest representable number - re_con2 <= RECON2_MAX; - re_set_result <= '1'; - arith_done := '1'; - else - -- enabled overflow exception - re_sel1 <= REXP1_R; - re_con2 <= RECON2_BIAS; - re_neg2 <= '1'; - re_set_result <= '1'; - v.state := ROUNDING; - end if; + set_r := '1'; + v.fpscr(FPSCR_XX) := '1'; + v.fpscr(FPSCR_FI) := '1'; + -- construct largest representable number + re_con2 <= RECON2_MAX; + re_set_result <= '1'; + arith_done := '1'; when ROUNDING => + -- r.r can be zero or denorm here for fri* instructions, + -- and for disabled underflow exception cases. opsel_mask <= '1'; set_r := '1'; round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign); - v.fpscr(FPSCR_FR downto FPSCR_FI) := round; + if r.zero_fri = '0' then + v.fpscr(FPSCR_FR downto FPSCR_FI) := round; + else + v.fpscr(FPSCR_FR downto FPSCR_FI) := "00"; -- for fri* instructions + end if; if round(1) = '1' then -- increment the LSB for the precision v.state := ROUND_INC; elsif r.r(UNIT_BIT) = '0' then - -- result after masking could be zero, or could be a - -- denormalized result that needs to be renormalized - rs_norm <= '1'; + -- Result after masking could be zero, or could be a + -- denormalized result that needs to be renormalized, + -- but only for fri* instructions and for disabled + -- underflow exception cases. + -- For fri* instructions, result_exp is 52. + -- For disabled underflow exception cases for DP operations, + -- result_exp is -1022 and there is no point renormalizing + -- since it will just get denormalized again, but we do need + -- to check for a zero result in a subsequent cycle + -- after R is masked. + if r.result_exp > to_signed(-1022, EXP_BITS) then + rs_norm <= '1'; + end if; v.state := ROUNDING_3; + elsif r.tiny = '1' and r.fpscr(FPSCR_UE) = '1' then + v.state := ROUND_UFLOW_EN; + elsif r.result_exp > max_exp then + v.state := ROUND_OFLOW_EN; else arith_done := '1'; end if; - if round(0) = '1' then + if round(0) = '1' and r.zero_fri = '0' then v.fpscr(FPSCR_XX) := '1'; - if r.tiny = '1' then - v.fpscr(FPSCR_UX) := '1'; - end if; + end if; + if round(0) = '1' and r.tiny = '1' then + v.fpscr(FPSCR_UX) := '1'; end if; when ROUND_INC => set_r := '1'; opsel_a <= AIN_RND; - -- set shift to -1 - rs_con2 <= RSCON2_1; - rs_neg2 <= '1'; v.state := ROUNDING_2; when ROUNDING_2 => -- Check for overflow during rounding - -- r.shift = -1 - v.x := '0'; - re_sel2 <= REXP2_NE; - opsel_r <= RES_SHIFT; - set_r := '0'; + -- r.shift = 0 if r.r(UNIT_BIT + 1) = '1' then - set_r := '1'; - re_set_result <= '1'; - if exp_huge = '1' then - v.state := ROUND_OFLOW; - else - arith_done := '1'; - end if; - elsif r.r(UNIT_BIT) = '0' then -- Do CLZ so we can renormalize the result rs_norm <= '1'; v.state := ROUNDING_3; + elsif r.r(UNIT_BIT) = '0' then + -- R is non-zero (we just incremented it) + -- If result_exp is -1022 here, don't normalize since + -- we would then need to denormalize again. + if r.result_exp > to_signed(-1022, EXP_BITS) then + rs_norm <= '1'; + end if; + v.state := ROUNDING_3; + elsif exp_huge = '1' then + v.state := ROUND_OFLOW_EN; + elsif r.tiny = '1' and r.fpscr(FPSCR_UE) = '1' then + v.state := ROUND_UFLOW_EN; else arith_done := '1'; end if; when ROUNDING_3 => - -- r.shift = clz(r.r) - 9 + -- r.shift = clz(r.r) - 7 (or 0, or -7, if r.r is 0) + -- Note clz may be done on the value before being masked + -- to the result precision. opsel_r <= RES_SHIFT; set_r := '1'; re_sel2 <= REXP2_NE; - -- set shift to new_exp - min_exp (== -1022) + -- set shift to new_exp - DP min_exp (== -1022) rs_sel1 <= RSH1_NE; - rs_con2 <= RSCON2_MINEXP; + rs_con2 <= RSCON2_DPMINX; rs_neg2 <= '1'; rcls_op <= RCLS_TZERO; -- If the result is zero, that's handled below. -- Renormalize result after rounding - re_set_result <= '1'; v.denorm := exp_tiny; - if new_exp < to_signed(-1022, EXP_BITS) then - v.state := DENORM; + re_set_result <= '1'; + if exp_huge = '1' and r.fpscr(FPSCR_OE) = '0' then + v.state := ROUND_OFLOW_DIS; + elsif exp_huge = '1' and r.fpscr(FPSCR_OE) = '1' then + v.state := ROUND_OFLOW_EN; + elsif r.tiny = '1' and r.fpscr(FPSCR_UE) = '1' then + v.state := ROUND_UFLOW_EN; else arith_done := '1'; end if; - when DENORM => - -- r.shift = result_exp - -1022 - opsel_r <= RES_SHIFT; - set_r := '1'; - re_sel2 <= REXP2_NE; + when ROUND_OFLOW_EN => + -- enabled overflow exception + -- rounding and normalization has been done + v.fpscr(FPSCR_OX) := '1'; + re_sel1 <= REXP1_R; + re_con2 <= RECON2_BIAS; + re_neg2 <= '1'; + re_set_result <= '1'; + arith_done := '1'; + + when ROUND_UFLOW_EN => + -- enabled underflow exception + -- rounding and normalization has been done + v.fpscr(FPSCR_UX) := '1'; + re_sel1 <= REXP1_R; + re_con2 <= RECON2_BIAS; re_set_result <= '1'; arith_done := '1'; @@ -3077,13 +3114,16 @@ begin -- Handle exceptions and special cases for arithmetic operations if r.cycle_1_ar = '1' then v.fpscr := r.fpscr or scinfo.new_fpscr; + v.fpscr(FPSCR_FR) := '0'; + v.fpscr(FPSCR_FI) := '0'; + v.result_class := FINITE; invalid := scinfo.invalid; zero_divide := scinfo.zero_divide; qnan_result := scinfo.qnan_result; if scinfo.immed_result = '1' then -- state machine is in the DO_SPECIAL or DO_FSQRT state here - arith_done := '1'; set_r := '1'; + v.is_multiply := '0'; -- P is not valid opsel_r <= RES_MISC; opsel_sel <= scinfo.result_sel; if scinfo.qnan_result = '1' then @@ -3092,8 +3132,15 @@ begin else misc_sel <= "110"; end if; + arith_done := '1'; else misc_sel <= "111"; + if scinfo.need_finish = '1' then + -- we have to do rounding or underflow exception processing on the result + v.state := FINISH; + else + arith_done := '1'; + end if; end if; rsgn_op := scinfo.rsgn_op; v.result_class := scinfo.result_class; @@ -3140,19 +3187,17 @@ begin when others => end case; when RCLS_TZERO => - if or (r.r(UNIT_BIT + 2 downto 0)) = '0' and s_nz = '0' then + if or (r.r) = '0' then v.result_class := ZERO; arith_done := '1'; end if; when RCLS_TINF => - if r.fpscr(FPSCR_OE) = '0' then - if r.round_mode(1 downto 0) = "00" or - (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then - v.result_class := INFINITY; - v.fpscr(FPSCR_FR) := '1'; - else - v.fpscr(FPSCR_FR) := '0'; - end if; + if r.round_mode(1 downto 0) = "00" or + (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then + v.result_class := INFINITY; + v.fpscr(FPSCR_FR) := '1'; + else + v.fpscr(FPSCR_FR) := '0'; end if; when others => end case; @@ -3171,7 +3216,7 @@ begin v.writing_fpr := '1'; v.update_fprf := '1'; end if; - if r.is_subtract = '1' and v.result_class = ZERO then + if r.is_subtract = '1' and v.result_class = ZERO and v.fpscr(FPSCR_FI) = '0' then rsign := r.round_mode(0) and r.round_mode(1); end if; if r.negate = '1' and v.result_class /= NAN then @@ -3242,7 +3287,7 @@ begin -- If shifting right, test if bits of R will be shifted out of significance if r.longmask = '1' then - mshift := to_signed(28, EXP_BITS); + mshift := to_signed(SP_RBIT - 1, EXP_BITS); else mshift := to_signed(-1, EXP_BITS); end if; @@ -3258,7 +3303,17 @@ begin if mshift >= to_signed(64, EXP_BITS) then mshift := to_signed(63, EXP_BITS); end if; - v.x := v.x or r.r(to_integer(unsigned(mshift(5 downto 0)))); + v.x := v.x or rormr(to_integer(unsigned(mshift(5 downto 0)))); + end if; + -- Test if there are non-zero bits in S which won't get shifted into R + if set_xs = '1' and not is_X(r.shift) and r.shift < to_signed(56, EXP_BITS) then + if r.shift > to_signed(0, EXP_BITS) then + mshift := to_signed(55, EXP_BITS) - r.shift; + else + mshift := to_signed(55, EXP_BITS); + end if; + sorms := r.s or std_ulogic_vector(- signed(r.s)); + v.x := v.x or sorms(to_integer(unsigned(mshift(5 downto 0)))); end if; asign := '0'; case opsel_a is @@ -3284,6 +3339,8 @@ begin ci := '0'; case opsel_c is when CIN_SUBEXT => + -- Used with opsel_b = BIN_ADDSUBR, which will invert it if + -- r.subtract = 1, hence we use r.x here, rather than not r.x. ci := r.is_subtract and r.x; when CIN_ABSEXT => ci := r.r(63) and (s_nz or r.x); @@ -3537,6 +3594,8 @@ begin rsh_in2 := to_signed(64, EXP_BITS); when RSCON2_MINEXP => rsh_in2 := min_exp; + when RSCON2_DPMINX => + rsh_in2 := to_signed(-1022, EXP_BITS); when others => rsh_in2 := to_signed(0, EXP_BITS); end case; @@ -3654,7 +3713,7 @@ begin end if; if r.update_fprf = '1' then - v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.res_sign, r.result_class, + v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.res_sign, r.result_class, r.int_result, r.r(UNIT_BIT) and not r.denorm); end if; @@ -3663,10 +3722,15 @@ begin v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and v.fpscr(FPSCR_VE downto FPSCR_XE)); if update_fx = '1' and - (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then + ((v.fpscr(FPSCR_OX downto FPSCR_VXVC) & v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)) and + not r.old_exc) /= 13x"0" then v.fpscr(FPSCR_FX) := '1'; end if; + if r.complete = '1' or r.do_intr = '1' then + v.comm_fpscr := v.fpscr; + end if; + if v.instr_done = '1' then if r.state /= IDLE then v.state := IDLE; @@ -3675,7 +3739,8 @@ begin if r.fp_rc = '1' then v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX); end if; - v.sp_result := r.single_prec; + -- set sp_result for fctiw* + v.sp_result := r.single_prec and not r.integer_op; v.res_int := r.int_result or r.integer_op; v.illegal := illegal; v.nsnan_result := r.quieten_nan; @@ -3709,11 +3774,17 @@ begin -- This mustn't depend on any fields of r that are modified in IDLE state. if r.res_int = '1' then fp_result <= r.r; + if r.sp_result = '1' then + fp_result(63 downto 32) <= r.r(31 downto 0); + end if; else fp_result <= pack_dp(r.res_sign, r.result_class, r.result_exp, r.r, r.sp_result, r.nsnan_result); end if; + -- Make sure the reserved bit 11 (52) of FPSCR can never be set + v.fpscr(11) := '0'; + rin <= v; end process; diff --git a/tests/fpu/fpu.c b/tests/fpu/fpu.c index 5e45038..a123f62 100644 --- a/tests/fpu/fpu.c +++ b/tests/fpu/fpu.c @@ -21,6 +21,8 @@ #define FPS_VE 0x80 #define FPS_VXCVI 0x100 #define FPS_VXSOFT 0x400 +#define FPS_FI 0x20000 +#define FPS_FR 0x40000 extern int trapit(long arg, int (*func)(long)); extern void do_rfid(unsigned long msr); @@ -272,6 +274,7 @@ void set_fpscr(unsigned long fpscr) unsigned long fpscr_eval(unsigned long val) { val &= ~0x60000000; /* clear FEX and VX */ + val &= ~0x00000800; /* clear reserved bit 52 (BE) */ if (val & 0x1f80700) /* test all VX* bits */ val |= 0x20000000; if ((val >> 25) & (val >> 3) & 0x1f) @@ -348,15 +351,15 @@ int test4(long arg) fpscr = fpscr_eval((fpscr & 0x0fffffff) | 0x70000000); if (get_fpscr() != fpscr) return 16 * i + 27; - asm("mtfsb0 21"); + asm("mtfsb0 21"); /* VXSOFT */ fpscr = fpscr_eval(fpscr & ~(1 << (31-21))); if (get_fpscr() != fpscr) return 16 * i + 28; asm("mtfsb1 21"); - fpscr = fpscr_eval(fpscr | (1 << (31-21))); + fpscr = fpscr_eval(fpscr | (1 << (31-21)) | (1ul << 31)); if (get_fpscr() != fpscr) return 16 * i + 29; - asm("mtfsb0 24"); + asm("mtfsb0 24"); /* OE */ fpscr = fpscr_eval(fpscr & ~(1 << (31-24))); if (get_fpscr() != fpscr) return 16 * i + 30; @@ -653,29 +656,35 @@ struct roundvals { unsigned long fpscr; unsigned long dpval; unsigned long spval; + unsigned long fpscr_fir; } roundvals[] = { - { FPS_RN_NEAR, 0, 0 }, - { FPS_RN_CEIL, 0x8000000000000000, 0x8000000000000000 }, - { FPS_RN_NEAR, 0x402123456789abcd, 0x4021234560000000 }, - { FPS_RN_ZERO, 0x402123456789abcd, 0x4021234560000000 }, - { FPS_RN_CEIL, 0x402123456789abcd, 0x4021234580000000 }, - { FPS_RN_FLOOR, 0x402123456789abcd, 0x4021234560000000 }, - { FPS_RN_NEAR, 0x402123457689abcd, 0x4021234580000000 }, - { FPS_RN_ZERO, 0x402123457689abcd, 0x4021234560000000 }, - { FPS_RN_CEIL, 0x402123457689abcd, 0x4021234580000000 }, - { FPS_RN_FLOOR, 0x402123457689abcd, 0x4021234560000000 }, - { FPS_RN_NEAR, 0x4021234570000000, 0x4021234580000000 }, - { FPS_RN_NEAR, 0x4021234550000000, 0x4021234540000000 }, - { FPS_RN_NEAR, 0x7ff123456789abcd, 0x7ff9234560000000 }, - { FPS_RN_ZERO, 0x7ffa3456789abcde, 0x7ffa345660000000 }, - { FPS_RN_FLOOR, 0x7ff0000000000000, 0x7ff0000000000000 }, - { FPS_RN_NEAR, 0x47e1234550000000, 0x47e1234540000000 }, - { FPS_RN_NEAR, 0x47f1234550000000, 0x7ff0000000000000 }, - { FPS_RN_ZERO, 0x47f1234550000000, 0x47efffffe0000000 }, - { FPS_RN_CEIL, 0x47f1234550000000, 0x7ff0000000000000 }, - { FPS_RN_FLOOR, 0x47f1234550000000, 0x47efffffe0000000 }, - { FPS_RN_NEAR, 0x38012345b0000000, 0x38012345c0000000 }, - { FPS_RN_NEAR, 0x37c12345b0000000, 0x37c1234400000000 }, + { FPS_RN_NEAR|FPS_FI|FPS_FR, 0, 0, 0 }, + { FPS_RN_CEIL|FPS_FI|FPS_FR, 0x8000000000000000, 0x8000000000000000, 0 }, + { FPS_RN_NEAR|FPS_FR, 0x402123456789abcd, 0x4021234560000000, FPS_FI }, + { FPS_RN_ZERO|FPS_FR, 0x402123456789abcd, 0x4021234560000000, FPS_FI }, + { FPS_RN_CEIL, 0x402123456789abcd, 0x4021234580000000, FPS_FR|FPS_FI }, + { FPS_RN_FLOOR, 0x402123456789abcd, 0x4021234560000000, FPS_FI }, + { FPS_RN_NEAR, 0x402123457689abcd, 0x4021234580000000, FPS_FR|FPS_FI }, + { FPS_RN_ZERO|FPS_FR|FPS_FI, 0x402123457689abcd, 0x4021234560000000, FPS_FI }, + { FPS_RN_CEIL|FPS_FR, 0x402123457689abcd, 0x4021234580000000, FPS_FR|FPS_FI }, + { FPS_RN_FLOOR, 0x402123457689abcd, 0x4021234560000000, FPS_FI }, + { FPS_RN_NEAR, 0x4021234570000000, 0x4021234580000000, FPS_FR|FPS_FI }, + { FPS_RN_NEAR, 0x4021234550000000, 0x4021234540000000, FPS_FI }, + { FPS_RN_NEAR|FPS_FR|FPS_FI, 0x7ff123456789abcd, 0x7ff9234560000000, 0 }, + { FPS_RN_ZERO|FPS_FR, 0x7ffa3456789abcde, 0x7ffa345660000000, 0 }, + { FPS_RN_FLOOR|FPS_FR|FPS_FI, 0x7ff0000000000000, 0x7ff0000000000000, 0 }, + { FPS_RN_NEAR, 0x47e1234550000000, 0x47e1234540000000, FPS_FI }, + { FPS_RN_NEAR, 0x47f1234550000000, 0x7ff0000000000000, FPS_FR|FPS_FI }, + { FPS_RN_ZERO, 0x47f1234550000000, 0x47efffffe0000000, FPS_FI }, + { FPS_RN_CEIL, 0x47f1234550000000, 0x7ff0000000000000, FPS_FR|FPS_FI }, + { FPS_RN_FLOOR, 0x47f1234550000000, 0x47efffffe0000000, FPS_FI }, + { FPS_RN_NEAR, 0x38012345b0000000, 0x38012345c0000000, FPS_FR|FPS_FI }, + { FPS_RN_NEAR, 0x37c12345b0000000, 0x37c1234400000000, FPS_FI }, + { FPS_RN_NEAR, 0x0000008800000088, 0, FPS_FI }, + { FPS_RN_NEAR, 0xc2000000c2000000, 0xc2000000c0000000, FPS_FI }, + { FPS_RN_NEAR|FPS_OE, 0xefffffffffffffff, 0xe400000000000000, FPS_FR|FPS_FI }, + { FPS_RN_NEAR|FPS_OE, 0xff0000ff43434343, 0xf30000ff40000000, FPS_FI }, + { FPS_RN_NEAR|FPS_OE, 0xfc00fc0139fffcff, 0xf000fc0140000000, FPS_FR|FPS_FI }, }; int test8(long arg) @@ -696,6 +705,13 @@ int test8(long arg) } if (check_fprf(result, true, fpscr)) return i + 0x101; + if ((fpscr & (FPS_FR|FPS_FI)) != roundvals[i].fpscr_fir) { + print_string("\r\n"); + print_hex(i, 4, " "); + print_hex(fpscr, 8, " "); + print_hex(roundvals[i].fpscr_fir, 8, " "); + return i + 0x201; + } } return 0; } @@ -740,6 +756,8 @@ struct cvtivals { { 0x7ff923456789abcd, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } }, { 0xfff923456789abcd, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } }, { 0xbfd123456789abcd, 0, 0, 0, 0, {0, 0, 0, 0} }, + { 0x41effffffff00081, 0x100000000, 0x100000000, 0x7fffffff, 0xffffffff, { 0, 0, 1, 1 } }, + { 0xc1e0000000000000, 0xffffffff80000000, 0x0000000000000000, 0x80000000, 0x00000000, { 0, 1, 0, 1 } }, }; #define GET_VXCVI() ((get_fpscr() >> 8) & 1) @@ -814,6 +832,7 @@ struct cvtivals cvtizvals[] = { { 0xfff0000000000000, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } }, { 0x7ff923456789abcd, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } }, { 0xfff923456789abcd, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } }, + { 0xc1e0000000000000, 0xffffffff80000000, 0x0000000000000000, 0x80000000, 0x00000000, { 0, 1, 0, 1 } }, }; int test10(long arg) @@ -959,51 +978,53 @@ struct addvals { unsigned long val_b; unsigned long sum; unsigned long diff; + unsigned long fpscr; } addvals[] = { - { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }, - { 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000 }, - { 0x3fdfffffffffffff, 0x0000000000000000, 0x3fdfffffffffffff, 0x3fdfffffffffffff }, - { 0x3ff0000000000000, 0x3ff0000000000000, 0x4000000000000000, 0x0000000000000000 }, - { 0xbff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x0000000000000000 }, - { 0x402123456789abcd, 0x4021000000000000, 0x403111a2b3c4d5e6, 0x3fb1a2b3c4d5e680 }, - { 0x4061200000000000, 0x406123456789abcd, 0x407121a2b3c4d5e6, 0xbfba2b3c4d5e6800 }, - { 0x4061230000000000, 0x3fa4560000000000, 0x4061244560000000, 0x406121baa0000000 }, - { 0xc061230000000000, 0x3fa4560000000000, 0xc06121baa0000000, 0xc061244560000000 }, - { 0x4061230000000000, 0xbfa4560000000000, 0x406121baa0000000, 0x4061244560000000 }, - { 0xc061230000000000, 0xbfa4560000000000, 0xc061244560000000, 0xc06121baa0000000 }, - { 0x3fa1230000000000, 0x4064560000000000, 0x4064571230000000, 0xc06454edd0000000 }, - { 0xbfa1230000000000, 0x4064560000000000, 0x406454edd0000000, 0xc064571230000000 }, - { 0x3fa1230000000000, 0xc064560000000000, 0xc06454edd0000000, 0x4064571230000000 }, - { 0xbfa1230000000000, 0xc064560000000000, 0xc064571230000000, 0x406454edd0000000 }, - { 0x6780000000000001, 0x6470000000000000, 0x6780000000000009, 0x677ffffffffffff2 }, - { 0x6780000000000001, 0x6460000000000000, 0x6780000000000005, 0x677ffffffffffffa }, - { 0x6780000000000001, 0x6450000000000000, 0x6780000000000003, 0x677ffffffffffffe }, - { 0x6780000000000001, 0x6440000000000000, 0x6780000000000002, 0x6780000000000000 }, - { 0x7ff8888888888888, 0x7ff9999999999999, 0x7ff8888888888888, 0x7ff8888888888888 }, - { 0xfff8888888888888, 0x7ff9999999999999, 0xfff8888888888888, 0xfff8888888888888 }, - { 0x7ff8888888888888, 0x7ff0000000000000, 0x7ff8888888888888, 0x7ff8888888888888 }, - { 0x7ff8888888888888, 0x0000000000000000, 0x7ff8888888888888, 0x7ff8888888888888 }, - { 0x7ff8888888888888, 0x0001111111111111, 0x7ff8888888888888, 0x7ff8888888888888 }, - { 0x7ff8888888888888, 0x3ff0000000000000, 0x7ff8888888888888, 0x7ff8888888888888 }, - { 0x7ff0000000000000, 0x7ff9999999999999, 0x7ff9999999999999, 0x7ff9999999999999 }, - { 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff8000000000000 }, - { 0x7ff0000000000000, 0xfff0000000000000, 0x7ff8000000000000, 0x7ff0000000000000 }, - { 0x7ff0000000000000, 0x0000000000000000, 0x7ff0000000000000, 0x7ff0000000000000 }, - { 0x7ff0000000000000, 0x8000000000000000, 0x7ff0000000000000, 0x7ff0000000000000 }, - { 0x7ff0000000000000, 0x8002222222222222, 0x7ff0000000000000, 0x7ff0000000000000 }, - { 0x7ff0000000000000, 0xc002222222222222, 0x7ff0000000000000, 0x7ff0000000000000 }, - { 0x0000000000000000, 0x7ff9999999999999, 0x7ff9999999999999, 0x7ff9999999999999 }, - { 0x0000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0xfff0000000000000 }, - { 0x8000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0xfff0000000000000 }, - { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }, - { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }, - { 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000 }, - { 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000 }, - { 0x8002222222222222, 0x0001111111111111, 0x8001111111111111, 0x8003333333333333 }, - { 0x0000022222222222, 0x0000111111111111, 0x0000133333333333, 0x80000eeeeeeeeeef }, - { 0x401ffffffbfffefe, 0x406b8265196bd89e, 0x406c8265194bd896, 0xc06a8265198bd8a6 }, - { 0x4030020000000004, 0xbf110001ffffffff, 0x403001fbbfff8004, 0x4030020440008004 }, - { 0x3fdfffffffffffff, 0x3fe0000000000000, 0x3ff0000000000000, 0xbc90000000000000 }, + { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, FPS_RN_NEAR }, + { 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000, FPS_RN_NEAR }, + { 0x3fdfffffffffffff, 0x0000000000000000, 0x3fdfffffffffffff, 0x3fdfffffffffffff, FPS_RN_NEAR }, + { 0x3ff0000000000000, 0x3ff0000000000000, 0x4000000000000000, 0x0000000000000000, FPS_RN_NEAR }, + { 0xbff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x0000000000000000, FPS_RN_NEAR }, + { 0x402123456789abcd, 0x4021000000000000, 0x403111a2b3c4d5e6, 0x3fb1a2b3c4d5e680, FPS_RN_NEAR }, + { 0x4061200000000000, 0x406123456789abcd, 0x407121a2b3c4d5e6, 0xbfba2b3c4d5e6800, FPS_RN_NEAR }, + { 0x4061230000000000, 0x3fa4560000000000, 0x4061244560000000, 0x406121baa0000000, FPS_RN_NEAR }, + { 0xc061230000000000, 0x3fa4560000000000, 0xc06121baa0000000, 0xc061244560000000, FPS_RN_NEAR }, + { 0x4061230000000000, 0xbfa4560000000000, 0x406121baa0000000, 0x4061244560000000, FPS_RN_NEAR }, + { 0xc061230000000000, 0xbfa4560000000000, 0xc061244560000000, 0xc06121baa0000000, FPS_RN_NEAR }, + { 0x3fa1230000000000, 0x4064560000000000, 0x4064571230000000, 0xc06454edd0000000, FPS_RN_NEAR }, + { 0xbfa1230000000000, 0x4064560000000000, 0x406454edd0000000, 0xc064571230000000, FPS_RN_NEAR }, + { 0x3fa1230000000000, 0xc064560000000000, 0xc06454edd0000000, 0x4064571230000000, FPS_RN_NEAR }, + { 0xbfa1230000000000, 0xc064560000000000, 0xc064571230000000, 0x406454edd0000000, FPS_RN_NEAR }, + { 0x6780000000000001, 0x6470000000000000, 0x6780000000000009, 0x677ffffffffffff2, FPS_RN_NEAR }, + { 0x6780000000000001, 0x6460000000000000, 0x6780000000000005, 0x677ffffffffffffa, FPS_RN_NEAR }, + { 0x6780000000000001, 0x6450000000000000, 0x6780000000000003, 0x677ffffffffffffe, FPS_RN_NEAR }, + { 0x6780000000000001, 0x6440000000000000, 0x6780000000000002, 0x6780000000000000, FPS_RN_NEAR }, + { 0x7ff8888888888888, 0x7ff9999999999999, 0x7ff8888888888888, 0x7ff8888888888888, FPS_RN_NEAR }, + { 0xfff8888888888888, 0x7ff9999999999999, 0xfff8888888888888, 0xfff8888888888888, FPS_RN_NEAR }, + { 0x7ff8888888888888, 0x7ff0000000000000, 0x7ff8888888888888, 0x7ff8888888888888, FPS_RN_NEAR }, + { 0x7ff8888888888888, 0x0000000000000000, 0x7ff8888888888888, 0x7ff8888888888888, FPS_RN_NEAR }, + { 0x7ff8888888888888, 0x0001111111111111, 0x7ff8888888888888, 0x7ff8888888888888, FPS_RN_NEAR }, + { 0x7ff8888888888888, 0x3ff0000000000000, 0x7ff8888888888888, 0x7ff8888888888888, FPS_RN_NEAR }, + { 0x7ff0000000000000, 0x7ff9999999999999, 0x7ff9999999999999, 0x7ff9999999999999, FPS_RN_NEAR }, + { 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff8000000000000, FPS_RN_NEAR }, + { 0x7ff0000000000000, 0xfff0000000000000, 0x7ff8000000000000, 0x7ff0000000000000, FPS_RN_NEAR }, + { 0x7ff0000000000000, 0x0000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, FPS_RN_NEAR }, + { 0x7ff0000000000000, 0x8000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, FPS_RN_NEAR }, + { 0x7ff0000000000000, 0x8002222222222222, 0x7ff0000000000000, 0x7ff0000000000000, FPS_RN_NEAR }, + { 0x7ff0000000000000, 0xc002222222222222, 0x7ff0000000000000, 0x7ff0000000000000, FPS_RN_NEAR }, + { 0x0000000000000000, 0x7ff9999999999999, 0x7ff9999999999999, 0x7ff9999999999999, FPS_RN_NEAR }, + { 0x0000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0xfff0000000000000, FPS_RN_NEAR }, + { 0x8000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0xfff0000000000000, FPS_RN_NEAR }, + { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, FPS_RN_NEAR }, + { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, FPS_RN_NEAR }, + { 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000, FPS_RN_NEAR }, + { 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000, FPS_RN_NEAR }, + { 0x8002222222222222, 0x0001111111111111, 0x8001111111111111, 0x8003333333333333, FPS_RN_NEAR }, + { 0x0000022222222222, 0x0000111111111111, 0x0000133333333333, 0x80000eeeeeeeeeef, FPS_RN_NEAR }, + { 0x401ffffffbfffefe, 0x406b8265196bd89e, 0x406c8265194bd896, 0xc06a8265198bd8a6, FPS_RN_NEAR }, + { 0x4030020000000004, 0xbf110001ffffffff, 0x403001fbbfff8004, 0x4030020440008004, FPS_RN_NEAR }, + { 0x3fdfffffffffffff, 0x3fe0000000000000, 0x3ff0000000000000, 0xbc90000000000000, FPS_RN_NEAR }, + { 0x001000100010000f, 0x00000000000000ff, 0x001000100010010e, 0x00100010000fff10, FPS_RN_CEIL }, }; int test13(long arg) @@ -1013,8 +1034,8 @@ int test13(long arg) struct addvals *vp = addvals; unsigned long fpscr; - set_fpscr(FPS_RN_NEAR); for (i = 0; i < sizeof(addvals) / sizeof(addvals[0]); ++i, ++vp) { + set_fpscr(vp->fpscr); asm("lfd 5,0(%0); lfd 6,8(%0); fadd 7,5,6; fsub 8,5,6; stfd 7,0(%1); stfd 8,8(%1)" : : "b" (&vp->val_a), "b" (results) : "memory"); fpscr = get_fpscr(); @@ -1491,110 +1512,123 @@ struct fmavals { unsigned long ra; unsigned long rc; unsigned long rb; + unsigned long fpscr; unsigned long fma; unsigned long fms; unsigned long nfma; unsigned long nfms; } fmavals[] = { /* +0 * +0 +- +0 -> +0, +0, -0, -0 */ - { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, FPS_RN_NEAR, 0x0000000000000000, 0x0000000000000000, 0x8000000000000000, 0x8000000000000000 }, /* +0 * NaNC +- +0 -> NaNC, NaNC, NaNC, NaNC */ - { 0x0000000000000000, 0x7ffc000000000000, 0x0000000000000000, + { 0x0000000000000000, 0x7ffc000000000000, 0x0000000000000000, FPS_RN_NEAR, 0x7ffc000000000000, 0x7ffc000000000000, 0x7ffc000000000000, 0x7ffc000000000000 }, /* +0 * NaNC +- NaNB -> NaNB, NaNB, NaNB, NaNB */ - { 0x0000000000000000, 0x7ffc000000000000, 0x7ffb000000000000, + { 0x0000000000000000, 0x7ffc000000000000, 0x7ffb000000000000, FPS_RN_NEAR, 0x7ffb000000000000, 0x7ffb000000000000, 0x7ffb000000000000, 0x7ffb000000000000 }, /* NaNA * NaNC +- NaNB -> NaNA, NaNA, NaNA, NaNA */ - { 0x7ffa000000000000, 0x7ffc000000000000, 0x7ffb000000000000, + { 0x7ffa000000000000, 0x7ffc000000000000, 0x7ffb000000000000, FPS_RN_NEAR, 0x7ffa000000000000, 0x7ffa000000000000, 0x7ffa000000000000, 0x7ffa000000000000 }, /* +1.0 * -0 +- +finite B -> +B, -B, -B, +B */ - { 0x3ff0000000000000, 0x8000000000000000, 0x678123456789abcd, + { 0x3ff0000000000000, 0x8000000000000000, 0x678123456789abcd, FPS_RN_NEAR, 0x678123456789abcd, 0xe78123456789abcd, 0xe78123456789abcd, 0x678123456789abcd }, /* +1.0 * -1.0 +- (B = +3.818e+190) -> +B, -B, -B, +B */ - { 0x3ff0000000000000, 0xbff0000000000000, 0x678123456789abcd, + { 0x3ff0000000000000, 0xbff0000000000000, 0x678123456789abcd, FPS_RN_NEAR, 0x678123456789abcd, 0xe78123456789abcd, 0xe78123456789abcd, 0x678123456789abcd }, /* +inf * -1.0 +- +finite B -> -inf, -inf, +inf, +inf */ - { 0x7ff0000000000000, 0xbff0000000000000, 0x678123456789abcd, + { 0x7ff0000000000000, 0xbff0000000000000, 0x678123456789abcd, FPS_RN_NEAR, 0xfff0000000000000, 0xfff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000 }, /* +inf * +0 +- +finite B -> NaNQ, NaNQ, NaNQ, NaNQ */ - { 0x7ff0000000000000, 0x0000000000000000, 0x678123456789abcd, + { 0x7ff0000000000000, 0x0000000000000000, 0x678123456789abcd, FPS_RN_NEAR, 0x7ff8000000000000, 0x7ff8000000000000, 0x7ff8000000000000, 0x7ff8000000000000 }, /* +1.0 * +1.0 +- 1.00000012 -> +2.00000012, +1.2e-7, -2.00000012, -1.2e-7 */ - { 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000020000000, + { 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000020000000, FPS_RN_NEAR, 0x4000000010000000, 0xbe80000000000000, 0xc000000010000000, 0x3e80000000000000 }, /* +(1 + 2^-52) * +(1 + 2^-52) +- +1.0 -> +(2 + 2^-51), +2^-51, -(2 + 2^-51), -2^-51 */ - { 0x3ff0000000000001, 0x3ff0000000000001, 0x3ff0000000000000, + { 0x3ff0000000000001, 0x3ff0000000000001, 0x3ff0000000000000, FPS_RN_NEAR, 0x4000000000000001, 0x3cc0000000000000, 0xc000000000000001, 0xbcc0000000000000 }, - /* +(1 + 3*2^-52) * +(1 + 2^-51) +- +1.0 -> +(2 + 2^-50), +5 * 2^-52 + 2^-101, -, - */ - { 0x3ff0000000000003, 0x3ff0000000000002, 0x3ff0000000000000, - 0x4000000000000002, 0x3cd4000000000002, 0xc000000000000002, 0xbcd4000000000002 }, + /* +(1 + 3*2^-52) * +(1 + 2^-51) +- +1.0 -> +(2 + 3*2^-51), +5 * 2^-52 + 2^-101, -, - */ + { 0x3ff0000000000003, 0x3ff0000000000002, 0x3ff0000000000000, FPS_RN_NEAR, + 0x4000000000000003, 0x3cd4000000000002, 0xc000000000000003, 0xbcd4000000000002 }, /* +2.443e-77 * 2.828 +- 6.909e-77 -> -1.402e-93, +1.382e-76, +1.402e-93, -1.382e-76 */ - { 0x3006a09e667f3bcc, 0x4006a09e667f3bcd, 0xb020000000000000, + { 0x3006a09e667f3bcc, 0x4006a09e667f3bcd, 0xb020000000000000, FPS_RN_NEAR, 0xaca765753908cd20, 0x3030000000000000, 0x2ca765753908cd20, 0xb030000000000000 }, /* +2.443e-77 * 2.828 +- 6.909e-77 -> +9.446e-93, +1.382e-76, -9.446e-93, -1.382e-76 */ - { 0x3006a09e667f3bcd, 0x4006a09e667f3bcd, 0xb020000000000000, + { 0x3006a09e667f3bcd, 0x4006a09e667f3bcd, 0xb020000000000000, FPS_RN_NEAR, 0x2cd3b3efbf5e2229, 0x3030000000000000, 0xacd3b3efbf5e2229, 0xb030000000000000 }, /* +2.443e-77 * 2.828 +- -1.1055e-75 -> -1.0364e-75, +1.1746e-75, +1.0364e-75, -1.1746e-75 */ - { 0x3006a09e667f3bcc, 0x4006a09e667f3bcd, 0xb060003450000000, + { 0x3006a09e667f3bcc, 0x4006a09e667f3bcd, 0xb060003450000000, FPS_RN_NEAR, 0xb05e0068a0000000, 0x3061003450000000, 0x305e0068a0000000, 0xb061003450000000 }, /* +2 * +3 +- 3 -> +9, +3, -9, -3 */ - { 0x4000000000000000, 0x4008000000000000, 0x4008000000000000, + { 0x4000000000000000, 0x4008000000000000, 0x4008000000000000, FPS_RN_NEAR, 0x4022000000000000, 0x4008000000000000, 0xc022000000000000, 0xc008000000000000 }, /* +2 * +3 +- 5 -> +11, +1, -11, -1 */ - { 0x4000000000000000, 0x4008000000000000, 0x4014000000000000, + { 0x4000000000000000, 0x4008000000000000, 0x4014000000000000, FPS_RN_NEAR, 0x4026000000000000, 0x3ff0000000000000, 0xc026000000000000, 0xbff0000000000000 }, /* +2 * +3 +- 7 -> +13, -1, -13, +1 */ - { 0x4000000000000000, 0x4008000000000000, 0x401c000000000000, + { 0x4000000000000000, 0x4008000000000000, 0x401c000000000000, FPS_RN_NEAR, 0x402a000000000000, 0xbff0000000000000, 0xc02a000000000000, 0x3ff0000000000000 }, /* +2 * +3 +- 9 -> +15, -3, -15, +3 */ - { 0x4000000000000000, 0x4008000000000000, 0x4022000000000000, + { 0x4000000000000000, 0x4008000000000000, 0x4022000000000000, FPS_RN_NEAR, 0x402e000000000000, 0xc008000000000000, 0xc02e000000000000, 0x4008000000000000 }, /* +2 * +3 +- -3 -> +3, +9, -3, -9 */ - { 0x4000000000000000, 0x4008000000000000, 0xc008000000000000, + { 0x4000000000000000, 0x4008000000000000, 0xc008000000000000, FPS_RN_NEAR, 0x4008000000000000, 0x4022000000000000, 0xc008000000000000, 0xc022000000000000 }, /* +2 * +3 +- -5 -> +1, +11, -1, -11 */ - { 0x4000000000000000, 0x4008000000000000, 0xc014000000000000, + { 0x4000000000000000, 0x4008000000000000, 0xc014000000000000, FPS_RN_NEAR, 0x3ff0000000000000, 0x4026000000000000, 0xbff0000000000000, 0xc026000000000000 }, /* +2 * +3 +- -7 -> -1, +13, +1, -13 */ - { 0x4000000000000000, 0x4008000000000000, 0xc01c000000000000, + { 0x4000000000000000, 0x4008000000000000, 0xc01c000000000000, FPS_RN_NEAR, 0xbff0000000000000, 0x402a000000000000, 0x3ff0000000000000, 0xc02a000000000000 }, /* +2 * +3 +- -9 -> -3, +15, +3, -15 */ - { 0x4000000000000000, 0x4008000000000000, 0xc022000000000000, + { 0x4000000000000000, 0x4008000000000000, 0xc022000000000000, FPS_RN_NEAR, 0xc008000000000000, 0x402e000000000000, 0x4008000000000000, 0xc02e000000000000 }, /* +2 * -3 +- 3 -> -3, -9, +3, +9 */ - { 0x4000000000000000, 0xc008000000000000, 0x4008000000000000, + { 0x4000000000000000, 0xc008000000000000, 0x4008000000000000, FPS_RN_NEAR, 0xc008000000000000, 0xc022000000000000, 0x4008000000000000, 0x4022000000000000 }, /* +2 * -3 +- 5 -> -1, -11, +1, +11 */ - { 0x4000000000000000, 0xc008000000000000, 0x4014000000000000, + { 0x4000000000000000, 0xc008000000000000, 0x4014000000000000, FPS_RN_NEAR, 0xbff0000000000000, 0xc026000000000000, 0x3ff0000000000000, 0x4026000000000000 }, /* +2 * -3 +- 7 -> +1, -13, -1, +13 */ - { 0x4000000000000000, 0xc008000000000000, 0x401c000000000000, + { 0x4000000000000000, 0xc008000000000000, 0x401c000000000000, FPS_RN_NEAR, 0x3ff0000000000000, 0xc02a000000000000, 0xbff0000000000000, 0x402a000000000000 }, /* +2 * -3 +- 9 -> +3, -15, -3, +15 */ - { 0x4000000000000000, 0xc008000000000000, 0x4022000000000000, + { 0x4000000000000000, 0xc008000000000000, 0x4022000000000000, FPS_RN_NEAR, 0x4008000000000000, 0xc02e000000000000, 0xc008000000000000, 0x402e000000000000 }, /* -2 * +3 +- -3 -> -9, -3, +9, +3 */ - { 0xc000000000000000, 0x4008000000000000, 0xc008000000000000, + { 0xc000000000000000, 0x4008000000000000, 0xc008000000000000, FPS_RN_NEAR, 0xc022000000000000, 0xc008000000000000, 0x4022000000000000, 0x4008000000000000 }, /* -2 * +3 +- -5 -> -11, -1, +11, +1 */ - { 0xc000000000000000, 0x4008000000000000, 0xc014000000000000, + { 0xc000000000000000, 0x4008000000000000, 0xc014000000000000, FPS_RN_NEAR, 0xc026000000000000, 0xbff0000000000000, 0x4026000000000000, 0x3ff0000000000000 }, /* -2 * +3 +- -7 -> -13, +1, +13, -1 */ - { 0xc000000000000000, 0x4008000000000000, 0xc01c000000000000, + { 0xc000000000000000, 0x4008000000000000, 0xc01c000000000000, FPS_RN_NEAR, 0xc02a000000000000, 0x3ff0000000000000, 0x402a000000000000, 0xbff0000000000000 }, /* -2 * +3 +- -9 -> -15, +3, +15, -3 */ - { 0xc000000000000000, 0x4008000000000000, 0xc022000000000000, + { 0xc000000000000000, 0x4008000000000000, 0xc022000000000000, FPS_RN_NEAR, 0xc02e000000000000, 0x4008000000000000, 0x402e000000000000, 0xc008000000000000 }, /* -2 * +3 +- +0 -> -6, -6, +6, +6 */ - { 0xc000000000000000, 0x4008000000000000, 0x0000000000000000, + { 0xc000000000000000, 0x4008000000000000, 0x0000000000000000, FPS_RN_NEAR, 0xc018000000000000, 0xc018000000000000, 0x4018000000000000, 0x4018000000000000 }, /* +2 * -3 +- -0 -> -6, -6, +6, +6 */ - { 0x4000000000000000, 0xc008000000000000, 0x8000000000000000, + { 0x4000000000000000, 0xc008000000000000, 0x8000000000000000, FPS_RN_NEAR, 0xc018000000000000, 0xc018000000000000, 0x4018000000000000, 0x4018000000000000 }, /* 2^-1026 * (1.5 * 2^1023) +- -0 -> (1.5 * 2^-3), ditto, -ditto, -ditto */ - { 0x0001000000000000, 0x7fe8000000000000, 0x8000000000000000, + { 0x0001000000000000, 0x7fe8000000000000, 0x8000000000000000, FPS_RN_NEAR, 0x3fc8000000000000, 0x3fc8000000000000, 0xbfc8000000000000, 0xbfc8000000000000 }, + /* 1 * -1 + tiny -> -1 + delta, -1, 1 - delta, 1 */ + { 0x3ff0000000000000, 0xbff0000000000000, 0x00000000b2200102, FPS_RN_CEIL, + 0xbfefffffffffffff, 0xbff0000000000000, 0x3fefffffffffffff, 0x3ff0000000000000 }, + /* from random exec tests */ + { 0x43eff79000000000, 0x00000000000000ff, 0x0000000000000081, FPS_RN_CEIL, + 0x014fd79870000001, 0x014fd79870000000, 0x814fd79870000001, 0x814fd79870000000 }, + { 0x00000000ffffffff, 0x1fc771af627f62ab, 0x8000000000000000, FPS_RN_ZERO, + 0x0000000000000000, 0x0000000000000000, 0x8000000000000000, 0x8000000000000000 }, + { 0x41efffffffe00000, 0xc1efffffffe00000, 0x43f0000000000000, FPS_RN_CEIL, + 0x41fffffffff00000, 0xc3ffffffffe00000, 0xc1fffffffff00000, 0x43ffffffffe00000 }, + { 0x3ff0000000000000, 0x000060fbffffefc1, 0x000060fbffffefc1, FPS_RN_NEAR, + 0x0000c1f7ffffdf82, 0x0000000000000000, 0x8000c1f7ffffdf82, 0x8000000000000000 }, }; int test23(long arg) @@ -1604,8 +1638,8 @@ int test23(long arg) struct fmavals *vp = fmavals; unsigned long fpscr; - set_fpscr(FPS_RN_NEAR); for (i = 0; i < sizeof(fmavals) / sizeof(fmavals[0]); ++i, ++vp) { + set_fpscr(vp->fpscr); asm("lfd 6,0(%0); lfd 7,8(%0); lfd 8,16(%0); fmadd 0,6,7,8; stfd 0,0(%1)" : : "b" (&vp->ra), "b" (results) : "memory"); asm("fmsub 1,6,7,8; fnmadd 2,6,7,8; fnmsub 3,6,7,8; stfd 1,8(%0); stfd 2,16(%0); stfd 3,24(%0)" diff --git a/tests/test_fpu.bin b/tests/test_fpu.bin index b2a293c..cd1d647 100755 Binary files a/tests/test_fpu.bin and b/tests/test_fpu.bin differ