mirror of
https://github.com/antonblanchard/microwatt.git
synced 2026-03-10 04:24:30 +00:00
FPU: Implement fmr and related instructions
This implements fmr, fneg, fabs, fnabs and fcpsgn and adds tests for them. This also adds logic to unpack floating-point data from the 64-bit packed form (as stored in memory and the register file) into the unpacked form held in the fpu_reg_type record, and to repack it into the 64-bit form again. This is not strictly necessary for fmr et al., but will be useful when we do actual arithmetic. Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
@@ -428,6 +428,11 @@ architecture behaviour of decode1 is
|
||||
2#011000100# => (FPU, OP_FPOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/6=mtfsfi
|
||||
2#011110010# => (FPU, OP_FPOP_I, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 18/7=mffs family
|
||||
2#011110110# => (FPU, OP_FPOP_I, NONE, FRB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 22/7=mtfsf
|
||||
2#100000000# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 0/8=fcpsgn
|
||||
2#100000001# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 1/8=fneg
|
||||
2#100000010# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 2/8=fmr
|
||||
2#100000100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/8=fnabs
|
||||
2#100001000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 8/8=fabs
|
||||
others => illegal_inst
|
||||
);
|
||||
|
||||
|
||||
@@ -80,6 +80,8 @@ architecture behaviour of decode2 is
|
||||
return (is_fast_spr(ispr), ispr, reg_data);
|
||||
elsif t = CIA then
|
||||
return ('0', (others => '0'), instr_addr);
|
||||
elsif HAS_FPU and t = FRA then
|
||||
return ('1', fpr_to_gspr(insn_fra(insn_in)), reg_data);
|
||||
else
|
||||
return ('0', (others => '0'), (others => '0'));
|
||||
end if;
|
||||
@@ -300,6 +302,7 @@ begin
|
||||
end process;
|
||||
|
||||
r_out.read1_reg <= d_in.ispr1 when d_in.decode.input_reg_a = SPR
|
||||
else fpr_to_gspr(insn_fra(d_in.insn)) when d_in.decode.input_reg_a = FRA and HAS_FPU
|
||||
else gpr_to_gspr(insn_ra(d_in.insn));
|
||||
r_out.read2_reg <= d_in.ispr2 when d_in.decode.input_reg_b = SPR
|
||||
else fpr_to_gspr(insn_frb(d_in.insn)) when d_in.decode.input_reg_b = FRB and HAS_FPU
|
||||
|
||||
@@ -23,7 +23,7 @@ package decode_types is
|
||||
OP_BCD, OP_ADDG6S,
|
||||
OP_FETCH_FAILED
|
||||
);
|
||||
type input_reg_a_t is (NONE, RA, RA_OR_ZERO, SPR, CIA);
|
||||
type input_reg_a_t is (NONE, RA, RA_OR_ZERO, SPR, CIA, FRA);
|
||||
type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD,
|
||||
CONST_DXHI4, CONST_DS, CONST_M1, CONST_SH, CONST_SH32, SPR, FRB);
|
||||
type input_reg_c_t is (NONE, RS, RCR, FRS);
|
||||
|
||||
144
fpu.vhdl
144
fpu.vhdl
@@ -24,9 +24,20 @@ entity fpu is
|
||||
end entity fpu;
|
||||
|
||||
architecture behaviour of fpu is
|
||||
type fp_number_class is (ZERO, FINITE, INFINITY, NAN);
|
||||
|
||||
constant EXP_BITS : natural := 13;
|
||||
|
||||
-- Unpacked representation of one FPU operand, as produced by decode_dp.
type fpu_reg_type is record
|
||||
class : fp_number_class; -- one of ZERO, FINITE, INFINITY, NAN
|
||||
negative : std_ulogic; -- sign; decode_dp copies bit 63 of the packed value here
|
||||
exponent : signed(EXP_BITS-1 downto 0); -- unbiased
|
||||
mantissa : std_ulogic_vector(63 downto 0); -- 10.54 format
|
||||
end record;
|
||||
|
||||
type state_t is (IDLE,
|
||||
DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF);
|
||||
DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
|
||||
DO_FMR);
|
||||
|
||||
type reg_type is record
|
||||
state : state_t;
|
||||
@@ -41,9 +52,14 @@ architecture behaviour of fpu is
|
||||
is_cmp : std_ulogic;
|
||||
single_prec : std_ulogic;
|
||||
fpscr : std_ulogic_vector(31 downto 0);
|
||||
b : std_ulogic_vector(63 downto 0);
|
||||
a : fpu_reg_type;
|
||||
b : fpu_reg_type;
|
||||
r : std_ulogic_vector(63 downto 0);
|
||||
result_sign : std_ulogic;
|
||||
result_class : fp_number_class;
|
||||
result_exp : signed(EXP_BITS-1 downto 0);
|
||||
writing_back : std_ulogic;
|
||||
int_result : std_ulogic;
|
||||
cr_result : std_ulogic_vector(3 downto 0);
|
||||
cr_mask : std_ulogic_vector(7 downto 0);
|
||||
end record;
|
||||
@@ -51,6 +67,72 @@ architecture behaviour of fpu is
|
||||
signal r, rin : reg_type;
|
||||
|
||||
signal fp_result : std_ulogic_vector(63 downto 0);
|
||||
signal opsel_r : std_ulogic_vector(1 downto 0);
|
||||
signal result : std_ulogic_vector(63 downto 0);
|
||||
|
||||
-- Unpack a 64-bit register value into an fpu_reg_type record.
--
--   fpr    : raw 64-bit value
--   is_int : '0' => interpret fpr as a double-precision floating-point
--            number; any other value => interpret it as a 64-bit integer
--
-- Floating-point input: the sign is bit 63, the exponent is returned
-- unbiased (forced to -1022 when the exponent field is all zeroes), the
-- 52 fraction bits land in mantissa bits 53..2 with the implicit bit in
-- mantissa bit 54, and the class is derived from the exponent/fraction
-- fields.
-- Integer input: the mantissa is the raw value, the exponent is zero and
-- the class is FINITE for any non-zero value, ZERO otherwise.
function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
    variable num          : fpu_reg_type;
    variable exp_nonzero  : std_ulogic;
    variable exp_allones  : std_ulogic;
    variable frac_nonzero : std_ulogic;
    variable sel          : std_ulogic_vector(2 downto 0);
begin
    exp_nonzero  := or (fpr(62 downto 52));
    exp_allones  := and (fpr(62 downto 52));
    frac_nonzero := or (fpr(51 downto 0));

    num.negative := fpr(63);

    if is_int = '0' then
        -- Exponent: remove the bias; a zero exponent field (zero or
        -- denormalized number) uses the minimum exponent instead.
        if exp_nonzero = '0' then
            num.exponent := to_signed(-1022, EXP_BITS);
        else
            num.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
        end if;

        -- Mantissa: implicit bit at position 54, fraction below it,
        -- two zero bits at the bottom.
        num.mantissa := (others => '0');
        num.mantissa(54) := exp_nonzero;
        num.mantissa(53 downto 2) := fpr(51 downto 0);

        -- Class from (exponent all ones, exponent non-zero, fraction non-zero)
        sel := exp_allones & exp_nonzero & frac_nonzero;
        case sel is
            when "000" =>
                num.class := ZERO;
            when "001" | "010" | "011" =>
                -- "001" is a denormalized number
                num.class := FINITE;
            when "110" =>
                num.class := INFINITY;
            when others =>
                num.class := NAN;
        end case;
    else
        num.exponent := (others => '0');
        num.mantissa := fpr;
        if (fpr(63) or exp_nonzero or frac_nonzero) = '1' then
            num.class := FINITE;
        else
            num.class := ZERO;
        end if;
    end if;

    return num;
end;
|
||||
|
||||
-- Assemble a 64-bit double-precision value from unpacked components.
--
--   sign     : goes to bit 63 unchanged
--   class    : selects how the exponent and fraction fields are filled
--   exp      : unbiased exponent, used only for normalized FINITE values
--   mantissa : bits 53..2 supply the fraction; bit 54 is the implicit bit
--
-- ZERO leaves both fields zero.  FINITE takes the fraction from the
-- mantissa and, only when mantissa bit 54 is set (normalized number),
-- stores the exponent with the bias of 1023 added; otherwise the exponent
-- field stays zero.  INFINITY and NAN get an all-ones exponent field, and
-- NAN additionally takes its fraction from the mantissa.
function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
                 mantissa: std_ulogic_vector) return std_ulogic_vector is
    variable packed : std_ulogic_vector(63 downto 0) := (others => '0');
begin
    packed(63) := sign;

    -- Exponent field
    if class = INFINITY or class = NAN then
        packed(62 downto 52) := "11111111111";
    elsif class = FINITE and mantissa(54) = '1' then
        -- normalized number
        packed(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
    end if;

    -- Fraction field
    if class = FINITE or class = NAN then
        packed(51 downto 0) := mantissa(53 downto 2);
    end if;

    return packed;
end;
|
||||
|
||||
begin
|
||||
fpu_0: process(clk)
|
||||
@@ -85,14 +167,18 @@ begin
|
||||
|
||||
fpu_1: process(all)
|
||||
variable v : reg_type;
|
||||
variable adec : fpu_reg_type;
|
||||
variable bdec : fpu_reg_type;
|
||||
variable fpscr_mask : std_ulogic_vector(31 downto 0);
|
||||
variable illegal : std_ulogic;
|
||||
variable j, k : integer;
|
||||
variable flm : std_ulogic_vector(7 downto 0);
|
||||
variable int_input : std_ulogic;
|
||||
begin
|
||||
v := r;
|
||||
illegal := '0';
|
||||
v.busy := '0';
|
||||
int_input := '0';
|
||||
|
||||
-- capture incoming instruction
|
||||
if e_in.valid = '1' then
|
||||
@@ -101,6 +187,7 @@ begin
|
||||
v.fe_mode := or (e_in.fe_mode);
|
||||
v.dest_fpr := e_in.frt;
|
||||
v.single_prec := e_in.single;
|
||||
v.int_result := '0';
|
||||
v.rc := e_in.rc;
|
||||
v.is_cmp := e_in.out_cr;
|
||||
if e_in.out_cr = '0' then
|
||||
@@ -108,11 +195,19 @@ begin
|
||||
else
|
||||
v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
|
||||
end if;
|
||||
v.b := e_in.frb;
|
||||
int_input := '0';
|
||||
if e_in.op = OP_FPOP_I then
|
||||
int_input := '1';
|
||||
end if;
|
||||
adec := decode_dp(e_in.fra, int_input);
|
||||
bdec := decode_dp(e_in.frb, int_input);
|
||||
v.a := adec;
|
||||
v.b := bdec;
|
||||
end if;
|
||||
|
||||
v.writing_back := '0';
|
||||
v.instr_done := '0';
|
||||
opsel_r <= "00";
|
||||
fpscr_mask := (others => '1');
|
||||
|
||||
case r.state is
|
||||
@@ -133,6 +228,8 @@ begin
|
||||
else
|
||||
v.state := DO_MTFSF;
|
||||
end if;
|
||||
when "01000" =>
|
||||
v.state := DO_FMR;
|
||||
when others =>
|
||||
illegal := '1';
|
||||
end case;
|
||||
@@ -177,7 +274,9 @@ begin
|
||||
v.state := IDLE;
|
||||
|
||||
when DO_MFFS =>
|
||||
v.int_result := '1';
|
||||
v.writing_back := '1';
|
||||
opsel_r <= "10";
|
||||
case r.insn(20 downto 16) is
|
||||
when "00000" =>
|
||||
-- mffs
|
||||
@@ -191,7 +290,7 @@ begin
|
||||
-- mffscrn
|
||||
fpscr_mask := x"000000FF";
|
||||
v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
|
||||
r.b(FPSCR_RN+1 downto FPSCR_RN);
|
||||
r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
|
||||
when "10111" =>
|
||||
-- mffscrni
|
||||
fpscr_mask := x"000000FF";
|
||||
@@ -216,19 +315,48 @@ begin
|
||||
for i in 0 to 7 loop
|
||||
k := i * 4;
|
||||
if flm(i) = '1' then
|
||||
v.fpscr(k + 3 downto k) := r.b(k + 3 downto k);
|
||||
v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
|
||||
end if;
|
||||
end loop;
|
||||
v.instr_done := '1';
|
||||
v.state := IDLE;
|
||||
|
||||
when DO_FMR =>
|
||||
v.result_class := r.b.class;
|
||||
v.result_exp := r.b.exponent;
|
||||
if r.insn(9) = '1' then
|
||||
v.result_sign := '0'; -- fabs
|
||||
elsif r.insn(8) = '1' then
|
||||
v.result_sign := '1'; -- fnabs
|
||||
elsif r.insn(7) = '1' then
|
||||
v.result_sign := r.b.negative; -- fmr
|
||||
elsif r.insn(6) = '1' then
|
||||
v.result_sign := not r.b.negative; -- fneg
|
||||
else
|
||||
v.result_sign := r.a.negative; -- fcpsgn
|
||||
end if;
|
||||
v.writing_back := '1';
|
||||
v.instr_done := '1';
|
||||
v.state := IDLE;
|
||||
|
||||
end case;
|
||||
|
||||
-- Data path.
|
||||
-- Just enough to read FPSCR for now.
|
||||
v.r := x"00000000" & (r.fpscr and fpscr_mask);
|
||||
case opsel_r is
|
||||
when "00" =>
|
||||
result <= r.b.mantissa;
|
||||
when "10" =>
|
||||
result <= x"00000000" & (r.fpscr and fpscr_mask);
|
||||
when others =>
|
||||
result <= (others => '0');
|
||||
end case;
|
||||
v.r := result;
|
||||
|
||||
fp_result <= r.r;
|
||||
if r.int_result = '1' then
|
||||
fp_result <= r.r;
|
||||
else
|
||||
fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r);
|
||||
end if;
|
||||
|
||||
v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
|
||||
(or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
|
||||
|
||||
@@ -438,6 +438,39 @@ int fpu_test_5(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define SIGN 0x8000000000000000ul
|
||||
|
||||
int test6(long arg)
|
||||
{
|
||||
long i;
|
||||
unsigned long results[6];
|
||||
unsigned long v;
|
||||
|
||||
for (i = 0; i < sizeof(sp_dp_equiv) / sizeof(sp_dp_equiv[0]); ++i) {
|
||||
v = sp_dp_equiv[i].dp;
|
||||
asm("lfd%U0%X0 3,%0; fmr 6,3; fneg 7,3; stfd 6,0(%1); stfd 7,8(%1)"
|
||||
: : "m" (sp_dp_equiv[i].dp), "b" (results) : "memory");
|
||||
asm("fabs 9,6; fnabs 10,6; stfd 9,16(%0); stfd 10,24(%0)"
|
||||
: : "b" (results) : "memory");
|
||||
asm("fcpsgn 4,9,3; stfd 4,32(%0); fcpsgn 5,10,3; stfd 5,40(%0)"
|
||||
: : "b" (results) : "memory");
|
||||
if (results[0] != v ||
|
||||
results[1] != (v ^ SIGN) ||
|
||||
results[2] != (v & ~SIGN) ||
|
||||
results[3] != (v | SIGN) ||
|
||||
results[4] != (v & ~SIGN) ||
|
||||
results[5] != (v | SIGN))
|
||||
return i + 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int fpu_test_6(void)
|
||||
{
|
||||
enable_fp();
|
||||
return trapit(0, test6);
|
||||
}
|
||||
|
||||
int fail = 0;
|
||||
|
||||
void do_test(int num, int (*test)(void))
|
||||
@@ -469,6 +502,7 @@ int main(void)
|
||||
do_test(3, fpu_test_3);
|
||||
do_test(4, fpu_test_4);
|
||||
do_test(5, fpu_test_5);
|
||||
do_test(6, fpu_test_6);
|
||||
|
||||
return fail;
|
||||
}
|
||||
|
||||
Binary file not shown.
@@ -3,3 +3,4 @@ test 02:PASS
|
||||
test 03:PASS
|
||||
test 04:PASS
|
||||
test 05:PASS
|
||||
test 06:PASS
|
||||
|
||||
Reference in New Issue
Block a user