mirror of
https://github.com/antonblanchard/microwatt.git
synced 2026-03-30 19:05:04 +00:00
execute1: Remember dest GPR, RC, OE, XER for slow operations
For multiply and divide operations, execute1 now records the destination GPR number, RC and OE from the instruction, and the XER value. This means that the multiply and divide units don't need to record those values and then send them back to execute1. This makes the interface to those units a bit simpler. They simply report an overflow signal along with the result value, and execute1 takes care of updating XER if necessary.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
33
common.vhdl
33
common.vhdl
@@ -133,21 +133,16 @@ package common is
|
||||
type Execute1ToMultiplyType is record
|
||||
valid: std_ulogic;
|
||||
insn_type: insn_type_t;
|
||||
write_reg: gpr_index_t;
|
||||
data1: std_ulogic_vector(64 downto 0);
|
||||
data2: std_ulogic_vector(64 downto 0);
|
||||
rc: std_ulogic;
|
||||
oe: std_ulogic;
|
||||
is_32bit: std_ulogic;
|
||||
xerc: xer_common_t;
|
||||
end record;
|
||||
constant Execute1ToMultiplyInit : Execute1ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL, rc => '0',
|
||||
oe => '0', is_32bit => '0', xerc => xerc_init,
|
||||
constant Execute1ToMultiplyInit : Execute1ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL,
|
||||
is_32bit => '0',
|
||||
others => (others => '0'));
|
||||
|
||||
type Execute1ToDividerType is record
|
||||
valid: std_ulogic;
|
||||
write_reg: gpr_index_t;
|
||||
dividend: std_ulogic_vector(63 downto 0);
|
||||
divisor: std_ulogic_vector(63 downto 0);
|
||||
is_signed: std_ulogic;
|
||||
@@ -155,13 +150,9 @@ package common is
|
||||
is_extended: std_ulogic;
|
||||
is_modulus: std_ulogic;
|
||||
neg_result: std_ulogic;
|
||||
rc: std_ulogic;
|
||||
oe: std_ulogic;
|
||||
xerc: xer_common_t;
|
||||
end record;
|
||||
constant Execute1ToDividerInit: Execute1ToDividerType := (valid => '0', is_signed => '0', is_32bit => '0',
|
||||
is_extended => '0', is_modulus => '0',
|
||||
rc => '0', oe => '0', xerc => xerc_init,
|
||||
neg_result => '0', others => (others => '0'));
|
||||
|
||||
type Decode2ToRegisterFileType is record
|
||||
@@ -264,30 +255,18 @@ package common is
|
||||
|
||||
type MultiplyToExecute1Type is record
|
||||
valid: std_ulogic;
|
||||
|
||||
write_reg_nr: gpr_index_t;
|
||||
write_reg_data: std_ulogic_vector(63 downto 0);
|
||||
write_xerc_enable : std_ulogic;
|
||||
xerc : xer_common_t;
|
||||
rc: std_ulogic;
|
||||
overflow : std_ulogic;
|
||||
end record;
|
||||
constant MultiplyToExecute1Init : MultiplyToExecute1Type := (valid => '0',
|
||||
rc => '0', write_xerc_enable => '0',
|
||||
xerc => xerc_init,
|
||||
constant MultiplyToExecute1Init : MultiplyToExecute1Type := (valid => '0', overflow => '0',
|
||||
others => (others => '0'));
|
||||
|
||||
type DividerToExecute1Type is record
|
||||
valid: std_ulogic;
|
||||
|
||||
write_reg_nr: gpr_index_t;
|
||||
write_reg_data: std_ulogic_vector(63 downto 0);
|
||||
write_xerc_enable : std_ulogic;
|
||||
xerc : xer_common_t;
|
||||
rc: std_ulogic;
|
||||
overflow : std_ulogic;
|
||||
end record;
|
||||
constant DividerToExecute1Init : DividerToExecute1Type := (valid => '0',
|
||||
rc => '0', write_xerc_enable => '0',
|
||||
xerc => xerc_init,
|
||||
constant DividerToExecute1Init : DividerToExecute1Type := (valid => '0', overflow => '0',
|
||||
others => (others => '0'));
|
||||
|
||||
type WritebackToRegisterFileType is record
|
||||
|
||||
@@ -300,7 +300,9 @@ begin
|
||||
v.e.read_data3 := decoded_reg_c.data;
|
||||
v.e.write_reg := decoded_reg_o.reg;
|
||||
v.e.rc := decode_rc(d_in.decode.rc, d_in.insn);
|
||||
v.e.oe := decode_oe(d_in.decode.rc, d_in.insn);
|
||||
if not (d_in.decode.insn_type = OP_MUL_H32 or d_in.decode.insn_type = OP_MUL_H64) then
|
||||
v.e.oe := decode_oe(d_in.decode.rc, d_in.insn);
|
||||
end if;
|
||||
v.e.cr := c_in.read_cr_data;
|
||||
v.e.xerc := c_in.read_xerc_data;
|
||||
v.e.invert_a := d_in.decode.invert_a;
|
||||
|
||||
25
divider.vhdl
25
divider.vhdl
@@ -29,13 +29,9 @@ architecture behaviour of divider is
|
||||
signal is_32bit : std_ulogic;
|
||||
signal extended : std_ulogic;
|
||||
signal is_signed : std_ulogic;
|
||||
signal rc : std_ulogic;
|
||||
signal write_reg : std_ulogic_vector(4 downto 0);
|
||||
signal overflow : std_ulogic;
|
||||
signal ovf32 : std_ulogic;
|
||||
signal did_ovf : std_ulogic;
|
||||
signal oe : std_ulogic;
|
||||
signal xerc : xer_common_t;
|
||||
begin
|
||||
divider_0: process(clk)
|
||||
begin
|
||||
@@ -54,15 +50,11 @@ begin
|
||||
end if;
|
||||
div <= unsigned(d_in.divisor);
|
||||
quot <= (others => '0');
|
||||
write_reg <= d_in.write_reg;
|
||||
neg_result <= d_in.neg_result;
|
||||
is_modulus <= d_in.is_modulus;
|
||||
extended <= d_in.is_extended;
|
||||
is_32bit <= d_in.is_32bit;
|
||||
is_signed <= d_in.is_signed;
|
||||
rc <= d_in.rc;
|
||||
oe <= d_in.oe;
|
||||
xerc <= d_in.xerc;
|
||||
count <= "1111111";
|
||||
running <= '1';
|
||||
overflow <= '0';
|
||||
@@ -98,9 +90,6 @@ begin
|
||||
|
||||
divider_1: process(all)
|
||||
begin
|
||||
d_out.write_reg_nr <= write_reg;
|
||||
d_out.rc <= rc;
|
||||
|
||||
if is_modulus = '1' then
|
||||
result <= dend(128 downto 65);
|
||||
else
|
||||
@@ -136,21 +125,9 @@ begin
|
||||
if rising_edge(clk) then
|
||||
d_out.valid <= '0';
|
||||
d_out.write_reg_data <= oresult;
|
||||
d_out.write_xerc_enable <= '0';
|
||||
d_out.xerc <= xerc;
|
||||
d_out.overflow <= did_ovf;
|
||||
if count = "1000000" then
|
||||
d_out.valid <= '1';
|
||||
d_out.write_xerc_enable <= oe;
|
||||
|
||||
-- We must test oe because the RC update code in writeback
|
||||
-- will use the xerc value to set CR0:SO so we must not clobber
|
||||
-- xerc if OE wasn't set.
|
||||
--
|
||||
if oe = '1' then
|
||||
d_out.xerc.ov <= did_ovf;
|
||||
d_out.xerc.ov32 <= did_ovf;
|
||||
d_out.xerc.so <= xerc.so or did_ovf;
|
||||
end if;
|
||||
end if;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
@@ -43,7 +43,6 @@ begin
|
||||
rst <= '0';
|
||||
|
||||
d1.valid <= '1';
|
||||
d1.write_reg <= "10001";
|
||||
d1.dividend <= x"0000000010001000";
|
||||
d1.divisor <= x"0000000000001111";
|
||||
d1.is_signed <= '0';
|
||||
@@ -51,7 +50,6 @@ begin
|
||||
d1.is_extended <= '0';
|
||||
d1.is_modulus <= '0';
|
||||
d1.neg_result <= '0';
|
||||
d1.rc <= '0';
|
||||
|
||||
wait for clk_period;
|
||||
assert d2.valid = '0';
|
||||
@@ -66,15 +64,12 @@ begin
|
||||
end loop;
|
||||
|
||||
assert d2.valid = '1';
|
||||
assert d2.write_reg_nr = "10001";
|
||||
assert d2.write_reg_data = x"000000000000f001" report "result " & to_hstring(d2.write_reg_data);
|
||||
assert d2.rc = '0';
|
||||
|
||||
wait for clk_period;
|
||||
assert d2.valid = '0' report "valid";
|
||||
|
||||
d1.valid <= '1';
|
||||
d1.rc <= '1';
|
||||
|
||||
wait for clk_period;
|
||||
assert d2.valid = '0' report "valid";
|
||||
@@ -89,9 +84,7 @@ begin
|
||||
end loop;
|
||||
|
||||
assert d2.valid = '1';
|
||||
assert d2.write_reg_nr = "10001";
|
||||
assert d2.write_reg_data = x"000000000000f001" report "result " & to_hstring(d2.write_reg_data);
|
||||
assert d2.rc = '1';
|
||||
|
||||
wait for clk_period;
|
||||
assert d2.valid = '0';
|
||||
|
||||
@@ -38,6 +38,10 @@ architecture behaviour of execute1 is
|
||||
next_lr : std_ulogic_vector(63 downto 0);
|
||||
mul_in_progress : std_ulogic;
|
||||
div_in_progress : std_ulogic;
|
||||
slow_op_dest : gpr_index_t;
|
||||
slow_op_rc : std_ulogic;
|
||||
slow_op_oe : std_ulogic;
|
||||
slow_op_xerc : xer_common_t;
|
||||
end record;
|
||||
|
||||
signal r, rin : reg_type;
|
||||
@@ -187,6 +191,7 @@ begin
|
||||
variable carry_32, carry_64 : std_ulogic;
|
||||
variable sign1, sign2 : std_ulogic;
|
||||
variable abs1, abs2 : signed(63 downto 0);
|
||||
variable overflow : std_ulogic;
|
||||
begin
|
||||
result := (others => '0');
|
||||
result_with_carry := (others => '0');
|
||||
@@ -238,12 +243,6 @@ begin
|
||||
-- signals to multiply unit
|
||||
x_to_multiply <= Execute1ToMultiplyInit;
|
||||
x_to_multiply.insn_type <= e_in.insn_type;
|
||||
x_to_multiply.write_reg <= gspr_to_gpr(e_in.write_reg);
|
||||
x_to_multiply.rc <= e_in.rc;
|
||||
x_to_multiply.xerc <= v.e.xerc;
|
||||
if e_in.insn_type = OP_MUL_L64 then
|
||||
x_to_multiply.oe <= e_in.oe;
|
||||
end if;
|
||||
x_to_multiply.is_32bit <= e_in.is_32bit;
|
||||
|
||||
if e_in.is_32bit = '1' then
|
||||
@@ -291,16 +290,12 @@ begin
|
||||
end if;
|
||||
|
||||
x_to_divider <= Execute1ToDividerInit;
|
||||
x_to_divider.write_reg <= gspr_to_gpr(e_in.write_reg);
|
||||
x_to_divider.is_signed <= e_in.is_signed;
|
||||
x_to_divider.is_32bit <= e_in.is_32bit;
|
||||
if e_in.insn_type = OP_MOD then
|
||||
x_to_divider.is_modulus <= '1';
|
||||
end if;
|
||||
x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus);
|
||||
x_to_divider.rc <= e_in.rc;
|
||||
x_to_divider.oe <= e_in.oe;
|
||||
x_to_divider.xerc <= v.e.xerc;
|
||||
if e_in.is_32bit = '0' then
|
||||
-- 64-bit forms
|
||||
if e_in.insn_type = OP_DIVE then
|
||||
@@ -342,6 +337,10 @@ begin
|
||||
v.e.write_reg := e_in.write_reg;
|
||||
v.e.write_len := x"8";
|
||||
v.e.sign_extend := '0';
|
||||
v.slow_op_dest := gspr_to_gpr(e_in.write_reg);
|
||||
v.slow_op_rc := e_in.rc;
|
||||
v.slow_op_oe := e_in.oe;
|
||||
v.slow_op_xerc := v.e.xerc;
|
||||
|
||||
case_0: case e_in.insn_type is
|
||||
|
||||
@@ -664,35 +663,36 @@ begin
|
||||
v.e.write_len := x"8";
|
||||
v.e.sign_extend := '0';
|
||||
v.e.valid := '1';
|
||||
elsif r.mul_in_progress = '1' then
|
||||
if multiply_to_x.valid = '1' then
|
||||
v.e.write_reg := gpr_to_gspr(multiply_to_x.write_reg_nr);
|
||||
result := multiply_to_x.write_reg_data;
|
||||
elsif r.mul_in_progress = '1' or r.div_in_progress = '1' then
|
||||
if (r.mul_in_progress = '1' and multiply_to_x.valid = '1') or
|
||||
(r.div_in_progress = '1' and divider_to_x.valid = '1') then
|
||||
if r.mul_in_progress = '1' then
|
||||
result := multiply_to_x.write_reg_data;
|
||||
overflow := multiply_to_x.overflow;
|
||||
else
|
||||
result := divider_to_x.write_reg_data;
|
||||
overflow := divider_to_x.overflow;
|
||||
end if;
|
||||
result_en := '1';
|
||||
v.e.rc := multiply_to_x.rc;
|
||||
v.e.xerc := multiply_to_x.xerc;
|
||||
v.e.write_xerc_enable := multiply_to_x.write_xerc_enable;
|
||||
v.e.write_reg := gpr_to_gspr(v.slow_op_dest);
|
||||
v.e.rc := v.slow_op_rc;
|
||||
v.e.xerc := v.slow_op_xerc;
|
||||
v.e.write_xerc_enable := v.slow_op_oe;
|
||||
-- We must test oe because the RC update code in writeback
|
||||
-- will use the xerc value to set CR0:SO so we must not clobber
|
||||
-- xerc if OE wasn't set.
|
||||
if v.slow_op_oe = '1' then
|
||||
v.e.xerc.ov := overflow;
|
||||
v.e.xerc.ov32 := overflow;
|
||||
v.e.xerc.so := v.slow_op_xerc.so or overflow;
|
||||
end if;
|
||||
v.e.valid := '1';
|
||||
v.e.write_len := x"8";
|
||||
v.e.sign_extend := '0';
|
||||
else
|
||||
stall_out <= '1';
|
||||
v.mul_in_progress := '1';
|
||||
end if;
|
||||
elsif r.div_in_progress = '1' then
|
||||
if divider_to_x.valid = '1' then
|
||||
v.e.write_reg := gpr_to_gspr(divider_to_x.write_reg_nr);
|
||||
result := divider_to_x.write_reg_data;
|
||||
result_en := '1';
|
||||
v.e.rc := divider_to_x.rc;
|
||||
v.e.xerc := divider_to_x.xerc;
|
||||
v.e.write_xerc_enable := divider_to_x.write_xerc_enable;
|
||||
v.e.valid := '1';
|
||||
v.e.write_len := x"8";
|
||||
v.e.sign_extend := '0';
|
||||
else
|
||||
stall_out <= '1';
|
||||
v.div_in_progress := '1';
|
||||
v.mul_in_progress := r.mul_in_progress;
|
||||
v.div_in_progress := r.div_in_progress;
|
||||
end if;
|
||||
end if;
|
||||
|
||||
|
||||
@@ -25,19 +25,12 @@ architecture behaviour of multiply is
|
||||
valid : std_ulogic;
|
||||
insn_type : insn_type_t;
|
||||
data : signed(129 downto 0);
|
||||
write_reg : std_ulogic_vector(4 downto 0);
|
||||
rc : std_ulogic;
|
||||
oe : std_ulogic;
|
||||
is_32bit : std_ulogic;
|
||||
xerc : xer_common_t;
|
||||
end record;
|
||||
constant MultiplyPipelineStageInit : multiply_pipeline_stage := (valid => '0',
|
||||
insn_type => OP_ILLEGAL,
|
||||
rc => '0', oe => '0',
|
||||
is_32bit => '0',
|
||||
xerc => xerc_init,
|
||||
data => (others => '0'),
|
||||
others => (others => '0'));
|
||||
data => (others => '0'));
|
||||
|
||||
type multiply_pipeline_type is array(0 to PIPELINE_DEPTH-1) of multiply_pipeline_stage;
|
||||
constant MultiplyPipelineInit : multiply_pipeline_type := (others => MultiplyPipelineStageInit);
|
||||
@@ -69,11 +62,7 @@ begin
|
||||
v.multiply_pipeline(0).valid := m.valid;
|
||||
v.multiply_pipeline(0).insn_type := m.insn_type;
|
||||
v.multiply_pipeline(0).data := signed(m.data1) * signed(m.data2);
|
||||
v.multiply_pipeline(0).write_reg := m.write_reg;
|
||||
v.multiply_pipeline(0).rc := m.rc;
|
||||
v.multiply_pipeline(0).oe := m.oe;
|
||||
v.multiply_pipeline(0).is_32bit := m.is_32bit;
|
||||
v.multiply_pipeline(0).xerc := m.xerc;
|
||||
|
||||
loop_0: for i in 1 to PIPELINE_DEPTH-1 loop
|
||||
v.multiply_pipeline(i) := r.multiply_pipeline(i-1);
|
||||
@@ -101,24 +90,10 @@ begin
|
||||
end case;
|
||||
|
||||
m_out.write_reg_data <= d2;
|
||||
m_out.write_reg_nr <= v.multiply_pipeline(PIPELINE_DEPTH-1).write_reg;
|
||||
m_out.xerc <= v.multiply_pipeline(PIPELINE_DEPTH-1).xerc;
|
||||
m_out.overflow <= ov;
|
||||
|
||||
-- Generate OV/OV32/SO when OE=1
|
||||
if v.multiply_pipeline(PIPELINE_DEPTH-1).valid = '1' then
|
||||
m_out.valid <= '1';
|
||||
m_out.rc <= v.multiply_pipeline(PIPELINE_DEPTH-1).rc;
|
||||
m_out.write_xerc_enable <= v.multiply_pipeline(PIPELINE_DEPTH-1).oe;
|
||||
|
||||
-- We must test oe because the RC update code in writeback
|
||||
-- will use the xerc value to set CR0:SO so we must not clobber
|
||||
-- xerc if OE wasn't set.
|
||||
--
|
||||
if v.multiply_pipeline(PIPELINE_DEPTH-1).oe = '1' then
|
||||
m_out.xerc.ov <= ov;
|
||||
m_out.xerc.ov32 <= ov;
|
||||
m_out.xerc.so <= v.multiply_pipeline(PIPELINE_DEPTH-1).xerc.so or ov;
|
||||
end if;
|
||||
end if;
|
||||
|
||||
rin <= v;
|
||||
|
||||
@@ -40,10 +40,8 @@ begin
|
||||
|
||||
m1.valid <= '1';
|
||||
m1.insn_type <= OP_MUL_L64;
|
||||
m1.write_reg <= "10001";
|
||||
m1.data1 <= '0' & x"0000000000001000";
|
||||
m1.data2 <= '0' & x"0000000000001111";
|
||||
m1.rc <= '0';
|
||||
|
||||
wait for clk_period;
|
||||
assert m2.valid = '0';
|
||||
@@ -58,15 +56,12 @@ begin
|
||||
|
||||
wait for clk_period;
|
||||
assert m2.valid = '1';
|
||||
assert m2.write_reg_nr = "10001";
|
||||
assert m2.write_reg_data = x"0000000001111000";
|
||||
assert m2.rc = '0';
|
||||
|
||||
wait for clk_period;
|
||||
assert m2.valid = '0';
|
||||
|
||||
m1.valid <= '1';
|
||||
m1.rc <= '1';
|
||||
|
||||
wait for clk_period;
|
||||
assert m2.valid = '0';
|
||||
@@ -75,9 +70,7 @@ begin
|
||||
|
||||
wait for clk_period * (pipeline_depth-1);
|
||||
assert m2.valid = '1';
|
||||
assert m2.write_reg_nr = "10001";
|
||||
assert m2.write_reg_data = x"0000000001111000";
|
||||
assert m2.rc = '1';
|
||||
|
||||
-- test mulld
|
||||
mulld_loop : for i in 0 to 1000 loop
|
||||
|
||||
Reference in New Issue
Block a user