mirror of
https://github.com/antonblanchard/microwatt.git
synced 2026-04-10 22:31:45 +00:00
execute1: Do forwarding of the CR result to the next instruction
This adds a path to allow the CR result of one instruction to be forwarded to the next instruction, so that sequences such as cmp; bc can avoid having a 1-cycle bubble. Forwarding is not available for dot-form (Rc=1) instructions, since the CR result for them is calculated in writeback. The decode.output_cr field is used to identify those instructions that compute the CR result in execute1.

For some reason, the multiply instructions incorrectly had output_cr = 1 in the decode tables. This fixes that by clearing output_cr for them.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
@@ -151,6 +151,7 @@ package common is
|
||||
bypass_data2: std_ulogic;
|
||||
bypass_data3: std_ulogic;
|
||||
cr: std_ulogic_vector(31 downto 0);
|
||||
bypass_cr : std_ulogic;
|
||||
xerc: xer_common_t;
|
||||
lr: std_ulogic;
|
||||
rc: std_ulogic;
|
||||
@@ -173,7 +174,7 @@ package common is
|
||||
end record;
|
||||
constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
|
||||
(valid => '0', unit => NONE, insn_type => OP_ILLEGAL, bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0',
|
||||
lr => '0', rc => '0', oe => '0', invert_a => '0',
|
||||
bypass_cr => '0', lr => '0', rc => '0', oe => '0', invert_a => '0',
|
||||
invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0',
|
||||
is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', br_pred => '0',
|
||||
byte_reverse => '0', sign_extend => '0', update => '0', nia => (others => '0'), read_data1 => (others => '0'), read_data2 => (others => '0'), read_data3 => (others => '0'), cr => (others => '0'), insn => (others => '0'), data_len => (others => '0'), others => (others => '0'));
|
||||
|
||||
@@ -38,6 +38,7 @@ entity control is
|
||||
|
||||
cr_read_in : in std_ulogic;
|
||||
cr_write_in : in std_ulogic;
|
||||
cr_bypassable : in std_ulogic;
|
||||
|
||||
valid_out : out std_ulogic;
|
||||
stall_out : out std_ulogic;
|
||||
@@ -45,7 +46,8 @@ entity control is
|
||||
|
||||
gpr_bypass_a : out std_ulogic;
|
||||
gpr_bypass_b : out std_ulogic;
|
||||
gpr_bypass_c : out std_ulogic
|
||||
gpr_bypass_c : out std_ulogic;
|
||||
cr_bypass : out std_ulogic
|
||||
);
|
||||
end entity control;
|
||||
|
||||
@@ -161,8 +163,10 @@ begin
|
||||
|
||||
cr_read_in => cr_read_in,
|
||||
cr_write_in => cr_write_valid,
|
||||
bypassable => cr_bypassable,
|
||||
|
||||
stall_out => cr_stall_out
|
||||
stall_out => cr_stall_out,
|
||||
use_bypass => cr_bypass
|
||||
);
|
||||
|
||||
control0: process(clk)
|
||||
|
||||
@@ -16,15 +16,18 @@ entity cr_hazard is
|
||||
|
||||
cr_read_in : in std_ulogic;
|
||||
cr_write_in : in std_ulogic;
|
||||
bypassable : in std_ulogic;
|
||||
|
||||
stall_out : out std_ulogic
|
||||
stall_out : out std_ulogic;
|
||||
use_bypass : out std_ulogic
|
||||
);
|
||||
end entity cr_hazard;
|
||||
architecture behaviour of cr_hazard is
|
||||
type pipeline_entry_type is record
|
||||
valid : std_ulogic;
|
||||
valid : std_ulogic;
|
||||
bypass : std_ulogic;
|
||||
end record;
|
||||
constant pipeline_entry_init : pipeline_entry_type := (valid => '0');
|
||||
constant pipeline_entry_init : pipeline_entry_type := (valid => '0', bypass => '0');
|
||||
|
||||
type pipeline_t is array(0 to PIPELINE_DEPTH) of pipeline_entry_type;
|
||||
constant pipeline_t_init : pipeline_t := (others => pipeline_entry_init);
|
||||
@@ -47,7 +50,20 @@ begin
|
||||
if complete_in = '1' then
|
||||
v(1).valid := '0';
|
||||
end if;
|
||||
stall_out <= cr_read_in and (v(0).valid or v(1).valid);
|
||||
|
||||
use_bypass <= '0';
|
||||
stall_out <= '0';
|
||||
if cr_read_in = '1' then
|
||||
loop_0: for i in 0 to PIPELINE_DEPTH loop
|
||||
if v(i).valid = '1' then
|
||||
if r(i).bypass = '1' then
|
||||
use_bypass <= '1';
|
||||
else
|
||||
stall_out <= '1';
|
||||
end if;
|
||||
end if;
|
||||
end loop;
|
||||
end if;
|
||||
|
||||
-- XXX assumes PIPELINE_DEPTH = 1
|
||||
if busy_in = '0' then
|
||||
@@ -56,6 +72,7 @@ begin
|
||||
end if;
|
||||
if deferred = '0' and issuing = '1' then
|
||||
v(0).valid := cr_write_in;
|
||||
v(0).bypass := bypassable;
|
||||
end if;
|
||||
if flush_in = '1' then
|
||||
v(0).valid := '0';
|
||||
|
||||
26
decode1.vhdl
26
decode1.vhdl
@@ -60,7 +60,7 @@ architecture behaviour of decode1 is
|
||||
41 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- lhzu
|
||||
32 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lwz
|
||||
33 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- lwzu
|
||||
7 => (ALU, OP_MUL_L64, RA, CONST_SI, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0'), -- mulli
|
||||
7 => (ALU, OP_MUL_L64, RA, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0'), -- mulli
|
||||
24 => (ALU, OP_OR, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- ori
|
||||
25 => (ALU, OP_OR, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- oris
|
||||
20 => (ALU, OP_RLC, RA, CONST_SH32, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- rlwimi
|
||||
@@ -262,19 +262,19 @@ architecture behaviour of decode1 is
|
||||
2#0010010000# => (ALU, OP_MTCRF, NONE, NONE, RS, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mtcrf/mtocrf
|
||||
2#0010110010# => (ALU, OP_MTMSRD, NONE, NONE, RS, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- mtmsrd # ignore top bits and d
|
||||
2#0111010011# => (ALU, OP_MTSPR, NONE, NONE, RS, SPR, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mtspr
|
||||
2#0001001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulhd
|
||||
2#0000001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- mulhdu
|
||||
2#0001001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mulhw
|
||||
2#0000001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- mulhwu
|
||||
2#0001001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulhd
|
||||
2#0000001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- mulhdu
|
||||
2#0001001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mulhw
|
||||
2#0000001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- mulhwu
|
||||
-- next 4 have reserved bit set
|
||||
2#1001001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulhd
|
||||
2#1000001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- mulhdu
|
||||
2#1001001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mulhw
|
||||
2#1000001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- mulhwu
|
||||
2#0011101001# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulld
|
||||
2#1011101001# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulldo
|
||||
2#0011101011# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mullw
|
||||
2#1011101011# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mullwo
|
||||
2#1001001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulhd
|
||||
2#1000001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- mulhdu
|
||||
2#1001001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mulhw
|
||||
2#1000001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- mulhwu
|
||||
2#0011101001# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulld
|
||||
2#1011101001# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulldo
|
||||
2#0011101011# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mullw
|
||||
2#1011101011# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mullwo
|
||||
2#0111011100# => (ALU, OP_AND, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- nand
|
||||
2#0001101000# => (ALU, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- neg
|
||||
2#1001101000# => (ALU, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- nego
|
||||
|
||||
14
decode2.vhdl
14
decode2.vhdl
@@ -213,7 +213,10 @@ architecture behaviour of decode2 is
|
||||
signal gpr_c_read : gpr_index_t;
|
||||
signal gpr_c_bypass : std_ulogic;
|
||||
|
||||
signal cr_write_valid : std_ulogic;
|
||||
signal cr_write_valid : std_ulogic;
|
||||
signal cr_bypass : std_ulogic;
|
||||
signal cr_bypass_avail : std_ulogic;
|
||||
|
||||
begin
|
||||
control_0: entity work.control
|
||||
generic map (
|
||||
@@ -248,7 +251,9 @@ begin
|
||||
gpr_c_read_in => gpr_c_read,
|
||||
|
||||
cr_read_in => d_in.decode.input_cr,
|
||||
cr_write_in => cr_write_valid,
|
||||
cr_write_in => cr_write_valid,
|
||||
cr_bypass => cr_bypass,
|
||||
cr_bypassable => cr_bypass_avail,
|
||||
|
||||
valid_out => control_valid_out,
|
||||
stall_out => stall_out,
|
||||
@@ -342,6 +347,7 @@ begin
|
||||
v.e.oe := decode_oe(d_in.decode.rc, d_in.insn);
|
||||
end if;
|
||||
v.e.cr := c_in.read_cr_data;
|
||||
v.e.bypass_cr := cr_bypass;
|
||||
v.e.xerc := c_in.read_xerc_data;
|
||||
v.e.invert_a := d_in.decode.invert_a;
|
||||
v.e.invert_out := d_in.decode.invert_out;
|
||||
@@ -388,6 +394,10 @@ begin
|
||||
gpr_c_read <= gspr_to_gpr(decoded_reg_c.reg);
|
||||
|
||||
cr_write_valid <= d_in.decode.output_cr or decode_rc(d_in.decode.rc, d_in.insn);
|
||||
cr_bypass_avail <= '0';
|
||||
if EX1_BYPASS then
|
||||
cr_bypass_avail <= d_in.decode.output_cr;
|
||||
end if;
|
||||
|
||||
v.e.valid := control_valid_out;
|
||||
if d_in.decode.unit = NONE then
|
||||
|
||||
@@ -74,6 +74,7 @@ architecture behaviour of execute1 is
|
||||
signal r, rin : reg_type;
|
||||
|
||||
signal a_in, b_in, c_in : std_ulogic_vector(63 downto 0);
|
||||
signal cr_in : std_ulogic_vector(31 downto 0);
|
||||
|
||||
signal valid_in : std_ulogic;
|
||||
signal ctrl: ctrl_t := (irq_state => WRITE_SRR0, others => (others => '0'));
|
||||
@@ -355,6 +356,16 @@ begin
|
||||
v.e.xerc := e_in.xerc;
|
||||
end if;
|
||||
|
||||
-- CR forwarding
|
||||
cr_in <= e_in.cr;
|
||||
if EX1_BYPASS and e_in.bypass_cr = '1' and r.e.write_cr_enable = '1' then
|
||||
for i in 0 to 7 loop
|
||||
if r.e.write_cr_mask(i) = '1' then
|
||||
cr_in(i * 4 + 3 downto i * 4) <= r.e.write_cr_data(i * 4 + 3 downto i * 4);
|
||||
end if;
|
||||
end loop;
|
||||
end if;
|
||||
|
||||
v.lr_update := '0';
|
||||
v.mul_in_progress := '0';
|
||||
v.div_in_progress := '0';
|
||||
@@ -635,7 +646,7 @@ begin
|
||||
v.e.write_reg := fast_spr_num(SPR_CTR);
|
||||
end if;
|
||||
is_branch := '1';
|
||||
taken_branch := ppc_bc_taken(bo, bi, e_in.cr, a_in);
|
||||
taken_branch := ppc_bc_taken(bo, bi, cr_in, a_in);
|
||||
abs_branch := insn_aa(e_in.insn);
|
||||
when OP_BCREG =>
|
||||
-- read_data1 is CTR
|
||||
@@ -648,7 +659,7 @@ begin
|
||||
v.e.write_reg := fast_spr_num(SPR_CTR);
|
||||
end if;
|
||||
is_branch := '1';
|
||||
taken_branch := ppc_bc_taken(bo, bi, e_in.cr, a_in);
|
||||
taken_branch := ppc_bc_taken(bo, bi, cr_in, a_in);
|
||||
abs_branch := '1';
|
||||
|
||||
when OP_RFID =>
|
||||
@@ -675,7 +686,7 @@ begin
|
||||
v.busy := '1';
|
||||
when OP_ISEL =>
|
||||
crbit := to_integer(unsigned(insn_bc(e_in.insn)));
|
||||
if e_in.cr(31-crbit) = '1' then
|
||||
if cr_in(31-crbit) = '1' then
|
||||
result := a_in;
|
||||
else
|
||||
result := b_in;
|
||||
@@ -695,7 +706,7 @@ begin
|
||||
lo := (7-i)*4;
|
||||
hi := lo + 3;
|
||||
if i = scrnum then
|
||||
newcrf := e_in.cr(hi downto lo);
|
||||
newcrf := cr_in(hi downto lo);
|
||||
end if;
|
||||
end loop;
|
||||
for i in 0 to 7 loop
|
||||
@@ -713,14 +724,14 @@ begin
|
||||
bbnum := 31 - to_integer(unsigned(bb));
|
||||
-- Bits 5-8 of cr_op give the truth table of the requested
|
||||
-- logical operation
|
||||
cr_operands := e_in.cr(banum) & e_in.cr(bbnum);
|
||||
cr_operands := cr_in(banum) & cr_in(bbnum);
|
||||
crresult := cr_op(5 + to_integer(unsigned(cr_operands)));
|
||||
v.e.write_cr_mask := num_to_fxm((31-btnum) / 4);
|
||||
for i in 0 to 31 loop
|
||||
if i = btnum then
|
||||
v.e.write_cr_data(i) := crresult;
|
||||
else
|
||||
v.e.write_cr_data(i) := e_in.cr(i);
|
||||
v.e.write_cr_data(i) := cr_in(i);
|
||||
end if;
|
||||
end loop;
|
||||
end if;
|
||||
@@ -772,7 +783,7 @@ begin
|
||||
when OP_MFCR =>
|
||||
if e_in.insn(20) = '0' then
|
||||
-- mfcr
|
||||
result := x"00000000" & e_in.cr;
|
||||
result := x"00000000" & cr_in;
|
||||
else
|
||||
-- mfocrf
|
||||
crnum := fxm_to_num(insn_fxm(e_in.insn));
|
||||
@@ -781,7 +792,7 @@ begin
|
||||
lo := (7-i)*4;
|
||||
hi := lo + 3;
|
||||
if crnum = i then
|
||||
result(hi downto lo) := e_in.cr(hi downto lo);
|
||||
result(hi downto lo) := cr_in(hi downto lo);
|
||||
end if;
|
||||
end loop;
|
||||
end if;
|
||||
|
||||
Reference in New Issue
Block a user