mirror of
https://github.com/antonblanchard/microwatt.git
synced 2026-04-15 15:50:24 +00:00
Add a bypass path from the execute2 stage
This enables some instructions to issue earlier and thus improves performance, at the cost of some extra multiplexers in decode2.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
50
control.vhdl
50
control.vhdl
@@ -36,6 +36,8 @@ entity control is
|
||||
|
||||
execute_next_tag : in instr_tag_t;
|
||||
execute_next_cr_tag : in instr_tag_t;
|
||||
execute2_next_tag : in instr_tag_t;
|
||||
execute2_next_cr_tag : in instr_tag_t;
|
||||
|
||||
cr_read_in : in std_ulogic;
|
||||
cr_write_in : in std_ulogic;
|
||||
@@ -44,10 +46,10 @@ entity control is
|
||||
stall_out : out std_ulogic;
|
||||
stopped_out : out std_ulogic;
|
||||
|
||||
gpr_bypass_a : out std_ulogic;
|
||||
gpr_bypass_b : out std_ulogic;
|
||||
gpr_bypass_c : out std_ulogic;
|
||||
cr_bypass : out std_ulogic;
|
||||
gpr_bypass_a : out std_ulogic_vector(1 downto 0);
|
||||
gpr_bypass_b : out std_ulogic_vector(1 downto 0);
|
||||
gpr_bypass_c : out std_ulogic_vector(1 downto 0);
|
||||
cr_bypass : out std_ulogic_vector(1 downto 0);
|
||||
|
||||
instr_tag_out : out instr_tag_t
|
||||
);
|
||||
@@ -142,11 +144,11 @@ begin
|
||||
variable tag_s : instr_tag_t;
|
||||
variable tag_t : instr_tag_t;
|
||||
variable incr_tag : tag_number_t;
|
||||
variable byp_a : std_ulogic;
|
||||
variable byp_b : std_ulogic;
|
||||
variable byp_c : std_ulogic;
|
||||
variable byp_a : std_ulogic_vector(1 downto 0);
|
||||
variable byp_b : std_ulogic_vector(1 downto 0);
|
||||
variable byp_c : std_ulogic_vector(1 downto 0);
|
||||
variable tag_cr : instr_tag_t;
|
||||
variable byp_cr : std_ulogic;
|
||||
variable byp_cr : std_ulogic_vector(1 downto 0);
|
||||
begin
|
||||
tag_a := instr_tag_init;
|
||||
for i in tag_number_t loop
|
||||
@@ -179,26 +181,32 @@ begin
|
||||
tag_c.valid := '0';
|
||||
end if;
|
||||
|
||||
byp_a := '0';
|
||||
byp_a := "00";
|
||||
if EX1_BYPASS and tag_match(execute_next_tag, tag_a) then
|
||||
byp_a := '1';
|
||||
byp_a := "10";
|
||||
elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_a) then
|
||||
byp_a := "11";
|
||||
end if;
|
||||
byp_b := '0';
|
||||
byp_b := "00";
|
||||
if EX1_BYPASS and tag_match(execute_next_tag, tag_b) then
|
||||
byp_b := '1';
|
||||
byp_b := "10";
|
||||
elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_b) then
|
||||
byp_b := "11";
|
||||
end if;
|
||||
byp_c := '0';
|
||||
byp_c := "00";
|
||||
if EX1_BYPASS and tag_match(execute_next_tag, tag_c) then
|
||||
byp_c := '1';
|
||||
byp_c := "10";
|
||||
elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_c) then
|
||||
byp_c := "11";
|
||||
end if;
|
||||
|
||||
gpr_bypass_a <= byp_a;
|
||||
gpr_bypass_b <= byp_b;
|
||||
gpr_bypass_c <= byp_c;
|
||||
|
||||
gpr_tag_stall <= (tag_a.valid and not byp_a) or
|
||||
(tag_b.valid and not byp_b) or
|
||||
(tag_c.valid and not byp_c);
|
||||
gpr_tag_stall <= (tag_a.valid and not byp_a(1)) or
|
||||
(tag_b.valid and not byp_b(1)) or
|
||||
(tag_c.valid and not byp_c(1));
|
||||
|
||||
incr_tag := curr_tag;
|
||||
instr_tag.tag <= curr_tag;
|
||||
@@ -215,13 +223,15 @@ begin
|
||||
if tag_match(tag_cr, complete_in) then
|
||||
tag_cr.valid := '0';
|
||||
end if;
|
||||
byp_cr := '0';
|
||||
byp_cr := "00";
|
||||
if EX1_BYPASS and tag_match(execute_next_cr_tag, tag_cr) then
|
||||
byp_cr := '1';
|
||||
byp_cr := "10";
|
||||
elsif EX1_BYPASS and tag_match(execute2_next_cr_tag, tag_cr) then
|
||||
byp_cr := "11";
|
||||
end if;
|
||||
|
||||
cr_bypass <= byp_cr;
|
||||
cr_tag_stall <= tag_cr.valid and not byp_cr;
|
||||
cr_tag_stall <= tag_cr.valid and not byp_cr(1);
|
||||
end process;
|
||||
|
||||
control1 : process(all)
|
||||
|
||||
@@ -79,6 +79,8 @@ architecture behave of core is
|
||||
signal execute1_to_writeback: Execute1ToWritebackType;
|
||||
signal execute1_bypass: bypass_data_t;
|
||||
signal execute1_cr_bypass: cr_bypass_data_t;
|
||||
signal execute2_bypass: bypass_data_t;
|
||||
signal execute2_cr_bypass: cr_bypass_data_t;
|
||||
|
||||
-- load store signals
|
||||
signal execute1_to_loadstore1: Execute1ToLoadstore1Type;
|
||||
@@ -298,6 +300,8 @@ begin
|
||||
c_out => decode2_to_cr_file,
|
||||
execute_bypass => execute1_bypass,
|
||||
execute_cr_bypass => execute1_cr_bypass,
|
||||
execute2_bypass => execute2_bypass,
|
||||
execute2_cr_bypass => execute2_cr_bypass,
|
||||
log_out => log_data(119 downto 110)
|
||||
);
|
||||
decode2_busy_in <= ex1_busy_out;
|
||||
@@ -359,6 +363,8 @@ begin
|
||||
e_out => execute1_to_writeback,
|
||||
bypass_data => execute1_bypass,
|
||||
bypass_cr_data => execute1_cr_bypass,
|
||||
bypass2_data => execute2_bypass,
|
||||
bypass2_cr_data => execute2_cr_bypass,
|
||||
icache_inval => ex1_icache_inval,
|
||||
dbg_ctrl_out => ctrl_debug,
|
||||
wb_events => writeback_events,
|
||||
|
||||
34
decode2.vhdl
34
decode2.vhdl
@@ -39,6 +39,8 @@ entity decode2 is
|
||||
|
||||
execute_bypass : in bypass_data_t;
|
||||
execute_cr_bypass : in cr_bypass_data_t;
|
||||
execute2_bypass : in bypass_data_t;
|
||||
execute2_cr_bypass : in cr_bypass_data_t;
|
||||
|
||||
log_out : out std_ulogic_vector(9 downto 0)
|
||||
);
|
||||
@@ -273,19 +275,19 @@ architecture behaviour of decode2 is
|
||||
|
||||
signal gpr_a_read_valid : std_ulogic;
|
||||
signal gpr_a_read : gspr_index_t;
|
||||
signal gpr_a_bypass : std_ulogic;
|
||||
signal gpr_a_bypass : std_ulogic_vector(1 downto 0);
|
||||
|
||||
signal gpr_b_read_valid : std_ulogic;
|
||||
signal gpr_b_read : gspr_index_t;
|
||||
signal gpr_b_bypass : std_ulogic;
|
||||
signal gpr_b_bypass : std_ulogic_vector(1 downto 0);
|
||||
|
||||
signal gpr_c_read_valid : std_ulogic;
|
||||
signal gpr_c_read : gspr_index_t;
|
||||
signal gpr_c_bypass : std_ulogic;
|
||||
signal gpr_c_bypass : std_ulogic_vector(1 downto 0);
|
||||
|
||||
signal cr_read_valid : std_ulogic;
|
||||
signal cr_write_valid : std_ulogic;
|
||||
signal cr_bypass : std_ulogic;
|
||||
signal cr_bypass : std_ulogic_vector(1 downto 0);
|
||||
|
||||
signal instr_tag : instr_tag_t;
|
||||
|
||||
@@ -321,6 +323,8 @@ begin
|
||||
|
||||
execute_next_tag => execute_bypass.tag,
|
||||
execute_next_cr_tag => execute_cr_bypass.tag,
|
||||
execute2_next_tag => execute2_bypass.tag,
|
||||
execute2_next_cr_tag => execute2_cr_bypass.tag,
|
||||
|
||||
cr_read_in => cr_read_valid,
|
||||
cr_write_in => cr_write_valid,
|
||||
@@ -504,27 +508,35 @@ begin
|
||||
|
||||
-- See if any of the operands can get their value via the bypass path.
|
||||
case gpr_a_bypass is
|
||||
when '1' =>
|
||||
when "10" =>
|
||||
v.e.read_data1 := execute_bypass.data;
|
||||
when "11" =>
|
||||
v.e.read_data1 := execute2_bypass.data;
|
||||
when others =>
|
||||
v.e.read_data1 := decoded_reg_a.data;
|
||||
end case;
|
||||
case gpr_b_bypass is
|
||||
when '1' =>
|
||||
when "10" =>
|
||||
v.e.read_data2 := execute_bypass.data;
|
||||
when "11" =>
|
||||
v.e.read_data2 := execute2_bypass.data;
|
||||
when others =>
|
||||
v.e.read_data2 := decoded_reg_b.data;
|
||||
end case;
|
||||
case gpr_c_bypass is
|
||||
when '1' =>
|
||||
when "10" =>
|
||||
v.e.read_data3 := execute_bypass.data;
|
||||
when "11" =>
|
||||
v.e.read_data3 := execute2_bypass.data;
|
||||
when others =>
|
||||
v.e.read_data3 := decoded_reg_c.data;
|
||||
end case;
|
||||
|
||||
v.e.cr := c_in.read_cr_data;
|
||||
if cr_bypass = '1' then
|
||||
if cr_bypass = "10" then
|
||||
v.e.cr := execute_cr_bypass.data;
|
||||
elsif cr_bypass = "11" then
|
||||
v.e.cr := execute2_cr_bypass.data;
|
||||
end if;
|
||||
|
||||
-- issue control
|
||||
@@ -577,9 +589,9 @@ begin
|
||||
r.e.valid &
|
||||
stopped_out &
|
||||
stall_out &
|
||||
gpr_a_bypass &
|
||||
gpr_b_bypass &
|
||||
gpr_c_bypass;
|
||||
(gpr_a_bypass(1) or gpr_a_bypass(0)) &
|
||||
(gpr_b_bypass(1) or gpr_b_bypass(0)) &
|
||||
(gpr_c_bypass(1) or gpr_c_bypass(0));
|
||||
end if;
|
||||
end process;
|
||||
log_out <= log_data;
|
||||
|
||||
@@ -40,6 +40,8 @@ entity execute1 is
|
||||
e_out : out Execute1ToWritebackType;
|
||||
bypass_data : out bypass_data_t;
|
||||
bypass_cr_data : out cr_bypass_data_t;
|
||||
bypass2_data : out bypass_data_t;
|
||||
bypass2_cr_data : out cr_bypass_data_t;
|
||||
|
||||
dbg_ctrl_out : out ctrl_t;
|
||||
|
||||
@@ -1482,6 +1484,7 @@ begin
|
||||
variable fv : Execute1ToFPUType;
|
||||
variable k : integer;
|
||||
variable go : std_ulogic;
|
||||
variable bypass_valid : std_ulogic;
|
||||
begin
|
||||
v := ex2;
|
||||
if (l_in.busy or fp_in.busy) = '0' then
|
||||
@@ -1559,6 +1562,19 @@ begin
|
||||
ctrl_tmp.msr(MSR_LE) <= '1';
|
||||
end if;
|
||||
|
||||
bypass_valid := ex1.e.valid;
|
||||
if (ex2.busy or l_in.busy or fp_in.busy) = '1' and ex1.res2_sel(1) = '1' then
|
||||
bypass_valid := '0';
|
||||
end if;
|
||||
|
||||
bypass2_data.tag.valid <= ex1.e.write_enable and bypass_valid;
|
||||
bypass2_data.tag.tag <= ex1.e.instr_tag.tag;
|
||||
bypass2_data.data <= ex_result;
|
||||
|
||||
bypass2_cr_data.tag.valid <= ex1.e.write_cr_enable and bypass_valid;
|
||||
bypass2_cr_data.tag.tag <= ex1.e.instr_tag.tag;
|
||||
bypass2_cr_data.data <= ex1.e.write_cr_data;
|
||||
|
||||
-- Update registers
|
||||
ex2in <= v;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user