mirror of
https://github.com/antonblanchard/microwatt.git
synced 2026-01-11 23:43:15 +00:00
core: Remove fetch2 pipeline stage
The fetch2 stage existed primarily to provide a stash buffer for the output of icache when a stall occurred. However, we can get the same effect -- of having the input to decode1 stay unchanged on a stall cycle -- by using the read enable of the BRAMs in icache, and by adding logic to keep the outputs unchanged on a clock cycle when stall_in = 1. This reduces branch and interrupt latency by one cycle. Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
parent
49a4d9f67a
commit
b5a7dbb78d
2
Makefile
2
Makefile
@ -42,7 +42,7 @@ all = core_tb icache_tb dcache_tb multiply_tb dmi_dtm_tb divider_tb \
|
||||
all: $(all)
|
||||
|
||||
core_files = decode_types.vhdl common.vhdl wishbone_types.vhdl fetch1.vhdl \
|
||||
fetch2.vhdl utils.vhdl plru.vhdl cache_ram.vhdl icache.vhdl \
|
||||
utils.vhdl plru.vhdl cache_ram.vhdl icache.vhdl \
|
||||
decode1.vhdl helpers.vhdl insn_helpers.vhdl gpr_hazard.vhdl \
|
||||
cr_hazard.vhdl control.vhdl decode2.vhdl register_file.vhdl \
|
||||
cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \
|
||||
|
||||
12
common.vhdl
12
common.vhdl
@ -96,7 +96,7 @@ package common is
|
||||
nia: std_ulogic_vector(63 downto 0);
|
||||
end record;
|
||||
|
||||
type IcacheToFetch2Type is record
|
||||
type IcacheToDecode1Type is record
|
||||
valid: std_ulogic;
|
||||
stop_mark: std_ulogic;
|
||||
fetch_failed: std_ulogic;
|
||||
@ -104,16 +104,6 @@ package common is
|
||||
insn: std_ulogic_vector(31 downto 0);
|
||||
end record;
|
||||
|
||||
type Fetch2ToDecode1Type is record
|
||||
valid: std_ulogic;
|
||||
stop_mark : std_ulogic;
|
||||
fetch_failed: std_ulogic;
|
||||
nia: std_ulogic_vector(63 downto 0);
|
||||
insn: std_ulogic_vector(31 downto 0);
|
||||
end record;
|
||||
constant Fetch2ToDecode1Init : Fetch2ToDecode1Type := (valid => '0', stop_mark => '0', fetch_failed => '0',
|
||||
nia => (others => '0'), insn => (others => '0'));
|
||||
|
||||
type Decode1ToDecode2Type is record
|
||||
valid: std_ulogic;
|
||||
stop_mark : std_ulogic;
|
||||
|
||||
25
core.vhdl
25
core.vhdl
@ -41,12 +41,9 @@ entity core is
|
||||
end core;
|
||||
|
||||
architecture behave of core is
|
||||
-- fetch signals
|
||||
signal fetch2_to_decode1: Fetch2ToDecode1Type;
|
||||
|
||||
-- icache signals
|
||||
signal fetch1_to_icache : Fetch1ToIcacheType;
|
||||
signal icache_to_fetch2 : IcacheToFetch2Type;
|
||||
signal icache_to_decode1 : IcacheToDecode1Type;
|
||||
signal mmu_to_icache : MmuToIcacheType;
|
||||
|
||||
-- decode signals
|
||||
@ -83,7 +80,7 @@ architecture behave of core is
|
||||
-- local signals
|
||||
signal fetch1_stall_in : std_ulogic;
|
||||
signal icache_stall_out : std_ulogic;
|
||||
signal fetch2_stall_in : std_ulogic;
|
||||
signal icache_stall_in : std_ulogic;
|
||||
signal decode1_stall_in : std_ulogic;
|
||||
signal decode2_stall_in : std_ulogic;
|
||||
signal decode2_stall_out : std_ulogic;
|
||||
@ -145,7 +142,6 @@ architecture behave of core is
|
||||
attribute keep_hierarchy : string;
|
||||
attribute keep_hierarchy of fetch1_0 : label is keep_h(DISABLE_FLATTEN);
|
||||
attribute keep_hierarchy of icache_0 : label is keep_h(DISABLE_FLATTEN);
|
||||
attribute keep_hierarchy of fetch2_0 : label is keep_h(DISABLE_FLATTEN);
|
||||
attribute keep_hierarchy of decode1_0 : label is keep_h(DISABLE_FLATTEN);
|
||||
attribute keep_hierarchy of decode2_0 : label is keep_h(DISABLE_FLATTEN);
|
||||
attribute keep_hierarchy of register_file_0 : label is keep_h(DISABLE_FLATTEN);
|
||||
@ -206,27 +202,18 @@ begin
|
||||
clk => clk,
|
||||
rst => rst_icache,
|
||||
i_in => fetch1_to_icache,
|
||||
i_out => icache_to_fetch2,
|
||||
i_out => icache_to_decode1,
|
||||
m_in => mmu_to_icache,
|
||||
flush_in => flush,
|
||||
inval_in => dbg_icache_rst or ex1_icache_inval,
|
||||
stall_in => icache_stall_in,
|
||||
stall_out => icache_stall_out,
|
||||
wishbone_out => wishbone_insn_out,
|
||||
wishbone_in => wishbone_insn_in,
|
||||
log_out => log_data(96 downto 43)
|
||||
);
|
||||
|
||||
fetch2_0: entity work.fetch2
|
||||
port map (
|
||||
clk => clk,
|
||||
rst => rst_fetch2,
|
||||
stall_in => fetch2_stall_in,
|
||||
flush_in => flush,
|
||||
i_in => icache_to_fetch2,
|
||||
f_out => fetch2_to_decode1
|
||||
);
|
||||
|
||||
fetch2_stall_in <= decode2_stall_out;
|
||||
icache_stall_in <= decode2_stall_out;
|
||||
|
||||
decode1_0: entity work.decode1
|
||||
port map (
|
||||
@ -234,7 +221,7 @@ begin
|
||||
rst => rst_dec1,
|
||||
stall_in => decode1_stall_in,
|
||||
flush_in => flush,
|
||||
f_in => fetch2_to_decode1,
|
||||
f_in => icache_to_decode1,
|
||||
d_out => decode1_to_decode2,
|
||||
log_out => log_data(109 downto 97)
|
||||
);
|
||||
|
||||
@ -14,9 +14,8 @@ entity decode1 is
|
||||
stall_in : in std_ulogic;
|
||||
flush_in : in std_ulogic;
|
||||
|
||||
f_in : in Fetch2ToDecode1Type;
|
||||
f_in : in IcacheToDecode1Type;
|
||||
d_out : out Decode1ToDecode2Type;
|
||||
|
||||
log_out : out std_ulogic_vector(12 downto 0)
|
||||
);
|
||||
end entity decode1;
|
||||
|
||||
123
fetch2.vhdl
123
fetch2.vhdl
@ -1,123 +0,0 @@
|
||||
library ieee;
|
||||
use ieee.std_logic_1164.all;
|
||||
use ieee.numeric_std.all;
|
||||
|
||||
library work;
|
||||
use work.common.all;
|
||||
use work.wishbone_types.all;
|
||||
|
||||
entity fetch2 is
|
||||
port(
|
||||
clk : in std_ulogic;
|
||||
rst : in std_ulogic;
|
||||
|
||||
stall_in : in std_ulogic;
|
||||
flush_in : in std_ulogic;
|
||||
|
||||
-- Results from icache
|
||||
i_in : in IcacheToFetch2Type;
|
||||
|
||||
-- Output to decode
|
||||
f_out : out Fetch2ToDecode1Type
|
||||
);
|
||||
end entity fetch2;
|
||||
|
||||
architecture behaviour of fetch2 is
|
||||
|
||||
-- The icache cannot stall, so we need to stash a cycle
|
||||
-- of output from it when we stall.
|
||||
type reg_internal_type is record
|
||||
stash : IcacheToFetch2Type;
|
||||
stash_valid : std_ulogic;
|
||||
stopped : std_ulogic;
|
||||
end record;
|
||||
|
||||
signal r_int, rin_int : reg_internal_type;
|
||||
signal r, rin : Fetch2ToDecode1Type;
|
||||
|
||||
begin
|
||||
regs : process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
|
||||
if (r /= rin) then
|
||||
report "fetch2 rst:" & std_ulogic'image(rst) &
|
||||
" S:" & std_ulogic'image(stall_in) &
|
||||
" F:" & std_ulogic'image(flush_in) &
|
||||
" T:" & std_ulogic'image(rin.stop_mark) &
|
||||
" V:" & std_ulogic'image(rin.valid) &
|
||||
" FF:" & std_ulogic'image(rin.fetch_failed) &
|
||||
" nia:" & to_hstring(rin.nia);
|
||||
end if;
|
||||
|
||||
-- Output state remains unchanged on stall, unless we are flushing
|
||||
if rst = '1' or flush_in = '1' or stall_in = '0' then
|
||||
r <= rin;
|
||||
end if;
|
||||
|
||||
-- Internal state is updated on every clock
|
||||
r_int <= rin_int;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
comb : process(all)
|
||||
variable v : Fetch2ToDecode1Type;
|
||||
variable v_int : reg_internal_type;
|
||||
variable v_i_in : IcacheToFetch2Type;
|
||||
begin
|
||||
v := r;
|
||||
v_int := r_int;
|
||||
|
||||
-- If stalling, stash away the current input from the icache
|
||||
if stall_in = '1' and v_int.stash_valid = '0' then
|
||||
v_int.stash := i_in;
|
||||
v_int.stash_valid := '1';
|
||||
end if;
|
||||
|
||||
-- If unstalling, source input from the stash and invalidate it,
|
||||
-- otherwise source normally from the icache.
|
||||
--
|
||||
v_i_in := i_in;
|
||||
if v_int.stash_valid = '1' and stall_in = '0' then
|
||||
v_i_in := v_int.stash;
|
||||
v_int.stash_valid := '0';
|
||||
end if;
|
||||
|
||||
v.valid := v_i_in.valid;
|
||||
v.stop_mark := v_i_in.stop_mark;
|
||||
v.fetch_failed := v_i_in.fetch_failed;
|
||||
v.nia := v_i_in.nia;
|
||||
v.insn := v_i_in.insn;
|
||||
|
||||
-- Clear stash internal valid bit on flush. We still mark
|
||||
-- the stash itself as valid since we still want to override
|
||||
-- whatever comes from icache when unstalling, but we'll
|
||||
-- override it with something invalid.
|
||||
--
|
||||
if flush_in = '1' then
|
||||
v_int.stash.valid := '0';
|
||||
v_int.stash.fetch_failed := '0';
|
||||
end if;
|
||||
|
||||
-- If we are flushing or the instruction comes with a stop mark
|
||||
-- we tag it as invalid so it doesn't get decoded and executed
|
||||
if flush_in = '1' or v.stop_mark = '1' then
|
||||
v.valid := '0';
|
||||
v.fetch_failed := '0';
|
||||
end if;
|
||||
|
||||
-- Clear stash on reset
|
||||
if rst = '1' then
|
||||
v_int.stash_valid := '0';
|
||||
v.valid := '0';
|
||||
end if;
|
||||
|
||||
-- Update registers
|
||||
rin <= v;
|
||||
rin_int <= v_int;
|
||||
|
||||
-- Update outputs
|
||||
f_out <= r;
|
||||
end process;
|
||||
|
||||
end architecture behaviour;
|
||||
50
icache.vhdl
50
icache.vhdl
@ -48,10 +48,11 @@ entity icache is
|
||||
rst : in std_ulogic;
|
||||
|
||||
i_in : in Fetch1ToIcacheType;
|
||||
i_out : out IcacheToFetch2Type;
|
||||
i_out : out IcacheToDecode1Type;
|
||||
|
||||
m_in : in MmuToIcacheType;
|
||||
|
||||
stall_in : in std_ulogic;
|
||||
stall_out : out std_ulogic;
|
||||
flush_in : in std_ulogic;
|
||||
inval_in : in std_ulogic;
|
||||
@ -366,7 +367,7 @@ begin
|
||||
);
|
||||
process(all)
|
||||
begin
|
||||
do_read <= '1';
|
||||
do_read <= not stall_in;
|
||||
do_write <= '0';
|
||||
if wishbone_in.ack = '1' and r.store_way = i then
|
||||
do_write <= '1';
|
||||
@ -533,25 +534,32 @@ begin
|
||||
icache_hit : process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
-- On a hit, latch the request for the next cycle, when the BRAM data
|
||||
-- will be available on the cache_out output of the corresponding way
|
||||
--
|
||||
r.hit_valid <= req_is_hit;
|
||||
-- Send stop marks and NIA down regardless of validity
|
||||
r.hit_smark <= i_in.stop_mark;
|
||||
r.hit_nia <= i_in.nia;
|
||||
if req_is_hit = '1' then
|
||||
r.hit_way <= req_hit_way;
|
||||
r.hit_smark <= i_in.stop_mark;
|
||||
-- keep outputs to decode1 unchanged on a stall
|
||||
-- except that flush or reset sets valid to 0
|
||||
if stall_in = '1' then
|
||||
if rst = '1' or flush_in = '1' then
|
||||
r.hit_valid <= '0';
|
||||
end if;
|
||||
else
|
||||
-- On a hit, latch the request for the next cycle, when the BRAM data
|
||||
-- will be available on the cache_out output of the corresponding way
|
||||
--
|
||||
r.hit_valid <= req_is_hit;
|
||||
-- Send stop marks and NIA down regardless of validity
|
||||
r.hit_smark <= i_in.stop_mark;
|
||||
r.hit_nia <= i_in.nia;
|
||||
if req_is_hit = '1' then
|
||||
r.hit_way <= req_hit_way;
|
||||
|
||||
report "cache hit nia:" & to_hstring(i_in.nia) &
|
||||
" IR:" & std_ulogic'image(i_in.virt_mode) &
|
||||
" SM:" & std_ulogic'image(i_in.stop_mark) &
|
||||
" idx:" & integer'image(req_index) &
|
||||
" tag:" & to_hstring(req_tag) &
|
||||
" way:" & integer'image(req_hit_way) &
|
||||
" RA:" & to_hstring(real_addr);
|
||||
end if;
|
||||
report "cache hit nia:" & to_hstring(i_in.nia) &
|
||||
" IR:" & std_ulogic'image(i_in.virt_mode) &
|
||||
" SM:" & std_ulogic'image(i_in.stop_mark) &
|
||||
" idx:" & integer'image(req_index) &
|
||||
" tag:" & to_hstring(req_tag) &
|
||||
" way:" & integer'image(req_hit_way) &
|
||||
" RA:" & to_hstring(real_addr);
|
||||
end if;
|
||||
end if;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
@ -674,7 +682,7 @@ begin
|
||||
-- TLB miss and protection fault processing
|
||||
if rst = '1' or flush_in = '1' or m_in.tlbld = '1' then
|
||||
r.fetch_failed <= '0';
|
||||
elsif i_in.req = '1' and access_ok = '0' then
|
||||
elsif i_in.req = '1' and access_ok = '0' and stall_in = '0' then
|
||||
r.fetch_failed <= '1';
|
||||
end if;
|
||||
end if;
|
||||
|
||||
@ -13,7 +13,7 @@ architecture behave of icache_tb is
|
||||
signal rst : std_ulogic;
|
||||
|
||||
signal i_out : Fetch1ToIcacheType;
|
||||
signal i_in : IcacheToFetch2Type;
|
||||
signal i_in : IcacheToDecode1Type;
|
||||
|
||||
signal m_out : MmuToIcacheType;
|
||||
|
||||
@ -33,6 +33,7 @@ begin
|
||||
i_in => i_out,
|
||||
i_out => i_in,
|
||||
m_in => m_out,
|
||||
stall_in => '0',
|
||||
flush_in => '0',
|
||||
inval_in => '0',
|
||||
wishbone_out => wb_bram_in,
|
||||
|
||||
@ -9,7 +9,6 @@ filesets:
|
||||
- wishbone_types.vhdl
|
||||
- common.vhdl
|
||||
- fetch1.vhdl
|
||||
- fetch2.vhdl
|
||||
- decode1.vhdl
|
||||
- helpers.vhdl
|
||||
- decode2.vhdl
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user