mirror of
https://github.com/antonblanchard/microwatt.git
synced 2026-03-30 11:04:36 +00:00
fetch1: Implement a simple branch target cache
This implements a cache in fetch1, where each entry stores the address of a simple branch instruction (b or bc) and the target of the branch. When fetching sequentially, if the address being fetched matches the cache entry, then fetching will be redirected to the branch target. The cache has 1024 entries and is direct-mapped, i.e. indexed by bits 11..2 of the NIA. The bus from execute1 now carries information about taken and not-taken simple branches, which fetch1 uses to update the cache. The cache entry is updated for both taken and not-taken branches, with the valid bit being set if the branch was taken and cleared if the branch was not taken. If fetching is redirected to the branch target then that goes down the pipe as a predicted-taken branch, and decode1 does not do any static branch prediction. If fetching is not redirected, then the next instruction goes down the pipe as normal and decode1 does its static branch prediction. In order to meet timing, the lookup of the cache is pipelined, so on each cycle the cache entry for the current NIA + 8 is read. This means that after a redirect (from decode1 or execute1), only the third and subsequent sequentially-fetched instructions can be predicted. This improves the coremark value on the Arty A7-100 from about 180 to about 190 (more than 5%). The BTC is optional. Builds for the Artix 7 35-T part have it off by default because the extra ~1420 LUTs it takes mean that the design doesn't fit on the Arty A7-35 board. Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
@@ -155,6 +155,7 @@ package common is
|
|||||||
big_endian : std_ulogic;
|
big_endian : std_ulogic;
|
||||||
stop_mark: std_ulogic;
|
stop_mark: std_ulogic;
|
||||||
sequential: std_ulogic;
|
sequential: std_ulogic;
|
||||||
|
predicted : std_ulogic;
|
||||||
nia: std_ulogic_vector(63 downto 0);
|
nia: std_ulogic_vector(63 downto 0);
|
||||||
end record;
|
end record;
|
||||||
|
|
||||||
@@ -165,6 +166,7 @@ package common is
|
|||||||
nia: std_ulogic_vector(63 downto 0);
|
nia: std_ulogic_vector(63 downto 0);
|
||||||
insn: std_ulogic_vector(31 downto 0);
|
insn: std_ulogic_vector(31 downto 0);
|
||||||
big_endian: std_ulogic;
|
big_endian: std_ulogic;
|
||||||
|
next_predicted: std_ulogic;
|
||||||
end record;
|
end record;
|
||||||
|
|
||||||
type Decode1ToDecode2Type is record
|
type Decode1ToDecode2Type is record
|
||||||
@@ -308,10 +310,14 @@ package common is
|
|||||||
big_endian: std_ulogic;
|
big_endian: std_ulogic;
|
||||||
mode_32bit: std_ulogic;
|
mode_32bit: std_ulogic;
|
||||||
redirect_nia: std_ulogic_vector(63 downto 0);
|
redirect_nia: std_ulogic_vector(63 downto 0);
|
||||||
|
br_nia : std_ulogic_vector(63 downto 0);
|
||||||
|
br_last : std_ulogic;
|
||||||
|
br_taken : std_ulogic;
|
||||||
end record;
|
end record;
|
||||||
constant Execute1ToFetch1Init : Execute1ToFetch1Type := (redirect => '0', virt_mode => '0',
|
constant Execute1ToFetch1Init : Execute1ToFetch1Type := (redirect => '0', virt_mode => '0',
|
||||||
priv_mode => '0', big_endian => '0',
|
priv_mode => '0', big_endian => '0',
|
||||||
mode_32bit => '0', others => (others => '0'));
|
mode_32bit => '0', br_taken => '0',
|
||||||
|
br_last => '0', others => (others => '0'));
|
||||||
|
|
||||||
type Execute1ToLoadstore1Type is record
|
type Execute1ToLoadstore1Type is record
|
||||||
valid : std_ulogic;
|
valid : std_ulogic;
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ entity core is
|
|||||||
DISABLE_FLATTEN : boolean := false;
|
DISABLE_FLATTEN : boolean := false;
|
||||||
EX1_BYPASS : boolean := true;
|
EX1_BYPASS : boolean := true;
|
||||||
HAS_FPU : boolean := true;
|
HAS_FPU : boolean := true;
|
||||||
|
HAS_BTC : boolean := true;
|
||||||
ALT_RESET_ADDRESS : std_ulogic_vector(63 downto 0) := (others => '0');
|
ALT_RESET_ADDRESS : std_ulogic_vector(63 downto 0) := (others => '0');
|
||||||
LOG_LENGTH : natural := 512
|
LOG_LENGTH : natural := 512
|
||||||
);
|
);
|
||||||
@@ -187,7 +188,8 @@ begin
|
|||||||
fetch1_0: entity work.fetch1
|
fetch1_0: entity work.fetch1
|
||||||
generic map (
|
generic map (
|
||||||
RESET_ADDRESS => (others => '0'),
|
RESET_ADDRESS => (others => '0'),
|
||||||
ALT_RESET_ADDRESS => ALT_RESET_ADDRESS
|
ALT_RESET_ADDRESS => ALT_RESET_ADDRESS,
|
||||||
|
HAS_BTC => HAS_BTC
|
||||||
)
|
)
|
||||||
port map (
|
port map (
|
||||||
clk => clk,
|
clk => clk,
|
||||||
@@ -195,6 +197,7 @@ begin
|
|||||||
alt_reset_in => alt_reset_d,
|
alt_reset_in => alt_reset_d,
|
||||||
stall_in => fetch1_stall_in,
|
stall_in => fetch1_stall_in,
|
||||||
flush_in => fetch1_flush,
|
flush_in => fetch1_flush,
|
||||||
|
inval_btc => ex1_icache_inval or mmu_to_icache.tlbie,
|
||||||
stop_in => dbg_core_stop,
|
stop_in => dbg_core_stop,
|
||||||
d_in => decode1_to_fetch1,
|
d_in => decode1_to_fetch1,
|
||||||
e_in => execute1_to_fetch1,
|
e_in => execute1_to_fetch1,
|
||||||
|
|||||||
@@ -727,7 +727,10 @@ begin
|
|||||||
bv.br_nia := (others => '0');
|
bv.br_nia := (others => '0');
|
||||||
end if;
|
end if;
|
||||||
bv.br_offset := br_offset;
|
bv.br_offset := br_offset;
|
||||||
bv.predict := v.br_pred and f_in.valid and not flush_in and not busy_out;
|
if f_in.next_predicted = '1' then
|
||||||
|
v.br_pred := '1';
|
||||||
|
end if;
|
||||||
|
bv.predict := v.br_pred and f_in.valid and not flush_in and not busy_out and not f_in.next_predicted;
|
||||||
-- after a clock edge...
|
-- after a clock edge...
|
||||||
br_target := std_ulogic_vector(signed(br.br_nia) + br.br_offset);
|
br_target := std_ulogic_vector(signed(br.br_nia) + br.br_offset);
|
||||||
|
|
||||||
|
|||||||
@@ -68,6 +68,8 @@ architecture behaviour of execute1 is
|
|||||||
last_nia : std_ulogic_vector(63 downto 0);
|
last_nia : std_ulogic_vector(63 downto 0);
|
||||||
redirect : std_ulogic;
|
redirect : std_ulogic;
|
||||||
abs_br : std_ulogic;
|
abs_br : std_ulogic;
|
||||||
|
taken_br : std_ulogic;
|
||||||
|
br_last : std_ulogic;
|
||||||
do_intr : std_ulogic;
|
do_intr : std_ulogic;
|
||||||
vector : integer range 0 to 16#fff#;
|
vector : integer range 0 to 16#fff#;
|
||||||
br_offset : std_ulogic_vector(63 downto 0);
|
br_offset : std_ulogic_vector(63 downto 0);
|
||||||
@@ -81,7 +83,7 @@ architecture behaviour of execute1 is
|
|||||||
fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL,
|
fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL,
|
||||||
mul_in_progress => '0', mul_finish => '0', div_in_progress => '0', cntz_in_progress => '0',
|
mul_in_progress => '0', mul_finish => '0', div_in_progress => '0', cntz_in_progress => '0',
|
||||||
next_lr => (others => '0'), last_nia => (others => '0'),
|
next_lr => (others => '0'), last_nia => (others => '0'),
|
||||||
redirect => '0', abs_br => '0', do_intr => '0', vector => 0,
|
redirect => '0', abs_br => '0', taken_br => '0', br_last => '0', do_intr => '0', vector => 0,
|
||||||
br_offset => (others => '0'), redir_mode => "0000",
|
br_offset => (others => '0'), redir_mode => "0000",
|
||||||
others => (others => '0'));
|
others => (others => '0'));
|
||||||
|
|
||||||
@@ -365,6 +367,7 @@ begin
|
|||||||
variable trapval : std_ulogic_vector(4 downto 0);
|
variable trapval : std_ulogic_vector(4 downto 0);
|
||||||
variable illegal : std_ulogic;
|
variable illegal : std_ulogic;
|
||||||
variable is_branch : std_ulogic;
|
variable is_branch : std_ulogic;
|
||||||
|
variable is_direct_branch : std_ulogic;
|
||||||
variable taken_branch : std_ulogic;
|
variable taken_branch : std_ulogic;
|
||||||
variable abs_branch : std_ulogic;
|
variable abs_branch : std_ulogic;
|
||||||
variable spr_val : std_ulogic_vector(63 downto 0);
|
variable spr_val : std_ulogic_vector(63 downto 0);
|
||||||
@@ -377,6 +380,7 @@ begin
|
|||||||
sum_with_carry := (others => '0');
|
sum_with_carry := (others => '0');
|
||||||
newcrf := (others => '0');
|
newcrf := (others => '0');
|
||||||
is_branch := '0';
|
is_branch := '0';
|
||||||
|
is_direct_branch := '0';
|
||||||
taken_branch := '0';
|
taken_branch := '0';
|
||||||
abs_branch := '0';
|
abs_branch := '0';
|
||||||
hold_wr_data := '0';
|
hold_wr_data := '0';
|
||||||
@@ -390,6 +394,8 @@ begin
|
|||||||
v.br_offset := (others => '0');
|
v.br_offset := (others => '0');
|
||||||
v.redir_mode := ctrl.msr(MSR_IR) & not ctrl.msr(MSR_PR) &
|
v.redir_mode := ctrl.msr(MSR_IR) & not ctrl.msr(MSR_PR) &
|
||||||
not ctrl.msr(MSR_LE) & not ctrl.msr(MSR_SF);
|
not ctrl.msr(MSR_LE) & not ctrl.msr(MSR_SF);
|
||||||
|
v.taken_br := '0';
|
||||||
|
v.br_last := '0';
|
||||||
|
|
||||||
lv := Execute1ToLoadstore1Init;
|
lv := Execute1ToLoadstore1Init;
|
||||||
fv := Execute1ToFPUInit;
|
fv := Execute1ToFPUInit;
|
||||||
@@ -843,6 +849,7 @@ begin
|
|||||||
when OP_B =>
|
when OP_B =>
|
||||||
is_branch := '1';
|
is_branch := '1';
|
||||||
taken_branch := '1';
|
taken_branch := '1';
|
||||||
|
is_direct_branch := '1';
|
||||||
abs_branch := insn_aa(e_in.insn);
|
abs_branch := insn_aa(e_in.insn);
|
||||||
if ctrl.msr(MSR_BE) = '1' then
|
if ctrl.msr(MSR_BE) = '1' then
|
||||||
do_trace := '1';
|
do_trace := '1';
|
||||||
@@ -852,6 +859,7 @@ begin
|
|||||||
bo := insn_bo(e_in.insn);
|
bo := insn_bo(e_in.insn);
|
||||||
bi := insn_bi(e_in.insn);
|
bi := insn_bi(e_in.insn);
|
||||||
is_branch := '1';
|
is_branch := '1';
|
||||||
|
is_direct_branch := '1';
|
||||||
taken_branch := ppc_bc_taken(bo, bi, cr_in, a_in);
|
taken_branch := ppc_bc_taken(bo, bi, cr_in, a_in);
|
||||||
abs_branch := insn_aa(e_in.insn);
|
abs_branch := insn_aa(e_in.insn);
|
||||||
if ctrl.msr(MSR_BE) = '1' then
|
if ctrl.msr(MSR_BE) = '1' then
|
||||||
@@ -1093,7 +1101,7 @@ begin
|
|||||||
if taken_branch = '1' then
|
if taken_branch = '1' then
|
||||||
ctrl_tmp.cfar <= e_in.nia;
|
ctrl_tmp.cfar <= e_in.nia;
|
||||||
end if;
|
end if;
|
||||||
if e_in.br_pred = '0' then
|
if taken_branch = '1' then
|
||||||
v.br_offset := b_in;
|
v.br_offset := b_in;
|
||||||
v.abs_br := abs_branch;
|
v.abs_br := abs_branch;
|
||||||
else
|
else
|
||||||
@@ -1102,6 +1110,8 @@ begin
|
|||||||
if taken_branch /= e_in.br_pred then
|
if taken_branch /= e_in.br_pred then
|
||||||
v.redirect := '1';
|
v.redirect := '1';
|
||||||
end if;
|
end if;
|
||||||
|
v.br_last := is_direct_branch;
|
||||||
|
v.taken_br := taken_branch;
|
||||||
end if;
|
end if;
|
||||||
|
|
||||||
elsif valid_in = '1' and exception = '0' and illegal = '0' then
|
elsif valid_in = '1' and exception = '0' and illegal = '0' then
|
||||||
@@ -1300,6 +1310,9 @@ begin
|
|||||||
|
|
||||||
-- Outputs to fetch1
|
-- Outputs to fetch1
|
||||||
f.redirect := r.redirect;
|
f.redirect := r.redirect;
|
||||||
|
f.br_nia := r.last_nia;
|
||||||
|
f.br_last := r.br_last and not r.do_intr;
|
||||||
|
f.br_taken := r.taken_br;
|
||||||
if r.do_intr = '1' then
|
if r.do_intr = '1' then
|
||||||
f.redirect_nia := std_ulogic_vector(to_unsigned(r.vector, 64));
|
f.redirect_nia := std_ulogic_vector(to_unsigned(r.vector, 64));
|
||||||
f.virt_mode := '0';
|
f.virt_mode := '0';
|
||||||
|
|||||||
119
fetch1.vhdl
119
fetch1.vhdl
@@ -8,7 +8,8 @@ use work.common.all;
|
|||||||
entity fetch1 is
|
entity fetch1 is
|
||||||
generic(
|
generic(
|
||||||
RESET_ADDRESS : std_logic_vector(63 downto 0) := (others => '0');
|
RESET_ADDRESS : std_logic_vector(63 downto 0) := (others => '0');
|
||||||
ALT_RESET_ADDRESS : std_logic_vector(63 downto 0) := (others => '0')
|
ALT_RESET_ADDRESS : std_logic_vector(63 downto 0) := (others => '0');
|
||||||
|
HAS_BTC : boolean := true
|
||||||
);
|
);
|
||||||
port(
|
port(
|
||||||
clk : in std_ulogic;
|
clk : in std_ulogic;
|
||||||
@@ -17,6 +18,7 @@ entity fetch1 is
|
|||||||
-- Control inputs:
|
-- Control inputs:
|
||||||
stall_in : in std_ulogic;
|
stall_in : in std_ulogic;
|
||||||
flush_in : in std_ulogic;
|
flush_in : in std_ulogic;
|
||||||
|
inval_btc : in std_ulogic;
|
||||||
stop_in : in std_ulogic;
|
stop_in : in std_ulogic;
|
||||||
alt_reset_in : in std_ulogic;
|
alt_reset_in : in std_ulogic;
|
||||||
|
|
||||||
@@ -37,10 +39,25 @@ end entity fetch1;
|
|||||||
architecture behaviour of fetch1 is
|
architecture behaviour of fetch1 is
|
||||||
type reg_internal_t is record
|
type reg_internal_t is record
|
||||||
mode_32bit: std_ulogic;
|
mode_32bit: std_ulogic;
|
||||||
|
rd_is_niap4: std_ulogic;
|
||||||
|
predicted: std_ulogic;
|
||||||
|
predicted_nia: std_ulogic_vector(63 downto 0);
|
||||||
end record;
|
end record;
|
||||||
signal r, r_next : Fetch1ToIcacheType;
|
signal r, r_next : Fetch1ToIcacheType;
|
||||||
signal r_int, r_next_int : reg_internal_t;
|
signal r_int, r_next_int : reg_internal_t;
|
||||||
|
signal advance_nia : std_ulogic;
|
||||||
signal log_nia : std_ulogic_vector(42 downto 0);
|
signal log_nia : std_ulogic_vector(42 downto 0);
|
||||||
|
|
||||||
|
constant BTC_ADDR_BITS : integer := 10;
|
||||||
|
constant BTC_TAG_BITS : integer := 62 - BTC_ADDR_BITS;
|
||||||
|
constant BTC_TARGET_BITS : integer := 62;
|
||||||
|
constant BTC_SIZE : integer := 2 ** BTC_ADDR_BITS;
|
||||||
|
constant BTC_WIDTH : integer := BTC_TAG_BITS + BTC_TARGET_BITS;
|
||||||
|
type btc_mem_type is array (0 to BTC_SIZE - 1) of std_ulogic_vector(BTC_WIDTH - 1 downto 0);
|
||||||
|
|
||||||
|
signal btc_rd_data : std_ulogic_vector(BTC_WIDTH - 1 downto 0) := (others => '0');
|
||||||
|
signal btc_rd_valid : std_ulogic := '0';
|
||||||
|
|
||||||
begin
|
begin
|
||||||
|
|
||||||
regs : process(clk)
|
regs : process(clk)
|
||||||
@@ -56,15 +73,70 @@ begin
|
|||||||
" R:" & std_ulogic'image(e_in.redirect) & std_ulogic'image(d_in.redirect) &
|
" R:" & std_ulogic'image(e_in.redirect) & std_ulogic'image(d_in.redirect) &
|
||||||
" S:" & std_ulogic'image(stall_in) &
|
" S:" & std_ulogic'image(stall_in) &
|
||||||
" T:" & std_ulogic'image(stop_in) &
|
" T:" & std_ulogic'image(stop_in) &
|
||||||
" nia:" & to_hstring(r_next.nia) &
|
" nia:" & to_hstring(r_next.nia);
|
||||||
" SM:" & std_ulogic'image(r_next.stop_mark);
|
|
||||||
end if;
|
end if;
|
||||||
r <= r_next;
|
if rst = '1' or e_in.redirect = '1' or d_in.redirect = '1' or stall_in = '0' then
|
||||||
r_int <= r_next_int;
|
r.virt_mode <= r_next.virt_mode;
|
||||||
|
r.priv_mode <= r_next.priv_mode;
|
||||||
|
r.big_endian <= r_next.big_endian;
|
||||||
|
r_int.mode_32bit <= r_next_int.mode_32bit;
|
||||||
|
end if;
|
||||||
|
if advance_nia = '1' then
|
||||||
|
r.predicted <= r_next.predicted;
|
||||||
|
r.nia <= r_next.nia;
|
||||||
|
r_int.predicted <= r_next_int.predicted;
|
||||||
|
r_int.predicted_nia <= r_next_int.predicted_nia;
|
||||||
|
r_int.rd_is_niap4 <= r_next.sequential;
|
||||||
|
end if;
|
||||||
|
r.sequential <= r_next.sequential and advance_nia;
|
||||||
|
-- always send the up-to-date stop mark and req
|
||||||
|
r.stop_mark <= stop_in;
|
||||||
|
r.req <= not rst;
|
||||||
end if;
|
end if;
|
||||||
end process;
|
end process;
|
||||||
log_out <= log_nia;
|
log_out <= log_nia;
|
||||||
|
|
||||||
|
btc : if HAS_BTC generate
|
||||||
|
signal btc_memory : btc_mem_type;
|
||||||
|
attribute ram_style : string;
|
||||||
|
attribute ram_style of btc_memory : signal is "block";
|
||||||
|
|
||||||
|
signal btc_valids : std_ulogic_vector(BTC_SIZE - 1 downto 0);
|
||||||
|
attribute ram_style of btc_valids : signal is "distributed";
|
||||||
|
|
||||||
|
signal btc_wr : std_ulogic;
|
||||||
|
signal btc_wr_data : std_ulogic_vector(BTC_WIDTH - 1 downto 0);
|
||||||
|
signal btc_wr_addr : std_ulogic_vector(BTC_ADDR_BITS - 1 downto 0);
|
||||||
|
signal btc_wr_v : std_ulogic;
|
||||||
|
begin
|
||||||
|
btc_wr_data <= e_in.br_nia(63 downto BTC_ADDR_BITS + 2) &
|
||||||
|
e_in.redirect_nia(63 downto 2);
|
||||||
|
btc_wr_addr <= e_in.br_nia(BTC_ADDR_BITS + 1 downto 2);
|
||||||
|
btc_wr <= e_in.br_last;
|
||||||
|
btc_wr_v <= e_in.br_taken;
|
||||||
|
|
||||||
|
btc_ram : process(clk)
|
||||||
|
variable raddr : unsigned(BTC_ADDR_BITS - 1 downto 0);
|
||||||
|
begin
|
||||||
|
if rising_edge(clk) then
|
||||||
|
raddr := unsigned(r.nia(BTC_ADDR_BITS + 1 downto 2)) +
|
||||||
|
to_unsigned(2, BTC_ADDR_BITS);
|
||||||
|
if advance_nia = '1' then
|
||||||
|
btc_rd_data <= btc_memory(to_integer(raddr));
|
||||||
|
btc_rd_valid <= btc_valids(to_integer(raddr));
|
||||||
|
end if;
|
||||||
|
if btc_wr = '1' then
|
||||||
|
btc_memory(to_integer(unsigned(btc_wr_addr))) <= btc_wr_data;
|
||||||
|
end if;
|
||||||
|
if inval_btc = '1' or rst = '1' then
|
||||||
|
btc_valids <= (others => '0');
|
||||||
|
elsif btc_wr = '1' then
|
||||||
|
btc_valids(to_integer(unsigned(btc_wr_addr))) <= btc_wr_v;
|
||||||
|
end if;
|
||||||
|
end if;
|
||||||
|
end process;
|
||||||
|
end generate;
|
||||||
|
|
||||||
comb : process(all)
|
comb : process(all)
|
||||||
variable v : Fetch1ToIcacheType;
|
variable v : Fetch1ToIcacheType;
|
||||||
variable v_int : reg_internal_t;
|
variable v_int : reg_internal_t;
|
||||||
@@ -72,6 +144,8 @@ begin
|
|||||||
v := r;
|
v := r;
|
||||||
v_int := r_int;
|
v_int := r_int;
|
||||||
v.sequential := '0';
|
v.sequential := '0';
|
||||||
|
v.predicted := '0';
|
||||||
|
v_int.predicted := '0';
|
||||||
|
|
||||||
if rst = '1' then
|
if rst = '1' then
|
||||||
if alt_reset_in = '1' then
|
if alt_reset_in = '1' then
|
||||||
@@ -83,6 +157,7 @@ begin
|
|||||||
v.priv_mode := '1';
|
v.priv_mode := '1';
|
||||||
v.big_endian := '0';
|
v.big_endian := '0';
|
||||||
v_int.mode_32bit := '0';
|
v_int.mode_32bit := '0';
|
||||||
|
v_int.predicted_nia := (others => '0');
|
||||||
elsif e_in.redirect = '1' then
|
elsif e_in.redirect = '1' then
|
||||||
v.nia := e_in.redirect_nia(63 downto 2) & "00";
|
v.nia := e_in.redirect_nia(63 downto 2) & "00";
|
||||||
if e_in.mode_32bit = '1' then
|
if e_in.mode_32bit = '1' then
|
||||||
@@ -97,22 +172,26 @@ begin
|
|||||||
if r_int.mode_32bit = '1' then
|
if r_int.mode_32bit = '1' then
|
||||||
v.nia(63 downto 32) := (others => '0');
|
v.nia(63 downto 32) := (others => '0');
|
||||||
end if;
|
end if;
|
||||||
elsif stall_in = '0' then
|
elsif r_int.predicted = '1' then
|
||||||
|
v.nia := r_int.predicted_nia;
|
||||||
|
v.predicted := '1';
|
||||||
|
else
|
||||||
|
v.sequential := '1';
|
||||||
|
v.nia := std_ulogic_vector(unsigned(r.nia) + 4);
|
||||||
|
if r_int.mode_32bit = '1' then
|
||||||
|
v.nia(63 downto 32) := x"00000000";
|
||||||
|
end if;
|
||||||
|
if btc_rd_valid = '1' and r_int.rd_is_niap4 = '1' and
|
||||||
|
btc_rd_data(BTC_WIDTH - 1 downto BTC_TARGET_BITS)
|
||||||
|
= v.nia(BTC_TAG_BITS + BTC_ADDR_BITS + 1 downto BTC_ADDR_BITS + 2) then
|
||||||
|
v_int.predicted := '1';
|
||||||
|
end if;
|
||||||
|
end if;
|
||||||
|
v_int.predicted_nia := btc_rd_data(BTC_TARGET_BITS - 1 downto 0) & "00";
|
||||||
|
|
||||||
-- If the last NIA value went down with a stop mark, it didn't get
|
-- If the last NIA value went down with a stop mark, it didn't get
|
||||||
-- executed, and hence we shouldn't increment NIA.
|
-- executed, and hence we shouldn't increment NIA.
|
||||||
if r.stop_mark = '0' then
|
advance_nia <= rst or e_in.redirect or d_in.redirect or (not r.stop_mark and not stall_in);
|
||||||
if r_int.mode_32bit = '0' then
|
|
||||||
v.nia := std_ulogic_vector(unsigned(r.nia) + 4);
|
|
||||||
else
|
|
||||||
v.nia := x"00000000" & std_ulogic_vector(unsigned(r.nia(31 downto 0)) + 4);
|
|
||||||
end if;
|
|
||||||
v.sequential := '1';
|
|
||||||
end if;
|
|
||||||
end if;
|
|
||||||
|
|
||||||
v.req := not rst and not stop_in;
|
|
||||||
v.stop_mark := stop_in;
|
|
||||||
|
|
||||||
r_next <= v;
|
r_next <= v;
|
||||||
r_next_int <= v_int;
|
r_next_int <= v_int;
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ entity toplevel is
|
|||||||
RESET_LOW : boolean := true;
|
RESET_LOW : boolean := true;
|
||||||
CLK_FREQUENCY : positive := 100000000;
|
CLK_FREQUENCY : positive := 100000000;
|
||||||
HAS_FPU : boolean := true;
|
HAS_FPU : boolean := true;
|
||||||
|
HAS_BTC : boolean := true;
|
||||||
USE_LITEDRAM : boolean := false;
|
USE_LITEDRAM : boolean := false;
|
||||||
NO_BRAM : boolean := false;
|
NO_BRAM : boolean := false;
|
||||||
DISABLE_FLATTEN_CORE : boolean := false;
|
DISABLE_FLATTEN_CORE : boolean := false;
|
||||||
@@ -170,6 +171,7 @@ begin
|
|||||||
SIM => false,
|
SIM => false,
|
||||||
CLK_FREQ => CLK_FREQUENCY,
|
CLK_FREQ => CLK_FREQUENCY,
|
||||||
HAS_FPU => HAS_FPU,
|
HAS_FPU => HAS_FPU,
|
||||||
|
HAS_BTC => HAS_BTC,
|
||||||
HAS_DRAM => USE_LITEDRAM,
|
HAS_DRAM => USE_LITEDRAM,
|
||||||
DRAM_SIZE => 256 * 1024 * 1024,
|
DRAM_SIZE => 256 * 1024 * 1024,
|
||||||
DRAM_INIT_SIZE => PAYLOAD_SIZE,
|
DRAM_INIT_SIZE => PAYLOAD_SIZE,
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ entity toplevel is
|
|||||||
CLK_INPUT : positive := 100000000;
|
CLK_INPUT : positive := 100000000;
|
||||||
CLK_FREQUENCY : positive := 100000000;
|
CLK_FREQUENCY : positive := 100000000;
|
||||||
HAS_FPU : boolean := true;
|
HAS_FPU : boolean := true;
|
||||||
|
HAS_BTC : boolean := false;
|
||||||
LOG_LENGTH : natural := 512;
|
LOG_LENGTH : natural := 512;
|
||||||
DISABLE_FLATTEN_CORE : boolean := false;
|
DISABLE_FLATTEN_CORE : boolean := false;
|
||||||
UART_IS_16550 : boolean := true
|
UART_IS_16550 : boolean := true
|
||||||
@@ -71,6 +72,7 @@ begin
|
|||||||
SIM => false,
|
SIM => false,
|
||||||
CLK_FREQ => CLK_FREQUENCY,
|
CLK_FREQ => CLK_FREQUENCY,
|
||||||
HAS_FPU => HAS_FPU,
|
HAS_FPU => HAS_FPU,
|
||||||
|
HAS_BTC => HAS_BTC,
|
||||||
LOG_LENGTH => LOG_LENGTH,
|
LOG_LENGTH => LOG_LENGTH,
|
||||||
DISABLE_FLATTEN_CORE => DISABLE_FLATTEN_CORE,
|
DISABLE_FLATTEN_CORE => DISABLE_FLATTEN_CORE,
|
||||||
UART0_IS_16550 => UART_IS_16550
|
UART0_IS_16550 => UART_IS_16550
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ entity toplevel is
|
|||||||
RESET_LOW : boolean := true;
|
RESET_LOW : boolean := true;
|
||||||
CLK_FREQUENCY : positive := 100000000;
|
CLK_FREQUENCY : positive := 100000000;
|
||||||
HAS_FPU : boolean := true;
|
HAS_FPU : boolean := true;
|
||||||
|
HAS_BTC : boolean := true;
|
||||||
USE_LITEDRAM : boolean := false;
|
USE_LITEDRAM : boolean := false;
|
||||||
NO_BRAM : boolean := false;
|
NO_BRAM : boolean := false;
|
||||||
DISABLE_FLATTEN_CORE : boolean := false;
|
DISABLE_FLATTEN_CORE : boolean := false;
|
||||||
@@ -122,6 +123,7 @@ begin
|
|||||||
SIM => false,
|
SIM => false,
|
||||||
CLK_FREQ => CLK_FREQUENCY,
|
CLK_FREQ => CLK_FREQUENCY,
|
||||||
HAS_FPU => HAS_FPU,
|
HAS_FPU => HAS_FPU,
|
||||||
|
HAS_BTC => HAS_BTC,
|
||||||
HAS_DRAM => USE_LITEDRAM,
|
HAS_DRAM => USE_LITEDRAM,
|
||||||
DRAM_SIZE => 512 * 1024 * 1024,
|
DRAM_SIZE => 512 * 1024 * 1024,
|
||||||
DRAM_INIT_SIZE => PAYLOAD_SIZE,
|
DRAM_INIT_SIZE => PAYLOAD_SIZE,
|
||||||
|
|||||||
@@ -565,6 +565,7 @@ begin
|
|||||||
i_out.stop_mark <= r.hit_smark;
|
i_out.stop_mark <= r.hit_smark;
|
||||||
i_out.fetch_failed <= r.fetch_failed;
|
i_out.fetch_failed <= r.fetch_failed;
|
||||||
i_out.big_endian <= r.big_endian;
|
i_out.big_endian <= r.big_endian;
|
||||||
|
i_out.next_predicted <= i_in.predicted;
|
||||||
|
|
||||||
-- Stall fetch1 if we have a miss on cache or TLB or a protection fault
|
-- Stall fetch1 if we have a miss on cache or TLB or a protection fault
|
||||||
stall_out <= not (is_hit and access_ok);
|
stall_out <= not (is_hit and access_ok);
|
||||||
|
|||||||
@@ -134,6 +134,7 @@ targets:
|
|||||||
- log_length=2048
|
- log_length=2048
|
||||||
- uart_is_16550
|
- uart_is_16550
|
||||||
- has_fpu
|
- has_fpu
|
||||||
|
- has_btc
|
||||||
tools:
|
tools:
|
||||||
vivado: {part : xc7a100tcsg324-1}
|
vivado: {part : xc7a100tcsg324-1}
|
||||||
toplevel : toplevel
|
toplevel : toplevel
|
||||||
@@ -218,6 +219,7 @@ targets:
|
|||||||
- log_length=2048
|
- log_length=2048
|
||||||
- uart_is_16550
|
- uart_is_16550
|
||||||
- has_fpu
|
- has_fpu
|
||||||
|
- has_btc
|
||||||
tools:
|
tools:
|
||||||
vivado: {part : xc7a200tsbg484-1}
|
vivado: {part : xc7a200tsbg484-1}
|
||||||
toplevel : toplevel
|
toplevel : toplevel
|
||||||
@@ -235,6 +237,7 @@ targets:
|
|||||||
- log_length=2048
|
- log_length=2048
|
||||||
- uart_is_16550
|
- uart_is_16550
|
||||||
- has_fpu
|
- has_fpu
|
||||||
|
- has_btc
|
||||||
generate: [litedram_nexys_video]
|
generate: [litedram_nexys_video]
|
||||||
tools:
|
tools:
|
||||||
vivado: {part : xc7a200tsbg484-1}
|
vivado: {part : xc7a200tsbg484-1}
|
||||||
@@ -254,6 +257,7 @@ targets:
|
|||||||
- uart_is_16550
|
- uart_is_16550
|
||||||
- has_uart1
|
- has_uart1
|
||||||
- has_fpu=false
|
- has_fpu=false
|
||||||
|
- has_btc=false
|
||||||
tools:
|
tools:
|
||||||
vivado: {part : xc7a35ticsg324-1L}
|
vivado: {part : xc7a35ticsg324-1L}
|
||||||
toplevel : toplevel
|
toplevel : toplevel
|
||||||
@@ -273,6 +277,7 @@ targets:
|
|||||||
- uart_is_16550
|
- uart_is_16550
|
||||||
- has_uart1
|
- has_uart1
|
||||||
- has_fpu=false
|
- has_fpu=false
|
||||||
|
- has_btc=false
|
||||||
generate: [litedram_arty, liteeth_arty]
|
generate: [litedram_arty, liteeth_arty]
|
||||||
tools:
|
tools:
|
||||||
vivado: {part : xc7a35ticsg324-1L}
|
vivado: {part : xc7a35ticsg324-1L}
|
||||||
@@ -292,6 +297,7 @@ targets:
|
|||||||
- uart_is_16550
|
- uart_is_16550
|
||||||
- has_uart1
|
- has_uart1
|
||||||
- has_fpu
|
- has_fpu
|
||||||
|
- has_btc
|
||||||
tools:
|
tools:
|
||||||
vivado: {part : xc7a100ticsg324-1L}
|
vivado: {part : xc7a100ticsg324-1L}
|
||||||
toplevel : toplevel
|
toplevel : toplevel
|
||||||
@@ -311,6 +317,7 @@ targets:
|
|||||||
- uart_is_16550
|
- uart_is_16550
|
||||||
- has_uart1
|
- has_uart1
|
||||||
- has_fpu
|
- has_fpu
|
||||||
|
- has_btc
|
||||||
generate: [litedram_arty, liteeth_arty]
|
generate: [litedram_arty, liteeth_arty]
|
||||||
tools:
|
tools:
|
||||||
vivado: {part : xc7a100ticsg324-1L}
|
vivado: {part : xc7a100ticsg324-1L}
|
||||||
@@ -329,6 +336,7 @@ targets:
|
|||||||
- log_length=512
|
- log_length=512
|
||||||
- uart_is_16550
|
- uart_is_16550
|
||||||
- has_fpu=false
|
- has_fpu=false
|
||||||
|
- has_btc=false
|
||||||
tools:
|
tools:
|
||||||
vivado: {part : xc7a35tcpg236-1}
|
vivado: {part : xc7a35tcpg236-1}
|
||||||
toplevel : toplevel
|
toplevel : toplevel
|
||||||
@@ -395,6 +403,12 @@ parameters:
|
|||||||
paramtype : generic
|
paramtype : generic
|
||||||
default : true
|
default : true
|
||||||
|
|
||||||
|
has_btc:
|
||||||
|
datatype : bool
|
||||||
|
description : Include a branch target cache in the core
|
||||||
|
paramtype : generic
|
||||||
|
default : true
|
||||||
|
|
||||||
disable_flatten_core:
|
disable_flatten_core:
|
||||||
datatype : bool
|
datatype : bool
|
||||||
description : Prevent Vivado from flattening the main core components
|
description : Prevent Vivado from flattening the main core components
|
||||||
|
|||||||
2
soc.vhdl
2
soc.vhdl
@@ -53,6 +53,7 @@ entity soc is
|
|||||||
CLK_FREQ : positive;
|
CLK_FREQ : positive;
|
||||||
SIM : boolean;
|
SIM : boolean;
|
||||||
HAS_FPU : boolean := true;
|
HAS_FPU : boolean := true;
|
||||||
|
HAS_BTC : boolean := true;
|
||||||
DISABLE_FLATTEN_CORE : boolean := false;
|
DISABLE_FLATTEN_CORE : boolean := false;
|
||||||
HAS_DRAM : boolean := false;
|
HAS_DRAM : boolean := false;
|
||||||
DRAM_SIZE : integer := 0;
|
DRAM_SIZE : integer := 0;
|
||||||
@@ -255,6 +256,7 @@ begin
|
|||||||
generic map(
|
generic map(
|
||||||
SIM => SIM,
|
SIM => SIM,
|
||||||
HAS_FPU => HAS_FPU,
|
HAS_FPU => HAS_FPU,
|
||||||
|
HAS_BTC => HAS_BTC,
|
||||||
DISABLE_FLATTEN => DISABLE_FLATTEN_CORE,
|
DISABLE_FLATTEN => DISABLE_FLATTEN_CORE,
|
||||||
ALT_RESET_ADDRESS => (23 downto 0 => '0', others => '1'),
|
ALT_RESET_ADDRESS => (23 downto 0 => '0', others => '1'),
|
||||||
LOG_LENGTH => LOG_LENGTH
|
LOG_LENGTH => LOG_LENGTH
|
||||||
|
|||||||
Reference in New Issue
Block a user