mirror of
https://github.com/antonblanchard/microwatt.git
synced 2026-04-26 20:36:58 +00:00
The goal is to have the icache fit in BRAM by latching the output into a register. In order to avoid timing issues, we need to give the BRAM a full cycle on reads, and thus we source the BRAM address directly from fetch1's latched NIA. (Note: This will be problematic if/when we want to hash the address; we'll probably be better off having fetch1 latch a fully hashed address along with the normal one, so the icache can use the former to address the BRAM and pass the latter along.) One difficulty is that we cannot really stall the icache without adding more combo logic that would break the "one full cycle" BRAM model. This means that on stalls from decode, by the time we stall fetch1, it has already gone to the next address, which the icache is already latching. We work around this by having a "stash" buffer in fetch2 that will stash away the icache output on a stall, and override the output of the icache with the content of the stash buffer when unstalling. This requires a rewrite of the stop/step debug logic as well. We now do most of the hard work in fetch1, which makes more sense. Note: Vivado is still not inferring a built-in output register for the BRAMs. I don't want to add another cycle... I don't fully understand why it wouldn't be able to treat current_row as such, but clearly it won't. At least the timing seems good enough now for 100 MHz, possibly more. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
120 lines
2.8 KiB
VHDL
120 lines
2.8 KiB
VHDL
library ieee;
|
|
use ieee.std_logic_1164.all;
|
|
use ieee.numeric_std.all;
|
|
|
|
library work;
|
|
use work.common.all;
|
|
use work.wishbone_types.all;
|
|
|
|
-- fetch2: pipeline stage sitting between the icache and decode.
-- It registers the icache result and presents it to decode, holding
-- the registered output while stalled (see the architecture).
entity fetch2 is
    port (
        -- Clock and reset
        clk      : in  std_ulogic;
        rst      : in  std_ulogic;

        -- Pipeline control: hold the output (stall) or invalidate it (flush)
        stall_in : in  std_ulogic;
        flush_in : in  std_ulogic;

        -- Results from icache
        i_in     : in  IcacheToFetch2Type;

        -- Output to decode
        f_out    : out Fetch2ToDecode1Type
    );
end entity fetch2;
|
|
|
|
architecture behaviour of fetch2 is

    -- The icache cannot stall, so we need to stash a cycle
    -- of output from it when we stall.
    type reg_internal_type is record
        -- Copy of the icache output captured on the first stalled cycle
        stash       : IcacheToFetch2Type;
        -- '1' while the stash holds an entry that must override i_in
        -- on the next unstalled cycle
        stash_valid : std_ulogic;
    end record;

    -- r_int / rin_int : internal (stash) state, current and next value
    -- r     / rin     : registered output to decode, current and next value
    signal r_int, rin_int : reg_internal_type;
    signal r, rin         : Fetch2ToDecode1Type;

begin

    -- Clocked process: commits the next-state values computed by "comb".
    regs : process(clk)
    begin
        if rising_edge(clk) then

            -- Trace message emitted whenever the computed next output
            -- differs from the current one.
            if (r /= rin) then
                report "fetch2 rst:" & std_ulogic'image(rst) &
                    " S:" & std_ulogic'image(stall_in) &
                    " F:" & std_ulogic'image(flush_in) &
                    " T:" & std_ulogic'image(rin.stop_mark) &
                    " V:" & std_ulogic'image(rin.valid) &
                    " nia:" & to_hstring(rin.nia);
            end if;

            -- Output state remains unchanged on stall, unless we are flushing
            -- (or resetting, so that the reset value below is actually latched)
            if rst = '1' or flush_in = '1' or stall_in = '0' then
                r <= rin;
            end if;

            -- Internal state is updated on every clock
            r_int <= rin_int;
        end if;
    end process;

    -- Combinational process: selects between the live icache output and
    -- the stash, applies flush / stop-mark / reset masking, and produces
    -- the next-state values rin and rin_int.
    comb : process(all)
        variable v      : Fetch2ToDecode1Type;
        variable v_int  : reg_internal_type;
        variable v_i_in : IcacheToFetch2Type;
    begin
        v := r;
        v_int := r_int;

        -- If stalling, stash away the current input from the icache.
        -- Only the first stalled cycle is captured; an already valid
        -- stash is never overwritten.
        if stall_in = '1' and v_int.stash_valid = '0' then
            v_int.stash := i_in;
            v_int.stash_valid := '1';
        end if;

        -- If unstalling, source input from the stash and invalidate it,
        -- otherwise source normally from the icache.
        --
        v_i_in := i_in;
        if v_int.stash_valid = '1' and stall_in = '0' then
            v_i_in := v_int.stash;
            v_int.stash_valid := '0';
        end if;

        v.valid := v_i_in.valid;
        v.stop_mark := v_i_in.stop_mark;
        v.nia := v_i_in.nia;
        v.insn := v_i_in.insn;

        -- Clear stash internal valid bit on flush. We still mark
        -- the stash itself as valid since we still want to override
        -- whatever comes from icache when unstalling, but we'll
        -- override it with something invalid.
        --
        if flush_in = '1' then
            v_int.stash.valid := '0';
        end if;

        -- If we are flushing or the instruction comes with a stop mark
        -- we tag it as invalid so it doesn't get decoded and executed
        if flush_in = '1' or v.stop_mark = '1' then
            v.valid := '0';
        end if;

        -- On reset, clear the stash and make sure the output register
        -- does not come out of reset flagged as valid: "regs" latches
        -- rin while rst is asserted, so without this the valid bit would
        -- simply follow whatever the icache presents during reset.
        if rst = '1' then
            v_int.stash_valid := '0';
            v.valid := '0';
        end if;

        -- Update registers
        rin <= v;
        rin_int <= v_int;

        -- Update outputs
        f_out <= r;
    end process;

end architecture behaviour;
|