1
0
mirror of https://github.com/antonblanchard/microwatt.git synced 2026-01-11 23:43:15 +00:00
Paul Mackerras d112a7ad94 Implement scv and rfscv
The main quirk here is that scv sets LR and CTR instead of SRR0 and
SRR1, and likewise rfscv uses LR and CTR.  Also, scv uses a set of 128
interrupt vectors starting at 0x17000.  Fortunately, the layout of the
SPR RAM was already such that LR and CTR were in the even and odd
halves respectively at the same index, so reading or writing LR and
CTR instead of SRR0 and SRR1 is quite easy.

Use of scv is subject to an FSCR bit but not an HFSCR bit.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
2024-12-20 22:24:20 +11:00

443 lines
16 KiB
VHDL

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library work;
use work.utils.all;
use work.common.all;
entity fetch1 is
generic(
RESET_ADDRESS : std_logic_vector(63 downto 0) := (others => '0');
ALT_RESET_ADDRESS : std_logic_vector(63 downto 0) := (others => '0');
TLB_SIZE : positive := 64; -- L1 ITLB number of entries (direct mapped)
HAS_BTC : boolean := true
);
port(
clk : in std_ulogic;
rst : in std_ulogic;
-- Control inputs:
stall_in : in std_ulogic;
flush_in : in std_ulogic;
inval_btc : in std_ulogic;
stop_in : in std_ulogic;
alt_reset_in : in std_ulogic;
m_in : in MmuToITLBType;
-- redirect from writeback unit
w_in : in WritebackToFetch1Type;
-- redirect from decode1
d_in : in Decode1ToFetch1Type;
-- Request to icache
i_out : out Fetch1ToIcacheType;
-- outputs to logger
log_out : out std_ulogic_vector(42 downto 0)
);
end entity fetch1;
architecture behaviour of fetch1 is
type reg_internal_t is record
mode_32bit: std_ulogic;
rd_is_niap4: std_ulogic;
tlbcheck: std_ulogic;
tlbstall: std_ulogic;
next_nia: std_ulogic_vector(63 downto 0);
end record;
-- Mini effective to real translation cache
type erat_t is record
epn0: std_ulogic_vector(63 - MIN_LG_PGSZ downto 0);
epn1: std_ulogic_vector(63 - MIN_LG_PGSZ downto 0);
rpn0: std_ulogic_vector(REAL_ADDR_BITS - MIN_LG_PGSZ - 1 downto 0);
rpn1: std_ulogic_vector(REAL_ADDR_BITS - MIN_LG_PGSZ - 1 downto 0);
priv0: std_ulogic;
priv1: std_ulogic;
valid: std_ulogic_vector(1 downto 0);
mru: std_ulogic; -- '1' => entry 1 most recently used
end record;
signal r, r_next : Fetch1ToIcacheType;
signal r_int, r_next_int : reg_internal_t;
signal advance_nia : std_ulogic;
signal log_nia : std_ulogic_vector(42 downto 0);
signal erat : erat_t;
signal erat_hit : std_ulogic;
signal erat_sel : std_ulogic;
constant BTC_ADDR_BITS : integer := 10;
constant BTC_TAG_BITS : integer := 62 - BTC_ADDR_BITS;
constant BTC_TARGET_BITS : integer := 62;
constant BTC_SIZE : integer := 2 ** BTC_ADDR_BITS;
constant BTC_WIDTH : integer := BTC_TAG_BITS + BTC_TARGET_BITS + 2;
type btc_mem_type is array (0 to BTC_SIZE - 1) of std_ulogic_vector(BTC_WIDTH - 1 downto 0);
signal btc_rd_addr : unsigned(BTC_ADDR_BITS - 1 downto 0);
signal btc_rd_data : std_ulogic_vector(BTC_WIDTH - 1 downto 0) := (others => '0');
signal btc_rd_valid : std_ulogic := '0';
-- L1 ITLB.
constant TLB_BITS : natural := log2(TLB_SIZE);
constant TLB_EA_TAG_BITS : natural := 64 - (MIN_LG_PGSZ + TLB_BITS);
constant TLB_PTE_BITS : natural := 64;
subtype tlb_index_t is integer range 0 to TLB_SIZE - 1;
type tlb_valids_t is array(tlb_index_t) of std_ulogic;
subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
type tlb_tags_t is array(tlb_index_t) of tlb_tag_t;
subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t;
signal itlb_valids : tlb_valids_t;
signal itlb_tags : tlb_tags_t;
signal itlb_ptes : tlb_ptes_t;
-- Values read from above arrays on a clock edge
signal itlb_valid : std_ulogic;
signal itlb_ttag : tlb_tag_t;
signal itlb_pte : tlb_pte_t;
signal itlb_hit : std_ulogic;
-- Simple hash for direct-mapped TLB index
function hash_ea(addr: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
begin
hash := addr(MIN_LG_PGSZ + TLB_BITS - 1 downto MIN_LG_PGSZ)
xor addr(MIN_LG_PGSZ + 2 * TLB_BITS - 1 downto MIN_LG_PGSZ + TLB_BITS)
xor addr(MIN_LG_PGSZ + 3 * TLB_BITS - 1 downto MIN_LG_PGSZ + 2 * TLB_BITS);
return hash;
end;
begin
regs : process(clk)
begin
if rising_edge(clk) then
log_nia <= r.nia(63) & r.nia(43 downto 2);
if r /= r_next and advance_nia = '1' then
report "fetch1 rst:" & std_ulogic'image(rst) &
" IR:" & std_ulogic'image(r_next.virt_mode) &
" P:" & std_ulogic'image(r_next.priv_mode) &
" E:" & std_ulogic'image(r_next.big_endian) &
" 32:" & std_ulogic'image(r_next_int.mode_32bit) &
" I:" & std_ulogic'image(w_in.interrupt) &
" R:" & std_ulogic'image(w_in.redirect) & std_ulogic'image(d_in.redirect) &
" S:" & std_ulogic'image(stall_in) &
" T:" & std_ulogic'image(stop_in) &
" nia:" & to_hstring(r_next.nia) &
" req:" & std_ulogic'image(r_next.req) &
" FF:" & std_ulogic'image(r_next.fetch_fail);
end if;
if advance_nia = '1' then
r <= r_next;
r_int <= r_next_int;
end if;
-- always send the up-to-date stop mark and req
r.stop_mark <= stop_in;
r.req <= r_next.req;
r.fetch_fail <= r_next.fetch_fail;
r_int.tlbcheck <= r_next_int.tlbcheck;
r_int.tlbstall <= r_next_int.tlbstall;
end if;
end process;
log_out <= log_nia;
btc : if HAS_BTC generate
signal btc_memory : btc_mem_type;
attribute ram_style : string;
attribute ram_style of btc_memory : signal is "block";
signal btc_valids : std_ulogic_vector(BTC_SIZE - 1 downto 0);
-- attribute ram_style of btc_valids : signal is "distributed";
signal btc_wr : std_ulogic;
signal btc_wr_data : std_ulogic_vector(BTC_WIDTH - 1 downto 0);
signal btc_wr_addr : std_ulogic_vector(BTC_ADDR_BITS - 1 downto 0);
begin
btc_wr_data <= w_in.br_taken &
r.virt_mode &
w_in.br_nia(63 downto BTC_ADDR_BITS + 2) &
w_in.redirect_nia(63 downto 2);
btc_wr_addr <= w_in.br_nia(BTC_ADDR_BITS + 1 downto 2);
btc_wr <= w_in.br_last;
btc_ram : process(clk)
variable raddr : unsigned(BTC_ADDR_BITS - 1 downto 0);
begin
if rising_edge(clk) then
if advance_nia = '1' then
if is_X(btc_rd_addr) then
btc_rd_data <= (others => 'X');
btc_rd_valid <= 'X';
else
btc_rd_data <= btc_memory(to_integer(btc_rd_addr));
btc_rd_valid <= btc_valids(to_integer(btc_rd_addr));
end if;
end if;
if btc_wr = '1' then
assert not is_X(btc_wr_addr) report "Writing to unknown address" severity FAILURE;
btc_memory(to_integer(unsigned(btc_wr_addr))) <= btc_wr_data;
end if;
if inval_btc = '1' or rst = '1' then
btc_valids <= (others => '0');
elsif btc_wr = '1' then
assert not is_X(btc_wr_addr) report "Writing to unknown address" severity FAILURE;
btc_valids(to_integer(unsigned(btc_wr_addr))) <= '1';
end if;
end if;
end process;
end generate;
erat_sync : process(clk)
begin
if rising_edge(clk) then
if rst /= '0' or m_in.tlbie = '1' then
erat.valid <= "00";
erat.mru <= '0';
else
if erat_hit = '1' then
erat.mru <= erat_sel;
end if;
if m_in.tlbld = '1' then
erat.epn0 <= m_in.addr(63 downto MIN_LG_PGSZ);
erat.rpn0 <= m_in.pte(REAL_ADDR_BITS-1 downto MIN_LG_PGSZ);
erat.priv0 <= m_in.pte(3);
erat.valid(0) <= '1';
erat.valid(1) <= '0';
erat.mru <= '0';
elsif r_int.tlbcheck = '1' and itlb_hit = '1' then
if erat.mru = '0' then
erat.epn1 <= r.nia(63 downto MIN_LG_PGSZ);
erat.rpn1 <= itlb_pte(REAL_ADDR_BITS-1 downto MIN_LG_PGSZ);
erat.priv1 <= itlb_pte(3);
erat.valid(1) <= '1';
else
erat.epn0 <= r.nia(63 downto MIN_LG_PGSZ);
erat.rpn0 <= itlb_pte(REAL_ADDR_BITS-1 downto MIN_LG_PGSZ);
erat.priv0 <= itlb_pte(3);
erat.valid(0) <= '1';
end if;
erat.mru <= not erat.mru;
end if;
end if;
end if;
end process;
-- Read TLB using the NIA for the next cycle
itlb_read : process(clk)
variable tlb_req_index : std_ulogic_vector(TLB_BITS - 1 downto 0);
begin
if rising_edge(clk) then
if advance_nia = '1' then
tlb_req_index := hash_ea(r_next.nia);
if is_X(tlb_req_index) then
itlb_pte <= (others => 'X');
itlb_ttag <= (others => 'X');
itlb_valid <= 'X';
else
itlb_pte <= itlb_ptes(to_integer(unsigned(tlb_req_index)));
itlb_ttag <= itlb_tags(to_integer(unsigned(tlb_req_index)));
itlb_valid <= itlb_valids(to_integer(unsigned(tlb_req_index)));
end if;
end if;
end if;
end process;
-- TLB hit detection
itlb_lookup : process(all)
begin
itlb_hit <= '0';
if itlb_ttag = r.nia(63 downto MIN_LG_PGSZ + TLB_BITS) then
itlb_hit <= itlb_valid;
end if;
end process;
-- iTLB update
itlb_update: process(clk)
variable wr_index : std_ulogic_vector(TLB_BITS - 1 downto 0);
begin
if rising_edge(clk) then
wr_index := hash_ea(m_in.addr);
if rst = '1' or (m_in.tlbie = '1' and m_in.doall = '1') then
-- clear all valid bits
for i in tlb_index_t loop
itlb_valids(i) <= '0';
end loop;
elsif m_in.tlbie = '1' then
assert not is_X(wr_index) report "icache index invalid on write" severity FAILURE;
-- clear entry regardless of hit or miss
itlb_valids(to_integer(unsigned(wr_index))) <= '0';
elsif m_in.tlbld = '1' then
assert not is_X(wr_index) report "icache index invalid on write" severity FAILURE;
itlb_tags(to_integer(unsigned(wr_index))) <= m_in.addr(63 downto MIN_LG_PGSZ + TLB_BITS);
itlb_ptes(to_integer(unsigned(wr_index))) <= m_in.pte;
itlb_valids(to_integer(unsigned(wr_index))) <= '1';
end if;
--ev.itlb_miss_resolved <= m_in.tlbld and not rst;
end if;
end process;
comb : process(all)
variable v : Fetch1ToIcacheType;
variable v_int : reg_internal_t;
variable next_nia : std_ulogic_vector(63 downto 0);
variable m32 : std_ulogic;
variable ehit, esel : std_ulogic;
variable eaa_priv : std_ulogic;
begin
v := r;
v_int := r_int;
v.predicted := '0';
v.pred_ntaken := '0';
v.req := not stop_in;
v_int.tlbstall := r_int.tlbcheck;
v_int.tlbcheck := '0';
if r_int.tlbcheck = '1' and itlb_hit = '0' then
v.fetch_fail := '1';
end if;
-- Combinatorial computation of the CIA for the next cycle.
-- Needs to be simple so the result can be used for RAM
-- and TLB access in the icache.
-- If we are stalled, this still advances, and the assumption
-- is that it will not be used.
m32 := r_int.mode_32bit;
if w_in.redirect = '1' then
next_nia := w_in.redirect_nia(63 downto 2) & "00";
m32 := w_in.mode_32bit;
v.virt_mode := w_in.virt_mode;
v.priv_mode := w_in.priv_mode;
v.big_endian := w_in.big_endian;
v_int.mode_32bit := w_in.mode_32bit;
v.fetch_fail := '0';
elsif d_in.redirect = '1' then
next_nia := d_in.redirect_nia(63 downto 2) & "00";
v.fetch_fail := '0';
elsif r_int.tlbstall = '1' then
-- this case is needed so that the correct icache tags are read
next_nia := r.nia;
else
next_nia := r_int.next_nia;
end if;
if m32 = '1' then
next_nia(63 downto 32) := (others => '0');
end if;
v.nia := next_nia;
v_int.next_nia := std_ulogic_vector(unsigned(next_nia) + 4);
-- Use v_int.next_nia as the BTC read address before it gets possibly
-- overridden with the reset or interrupt address or the predicted branch
-- target address, in order to improve timing. If it gets overridden then
-- rd_is_niap4 gets cleared to indicate that the BTC data doesn't apply.
btc_rd_addr <= unsigned(v_int.next_nia(BTC_ADDR_BITS + 1 downto 2));
v_int.rd_is_niap4 := '1';
-- If the last NIA value went down with a stop mark, it didn't get
-- executed, and hence we shouldn't increment NIA.
advance_nia <= rst or w_in.interrupt or w_in.redirect or d_in.redirect or
(not r.stop_mark and not (r.req and stall_in));
-- reduce metavalue warnings in sim
if is_X(rst) then
advance_nia <= '1';
end if;
-- Translate next_nia to real if possible, otherwise we have to stall
-- and look up the TLB.
ehit := '0';
esel := '0';
eaa_priv := '1';
if next_nia(63 downto MIN_LG_PGSZ) = erat.epn1 and erat.valid(1) = '1' then
ehit := '1';
esel := '1';
end if;
if next_nia(63 downto MIN_LG_PGSZ) = erat.epn0 and erat.valid(0) = '1' then
ehit := '1';
end if;
if v.virt_mode = '0' then
v.rpn := v.nia(REAL_ADDR_BITS - 1 downto MIN_LG_PGSZ);
eaa_priv := '1';
elsif esel = '1' then
v.rpn := erat.rpn1;
eaa_priv := erat.priv1;
else
v.rpn := erat.rpn0;
eaa_priv := erat.priv0;
end if;
if advance_nia = '1' and ehit = '0' and v.virt_mode = '1' and
r_int.tlbcheck = '0' and v.fetch_fail = '0' then
v_int.tlbstall := '1';
v_int.tlbcheck := '1';
end if;
if ehit = '1' or v.virt_mode = '0' then
if eaa_priv = '1' and v.priv_mode = '0' then
v.fetch_fail := '1';
else
v.fetch_fail := '0';
end if;
end if;
erat_hit <= ehit and advance_nia;
erat_sel <= esel;
if rst /= '0' then
if alt_reset_in = '1' then
v_int.next_nia := ALT_RESET_ADDRESS;
else
v_int.next_nia := RESET_ADDRESS;
end if;
elsif w_in.interrupt = '1' then
v_int.next_nia := 47x"0" & w_in.intr_vec(16 downto 2) & "00";
end if;
if rst /= '0' or w_in.interrupt = '1' then
v.req := '0';
v.virt_mode := '0';
v.priv_mode := '1';
v.big_endian := '0';
v_int.mode_32bit := '0';
v_int.rd_is_niap4 := '0';
v_int.tlbstall := '0';
v_int.tlbcheck := '0';
v.fetch_fail := '0';
end if;
if v.fetch_fail = '1' then
v_int.tlbstall := '1';
end if;
if v_int.tlbstall = '1' then
v.req := '0';
end if;
-- If there is a valid entry in the BTC which corresponds to the next instruction,
-- use that to predict the address of the instruction after that.
-- (w_in.redirect = '0' and d_in.redirect = '0' and r_int.tlbstall = '0')
-- implies v.nia = r_int.next_nia.
-- r_int.rd_is_niap4 implies r_int.next_nia is the address used to read the BTC.
if v.req = '1' and w_in.redirect = '0' and d_in.redirect = '0' and r_int.tlbstall = '0' and
btc_rd_valid = '1' and r_int.rd_is_niap4 = '1' and
btc_rd_data(BTC_WIDTH - 2) = r.virt_mode and
btc_rd_data(BTC_WIDTH - 3 downto BTC_TARGET_BITS)
= r_int.next_nia(BTC_TAG_BITS + BTC_ADDR_BITS + 1 downto BTC_ADDR_BITS + 2) then
v.predicted := btc_rd_data(BTC_WIDTH - 1);
v.pred_ntaken := not btc_rd_data(BTC_WIDTH - 1);
if btc_rd_data(BTC_WIDTH - 1) = '1' then
v_int.next_nia := btc_rd_data(BTC_TARGET_BITS - 1 downto 0) & "00";
v_int.rd_is_niap4 := '0';
end if;
end if;
r_next <= v;
r_next_int <= v_int;
-- Update outputs to the icache
i_out <= r;
i_out.next_nia <= next_nia;
i_out.next_rpn <= v.rpn;
end process;
end architecture behaviour;