mirror of
https://github.com/antonblanchard/microwatt.git
synced 2026-04-04 20:28:30 +00:00
Add framework for implementing an MMU
This adds a new module to implement an MMU. At the moment it doesn't do very much. Tlbie instructions now get sent by loadstore1 to mmu, which sends them to dcache, rather than loadstore1 sending them directly to dcache. TLB misses from dcache now get sent by loadstore1 to mmu, which currently just returns an error. Loadstore1 then generates a DSI in response to the error return from mmu.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
5
Makefile
5
Makefile
@@ -31,7 +31,7 @@ common.o: decode_types.o
|
||||
control.o: gpr_hazard.o cr_hazard.o common.o
|
||||
sim_jtag.o: sim_jtag_socket.o
|
||||
core_tb.o: common.o wishbone_types.o core.o soc.o sim_jtag.o
|
||||
core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o loadstore1.o dcache.o writeback.o core_debug.o
|
||||
core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o loadstore1.o mmu.o dcache.o writeback.o core_debug.o
|
||||
core_debug.o: common.o
|
||||
countzero.o:
|
||||
countzero_tb.o: common.o glibc_random.o countzero.o
|
||||
@@ -58,10 +58,11 @@ icache_tb.o: common.o wishbone_types.o icache.o wishbone_bram_wrapper.o
|
||||
dcache.o: utils.o common.o wishbone_types.o plru.o cache_ram.o utils.o
|
||||
dcache_tb.o: common.o wishbone_types.o dcache.o wishbone_bram_wrapper.o
|
||||
insn_helpers.o:
|
||||
loadstore1.o: common.o helpers.o decode_types.o
|
||||
loadstore1.o: common.o decode_types.o
|
||||
logical.o: decode_types.o
|
||||
multiply_tb.o: decode_types.o common.o glibc_random.o ppc_fx_insns.o multiply.o
|
||||
multiply.o: common.o decode_types.o
|
||||
mmu.o: common.o
|
||||
divider_tb.o: decode_types.o common.o glibc_random.o ppc_fx_insns.o divider.o
|
||||
divider.o: common.o decode_types.o
|
||||
ppc_fx_insns.o: helpers.o
|
||||
|
||||
25
common.vhdl
25
common.vhdl
@@ -246,7 +246,6 @@ package common is
|
||||
type Loadstore1ToDcacheType is record
|
||||
valid : std_ulogic;
|
||||
load : std_ulogic; -- is this a load
|
||||
tlbie : std_ulogic; -- is this a tlbie
|
||||
dcbz : std_ulogic;
|
||||
nc : std_ulogic;
|
||||
reserve : std_ulogic;
|
||||
@@ -267,6 +266,30 @@ package common is
|
||||
rc_error : std_ulogic;
|
||||
end record;
|
||||
|
||||
-- Request from loadstore1 to the MMU.  Loadstore1 raises it either for a
-- tlbie instruction or when dcache has reported a TLB miss.
type Loadstore1ToMmuType is record
|
||||
-- set by loadstore1 when it has a request for the MMU
valid : std_ulogic;
|
||||
-- set when the request is a tlbie rather than a TLB-miss lookup
tlbie : std_ulogic;
|
||||
-- address the request applies to
addr : std_ulogic_vector(63 downto 0);
|
||||
-- loadstore1 drives this from its l_in.data input; the MMU forwards it
-- to dcache in the pte field of MmuToDcacheType
rs : std_ulogic_vector(63 downto 0);
|
||||
end record;
|
||||
|
||||
-- Completion status returned by the MMU to loadstore1.
type MmuToLoadstore1Type is record
|
||||
-- set by the MMU when the current request has finished
done : std_ulogic;
|
||||
-- set together with done when the request failed; loadstore1 raises an
-- exception in that case instead of retrying the access
error : std_ulogic;
|
||||
end record;
|
||||
|
||||
-- Request from the MMU to dcache.  dcache turns a valid request into a
-- stage-0 request with addr and pte used as the address and data.
type MmuToDcacheType is record
|
||||
-- set by the MMU when it has a request for dcache
valid : std_ulogic;
|
||||
-- marks the request as a tlbie; dcache currently asserts that every
-- MMU request has this set ("unknown request from MMU" otherwise)
tlbie : std_ulogic;
|
||||
-- address for the TLB operation
addr : std_ulogic_vector(63 downto 0);
|
||||
-- value latched by dcache as the request data
pte : std_ulogic_vector(63 downto 0);
|
||||
end record;
|
||||
|
||||
-- Status returned by dcache to the MMU.
type DcacheToMmuType is record
|
||||
-- dcache currently ties this to '0' (collisions between loadstore1 and
-- MMU requests are not handled yet)
stall : std_ulogic;
|
||||
-- set by dcache when a valid stage-0 request flagged as tlbie is processed
done : std_ulogic;
|
||||
end record;
|
||||
|
||||
type Loadstore1ToWritebackType is record
|
||||
valid : std_ulogic;
|
||||
write_enable: std_ulogic;
|
||||
|
||||
19
core.vhdl
19
core.vhdl
@@ -65,10 +65,14 @@ architecture behave of core is
|
||||
signal execute1_to_loadstore1: Execute1ToLoadstore1Type;
|
||||
signal loadstore1_to_execute1: Loadstore1ToExecute1Type;
|
||||
signal loadstore1_to_writeback: Loadstore1ToWritebackType;
|
||||
signal loadstore1_to_mmu: Loadstore1ToMmuType;
|
||||
signal mmu_to_loadstore1: MmuToLoadstore1Type;
|
||||
|
||||
-- dcache signals
|
||||
signal loadstore1_to_dcache: Loadstore1ToDcacheType;
|
||||
signal dcache_to_loadstore1: DcacheToLoadstore1Type;
|
||||
signal mmu_to_dcache: MmuToDcacheType;
|
||||
signal dcache_to_mmu: DcacheToMmuType;
|
||||
|
||||
-- local signals
|
||||
signal fetch1_stall_in : std_ulogic;
|
||||
@@ -124,6 +128,7 @@ architecture behave of core is
|
||||
attribute keep_hierarchy of cr_file_0 : label is keep_h(DISABLE_FLATTEN);
|
||||
attribute keep_hierarchy of execute1_0 : label is keep_h(DISABLE_FLATTEN);
|
||||
attribute keep_hierarchy of loadstore1_0 : label is keep_h(DISABLE_FLATTEN);
|
||||
attribute keep_hierarchy of mmu_0 : label is keep_h(DISABLE_FLATTEN);
|
||||
attribute keep_hierarchy of dcache_0 : label is keep_h(DISABLE_FLATTEN);
|
||||
attribute keep_hierarchy of writeback_0 : label is keep_h(DISABLE_FLATTEN);
|
||||
attribute keep_hierarchy of debug_0 : label is keep_h(DISABLE_FLATTEN);
|
||||
@@ -270,10 +275,22 @@ begin
|
||||
l_out => loadstore1_to_writeback,
|
||||
d_out => loadstore1_to_dcache,
|
||||
d_in => dcache_to_loadstore1,
|
||||
m_out => loadstore1_to_mmu,
|
||||
m_in => mmu_to_loadstore1,
|
||||
dc_stall => dcache_stall_out,
|
||||
stall_out => ls1_stall_out
|
||||
);
|
||||
|
||||
mmu_0: entity work.mmu
|
||||
port map (
|
||||
clk => clk,
|
||||
rst => core_rst,
|
||||
l_in => loadstore1_to_mmu,
|
||||
l_out => mmu_to_loadstore1,
|
||||
d_out => mmu_to_dcache,
|
||||
d_in => dcache_to_mmu
|
||||
);
|
||||
|
||||
dcache_0: entity work.dcache
|
||||
generic map(
|
||||
LINE_SIZE => 64,
|
||||
@@ -285,6 +302,8 @@ begin
|
||||
rst => core_rst,
|
||||
d_in => loadstore1_to_dcache,
|
||||
d_out => dcache_to_loadstore1,
|
||||
m_in => mmu_to_dcache,
|
||||
m_out => dcache_to_mmu,
|
||||
stall_out => dcache_stall_out,
|
||||
wishbone_in => wishbone_data_in,
|
||||
wishbone_out => wishbone_data_out
|
||||
|
||||
140
dcache.vhdl
140
dcache.vhdl
@@ -40,6 +40,9 @@ entity dcache is
|
||||
d_in : in Loadstore1ToDcacheType;
|
||||
d_out : out DcacheToLoadstore1Type;
|
||||
|
||||
m_in : in MmuToDcacheType;
|
||||
m_out : out DcacheToMmuType;
|
||||
|
||||
stall_out : out std_ulogic;
|
||||
|
||||
wishbone_out : out wishbone_master_out;
|
||||
@@ -146,9 +149,6 @@ architecture rtl of dcache is
|
||||
attribute ram_style of dtlb_tags : signal is "distributed";
|
||||
attribute ram_style of dtlb_ptes : signal is "distributed";
|
||||
|
||||
signal r0 : Loadstore1ToDcacheType;
|
||||
signal r0_valid : std_ulogic;
|
||||
|
||||
-- Record for storing permission, attribute, etc. bits from a PTE
|
||||
type perm_attr_t is record
|
||||
reference : std_ulogic;
|
||||
@@ -205,6 +205,15 @@ architecture rtl of dcache is
|
||||
-- first stage emits a stall for a complex op.
|
||||
--
|
||||
|
||||
-- Stage 0 register, basically contains just the latched request.
-- The request comes either from loadstore1 (d_in) or is built from an
-- MMU request (m_in); the extra tlbie flag records which TLB operation,
-- if any, the latched request represents.
|
||||
type reg_stage_0_t is record
|
||||
-- latched request: a copy of d_in, or fields filled in from m_in
req : Loadstore1ToDcacheType;
|
||||
-- copied from m_in.tlbie for MMU requests, '0' for loadstore1 requests
tlbie : std_ulogic;
|
||||
end record;
|
||||
|
||||
signal r0 : reg_stage_0_t;
|
||||
signal r0_valid : std_ulogic;
|
||||
|
||||
-- First stage register, contains state for stage 1 of load hits
|
||||
-- and for the state machine used by all other operations
|
||||
--
|
||||
@@ -424,35 +433,61 @@ begin
|
||||
assert (64 = wishbone_data_bits)
|
||||
report "Can't yet handle a wishbone width that isn't 64-bits" severity FAILURE;
|
||||
|
||||
-- Latch the request in r0 as long as we're not stalling
|
||||
-- Latch the request in r0.req as long as we're not stalling
|
||||
stage_0 : process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
if rst = '1' then
|
||||
r0.valid <= '0';
|
||||
r0.req.valid <= '0';
|
||||
elsif stall_out = '0' then
|
||||
r0 <= d_in;
|
||||
assert (d_in.valid and m_in.valid) = '0' report
|
||||
"request collision loadstore vs MMU";
|
||||
if m_in.valid = '1' then
|
||||
r0.req.valid <= '1';
|
||||
r0.req.load <= '0';
|
||||
r0.req.dcbz <= '0';
|
||||
r0.req.nc <= '0';
|
||||
r0.req.reserve <= '0';
|
||||
r0.req.virt_mode <= '0';
|
||||
r0.req.priv_mode <= '1';
|
||||
r0.req.addr <= m_in.addr;
|
||||
r0.req.data <= m_in.pte;
|
||||
r0.req.byte_sel <= (others => '1');
|
||||
r0.tlbie <= m_in.tlbie;
|
||||
assert m_in.tlbie = '1' report "unknown request from MMU";
|
||||
else
|
||||
r0.req <= d_in;
|
||||
r0.tlbie <= '0';
|
||||
end if;
|
||||
end if;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
-- we don't yet handle collisions between loadstore1 requests and MMU requests
|
||||
m_out.stall <= '0';
|
||||
|
||||
-- Hold off the request in r0 when stalling,
|
||||
-- and cancel it if we get an error in a previous request.
|
||||
r0_valid <= r0.valid and not stall_out and not r1.error_done;
|
||||
r0_valid <= r0.req.valid and not stall_out and not r1.error_done;
|
||||
|
||||
-- TLB
|
||||
-- Operates in the second cycle on the request latched in r0.
|
||||
-- Operates in the second cycle on the request latched in r0.req.
|
||||
-- TLB updates write the entry at the end of the second cycle.
|
||||
tlb_read : process(clk)
|
||||
variable index : tlb_index_t;
|
||||
variable addrbits : std_ulogic_vector(TLB_SET_BITS - 1 downto 0);
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
if stall_out = '1' then
|
||||
-- keep reading the same thing while stalled
|
||||
index := tlb_req_index;
|
||||
else
|
||||
index := to_integer(unsigned(d_in.addr(TLB_LG_PGSZ + TLB_SET_BITS - 1
|
||||
downto TLB_LG_PGSZ)));
|
||||
if m_in.valid = '1' then
|
||||
addrbits := m_in.addr(TLB_LG_PGSZ + TLB_SET_BITS - 1 downto TLB_LG_PGSZ);
|
||||
else
|
||||
addrbits := d_in.addr(TLB_LG_PGSZ + TLB_SET_BITS - 1 downto TLB_LG_PGSZ);
|
||||
end if;
|
||||
index := to_integer(unsigned(addrbits));
|
||||
end if;
|
||||
tlb_valid_way <= dtlb_valids(index);
|
||||
tlb_tag_way <= dtlb_tags(index);
|
||||
@@ -500,11 +535,11 @@ begin
|
||||
variable hit : std_ulogic;
|
||||
variable eatag : tlb_tag_t;
|
||||
begin
|
||||
tlb_req_index <= to_integer(unsigned(r0.addr(TLB_LG_PGSZ + TLB_SET_BITS - 1
|
||||
tlb_req_index <= to_integer(unsigned(r0.req.addr(TLB_LG_PGSZ + TLB_SET_BITS - 1
|
||||
downto TLB_LG_PGSZ)));
|
||||
hitway := 0;
|
||||
hit := '0';
|
||||
eatag := r0.addr(63 downto TLB_LG_PGSZ + TLB_SET_BITS);
|
||||
eatag := r0.req.addr(63 downto TLB_LG_PGSZ + TLB_SET_BITS);
|
||||
for i in tlb_way_t loop
|
||||
if tlb_valid_way(i) = '1' and
|
||||
read_tlb_tag(i, tlb_tag_way) = eatag then
|
||||
@@ -515,13 +550,13 @@ begin
|
||||
tlb_hit <= hit and r0_valid;
|
||||
tlb_hit_way <= hitway;
|
||||
pte <= read_tlb_pte(hitway, tlb_pte_way);
|
||||
valid_ra <= tlb_hit or not r0.virt_mode;
|
||||
if r0.virt_mode = '1' then
|
||||
valid_ra <= tlb_hit or not r0.req.virt_mode;
|
||||
if r0.req.virt_mode = '1' then
|
||||
ra <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
|
||||
r0.addr(TLB_LG_PGSZ - 1 downto 0);
|
||||
r0.req.addr(TLB_LG_PGSZ - 1 downto 0);
|
||||
perm_attr <= extract_perm_attr(pte);
|
||||
else
|
||||
ra <= r0.addr(REAL_ADDR_BITS - 1 downto 0);
|
||||
ra <= r0.req.addr(REAL_ADDR_BITS - 1 downto 0);
|
||||
perm_attr <= real_mode_perm_attr;
|
||||
end if;
|
||||
end process;
|
||||
@@ -540,9 +575,9 @@ begin
|
||||
tlbia := '0';
|
||||
tlbwe := '0';
|
||||
if r0_valid = '1' and r0.tlbie = '1' then
|
||||
if r0.addr(11 downto 10) /= "00" then
|
||||
if r0.req.addr(11 downto 10) /= "00" then
|
||||
tlbia := '1';
|
||||
elsif r0.addr(9) = '1' then
|
||||
elsif r0.req.addr(9) = '1' then
|
||||
tlbwe := '1';
|
||||
else
|
||||
tlbie := '1';
|
||||
@@ -563,15 +598,16 @@ begin
|
||||
else
|
||||
repl_way := to_integer(unsigned(tlb_plru_victim(tlb_req_index)));
|
||||
end if;
|
||||
eatag := r0.addr(63 downto TLB_LG_PGSZ + TLB_SET_BITS);
|
||||
eatag := r0.req.addr(63 downto TLB_LG_PGSZ + TLB_SET_BITS);
|
||||
tagset := tlb_tag_way;
|
||||
write_tlb_tag(repl_way, tagset, eatag);
|
||||
dtlb_tags(tlb_req_index) <= tagset;
|
||||
pteset := tlb_pte_way;
|
||||
write_tlb_pte(repl_way, pteset, r0.data);
|
||||
write_tlb_pte(repl_way, pteset, r0.req.data);
|
||||
dtlb_ptes(tlb_req_index) <= pteset;
|
||||
dtlb_valids(tlb_req_index)(repl_way) <= '1';
|
||||
end if;
|
||||
m_out.done <= r0_valid and r0.tlbie;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
@@ -628,8 +664,8 @@ begin
|
||||
variable hit_way_set : hit_way_set_t;
|
||||
begin
|
||||
-- Extract line, row and tag from request
|
||||
req_index <= get_index(r0.addr);
|
||||
req_row <= get_row(r0.addr);
|
||||
req_index <= get_index(r0.req.addr);
|
||||
req_row <= get_row(r0.req.addr);
|
||||
req_tag <= get_tag(ra);
|
||||
|
||||
-- Only do anything if not being stalled by stage 1
|
||||
@@ -648,13 +684,13 @@ begin
|
||||
-- the TLB, and then decide later which match to use.
|
||||
hit_way := 0;
|
||||
is_hit := '0';
|
||||
if r0.virt_mode = '1' then
|
||||
if r0.req.virt_mode = '1' then
|
||||
for j in tlb_way_t loop
|
||||
hit_way_set(j) := 0;
|
||||
s_hit := '0';
|
||||
s_pte := read_tlb_pte(j, tlb_pte_way);
|
||||
s_ra := s_pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
|
||||
r0.addr(TLB_LG_PGSZ - 1 downto 0);
|
||||
r0.req.addr(TLB_LG_PGSZ - 1 downto 0);
|
||||
s_tag := get_tag(s_ra);
|
||||
for i in way_t loop
|
||||
if go = '1' and cache_valids(req_index)(i) = '1' and
|
||||
@@ -671,7 +707,7 @@ begin
|
||||
hit_way := hit_way_set(tlb_hit_way);
|
||||
end if;
|
||||
else
|
||||
s_tag := get_tag(r0.addr(REAL_ADDR_BITS - 1 downto 0));
|
||||
s_tag := get_tag(r0.req.addr(REAL_ADDR_BITS - 1 downto 0));
|
||||
for i in way_t loop
|
||||
if go = '1' and cache_valids(req_index)(i) = '1' and
|
||||
read_tag(i, cache_tags(req_index)) = s_tag then
|
||||
@@ -689,18 +725,18 @@ begin
|
||||
|
||||
-- work out whether we have permission for this access
|
||||
-- NB we don't yet implement AMR, thus no KUAP
|
||||
rc_ok <= perm_attr.reference and (r0.load or perm_attr.changed);
|
||||
perm_ok <= (r0.priv_mode or not perm_attr.priv) and
|
||||
(perm_attr.wr_perm or (r0.load and perm_attr.rd_perm));
|
||||
rc_ok <= perm_attr.reference and (r0.req.load or perm_attr.changed);
|
||||
perm_ok <= (r0.req.priv_mode or not perm_attr.priv) and
|
||||
(perm_attr.wr_perm or (r0.req.load and perm_attr.rd_perm));
|
||||
|
||||
-- Combine the request and cache hit status to decide what
|
||||
-- operation needs to be done
|
||||
--
|
||||
nc := r0.nc or perm_attr.nocache;
|
||||
nc := r0.req.nc or perm_attr.nocache;
|
||||
op := OP_NONE;
|
||||
if go = '1' then
|
||||
if valid_ra = '1' and rc_ok = '1' and perm_ok = '1' then
|
||||
opsel := r0.load & nc & is_hit;
|
||||
opsel := r0.req.load & nc & is_hit;
|
||||
case opsel is
|
||||
when "101" => op := OP_LOAD_HIT;
|
||||
when "100" => op := OP_LOAD_MISS;
|
||||
@@ -723,7 +759,11 @@ begin
|
||||
-- If we're stalling then we need to keep reading the last
|
||||
-- row requested.
|
||||
if stall_out = '0' then
|
||||
early_req_row <= get_row(d_in.addr);
|
||||
if m_in.valid = '1' then
|
||||
early_req_row <= get_row(m_in.addr);
|
||||
else
|
||||
early_req_row <= get_row(d_in.addr);
|
||||
end if;
|
||||
else
|
||||
early_req_row <= req_row;
|
||||
end if;
|
||||
@@ -741,17 +781,17 @@ begin
|
||||
cancel_store <= '0';
|
||||
set_rsrv <= '0';
|
||||
clear_rsrv <= '0';
|
||||
if r0_valid = '1' and r0.reserve = '1' then
|
||||
if r0_valid = '1' and r0.req.reserve = '1' then
|
||||
-- XXX generate alignment interrupt if address is not aligned
|
||||
-- XXX or if r0.nc = '1'
|
||||
if r0.load = '1' then
|
||||
-- XXX or if r0.req.nc = '1'
|
||||
if r0.req.load = '1' then
|
||||
-- load with reservation
|
||||
set_rsrv <= '1';
|
||||
else
|
||||
-- store conditional
|
||||
clear_rsrv <= '1';
|
||||
if reservation.valid = '0' or
|
||||
r0.addr(63 downto LINE_OFF_BITS) /= reservation.addr then
|
||||
r0.req.addr(63 downto LINE_OFF_BITS) /= reservation.addr then
|
||||
cancel_store <= '1';
|
||||
end if;
|
||||
end if;
|
||||
@@ -765,7 +805,7 @@ begin
|
||||
reservation.valid <= '0';
|
||||
elsif set_rsrv = '1' then
|
||||
reservation.valid <= '1';
|
||||
reservation.addr <= r0.addr(63 downto LINE_OFF_BITS);
|
||||
reservation.addr <= r0.req.addr(63 downto LINE_OFF_BITS);
|
||||
end if;
|
||||
end if;
|
||||
end process;
|
||||
@@ -818,12 +858,6 @@ begin
|
||||
d_out.valid <= '1';
|
||||
end if;
|
||||
|
||||
-- tlbie is handled above and doesn't go through the cache state machine
|
||||
if r1.tlbie_done = '1' then
|
||||
report "completing tlbie";
|
||||
d_out.valid <= '1';
|
||||
end if;
|
||||
|
||||
-- Slow ops (load miss, NC, stores)
|
||||
if r1.slow_valid = '1' then
|
||||
-- If it's a load, enable register writeback and switch
|
||||
@@ -900,8 +934,8 @@ begin
|
||||
if r1.state = IDLE then
|
||||
-- In IDLE state, the only write path is the store-hit update case
|
||||
wr_addr <= std_ulogic_vector(to_unsigned(req_row, ROW_BITS));
|
||||
wr_data <= r0.data;
|
||||
wr_sel <= r0.byte_sel;
|
||||
wr_data <= r0.req.data;
|
||||
wr_sel <= r0.req.byte_sel;
|
||||
else
|
||||
-- Otherwise, we might be doing a reload or a DCBZ
|
||||
if r1.req.dcbz = '1' then
|
||||
@@ -936,17 +970,17 @@ begin
|
||||
dcache_fast_hit : process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
-- If we have a request incoming, we have to latch it as r0.valid
|
||||
-- If we have a request incoming, we have to latch it as r0.req.valid
|
||||
-- is only set for a single cycle. It's up to the control logic to
|
||||
-- ensure we don't override an uncompleted request (for now we are
|
||||
-- single issue on load/stores so we are fine, later, we can generate
|
||||
-- a stall output if necessary).
|
||||
|
||||
if req_op /= OP_NONE and stall_out = '0' then
|
||||
r1.req <= r0;
|
||||
r1.req <= r0.req;
|
||||
report "op:" & op_t'image(req_op) &
|
||||
" addr:" & to_hstring(r0.addr) &
|
||||
" nc:" & std_ulogic'image(r0.nc) &
|
||||
" addr:" & to_hstring(r0.req.addr) &
|
||||
" nc:" & std_ulogic'image(r0.req.nc) &
|
||||
" idx:" & integer'image(req_index) &
|
||||
" tag:" & to_hstring(req_tag) &
|
||||
" way: " & integer'image(req_hit_way);
|
||||
@@ -1018,7 +1052,7 @@ begin
|
||||
when OP_LOAD_MISS =>
|
||||
-- Normal load cache miss, start the reload machine
|
||||
--
|
||||
report "cache miss addr:" & to_hstring(r0.addr) &
|
||||
report "cache miss addr:" & to_hstring(r0.req.addr) &
|
||||
" idx:" & integer'image(req_index) &
|
||||
" way:" & integer'image(replace_way) &
|
||||
" tag:" & to_hstring(req_tag);
|
||||
@@ -1053,7 +1087,7 @@ begin
|
||||
r1.state <= RELOAD_WAIT_ACK;
|
||||
|
||||
when OP_LOAD_NC =>
|
||||
r1.wb.sel <= r0.byte_sel;
|
||||
r1.wb.sel <= r0.req.byte_sel;
|
||||
r1.wb.adr <= ra(r1.wb.adr'left downto 3) & "000";
|
||||
r1.wb.cyc <= '1';
|
||||
r1.wb.stb <= '1';
|
||||
@@ -1061,10 +1095,10 @@ begin
|
||||
r1.state <= NC_LOAD_WAIT_ACK;
|
||||
|
||||
when OP_STORE_HIT | OP_STORE_MISS =>
|
||||
if r0.dcbz = '0' then
|
||||
r1.wb.sel <= r0.byte_sel;
|
||||
if r0.req.dcbz = '0' then
|
||||
r1.wb.sel <= r0.req.byte_sel;
|
||||
r1.wb.adr <= ra(r1.wb.adr'left downto 3) & "000";
|
||||
r1.wb.dat <= r0.data;
|
||||
r1.wb.dat <= r0.req.data;
|
||||
if cancel_store = '0' then
|
||||
r1.wb.cyc <= '1';
|
||||
r1.wb.stb <= '1';
|
||||
|
||||
@@ -15,6 +15,9 @@ architecture behave of dcache_tb is
|
||||
signal d_in : Loadstore1ToDcacheType;
|
||||
signal d_out : DcacheToLoadstore1Type;
|
||||
|
||||
signal m_in : MmuToDcacheType;
|
||||
signal m_out : DcacheToMmuType;
|
||||
|
||||
signal wb_bram_in : wishbone_master_out;
|
||||
signal wb_bram_out : wishbone_slave_out;
|
||||
|
||||
@@ -30,6 +33,8 @@ begin
|
||||
rst => rst,
|
||||
d_in => d_in,
|
||||
d_out => d_out,
|
||||
m_in => m_in,
|
||||
m_out => m_out,
|
||||
wishbone_out => wb_bram_in,
|
||||
wishbone_in => wb_bram_out
|
||||
);
|
||||
@@ -68,10 +73,12 @@ begin
|
||||
-- Clear stuff
|
||||
d_in.valid <= '0';
|
||||
d_in.load <= '0';
|
||||
d_in.tlbie <= '0';
|
||||
d_in.nc <= '0';
|
||||
d_in.addr <= (others => '0');
|
||||
d_in.data <= (others => '0');
|
||||
m_in.valid <= '0';
|
||||
m_in.addr <= (others => '0');
|
||||
m_in.pte <= (others => '0');
|
||||
|
||||
wait for 4*clk_period;
|
||||
wait until rising_edge(clk);
|
||||
|
||||
105
loadstore1.vhdl
105
loadstore1.vhdl
@@ -5,7 +5,6 @@ use ieee.numeric_std.all;
|
||||
library work;
|
||||
use work.decode_types.all;
|
||||
use work.common.all;
|
||||
use work.helpers.all;
|
||||
|
||||
-- 2 cycle LSU
|
||||
-- We calculate the address in the first cycle
|
||||
@@ -22,6 +21,9 @@ entity loadstore1 is
|
||||
d_out : out Loadstore1ToDcacheType;
|
||||
d_in : in DcacheToLoadstore1Type;
|
||||
|
||||
m_out : out Loadstore1ToMmuType;
|
||||
m_in : in MmuToLoadstore1Type;
|
||||
|
||||
dc_stall : in std_ulogic;
|
||||
stall_out : out std_ulogic
|
||||
);
|
||||
@@ -38,7 +40,9 @@ architecture behave of loadstore1 is
|
||||
SECOND_REQ, -- send 2nd request of unaligned xfer
|
||||
FIRST_ACK_WAIT, -- waiting for 1st ack from dcache
|
||||
LAST_ACK_WAIT, -- waiting for last ack from dcache
|
||||
LD_UPDATE -- writing rA with computed addr on load
|
||||
LD_UPDATE, -- writing rA with computed addr on load
|
||||
MMU_LOOKUP_1ST, -- waiting for MMU to look up translation
|
||||
MMU_LOOKUP_LAST
|
||||
);
|
||||
|
||||
type reg_stage_t is record
|
||||
@@ -62,6 +66,7 @@ architecture behave of loadstore1 is
|
||||
virt_mode : std_ulogic;
|
||||
priv_mode : std_ulogic;
|
||||
state : state_t;
|
||||
first_bytes : std_ulogic_vector(7 downto 0);
|
||||
second_bytes : std_ulogic_vector(7 downto 0);
|
||||
dar : std_ulogic_vector(63 downto 0);
|
||||
dsisr : std_ulogic_vector(31 downto 0);
|
||||
@@ -146,6 +151,7 @@ begin
|
||||
variable sprval : std_ulogic_vector(63 downto 0);
|
||||
variable exception : std_ulogic;
|
||||
variable next_addr : std_ulogic_vector(63 downto 0);
|
||||
variable mmureq : std_ulogic;
|
||||
variable dsisr : std_ulogic_vector(31 downto 0);
|
||||
begin
|
||||
v := r;
|
||||
@@ -158,6 +164,7 @@ begin
|
||||
sprval := (others => '0'); -- avoid inferred latches
|
||||
exception := '0';
|
||||
dsisr := (others => '0');
|
||||
mmureq := '0';
|
||||
|
||||
write_enable := '0';
|
||||
do_update := '0';
|
||||
@@ -230,7 +237,7 @@ begin
|
||||
req := '1';
|
||||
v.dcbz := '1';
|
||||
when OP_TLBIE =>
|
||||
req := '1';
|
||||
mmureq := '1';
|
||||
v.tlbie := '1';
|
||||
when OP_MFSPR =>
|
||||
done := '1';
|
||||
@@ -282,18 +289,14 @@ begin
|
||||
-- Do length_to_sel and work out if we are doing 2 dwords
|
||||
long_sel := xfer_data_sel(l_in.length, v.addr(2 downto 0));
|
||||
byte_sel := long_sel(7 downto 0);
|
||||
v.first_bytes := byte_sel;
|
||||
v.second_bytes := long_sel(15 downto 8);
|
||||
|
||||
v.addr := lsu_sum;
|
||||
|
||||
-- Do byte reversing and rotating for stores in the first cycle
|
||||
byte_offset := "000";
|
||||
byte_offset := unsigned(lsu_sum(2 downto 0));
|
||||
brev_lenm1 := "000";
|
||||
if v.tlbie = '0' then
|
||||
byte_offset := unsigned(lsu_sum(2 downto 0));
|
||||
if l_in.byte_reverse = '1' then
|
||||
brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1;
|
||||
end if;
|
||||
if l_in.byte_reverse = '1' then
|
||||
brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1;
|
||||
end if;
|
||||
for i in 0 to 7 loop
|
||||
k := (to_unsigned(i, 3) xor brev_lenm1) + byte_offset;
|
||||
@@ -309,6 +312,10 @@ begin
|
||||
v.state := SECOND_REQ;
|
||||
end if;
|
||||
end if;
|
||||
if mmureq = '1' then
|
||||
stall := '1';
|
||||
v.state := LAST_ACK_WAIT;
|
||||
end if;
|
||||
end if;
|
||||
|
||||
when SECOND_REQ =>
|
||||
@@ -323,12 +330,19 @@ begin
|
||||
if d_in.valid = '1' then
|
||||
if d_in.error = '1' then
|
||||
-- dcache will discard the second request
|
||||
exception := '1';
|
||||
dsisr(30) := d_in.tlb_miss;
|
||||
dsisr(63 - 36) := d_in.perm_error;
|
||||
dsisr(63 - 38) := not r.load;
|
||||
dsisr(63 - 45) := d_in.rc_error;
|
||||
v.state := IDLE;
|
||||
addr := r.addr;
|
||||
if d_in.tlb_miss = '1' then
|
||||
-- give it to the MMU to look up
|
||||
mmureq := '1';
|
||||
v.state := MMU_LOOKUP_1ST;
|
||||
else
|
||||
-- signal an interrupt straight away
|
||||
exception := '1';
|
||||
dsisr(63 - 36) := d_in.perm_error;
|
||||
dsisr(63 - 38) := not r.load;
|
||||
dsisr(63 - 45) := d_in.rc_error;
|
||||
v.state := IDLE;
|
||||
end if;
|
||||
else
|
||||
v.state := LAST_ACK_WAIT;
|
||||
if r.load = '1' then
|
||||
@@ -337,6 +351,32 @@ begin
|
||||
end if;
|
||||
end if;
|
||||
|
||||
when MMU_LOOKUP_1ST | MMU_LOOKUP_LAST =>
|
||||
stall := '1';
|
||||
if two_dwords = '1' and r.state = MMU_LOOKUP_LAST then
|
||||
addr := next_addr;
|
||||
byte_sel := r.second_bytes;
|
||||
else
|
||||
addr := r.addr;
|
||||
byte_sel := r.first_bytes;
|
||||
end if;
|
||||
if m_in.done = '1' then
|
||||
if m_in.error = '0' then
|
||||
-- retry the request now that the MMU has installed a TLB entry
|
||||
req := '1';
|
||||
if r.state = MMU_LOOKUP_1ST then
|
||||
v.state := SECOND_REQ;
|
||||
else
|
||||
v.state := LAST_ACK_WAIT;
|
||||
end if;
|
||||
else
|
||||
exception := '1';
|
||||
dsisr(63 - 33) := '1';
|
||||
dsisr(63 - 38) := not r.load;
|
||||
v.state := IDLE;
|
||||
end if;
|
||||
end if;
|
||||
|
||||
when LAST_ACK_WAIT =>
|
||||
stall := '1';
|
||||
if d_in.valid = '1' then
|
||||
@@ -346,12 +386,18 @@ begin
|
||||
else
|
||||
addr := r.addr;
|
||||
end if;
|
||||
exception := '1';
|
||||
dsisr(30) := d_in.tlb_miss;
|
||||
dsisr(63 - 36) := d_in.perm_error;
|
||||
dsisr(63 - 38) := not r.load;
|
||||
dsisr(63 - 45) := d_in.rc_error;
|
||||
v.state := IDLE;
|
||||
if d_in.tlb_miss = '1' then
|
||||
-- give it to the MMU to look up
|
||||
mmureq := '1';
|
||||
v.state := MMU_LOOKUP_LAST;
|
||||
else
|
||||
-- signal an interrupt straight away
|
||||
exception := '1';
|
||||
dsisr(63 - 36) := d_in.perm_error;
|
||||
dsisr(63 - 38) := not r.load;
|
||||
dsisr(63 - 45) := d_in.rc_error;
|
||||
v.state := IDLE;
|
||||
end if;
|
||||
else
|
||||
write_enable := r.load;
|
||||
if r.load = '1' and r.update = '1' then
|
||||
@@ -366,6 +412,12 @@ begin
|
||||
end if;
|
||||
end if;
|
||||
end if;
|
||||
if m_in.done = '1' then
|
||||
-- tlbie is finished
|
||||
stall := '0';
|
||||
done := '1';
|
||||
v.state := IDLE;
|
||||
end if;
|
||||
|
||||
when LD_UPDATE =>
|
||||
do_update := '1';
|
||||
@@ -376,7 +428,6 @@ begin
|
||||
-- Update outputs to dcache
|
||||
d_out.valid <= req;
|
||||
d_out.load <= v.load;
|
||||
d_out.tlbie <= v.tlbie;
|
||||
d_out.dcbz <= v.dcbz;
|
||||
d_out.nc <= v.nc;
|
||||
d_out.reserve <= v.reserve;
|
||||
@@ -386,6 +437,12 @@ begin
|
||||
d_out.virt_mode <= v.virt_mode;
|
||||
d_out.priv_mode <= v.priv_mode;
|
||||
|
||||
-- Update outputs to MMU
|
||||
m_out.valid <= mmureq;
|
||||
m_out.tlbie <= v.tlbie;
|
||||
m_out.addr <= addr;
|
||||
m_out.rs <= l_in.data;
|
||||
|
||||
-- Update outputs to writeback
|
||||
-- Multiplex either cache data to the destination GPR or
|
||||
-- the address for the rA update.
|
||||
|
||||
@@ -25,6 +25,7 @@ filesets:
|
||||
- control.vhdl
|
||||
- execute1.vhdl
|
||||
- loadstore1.vhdl
|
||||
- mmu.vhdl
|
||||
- dcache.vhdl
|
||||
- multiply.vhdl
|
||||
- divider.vhdl
|
||||
|
||||
109
mmu.vhdl
Normal file
109
mmu.vhdl
Normal file
@@ -0,0 +1,109 @@
|
||||
library ieee;
|
||||
use ieee.std_logic_1164.all;
|
||||
use ieee.numeric_std.all;
|
||||
|
||||
library work;
|
||||
use work.common.all;
|
||||
|
||||
-- Radix MMU
|
||||
-- Supports 4-level trees as in arch 3.0B, but not the two-step translation for
|
||||
-- guests under a hypervisor (i.e. there is no gRA -> hRA translation).
-- NOTE(review): the architecture in this file does not walk any tree yet:
-- tlbie requests are forwarded to dcache, and every other request completes
-- with done and error both set.  The two lines above presumably describe the
-- intended design rather than the current behaviour -- confirm.
|
||||
|
||||
entity mmu is
|
||||
port (
|
||||
-- clock; state is registered on the rising edge
clk : in std_ulogic;
|
||||
-- synchronous reset, sampled on the rising clock edge
rst : in std_ulogic;
|
||||
|
||||
-- request from loadstore1 (tlbie or TLB-miss lookup)
l_in : in Loadstore1ToMmuType;
|
||||
-- completion/error status back to loadstore1
l_out : out MmuToLoadstore1Type;
|
||||
|
||||
-- request forwarded to dcache
d_out : out MmuToDcacheType;
|
||||
-- status from dcache; only the done field is read in this file
d_in : in DcacheToMmuType
|
||||
);
|
||||
end mmu;
|
||||
|
||||
architecture behave of mmu is
|
||||
|
||||
-- States of the MMU state machine:
--   IDLE           waiting for a request from loadstore1
--   TLBIE_WAIT     tlbie has been passed to dcache, waiting for d_in.done
--   RADIX_LOOKUP_0 non-tlbie request accepted; currently just reports an
--                  error and returns to IDLE
type state_t is (IDLE,
|
||||
TLBIE_WAIT,
|
||||
RADIX_LOOKUP_0
|
||||
);
|
||||
|
||||
-- Registered state of the MMU.
type reg_stage_t is record
|
||||
-- latched request from loadstore1
-- (valid and addr are re-sampled from l_in on every clock, not held)
valid : std_ulogic;
|
||||
addr : std_ulogic_vector(63 downto 0);
|
||||
-- current state of the state machine
state : state_t;
|
||||
end record;
|
||||
|
||||
signal r, rin : reg_stage_t;
|
||||
|
||||
begin
|
||||
|
||||
-- Clocked process: copies the next-state value rin into r on each rising
-- clock edge and prints trace messages.
mmu_0: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
if rst = '1' then
|
||||
-- only state and valid are reset; r.addr keeps its previous value
r.state <= IDLE;
|
||||
r.valid <= '0';
|
||||
else
|
||||
-- rin.valid mirrors l_in.valid, so this message is printed for
-- every request from loadstore1, tlbie requests included
if rin.valid = '1' then
|
||||
report "MMU got tlb miss for " & to_hstring(rin.addr);
|
||||
end if;
|
||||
-- l_out.done is also set when a tlbie completes, not only for misses
if l_out.done = '1' then
|
||||
report "MMU completing miss with error=" & std_ulogic'image(l_out.error);
|
||||
end if;
|
||||
r <= rin;
|
||||
end if;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
-- Combinational process: computes the next register value (rin) and all
-- outputs from the current state r and the inputs l_in and d_in.
mmu_1: process(all)
|
||||
-- next value for the r register
variable v : reg_stage_t;
|
||||
-- set when a request is passed on to dcache this cycle
variable dcreq : std_ulogic;
|
||||
-- set when the request is reported complete to loadstore1
variable done : std_ulogic;
|
||||
-- set when the request is reported as failed to loadstore1
variable err : std_ulogic;
|
||||
begin
|
||||
-- valid and addr simply track the loadstore1 request inputs
v.valid := l_in.valid;
|
||||
v.addr := l_in.addr;
|
||||
v.state := r.state;
|
||||
-- defaults: no dcache request, nothing completed, no error
dcreq := '0';
|
||||
done := '0';
|
||||
err := '0';
|
||||
|
||||
case r.state is
|
||||
when IDLE =>
|
||||
-- new requests are only accepted in IDLE; l_in.valid is ignored in
-- the other states
if l_in.valid = '1' then
|
||||
if l_in.tlbie = '1' then
|
||||
-- pass the tlbie to dcache in this same cycle, then wait for it
dcreq := '1';
|
||||
v.state := TLBIE_WAIT;
|
||||
else
|
||||
v.state := RADIX_LOOKUP_0;
|
||||
end if;
|
||||
end if;
|
||||
|
||||
when TLBIE_WAIT =>
|
||||
-- complete the tlbie towards loadstore1 once dcache reports done
if d_in.done = '1' then
|
||||
done := '1';
|
||||
v.state := IDLE;
|
||||
end if;
|
||||
|
||||
when RADIX_LOOKUP_0 =>
|
||||
-- no lookup is performed: complete immediately with an error
done := '1';
|
||||
err := '1';
|
||||
v.state := IDLE;
|
||||
end case;
|
||||
|
||||
-- update registers
|
||||
rin <= v;
|
||||
|
||||
-- drive outputs
|
||||
l_out.done <= done;
|
||||
l_out.error <= err;
|
||||
|
||||
-- the dcache request fields come straight from the l_in inputs, not
-- from registered copies; d_in.stall is not consulted
d_out.valid <= dcreq;
|
||||
d_out.tlbie <= l_in.tlbie;
|
||||
d_out.addr <= l_in.addr;
|
||||
d_out.pte <= l_in.rs;
|
||||
end process;
|
||||
end;
|
||||
Reference in New Issue
Block a user