mirror of
https://github.com/antonblanchard/microwatt.git
synced 2026-05-03 14:59:22 +00:00
Merge pull request #408 from paulusmack/plru-improvement
PLRU improvements
This commit is contained in:
4
Makefile
4
Makefile
@@ -68,8 +68,8 @@ all: $(all)
|
|||||||
$(shell scripts/make_version.sh git.vhdl)
|
$(shell scripts/make_version.sh git.vhdl)
|
||||||
|
|
||||||
core_files = decode_types.vhdl common.vhdl wishbone_types.vhdl fetch1.vhdl \
|
core_files = decode_types.vhdl common.vhdl wishbone_types.vhdl fetch1.vhdl \
|
||||||
utils.vhdl plru.vhdl cache_ram.vhdl icache.vhdl predecode.vhdl \
|
utils.vhdl plru.vhdl plrufn.vhdl cache_ram.vhdl icache.vhdl \
|
||||||
decode1.vhdl helpers.vhdl insn_helpers.vhdl \
|
predecode.vhdl decode1.vhdl helpers.vhdl insn_helpers.vhdl \
|
||||||
control.vhdl decode2.vhdl register_file.vhdl \
|
control.vhdl decode2.vhdl register_file.vhdl \
|
||||||
cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \
|
cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \
|
||||||
logical.vhdl countbits.vhdl multiply.vhdl multiply-32s.vhdl divider.vhdl \
|
logical.vhdl countbits.vhdl multiply.vhdl multiply-32s.vhdl divider.vhdl \
|
||||||
|
|||||||
209
dcache.vhdl
209
dcache.vhdl
@@ -84,7 +84,8 @@ architecture rtl of dcache is
|
|||||||
-- TAG_WIDTH is the width in bits of each way of the tag RAM
|
-- TAG_WIDTH is the width in bits of each way of the tag RAM
|
||||||
constant TAG_WIDTH : natural := TAG_BITS + 7 - ((TAG_BITS + 7) mod 8);
|
constant TAG_WIDTH : natural := TAG_BITS + 7 - ((TAG_BITS + 7) mod 8);
|
||||||
-- WAY_BITS is the number of bits to select a way
|
-- WAY_BITS is the number of bits to select a way
|
||||||
constant WAY_BITS : natural := log2(NUM_WAYS);
|
-- Make sure this is at least 1, to avoid 0-element vectors
|
||||||
|
constant WAY_BITS : natural := maximum(log2(NUM_WAYS), 1);
|
||||||
|
|
||||||
-- Example of layout for 32 lines of 64 bytes:
|
-- Example of layout for 32 lines of 64 bytes:
|
||||||
--
|
--
|
||||||
@@ -130,7 +131,7 @@ architecture rtl of dcache is
|
|||||||
|
|
||||||
-- L1 TLB.
|
-- L1 TLB.
|
||||||
constant TLB_SET_BITS : natural := log2(TLB_SET_SIZE);
|
constant TLB_SET_BITS : natural := log2(TLB_SET_SIZE);
|
||||||
constant TLB_WAY_BITS : natural := log2(TLB_NUM_WAYS);
|
constant TLB_WAY_BITS : natural := maximum(log2(TLB_NUM_WAYS), 1);
|
||||||
constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_SET_BITS);
|
constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_SET_BITS);
|
||||||
constant TLB_TAG_WAY_BITS : natural := TLB_NUM_WAYS * TLB_EA_TAG_BITS;
|
constant TLB_TAG_WAY_BITS : natural := TLB_NUM_WAYS * TLB_EA_TAG_BITS;
|
||||||
constant TLB_PTE_BITS : natural := 64;
|
constant TLB_PTE_BITS : natural := 64;
|
||||||
@@ -316,6 +317,7 @@ architecture rtl of dcache is
|
|||||||
tlb_hit : std_ulogic;
|
tlb_hit : std_ulogic;
|
||||||
tlb_hit_way : tlb_way_sig_t;
|
tlb_hit_way : tlb_way_sig_t;
|
||||||
tlb_hit_index : tlb_index_sig_t;
|
tlb_hit_index : tlb_index_sig_t;
|
||||||
|
tlb_victim : tlb_way_sig_t;
|
||||||
|
|
||||||
-- data buffer for data forwarded from writes to reads
|
-- data buffer for data forwarded from writes to reads
|
||||||
forward_data : std_ulogic_vector(63 downto 0);
|
forward_data : std_ulogic_vector(63 downto 0);
|
||||||
@@ -341,6 +343,8 @@ architecture rtl of dcache is
|
|||||||
acks_pending : unsigned(2 downto 0);
|
acks_pending : unsigned(2 downto 0);
|
||||||
inc_acks : std_ulogic;
|
inc_acks : std_ulogic;
|
||||||
dec_acks : std_ulogic;
|
dec_acks : std_ulogic;
|
||||||
|
choose_victim : std_ulogic;
|
||||||
|
victim_way : way_t;
|
||||||
|
|
||||||
-- Signals to complete (possibly with error)
|
-- Signals to complete (possibly with error)
|
||||||
ls_valid : std_ulogic;
|
ls_valid : std_ulogic;
|
||||||
@@ -397,8 +401,7 @@ architecture rtl of dcache is
|
|||||||
signal ram_wr_select : std_ulogic_vector(ROW_SIZE - 1 downto 0);
|
signal ram_wr_select : std_ulogic_vector(ROW_SIZE - 1 downto 0);
|
||||||
|
|
||||||
-- PLRU output interface
|
-- PLRU output interface
|
||||||
type plru_out_t is array(0 to NUM_LINES-1) of std_ulogic_vector(WAY_BITS-1 downto 0);
|
signal plru_victim : way_t;
|
||||||
signal plru_victim : plru_out_t;
|
|
||||||
signal replace_way : way_t;
|
signal replace_way : way_t;
|
||||||
|
|
||||||
-- Wishbone read/write/cache write formatting signals
|
-- Wishbone read/write/cache write formatting signals
|
||||||
@@ -422,8 +425,7 @@ architecture rtl of dcache is
|
|||||||
signal tlb_miss : std_ulogic;
|
signal tlb_miss : std_ulogic;
|
||||||
|
|
||||||
-- TLB PLRU output interface
|
-- TLB PLRU output interface
|
||||||
type tlb_plru_out_t is array(tlb_index_t) of std_ulogic_vector(TLB_WAY_BITS-1 downto 0);
|
signal tlb_plru_victim : std_ulogic_vector(TLB_WAY_BITS-1 downto 0);
|
||||||
signal tlb_plru_victim : tlb_plru_out_t;
|
|
||||||
|
|
||||||
signal snoop_tag_set : cache_tags_set_t;
|
signal snoop_tag_set : cache_tags_set_t;
|
||||||
signal snoop_valid : std_ulogic;
|
signal snoop_valid : std_ulogic;
|
||||||
@@ -649,39 +651,49 @@ begin
|
|||||||
end process;
|
end process;
|
||||||
|
|
||||||
-- Generate TLB PLRUs
|
-- Generate TLB PLRUs
|
||||||
maybe_tlb_plrus: if TLB_NUM_WAYS > 1 generate
|
maybe_tlb_plrus : if TLB_NUM_WAYS > 1 generate
|
||||||
|
type tlb_plru_array is array(tlb_index_t) of std_ulogic_vector(TLB_NUM_WAYS - 2 downto 0);
|
||||||
|
signal tlb_plru_ram : tlb_plru_array;
|
||||||
|
signal tlb_plru_cur : std_ulogic_vector(TLB_NUM_WAYS - 2 downto 0);
|
||||||
|
signal tlb_plru_upd : std_ulogic_vector(TLB_NUM_WAYS - 2 downto 0);
|
||||||
|
signal tlb_plru_acc : std_ulogic_vector(TLB_WAY_BITS-1 downto 0);
|
||||||
|
signal tlb_plru_out : std_ulogic_vector(TLB_WAY_BITS-1 downto 0);
|
||||||
begin
|
begin
|
||||||
tlb_plrus: for i in 0 to TLB_SET_SIZE - 1 generate
|
tlb_plru : entity work.plrufn
|
||||||
-- TLB PLRU interface
|
generic map (
|
||||||
signal tlb_plru_acc : std_ulogic_vector(TLB_WAY_BITS-1 downto 0);
|
BITS => TLB_WAY_BITS
|
||||||
signal tlb_plru_acc_en : std_ulogic;
|
)
|
||||||
signal tlb_plru_out : std_ulogic_vector(TLB_WAY_BITS-1 downto 0);
|
port map (
|
||||||
begin
|
acc => tlb_plru_acc,
|
||||||
tlb_plru : entity work.plru
|
tree_in => tlb_plru_cur,
|
||||||
generic map (
|
tree_out => tlb_plru_upd,
|
||||||
BITS => TLB_WAY_BITS
|
lru => tlb_plru_out
|
||||||
)
|
);
|
||||||
port map (
|
|
||||||
clk => clk,
|
|
||||||
rst => rst,
|
|
||||||
acc => tlb_plru_acc,
|
|
||||||
acc_en => tlb_plru_acc_en,
|
|
||||||
lru => tlb_plru_out
|
|
||||||
);
|
|
||||||
|
|
||||||
process(all)
|
process(all)
|
||||||
begin
|
begin
|
||||||
-- PLRU interface
|
-- Read PLRU bits from array
|
||||||
if not is_X(r1.tlb_hit_index) and r1.tlb_hit_index = i then
|
if is_X(r1.tlb_hit_index) then
|
||||||
tlb_plru_acc_en <= r1.tlb_hit;
|
tlb_plru_cur <= (others => 'X');
|
||||||
assert not is_X(r1.tlb_hit_way);
|
else
|
||||||
else
|
tlb_plru_cur <= tlb_plru_ram(to_integer(r1.tlb_hit_index));
|
||||||
tlb_plru_acc_en <= '0';
|
end if;
|
||||||
end if;
|
|
||||||
tlb_plru_acc <= std_ulogic_vector(r1.tlb_hit_way);
|
-- PLRU interface
|
||||||
tlb_plru_victim(i) <= tlb_plru_out;
|
tlb_plru_acc <= std_ulogic_vector(r1.tlb_hit_way);
|
||||||
end process;
|
tlb_plru_victim <= tlb_plru_out;
|
||||||
end generate;
|
end process;
|
||||||
|
|
||||||
|
-- synchronous writes to TLB PLRU array
|
||||||
|
process(clk)
|
||||||
|
begin
|
||||||
|
if rising_edge(clk) then
|
||||||
|
if r1.tlb_hit = '1' then
|
||||||
|
assert not is_X(r1.tlb_hit_index) severity failure;
|
||||||
|
tlb_plru_ram(to_integer(r1.tlb_hit_index)) <= tlb_plru_upd;
|
||||||
|
end if;
|
||||||
|
end if;
|
||||||
|
end process;
|
||||||
end generate;
|
end generate;
|
||||||
|
|
||||||
tlb_search : process(all)
|
tlb_search : process(all)
|
||||||
@@ -747,13 +759,15 @@ begin
|
|||||||
end if;
|
end if;
|
||||||
elsif tlbwe = '1' then
|
elsif tlbwe = '1' then
|
||||||
assert not is_X(tlb_req_index);
|
assert not is_X(tlb_req_index);
|
||||||
if tlb_hit = '1' then
|
repl_way := to_unsigned(0, TLB_WAY_BITS);
|
||||||
repl_way := tlb_hit_way;
|
if TLB_NUM_WAYS > 1 then
|
||||||
else
|
if tlb_hit = '1' then
|
||||||
assert not is_X(tlb_plru_victim(to_integer(tlb_req_index)));
|
repl_way := tlb_hit_way;
|
||||||
repl_way := unsigned(tlb_plru_victim(to_integer(tlb_req_index)));
|
else
|
||||||
|
repl_way := unsigned(r1.tlb_victim);
|
||||||
|
end if;
|
||||||
|
assert not is_X(repl_way);
|
||||||
end if;
|
end if;
|
||||||
assert not is_X(repl_way);
|
|
||||||
eatag := r0.req.addr(63 downto TLB_LG_PGSZ + TLB_SET_BITS);
|
eatag := r0.req.addr(63 downto TLB_LG_PGSZ + TLB_SET_BITS);
|
||||||
tagset := tlb_tag_way;
|
tagset := tlb_tag_way;
|
||||||
write_tlb_tag(to_integer(repl_way), tagset, eatag);
|
write_tlb_tag(to_integer(repl_way), tagset, eatag);
|
||||||
@@ -767,39 +781,49 @@ begin
|
|||||||
end process;
|
end process;
|
||||||
|
|
||||||
-- Generate PLRUs
|
-- Generate PLRUs
|
||||||
maybe_plrus: if NUM_WAYS > 1 generate
|
maybe_plrus : if NUM_WAYS > 1 generate
|
||||||
|
type plru_array is array(0 to NUM_LINES-1) of std_ulogic_vector(NUM_WAYS - 2 downto 0);
|
||||||
|
signal plru_ram : plru_array;
|
||||||
|
signal plru_cur : std_ulogic_vector(NUM_WAYS - 2 downto 0);
|
||||||
|
signal plru_upd : std_ulogic_vector(NUM_WAYS - 2 downto 0);
|
||||||
|
signal plru_acc : std_ulogic_vector(WAY_BITS-1 downto 0);
|
||||||
|
signal plru_out : std_ulogic_vector(WAY_BITS-1 downto 0);
|
||||||
begin
|
begin
|
||||||
plrus: for i in 0 to NUM_LINES-1 generate
|
plru : entity work.plrufn
|
||||||
-- PLRU interface
|
generic map (
|
||||||
signal plru_acc : std_ulogic_vector(WAY_BITS-1 downto 0);
|
BITS => WAY_BITS
|
||||||
signal plru_acc_en : std_ulogic;
|
)
|
||||||
signal plru_out : std_ulogic_vector(WAY_BITS-1 downto 0);
|
port map (
|
||||||
|
acc => plru_acc,
|
||||||
begin
|
tree_in => plru_cur,
|
||||||
plru : entity work.plru
|
tree_out => plru_upd,
|
||||||
generic map (
|
lru => plru_out
|
||||||
BITS => WAY_BITS
|
);
|
||||||
)
|
|
||||||
port map (
|
|
||||||
clk => clk,
|
|
||||||
rst => rst,
|
|
||||||
acc => plru_acc,
|
|
||||||
acc_en => plru_acc_en,
|
|
||||||
lru => plru_out
|
|
||||||
);
|
|
||||||
|
|
||||||
process(all)
|
process(all)
|
||||||
begin
|
begin
|
||||||
-- PLRU interface
|
-- Read PLRU bits from array
|
||||||
if not is_X(r1.hit_index) and r1.hit_index = to_unsigned(i, INDEX_BITS) then
|
if is_X(r1.hit_index) then
|
||||||
plru_acc_en <= r1.cache_hit;
|
plru_cur <= (others => 'X');
|
||||||
else
|
else
|
||||||
plru_acc_en <= '0';
|
plru_cur <= plru_ram(to_integer(r1.hit_index));
|
||||||
end if;
|
end if;
|
||||||
plru_acc <= std_ulogic_vector(r1.hit_way);
|
|
||||||
plru_victim(i) <= plru_out;
|
-- PLRU interface
|
||||||
end process;
|
plru_acc <= std_ulogic_vector(r1.hit_way);
|
||||||
end generate;
|
plru_victim <= unsigned(plru_out);
|
||||||
|
end process;
|
||||||
|
|
||||||
|
-- synchronous writes to PLRU array
|
||||||
|
process(clk)
|
||||||
|
begin
|
||||||
|
if rising_edge(clk) then
|
||||||
|
if r1.cache_hit = '1' then
|
||||||
|
assert not is_X(r1.hit_index) severity failure;
|
||||||
|
plru_ram(to_integer(r1.hit_index)) <= plru_upd;
|
||||||
|
end if;
|
||||||
|
end if;
|
||||||
|
end process;
|
||||||
end generate;
|
end generate;
|
||||||
|
|
||||||
-- Cache tag RAM read port
|
-- Cache tag RAM read port
|
||||||
@@ -974,11 +998,19 @@ begin
|
|||||||
end if;
|
end if;
|
||||||
|
|
||||||
-- The way to replace on a miss
|
-- The way to replace on a miss
|
||||||
if r1.write_tag = '1' then
|
replace_way <= to_unsigned(0, WAY_BITS);
|
||||||
assert not is_X(r1.store_index);
|
if NUM_WAYS > 1 then
|
||||||
replace_way <= unsigned(plru_victim(to_integer(r1.store_index)));
|
if r1.write_tag = '1' then
|
||||||
else
|
if r1.choose_victim = '1' then
|
||||||
replace_way <= r1.store_way;
|
replace_way <= plru_victim;
|
||||||
|
else
|
||||||
|
-- Cache victim way was chosen earlier,
|
||||||
|
-- in the cycle after the miss was detected.
|
||||||
|
replace_way <= r1.victim_way;
|
||||||
|
end if;
|
||||||
|
else
|
||||||
|
replace_way <= r1.store_way;
|
||||||
|
end if;
|
||||||
end if;
|
end if;
|
||||||
|
|
||||||
-- See if the request matches the line currently being reloaded
|
-- See if the request matches the line currently being reloaded
|
||||||
@@ -1299,8 +1331,6 @@ begin
|
|||||||
end if;
|
end if;
|
||||||
|
|
||||||
-- Fast path for load/store hits. Set signals for the writeback controls.
|
-- Fast path for load/store hits. Set signals for the writeback controls.
|
||||||
r1.hit_way <= req_hit_way;
|
|
||||||
r1.hit_index <= req_index;
|
|
||||||
if req_op = OP_LOAD_HIT then
|
if req_op = OP_LOAD_HIT then
|
||||||
r1.hit_load_valid <= '1';
|
r1.hit_load_valid <= '1';
|
||||||
else
|
else
|
||||||
@@ -1334,6 +1364,11 @@ begin
|
|||||||
r1.tlb_hit <= tlb_hit;
|
r1.tlb_hit <= tlb_hit;
|
||||||
r1.tlb_hit_way <= tlb_hit_way;
|
r1.tlb_hit_way <= tlb_hit_way;
|
||||||
r1.tlb_hit_index <= tlb_req_index;
|
r1.tlb_hit_index <= tlb_req_index;
|
||||||
|
-- determine victim way in the TLB in the cycle after
|
||||||
|
-- we detect the TLB miss
|
||||||
|
if r1.ls_error = '1' then
|
||||||
|
r1.tlb_victim <= unsigned(tlb_plru_victim);
|
||||||
|
end if;
|
||||||
|
|
||||||
end if;
|
end if;
|
||||||
end process;
|
end process;
|
||||||
@@ -1358,6 +1393,7 @@ begin
|
|||||||
ev.load_miss <= '0';
|
ev.load_miss <= '0';
|
||||||
ev.store_miss <= '0';
|
ev.store_miss <= '0';
|
||||||
ev.dtlb_miss <= tlb_miss;
|
ev.dtlb_miss <= tlb_miss;
|
||||||
|
r1.choose_victim <= '0';
|
||||||
|
|
||||||
-- On reset, clear all valid bits to force misses
|
-- On reset, clear all valid bits to force misses
|
||||||
if rst = '1' then
|
if rst = '1' then
|
||||||
@@ -1454,6 +1490,17 @@ begin
|
|||||||
end if;
|
end if;
|
||||||
end if;
|
end if;
|
||||||
|
|
||||||
|
-- Signals for PLRU update and victim selection
|
||||||
|
r1.hit_way <= req_hit_way;
|
||||||
|
r1.hit_index <= req_index;
|
||||||
|
-- Record victim way in the cycle after we see a load or dcbz miss
|
||||||
|
if r1.choose_victim = '1' then
|
||||||
|
r1.victim_way <= plru_victim;
|
||||||
|
end if;
|
||||||
|
if req_op = OP_LOAD_MISS or (req_op = OP_STORE_MISS and r0.req.dcbz = '1') then
|
||||||
|
r1.choose_victim <= '1';
|
||||||
|
end if;
|
||||||
|
|
||||||
-- Main state machine
|
-- Main state machine
|
||||||
case r1.state is
|
case r1.state is
|
||||||
when IDLE =>
|
when IDLE =>
|
||||||
|
|||||||
86
icache.vhdl
86
icache.vhdl
@@ -12,7 +12,6 @@
|
|||||||
-- efficient use of distributed RAM and less logic/muxes. Currently we
|
-- efficient use of distributed RAM and less logic/muxes. Currently we
|
||||||
-- write TAG_BITS width which may not match full ram blocks and might
|
-- write TAG_BITS width which may not match full ram blocks and might
|
||||||
-- cause muxes to be inferred for "partial writes".
|
-- cause muxes to be inferred for "partial writes".
|
||||||
-- * Check if making the read size of PLRU a ROM helps utilization
|
|
||||||
--
|
--
|
||||||
library ieee;
|
library ieee;
|
||||||
use ieee.std_logic_1164.all;
|
use ieee.std_logic_1164.all;
|
||||||
@@ -102,7 +101,8 @@ architecture rtl of icache is
|
|||||||
-- the +1 is to allow the endianness to be stored in the tag
|
-- the +1 is to allow the endianness to be stored in the tag
|
||||||
constant TAG_BITS : natural := REAL_ADDR_BITS - SET_SIZE_BITS + 1;
|
constant TAG_BITS : natural := REAL_ADDR_BITS - SET_SIZE_BITS + 1;
|
||||||
-- WAY_BITS is the number of bits to select a way
|
-- WAY_BITS is the number of bits to select a way
|
||||||
constant WAY_BITS : natural := log2(NUM_WAYS);
|
-- Make sure this is at least 1, to avoid 0-element vectors
|
||||||
|
constant WAY_BITS : natural := maximum(log2(NUM_WAYS), 1);
|
||||||
|
|
||||||
-- Example of layout for 32 lines of 64 bytes:
|
-- Example of layout for 32 lines of 64 bytes:
|
||||||
--
|
--
|
||||||
@@ -235,8 +235,7 @@ architecture rtl of icache is
|
|||||||
signal wb_rd_data : std_ulogic_vector(ROW_SIZE_BITS - 1 downto 0);
|
signal wb_rd_data : std_ulogic_vector(ROW_SIZE_BITS - 1 downto 0);
|
||||||
|
|
||||||
-- PLRU output interface
|
-- PLRU output interface
|
||||||
type plru_out_t is array(index_t) of std_ulogic_vector(WAY_BITS-1 downto 0);
|
signal plru_victim : way_sig_t;
|
||||||
signal plru_victim : plru_out_t;
|
|
||||||
|
|
||||||
-- Memory write snoop signals
|
-- Memory write snoop signals
|
||||||
signal snoop_valid : std_ulogic;
|
signal snoop_valid : std_ulogic;
|
||||||
@@ -446,40 +445,48 @@ begin
|
|||||||
|
|
||||||
-- Generate PLRUs
|
-- Generate PLRUs
|
||||||
maybe_plrus: if NUM_WAYS > 1 generate
|
maybe_plrus: if NUM_WAYS > 1 generate
|
||||||
|
type plru_array is array(index_t) of std_ulogic_vector(NUM_WAYS - 2 downto 0);
|
||||||
|
signal plru_ram : plru_array;
|
||||||
|
signal plru_cur : std_ulogic_vector(NUM_WAYS - 2 downto 0);
|
||||||
|
signal plru_upd : std_ulogic_vector(NUM_WAYS - 2 downto 0);
|
||||||
|
signal plru_acc : std_ulogic_vector(WAY_BITS-1 downto 0);
|
||||||
|
signal plru_out : std_ulogic_vector(WAY_BITS-1 downto 0);
|
||||||
begin
|
begin
|
||||||
plrus: for i in 0 to NUM_LINES-1 generate
|
plru : entity work.plrufn
|
||||||
-- PLRU interface
|
generic map (
|
||||||
signal plru_acc : std_ulogic_vector(WAY_BITS-1 downto 0);
|
BITS => WAY_BITS
|
||||||
signal plru_acc_en : std_ulogic;
|
)
|
||||||
signal plru_out : std_ulogic_vector(WAY_BITS-1 downto 0);
|
port map (
|
||||||
|
acc => plru_acc,
|
||||||
begin
|
tree_in => plru_cur,
|
||||||
plru : entity work.plru
|
tree_out => plru_upd,
|
||||||
generic map (
|
lru => plru_out
|
||||||
BITS => WAY_BITS
|
);
|
||||||
)
|
|
||||||
port map (
|
|
||||||
clk => clk,
|
|
||||||
rst => rst,
|
|
||||||
acc => plru_acc,
|
|
||||||
acc_en => plru_acc_en,
|
|
||||||
lru => plru_out
|
|
||||||
);
|
|
||||||
|
|
||||||
process(all)
|
process(all)
|
||||||
begin
|
begin
|
||||||
-- PLRU interface
|
-- Read PLRU bits from array
|
||||||
if is_X(r.hit_nia) then
|
if is_X(r.hit_nia) then
|
||||||
plru_acc_en <= 'X';
|
plru_cur <= (others => 'X');
|
||||||
elsif get_index(r.hit_nia) = i then
|
else
|
||||||
plru_acc_en <= r.hit_valid;
|
plru_cur <= plru_ram(to_integer(get_index(r.hit_nia)));
|
||||||
else
|
end if;
|
||||||
plru_acc_en <= '0';
|
|
||||||
end if;
|
-- PLRU interface
|
||||||
plru_acc <= std_ulogic_vector(r.hit_way);
|
plru_acc <= std_ulogic_vector(r.hit_way);
|
||||||
plru_victim(i) <= plru_out;
|
plru_victim <= unsigned(plru_out);
|
||||||
end process;
|
end process;
|
||||||
end generate;
|
|
||||||
|
-- synchronous writes to PLRU array
|
||||||
|
process(clk)
|
||||||
|
begin
|
||||||
|
if rising_edge(clk) then
|
||||||
|
if r.hit_valid = '1' then
|
||||||
|
assert not is_X(r.hit_nia) severity failure;
|
||||||
|
plru_ram(to_integer(get_index(r.hit_nia))) <= plru_upd;
|
||||||
|
end if;
|
||||||
|
end if;
|
||||||
|
end process;
|
||||||
end generate;
|
end generate;
|
||||||
|
|
||||||
-- TLB hit detection and real address generation
|
-- TLB hit detection and real address generation
|
||||||
@@ -787,8 +794,11 @@ begin
|
|||||||
assert not is_X(r.store_row) severity failure;
|
assert not is_X(r.store_row) severity failure;
|
||||||
assert not is_X(r.recv_row) severity failure;
|
assert not is_X(r.recv_row) severity failure;
|
||||||
if r.state = CLR_TAG then
|
if r.state = CLR_TAG then
|
||||||
-- Get victim way from plru
|
replace_way := to_unsigned(0, WAY_BITS);
|
||||||
replace_way := unsigned(plru_victim(to_integer(r.store_index)));
|
if NUM_WAYS > 1 then
|
||||||
|
-- Get victim way from plru
|
||||||
|
replace_way := plru_victim;
|
||||||
|
end if;
|
||||||
r.store_way <= replace_way;
|
r.store_way <= replace_way;
|
||||||
|
|
||||||
-- Force misses on that way while reloading that line
|
-- Force misses on that way while reloading that line
|
||||||
|
|||||||
@@ -305,8 +305,7 @@ architecture behaviour of litedram_wrapper is
|
|||||||
signal cache_out : cache_ram_out_t;
|
signal cache_out : cache_ram_out_t;
|
||||||
|
|
||||||
-- PLRU output interface
|
-- PLRU output interface
|
||||||
type plru_out_t is array(index_t) of std_ulogic_vector(WAY_BITS-1 downto 0);
|
signal plru_victim : way_t;
|
||||||
signal plru_victim : plru_out_t;
|
|
||||||
|
|
||||||
--
|
--
|
||||||
-- Helper functions to decode incoming requests
|
-- Helper functions to decode incoming requests
|
||||||
@@ -565,39 +564,44 @@ begin
|
|||||||
end generate;
|
end generate;
|
||||||
|
|
||||||
-- Generate PLRUs
|
-- Generate PLRUs
|
||||||
maybe_plrus: if NUM_WAYS > 1 generate
|
maybe_plrus : if NUM_WAYS > 1 generate
|
||||||
|
type plru_array is array(index_t) of std_ulogic_vector(NUM_WAYS - 2 downto 0);
|
||||||
|
signal plru_ram : plru_array;
|
||||||
|
signal plru_cur : std_ulogic_vector(NUM_WAYS - 2 downto 0);
|
||||||
|
signal plru_upd : std_ulogic_vector(NUM_WAYS - 2 downto 0);
|
||||||
|
signal plru_acc : std_ulogic_vector(WAY_BITS-1 downto 0);
|
||||||
|
signal plru_out : std_ulogic_vector(WAY_BITS-1 downto 0);
|
||||||
begin
|
begin
|
||||||
plrus: for i in 0 to NUM_LINES-1 generate
|
plru : entity work.plrufn
|
||||||
-- PLRU interface
|
generic map (
|
||||||
signal plru_acc : std_ulogic_vector(WAY_BITS-1 downto 0);
|
BITS => WAY_BITS
|
||||||
signal plru_acc_en : std_ulogic;
|
)
|
||||||
signal plru_out : std_ulogic_vector(WAY_BITS-1 downto 0);
|
port map (
|
||||||
begin
|
acc => plru_acc,
|
||||||
plru : entity work.plru
|
tree_in => plru_cur,
|
||||||
generic map (
|
tree_out => plru_upd,
|
||||||
BITS => WAY_BITS
|
lru => plru_out
|
||||||
)
|
);
|
||||||
port map (
|
|
||||||
clk => system_clk,
|
|
||||||
rst => system_reset,
|
|
||||||
acc => plru_acc,
|
|
||||||
acc_en => plru_acc_en,
|
|
||||||
lru => plru_out
|
|
||||||
);
|
|
||||||
|
|
||||||
process(req_index, req_op, req_hit_way, plru_out)
|
process(all)
|
||||||
begin
|
begin
|
||||||
-- PLRU interface
|
-- Read PLRU bits from array
|
||||||
if (req_op = OP_LOAD_HIT or
|
plru_cur <= plru_ram(req_index);
|
||||||
req_op = OP_STORE_HIT) and req_index = i then
|
|
||||||
plru_acc_en <= '1';
|
-- PLRU interface
|
||||||
else
|
plru_acc <= std_ulogic_vector(to_unsigned(req_hit_way, WAY_BITS));
|
||||||
plru_acc_en <= '0';
|
plru_victim <= to_integer(unsigned(plru_out));
|
||||||
|
end process;
|
||||||
|
|
||||||
|
-- synchronous writes to PLRU array
|
||||||
|
process(system_clk)
|
||||||
|
begin
|
||||||
|
if rising_edge(system_clk) then
|
||||||
|
if (req_op = OP_LOAD_HIT or req_op = OP_STORE_HIT) then
|
||||||
|
plru_ram(req_index) <= plru_upd;
|
||||||
end if;
|
end if;
|
||||||
plru_acc <= std_ulogic_vector(to_unsigned(req_hit_way, WAY_BITS));
|
end if;
|
||||||
plru_victim(i) <= plru_out;
|
end process;
|
||||||
end process;
|
|
||||||
end generate;
|
|
||||||
end generate;
|
end generate;
|
||||||
|
|
||||||
--
|
--
|
||||||
@@ -1023,7 +1027,7 @@ begin
|
|||||||
-- We need to read a cache line
|
-- We need to read a cache line
|
||||||
if req_op = OP_LOAD_MISS and not wait_qdrain then
|
if req_op = OP_LOAD_MISS and not wait_qdrain then
|
||||||
-- Grab way to replace
|
-- Grab way to replace
|
||||||
refill_way <= to_integer(unsigned(plru_victim(req_index)));
|
refill_way <= plru_victim;
|
||||||
|
|
||||||
-- Keep track of our index and way for subsequent stores
|
-- Keep track of our index and way for subsequent stores
|
||||||
refill_index <= req_index;
|
refill_index <= req_index;
|
||||||
|
|||||||
@@ -34,6 +34,7 @@ filesets:
|
|||||||
- core.vhdl
|
- core.vhdl
|
||||||
- icache.vhdl
|
- icache.vhdl
|
||||||
- plru.vhdl
|
- plru.vhdl
|
||||||
|
- plrufn.vhdl
|
||||||
- cache_ram.vhdl
|
- cache_ram.vhdl
|
||||||
- core_debug.vhdl
|
- core_debug.vhdl
|
||||||
- utils.vhdl
|
- utils.vhdl
|
||||||
|
|||||||
72
plrufn.vhdl
Normal file
72
plrufn.vhdl
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
library ieee;
|
||||||
|
use ieee.std_logic_1164.all;
|
||||||
|
use ieee.numeric_std.all;
|
||||||
|
use ieee.math_real.all;
|
||||||
|
|
||||||
|
entity plrufn is
|
||||||
|
generic (
|
||||||
|
BITS : positive := 2
|
||||||
|
)
|
||||||
|
;
|
||||||
|
port (
|
||||||
|
acc : in std_ulogic_vector(BITS-1 downto 0);
|
||||||
|
tree_in : in std_ulogic_vector(2 ** BITS - 2 downto 0);
|
||||||
|
tree_out : out std_ulogic_vector(2 ** BITS - 2 downto 0);
|
||||||
|
lru : out std_ulogic_vector(BITS-1 downto 0)
|
||||||
|
);
|
||||||
|
end entity plrufn;
|
||||||
|
|
||||||
|
architecture rtl of plrufn is
|
||||||
|
-- Each level of the tree (from leaf to root) has half the number of nodes
|
||||||
|
-- of the previous level. So for an N-bit LRU (2^N ways), we have a level of
|
||||||
|
-- 2^(N-1) nodes, one of 2^(N-2) nodes, etc., down to 1. This gives us 2^N-1 nodes.
|
||||||
|
-- I.e. a 2-bit LRU has 3 nodes (2 + 1), a 4-bit LRU has 15 nodes (8 + 4 + 2 + 1), etc.
|
||||||
|
constant count : positive := 2 ** BITS - 1;
|
||||||
|
subtype node_t is integer range 0 to count - 1;
|
||||||
|
begin
|
||||||
|
|
||||||
|
get_lru: process(tree_in)
|
||||||
|
variable node : node_t;
|
||||||
|
variable abit : std_ulogic;
|
||||||
|
begin
|
||||||
|
node := 0;
|
||||||
|
for i in 0 to BITS-1 loop
|
||||||
|
abit := tree_in(node);
|
||||||
|
if is_X(abit) then
|
||||||
|
abit := '0';
|
||||||
|
end if;
|
||||||
|
lru(BITS-1-i) <= abit;
|
||||||
|
if i /= BITS-1 then
|
||||||
|
node := node * 2;
|
||||||
|
if abit = '1' then
|
||||||
|
node := node + 2;
|
||||||
|
else
|
||||||
|
node := node + 1;
|
||||||
|
end if;
|
||||||
|
end if;
|
||||||
|
end loop;
|
||||||
|
end process;
|
||||||
|
|
||||||
|
update_lru: process(all)
|
||||||
|
variable node : node_t;
|
||||||
|
variable abit : std_ulogic;
|
||||||
|
begin
|
||||||
|
tree_out <= tree_in;
|
||||||
|
node := 0;
|
||||||
|
for i in 0 to BITS-1 loop
|
||||||
|
abit := acc(BITS-1-i);
|
||||||
|
if is_X(abit) then
|
||||||
|
abit := '0';
|
||||||
|
end if;
|
||||||
|
tree_out(node) <= not abit;
|
||||||
|
if i /= BITS-1 then
|
||||||
|
node := node * 2;
|
||||||
|
if abit = '1' then
|
||||||
|
node := node + 2;
|
||||||
|
else
|
||||||
|
node := node + 1;
|
||||||
|
end if;
|
||||||
|
end if;
|
||||||
|
end loop;
|
||||||
|
end process;
|
||||||
|
end;
|
||||||
Reference in New Issue
Block a user