mirror of
https://github.com/antonblanchard/microwatt.git
synced 2026-02-11 02:30:11 +00:00
Merge pull request #233 from paulusmack/master
Changes to improve timing
This commit is contained in:
@@ -315,6 +315,7 @@ package common is
|
||||
|
||||
type MmuToLoadstore1Type is record
|
||||
done : std_ulogic;
|
||||
err : std_ulogic;
|
||||
invalid : std_ulogic;
|
||||
badtree : std_ulogic;
|
||||
segerr : std_ulogic;
|
||||
|
||||
24
core.vhdl
24
core.vhdl
@@ -202,7 +202,8 @@ begin
|
||||
SIM => SIM,
|
||||
LINE_SIZE => 64,
|
||||
NUM_LINES => 64,
|
||||
NUM_WAYS => 2
|
||||
NUM_WAYS => 2,
|
||||
LOG_LENGTH => LOG_LENGTH
|
||||
)
|
||||
port map(
|
||||
clk => clk,
|
||||
@@ -222,6 +223,9 @@ begin
|
||||
icache_stall_in <= decode1_busy;
|
||||
|
||||
decode1_0: entity work.decode1
|
||||
generic map(
|
||||
LOG_LENGTH => LOG_LENGTH
|
||||
)
|
||||
port map (
|
||||
clk => clk,
|
||||
rst => rst_dec1,
|
||||
@@ -239,7 +243,8 @@ begin
|
||||
|
||||
decode2_0: entity work.decode2
|
||||
generic map (
|
||||
EX1_BYPASS => EX1_BYPASS
|
||||
EX1_BYPASS => EX1_BYPASS,
|
||||
LOG_LENGTH => LOG_LENGTH
|
||||
)
|
||||
port map (
|
||||
clk => clk,
|
||||
@@ -261,7 +266,8 @@ begin
|
||||
|
||||
register_file_0: entity work.register_file
|
||||
generic map (
|
||||
SIM => SIM
|
||||
SIM => SIM,
|
||||
LOG_LENGTH => LOG_LENGTH
|
||||
)
|
||||
port map (
|
||||
clk => clk,
|
||||
@@ -279,7 +285,8 @@ begin
|
||||
|
||||
cr_file_0: entity work.cr_file
|
||||
generic map (
|
||||
SIM => SIM
|
||||
SIM => SIM,
|
||||
LOG_LENGTH => LOG_LENGTH
|
||||
)
|
||||
port map (
|
||||
clk => clk,
|
||||
@@ -292,7 +299,8 @@ begin
|
||||
|
||||
execute1_0: entity work.execute1
|
||||
generic map (
|
||||
EX1_BYPASS => EX1_BYPASS
|
||||
EX1_BYPASS => EX1_BYPASS,
|
||||
LOG_LENGTH => LOG_LENGTH
|
||||
)
|
||||
port map (
|
||||
clk => clk,
|
||||
@@ -315,6 +323,9 @@ begin
|
||||
);
|
||||
|
||||
loadstore1_0: entity work.loadstore1
|
||||
generic map (
|
||||
LOG_LENGTH => LOG_LENGTH
|
||||
)
|
||||
port map (
|
||||
clk => clk,
|
||||
rst => rst_ls1,
|
||||
@@ -344,7 +355,8 @@ begin
|
||||
generic map(
|
||||
LINE_SIZE => 64,
|
||||
NUM_LINES => 64,
|
||||
NUM_WAYS => 2
|
||||
NUM_WAYS => 2,
|
||||
LOG_LENGTH => LOG_LENGTH
|
||||
)
|
||||
port map (
|
||||
clk => clk,
|
||||
|
||||
162
countzero.vhdl
162
countzero.vhdl
@@ -15,123 +15,81 @@ entity zero_counter is
|
||||
end entity zero_counter;
|
||||
|
||||
architecture behaviour of zero_counter is
|
||||
type intermediate_result is record
|
||||
v16: std_ulogic_vector(15 downto 0);
|
||||
sel_hi: std_ulogic_vector(1 downto 0);
|
||||
is_32bit: std_ulogic;
|
||||
count_right: std_ulogic;
|
||||
end record;
|
||||
|
||||
signal r, r_in : intermediate_result;
|
||||
|
||||
-- Return the index of the leftmost or rightmost 1 in a set of 4 bits.
|
||||
-- Assumes v is not "0000"; if it is, return (right ? "11" : "00").
|
||||
function encoder(v: std_ulogic_vector(3 downto 0); right: std_ulogic) return std_ulogic_vector is
|
||||
-- Reverse the order of bits in a word
|
||||
function bit_reverse(a: std_ulogic_vector) return std_ulogic_vector is
|
||||
variable ret: std_ulogic_vector(a'left downto a'right);
|
||||
begin
|
||||
if right = '0' then
|
||||
if v(3) = '1' then
|
||||
return "11";
|
||||
elsif v(2) = '1' then
|
||||
return "10";
|
||||
elsif v(1) = '1' then
|
||||
return "01";
|
||||
else
|
||||
return "00";
|
||||
end if;
|
||||
else
|
||||
if v(0) = '1' then
|
||||
return "00";
|
||||
elsif v(1) = '1' then
|
||||
return "01";
|
||||
elsif v(2) = '1' then
|
||||
return "10";
|
||||
else
|
||||
return "11";
|
||||
end if;
|
||||
end if;
|
||||
for i in a'right to a'left loop
|
||||
ret(a'left + a'right - i) := a(i);
|
||||
end loop;
|
||||
return ret;
|
||||
end;
|
||||
|
||||
begin
|
||||
zerocounter_0: process(clk)
|
||||
-- If there is only one bit set in a doubleword, return its bit number
|
||||
-- (counting from the right). Each bit of the result is obtained by
|
||||
-- ORing together 32 bits of the input:
|
||||
-- bit 0 = a[1] or a[3] or a[5] or ...
|
||||
-- bit 1 = a[2] or a[3] or a[6] or a[7] or ...
|
||||
-- bit 2 = a[4..7] or a[12..15] or ...
|
||||
-- bit 5 = a[32..63] ORed together
|
||||
function bit_number(a: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
|
||||
variable ret: std_ulogic_vector(5 downto 0);
|
||||
variable stride: natural;
|
||||
variable bit: std_ulogic;
|
||||
variable k: natural;
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
r <= r_in;
|
||||
stride := 2;
|
||||
for i in 0 to 5 loop
|
||||
bit := '0';
|
||||
for j in 0 to (64 / stride) - 1 loop
|
||||
k := j * stride;
|
||||
bit := bit or (or a(k + stride - 1 downto k + (stride / 2)));
|
||||
end loop;
|
||||
ret(i) := bit;
|
||||
stride := stride * 2;
|
||||
end loop;
|
||||
return ret;
|
||||
end;
|
||||
|
||||
signal inp : std_ulogic_vector(63 downto 0);
|
||||
signal sum : std_ulogic_vector(64 downto 0);
|
||||
signal msb_r : std_ulogic;
|
||||
signal onehot : std_ulogic_vector(63 downto 0);
|
||||
signal onehot_r : std_ulogic_vector(63 downto 0);
|
||||
signal bitnum : std_ulogic_vector(5 downto 0);
|
||||
|
||||
begin
|
||||
countzero_r: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
msb_r <= sum(64);
|
||||
onehot_r <= onehot;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
zerocounter_1: process(all)
|
||||
variable v: intermediate_result;
|
||||
variable y, z: std_ulogic_vector(3 downto 0);
|
||||
variable sel: std_ulogic_vector(5 downto 0);
|
||||
variable v4: std_ulogic_vector(3 downto 0);
|
||||
|
||||
countzero: process(all)
|
||||
begin
|
||||
-- Test 4 groups of 16 bits each.
|
||||
-- The top 2 groups are considered to be zero in 32-bit mode.
|
||||
z(0) := or (rs(15 downto 0));
|
||||
z(1) := or (rs(31 downto 16));
|
||||
z(2) := or (rs(47 downto 32));
|
||||
z(3) := or (rs(63 downto 48));
|
||||
if is_32bit = '0' then
|
||||
v.sel_hi := encoder(z, count_right);
|
||||
else
|
||||
v.sel_hi(1) := '0';
|
||||
if count_right = '0' then
|
||||
v.sel_hi(0) := z(1);
|
||||
inp <= bit_reverse(rs);
|
||||
else
|
||||
v.sel_hi(0) := not z(0);
|
||||
inp <= rs;
|
||||
end if;
|
||||
else
|
||||
inp(63 downto 32) <= x"FFFFFFFF";
|
||||
if count_right = '0' then
|
||||
inp(31 downto 0) <= bit_reverse(rs(31 downto 0));
|
||||
else
|
||||
inp(31 downto 0) <= rs(31 downto 0);
|
||||
end if;
|
||||
end if;
|
||||
|
||||
-- Select the leftmost/rightmost non-zero group of 16 bits
|
||||
case v.sel_hi is
|
||||
when "00" =>
|
||||
v.v16 := rs(15 downto 0);
|
||||
when "01" =>
|
||||
v.v16 := rs(31 downto 16);
|
||||
when "10" =>
|
||||
v.v16 := rs(47 downto 32);
|
||||
when others =>
|
||||
v.v16 := rs(63 downto 48);
|
||||
end case;
|
||||
sum <= std_ulogic_vector(unsigned('0' & not inp) + 1);
|
||||
onehot <= sum(63 downto 0) and inp;
|
||||
|
||||
-- Latch this and do the rest in the next cycle, for the sake of timing
|
||||
v.is_32bit := is_32bit;
|
||||
v.count_right := count_right;
|
||||
r_in <= v;
|
||||
sel(5 downto 4) := r.sel_hi;
|
||||
|
||||
-- Test 4 groups of 4 bits
|
||||
y(0) := or (r.v16(3 downto 0));
|
||||
y(1) := or (r.v16(7 downto 4));
|
||||
y(2) := or (r.v16(11 downto 8));
|
||||
y(3) := or (r.v16(15 downto 12));
|
||||
sel(3 downto 2) := encoder(y, r.count_right);
|
||||
|
||||
-- Select the leftmost/rightmost non-zero group of 4 bits
|
||||
case sel(3 downto 2) is
|
||||
when "00" =>
|
||||
v4 := r.v16(3 downto 0);
|
||||
when "01" =>
|
||||
v4 := r.v16(7 downto 4);
|
||||
when "10" =>
|
||||
v4 := r.v16(11 downto 8);
|
||||
when others =>
|
||||
v4 := r.v16(15 downto 12);
|
||||
end case;
|
||||
|
||||
sel(1 downto 0) := encoder(v4, r.count_right);
|
||||
|
||||
-- sel is now the index of the leftmost/rightmost 1 bit in rs
|
||||
if v4 = "0000" then
|
||||
-- operand is zero, return 32 for 32-bit, else 64
|
||||
result <= x"00000000000000" & '0' & not r.is_32bit & r.is_32bit & "00000";
|
||||
elsif r.count_right = '0' then
|
||||
-- return (63 - sel), trimmed to 5 bits in 32-bit mode
|
||||
result <= x"00000000000000" & "00" & (not sel(5) and not r.is_32bit) & not sel(4 downto 0);
|
||||
else
|
||||
result <= x"00000000000000" & "00" & sel;
|
||||
end if;
|
||||
-- The following occurs after a clock edge
|
||||
bitnum <= bit_number(onehot_r);
|
||||
|
||||
result <= x"00000000000000" & "0" & msb_r & bitnum;
|
||||
end process;
|
||||
end behaviour;
|
||||
|
||||
25
cr_file.vhdl
25
cr_file.vhdl
@@ -7,7 +7,9 @@ use work.common.all;
|
||||
|
||||
entity cr_file is
|
||||
generic (
|
||||
SIM : boolean := false
|
||||
SIM : boolean := false;
|
||||
-- Non-zero to enable log data collection
|
||||
LOG_LENGTH : natural := 0
|
||||
);
|
||||
port(
|
||||
clk : in std_logic;
|
||||
@@ -29,7 +31,6 @@ architecture behaviour of cr_file is
|
||||
signal crs_updated : std_ulogic_vector(31 downto 0);
|
||||
signal xerc : xer_common_t := xerc_init;
|
||||
signal xerc_updated : xer_common_t;
|
||||
signal log_data : std_ulogic_vector(12 downto 0);
|
||||
begin
|
||||
cr_create_0: process(all)
|
||||
variable hi, lo : integer := 0;
|
||||
@@ -91,14 +92,18 @@ begin
|
||||
end process;
|
||||
end generate;
|
||||
|
||||
cr_log: process(clk)
|
||||
cf_log: if LOG_LENGTH > 0 generate
|
||||
signal log_data : std_ulogic_vector(12 downto 0);
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
log_data <= w_in.write_cr_enable &
|
||||
w_in.write_cr_data(31 downto 28) &
|
||||
w_in.write_cr_mask;
|
||||
end if;
|
||||
end process;
|
||||
log_out <= log_data;
|
||||
cr_log: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
log_data <= w_in.write_cr_enable &
|
||||
w_in.write_cr_data(31 downto 28) &
|
||||
w_in.write_cr_mask;
|
||||
end if;
|
||||
end process;
|
||||
log_out <= log_data;
|
||||
end generate;
|
||||
|
||||
end architecture behaviour;
|
||||
|
||||
248
dcache.vhdl
248
dcache.vhdl
@@ -31,7 +31,9 @@ entity dcache is
|
||||
-- L1 DTLB number of ways
|
||||
TLB_NUM_WAYS : positive := 2;
|
||||
-- L1 DTLB log_2(page_size)
|
||||
TLB_LG_PGSZ : positive := 12
|
||||
TLB_LG_PGSZ : positive := 12;
|
||||
-- Non-zero to enable log data collection
|
||||
LOG_LENGTH : natural := 0
|
||||
);
|
||||
port (
|
||||
clk : in std_ulogic;
|
||||
@@ -226,13 +228,14 @@ architecture rtl of dcache is
|
||||
|
||||
type mem_access_request_t is record
|
||||
op : op_t;
|
||||
valid : std_ulogic;
|
||||
dcbz : std_ulogic;
|
||||
real_addr : std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0);
|
||||
data : std_ulogic_vector(63 downto 0);
|
||||
byte_sel : std_ulogic_vector(7 downto 0);
|
||||
hit_way : way_t;
|
||||
repl_way : way_t;
|
||||
same_tag : std_ulogic;
|
||||
mmu_req : std_ulogic;
|
||||
end record;
|
||||
|
||||
-- First stage register, contains state for stage 1 of load hits
|
||||
@@ -247,6 +250,13 @@ architecture rtl of dcache is
|
||||
-- Cache hit state
|
||||
hit_way : way_t;
|
||||
hit_load_valid : std_ulogic;
|
||||
hit_index : index_t;
|
||||
cache_hit : std_ulogic;
|
||||
|
||||
-- TLB hit state
|
||||
tlb_hit : std_ulogic;
|
||||
tlb_hit_way : tlb_way_t;
|
||||
tlb_hit_index : tlb_index_t;
|
||||
|
||||
-- 2-stage data buffer for data forwarded from writes to reads
|
||||
forward_data1 : std_ulogic_vector(63 downto 0);
|
||||
@@ -272,16 +282,18 @@ architecture rtl of dcache is
|
||||
end_row_ix : row_in_line_t;
|
||||
rows_valid : row_per_line_valid_t;
|
||||
acks_pending : unsigned(2 downto 0);
|
||||
inc_acks : std_ulogic;
|
||||
dec_acks : std_ulogic;
|
||||
|
||||
-- Signals to complete with error
|
||||
error_done : std_ulogic;
|
||||
-- Signals to complete (possibly with error)
|
||||
ls_valid : std_ulogic;
|
||||
ls_error : std_ulogic;
|
||||
mmu_done : std_ulogic;
|
||||
mmu_error : std_ulogic;
|
||||
cache_paradox : std_ulogic;
|
||||
|
||||
-- Signal to complete a failed stcx.
|
||||
stcx_fail : std_ulogic;
|
||||
|
||||
-- completion signal for tlbie
|
||||
tlbie_done : std_ulogic;
|
||||
end record;
|
||||
|
||||
signal r1 : reg_stage_1_t;
|
||||
@@ -303,6 +315,7 @@ architecture rtl of dcache is
|
||||
signal req_op : op_t;
|
||||
signal req_data : std_ulogic_vector(63 downto 0);
|
||||
signal req_same_tag : std_ulogic;
|
||||
signal req_go : std_ulogic;
|
||||
|
||||
signal early_req_row : row_t;
|
||||
|
||||
@@ -455,8 +468,6 @@ architecture rtl of dcache is
|
||||
ptes(j + TLB_PTE_BITS - 1 downto j) := newpte;
|
||||
end;
|
||||
|
||||
signal log_data : std_ulogic_vector(19 downto 0);
|
||||
|
||||
begin
|
||||
|
||||
assert LINE_SIZE mod ROW_SIZE = 0 report "LINE_SIZE not multiple of ROW_SIZE" severity FAILURE;
|
||||
@@ -566,15 +577,15 @@ begin
|
||||
lru => tlb_plru_out
|
||||
);
|
||||
|
||||
process(tlb_req_index, tlb_hit, tlb_hit_way, tlb_plru_out)
|
||||
process(all)
|
||||
begin
|
||||
-- PLRU interface
|
||||
if tlb_hit = '1' and tlb_req_index = i then
|
||||
tlb_plru_acc_en <= '1';
|
||||
if r1.tlb_hit_index = i then
|
||||
tlb_plru_acc_en <= r1.tlb_hit;
|
||||
else
|
||||
tlb_plru_acc_en <= '0';
|
||||
end if;
|
||||
tlb_plru_acc <= std_ulogic_vector(to_unsigned(tlb_hit_way, TLB_WAY_BITS));
|
||||
tlb_plru_acc <= std_ulogic_vector(to_unsigned(r1.tlb_hit_way, TLB_WAY_BITS));
|
||||
tlb_plru_victim(i) <= tlb_plru_out;
|
||||
end process;
|
||||
end generate;
|
||||
@@ -677,16 +688,15 @@ begin
|
||||
lru => plru_out
|
||||
);
|
||||
|
||||
process(req_index, req_op, req_hit_way, plru_out)
|
||||
process(all)
|
||||
begin
|
||||
-- PLRU interface
|
||||
if (req_op = OP_LOAD_HIT or
|
||||
req_op = OP_STORE_HIT) and req_index = i then
|
||||
plru_acc_en <= '1';
|
||||
if r1.hit_index = i then
|
||||
plru_acc_en <= r1.cache_hit;
|
||||
else
|
||||
plru_acc_en <= '0';
|
||||
end if;
|
||||
plru_acc <= std_ulogic_vector(to_unsigned(req_hit_way, WAY_BITS));
|
||||
plru_acc <= std_ulogic_vector(to_unsigned(r1.hit_way, WAY_BITS));
|
||||
plru_victim(i) <= plru_out;
|
||||
end process;
|
||||
end generate;
|
||||
@@ -730,7 +740,7 @@ begin
|
||||
req_row <= get_row(r0.req.addr);
|
||||
req_tag <= get_tag(ra);
|
||||
|
||||
go := r0_valid and not (r0.tlbie or r0.tlbld) and not r1.error_done;
|
||||
go := r0_valid and not (r0.tlbie or r0.tlbld) and not r1.ls_error;
|
||||
|
||||
-- Test if pending request is a hit on any way
|
||||
-- In order to make timing in virtual mode, when we are using the TLB,
|
||||
@@ -788,7 +798,7 @@ begin
|
||||
-- since it will be by the time we perform the store.
|
||||
-- For a load, check the appropriate row valid bit.
|
||||
is_hit := not r0.req.load or r1.rows_valid(req_row mod ROW_PER_LINE);
|
||||
hit_way := r1.store_way;
|
||||
hit_way := replace_way;
|
||||
end if;
|
||||
|
||||
-- Whether to use forwarded data for a load or not
|
||||
@@ -811,8 +821,12 @@ begin
|
||||
-- The way that matched on a hit
|
||||
req_hit_way <= hit_way;
|
||||
|
||||
-- The way to replace on a miss
|
||||
replace_way <= to_integer(unsigned(plru_victim(req_index)));
|
||||
-- The way to replace on a miss
|
||||
if r1.write_tag = '1' then
|
||||
replace_way <= to_integer(unsigned(plru_victim(r1.store_index)));
|
||||
else
|
||||
replace_way <= r1.store_way;
|
||||
end if;
|
||||
|
||||
-- work out whether we have permission for this access
|
||||
-- NB we don't yet implement AMR, thus no KUAP
|
||||
@@ -847,6 +861,7 @@ begin
|
||||
end if;
|
||||
end if;
|
||||
req_op <= op;
|
||||
req_go <= go;
|
||||
|
||||
-- Version of the row number that is valid one cycle earlier
|
||||
-- in the cases where we need to read the cache data BRAM.
|
||||
@@ -928,15 +943,15 @@ begin
|
||||
end if;
|
||||
end loop;
|
||||
|
||||
d_out.valid <= '0';
|
||||
d_out.valid <= r1.ls_valid;
|
||||
d_out.data <= data_out;
|
||||
d_out.store_done <= '0';
|
||||
d_out.error <= '0';
|
||||
d_out.cache_paradox <= '0';
|
||||
d_out.store_done <= not r1.stcx_fail;
|
||||
d_out.error <= r1.ls_error;
|
||||
d_out.cache_paradox <= r1.cache_paradox;
|
||||
|
||||
-- Outputs to MMU
|
||||
m_out.done <= r1.tlbie_done;
|
||||
m_out.err <= '0';
|
||||
m_out.done <= r1.mmu_done;
|
||||
m_out.err <= r1.mmu_error;
|
||||
m_out.data <= data_out;
|
||||
|
||||
-- We have a valid load or store hit or we just completed a slow
|
||||
@@ -962,47 +977,32 @@ begin
|
||||
-- Load hit case is the standard path
|
||||
if r1.hit_load_valid = '1' then
|
||||
report "completing load hit data=" & to_hstring(data_out);
|
||||
d_out.valid <= '1';
|
||||
end if;
|
||||
|
||||
-- error cases complete without stalling
|
||||
if r1.error_done = '1' then
|
||||
if r1.ls_error = '1' then
|
||||
report "completing ld/st with error";
|
||||
d_out.error <= '1';
|
||||
d_out.cache_paradox <= r1.cache_paradox;
|
||||
d_out.valid <= '1';
|
||||
end if;
|
||||
|
||||
-- Slow ops (load miss, NC, stores)
|
||||
if r1.slow_valid = '1' then
|
||||
d_out.store_done <= '1';
|
||||
report "completing store or load miss data=" & to_hstring(data_out);
|
||||
d_out.valid <= '1';
|
||||
end if;
|
||||
|
||||
if r1.stcx_fail = '1' then
|
||||
d_out.store_done <= '0';
|
||||
d_out.valid <= '1';
|
||||
end if;
|
||||
|
||||
else
|
||||
-- Request came from MMU
|
||||
if r1.hit_load_valid = '1' then
|
||||
report "completing load hit to MMU, data=" & to_hstring(m_out.data);
|
||||
m_out.done <= '1';
|
||||
end if;
|
||||
|
||||
-- error cases complete without stalling
|
||||
if r1.error_done = '1' then
|
||||
if r1.mmu_error = '1' then
|
||||
report "completing MMU ld with error";
|
||||
m_out.err <= '1';
|
||||
m_out.done <= '1';
|
||||
end if;
|
||||
|
||||
-- Slow ops (i.e. load miss)
|
||||
if r1.slow_valid = '1' then
|
||||
report "completing MMU load miss, data=" & to_hstring(m_out.data);
|
||||
m_out.done <= '1';
|
||||
end if;
|
||||
end if;
|
||||
|
||||
@@ -1079,7 +1079,7 @@ begin
|
||||
wr_addr <= std_ulogic_vector(to_unsigned(r1.store_row, ROW_BITS));
|
||||
wr_sel <= (others => '1');
|
||||
|
||||
if r1.state = RELOAD_WAIT_ACK and wishbone_in.ack = '1' and r1.store_way = i then
|
||||
if r1.state = RELOAD_WAIT_ACK and wishbone_in.ack = '1' and replace_way = i then
|
||||
do_write <= '1';
|
||||
end if;
|
||||
end if;
|
||||
@@ -1113,20 +1113,28 @@ begin
|
||||
end if;
|
||||
|
||||
-- Fast path for load/store hits. Set signals for the writeback controls.
|
||||
r1.hit_way <= req_hit_way;
|
||||
r1.hit_index <= req_index;
|
||||
if req_op = OP_LOAD_HIT then
|
||||
r1.hit_way <= req_hit_way;
|
||||
r1.hit_load_valid <= '1';
|
||||
else
|
||||
r1.hit_load_valid <= '0';
|
||||
end if;
|
||||
if req_op = OP_LOAD_HIT or req_op = OP_STORE_HIT then
|
||||
r1.cache_hit <= '1';
|
||||
else
|
||||
r1.cache_hit <= '0';
|
||||
end if;
|
||||
|
||||
if req_op = OP_BAD then
|
||||
report "Signalling ld/st error valid_ra=" & std_ulogic'image(valid_ra) &
|
||||
" rc_ok=" & std_ulogic'image(rc_ok) & " perm_ok=" & std_ulogic'image(perm_ok);
|
||||
r1.error_done <= '1';
|
||||
r1.ls_error <= not r0.mmu_req;
|
||||
r1.mmu_error <= r0.mmu_req;
|
||||
r1.cache_paradox <= access_ok;
|
||||
else
|
||||
r1.error_done <= '0';
|
||||
r1.ls_error <= '0';
|
||||
r1.mmu_error <= '0';
|
||||
r1.cache_paradox <= '0';
|
||||
end if;
|
||||
|
||||
@@ -1136,8 +1144,11 @@ begin
|
||||
r1.stcx_fail <= '0';
|
||||
end if;
|
||||
|
||||
-- complete tlbies and TLB loads in the third cycle
|
||||
r1.tlbie_done <= r0_valid and (r0.tlbie or r0.tlbld);
|
||||
-- Record TLB hit information for updating TLB PLRU
|
||||
r1.tlb_hit <= tlb_hit;
|
||||
r1.tlb_hit_way <= tlb_hit_way;
|
||||
r1.tlb_hit_index <= tlb_req_index;
|
||||
|
||||
end if;
|
||||
end process;
|
||||
|
||||
@@ -1179,7 +1190,7 @@ begin
|
||||
r1.forward_data1 <= wishbone_in.dat;
|
||||
end if;
|
||||
r1.forward_sel1 <= (others => '1');
|
||||
r1.forward_way1 <= r1.store_way;
|
||||
r1.forward_way1 <= replace_way;
|
||||
r1.forward_row1 <= r1.store_row;
|
||||
r1.forward_valid1 <= '0';
|
||||
end if;
|
||||
@@ -1194,6 +1205,8 @@ begin
|
||||
r1.slow_valid <= '0';
|
||||
r1.wb.cyc <= '0';
|
||||
r1.wb.stb <= '0';
|
||||
r1.ls_valid <= '0';
|
||||
r1.mmu_done <= '0';
|
||||
|
||||
-- Not normally useful, but helps avoid tons of sim warnings
|
||||
r1.wb.adr <= (others => '0');
|
||||
@@ -1201,15 +1214,29 @@ begin
|
||||
-- One cycle pulses reset
|
||||
r1.slow_valid <= '0';
|
||||
r1.write_bram <= '0';
|
||||
r1.inc_acks <= '0';
|
||||
r1.dec_acks <= '0';
|
||||
|
||||
r1.ls_valid <= '0';
|
||||
-- complete tlbies and TLB loads in the third cycle
|
||||
r1.mmu_done <= r0_valid and (r0.tlbie or r0.tlbld);
|
||||
if req_op = OP_LOAD_HIT or req_op = OP_STCX_FAIL then
|
||||
if r0.mmu_req = '0' then
|
||||
r1.ls_valid <= '1';
|
||||
else
|
||||
r1.mmu_done <= '1';
|
||||
end if;
|
||||
end if;
|
||||
|
||||
if r1.write_tag = '1' then
|
||||
-- Store new tag in selected way
|
||||
for i in 0 to NUM_WAYS-1 loop
|
||||
if i = r1.store_way then
|
||||
if i = replace_way then
|
||||
cache_tags(r1.store_index)((i + 1) * TAG_WIDTH - 1 downto i * TAG_WIDTH) <=
|
||||
(TAG_WIDTH - 1 downto TAG_BITS => '0') & r1.reload_tag;
|
||||
end if;
|
||||
end loop;
|
||||
r1.store_way <= replace_way;
|
||||
r1.write_tag <= '0';
|
||||
end if;
|
||||
|
||||
@@ -1219,12 +1246,23 @@ begin
|
||||
req := r1.req;
|
||||
else
|
||||
req.op := req_op;
|
||||
req.valid := req_go;
|
||||
req.mmu_req := r0.mmu_req;
|
||||
req.dcbz := r0.req.dcbz;
|
||||
req.real_addr := ra;
|
||||
req.data := r0.req.data;
|
||||
req.byte_sel := r0.req.byte_sel;
|
||||
-- Force data to 0 for dcbz
|
||||
if r0.req.dcbz = '0' then
|
||||
req.data := r0.req.data;
|
||||
else
|
||||
req.data := (others => '0');
|
||||
end if;
|
||||
-- Select all bytes for dcbz and for cacheable loads
|
||||
if r0.req.dcbz = '1' or (r0.req.load = '1' and r0.req.nc = '0') then
|
||||
req.byte_sel := (others => '1');
|
||||
else
|
||||
req.byte_sel := r0.req.byte_sel;
|
||||
end if;
|
||||
req.hit_way := req_hit_way;
|
||||
req.repl_way := replace_way;
|
||||
req.same_tag := req_same_tag;
|
||||
|
||||
-- Store the incoming request from r0, if it is a slow request
|
||||
@@ -1240,7 +1278,9 @@ begin
|
||||
case r1.state is
|
||||
when IDLE =>
|
||||
r1.wb.adr <= req.real_addr(r1.wb.adr'left downto 0);
|
||||
r1.dcbz <= '0';
|
||||
r1.wb.sel <= req.byte_sel;
|
||||
r1.wb.dat <= req.data;
|
||||
r1.dcbz <= req.dcbz;
|
||||
|
||||
-- Keep track of our index and way for subsequent stores.
|
||||
r1.store_index <= get_index(req.real_addr);
|
||||
@@ -1251,8 +1291,6 @@ begin
|
||||
|
||||
if req.op = OP_STORE_HIT then
|
||||
r1.store_way <= req.hit_way;
|
||||
else
|
||||
r1.store_way <= req.repl_way;
|
||||
end if;
|
||||
|
||||
-- Reset per-row valid bits, ready for handling OP_LOAD_MISS
|
||||
@@ -1269,11 +1307,9 @@ begin
|
||||
--
|
||||
report "cache miss real addr:" & to_hstring(req.real_addr) &
|
||||
" idx:" & integer'image(get_index(req.real_addr)) &
|
||||
" way:" & integer'image(req.repl_way) &
|
||||
" tag:" & to_hstring(get_tag(req.real_addr));
|
||||
|
||||
-- Start the wishbone cycle
|
||||
r1.wb.sel <= (others => '1');
|
||||
r1.wb.we <= '0';
|
||||
r1.wb.cyc <= '1';
|
||||
r1.wb.stb <= '1';
|
||||
@@ -1283,7 +1319,6 @@ begin
|
||||
r1.write_tag <= '1';
|
||||
|
||||
when OP_LOAD_NC =>
|
||||
r1.wb.sel <= req.byte_sel;
|
||||
r1.wb.cyc <= '1';
|
||||
r1.wb.stb <= '1';
|
||||
r1.wb.we <= '0';
|
||||
@@ -1291,27 +1326,25 @@ begin
|
||||
|
||||
when OP_STORE_HIT | OP_STORE_MISS =>
|
||||
if req.dcbz = '0' then
|
||||
r1.wb.sel <= req.byte_sel;
|
||||
r1.wb.dat <= req.data;
|
||||
r1.state <= STORE_WAIT_ACK;
|
||||
r1.acks_pending <= to_unsigned(1, 3);
|
||||
r1.full <= '0';
|
||||
r1.slow_valid <= '1';
|
||||
if req.mmu_req = '0' then
|
||||
r1.ls_valid <= '1';
|
||||
else
|
||||
r1.mmu_done <= '1';
|
||||
end if;
|
||||
if req.op = OP_STORE_HIT then
|
||||
r1.write_bram <= '1';
|
||||
end if;
|
||||
else
|
||||
-- dcbz is handled much like a load miss except
|
||||
-- that we are writing to memory instead of reading
|
||||
|
||||
-- Start the wishbone writes
|
||||
r1.wb.sel <= (others => '1');
|
||||
r1.wb.dat <= (others => '0');
|
||||
|
||||
-- Handle the rest like a load miss
|
||||
r1.state <= RELOAD_WAIT_ACK;
|
||||
r1.write_tag <= '1';
|
||||
r1.dcbz <= '1';
|
||||
if req.op = OP_STORE_MISS then
|
||||
r1.write_tag <= '1';
|
||||
end if;
|
||||
end if;
|
||||
r1.wb.we <= '1';
|
||||
r1.wb.cyc <= '1';
|
||||
@@ -1357,6 +1390,11 @@ begin
|
||||
r1.store_row = get_row(r1.req.real_addr) then
|
||||
r1.full <= '0';
|
||||
r1.slow_valid <= '1';
|
||||
if r1.mmu_req = '0' then
|
||||
r1.ls_valid <= '1';
|
||||
else
|
||||
r1.mmu_done <= '1';
|
||||
end if;
|
||||
r1.forward_sel <= (others => '1');
|
||||
r1.use_forward1 <= '1';
|
||||
end if;
|
||||
@@ -1379,15 +1417,26 @@ begin
|
||||
when STORE_WAIT_ACK =>
|
||||
stbs_done := r1.wb.stb = '0';
|
||||
acks := r1.acks_pending;
|
||||
if r1.inc_acks /= r1.dec_acks then
|
||||
if r1.inc_acks = '1' then
|
||||
acks := acks + 1;
|
||||
else
|
||||
acks := acks - 1;
|
||||
end if;
|
||||
end if;
|
||||
r1.acks_pending <= acks;
|
||||
-- Clear stb when slave accepted request
|
||||
if wishbone_in.stall = '0' then
|
||||
-- See if there is another store waiting to be done
|
||||
-- which is in the same real page.
|
||||
if acks < 7 and req.same_tag = '1' and
|
||||
(req.op = OP_STORE_MISS or req.op = OP_STORE_HIT) then
|
||||
r1.wb.adr <= req.real_addr(r1.wb.adr'left downto 0);
|
||||
if req.valid = '1' then
|
||||
r1.wb.adr(SET_SIZE_BITS - 1 downto 0) <=
|
||||
req.real_addr(SET_SIZE_BITS - 1 downto 0);
|
||||
r1.wb.dat <= req.data;
|
||||
r1.wb.sel <= req.byte_sel;
|
||||
end if;
|
||||
if acks < 7 and req.same_tag = '1' and
|
||||
(req.op = OP_STORE_MISS or req.op = OP_STORE_HIT) then
|
||||
r1.wb.stb <= '1';
|
||||
stbs_done := false;
|
||||
if req.op = OP_STORE_HIT then
|
||||
@@ -1395,7 +1444,10 @@ begin
|
||||
end if;
|
||||
r1.full <= '0';
|
||||
r1.slow_valid <= '1';
|
||||
acks := acks + 1;
|
||||
-- Store requests never come from the MMU
|
||||
r1.ls_valid <= '1';
|
||||
stbs_done := false;
|
||||
r1.inc_acks <= '1';
|
||||
else
|
||||
r1.wb.stb <= '0';
|
||||
stbs_done := true;
|
||||
@@ -1409,9 +1461,8 @@ begin
|
||||
r1.wb.cyc <= '0';
|
||||
r1.wb.stb <= '0';
|
||||
end if;
|
||||
acks := acks - 1;
|
||||
r1.dec_acks <= '1';
|
||||
end if;
|
||||
r1.acks_pending <= acks;
|
||||
|
||||
when NC_LOAD_WAIT_ACK =>
|
||||
-- Clear stb when slave accepted request
|
||||
@@ -1424,6 +1475,11 @@ begin
|
||||
r1.state <= IDLE;
|
||||
r1.full <= '0';
|
||||
r1.slow_valid <= '1';
|
||||
if r1.mmu_req = '0' then
|
||||
r1.ls_valid <= '1';
|
||||
else
|
||||
r1.mmu_done <= '1';
|
||||
end if;
|
||||
r1.forward_sel <= (others => '1');
|
||||
r1.use_forward1 <= '1';
|
||||
r1.wb.cyc <= '0';
|
||||
@@ -1434,21 +1490,25 @@ begin
|
||||
end if;
|
||||
end process;
|
||||
|
||||
dcache_log: process(clk)
|
||||
dc_log: if LOG_LENGTH > 0 generate
|
||||
signal log_data : std_ulogic_vector(19 downto 0);
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
log_data <= r1.wb.adr(5 downto 3) &
|
||||
wishbone_in.stall &
|
||||
wishbone_in.ack &
|
||||
r1.wb.stb & r1.wb.cyc &
|
||||
d_out.error &
|
||||
d_out.valid &
|
||||
std_ulogic_vector(to_unsigned(op_t'pos(req_op), 3)) &
|
||||
stall_out &
|
||||
std_ulogic_vector(to_unsigned(tlb_hit_way, 3)) &
|
||||
valid_ra &
|
||||
std_ulogic_vector(to_unsigned(state_t'pos(r1.state), 3));
|
||||
end if;
|
||||
end process;
|
||||
log_out <= log_data;
|
||||
dcache_log: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
log_data <= r1.wb.adr(5 downto 3) &
|
||||
wishbone_in.stall &
|
||||
wishbone_in.ack &
|
||||
r1.wb.stb & r1.wb.cyc &
|
||||
d_out.error &
|
||||
d_out.valid &
|
||||
std_ulogic_vector(to_unsigned(op_t'pos(req_op), 3)) &
|
||||
stall_out &
|
||||
std_ulogic_vector(to_unsigned(tlb_hit_way, 3)) &
|
||||
valid_ra &
|
||||
std_ulogic_vector(to_unsigned(state_t'pos(r1.state), 3));
|
||||
end if;
|
||||
end process;
|
||||
log_out <= log_data;
|
||||
end generate;
|
||||
end;
|
||||
|
||||
36
decode1.vhdl
36
decode1.vhdl
@@ -7,6 +7,10 @@ use work.common.all;
|
||||
use work.decode_types.all;
|
||||
|
||||
entity decode1 is
|
||||
generic (
|
||||
-- Non-zero to enable log data collection
|
||||
LOG_LENGTH : natural := 0
|
||||
);
|
||||
port (
|
||||
clk : in std_ulogic;
|
||||
rst : in std_ulogic;
|
||||
@@ -47,7 +51,7 @@ architecture behaviour of decode1 is
|
||||
15 => (ALU, OP_ADD, RA_OR_ZERO, CONST_SI_HI, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- addis
|
||||
28 => (ALU, OP_AND, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0'), -- andi.
|
||||
29 => (ALU, OP_AND, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0'), -- andis.
|
||||
0 => (ALU, OP_ATTN, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- attn
|
||||
0 => (ALU, OP_ATTN, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- attn
|
||||
18 => (ALU, OP_B, NONE, CONST_LI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0'), -- b
|
||||
16 => (ALU, OP_BC, SPR, CONST_BD, NONE, SPR , '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0'), -- bc
|
||||
11 => (ALU, OP_CMP, RA, CONST_SI, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0'), -- cmpi
|
||||
@@ -73,9 +77,9 @@ architecture behaviour of decode1 is
|
||||
45 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- sthu
|
||||
36 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stw
|
||||
37 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stwu
|
||||
8 => (ALU, OP_ADD, RA, CONST_SI, NONE, RT, '0', '0', '1', '0', ONE, '1', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- subfic
|
||||
2 => (ALU, OP_TRAP, RA, CONST_SI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- tdi
|
||||
3 => (ALU, OP_TRAP, RA, CONST_SI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '1'), -- twi
|
||||
8 => (ALU, OP_ADD, RA, CONST_SI, NONE, RT, '0', '0', '1', '0', ONE, '1', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- subfic
|
||||
2 => (ALU, OP_TRAP, RA, CONST_SI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- tdi
|
||||
3 => (ALU, OP_TRAP, RA, CONST_SI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '1'), -- twi
|
||||
26 => (ALU, OP_XOR, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- xori
|
||||
27 => (ALU, OP_XOR, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- xoris
|
||||
others => illegal_inst
|
||||
@@ -357,8 +361,6 @@ architecture behaviour of decode1 is
|
||||
constant nop_instr : decode_rom_t := (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0');
|
||||
constant fetch_fail_inst: decode_rom_t := (LDST, OP_FETCH_FAILED, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0');
|
||||
|
||||
signal log_data : std_ulogic_vector(12 downto 0);
|
||||
|
||||
begin
|
||||
decode1_0: process(clk)
|
||||
begin
|
||||
@@ -524,15 +526,19 @@ begin
|
||||
flush_out <= f.redirect;
|
||||
end process;
|
||||
|
||||
dec1_log : process(clk)
|
||||
d1_log: if LOG_LENGTH > 0 generate
|
||||
signal log_data : std_ulogic_vector(12 downto 0);
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
log_data <= std_ulogic_vector(to_unsigned(insn_type_t'pos(r.decode.insn_type), 6)) &
|
||||
r.nia(5 downto 2) &
|
||||
std_ulogic_vector(to_unsigned(unit_t'pos(r.decode.unit), 2)) &
|
||||
r.valid;
|
||||
end if;
|
||||
end process;
|
||||
log_out <= log_data;
|
||||
dec1_log : process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
log_data <= std_ulogic_vector(to_unsigned(insn_type_t'pos(r.decode.insn_type), 6)) &
|
||||
r.nia(5 downto 2) &
|
||||
std_ulogic_vector(to_unsigned(unit_t'pos(r.decode.unit), 2)) &
|
||||
r.valid;
|
||||
end if;
|
||||
end process;
|
||||
log_out <= log_data;
|
||||
end generate;
|
||||
|
||||
end architecture behaviour;
|
||||
|
||||
34
decode2.vhdl
34
decode2.vhdl
@@ -10,7 +10,9 @@ use work.insn_helpers.all;
|
||||
|
||||
entity decode2 is
|
||||
generic (
|
||||
EX1_BYPASS : boolean := true
|
||||
EX1_BYPASS : boolean := true;
|
||||
-- Non-zero to enable log data collection
|
||||
LOG_LENGTH : natural := 0
|
||||
);
|
||||
port (
|
||||
clk : in std_ulogic;
|
||||
@@ -47,8 +49,6 @@ architecture behaviour of decode2 is
|
||||
|
||||
signal deferred : std_ulogic;
|
||||
|
||||
signal log_data : std_ulogic_vector(9 downto 0);
|
||||
|
||||
type decode_input_reg_t is record
|
||||
reg_valid : std_ulogic;
|
||||
reg : gspr_index_t;
|
||||
@@ -415,18 +415,22 @@ begin
|
||||
e_out <= r.e;
|
||||
end process;
|
||||
|
||||
dec2_log : process(clk)
|
||||
d2_log: if LOG_LENGTH > 0 generate
|
||||
signal log_data : std_ulogic_vector(9 downto 0);
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
log_data <= r.e.nia(5 downto 2) &
|
||||
r.e.valid &
|
||||
stopped_out &
|
||||
stall_out &
|
||||
r.e.bypass_data3 &
|
||||
r.e.bypass_data2 &
|
||||
r.e.bypass_data1;
|
||||
end if;
|
||||
end process;
|
||||
log_out <= log_data;
|
||||
dec2_log : process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
log_data <= r.e.nia(5 downto 2) &
|
||||
r.e.valid &
|
||||
stopped_out &
|
||||
stall_out &
|
||||
r.e.bypass_data3 &
|
||||
r.e.bypass_data2 &
|
||||
r.e.bypass_data1;
|
||||
end if;
|
||||
end process;
|
||||
log_out <= log_data;
|
||||
end generate;
|
||||
|
||||
end architecture behaviour;
|
||||
|
||||
@@ -12,7 +12,9 @@ use work.ppc_fx_insns.all;
|
||||
|
||||
entity execute1 is
|
||||
generic (
|
||||
EX1_BYPASS : boolean := true
|
||||
EX1_BYPASS : boolean := true;
|
||||
-- Non-zero to enable log data collection
|
||||
LOG_LENGTH : natural := 0
|
||||
);
|
||||
port (
|
||||
clk : in std_ulogic;
|
||||
@@ -97,7 +99,6 @@ architecture behaviour of execute1 is
|
||||
-- signals for logging
|
||||
signal exception_log : std_ulogic;
|
||||
signal irq_valid_log : std_ulogic;
|
||||
signal log_data : std_ulogic_vector(14 downto 0);
|
||||
|
||||
type privilege_level is (USER, SUPER);
|
||||
type op_privilege_array is array(insn_type_t) of privilege_level;
|
||||
@@ -619,12 +620,12 @@ begin
|
||||
end loop;
|
||||
else
|
||||
-- trap instructions (tw, twi, td, tdi)
|
||||
v.f.redirect_nia := std_logic_vector(to_unsigned(16#700#, 64));
|
||||
-- set bit 46 to say trap occurred
|
||||
ctrl_tmp.srr1(63 - 46) <= '1';
|
||||
if or (trapval and insn_to(e_in.insn)) = '1' then
|
||||
-- generate trap-type program interrupt
|
||||
exception := '1';
|
||||
v.f.redirect_nia := std_logic_vector(to_unsigned(16#700#, 64));
|
||||
-- set bit 46 to say trap occurred
|
||||
ctrl_tmp.srr1(63 - 46) <= '1';
|
||||
report "trap";
|
||||
end if;
|
||||
end if;
|
||||
@@ -1083,21 +1084,25 @@ begin
|
||||
irq_valid_log <= irq_valid;
|
||||
end process;
|
||||
|
||||
ex1_log : process(clk)
|
||||
e1_log: if LOG_LENGTH > 0 generate
|
||||
signal log_data : std_ulogic_vector(14 downto 0);
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
log_data <= ctrl.msr(MSR_EE) & ctrl.msr(MSR_PR) &
|
||||
ctrl.msr(MSR_IR) & ctrl.msr(MSR_DR) &
|
||||
exception_log &
|
||||
irq_valid_log &
|
||||
std_ulogic_vector(to_unsigned(irq_state_t'pos(ctrl.irq_state), 1)) &
|
||||
"000" &
|
||||
r.e.write_enable &
|
||||
r.e.valid &
|
||||
f_out.redirect &
|
||||
r.busy &
|
||||
flush_out;
|
||||
end if;
|
||||
end process;
|
||||
log_out <= log_data;
|
||||
ex1_log : process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
log_data <= ctrl.msr(MSR_EE) & ctrl.msr(MSR_PR) &
|
||||
ctrl.msr(MSR_IR) & ctrl.msr(MSR_DR) &
|
||||
exception_log &
|
||||
irq_valid_log &
|
||||
std_ulogic_vector(to_unsigned(irq_state_t'pos(ctrl.irq_state), 1)) &
|
||||
"000" &
|
||||
r.e.write_enable &
|
||||
r.e.valid &
|
||||
f_out.redirect &
|
||||
r.busy &
|
||||
flush_out;
|
||||
end if;
|
||||
end process;
|
||||
log_out <= log_data;
|
||||
end generate;
|
||||
end architecture behaviour;
|
||||
|
||||
92
icache.vhdl
92
icache.vhdl
@@ -47,7 +47,9 @@ entity icache is
|
||||
-- L1 ITLB log_2(page_size)
|
||||
TLB_LG_PGSZ : positive := 12;
|
||||
-- Number of real address bits that we store
|
||||
REAL_ADDR_BITS : positive := 56
|
||||
REAL_ADDR_BITS : positive := 56;
|
||||
-- Non-zero to enable log data collection
|
||||
LOG_LENGTH : natural := 0
|
||||
);
|
||||
port (
|
||||
clk : in std_ulogic;
|
||||
@@ -207,9 +209,6 @@ architecture rtl of icache is
|
||||
signal access_ok : std_ulogic;
|
||||
signal use_previous : std_ulogic;
|
||||
|
||||
-- Output data to logger
|
||||
signal log_data : std_ulogic_vector(53 downto 0);
|
||||
|
||||
-- Cache RAM interface
|
||||
type cache_ram_out_t is array(way_t) of cache_row_t;
|
||||
signal cache_out : cache_ram_out_t;
|
||||
@@ -379,7 +378,7 @@ begin
|
||||
begin
|
||||
do_read <= not (stall_in or use_previous);
|
||||
do_write <= '0';
|
||||
if wishbone_in.ack = '1' and r.store_way = i then
|
||||
if wishbone_in.ack = '1' and replace_way = i then
|
||||
do_write <= '1';
|
||||
end if;
|
||||
cache_out(i) <= dout;
|
||||
@@ -413,15 +412,15 @@ begin
|
||||
lru => plru_out
|
||||
);
|
||||
|
||||
process(req_index, req_is_hit, req_hit_way, req_is_hit, plru_out)
|
||||
process(all)
|
||||
begin
|
||||
-- PLRU interface
|
||||
if req_is_hit = '1' and req_index = i then
|
||||
plru_acc_en <= req_is_hit;
|
||||
if get_index(r.hit_nia) = i then
|
||||
plru_acc_en <= r.hit_valid;
|
||||
else
|
||||
plru_acc_en <= '0';
|
||||
end if;
|
||||
plru_acc <= std_ulogic_vector(to_unsigned(req_hit_way, WAY_BITS));
|
||||
plru_acc <= std_ulogic_vector(to_unsigned(r.hit_way, WAY_BITS));
|
||||
plru_victim(i) <= plru_out;
|
||||
end process;
|
||||
end generate;
|
||||
@@ -531,8 +530,12 @@ begin
|
||||
end if;
|
||||
req_hit_way <= hit_way;
|
||||
|
||||
-- The way to replace on a miss
|
||||
replace_way <= to_integer(unsigned(plru_victim(req_index)));
|
||||
-- The way to replace on a miss
|
||||
if r.state = CLR_TAG then
|
||||
replace_way <= to_integer(unsigned(plru_victim(r.store_index)));
|
||||
else
|
||||
replace_way <= r.store_way;
|
||||
end if;
|
||||
|
||||
-- Output instruction from current cache row
|
||||
--
|
||||
@@ -642,7 +645,6 @@ begin
|
||||
|
||||
-- Keep track of our index and way for subsequent stores
|
||||
r.store_index <= req_index;
|
||||
r.store_way <= replace_way;
|
||||
r.store_row <= get_row(req_laddr);
|
||||
r.store_tag <= req_tag;
|
||||
r.store_valid <= '1';
|
||||
@@ -661,12 +663,15 @@ begin
|
||||
|
||||
when CLR_TAG | WAIT_ACK =>
|
||||
if r.state = CLR_TAG then
|
||||
-- Get victim way from plru
|
||||
r.store_way <= replace_way;
|
||||
|
||||
-- Force misses on that way while reloading that line
|
||||
cache_valids(req_index)(r.store_way) <= '0';
|
||||
cache_valids(req_index)(replace_way) <= '0';
|
||||
|
||||
-- Store new tag in selected way
|
||||
for i in 0 to NUM_WAYS-1 loop
|
||||
if i = r.store_way then
|
||||
if i = replace_way then
|
||||
tagset := cache_tags(r.store_index);
|
||||
write_tag(i, tagset, r.store_tag);
|
||||
cache_tags(r.store_index) <= tagset;
|
||||
@@ -702,7 +707,7 @@ begin
|
||||
r.wb.cyc <= '0';
|
||||
|
||||
-- Cache line is now valid
|
||||
cache_valids(r.store_index)(r.store_way) <= r.store_valid and not inval_in;
|
||||
cache_valids(r.store_index)(replace_way) <= r.store_valid and not inval_in;
|
||||
|
||||
-- We are done
|
||||
r.state <= IDLE;
|
||||
@@ -723,35 +728,36 @@ begin
|
||||
end if;
|
||||
end process;
|
||||
|
||||
data_log: process(clk)
|
||||
variable lway: way_t;
|
||||
variable wstate: std_ulogic;
|
||||
icache_log: if LOG_LENGTH > 0 generate
|
||||
-- Output data to logger
|
||||
signal log_data : std_ulogic_vector(53 downto 0);
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
if req_is_hit then
|
||||
data_log: process(clk)
|
||||
variable lway: way_t;
|
||||
variable wstate: std_ulogic;
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
lway := req_hit_way;
|
||||
else
|
||||
lway := replace_way;
|
||||
wstate := '0';
|
||||
if r.state /= IDLE then
|
||||
wstate := '1';
|
||||
end if;
|
||||
log_data <= i_out.valid &
|
||||
i_out.insn &
|
||||
wishbone_in.ack &
|
||||
r.wb.adr(5 downto 3) &
|
||||
r.wb.stb & r.wb.cyc &
|
||||
wishbone_in.stall &
|
||||
stall_out &
|
||||
r.fetch_failed &
|
||||
r.hit_nia(5 downto 2) &
|
||||
wstate &
|
||||
std_ulogic_vector(to_unsigned(lway, 3)) &
|
||||
req_is_hit & req_is_miss &
|
||||
access_ok &
|
||||
ra_valid;
|
||||
end if;
|
||||
wstate := '0';
|
||||
if r.state /= IDLE then
|
||||
wstate := '1';
|
||||
end if;
|
||||
log_data <= i_out.valid &
|
||||
i_out.insn &
|
||||
wishbone_in.ack &
|
||||
r.wb.adr(5 downto 3) &
|
||||
r.wb.stb & r.wb.cyc &
|
||||
wishbone_in.stall &
|
||||
stall_out &
|
||||
r.fetch_failed &
|
||||
r.hit_nia(5 downto 2) &
|
||||
wstate &
|
||||
std_ulogic_vector(to_unsigned(lway, 3)) &
|
||||
req_is_hit & req_is_miss &
|
||||
access_ok &
|
||||
ra_valid;
|
||||
end if;
|
||||
end process;
|
||||
log_out <= log_data;
|
||||
end process;
|
||||
log_out <= log_data;
|
||||
end generate;
|
||||
end;
|
||||
|
||||
278
loadstore1.vhdl
278
loadstore1.vhdl
@@ -10,6 +10,10 @@ use work.common.all;
|
||||
-- We calculate the address in the first cycle
|
||||
|
||||
entity loadstore1 is
|
||||
generic (
|
||||
-- Non-zero to enable log data collection
|
||||
LOG_LENGTH : natural := 0
|
||||
);
|
||||
port (
|
||||
clk : in std_ulogic;
|
||||
rst : in std_ulogic;
|
||||
@@ -40,10 +44,9 @@ architecture behave of loadstore1 is
|
||||
type state_t is (IDLE, -- ready for instruction
|
||||
SECOND_REQ, -- send 2nd request of unaligned xfer
|
||||
ACK_WAIT, -- waiting for ack from dcache
|
||||
LD_UPDATE, -- writing rA with computed addr on load
|
||||
MMU_LOOKUP, -- waiting for MMU to look up translation
|
||||
TLBIE_WAIT, -- waiting for MMU to finish doing a tlbie
|
||||
SPR_CMPLT -- complete a mf/tspr operation
|
||||
COMPLETE -- extra cycle to complete an operation
|
||||
);
|
||||
|
||||
type reg_stage_t is record
|
||||
@@ -69,12 +72,18 @@ architecture behave of loadstore1 is
|
||||
priv_mode : std_ulogic;
|
||||
state : state_t;
|
||||
dwords_done : std_ulogic;
|
||||
last_dword : std_ulogic;
|
||||
first_bytes : std_ulogic_vector(7 downto 0);
|
||||
second_bytes : std_ulogic_vector(7 downto 0);
|
||||
dar : std_ulogic_vector(63 downto 0);
|
||||
dsisr : std_ulogic_vector(31 downto 0);
|
||||
instr_fault : std_ulogic;
|
||||
sprval : std_ulogic_vector(63 downto 0);
|
||||
busy : std_ulogic;
|
||||
wait_dcache : std_ulogic;
|
||||
wait_mmu : std_ulogic;
|
||||
do_update : std_ulogic;
|
||||
extra_cycle : std_ulogic;
|
||||
end record;
|
||||
|
||||
type byte_sel_t is array(0 to 7) of std_ulogic;
|
||||
@@ -84,8 +93,6 @@ architecture behave of loadstore1 is
|
||||
signal r, rin : reg_stage_t;
|
||||
signal lsu_sum : std_ulogic_vector(63 downto 0);
|
||||
|
||||
signal log_data : std_ulogic_vector(9 downto 0);
|
||||
|
||||
-- Generate byte enables from sizes
|
||||
function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is
|
||||
begin
|
||||
@@ -125,6 +132,8 @@ begin
|
||||
if rising_edge(clk) then
|
||||
if rst = '1' then
|
||||
r.state <= IDLE;
|
||||
r.busy <= '0';
|
||||
r.do_update <= '0';
|
||||
else
|
||||
r <= rin;
|
||||
end if;
|
||||
@@ -143,13 +152,14 @@ begin
|
||||
variable req : std_ulogic;
|
||||
variable busy : std_ulogic;
|
||||
variable addr : std_ulogic_vector(63 downto 0);
|
||||
variable maddr : std_ulogic_vector(63 downto 0);
|
||||
variable wdata : std_ulogic_vector(63 downto 0);
|
||||
variable write_enable : std_ulogic;
|
||||
variable do_update : std_ulogic;
|
||||
variable two_dwords : std_ulogic;
|
||||
variable done : std_ulogic;
|
||||
variable data_permuted : std_ulogic_vector(63 downto 0);
|
||||
variable data_trimmed : std_ulogic_vector(63 downto 0);
|
||||
variable store_data : std_ulogic_vector(63 downto 0);
|
||||
variable use_second : byte_sel_t;
|
||||
variable trim_ctl : trim_ctl_t;
|
||||
variable negative : std_ulogic;
|
||||
@@ -163,8 +173,6 @@ begin
|
||||
begin
|
||||
v := r;
|
||||
req := '0';
|
||||
byte_sel := (others => '0');
|
||||
addr := lsu_sum;
|
||||
v.mfspr := '0';
|
||||
mmu_mtspr := '0';
|
||||
itlb_fault := '0';
|
||||
@@ -173,8 +181,9 @@ begin
|
||||
mmureq := '0';
|
||||
|
||||
write_enable := '0';
|
||||
do_update := '0';
|
||||
two_dwords := or (r.second_bytes);
|
||||
|
||||
do_update := r.do_update;
|
||||
v.do_update := '0';
|
||||
|
||||
-- load data formatting
|
||||
byte_offset := unsigned(r.addr(2 downto 0));
|
||||
@@ -204,10 +213,10 @@ begin
|
||||
-- trim and sign-extend
|
||||
for i in 0 to 7 loop
|
||||
if i < to_integer(unsigned(r.length)) then
|
||||
if two_dwords = '1' then
|
||||
if r.dwords_done = '1' then
|
||||
trim_ctl(i) := '1' & not use_second(i);
|
||||
else
|
||||
trim_ctl(i) := not use_second(i) & '0';
|
||||
trim_ctl(i) := "10";
|
||||
end if;
|
||||
else
|
||||
trim_ctl(i) := '0' & (negative and r.sign_extend);
|
||||
@@ -224,121 +233,127 @@ begin
|
||||
end case;
|
||||
end loop;
|
||||
|
||||
-- Byte reversing and rotating for stores
|
||||
-- Done in the first cycle (when l_in.valid = 1)
|
||||
store_data := r.store_data;
|
||||
if l_in.valid = '1' then
|
||||
byte_offset := unsigned(lsu_sum(2 downto 0));
|
||||
brev_lenm1 := "000";
|
||||
if l_in.byte_reverse = '1' then
|
||||
brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1;
|
||||
end if;
|
||||
for i in 0 to 7 loop
|
||||
k := (to_unsigned(i, 3) - byte_offset) xor brev_lenm1;
|
||||
j := to_integer(k) * 8;
|
||||
store_data(i * 8 + 7 downto i * 8) := l_in.data(j + 7 downto j);
|
||||
end loop;
|
||||
end if;
|
||||
v.store_data := store_data;
|
||||
|
||||
-- compute (addr + 8) & ~7 for the second doubleword when unaligned
|
||||
next_addr := std_ulogic_vector(unsigned(r.addr(63 downto 3)) + 1) & "000";
|
||||
|
||||
-- Busy calculation.
|
||||
-- We need to minimize the delay from clock to busy valid because it
|
||||
-- gates the start of execution of the next instruction.
|
||||
busy := r.busy and not ((r.wait_dcache and d_in.valid) or (r.wait_mmu and m_in.done));
|
||||
v.busy := busy;
|
||||
|
||||
done := '0';
|
||||
if r.state /= IDLE and busy = '0' then
|
||||
done := '1';
|
||||
end if;
|
||||
exception := '0';
|
||||
|
||||
if r.dwords_done = '1' or r.state = SECOND_REQ then
|
||||
maddr := next_addr;
|
||||
byte_sel := r.second_bytes;
|
||||
else
|
||||
maddr := r.addr;
|
||||
byte_sel := r.first_bytes;
|
||||
end if;
|
||||
addr := maddr;
|
||||
|
||||
case r.state is
|
||||
when IDLE =>
|
||||
|
||||
when SECOND_REQ =>
|
||||
addr := next_addr;
|
||||
byte_sel := r.second_bytes;
|
||||
req := '1';
|
||||
v.state := ACK_WAIT;
|
||||
v.last_dword := '0';
|
||||
|
||||
when ACK_WAIT =>
|
||||
if d_in.valid = '1' then
|
||||
if d_in.error = '1' then
|
||||
-- dcache will discard the second request if it
|
||||
-- gets an error on the 1st of two requests
|
||||
if r.dwords_done = '1' then
|
||||
addr := next_addr;
|
||||
else
|
||||
addr := r.addr;
|
||||
end if;
|
||||
if d_in.cache_paradox = '1' then
|
||||
-- signal an interrupt straight away
|
||||
exception := '1';
|
||||
dsisr(63 - 38) := not r.load;
|
||||
-- XXX there is no architected bit for this
|
||||
dsisr(63 - 35) := d_in.cache_paradox;
|
||||
v.state := IDLE;
|
||||
else
|
||||
-- Look up the translation for TLB miss
|
||||
-- and also for permission error and RC error
|
||||
-- in case the PTE has been updated.
|
||||
mmureq := '1';
|
||||
v.state := MMU_LOOKUP;
|
||||
end if;
|
||||
if d_in.error = '1' then
|
||||
-- dcache will discard the second request if it
|
||||
-- gets an error on the 1st of two requests
|
||||
if d_in.cache_paradox = '1' then
|
||||
-- signal an interrupt straight away
|
||||
exception := '1';
|
||||
dsisr(63 - 38) := not r.load;
|
||||
-- XXX there is no architected bit for this
|
||||
dsisr(63 - 35) := d_in.cache_paradox;
|
||||
else
|
||||
if two_dwords = '1' and r.dwords_done = '0' then
|
||||
v.dwords_done := '1';
|
||||
if r.load = '1' then
|
||||
v.load_data := data_permuted;
|
||||
end if;
|
||||
else
|
||||
write_enable := r.load;
|
||||
if r.load = '1' and r.update = '1' then
|
||||
-- loads with rA update need an extra cycle
|
||||
v.state := LD_UPDATE;
|
||||
else
|
||||
-- stores write back rA update in this cycle
|
||||
do_update := r.update;
|
||||
done := '1';
|
||||
v.state := IDLE;
|
||||
end if;
|
||||
end if;
|
||||
-- Look up the translation for TLB miss
|
||||
-- and also for permission error and RC error
|
||||
-- in case the PTE has been updated.
|
||||
mmureq := '1';
|
||||
v.state := MMU_LOOKUP;
|
||||
end if;
|
||||
end if;
|
||||
if d_in.valid = '1' then
|
||||
if r.last_dword = '0' then
|
||||
v.dwords_done := '1';
|
||||
v.last_dword := '1';
|
||||
if r.load = '1' then
|
||||
v.load_data := data_permuted;
|
||||
end if;
|
||||
else
|
||||
write_enable := r.load;
|
||||
if r.extra_cycle = '1' then
|
||||
-- loads with rA update need an extra cycle
|
||||
v.state := COMPLETE;
|
||||
v.do_update := r.update;
|
||||
else
|
||||
-- stores write back rA update in this cycle
|
||||
do_update := r.update;
|
||||
end if;
|
||||
v.busy := '0';
|
||||
end if;
|
||||
end if;
|
||||
-- r.wait_dcache gets set one cycle after we come into ACK_WAIT state,
|
||||
-- which is OK because the dcache always takes at least two cycles.
|
||||
v.wait_dcache := r.last_dword and not r.extra_cycle;
|
||||
|
||||
when MMU_LOOKUP =>
|
||||
if r.dwords_done = '1' then
|
||||
addr := next_addr;
|
||||
byte_sel := r.second_bytes;
|
||||
else
|
||||
addr := r.addr;
|
||||
byte_sel := r.first_bytes;
|
||||
end if;
|
||||
if m_in.done = '1' then
|
||||
if m_in.invalid = '0' and m_in.perm_error = '0' and m_in.rc_error = '0' and
|
||||
m_in.badtree = '0' and m_in.segerr = '0' then
|
||||
if r.instr_fault = '0' then
|
||||
-- retry the request now that the MMU has installed a TLB entry
|
||||
req := '1';
|
||||
if two_dwords = '1' and r.dwords_done = '0' then
|
||||
v.state := SECOND_REQ;
|
||||
else
|
||||
v.state := ACK_WAIT;
|
||||
end if;
|
||||
if r.instr_fault = '0' then
|
||||
-- retry the request now that the MMU has installed a TLB entry
|
||||
req := '1';
|
||||
if r.last_dword = '0' then
|
||||
v.state := SECOND_REQ;
|
||||
else
|
||||
-- nothing to do, the icache retries automatically
|
||||
done := '1';
|
||||
v.state := IDLE;
|
||||
v.state := ACK_WAIT;
|
||||
end if;
|
||||
else
|
||||
exception := '1';
|
||||
dsisr(63 - 33) := m_in.invalid;
|
||||
dsisr(63 - 36) := m_in.perm_error;
|
||||
dsisr(63 - 38) := not r.load;
|
||||
dsisr(63 - 44) := m_in.badtree;
|
||||
dsisr(63 - 45) := m_in.rc_error;
|
||||
v.state := IDLE;
|
||||
end if;
|
||||
end if;
|
||||
if m_in.err = '1' then
|
||||
exception := '1';
|
||||
dsisr(63 - 33) := m_in.invalid;
|
||||
dsisr(63 - 36) := m_in.perm_error;
|
||||
dsisr(63 - 38) := not r.load;
|
||||
dsisr(63 - 44) := m_in.badtree;
|
||||
dsisr(63 - 45) := m_in.rc_error;
|
||||
end if;
|
||||
|
||||
when TLBIE_WAIT =>
|
||||
if m_in.done = '1' then
|
||||
-- tlbie is finished
|
||||
done := '1';
|
||||
v.state := IDLE;
|
||||
end if;
|
||||
|
||||
when LD_UPDATE =>
|
||||
do_update := '1';
|
||||
v.state := IDLE;
|
||||
done := '1';
|
||||
|
||||
when SPR_CMPLT =>
|
||||
done := '1';
|
||||
v.state := IDLE;
|
||||
when COMPLETE =>
|
||||
|
||||
end case;
|
||||
|
||||
busy := '1';
|
||||
if r.state = IDLE or done = '1' then
|
||||
busy := '0';
|
||||
if done = '1' or exception = '1' then
|
||||
v.state := IDLE;
|
||||
v.busy := '0';
|
||||
end if;
|
||||
|
||||
-- Note that l_in.valid is gated with busy inside execute1
|
||||
@@ -349,6 +364,7 @@ begin
|
||||
v.tlbie := '0';
|
||||
v.instr_fault := '0';
|
||||
v.dwords_done := '0';
|
||||
v.last_dword := '1';
|
||||
v.write_reg := l_in.write_reg;
|
||||
v.length := l_in.length;
|
||||
v.byte_reverse := l_in.byte_reverse;
|
||||
@@ -361,6 +377,13 @@ begin
|
||||
v.nc := l_in.ci;
|
||||
v.virt_mode := l_in.virt_mode;
|
||||
v.priv_mode := l_in.priv_mode;
|
||||
v.wait_dcache := '0';
|
||||
v.wait_mmu := '0';
|
||||
v.do_update := '0';
|
||||
v.extra_cycle := '0';
|
||||
|
||||
addr := lsu_sum;
|
||||
maddr := l_in.addr2; -- address from RB for tlbie
|
||||
|
||||
-- XXX Temporary hack. Mark the op as non-cachable if the address
|
||||
-- is the form 0xc------- for a real-mode access.
|
||||
@@ -374,24 +397,14 @@ begin
|
||||
v.first_bytes := byte_sel;
|
||||
v.second_bytes := long_sel(15 downto 8);
|
||||
|
||||
-- Do byte reversing and rotating for stores in the first cycle
|
||||
byte_offset := unsigned(lsu_sum(2 downto 0));
|
||||
brev_lenm1 := "000";
|
||||
if l_in.byte_reverse = '1' then
|
||||
brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1;
|
||||
end if;
|
||||
for i in 0 to 7 loop
|
||||
k := (to_unsigned(i, 3) xor brev_lenm1) + byte_offset;
|
||||
j := to_integer(k) * 8;
|
||||
v.store_data(j + 7 downto j) := l_in.data(i * 8 + 7 downto i * 8);
|
||||
end loop;
|
||||
|
||||
case l_in.op is
|
||||
when OP_STORE =>
|
||||
req := '1';
|
||||
when OP_LOAD =>
|
||||
req := '1';
|
||||
v.load := '1';
|
||||
-- Allow an extra cycle for RA update on loads
|
||||
v.extra_cycle := l_in.update;
|
||||
when OP_DCBZ =>
|
||||
req := '1';
|
||||
v.dcbz := '1';
|
||||
@@ -399,6 +412,7 @@ begin
|
||||
mmureq := '1';
|
||||
v.tlbie := '1';
|
||||
v.state := TLBIE_WAIT;
|
||||
v.wait_mmu := '1';
|
||||
when OP_MFSPR =>
|
||||
v.mfspr := '1';
|
||||
-- partial decode on SPR number should be adequate given
|
||||
@@ -413,7 +427,7 @@ begin
|
||||
-- reading one of the SPRs in the MMU
|
||||
v.sprval := m_in.sprval;
|
||||
end if;
|
||||
v.state := SPR_CMPLT;
|
||||
v.state := COMPLETE;
|
||||
when OP_MTSPR =>
|
||||
if sprn(9) = '0' and sprn(5) = '0' then
|
||||
if sprn(0) = '0' then
|
||||
@@ -421,19 +435,20 @@ begin
|
||||
else
|
||||
v.dar := l_in.data;
|
||||
end if;
|
||||
v.state := SPR_CMPLT;
|
||||
v.state := COMPLETE;
|
||||
else
|
||||
-- writing one of the SPRs in the MMU
|
||||
mmu_mtspr := '1';
|
||||
v.state := TLBIE_WAIT;
|
||||
v.wait_mmu := '1';
|
||||
end if;
|
||||
when OP_FETCH_FAILED =>
|
||||
-- send it to the MMU to do the radix walk
|
||||
addr := l_in.nia;
|
||||
v.addr := l_in.nia;
|
||||
maddr := l_in.nia;
|
||||
v.instr_fault := '1';
|
||||
mmureq := '1';
|
||||
v.state := MMU_LOOKUP;
|
||||
v.wait_mmu := '1';
|
||||
when others =>
|
||||
assert false report "unknown op sent to loadstore1";
|
||||
end case;
|
||||
@@ -445,6 +460,8 @@ begin
|
||||
v.state := SECOND_REQ;
|
||||
end if;
|
||||
end if;
|
||||
|
||||
v.busy := req or mmureq or mmu_mtspr;
|
||||
end if;
|
||||
|
||||
-- Update outputs to dcache
|
||||
@@ -454,7 +471,7 @@ begin
|
||||
d_out.nc <= v.nc;
|
||||
d_out.reserve <= v.reserve;
|
||||
d_out.addr <= addr;
|
||||
d_out.data <= v.store_data;
|
||||
d_out.data <= store_data;
|
||||
d_out.byte_sel <= byte_sel;
|
||||
d_out.virt_mode <= v.virt_mode;
|
||||
d_out.priv_mode <= v.priv_mode;
|
||||
@@ -467,7 +484,7 @@ begin
|
||||
m_out.tlbie <= v.tlbie;
|
||||
m_out.mtspr <= mmu_mtspr;
|
||||
m_out.sprn <= sprn;
|
||||
m_out.addr <= addr;
|
||||
m_out.addr <= maddr;
|
||||
m_out.slbia <= l_in.insn(7);
|
||||
m_out.rs <= l_in.data;
|
||||
|
||||
@@ -513,18 +530,23 @@ begin
|
||||
|
||||
end process;
|
||||
|
||||
ls1_log: process(clk)
|
||||
l1_log: if LOG_LENGTH > 0 generate
|
||||
signal log_data : std_ulogic_vector(9 downto 0);
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
log_data <= e_out.busy &
|
||||
e_out.exception &
|
||||
l_out.valid &
|
||||
m_out.valid &
|
||||
d_out.valid &
|
||||
m_in.done &
|
||||
r.dwords_done &
|
||||
std_ulogic_vector(to_unsigned(state_t'pos(r.state), 3));
|
||||
end if;
|
||||
end process;
|
||||
log_out <= log_data;
|
||||
ls1_log: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
log_data <= e_out.busy &
|
||||
e_out.exception &
|
||||
l_out.valid &
|
||||
m_out.valid &
|
||||
d_out.valid &
|
||||
m_in.done &
|
||||
r.dwords_done &
|
||||
std_ulogic_vector(to_unsigned(state_t'pos(r.state), 3));
|
||||
end if;
|
||||
end process;
|
||||
log_out <= log_data;
|
||||
end generate;
|
||||
|
||||
end;
|
||||
|
||||
160
mmu.vhdl
160
mmu.vhdl
@@ -35,7 +35,7 @@ architecture behave of mmu is
|
||||
RADIX_LOOKUP,
|
||||
RADIX_READ_WAIT,
|
||||
RADIX_LOAD_TLB,
|
||||
RADIX_ERROR
|
||||
RADIX_FINISH
|
||||
);
|
||||
|
||||
type reg_stage_t is record
|
||||
@@ -51,6 +51,8 @@ architecture behave of mmu is
|
||||
pid : std_ulogic_vector(31 downto 0);
|
||||
-- internal state
|
||||
state : state_t;
|
||||
done : std_ulogic;
|
||||
err : std_ulogic;
|
||||
pgtbl0 : std_ulogic_vector(63 downto 0);
|
||||
pt0_valid : std_ulogic;
|
||||
pgtbl3 : std_ulogic_vector(63 downto 0);
|
||||
@@ -91,7 +93,10 @@ begin
|
||||
report "MMU got tlb miss for " & to_hstring(rin.addr);
|
||||
end if;
|
||||
if l_out.done = '1' then
|
||||
report "MMU completing op with invalid=" & std_ulogic'image(l_out.invalid) &
|
||||
report "MMU completing op without error";
|
||||
end if;
|
||||
if l_out.err = '1' then
|
||||
report "MMU completing op with err invalid=" & std_ulogic'image(l_out.invalid) &
|
||||
" badtree=" & std_ulogic'image(l_out.badtree);
|
||||
end if;
|
||||
if rin.state = RADIX_LOOKUP then
|
||||
@@ -176,7 +181,6 @@ begin
|
||||
mmu_1: process(all)
|
||||
variable v : reg_stage_t;
|
||||
variable dcreq : std_ulogic;
|
||||
variable done : std_ulogic;
|
||||
variable tlb_load : std_ulogic;
|
||||
variable itlb_load : std_ulogic;
|
||||
variable tlbie_req : std_ulogic;
|
||||
@@ -199,7 +203,8 @@ begin
|
||||
v := r;
|
||||
v.valid := '0';
|
||||
dcreq := '0';
|
||||
done := '0';
|
||||
v.done := '0';
|
||||
v.err := '0';
|
||||
v.invalid := '0';
|
||||
v.badtree := '0';
|
||||
v.segerror := '0';
|
||||
@@ -262,7 +267,7 @@ begin
|
||||
v.state := PROC_TBL_READ;
|
||||
elsif mbits = 0 then
|
||||
-- Use RPDS = 0 to disable radix tree walks
|
||||
v.state := RADIX_ERROR;
|
||||
v.state := RADIX_FINISH;
|
||||
v.invalid := '1';
|
||||
else
|
||||
v.state := SEGMENT_CHECK;
|
||||
@@ -291,8 +296,7 @@ begin
|
||||
|
||||
when TLB_WAIT =>
|
||||
if d_in.done = '1' then
|
||||
done := '1';
|
||||
v.state := IDLE;
|
||||
v.state := RADIX_FINISH;
|
||||
end if;
|
||||
|
||||
when PROC_TBL_READ =>
|
||||
@@ -302,32 +306,31 @@ begin
|
||||
|
||||
when PROC_TBL_WAIT =>
|
||||
if d_in.done = '1' then
|
||||
if d_in.err = '0' then
|
||||
if r.addr(63) = '1' then
|
||||
v.pgtbl3 := data;
|
||||
v.pt3_valid := '1';
|
||||
else
|
||||
v.pgtbl0 := data;
|
||||
v.pt0_valid := '1';
|
||||
end if;
|
||||
-- rts == radix tree size, # address bits being translated
|
||||
rts := unsigned('0' & data(62 downto 61) & data(7 downto 5));
|
||||
-- mbits == # address bits to index top level of tree
|
||||
mbits := unsigned('0' & data(4 downto 0));
|
||||
-- set v.shift to rts so that we can use finalmask for the segment check
|
||||
v.shift := rts;
|
||||
v.mask_size := mbits(4 downto 0);
|
||||
v.pgbase := data(55 downto 8) & x"00";
|
||||
if mbits = 0 then
|
||||
v.state := RADIX_ERROR;
|
||||
v.invalid := '1';
|
||||
else
|
||||
v.state := SEGMENT_CHECK;
|
||||
end if;
|
||||
if r.addr(63) = '1' then
|
||||
v.pgtbl3 := data;
|
||||
v.pt3_valid := '1';
|
||||
else
|
||||
v.state := RADIX_ERROR;
|
||||
v.badtree := '1';
|
||||
v.pgtbl0 := data;
|
||||
v.pt0_valid := '1';
|
||||
end if;
|
||||
-- rts == radix tree size, # address bits being translated
|
||||
rts := unsigned('0' & data(62 downto 61) & data(7 downto 5));
|
||||
-- mbits == # address bits to index top level of tree
|
||||
mbits := unsigned('0' & data(4 downto 0));
|
||||
-- set v.shift to rts so that we can use finalmask for the segment check
|
||||
v.shift := rts;
|
||||
v.mask_size := mbits(4 downto 0);
|
||||
v.pgbase := data(55 downto 8) & x"00";
|
||||
if mbits = 0 then
|
||||
v.state := RADIX_FINISH;
|
||||
v.invalid := '1';
|
||||
else
|
||||
v.state := SEGMENT_CHECK;
|
||||
end if;
|
||||
end if;
|
||||
if d_in.err = '1' then
|
||||
v.state := RADIX_FINISH;
|
||||
v.badtree := '1';
|
||||
end if;
|
||||
|
||||
when SEGMENT_CHECK =>
|
||||
@@ -335,10 +338,10 @@ begin
|
||||
v.shift := r.shift + (31 - 12) - mbits;
|
||||
nonzero := or(r.addr(61 downto 31) and not finalmask(30 downto 0));
|
||||
if r.addr(63) /= r.addr(62) or nonzero = '1' then
|
||||
v.state := RADIX_ERROR;
|
||||
v.state := RADIX_FINISH;
|
||||
v.segerror := '1';
|
||||
elsif mbits < 5 or mbits > 16 or mbits > (r.shift + (31 - 12)) then
|
||||
v.state := RADIX_ERROR;
|
||||
v.state := RADIX_FINISH;
|
||||
v.badtree := '1';
|
||||
else
|
||||
v.state := RADIX_LOOKUP;
|
||||
@@ -350,54 +353,53 @@ begin
|
||||
|
||||
when RADIX_READ_WAIT =>
|
||||
if d_in.done = '1' then
|
||||
if d_in.err = '0' then
|
||||
v.pde := data;
|
||||
-- test valid bit
|
||||
if data(63) = '1' then
|
||||
-- test leaf bit
|
||||
if data(62) = '1' then
|
||||
-- check permissions and RC bits
|
||||
perm_ok := '0';
|
||||
if r.priv = '1' or data(3) = '0' then
|
||||
if r.iside = '0' then
|
||||
perm_ok := data(1) or (data(2) and not r.store);
|
||||
else
|
||||
-- no IAMR, so no KUEP support for now
|
||||
-- deny execute permission if cache inhibited
|
||||
perm_ok := data(0) and not data(5);
|
||||
end if;
|
||||
end if;
|
||||
rc_ok := data(8) and (data(7) or not r.store);
|
||||
if perm_ok = '1' and rc_ok = '1' then
|
||||
v.state := RADIX_LOAD_TLB;
|
||||
v.pde := data;
|
||||
-- test valid bit
|
||||
if data(63) = '1' then
|
||||
-- test leaf bit
|
||||
if data(62) = '1' then
|
||||
-- check permissions and RC bits
|
||||
perm_ok := '0';
|
||||
if r.priv = '1' or data(3) = '0' then
|
||||
if r.iside = '0' then
|
||||
perm_ok := data(1) or (data(2) and not r.store);
|
||||
else
|
||||
v.state := RADIX_ERROR;
|
||||
v.perm_err := not perm_ok;
|
||||
-- permission error takes precedence over RC error
|
||||
v.rc_error := perm_ok;
|
||||
end if;
|
||||
else
|
||||
mbits := unsigned('0' & data(4 downto 0));
|
||||
if mbits < 5 or mbits > 16 or mbits > r.shift then
|
||||
v.state := RADIX_ERROR;
|
||||
v.badtree := '1';
|
||||
else
|
||||
v.shift := v.shift - mbits;
|
||||
v.mask_size := mbits(4 downto 0);
|
||||
v.pgbase := data(55 downto 8) & x"00";
|
||||
v.state := RADIX_LOOKUP;
|
||||
-- no IAMR, so no KUEP support for now
|
||||
-- deny execute permission if cache inhibited
|
||||
perm_ok := data(0) and not data(5);
|
||||
end if;
|
||||
end if;
|
||||
rc_ok := data(8) and (data(7) or not r.store);
|
||||
if perm_ok = '1' and rc_ok = '1' then
|
||||
v.state := RADIX_LOAD_TLB;
|
||||
else
|
||||
v.state := RADIX_FINISH;
|
||||
v.perm_err := not perm_ok;
|
||||
-- permission error takes precedence over RC error
|
||||
v.rc_error := perm_ok;
|
||||
end if;
|
||||
else
|
||||
-- non-present PTE, generate a DSI
|
||||
v.state := RADIX_ERROR;
|
||||
v.invalid := '1';
|
||||
mbits := unsigned('0' & data(4 downto 0));
|
||||
if mbits < 5 or mbits > 16 or mbits > r.shift then
|
||||
v.state := RADIX_FINISH;
|
||||
v.badtree := '1';
|
||||
else
|
||||
v.shift := v.shift - mbits;
|
||||
v.mask_size := mbits(4 downto 0);
|
||||
v.pgbase := data(55 downto 8) & x"00";
|
||||
v.state := RADIX_LOOKUP;
|
||||
end if;
|
||||
end if;
|
||||
else
|
||||
v.state := RADIX_ERROR;
|
||||
v.badtree := '1';
|
||||
-- non-present PTE, generate a DSI
|
||||
v.state := RADIX_FINISH;
|
||||
v.invalid := '1';
|
||||
end if;
|
||||
end if;
|
||||
if d_in.err = '1' then
|
||||
v.state := RADIX_FINISH;
|
||||
v.badtree := '1';
|
||||
end if;
|
||||
|
||||
when RADIX_LOAD_TLB =>
|
||||
tlb_load := '1';
|
||||
@@ -406,16 +408,19 @@ begin
|
||||
v.state := TLB_WAIT;
|
||||
else
|
||||
itlb_load := '1';
|
||||
done := '1';
|
||||
v.state := IDLE;
|
||||
end if;
|
||||
|
||||
when RADIX_ERROR =>
|
||||
done := '1';
|
||||
when RADIX_FINISH =>
|
||||
v.state := IDLE;
|
||||
|
||||
end case;
|
||||
|
||||
if v.state = RADIX_FINISH or (v.state = RADIX_LOAD_TLB and r.iside = '1') then
|
||||
v.err := v.invalid or v.badtree or v.segerror or v.perm_err or v.rc_error;
|
||||
v.done := not v.err;
|
||||
end if;
|
||||
|
||||
if r.addr(63) = '1' then
|
||||
effpid := x"00000000";
|
||||
else
|
||||
@@ -451,7 +456,8 @@ begin
|
||||
tlb_data := (others => '0');
|
||||
end if;
|
||||
|
||||
l_out.done <= done;
|
||||
l_out.done <= r.done;
|
||||
l_out.err <= r.err;
|
||||
l_out.invalid <= r.invalid;
|
||||
l_out.badtree <= r.badtree;
|
||||
l_out.segerr <= r.segerror;
|
||||
|
||||
@@ -7,7 +7,9 @@ use work.common.all;
|
||||
|
||||
entity register_file is
|
||||
generic (
|
||||
SIM : boolean := false
|
||||
SIM : boolean := false;
|
||||
-- Non-zero to enable log data collection
|
||||
LOG_LENGTH : natural := 0
|
||||
);
|
||||
port(
|
||||
clk : in std_logic;
|
||||
@@ -36,7 +38,6 @@ architecture behaviour of register_file is
|
||||
signal rd_port_b : std_ulogic_vector(63 downto 0);
|
||||
signal dbg_data : std_ulogic_vector(63 downto 0);
|
||||
signal dbg_ack : std_ulogic;
|
||||
signal log_data : std_ulogic_vector(70 downto 0);
|
||||
begin
|
||||
-- synchronous writes
|
||||
register_write_0: process(clk)
|
||||
@@ -134,13 +135,18 @@ begin
|
||||
sim_dump_done <= '0';
|
||||
end generate;
|
||||
|
||||
reg_log: process(clk)
|
||||
rf_log: if LOG_LENGTH > 0 generate
|
||||
signal log_data : std_ulogic_vector(70 downto 0);
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
log_data <= w_in.write_data &
|
||||
w_in.write_enable &
|
||||
w_in.write_reg;
|
||||
end if;
|
||||
end process;
|
||||
log_out <= log_data;
|
||||
reg_log: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
log_data <= w_in.write_data &
|
||||
w_in.write_enable &
|
||||
w_in.write_reg;
|
||||
end if;
|
||||
end process;
|
||||
log_out <= log_data;
|
||||
end generate;
|
||||
|
||||
end architecture behaviour;
|
||||
|
||||
Reference in New Issue
Block a user