mirror of
https://github.com/antonblanchard/microwatt.git
synced 2026-03-10 04:24:30 +00:00
dcache: Implement the dcbz instruction
This adds logic to dcache and loadstore1 to implement dcbz. For now it zeroes a single cache line (by default 64 bytes), not 128 bytes like IBM Power processors do.

The dcbz operation is performed much like a load miss, except that we are writing zeroes to memory instead of reading. As each ack comes back, we write zeroes to the BRAM instead of data from memory. In this way we zero the line in memory and also zero the line of cache memory, establishing the line in the cache if it wasn't already resident. If it was already resident then we overwrite the existing line in the cache.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
@@ -236,6 +236,7 @@ package common is
|
||||
type Loadstore1ToDcacheType is record
|
||||
valid : std_ulogic;
|
||||
load : std_ulogic;
|
||||
dcbz : std_ulogic;
|
||||
nc : std_ulogic;
|
||||
reserve : std_ulogic;
|
||||
addr : std_ulogic_vector(63 downto 0);
|
||||
|
||||
66
dcache.vhdl
66
dcache.vhdl
@@ -581,8 +581,12 @@ begin
|
||||
wr_data <= r0.data;
|
||||
wr_sel <= r0.byte_sel;
|
||||
else
|
||||
-- Otherwise, we might be doing a reload
|
||||
wr_data <= wishbone_in.dat;
|
||||
-- Otherwise, we might be doing a reload or a DCBZ
|
||||
if r1.req.dcbz = '1' then
|
||||
wr_data <= (others => '0');
|
||||
else
|
||||
wr_data <= wishbone_in.dat;
|
||||
end if;
|
||||
wr_sel <= (others => '1');
|
||||
wr_addr <= std_ulogic_vector(to_unsigned(r1.store_row, ROW_BITS));
|
||||
end if;
|
||||
@@ -718,18 +722,54 @@ begin
|
||||
r1.wb.we <= '0';
|
||||
r1.state <= NC_LOAD_WAIT_ACK;
|
||||
|
||||
when OP_STORE_HIT | OP_STORE_MISS =>
|
||||
r1.wb.sel <= r0.byte_sel;
|
||||
r1.wb.adr <= r0.addr(r1.wb.adr'left downto 3) & "000";
|
||||
r1.wb.dat <= r0.data;
|
||||
if cancel_store = '0' then
|
||||
when OP_STORE_HIT | OP_STORE_MISS =>
|
||||
if r0.dcbz = '0' then
|
||||
r1.wb.sel <= r0.byte_sel;
|
||||
r1.wb.adr <= r0.addr(r1.wb.adr'left downto 3) & "000";
|
||||
r1.wb.dat <= r0.data;
|
||||
if cancel_store = '0' then
|
||||
r1.wb.cyc <= '1';
|
||||
r1.wb.stb <= '1';
|
||||
r1.wb.we <= '1';
|
||||
r1.state <= STORE_WAIT_ACK;
|
||||
else
|
||||
r1.stcx_fail <= '1';
|
||||
r1.state <= IDLE;
|
||||
end if;
|
||||
else
|
||||
-- dcbz is handled much like a load miss except
|
||||
-- that we are writing to memory instead of reading
|
||||
r1.store_index <= req_index;
|
||||
r1.store_row <= get_row(req_laddr);
|
||||
|
||||
if req_op = OP_STORE_HIT then
|
||||
r1.store_way <= req_hit_way;
|
||||
else
|
||||
r1.store_way <= replace_way;
|
||||
|
||||
-- Force misses on the victim way while zeroing
|
||||
cache_valids(req_index)(replace_way) <= '0';
|
||||
|
||||
-- Store new tag in selected way
|
||||
for i in 0 to NUM_WAYS-1 loop
|
||||
if i = replace_way then
|
||||
tagset := cache_tags(req_index);
|
||||
write_tag(i, tagset, req_tag);
|
||||
cache_tags(req_index) <= tagset;
|
||||
end if;
|
||||
end loop;
|
||||
end if;
|
||||
|
||||
-- Set up for wishbone writes
|
||||
r1.wb.adr <= req_laddr(r1.wb.adr'left downto 0);
|
||||
r1.wb.sel <= (others => '1');
|
||||
r1.wb.we <= '1';
|
||||
r1.wb.dat <= (others => '0');
|
||||
r1.wb.cyc <= '1';
|
||||
r1.wb.stb <= '1';
|
||||
r1.wb.we <= '1';
|
||||
r1.state <= STORE_WAIT_ACK;
|
||||
else
|
||||
r1.stcx_fail <= '1';
|
||||
r1.state <= IDLE;
|
||||
|
||||
-- Handle the rest like a load miss
|
||||
r1.state <= RELOAD_WAIT_ACK;
|
||||
end if;
|
||||
|
||||
-- OP_NONE and OP_BAD do nothing
|
||||
@@ -766,7 +806,7 @@ begin
|
||||
-- not idle, which we don't currently know how to deal
|
||||
-- with.
|
||||
--
|
||||
if r1.store_row = get_row(r1.req.addr) then
|
||||
if r1.store_row = get_row(r1.req.addr) and r1.req.dcbz = '0' then
|
||||
r1.slow_data <= wishbone_in.dat;
|
||||
end if;
|
||||
|
||||
|
||||
@@ -164,7 +164,7 @@ architecture behaviour of decode1 is
|
||||
2#0000110110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbst
|
||||
2#0100010110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbt
|
||||
2#0011110110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbtst
|
||||
-- 2#1111110110# dcbz
|
||||
2#1111110110# => (LDST, OP_DCBZ, RA_OR_ZERO, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- dcbz
|
||||
2#0110001001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divdeu
|
||||
2#1110001001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divdeuo
|
||||
2#0110001011# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- divweu
|
||||
|
||||
@@ -43,6 +43,7 @@ architecture behave of loadstore1 is
|
||||
type reg_stage_t is record
|
||||
-- latch most of the input request
|
||||
load : std_ulogic;
|
||||
dcbz : std_ulogic;
|
||||
addr : std_ulogic_vector(63 downto 0);
|
||||
store_data : std_ulogic_vector(63 downto 0);
|
||||
load_data : std_ulogic_vector(63 downto 0);
|
||||
@@ -198,8 +199,11 @@ begin
|
||||
when IDLE =>
|
||||
if l_in.valid = '1' then
|
||||
v.load := '0';
|
||||
v.dcbz := '0';
|
||||
if l_in.op = OP_LOAD then
|
||||
v.load := '1';
|
||||
elsif l_in.op = OP_DCBZ then
|
||||
v.dcbz := '1';
|
||||
end if;
|
||||
v.addr := lsu_sum;
|
||||
v.write_reg := l_in.write_reg;
|
||||
@@ -293,6 +297,7 @@ begin
|
||||
-- Update outputs to dcache
|
||||
d_out.valid <= req;
|
||||
d_out.load <= v.load;
|
||||
d_out.dcbz <= v.dcbz;
|
||||
d_out.nc <= v.nc;
|
||||
d_out.reserve <= v.reserve;
|
||||
d_out.addr <= addr;
|
||||
|
||||
Reference in New Issue
Block a user