1
0
mirror of https://github.com/antonblanchard/microwatt.git synced 2026-01-13 07:09:54 +00:00
antonblanchard.microwatt/cache_ram.vhdl
Benjamin Herrenschmidt 174378b190 dcache: Introduce an extra cycle latency to make timing
This makes the BRAMs use an output buffer, introducing an extra
cycle latency. Without this, Vivado won't make timing at 100Mhz.

We stash all the necessary response data in delayed latches, the
extra cycle is NOT a state in the state machine, thus it's fully
pipelined and doesn't involve stalling.

This introduces an extra non-pipelined cycle for loads with update
to avoid collision on the writeback output between the now delayed
load data and the register update. We could avoid it by moving
the register update in the pipeline bubble created by the extra
update state, but it's a bit trickier, so I leave that for a latter
optimization.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2019-10-23 12:30:49 +11:00

87 lines
2.1 KiB
VHDL

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.math_real.all;
entity cache_ram is
generic(
ROW_BITS : integer := 16;
WIDTH : integer := 64;
TRACE : boolean := false;
ADD_BUF : boolean := false
);
port(
clk : in std_logic;
rd_en : in std_logic;
rd_addr : in std_logic_vector(ROW_BITS - 1 downto 0);
rd_data : out std_logic_vector(WIDTH - 1 downto 0);
wr_en : in std_logic;
wr_sel : in std_logic_vector(WIDTH/8 - 1 downto 0);
wr_addr : in std_logic_vector(ROW_BITS - 1 downto 0);
wr_data : in std_logic_vector(WIDTH - 1 downto 0)
);
end cache_ram;
architecture rtl of cache_ram is
constant SIZE : integer := 2**ROW_BITS;
type ram_type is array (0 to SIZE - 1) of std_logic_vector(WIDTH - 1 downto 0);
signal ram : ram_type;
attribute ram_style : string;
attribute ram_style of ram : signal is "block";
attribute ram_decomp : string;
attribute ram_decomp of ram : signal is "power";
signal rd_data0 : std_logic_vector(WIDTH - 1 downto 0);
begin
process(clk)
variable lbit : integer range 0 to WIDTH - 1;
variable mbit : integer range 0 to WIDTH - 1;
variable widx : integer range 0 to SIZE - 1;
begin
if rising_edge(clk) then
if wr_en = '1' then
if TRACE then
report "write a:" & to_hstring(wr_addr) &
" sel:" & to_hstring(wr_sel) &
" dat:" & to_hstring(wr_data);
end if;
for i in 0 to WIDTH/8-1 loop
lbit := i * 8;
mbit := lbit + 7;
widx := to_integer(unsigned(wr_addr));
if wr_sel(i) = '1' then
ram(widx)(mbit downto lbit) <= wr_data(mbit downto lbit);
end if;
end loop;
end if;
if rd_en = '1' then
rd_data0 <= ram(to_integer(unsigned(rd_addr)));
if TRACE then
report "read a:" & to_hstring(rd_addr) &
" dat:" & to_hstring(ram(to_integer(unsigned(rd_addr))));
end if;
end if;
end if;
end process;
buf: if ADD_BUF generate
begin
process(clk)
begin
if rising_edge(clk) then
rd_data <= rd_data0;
end if;
end process;
end generate;
nobuf: if not ADD_BUF generate
begin
rd_data <= rd_data0;
end generate;
end;