mirror of
https://github.com/antonblanchard/microwatt.git
synced 2026-01-13 07:09:54 +00:00
This makes the BRAMs use an output buffer, introducing an extra cycle of latency. Without this, Vivado won't make timing at 100MHz. We stash all the necessary response data in delayed latches; the extra cycle is NOT a state in the state machine, thus it's fully pipelined and doesn't involve stalling. This introduces an extra non-pipelined cycle for loads with update to avoid a collision on the writeback output between the now-delayed load data and the register update. We could avoid it by moving the register update into the pipeline bubble created by the extra update state, but it's a bit trickier, so I leave that for a later optimization. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
87 lines
2.1 KiB
VHDL
87 lines
2.1 KiB
VHDL
library ieee;
|
|
use ieee.std_logic_1164.all;
|
|
use ieee.numeric_std.all;
|
|
use ieee.math_real.all;
|
|
|
|
-- Interface of a RAM with one read port and one byte-maskable write port,
-- both clocked by `clk`.
entity cache_ram is
    generic (
        -- Width of the read/write addresses; the memory has 2**ROW_BITS rows.
        ROW_BITS : integer := 16;
        -- Width of one row in bits. wr_sel carries WIDTH/8 select bits.
        WIDTH    : integer := 64;
        -- When true, every accepted read and write is reported.
        TRACE    : boolean := false;
        -- When true, rd_data goes through one additional clocked register.
        ADD_BUF  : boolean := false
    );
    port (
        clk     : in  std_logic;

        -- Read port: a row is captured on a rising clock edge when rd_en = '1'.
        rd_en   : in  std_logic;
        rd_addr : in  std_logic_vector(ROW_BITS - 1 downto 0);
        rd_data : out std_logic_vector(WIDTH - 1 downto 0);

        -- Write port: on a rising clock edge with wr_en = '1', each byte of
        -- wr_data whose wr_sel bit is '1' is stored into the addressed row.
        wr_en   : in  std_logic;
        wr_sel  : in  std_logic_vector(WIDTH/8 - 1 downto 0);
        wr_addr : in  std_logic_vector(ROW_BITS - 1 downto 0);
        wr_data : in  std_logic_vector(WIDTH - 1 downto 0)
    );
end entity cache_ram;
|
|
|
|
-- Clocked memory array with a byte-select write path and a registered read
-- path; ADD_BUF optionally adds a second register stage on the read data.
architecture rtl of cache_ram is
    -- Number of rows in the array and number of byte lanes in one row.
    constant SIZE      : integer := 2**ROW_BITS;
    constant NUM_LANES : integer := WIDTH / 8;

    type ram_type is array (0 to SIZE - 1) of std_logic_vector(WIDTH - 1 downto 0);
    signal ram : ram_type;

    -- Tool attributes attached to the storage signal.
    -- NOTE(review): presumably Vivado synthesis hints (block RAM mapping and
    -- power-oriented decomposition) -- confirm against the tool documentation.
    attribute ram_style  : string;
    attribute ram_decomp : string;
    attribute ram_style  of ram : signal is "block";
    attribute ram_decomp of ram : signal is "power";

    -- Row captured by the read port, before the optional output register.
    signal rd_data0 : std_logic_vector(WIDTH - 1 downto 0);

begin

    -- Single clocked process handling both ports. The write is a signal
    -- assignment, so a read of the same row on the same edge sees the row
    -- contents from before that write (simulation semantics).
    ram_access : process (clk)
        variable lo_bit : integer range 0 to WIDTH - 1;
        variable hi_bit : integer range 0 to WIDTH - 1;
        variable row    : integer range 0 to SIZE - 1;
    begin
        if rising_edge(clk) then

            -- Write port.
            if wr_en = '1' then
                if TRACE then
                    report "write a:" & to_hstring(wr_addr) &
                           " sel:" & to_hstring(wr_sel) &
                           " dat:" & to_hstring(wr_data);
                end if;

                -- One iteration per byte lane: only lanes whose select bit
                -- is set are updated, the rest of the row keeps its value.
                for lane in 0 to NUM_LANES - 1 loop
                    row    := to_integer(unsigned(wr_addr));
                    lo_bit := 8 * lane;
                    hi_bit := lo_bit + 7;
                    if wr_sel(lane) = '1' then
                        ram(row)(hi_bit downto lo_bit) <= wr_data(hi_bit downto lo_bit);
                    end if;
                end loop;
            end if;

            -- Read port: rd_data0 holds its previous value while rd_en = '0'.
            if rd_en = '1' then
                rd_data0 <= ram(to_integer(unsigned(rd_addr)));
                if TRACE then
                    report "read a:" & to_hstring(rd_addr) &
                           " dat:" & to_hstring(ram(to_integer(unsigned(rd_addr))));
                end if;
            end if;

        end if;
    end process ram_access;

    -- ADD_BUF = true: rd_data is rd_data0 delayed by one more clock edge.
    buf : if ADD_BUF generate
        out_reg : process (clk)
        begin
            if rising_edge(clk) then
                rd_data <= rd_data0;
            end if;
        end process out_reg;
    end generate buf;

    -- ADD_BUF = false: rd_data follows rd_data0 directly.
    nobuf : if not ADD_BUF generate
        rd_data <= rd_data0;
    end generate nobuf;

end architecture rtl;
|