mirror of
https://github.com/antonblanchard/microwatt.git
synced 2026-01-13 15:18:09 +00:00
register_file: Make read access to register file synchronous
With this, the register RAM is read synchronously using the addresses supplied by decode1. That means the register RAM can now be block RAM rather than LUT RAM.

Debug accesses are done via the B port on cycles when decode1 indicates that there is no valid instruction or the instruction doesn't use a [F]RB operand.

We latch the addresses being read in each cycle and use the same address next cycle if stalled.

Data that is being written is latched and a multiplexer on each read port then supplies the latched write data if the read address for that port equals the write address.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
parent
06c13d4988
commit
1d7de2f1da
@ -280,6 +280,9 @@ package common is
|
||||
reg_1_addr : gspr_index_t;
|
||||
reg_2_addr : gspr_index_t;
|
||||
reg_3_addr : gspr_index_t;
|
||||
read_1_enable : std_ulogic;
|
||||
read_2_enable : std_ulogic;
|
||||
read_3_enable : std_ulogic;
|
||||
end record;
|
||||
|
||||
type bypass_data_t is record
|
||||
|
||||
14
decode1.vhdl
14
decode1.vhdl
@ -641,6 +641,7 @@ begin
|
||||
variable bv : br_predictor_t;
|
||||
variable fprs, fprabc : std_ulogic;
|
||||
variable in3rc : std_ulogic;
|
||||
variable may_read_rb : std_ulogic;
|
||||
begin
|
||||
v := Decode1ToDecode2Init;
|
||||
vi := reg_internal_t_init;
|
||||
@ -654,6 +655,7 @@ begin
|
||||
fprs := '0';
|
||||
fprabc := '0';
|
||||
in3rc := '0';
|
||||
may_read_rb := '0';
|
||||
|
||||
if f_in.valid = '1' then
|
||||
report "Decode insn " & to_hstring(f_in.insn) & " at " & to_hstring(f_in.nia);
|
||||
@ -675,10 +677,16 @@ begin
|
||||
vi.override := not decode_op_4_valid(to_integer(unsigned(minor4op)));
|
||||
v.decode := decode_op_4_array(to_integer(unsigned(f_in.insn(5 downto 0))));
|
||||
in3rc := '1';
|
||||
may_read_rb := '1';
|
||||
|
||||
when 23 =>
|
||||
-- rlwnm[.]
|
||||
may_read_rb := '1';
|
||||
|
||||
when 31 =>
|
||||
-- major opcode 31, lots of things
|
||||
v.decode := decode_op_31_array(to_integer(unsigned(f_in.insn(10 downto 1))));
|
||||
may_read_rb := '1';
|
||||
|
||||
if std_match(f_in.insn(10 downto 1), "01-1010011") then
|
||||
-- mfspr or mtspr
|
||||
@ -728,6 +736,7 @@ begin
|
||||
|
||||
when 30 =>
|
||||
v.decode := decode_op_30_array(to_integer(unsigned(f_in.insn(4 downto 1))));
|
||||
may_read_rb := f_in.insn(4);
|
||||
|
||||
when 52 | 53 | 54 | 55 =>
|
||||
-- stfd[u] and stfs[u]
|
||||
@ -748,6 +757,7 @@ begin
|
||||
in3rc := '1';
|
||||
fprabc := '1';
|
||||
fprs := '1';
|
||||
may_read_rb := '1';
|
||||
end if;
|
||||
|
||||
when 62 =>
|
||||
@ -764,6 +774,7 @@ begin
|
||||
in3rc := '1';
|
||||
fprabc := '1';
|
||||
fprs := '1';
|
||||
may_read_rb := '1';
|
||||
end if;
|
||||
|
||||
when others =>
|
||||
@ -777,6 +788,9 @@ begin
|
||||
else
|
||||
vr.reg_3_addr := fprs & insn_rs(f_in.insn);
|
||||
end if;
|
||||
vr.read_1_enable := f_in.valid and not f_in.fetch_failed;
|
||||
vr.read_2_enable := f_in.valid and not f_in.fetch_failed and may_read_rb;
|
||||
vr.read_3_enable := f_in.valid and not f_in.fetch_failed;
|
||||
|
||||
if f_in.fetch_failed = '1' then
|
||||
v.valid := '1';
|
||||
|
||||
@ -38,17 +38,27 @@ end entity register_file;
|
||||
architecture behaviour of register_file is
|
||||
type regfile is array(0 to 63) of std_ulogic_vector(63 downto 0);
|
||||
signal registers : regfile := (others => (others => '0'));
|
||||
signal rd_port_b : std_ulogic_vector(63 downto 0);
|
||||
signal dbg_data : std_ulogic_vector(63 downto 0);
|
||||
signal dbg_ack : std_ulogic;
|
||||
signal dbg_gpr_done : std_ulogic;
|
||||
signal addr_1_reg : gspr_index_t;
|
||||
signal addr_2_reg : gspr_index_t;
|
||||
signal addr_3_reg : gspr_index_t;
|
||||
signal rd_2 : std_ulogic;
|
||||
signal fwd_1 : std_ulogic;
|
||||
signal fwd_2 : std_ulogic;
|
||||
signal fwd_3 : std_ulogic;
|
||||
signal data_1 : std_ulogic_vector(63 downto 0);
|
||||
signal data_2 : std_ulogic_vector(63 downto 0);
|
||||
signal data_3 : std_ulogic_vector(63 downto 0);
|
||||
signal prev_write_data : std_ulogic_vector(63 downto 0);
|
||||
|
||||
begin
|
||||
-- synchronous writes
|
||||
-- synchronous reads and writes
|
||||
register_write_0: process(clk)
|
||||
variable a_addr, b_addr, c_addr : gspr_index_t;
|
||||
variable w_addr : gspr_index_t;
|
||||
variable b_enable : std_ulogic;
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
if w_in.write_enable = '1' then
|
||||
@ -66,57 +76,94 @@ begin
|
||||
a_addr := d1_in.reg_1_addr;
|
||||
b_addr := d1_in.reg_2_addr;
|
||||
c_addr := d1_in.reg_3_addr;
|
||||
|
||||
if stall = '0' then
|
||||
b_enable := d1_in.read_2_enable;
|
||||
if stall = '1' then
|
||||
a_addr := addr_1_reg;
|
||||
b_addr := addr_2_reg;
|
||||
c_addr := addr_3_reg;
|
||||
b_enable := rd_2;
|
||||
else
|
||||
addr_1_reg <= a_addr;
|
||||
addr_2_reg <= b_addr;
|
||||
addr_3_reg <= c_addr;
|
||||
rd_2 <= b_enable;
|
||||
end if;
|
||||
|
||||
fwd_1 <= '0';
|
||||
fwd_2 <= '0';
|
||||
fwd_3 <= '0';
|
||||
if w_in.write_enable = '1' then
|
||||
if w_addr = a_addr then
|
||||
fwd_1 <= '1';
|
||||
end if;
|
||||
if w_addr = b_addr then
|
||||
fwd_2 <= '1';
|
||||
end if;
|
||||
if w_addr = c_addr then
|
||||
fwd_3 <= '1';
|
||||
end if;
|
||||
end if;
|
||||
|
||||
-- Do debug reads to GPRs and FPRs using the B port when it is not in use
|
||||
if dbg_gpr_req = '1' then
|
||||
if b_enable = '0' then
|
||||
b_addr := dbg_gpr_addr(5 downto 0);
|
||||
dbg_gpr_done <= '1';
|
||||
end if;
|
||||
else
|
||||
dbg_gpr_done <= '0';
|
||||
end if;
|
||||
|
||||
if not HAS_FPU then
|
||||
-- Make it obvious that we only want 32 GSPRs for a no-FPU implementation
|
||||
a_addr(5) := '0';
|
||||
b_addr(5) := '0';
|
||||
c_addr(5) := '0';
|
||||
end if;
|
||||
data_1 <= registers(to_integer(unsigned(a_addr)));
|
||||
data_2 <= registers(to_integer(unsigned(b_addr)));
|
||||
data_3 <= registers(to_integer(unsigned(c_addr)));
|
||||
|
||||
prev_write_data <= w_in.write_data;
|
||||
|
||||
assert (d_in.read1_enable = '0') or (d_in.read1_reg = addr_1_reg) severity failure;
|
||||
assert (d_in.read2_enable = '0') or (d_in.read2_reg = addr_2_reg) severity failure;
|
||||
assert (d_in.read3_enable = '0') or (d_in.read3_reg = addr_3_reg) severity failure;
|
||||
end if;
|
||||
end process register_write_0;
|
||||
|
||||
-- asynchronous reads
|
||||
-- asynchronous forwarding of write data
|
||||
register_read_0: process(all)
|
||||
variable a_addr, b_addr, c_addr : gspr_index_t;
|
||||
variable w_addr : gspr_index_t;
|
||||
variable out_data_1 : std_ulogic_vector(63 downto 0);
|
||||
variable out_data_2 : std_ulogic_vector(63 downto 0);
|
||||
variable out_data_3 : std_ulogic_vector(63 downto 0);
|
||||
begin
|
||||
a_addr := d_in.read1_reg;
|
||||
b_addr := d_in.read2_reg;
|
||||
c_addr := d_in.read3_reg;
|
||||
w_addr := w_in.write_reg;
|
||||
if not HAS_FPU then
|
||||
-- Make it obvious that we only want 32 GSPRs for a no-FPU implementation
|
||||
a_addr(5) := '0';
|
||||
b_addr(5) := '0';
|
||||
c_addr(5) := '0';
|
||||
w_addr(5) := '0';
|
||||
out_data_1 := data_1;
|
||||
out_data_2 := data_2;
|
||||
out_data_3 := data_3;
|
||||
if fwd_1 = '1' then
|
||||
out_data_1 := prev_write_data;
|
||||
end if;
|
||||
if fwd_2 = '1' then
|
||||
out_data_2 := prev_write_data;
|
||||
end if;
|
||||
if fwd_3 = '1' then
|
||||
out_data_3 := prev_write_data;
|
||||
end if;
|
||||
|
||||
if d_in.read1_enable = '1' then
|
||||
report "Reading GPR " & to_hstring(a_addr) & " " & to_hstring(registers(to_integer(unsigned(a_addr))));
|
||||
report "Reading GPR " & to_hstring(addr_1_reg) & " " & to_hstring(out_data_1);
|
||||
end if;
|
||||
if d_in.read2_enable = '1' then
|
||||
report "Reading GPR " & to_hstring(b_addr) & " " & to_hstring(registers(to_integer(unsigned(b_addr))));
|
||||
report "Reading GPR " & to_hstring(addr_2_reg) & " " & to_hstring(out_data_2);
|
||||
end if;
|
||||
if d_in.read3_enable = '1' then
|
||||
report "Reading GPR " & to_hstring(c_addr) & " " & to_hstring(registers(to_integer(unsigned(c_addr))));
|
||||
report "Reading GPR " & to_hstring(addr_3_reg) & " " & to_hstring(out_data_3);
|
||||
end if;
|
||||
d_out.read1_data <= registers(to_integer(unsigned(a_addr)));
|
||||
-- B read port is multiplexed with reads from the debug circuitry
|
||||
if d_in.read2_enable = '0' and dbg_gpr_req = '1' and dbg_ack = '0' then
|
||||
b_addr := dbg_gpr_addr;
|
||||
if not HAS_FPU then
|
||||
b_addr(5) := '0';
|
||||
end if;
|
||||
end if;
|
||||
rd_port_b <= registers(to_integer(unsigned(b_addr)));
|
||||
d_out.read2_data <= rd_port_b;
|
||||
d_out.read3_data <= registers(to_integer(unsigned(c_addr)));
|
||||
|
||||
-- Forwarding of written data is now done explicitly with a bypass path
|
||||
-- from writeback to decode2.
|
||||
d_out.read1_data <= out_data_1;
|
||||
d_out.read2_data <= out_data_2;
|
||||
d_out.read3_data <= out_data_3;
|
||||
end process register_read_0;
|
||||
|
||||
-- Latch read data and ack if dbg read requested and B port not busy
|
||||
@ -124,8 +171,8 @@ begin
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
if dbg_gpr_req = '1' then
|
||||
if d_in.read2_enable = '0' and dbg_ack = '0' then
|
||||
dbg_data <= rd_port_b;
|
||||
if dbg_ack = '0' and dbg_gpr_done = '1' then
|
||||
dbg_data <= data_2;
|
||||
dbg_ack <= '1';
|
||||
end if;
|
||||
else
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user