mirror of
https://github.com/antonblanchard/microwatt.git
synced 2026-02-26 08:43:26 +00:00
loadstore1: Move load data formatting from writeback to loadstore1
This puts all the data formatting (byte rotation based on the lowest three bits of the address, byte reversal, sign extension, zero extension) in loadstore1. Writeback now simply sends the data provided to the register files.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
@@ -236,17 +236,11 @@ package common is
|
||||
write_enable: std_ulogic;
|
||||
write_reg : gpr_index_t;
|
||||
write_data : std_ulogic_vector(63 downto 0);
|
||||
write_len : std_ulogic_vector(3 downto 0);
|
||||
write_shift : std_ulogic_vector(2 downto 0);
|
||||
sign_extend : std_ulogic;
|
||||
byte_reverse : std_ulogic;
|
||||
second_word : std_ulogic;
|
||||
xerc : xer_common_t;
|
||||
rc : std_ulogic;
|
||||
store_done : std_ulogic;
|
||||
end record;
|
||||
constant Loadstore1ToWritebackInit : Loadstore1ToWritebackType := (valid => '0', write_enable => '0', sign_extend => '0',
|
||||
byte_reverse => '0', second_word => '0', xerc => xerc_init,
|
||||
constant Loadstore1ToWritebackInit : Loadstore1ToWritebackType := (valid => '0', write_enable => '0', xerc => xerc_init,
|
||||
rc => '0', store_done => '0', others => (others => '0'));
|
||||
|
||||
type Execute1ToWritebackType is record
|
||||
|
||||
102
loadstore1.vhdl
102
loadstore1.vhdl
@@ -43,7 +43,8 @@ architecture behave of loadstore1 is
|
||||
-- latch most of the input request
|
||||
load : std_ulogic;
|
||||
addr : std_ulogic_vector(63 downto 0);
|
||||
data : std_ulogic_vector(63 downto 0);
|
||||
store_data : std_ulogic_vector(63 downto 0);
|
||||
load_data : std_ulogic_vector(63 downto 0);
|
||||
write_reg : gpr_index_t;
|
||||
length : std_ulogic_vector(3 downto 0);
|
||||
byte_reverse : std_ulogic;
|
||||
@@ -58,6 +59,10 @@ architecture behave of loadstore1 is
|
||||
second_bytes : std_ulogic_vector(7 downto 0);
|
||||
end record;
|
||||
|
||||
type byte_sel_t is array(0 to 7) of std_ulogic;
|
||||
subtype byte_trim_t is std_ulogic_vector(1 downto 0);
|
||||
type trim_ctl_t is array(0 to 7) of byte_trim_t;
|
||||
|
||||
signal r, rin : reg_stage_t;
|
||||
signal lsu_sum : std_ulogic_vector(63 downto 0);
|
||||
|
||||
@@ -112,6 +117,7 @@ begin
|
||||
variable byte_offset : unsigned(2 downto 0);
|
||||
variable j : integer;
|
||||
variable k : unsigned(2 downto 0);
|
||||
variable kk : unsigned(3 downto 0);
|
||||
variable long_sel : std_ulogic_vector(15 downto 0);
|
||||
variable byte_sel : std_ulogic_vector(7 downto 0);
|
||||
variable req : std_ulogic;
|
||||
@@ -120,8 +126,13 @@ begin
|
||||
variable wdata : std_ulogic_vector(63 downto 0);
|
||||
variable write_enable : std_ulogic;
|
||||
variable do_update : std_ulogic;
|
||||
variable second_dword : std_ulogic;
|
||||
variable two_dwords : std_ulogic;
|
||||
variable done : std_ulogic;
|
||||
variable data_permuted : std_ulogic_vector(63 downto 0);
|
||||
variable data_trimmed : std_ulogic_vector(63 downto 0);
|
||||
variable use_second : byte_sel_t;
|
||||
variable trim_ctl : trim_ctl_t;
|
||||
variable negative : std_ulogic;
|
||||
begin
|
||||
v := r;
|
||||
req := '0';
|
||||
@@ -132,14 +143,63 @@ begin
|
||||
|
||||
write_enable := '0';
|
||||
do_update := '0';
|
||||
second_dword := '0';
|
||||
two_dwords := or (r.second_bytes);
|
||||
|
||||
-- load data formatting
|
||||
if r.load = '1' then
|
||||
byte_offset := unsigned(r.addr(2 downto 0));
|
||||
brev_lenm1 := "000";
|
||||
if r.byte_reverse = '1' then
|
||||
brev_lenm1 := unsigned(r.length(2 downto 0)) - 1;
|
||||
end if;
|
||||
|
||||
-- shift and byte-reverse data bytes
|
||||
for i in 0 to 7 loop
|
||||
kk := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset);
|
||||
use_second(i) := kk(3);
|
||||
j := to_integer(kk(2 downto 0)) * 8;
|
||||
data_permuted(i * 8 + 7 downto i * 8) := d_in.data(j + 7 downto j);
|
||||
end loop;
|
||||
|
||||
-- Work out the sign bit for sign extension.
|
||||
-- Assumes we are not doing both sign extension and byte reversal,
|
||||
-- in that for unaligned loads crossing two dwords we end up
|
||||
-- using a bit from the second dword, whereas for a byte-reversed
|
||||
-- (i.e. big-endian) load the sign bit would be in the first dword.
|
||||
negative := (r.length(3) and data_permuted(63)) or
|
||||
(r.length(2) and data_permuted(31)) or
|
||||
(r.length(1) and data_permuted(15)) or
|
||||
(r.length(0) and data_permuted(7));
|
||||
|
||||
-- trim and sign-extend
|
||||
for i in 0 to 7 loop
|
||||
if i < to_integer(unsigned(r.length)) then
|
||||
if two_dwords = '1' then
|
||||
trim_ctl(i) := '1' & not use_second(i);
|
||||
else
|
||||
trim_ctl(i) := not use_second(i) & '0';
|
||||
end if;
|
||||
else
|
||||
trim_ctl(i) := '0' & (negative and r.sign_extend);
|
||||
end if;
|
||||
case trim_ctl(i) is
|
||||
when "11" =>
|
||||
data_trimmed(i * 8 + 7 downto i * 8) := r.load_data(i * 8 + 7 downto i * 8);
|
||||
when "10" =>
|
||||
data_trimmed(i * 8 + 7 downto i * 8) := data_permuted(i * 8 + 7 downto i * 8);
|
||||
when "01" =>
|
||||
data_trimmed(i * 8 + 7 downto i * 8) := x"FF";
|
||||
when others =>
|
||||
data_trimmed(i * 8 + 7 downto i * 8) := x"00";
|
||||
end case;
|
||||
end loop;
|
||||
end if;
|
||||
|
||||
case r.state is
|
||||
when IDLE =>
|
||||
if l_in.valid = '1' then
|
||||
v.load := l_in.load;
|
||||
v.addr := lsu_sum;
|
||||
v.data := l_in.data;
|
||||
v.write_reg := l_in.write_reg;
|
||||
v.length := l_in.length;
|
||||
v.byte_reverse := l_in.byte_reverse;
|
||||
@@ -179,7 +239,7 @@ begin
|
||||
for i in 0 to 7 loop
|
||||
k := (to_unsigned(i, 3) xor brev_lenm1) + byte_offset;
|
||||
j := to_integer(k) * 8;
|
||||
v.data(j + 7 downto j) := l_in.data(i * 8 + 7 downto i * 8);
|
||||
v.store_data(j + 7 downto j) := l_in.data(i * 8 + 7 downto i * 8);
|
||||
end loop;
|
||||
end if;
|
||||
|
||||
@@ -203,13 +263,14 @@ begin
|
||||
when FIRST_ACK_WAIT =>
|
||||
stall := '1';
|
||||
if d_in.valid = '1' then
|
||||
write_enable := r.load;
|
||||
v.state := LAST_ACK_WAIT;
|
||||
if r.load = '1' then
|
||||
v.load_data := data_permuted;
|
||||
end if;
|
||||
end if;
|
||||
|
||||
when LAST_ACK_WAIT =>
|
||||
stall := '1';
|
||||
second_dword := or (r.second_bytes);
|
||||
if d_in.valid = '1' then
|
||||
write_enable := r.load;
|
||||
if r.load = '1' and r.update = '1' then
|
||||
@@ -230,16 +291,13 @@ begin
|
||||
done := '1';
|
||||
end case;
|
||||
|
||||
-- Update registers
|
||||
rin <= v;
|
||||
|
||||
-- Update outputs to dcache
|
||||
d_out.valid <= req;
|
||||
d_out.load <= v.load;
|
||||
d_out.nc <= v.nc;
|
||||
d_out.reserve <= v.reserve;
|
||||
d_out.addr <= addr;
|
||||
d_out.data <= v.data;
|
||||
d_out.data <= v.store_data;
|
||||
d_out.byte_sel <= byte_sel;
|
||||
|
||||
-- Update outputs to writeback
|
||||
@@ -250,28 +308,20 @@ begin
|
||||
l_out.write_enable <= '1';
|
||||
l_out.write_reg <= r.update_reg;
|
||||
l_out.write_data <= r.addr;
|
||||
l_out.write_len <= x"8";
|
||||
l_out.write_shift <= "000";
|
||||
l_out.sign_extend <= '0';
|
||||
l_out.byte_reverse <= '0';
|
||||
l_out.second_word <= '0';
|
||||
l_out.rc <= '0';
|
||||
l_out.store_done <= '0';
|
||||
else
|
||||
l_out.write_enable <= write_enable;
|
||||
l_out.write_reg <= r.write_reg;
|
||||
l_out.write_data <= d_in.data;
|
||||
l_out.write_len <= r.length;
|
||||
l_out.write_shift <= r.addr(2 downto 0);
|
||||
l_out.sign_extend <= r.sign_extend;
|
||||
l_out.byte_reverse <= r.byte_reverse;
|
||||
l_out.second_word <= second_dword;
|
||||
l_out.rc <= r.rc and done;
|
||||
l_out.store_done <= d_in.store_done;
|
||||
l_out.write_data <= data_trimmed;
|
||||
end if;
|
||||
l_out.xerc <= r.xerc;
|
||||
l_out.rc <= r.rc and done;
|
||||
l_out.store_done <= d_in.store_done;
|
||||
|
||||
stall_out <= stall;
|
||||
|
||||
-- Update registers
|
||||
rin <= v;
|
||||
|
||||
end process;
|
||||
|
||||
end;
|
||||
|
||||
113
writeback.vhdl
113
writeback.vhdl
@@ -21,46 +21,12 @@ entity writeback is
|
||||
end entity writeback;
|
||||
|
||||
architecture behaviour of writeback is
|
||||
subtype byte_index_t is unsigned(2 downto 0);
|
||||
type permutation_t is array(0 to 7) of byte_index_t;
|
||||
subtype byte_trim_t is std_ulogic_vector(1 downto 0);
|
||||
type trim_ctl_t is array(0 to 7) of byte_trim_t;
|
||||
type byte_sel_t is array(0 to 7) of std_ulogic;
|
||||
|
||||
signal data_len : unsigned(3 downto 0);
|
||||
signal data_in : std_ulogic_vector(63 downto 0);
|
||||
signal data_permuted : std_ulogic_vector(63 downto 0);
|
||||
signal data_trimmed : std_ulogic_vector(63 downto 0);
|
||||
signal data_latched : std_ulogic_vector(63 downto 0);
|
||||
signal perm : permutation_t;
|
||||
signal use_second : byte_sel_t;
|
||||
signal byte_offset : unsigned(2 downto 0);
|
||||
signal brev_lenm1 : unsigned(2 downto 0);
|
||||
signal trim_ctl : trim_ctl_t;
|
||||
signal rc : std_ulogic;
|
||||
signal partial_write : std_ulogic;
|
||||
signal sign_extend : std_ulogic;
|
||||
signal negative : std_ulogic;
|
||||
signal second_word : std_ulogic;
|
||||
begin
|
||||
writeback_0: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
if partial_write = '1' then
|
||||
data_latched <= data_permuted;
|
||||
end if;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
writeback_1: process(all)
|
||||
variable x : std_ulogic_vector(0 downto 0);
|
||||
variable y : std_ulogic_vector(0 downto 0);
|
||||
variable z : std_ulogic_vector(0 downto 0);
|
||||
variable w : std_ulogic_vector(0 downto 0);
|
||||
variable j : integer;
|
||||
variable k : unsigned(3 downto 0);
|
||||
variable cf: std_ulogic_vector(3 downto 0);
|
||||
variable xe: xer_common_t;
|
||||
variable zero : std_ulogic;
|
||||
variable sign : std_ulogic;
|
||||
variable scf : std_ulogic_vector(3 downto 0);
|
||||
@@ -85,17 +51,10 @@ begin
|
||||
complete_out <= '1';
|
||||
end if;
|
||||
|
||||
rc <= '0';
|
||||
brev_lenm1 <= "000";
|
||||
partial_write <= '0';
|
||||
second_word <= '0';
|
||||
xe := e_in.xerc;
|
||||
data_in <= (others => '0');
|
||||
|
||||
if e_in.write_enable = '1' then
|
||||
w_out.write_reg <= e_in.write_reg;
|
||||
w_out.write_data <= e_in.write_data;
|
||||
w_out.write_enable <= '1';
|
||||
rc <= e_in.rc;
|
||||
end if;
|
||||
|
||||
if e_in.write_cr_enable = '1' then
|
||||
@@ -109,20 +68,10 @@ begin
|
||||
c_out.write_xerc_data <= e_in.xerc;
|
||||
end if;
|
||||
|
||||
sign_extend <= l_in.sign_extend;
|
||||
data_len <= unsigned(l_in.write_len);
|
||||
byte_offset <= unsigned(l_in.write_shift);
|
||||
if l_in.write_enable = '1' then
|
||||
w_out.write_reg <= gpr_to_gspr(l_in.write_reg);
|
||||
if l_in.byte_reverse = '1' then
|
||||
brev_lenm1 <= unsigned(l_in.write_len(2 downto 0)) - 1;
|
||||
end if;
|
||||
second_word <= l_in.second_word;
|
||||
if l_in.valid = '0' and (data_len + byte_offset > 8) then
|
||||
partial_write <= '1';
|
||||
end if;
|
||||
xe := l_in.xerc;
|
||||
w_out.write_enable <= not partial_write or second_word;
|
||||
w_out.write_data <= l_in.write_data;
|
||||
w_out.write_enable <= '1';
|
||||
end if;
|
||||
|
||||
if l_in.rc = '1' then
|
||||
@@ -130,65 +79,15 @@ begin
|
||||
scf(3) := '0';
|
||||
scf(2) := '0';
|
||||
scf(1) := l_in.store_done;
|
||||
scf(0) := xe.so;
|
||||
scf(0) := l_in.xerc.so;
|
||||
c_out.write_cr_enable <= '1';
|
||||
c_out.write_cr_mask <= num_to_fxm(0);
|
||||
c_out.write_cr_data(31 downto 28) <= scf;
|
||||
end if;
|
||||
|
||||
-- shift and byte-reverse data bytes
|
||||
for i in 0 to 7 loop
|
||||
k := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset);
|
||||
perm(i) <= k(2 downto 0);
|
||||
use_second(i) <= k(3);
|
||||
end loop;
|
||||
for i in 0 to 7 loop
|
||||
j := to_integer(perm(i)) * 8;
|
||||
data_permuted(i * 8 + 7 downto i * 8) <= l_in.write_data(j + 7 downto j);
|
||||
end loop;
|
||||
|
||||
-- If the data can arrive split over two cycles, this will be correct
|
||||
-- provided we don't have both sign extension and byte reversal.
|
||||
negative <= (data_len(3) and data_permuted(63)) or
|
||||
(data_len(2) and data_permuted(31)) or
|
||||
(data_len(1) and data_permuted(15)) or
|
||||
(data_len(0) and data_permuted(7));
|
||||
|
||||
-- trim and sign-extend
|
||||
for i in 0 to 7 loop
|
||||
if i < to_integer(data_len) then
|
||||
if second_word = '1' then
|
||||
trim_ctl(i) <= '1' & not use_second(i);
|
||||
else
|
||||
trim_ctl(i) <= not use_second(i) & '0';
|
||||
end if;
|
||||
else
|
||||
trim_ctl(i) <= '0' & (negative and sign_extend);
|
||||
end if;
|
||||
end loop;
|
||||
for i in 0 to 7 loop
|
||||
case trim_ctl(i) is
|
||||
when "11" =>
|
||||
data_trimmed(i * 8 + 7 downto i * 8) <= data_latched(i * 8 + 7 downto i * 8);
|
||||
when "10" =>
|
||||
data_trimmed(i * 8 + 7 downto i * 8) <= data_permuted(i * 8 + 7 downto i * 8);
|
||||
when "01" =>
|
||||
data_trimmed(i * 8 + 7 downto i * 8) <= x"FF";
|
||||
when others =>
|
||||
data_trimmed(i * 8 + 7 downto i * 8) <= x"00";
|
||||
end case;
|
||||
end loop;
|
||||
|
||||
-- deliver to regfile
|
||||
if l_in.write_enable = '1' then
|
||||
w_out.write_data <= data_trimmed;
|
||||
else
|
||||
w_out.write_data <= e_in.write_data;
|
||||
end if;
|
||||
|
||||
-- Perform CR0 update for RC forms
|
||||
-- Note that loads never have a form with an RC bit, therefore this can test e_in.write_data
|
||||
if rc = '1' then
|
||||
if e_in.rc = '1' and e_in.write_enable = '1' then
|
||||
sign := e_in.write_data(63);
|
||||
zero := not (or e_in.write_data);
|
||||
c_out.write_cr_enable <= '1';
|
||||
@@ -196,7 +95,7 @@ begin
|
||||
cf(3) := sign;
|
||||
cf(2) := not sign and not zero;
|
||||
cf(1) := zero;
|
||||
cf(0) := xe.so;
|
||||
cf(0) := e_in.xerc.so;
|
||||
c_out.write_cr_data(31 downto 28) <= cf;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
Reference in New Issue
Block a user