mirror of
https://github.com/antonblanchard/microwatt.git
synced 2026-04-04 20:28:30 +00:00
execute: Implement bypass from output of execute1 to input
This enables back-to-back execution of integer instructions where the first instruction writes a GPR and the second reads the same GPR. This is done with a set of multiplexers at the start of execute1 which enable any of the three input operands to be taken from the output of execute1 (i.e. r.e.write_data) rather than the input from decode2 (i.e. e_in.read_data1, e_in.read_data2 or e_in.read_data3).

This also requires changes to the hazard detection and handling. Decode2 generates a signal indicating that the GPR being written is available for bypass, which is true for instructions that are executed in execute1 (rather than loadstore1/dcache). The gpr_hazard module stores this "bypassable" bit, and if the same GPR needs to be read by a subsequent instruction, it outputs a "use_bypass" signal rather than generating a stall. The use_bypass signal is then latched at the output of decode2 and passed down to execute1 to control the input multiplexer.

At the moment there is no bypass on the inputs to loadstore1, but that is OK because all load and store instructions are marked as single-issue.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
@@ -109,6 +109,9 @@ package common is
|
||||
read_data1: std_ulogic_vector(63 downto 0);
|
||||
read_data2: std_ulogic_vector(63 downto 0);
|
||||
read_data3: std_ulogic_vector(63 downto 0);
|
||||
bypass_data1: std_ulogic;
|
||||
bypass_data2: std_ulogic;
|
||||
bypass_data3: std_ulogic;
|
||||
cr: std_ulogic_vector(31 downto 0);
|
||||
xerc: xer_common_t;
|
||||
lr: std_ulogic;
|
||||
@@ -126,7 +129,8 @@ package common is
|
||||
data_len: std_ulogic_vector(3 downto 0);
|
||||
end record;
|
||||
constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
|
||||
(valid => '0', insn_type => OP_ILLEGAL, lr => '0', rc => '0', oe => '0', invert_a => '0',
|
||||
(valid => '0', insn_type => OP_ILLEGAL, bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0',
|
||||
lr => '0', rc => '0', oe => '0', invert_a => '0',
|
||||
invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0',
|
||||
is_32bit => '0', is_signed => '0', xerc => xerc_init, others => (others => '0'));
|
||||
|
||||
|
||||
19
control.vhdl
19
control.vhdl
@@ -21,6 +21,7 @@ entity control is
|
||||
|
||||
gpr_write_valid_in : in std_ulogic;
|
||||
gpr_write_in : in gspr_index_t;
|
||||
gpr_bypassable : in std_ulogic;
|
||||
|
||||
gpr_a_read_valid_in : in std_ulogic;
|
||||
gpr_a_read_in : in gspr_index_t;
|
||||
@@ -36,7 +37,11 @@ entity control is
|
||||
|
||||
valid_out : out std_ulogic;
|
||||
stall_out : out std_ulogic;
|
||||
stopped_out : out std_ulogic
|
||||
stopped_out : out std_ulogic;
|
||||
|
||||
gpr_bypass_a : out std_ulogic;
|
||||
gpr_bypass_b : out std_ulogic;
|
||||
gpr_bypass_c : out std_ulogic
|
||||
);
|
||||
end entity control;
|
||||
|
||||
@@ -71,10 +76,12 @@ begin
|
||||
|
||||
gpr_write_valid_in => gpr_write_valid,
|
||||
gpr_write_in => gpr_write_in,
|
||||
bypass_avail => gpr_bypassable,
|
||||
gpr_read_valid_in => gpr_a_read_valid_in,
|
||||
gpr_read_in => gpr_a_read_in,
|
||||
|
||||
stall_out => stall_a_out
|
||||
stall_out => stall_a_out,
|
||||
use_bypass => gpr_bypass_a
|
||||
);
|
||||
|
||||
gpr_hazard1: entity work.gpr_hazard
|
||||
@@ -87,10 +94,12 @@ begin
|
||||
|
||||
gpr_write_valid_in => gpr_write_valid,
|
||||
gpr_write_in => gpr_write_in,
|
||||
bypass_avail => gpr_bypassable,
|
||||
gpr_read_valid_in => gpr_b_read_valid_in,
|
||||
gpr_read_in => gpr_b_read_in,
|
||||
|
||||
stall_out => stall_b_out
|
||||
stall_out => stall_b_out,
|
||||
use_bypass => gpr_bypass_b
|
||||
);
|
||||
|
||||
gpr_c_read_in_fmt <= "0" & gpr_c_read_in;
|
||||
@@ -105,10 +114,12 @@ begin
|
||||
|
||||
gpr_write_valid_in => gpr_write_valid,
|
||||
gpr_write_in => gpr_write_in,
|
||||
bypass_avail => gpr_bypassable,
|
||||
gpr_read_valid_in => gpr_c_read_valid_in,
|
||||
gpr_read_in => gpr_c_read_in_fmt,
|
||||
|
||||
stall_out => stall_c_out
|
||||
stall_out => stall_c_out,
|
||||
use_bypass => gpr_bypass_c
|
||||
);
|
||||
|
||||
cr_hazard0: entity work.cr_hazard
|
||||
|
||||
@@ -9,7 +9,8 @@ use work.wishbone_types.all;
|
||||
entity core is
|
||||
generic (
|
||||
SIM : boolean := false;
|
||||
DISABLE_FLATTEN : boolean := false
|
||||
DISABLE_FLATTEN : boolean := false;
|
||||
EX1_BYPASS : boolean := true
|
||||
);
|
||||
port (
|
||||
clk : in std_logic;
|
||||
@@ -176,6 +177,9 @@ begin
|
||||
decode1_stall_in <= decode2_stall_out;
|
||||
|
||||
decode2_0: entity work.decode2
|
||||
generic map (
|
||||
EX1_BYPASS => EX1_BYPASS
|
||||
)
|
||||
port map (
|
||||
clk => clk,
|
||||
rst => core_rst,
|
||||
@@ -220,6 +224,9 @@ begin
|
||||
);
|
||||
|
||||
execute1_0: entity work.execute1
|
||||
generic map (
|
||||
EX1_BYPASS => EX1_BYPASS
|
||||
)
|
||||
port map (
|
||||
clk => clk,
|
||||
rst => core_rst,
|
||||
|
||||
21
decode2.vhdl
21
decode2.vhdl
@@ -9,6 +9,9 @@ use work.helpers.all;
|
||||
use work.insn_helpers.all;
|
||||
|
||||
entity decode2 is
|
||||
generic (
|
||||
EX1_BYPASS : boolean := true
|
||||
);
|
||||
port (
|
||||
clk : in std_ulogic;
|
||||
rst : in std_ulogic;
|
||||
@@ -184,15 +187,19 @@ architecture behaviour of decode2 is
|
||||
|
||||
signal gpr_write_valid : std_ulogic;
|
||||
signal gpr_write : gspr_index_t;
|
||||
signal gpr_bypassable : std_ulogic;
|
||||
|
||||
signal gpr_a_read_valid : std_ulogic;
|
||||
signal gpr_a_read :gspr_index_t;
|
||||
signal gpr_a_bypass : std_ulogic;
|
||||
|
||||
signal gpr_b_read_valid : std_ulogic;
|
||||
signal gpr_b_read : gspr_index_t;
|
||||
signal gpr_b_bypass : std_ulogic;
|
||||
|
||||
signal gpr_c_read_valid : std_ulogic;
|
||||
signal gpr_c_read : gpr_index_t;
|
||||
signal gpr_c_bypass : std_ulogic;
|
||||
|
||||
signal cr_write_valid : std_ulogic;
|
||||
begin
|
||||
@@ -213,6 +220,7 @@ begin
|
||||
|
||||
gpr_write_valid_in => gpr_write_valid,
|
||||
gpr_write_in => gpr_write,
|
||||
gpr_bypassable => gpr_bypassable,
|
||||
|
||||
gpr_a_read_valid_in => gpr_a_read_valid,
|
||||
gpr_a_read_in => gpr_a_read,
|
||||
@@ -228,7 +236,11 @@ begin
|
||||
|
||||
valid_out => control_valid_out,
|
||||
stall_out => stall_out,
|
||||
stopped_out => stopped_out
|
||||
stopped_out => stopped_out,
|
||||
|
||||
gpr_bypass_a => gpr_a_bypass,
|
||||
gpr_bypass_b => gpr_b_bypass,
|
||||
gpr_bypass_c => gpr_c_bypass
|
||||
);
|
||||
|
||||
decode2_0: process(clk)
|
||||
@@ -295,9 +307,12 @@ begin
|
||||
v.e.insn_type := d_in.decode.insn_type;
|
||||
v.e.read_reg1 := decoded_reg_a.reg;
|
||||
v.e.read_data1 := decoded_reg_a.data;
|
||||
v.e.bypass_data1 := gpr_a_bypass;
|
||||
v.e.read_reg2 := decoded_reg_b.reg;
|
||||
v.e.read_data2 := decoded_reg_b.data;
|
||||
v.e.bypass_data2 := gpr_b_bypass;
|
||||
v.e.read_data3 := decoded_reg_c.data;
|
||||
v.e.bypass_data3 := gpr_c_bypass;
|
||||
v.e.write_reg := decoded_reg_o.reg;
|
||||
v.e.rc := decode_rc(d_in.decode.rc, d_in.insn);
|
||||
if not (d_in.decode.insn_type = OP_MUL_H32 or d_in.decode.insn_type = OP_MUL_H64) then
|
||||
@@ -342,6 +357,10 @@ begin
|
||||
|
||||
gpr_write_valid <= decoded_reg_o.reg_valid;
|
||||
gpr_write <= decoded_reg_o.reg;
|
||||
gpr_bypassable <= '0';
|
||||
if EX1_BYPASS and d_in.decode.unit = ALU then
|
||||
gpr_bypassable <= '1';
|
||||
end if;
|
||||
|
||||
gpr_a_read_valid <= decoded_reg_a.reg_valid;
|
||||
gpr_a_read <= decoded_reg_a.reg;
|
||||
|
||||
135
execute1.vhdl
135
execute1.vhdl
@@ -11,6 +11,9 @@ use work.insn_helpers.all;
|
||||
use work.ppc_fx_insns.all;
|
||||
|
||||
entity execute1 is
|
||||
generic (
|
||||
EX1_BYPASS : boolean := true
|
||||
);
|
||||
port (
|
||||
clk : in std_ulogic;
|
||||
rst : in std_ulogic;
|
||||
@@ -46,6 +49,8 @@ architecture behaviour of execute1 is
|
||||
|
||||
signal r, rin : reg_type;
|
||||
|
||||
signal a_in, b_in, c_in : std_ulogic_vector(63 downto 0);
|
||||
|
||||
signal ctrl: ctrl_t := (others => (others => '0'));
|
||||
signal ctrl_tmp: ctrl_t := (others => (others => '0'));
|
||||
|
||||
@@ -109,9 +114,9 @@ begin
|
||||
|
||||
rotator_0: entity work.rotator
|
||||
port map (
|
||||
rs => e_in.read_data3,
|
||||
ra => e_in.read_data1,
|
||||
shift => e_in.read_data2(6 downto 0),
|
||||
rs => c_in,
|
||||
ra => a_in,
|
||||
shift => b_in(6 downto 0),
|
||||
insn => e_in.insn,
|
||||
is_32bit => e_in.is_32bit,
|
||||
right_shift => right_shift,
|
||||
@@ -124,8 +129,8 @@ begin
|
||||
|
||||
logical_0: entity work.logical
|
||||
port map (
|
||||
rs => e_in.read_data3,
|
||||
rb => e_in.read_data2,
|
||||
rs => c_in,
|
||||
rb => b_in,
|
||||
op => e_in.insn_type,
|
||||
invert_in => e_in.invert_a,
|
||||
invert_out => e_in.invert_out,
|
||||
@@ -137,7 +142,7 @@ begin
|
||||
|
||||
countzero_0: entity work.zero_counter
|
||||
port map (
|
||||
rs => e_in.read_data3,
|
||||
rs => c_in,
|
||||
count_right => e_in.insn(10),
|
||||
is_32bit => e_in.is_32bit,
|
||||
result => countzero_result
|
||||
@@ -158,6 +163,10 @@ begin
|
||||
d_out => divider_to_x
|
||||
);
|
||||
|
||||
a_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data1 = '1' else e_in.read_data1;
|
||||
b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2;
|
||||
c_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data3 = '1' else e_in.read_data3;
|
||||
|
||||
execute1_0: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
@@ -256,21 +265,21 @@ begin
|
||||
|
||||
if e_in.is_32bit = '1' then
|
||||
if e_in.is_signed = '1' then
|
||||
x_to_multiply.data1 <= (others => e_in.read_data1(31));
|
||||
x_to_multiply.data1(31 downto 0) <= e_in.read_data1(31 downto 0);
|
||||
x_to_multiply.data2 <= (others => e_in.read_data2(31));
|
||||
x_to_multiply.data2(31 downto 0) <= e_in.read_data2(31 downto 0);
|
||||
x_to_multiply.data1 <= (others => a_in(31));
|
||||
x_to_multiply.data1(31 downto 0) <= a_in(31 downto 0);
|
||||
x_to_multiply.data2 <= (others => b_in(31));
|
||||
x_to_multiply.data2(31 downto 0) <= b_in(31 downto 0);
|
||||
else
|
||||
x_to_multiply.data1 <= '0' & x"00000000" & e_in.read_data1(31 downto 0);
|
||||
x_to_multiply.data2 <= '0' & x"00000000" & e_in.read_data2(31 downto 0);
|
||||
x_to_multiply.data1 <= '0' & x"00000000" & a_in(31 downto 0);
|
||||
x_to_multiply.data2 <= '0' & x"00000000" & b_in(31 downto 0);
|
||||
end if;
|
||||
else
|
||||
if e_in.is_signed = '1' then
|
||||
x_to_multiply.data1 <= e_in.read_data1(63) & e_in.read_data1;
|
||||
x_to_multiply.data2 <= e_in.read_data2(63) & e_in.read_data2;
|
||||
x_to_multiply.data1 <= a_in(63) & a_in;
|
||||
x_to_multiply.data2 <= b_in(63) & b_in;
|
||||
else
|
||||
x_to_multiply.data1 <= '0' & e_in.read_data1;
|
||||
x_to_multiply.data2 <= '0' & e_in.read_data2;
|
||||
x_to_multiply.data1 <= '0' & a_in;
|
||||
x_to_multiply.data2 <= '0' & b_in;
|
||||
end if;
|
||||
end if;
|
||||
|
||||
@@ -279,23 +288,23 @@ begin
|
||||
sign2 := '0';
|
||||
if e_in.is_signed = '1' then
|
||||
if e_in.is_32bit = '1' then
|
||||
sign1 := e_in.read_data1(31);
|
||||
sign2 := e_in.read_data2(31);
|
||||
sign1 := a_in(31);
|
||||
sign2 := b_in(31);
|
||||
else
|
||||
sign1 := e_in.read_data1(63);
|
||||
sign2 := e_in.read_data2(63);
|
||||
sign1 := a_in(63);
|
||||
sign2 := b_in(63);
|
||||
end if;
|
||||
end if;
|
||||
-- take absolute values
|
||||
if sign1 = '0' then
|
||||
abs1 := signed(e_in.read_data1);
|
||||
abs1 := signed(a_in);
|
||||
else
|
||||
abs1 := - signed(e_in.read_data1);
|
||||
abs1 := - signed(a_in);
|
||||
end if;
|
||||
if sign2 = '0' then
|
||||
abs2 := signed(e_in.read_data2);
|
||||
abs2 := signed(b_in);
|
||||
else
|
||||
abs2 := - signed(e_in.read_data2);
|
||||
abs2 := - signed(b_in);
|
||||
end if;
|
||||
|
||||
x_to_divider <= Execute1ToDividerInit;
|
||||
@@ -358,14 +367,14 @@ begin
|
||||
-- Do nothing
|
||||
when OP_ADD | OP_CMP =>
|
||||
if e_in.invert_a = '0' then
|
||||
a_inv := e_in.read_data1;
|
||||
a_inv := a_in;
|
||||
else
|
||||
a_inv := not e_in.read_data1;
|
||||
a_inv := not a_in;
|
||||
end if;
|
||||
result_with_carry := ppc_adde(a_inv, e_in.read_data2,
|
||||
result_with_carry := ppc_adde(a_inv, b_in,
|
||||
decode_input_carry(e_in.input_carry, v.e.xerc));
|
||||
result := result_with_carry(63 downto 0);
|
||||
carry_32 := result(32) xor a_inv(32) xor e_in.read_data2(32);
|
||||
carry_32 := result(32) xor a_inv(32) xor b_in(32);
|
||||
carry_64 := result_with_carry(64);
|
||||
if e_in.insn_type = OP_ADD then
|
||||
if e_in.output_carry = '1' then
|
||||
@@ -373,8 +382,8 @@ begin
|
||||
end if;
|
||||
if e_in.oe = '1' then
|
||||
set_ov(v.e,
|
||||
calc_ov(a_inv(63), e_in.read_data2(63), carry_64, result_with_carry(63)),
|
||||
calc_ov(a_inv(31), e_in.read_data2(31), carry_32, result_with_carry(31)));
|
||||
calc_ov(a_inv(63), b_in(63), carry_64, result_with_carry(63)),
|
||||
calc_ov(a_inv(31), b_in(31), carry_32, result_with_carry(31)));
|
||||
end if;
|
||||
result_en := '1';
|
||||
else
|
||||
@@ -385,20 +394,20 @@ begin
|
||||
v.e.write_cr_enable := '1';
|
||||
crnum := to_integer(unsigned(bf));
|
||||
v.e.write_cr_mask := num_to_fxm(crnum);
|
||||
zerolo := not (or (e_in.read_data1(31 downto 0) xor e_in.read_data2(31 downto 0)));
|
||||
zerohi := not (or (e_in.read_data1(63 downto 32) xor e_in.read_data2(63 downto 32)));
|
||||
zerolo := not (or (a_in(31 downto 0) xor b_in(31 downto 0)));
|
||||
zerohi := not (or (a_in(63 downto 32) xor b_in(63 downto 32)));
|
||||
if zerolo = '1' and (l = '0' or zerohi = '1') then
|
||||
-- values are equal
|
||||
newcrf := "001" & v.e.xerc.so;
|
||||
else
|
||||
if l = '1' then
|
||||
-- 64-bit comparison
|
||||
msb_a := e_in.read_data1(63);
|
||||
msb_b := e_in.read_data2(63);
|
||||
msb_a := a_in(63);
|
||||
msb_b := b_in(63);
|
||||
else
|
||||
-- 32-bit comparison
|
||||
msb_a := e_in.read_data1(31);
|
||||
msb_b := e_in.read_data2(31);
|
||||
msb_a := a_in(31);
|
||||
msb_b := b_in(31);
|
||||
end if;
|
||||
if msb_a /= msb_b then
|
||||
-- Subtraction might overflow, but
|
||||
@@ -424,25 +433,25 @@ begin
|
||||
when OP_B =>
|
||||
f_out.redirect <= '1';
|
||||
if (insn_aa(e_in.insn)) then
|
||||
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.read_data2));
|
||||
f_out.redirect_nia <= std_ulogic_vector(signed(b_in));
|
||||
else
|
||||
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(e_in.read_data2));
|
||||
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in));
|
||||
end if;
|
||||
when OP_BC =>
|
||||
-- read_data1 is CTR
|
||||
bo := insn_bo(e_in.insn);
|
||||
bi := insn_bi(e_in.insn);
|
||||
if bo(4-2) = '0' then
|
||||
result := std_ulogic_vector(unsigned(e_in.read_data1) - 1);
|
||||
result := std_ulogic_vector(unsigned(a_in) - 1);
|
||||
result_en := '1';
|
||||
v.e.write_reg := fast_spr_num(SPR_CTR);
|
||||
end if;
|
||||
if ppc_bc_taken(bo, bi, e_in.cr, e_in.read_data1) = 1 then
|
||||
if ppc_bc_taken(bo, bi, e_in.cr, a_in) = 1 then
|
||||
f_out.redirect <= '1';
|
||||
if (insn_aa(e_in.insn)) then
|
||||
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.read_data2));
|
||||
f_out.redirect_nia <= std_ulogic_vector(signed(b_in));
|
||||
else
|
||||
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(e_in.read_data2));
|
||||
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in));
|
||||
end if;
|
||||
end if;
|
||||
when OP_BCREG =>
|
||||
@@ -451,40 +460,40 @@ begin
|
||||
bo := insn_bo(e_in.insn);
|
||||
bi := insn_bi(e_in.insn);
|
||||
if bo(4-2) = '0' and e_in.insn(10) = '0' then
|
||||
result := std_ulogic_vector(unsigned(e_in.read_data1) - 1);
|
||||
result := std_ulogic_vector(unsigned(a_in) - 1);
|
||||
result_en := '1';
|
||||
v.e.write_reg := fast_spr_num(SPR_CTR);
|
||||
end if;
|
||||
if ppc_bc_taken(bo, bi, e_in.cr, e_in.read_data1) = 1 then
|
||||
if ppc_bc_taken(bo, bi, e_in.cr, a_in) = 1 then
|
||||
f_out.redirect <= '1';
|
||||
f_out.redirect_nia <= e_in.read_data2(63 downto 2) & "00";
|
||||
f_out.redirect_nia <= b_in(63 downto 2) & "00";
|
||||
end if;
|
||||
when OP_CMPB =>
|
||||
result := ppc_cmpb(e_in.read_data3, e_in.read_data2);
|
||||
result := ppc_cmpb(c_in, b_in);
|
||||
result_en := '1';
|
||||
when OP_CNTZ =>
|
||||
result := countzero_result;
|
||||
result_en := '1';
|
||||
when OP_EXTS =>
|
||||
-- note data_len is a 1-hot encoding
|
||||
negative := (e_in.data_len(0) and e_in.read_data3(7)) or
|
||||
(e_in.data_len(1) and e_in.read_data3(15)) or
|
||||
(e_in.data_len(2) and e_in.read_data3(31));
|
||||
negative := (e_in.data_len(0) and c_in(7)) or
|
||||
(e_in.data_len(1) and c_in(15)) or
|
||||
(e_in.data_len(2) and c_in(31));
|
||||
result := (others => negative);
|
||||
if e_in.data_len(2) = '1' then
|
||||
result(31 downto 16) := e_in.read_data3(31 downto 16);
|
||||
result(31 downto 16) := c_in(31 downto 16);
|
||||
end if;
|
||||
if e_in.data_len(2) = '1' or e_in.data_len(1) = '1' then
|
||||
result(15 downto 8) := e_in.read_data3(15 downto 8);
|
||||
result(15 downto 8) := c_in(15 downto 8);
|
||||
end if;
|
||||
result(7 downto 0) := e_in.read_data3(7 downto 0);
|
||||
result(7 downto 0) := c_in(7 downto 0);
|
||||
result_en := '1';
|
||||
when OP_ISEL =>
|
||||
crbit := to_integer(unsigned(insn_bc(e_in.insn)));
|
||||
if e_in.cr(31-crbit) = '1' then
|
||||
result := e_in.read_data1;
|
||||
result := a_in;
|
||||
else
|
||||
result := e_in.read_data2;
|
||||
result := b_in;
|
||||
end if;
|
||||
result_en := '1';
|
||||
when OP_MCRF =>
|
||||
@@ -549,7 +558,7 @@ begin
|
||||
end if;
|
||||
when OP_MFSPR =>
|
||||
if is_fast_spr(e_in.read_reg1) then
|
||||
result := e_in.read_data1;
|
||||
result := a_in;
|
||||
if decode_spr_num(e_in.insn) = SPR_XER then
|
||||
-- bits 0:31 and 35:43 are treated as reserved and return 0s when read using mfxer
|
||||
result(63 downto 32) := (others => '0');
|
||||
@@ -596,19 +605,19 @@ begin
|
||||
crnum := fxm_to_num(insn_fxm(e_in.insn));
|
||||
v.e.write_cr_mask := num_to_fxm(crnum);
|
||||
end if;
|
||||
v.e.write_cr_data := e_in.read_data3(31 downto 0);
|
||||
v.e.write_cr_data := c_in(31 downto 0);
|
||||
when OP_MTSPR =>
|
||||
report "MTSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) &
|
||||
"=" & to_hstring(e_in.read_data3);
|
||||
"=" & to_hstring(c_in);
|
||||
if is_fast_spr(e_in.write_reg) then
|
||||
result := e_in.read_data3;
|
||||
result := c_in;
|
||||
result_en := '1';
|
||||
if decode_spr_num(e_in.insn) = SPR_XER then
|
||||
v.e.xerc.so := e_in.read_data3(63-32);
|
||||
v.e.xerc.ov := e_in.read_data3(63-33);
|
||||
v.e.xerc.ca := e_in.read_data3(63-34);
|
||||
v.e.xerc.ov32 := e_in.read_data3(63-44);
|
||||
v.e.xerc.ca32 := e_in.read_data3(63-45);
|
||||
v.e.xerc.so := c_in(63-32);
|
||||
v.e.xerc.ov := c_in(63-33);
|
||||
v.e.xerc.ca := c_in(63-34);
|
||||
v.e.xerc.ov32 := c_in(63-44);
|
||||
v.e.xerc.ca32 := c_in(63-45);
|
||||
v.e.write_xerc_enable := '1';
|
||||
end if;
|
||||
else
|
||||
|
||||
@@ -12,18 +12,21 @@ entity gpr_hazard is
|
||||
|
||||
gpr_write_valid_in : in std_ulogic;
|
||||
gpr_write_in : in std_ulogic_vector(5 downto 0);
|
||||
bypass_avail : in std_ulogic;
|
||||
gpr_read_valid_in : in std_ulogic;
|
||||
gpr_read_in : in std_ulogic_vector(5 downto 0);
|
||||
|
||||
stall_out : out std_ulogic
|
||||
stall_out : out std_ulogic;
|
||||
use_bypass : out std_ulogic
|
||||
);
|
||||
end entity gpr_hazard;
|
||||
architecture behaviour of gpr_hazard is
|
||||
type pipeline_entry_type is record
|
||||
valid : std_ulogic;
|
||||
gpr : std_ulogic_vector(5 downto 0);
|
||||
valid : std_ulogic;
|
||||
bypass : std_ulogic;
|
||||
gpr : std_ulogic_vector(5 downto 0);
|
||||
end record;
|
||||
constant pipeline_entry_init : pipeline_entry_type := (valid => '0', gpr => (others => '0'));
|
||||
constant pipeline_entry_init : pipeline_entry_type := (valid => '0', bypass => '0', gpr => (others => '0'));
|
||||
|
||||
type pipeline_t is array(0 to PIPELINE_DEPTH-1) of pipeline_entry_type;
|
||||
constant pipeline_t_init : pipeline_t := (others => pipeline_entry_init);
|
||||
@@ -33,9 +36,7 @@ begin
|
||||
gpr_hazard0: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
if stall_in = '0' then
|
||||
r <= rin;
|
||||
end if;
|
||||
r <= rin;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
@@ -45,22 +46,49 @@ begin
|
||||
v := r;
|
||||
|
||||
stall_out <= '0';
|
||||
loop_0: for i in 0 to PIPELINE_DEPTH-1 loop
|
||||
if ((r(i).valid = gpr_read_valid_in) and r(i).gpr = gpr_read_in) then
|
||||
stall_out <= '1';
|
||||
use_bypass <= '0';
|
||||
if gpr_read_valid_in = '1' then
|
||||
if r(0).valid = '1' and r(0).gpr = gpr_read_in then
|
||||
if r(0).bypass = '1' and stall_in = '0' then
|
||||
use_bypass <= '1';
|
||||
else
|
||||
stall_out <= '1';
|
||||
end if;
|
||||
end if;
|
||||
end loop;
|
||||
loop_0: for i in 1 to PIPELINE_DEPTH-1 loop
|
||||
if r(i).valid = '1' and r(i).gpr = gpr_read_in then
|
||||
if r(i).bypass = '1' then
|
||||
use_bypass <= '1';
|
||||
else
|
||||
stall_out <= '1';
|
||||
end if;
|
||||
end if;
|
||||
end loop;
|
||||
end if;
|
||||
|
||||
v(0).valid := gpr_write_valid_in;
|
||||
v(0).gpr := gpr_write_in;
|
||||
loop_1: for i in 0 to PIPELINE_DEPTH-2 loop
|
||||
-- propagate to next slot
|
||||
v(i+1) := r(i);
|
||||
end loop;
|
||||
if stall_in = '0' then
|
||||
v(0).valid := gpr_write_valid_in;
|
||||
v(0).bypass := bypass_avail;
|
||||
v(0).gpr := gpr_write_in;
|
||||
loop_1: for i in 1 to PIPELINE_DEPTH-1 loop
|
||||
-- propagate to next slot
|
||||
v(i).valid := r(i-1).valid;
|
||||
v(i).bypass := r(i-1).bypass;
|
||||
v(i).gpr := r(i-1).gpr;
|
||||
end loop;
|
||||
|
||||
-- asynchronous output
|
||||
if gpr_read_valid_in = '0' then
|
||||
stall_out <= '0';
|
||||
else
|
||||
-- stage 0 stalled, so stage 1 becomes empty
|
||||
loop_1b: for i in 1 to PIPELINE_DEPTH-1 loop
|
||||
-- propagate to next slot
|
||||
if i = 1 then
|
||||
v(i).valid := '0';
|
||||
else
|
||||
v(i).valid := r(i-1).valid;
|
||||
v(i).bypass := r(i-1).bypass;
|
||||
v(i).gpr := r(i-1).gpr;
|
||||
end if;
|
||||
end loop;
|
||||
end if;
|
||||
|
||||
-- update registers
|
||||
|
||||
Reference in New Issue
Block a user