mirror of
https://github.com/antonblanchard/microwatt.git
synced 2026-03-30 19:05:04 +00:00
core: Use a busy signal rather than a stall
This changes the instruction dependency tracking so that we can generate a "busy" signal from execute1 and loadstore1 which comes along one cycle later than the current "stall" signal. This will enable us to signal busy cycles only when we need to from loadstore1.

The "busy" signal from execute1/loadstore1 indicates "I didn't take the thing you gave me on this cycle", as distinct from the previous stall signal which meant "I took that but don't give me anything next cycle". That means that decode2 proactively gives execute1 a new instruction as soon as it has taken the previous one (assuming there is a valid instruction available from decode1), and that then sits in decode2's output until execute1 can take it. So instructions are issued by decode2 somewhat earlier than they used to be.

Decode2 now only signals a stall upstream when its output buffer is full, meaning that we can fill up bubbles in the upstream pipe while a long instruction is executing. This gives a small boost in performance.

This also adds dependency tracking for rA updates by update-form load/store instructions.

The GPR and CR hazard detection machinery now has one extra stage, which may not be strictly necessary. Some of the code now really only applies to PIPELINE_DEPTH=1.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
@@ -244,6 +244,7 @@ package common is
|
||||
others => (others => '0'));
|
||||
|
||||
type Loadstore1ToExecute1Type is record
|
||||
busy : std_ulogic;
|
||||
exception : std_ulogic;
|
||||
invalid : std_ulogic;
|
||||
perm_error : std_ulogic;
|
||||
|
||||
61
control.vhdl
61
control.vhdl
@@ -15,7 +15,8 @@ entity control is
|
||||
complete_in : in std_ulogic;
|
||||
valid_in : in std_ulogic;
|
||||
flush_in : in std_ulogic;
|
||||
stall_in : in std_ulogic;
|
||||
busy_in : in std_ulogic;
|
||||
deferred : in std_ulogic;
|
||||
sgl_pipe_in : in std_ulogic;
|
||||
stop_mark_in : in std_ulogic;
|
||||
|
||||
@@ -23,6 +24,9 @@ entity control is
|
||||
gpr_write_in : in gspr_index_t;
|
||||
gpr_bypassable : in std_ulogic;
|
||||
|
||||
update_gpr_write_valid : in std_ulogic;
|
||||
update_gpr_write_reg : in gspr_index_t;
|
||||
|
||||
gpr_a_read_valid_in : in std_ulogic;
|
||||
gpr_a_read_in : in gspr_index_t;
|
||||
|
||||
@@ -72,7 +76,11 @@ begin
|
||||
)
|
||||
port map (
|
||||
clk => clk,
|
||||
stall_in => stall_in,
|
||||
busy_in => busy_in,
|
||||
deferred => deferred,
|
||||
complete_in => complete_in,
|
||||
flush_in => flush_in,
|
||||
issuing => valid_out,
|
||||
|
||||
gpr_write_valid_in => gpr_write_valid,
|
||||
gpr_write_in => gpr_write_in,
|
||||
@@ -80,6 +88,9 @@ begin
|
||||
gpr_read_valid_in => gpr_a_read_valid_in,
|
||||
gpr_read_in => gpr_a_read_in,
|
||||
|
||||
ugpr_write_valid => update_gpr_write_valid,
|
||||
ugpr_write_reg => update_gpr_write_reg,
|
||||
|
||||
stall_out => stall_a_out,
|
||||
use_bypass => gpr_bypass_a
|
||||
);
|
||||
@@ -90,7 +101,11 @@ begin
|
||||
)
|
||||
port map (
|
||||
clk => clk,
|
||||
stall_in => stall_in,
|
||||
busy_in => busy_in,
|
||||
deferred => deferred,
|
||||
complete_in => complete_in,
|
||||
flush_in => flush_in,
|
||||
issuing => valid_out,
|
||||
|
||||
gpr_write_valid_in => gpr_write_valid,
|
||||
gpr_write_in => gpr_write_in,
|
||||
@@ -98,6 +113,9 @@ begin
|
||||
gpr_read_valid_in => gpr_b_read_valid_in,
|
||||
gpr_read_in => gpr_b_read_in,
|
||||
|
||||
ugpr_write_valid => update_gpr_write_valid,
|
||||
ugpr_write_reg => update_gpr_write_reg,
|
||||
|
||||
stall_out => stall_b_out,
|
||||
use_bypass => gpr_bypass_b
|
||||
);
|
||||
@@ -110,7 +128,11 @@ begin
|
||||
)
|
||||
port map (
|
||||
clk => clk,
|
||||
stall_in => stall_in,
|
||||
busy_in => busy_in,
|
||||
deferred => deferred,
|
||||
complete_in => complete_in,
|
||||
flush_in => flush_in,
|
||||
issuing => valid_out,
|
||||
|
||||
gpr_write_valid_in => gpr_write_valid,
|
||||
gpr_write_in => gpr_write_in,
|
||||
@@ -118,6 +140,9 @@ begin
|
||||
gpr_read_valid_in => gpr_c_read_valid_in,
|
||||
gpr_read_in => gpr_c_read_in_fmt,
|
||||
|
||||
ugpr_write_valid => update_gpr_write_valid,
|
||||
ugpr_write_reg => update_gpr_write_reg,
|
||||
|
||||
stall_out => stall_c_out,
|
||||
use_bypass => gpr_bypass_c
|
||||
);
|
||||
@@ -128,7 +153,11 @@ begin
|
||||
)
|
||||
port map (
|
||||
clk => clk,
|
||||
stall_in => stall_in,
|
||||
busy_in => busy_in,
|
||||
deferred => deferred,
|
||||
complete_in => complete_in,
|
||||
flush_in => flush_in,
|
||||
issuing => valid_out,
|
||||
|
||||
cr_read_in => cr_read_in,
|
||||
cr_write_in => cr_write_valid,
|
||||
@@ -139,7 +168,8 @@ begin
|
||||
control0: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
assert r_int.outstanding >= 0 and r_int.outstanding <= (PIPELINE_DEPTH+1) report "Outstanding bad " & integer'image(r_int.outstanding) severity failure;
|
||||
assert rin_int.outstanding >= 0 and rin_int.outstanding <= (PIPELINE_DEPTH+1)
|
||||
report "Outstanding bad " & integer'image(rin_int.outstanding) severity failure;
|
||||
r_int <= rin_int;
|
||||
end if;
|
||||
end process;
|
||||
@@ -152,17 +182,18 @@ begin
|
||||
v_int := r_int;
|
||||
|
||||
-- asynchronous
|
||||
valid_tmp := valid_in and not flush_in and not stall_in;
|
||||
stall_tmp := stall_in;
|
||||
valid_tmp := valid_in and not flush_in;
|
||||
stall_tmp := '0';
|
||||
|
||||
if complete_in = '1' then
|
||||
if flush_in = '1' then
|
||||
-- expect to see complete_in next cycle
|
||||
v_int.outstanding := 1;
|
||||
elsif complete_in = '1' then
|
||||
v_int.outstanding := r_int.outstanding - 1;
|
||||
end if;
|
||||
|
||||
if rst = '1' then
|
||||
v_int.state := IDLE;
|
||||
v_int.outstanding := 0;
|
||||
stall_tmp := '0';
|
||||
v_int := reg_internal_init;
|
||||
valid_tmp := '0';
|
||||
end if;
|
||||
|
||||
@@ -227,7 +258,9 @@ begin
|
||||
end if;
|
||||
|
||||
if valid_tmp = '1' then
|
||||
v_int.outstanding := v_int.outstanding + 1;
|
||||
if deferred = '0' then
|
||||
v_int.outstanding := v_int.outstanding + 1;
|
||||
end if;
|
||||
gpr_write_valid <= gpr_write_valid_in;
|
||||
cr_write_valid <= cr_write_in;
|
||||
else
|
||||
@@ -237,7 +270,7 @@ begin
|
||||
|
||||
-- update outputs
|
||||
valid_out <= valid_tmp;
|
||||
stall_out <= stall_tmp;
|
||||
stall_out <= stall_tmp or deferred;
|
||||
|
||||
-- update registers
|
||||
rin_int <= v_int;
|
||||
|
||||
12
core.vhdl
12
core.vhdl
@@ -82,11 +82,10 @@ architecture behave of core is
|
||||
signal icache_stall_out : std_ulogic;
|
||||
signal icache_stall_in : std_ulogic;
|
||||
signal decode1_stall_in : std_ulogic;
|
||||
signal decode2_stall_in : std_ulogic;
|
||||
signal decode2_busy_in : std_ulogic;
|
||||
signal decode2_stall_out : std_ulogic;
|
||||
signal ex1_icache_inval: std_ulogic;
|
||||
signal ex1_stall_out: std_ulogic;
|
||||
signal ls1_stall_out: std_ulogic;
|
||||
signal ex1_busy_out: std_ulogic;
|
||||
signal dcache_stall_out: std_ulogic;
|
||||
|
||||
signal flush: std_ulogic;
|
||||
@@ -235,7 +234,7 @@ begin
|
||||
port map (
|
||||
clk => clk,
|
||||
rst => rst_dec2,
|
||||
stall_in => decode2_stall_in,
|
||||
busy_in => decode2_busy_in,
|
||||
stall_out => decode2_stall_out,
|
||||
flush_in => flush,
|
||||
complete_in => complete,
|
||||
@@ -248,7 +247,7 @@ begin
|
||||
c_out => decode2_to_cr_file,
|
||||
log_out => log_data(119 downto 110)
|
||||
);
|
||||
decode2_stall_in <= ex1_stall_out or ls1_stall_out;
|
||||
decode2_busy_in <= ex1_busy_out;
|
||||
|
||||
register_file_0: entity work.register_file
|
||||
generic map (
|
||||
@@ -289,7 +288,7 @@ begin
|
||||
clk => clk,
|
||||
rst => rst_ex1,
|
||||
flush_out => flush,
|
||||
stall_out => ex1_stall_out,
|
||||
busy_out => ex1_busy_out,
|
||||
e_in => decode2_to_execute1,
|
||||
l_in => loadstore1_to_execute1,
|
||||
ext_irq_in => ext_irq,
|
||||
@@ -317,7 +316,6 @@ begin
|
||||
m_out => loadstore1_to_mmu,
|
||||
m_in => mmu_to_loadstore1,
|
||||
dc_stall => dcache_stall_out,
|
||||
stall_out => ls1_stall_out,
|
||||
log_out => log_data(149 downto 140)
|
||||
);
|
||||
|
||||
|
||||
@@ -4,11 +4,15 @@ use ieee.numeric_std.all;
|
||||
|
||||
entity cr_hazard is
|
||||
generic (
|
||||
PIPELINE_DEPTH : natural := 2
|
||||
PIPELINE_DEPTH : natural := 1
|
||||
);
|
||||
port(
|
||||
clk : in std_ulogic;
|
||||
stall_in : in std_ulogic;
|
||||
busy_in : in std_ulogic;
|
||||
deferred : in std_ulogic;
|
||||
complete_in : in std_ulogic;
|
||||
flush_in : in std_ulogic;
|
||||
issuing : in std_ulogic;
|
||||
|
||||
cr_read_in : in std_ulogic;
|
||||
cr_write_in : in std_ulogic;
|
||||
@@ -22,7 +26,7 @@ architecture behaviour of cr_hazard is
|
||||
end record;
|
||||
constant pipeline_entry_init : pipeline_entry_type := (valid => '0');
|
||||
|
||||
type pipeline_t is array(0 to PIPELINE_DEPTH-1) of pipeline_entry_type;
|
||||
type pipeline_t is array(0 to PIPELINE_DEPTH) of pipeline_entry_type;
|
||||
constant pipeline_t_init : pipeline_t := (others => pipeline_entry_init);
|
||||
|
||||
signal r, rin : pipeline_t := pipeline_t_init;
|
||||
@@ -30,9 +34,7 @@ begin
|
||||
cr_hazard0: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
if stall_in = '0' then
|
||||
r <= rin;
|
||||
end if;
|
||||
r <= rin;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
@@ -41,22 +43,23 @@ begin
|
||||
begin
|
||||
v := r;
|
||||
|
||||
stall_out <= '0';
|
||||
loop_0: for i in 0 to PIPELINE_DEPTH-1 loop
|
||||
if (r(i).valid = cr_read_in) then
|
||||
stall_out <= '1';
|
||||
end if;
|
||||
end loop;
|
||||
-- XXX assumes PIPELINE_DEPTH = 1
|
||||
if complete_in = '1' then
|
||||
v(1).valid := '0';
|
||||
end if;
|
||||
stall_out <= cr_read_in and (v(0).valid or v(1).valid);
|
||||
|
||||
v(0).valid := cr_write_in;
|
||||
loop_1: for i in 0 to PIPELINE_DEPTH-2 loop
|
||||
-- propagate to next slot
|
||||
v(i+1) := r(i);
|
||||
end loop;
|
||||
|
||||
-- asynchronous output
|
||||
if cr_read_in = '0' then
|
||||
stall_out <= '0';
|
||||
-- XXX assumes PIPELINE_DEPTH = 1
|
||||
if busy_in = '0' then
|
||||
v(1) := r(0);
|
||||
v(0).valid := '0';
|
||||
end if;
|
||||
if deferred = '0' and issuing = '1' then
|
||||
v(0).valid := cr_write_in;
|
||||
end if;
|
||||
if flush_in = '1' then
|
||||
v(0).valid := '0';
|
||||
v(1).valid := '0';
|
||||
end if;
|
||||
|
||||
-- update registers
|
||||
|
||||
27
decode2.vhdl
27
decode2.vhdl
@@ -17,7 +17,7 @@ entity decode2 is
|
||||
rst : in std_ulogic;
|
||||
|
||||
complete_in : in std_ulogic;
|
||||
stall_in : in std_ulogic;
|
||||
busy_in : in std_ulogic;
|
||||
stall_out : out std_ulogic;
|
||||
|
||||
stopped_out : out std_ulogic;
|
||||
@@ -45,6 +45,8 @@ architecture behaviour of decode2 is
|
||||
|
||||
signal r, rin : reg_type;
|
||||
|
||||
signal deferred : std_ulogic;
|
||||
|
||||
signal log_data : std_ulogic_vector(9 downto 0);
|
||||
|
||||
type decode_input_reg_t is record
|
||||
@@ -200,6 +202,9 @@ architecture behaviour of decode2 is
|
||||
signal gpr_write : gspr_index_t;
|
||||
signal gpr_bypassable : std_ulogic;
|
||||
|
||||
signal update_gpr_write_valid : std_ulogic;
|
||||
signal update_gpr_write_reg : gspr_index_t;
|
||||
|
||||
signal gpr_a_read_valid : std_ulogic;
|
||||
signal gpr_a_read :gspr_index_t;
|
||||
signal gpr_a_bypass : std_ulogic;
|
||||
@@ -224,7 +229,8 @@ begin
|
||||
|
||||
complete_in => complete_in,
|
||||
valid_in => control_valid_in,
|
||||
stall_in => stall_in,
|
||||
busy_in => busy_in,
|
||||
deferred => deferred,
|
||||
flush_in => flush_in,
|
||||
sgl_pipe_in => control_sgl_pipe,
|
||||
stop_mark_in => d_in.stop_mark,
|
||||
@@ -233,6 +239,9 @@ begin
|
||||
gpr_write_in => gpr_write,
|
||||
gpr_bypassable => gpr_bypassable,
|
||||
|
||||
update_gpr_write_valid => update_gpr_write_valid,
|
||||
update_gpr_write_reg => update_gpr_write_reg,
|
||||
|
||||
gpr_a_read_valid_in => gpr_a_read_valid,
|
||||
gpr_a_read_in => gpr_a_read,
|
||||
|
||||
@@ -254,13 +263,17 @@ begin
|
||||
gpr_bypass_c => gpr_c_bypass
|
||||
);
|
||||
|
||||
deferred <= r.e.valid and busy_in;
|
||||
|
||||
decode2_0: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
if rin.e.valid = '1' then
|
||||
report "execute " & to_hstring(rin.e.nia);
|
||||
if rst = '1' or flush_in = '1' or deferred = '0' then
|
||||
if rin.e.valid = '1' then
|
||||
report "execute " & to_hstring(rin.e.nia);
|
||||
end if;
|
||||
r <= rin;
|
||||
end if;
|
||||
r <= rin;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
@@ -358,6 +371,8 @@ begin
|
||||
if EX1_BYPASS and d_in.decode.unit = ALU then
|
||||
gpr_bypassable <= '1';
|
||||
end if;
|
||||
update_gpr_write_valid <= d_in.decode.update;
|
||||
update_gpr_write_reg <= decoded_reg_a.reg;
|
||||
|
||||
gpr_a_read_valid <= decoded_reg_a.reg_valid;
|
||||
gpr_a_read <= decoded_reg_a.reg;
|
||||
@@ -375,7 +390,7 @@ begin
|
||||
v.e.insn_type := OP_ILLEGAL;
|
||||
end if;
|
||||
|
||||
if rst = '1' then
|
||||
if rst = '1' or flush_in = '1' then
|
||||
v.e := Decode2ToExecute1Init;
|
||||
end if;
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ entity execute1 is
|
||||
|
||||
-- asynchronous
|
||||
flush_out : out std_ulogic;
|
||||
stall_out : out std_ulogic;
|
||||
busy_out : out std_ulogic;
|
||||
|
||||
e_in : in Decode2ToExecute1Type;
|
||||
l_in : in Loadstore1ToExecute1Type;
|
||||
@@ -48,6 +48,8 @@ end entity execute1;
|
||||
architecture behaviour of execute1 is
|
||||
type reg_type is record
|
||||
e : Execute1ToWritebackType;
|
||||
busy: std_ulogic;
|
||||
terminate: std_ulogic;
|
||||
lr_update : std_ulogic;
|
||||
next_lr : std_ulogic_vector(63 downto 0);
|
||||
mul_in_progress : std_ulogic;
|
||||
@@ -62,7 +64,7 @@ architecture behaviour of execute1 is
|
||||
log_addr_spr : std_ulogic_vector(31 downto 0);
|
||||
end record;
|
||||
constant reg_type_init : reg_type :=
|
||||
(e => Execute1ToWritebackInit, lr_update => '0',
|
||||
(e => Execute1ToWritebackInit, busy => '0', lr_update => '0', terminate => '0',
|
||||
mul_in_progress => '0', div_in_progress => '0', cntz_in_progress => '0',
|
||||
slow_op_insn => OP_ILLEGAL, slow_op_rc => '0', slow_op_oe => '0', slow_op_xerc => xerc_init,
|
||||
next_lr => (others => '0'), ldst_nia => (others => '0'), others => (others => '0'));
|
||||
@@ -71,6 +73,7 @@ architecture behaviour of execute1 is
|
||||
|
||||
signal a_in, b_in, c_in : std_ulogic_vector(63 downto 0);
|
||||
|
||||
signal valid_in : std_ulogic;
|
||||
signal ctrl: ctrl_t := (irq_state => WRITE_SRR0, others => (others => '0'));
|
||||
signal ctrl_tmp: ctrl_t := (irq_state => WRITE_SRR0, others => (others => '0'));
|
||||
signal right_shift, rot_clear_left, rot_clear_right: std_ulogic;
|
||||
@@ -241,6 +244,11 @@ begin
|
||||
b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2;
|
||||
c_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data3 = '1' else e_in.read_data3;
|
||||
|
||||
busy_out <= l_in.busy or r.busy;
|
||||
valid_in <= e_in.valid and not busy_out;
|
||||
|
||||
terminate_out <= r.terminate;
|
||||
|
||||
execute1_0: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
@@ -251,7 +259,7 @@ begin
|
||||
else
|
||||
r <= rin;
|
||||
ctrl <= ctrl_tmp;
|
||||
assert not (r.lr_update = '1' and e_in.valid = '1')
|
||||
assert not (r.lr_update = '1' and valid_in = '1')
|
||||
report "LR update collision with valid in EX1"
|
||||
severity failure;
|
||||
if r.lr_update = '1' then
|
||||
@@ -423,9 +431,9 @@ begin
|
||||
end if;
|
||||
end if;
|
||||
|
||||
terminate_out <= '0';
|
||||
v.terminate := '0';
|
||||
icache_inval <= '0';
|
||||
stall_out <= '0';
|
||||
v.busy := '0';
|
||||
f_out <= Execute1ToFetch1TypeInit;
|
||||
-- send MSR[IR] and ~MSR[PR] up to fetch1
|
||||
f_out.virt_mode <= ctrl.msr(MSR_IR);
|
||||
@@ -463,10 +471,10 @@ begin
|
||||
f_out.virt_mode <= '0';
|
||||
f_out.priv_mode <= '1';
|
||||
f_out.redirect_nia <= ctrl.irq_nia;
|
||||
v.e.valid := e_in.valid;
|
||||
v.e.valid := '1';
|
||||
report "Writing SRR1: " & to_hstring(ctrl.srr1);
|
||||
|
||||
elsif irq_valid = '1' and e_in.valid = '1' then
|
||||
elsif irq_valid = '1' and valid_in = '1' then
|
||||
-- we need two cycles to write srr0 and 1
|
||||
-- will need more when we have to write HEIR
|
||||
-- Don't deliver the interrupt until we have a valid instruction
|
||||
@@ -474,7 +482,7 @@ begin
|
||||
exception := '1';
|
||||
ctrl_tmp.srr1 <= msr_copy(ctrl.msr);
|
||||
|
||||
elsif e_in.valid = '1' and ctrl.msr(MSR_PR) = '1' and
|
||||
elsif valid_in = '1' and ctrl.msr(MSR_PR) = '1' and
|
||||
instr_is_privileged(e_in.insn_type, e_in.insn) then
|
||||
-- generate a program interrupt
|
||||
exception := '1';
|
||||
@@ -484,7 +492,7 @@ begin
|
||||
ctrl_tmp.srr1(63 - 45) <= '1';
|
||||
report "privileged instruction";
|
||||
|
||||
elsif e_in.valid = '1' and e_in.unit = ALU then
|
||||
elsif valid_in = '1' and e_in.unit = ALU then
|
||||
|
||||
report "execute nia " & to_hstring(e_in.nia);
|
||||
|
||||
@@ -519,7 +527,7 @@ begin
|
||||
-- check bits 1-10 of the instruction to make sure it's attn
|
||||
-- if not then it is illegal
|
||||
if e_in.insn(10 downto 1) = "0100000000" then
|
||||
terminate_out <= '1';
|
||||
v.terminate := '1';
|
||||
report "ATTN";
|
||||
else
|
||||
illegal := '1';
|
||||
@@ -674,7 +682,7 @@ begin
|
||||
when OP_CNTZ =>
|
||||
v.e.valid := '0';
|
||||
v.cntz_in_progress := '1';
|
||||
stall_out <= '1';
|
||||
v.busy := '1';
|
||||
when OP_EXTS =>
|
||||
-- note data_len is a 1-hot encoding
|
||||
negative := (e_in.data_len(0) and c_in(7)) or
|
||||
@@ -876,21 +884,21 @@ begin
|
||||
when OP_MUL_L64 | OP_MUL_H64 | OP_MUL_H32 =>
|
||||
v.e.valid := '0';
|
||||
v.mul_in_progress := '1';
|
||||
stall_out <= '1';
|
||||
v.busy := '1';
|
||||
x_to_multiply.valid <= '1';
|
||||
|
||||
when OP_DIV | OP_DIVE | OP_MOD =>
|
||||
v.e.valid := '0';
|
||||
v.div_in_progress := '1';
|
||||
stall_out <= '1';
|
||||
v.busy := '1';
|
||||
x_to_divider.valid <= '1';
|
||||
|
||||
when others =>
|
||||
terminate_out <= '1';
|
||||
v.terminate := '1';
|
||||
report "illegal";
|
||||
end case;
|
||||
|
||||
v.e.rc := e_in.rc and e_in.valid;
|
||||
v.e.rc := e_in.rc and valid_in;
|
||||
|
||||
-- Update LR on the next cycle after a branch link
|
||||
--
|
||||
@@ -908,10 +916,10 @@ begin
|
||||
v.next_lr := next_nia;
|
||||
v.e.valid := '0';
|
||||
report "Delayed LR update to " & to_hstring(next_nia);
|
||||
stall_out <= '1';
|
||||
v.busy := '1';
|
||||
end if;
|
||||
|
||||
elsif e_in.valid = '1' then
|
||||
elsif valid_in = '1' then
|
||||
-- instruction for other units, i.e. LDST
|
||||
v.ldst_nia := e_in.nia;
|
||||
v.e.valid := '0';
|
||||
@@ -967,7 +975,7 @@ begin
|
||||
end if;
|
||||
v.e.valid := '1';
|
||||
else
|
||||
stall_out <= '1';
|
||||
v.busy := '1';
|
||||
v.mul_in_progress := r.mul_in_progress;
|
||||
v.div_in_progress := r.div_in_progress;
|
||||
end if;
|
||||
@@ -988,7 +996,8 @@ begin
|
||||
v.e.exc_write_data := next_nia;
|
||||
end if;
|
||||
ctrl_tmp.irq_state <= WRITE_SRR1;
|
||||
v.e.valid := '1';
|
||||
v.busy := '1';
|
||||
v.e.valid := '0';
|
||||
end if;
|
||||
|
||||
v.e.write_data := result;
|
||||
@@ -1020,7 +1029,6 @@ begin
|
||||
v.e.exc_write_data := r.ldst_nia;
|
||||
report "ldst exception writing srr0=" & to_hstring(r.ldst_nia);
|
||||
ctrl_tmp.irq_state <= WRITE_SRR1;
|
||||
v.e.valid := '1'; -- complete the original load or store
|
||||
end if;
|
||||
|
||||
-- Outputs to loadstore1 (async)
|
||||
@@ -1072,7 +1080,7 @@ begin
|
||||
r.e.write_enable &
|
||||
r.e.valid &
|
||||
f_out.redirect &
|
||||
stall_out &
|
||||
r.busy &
|
||||
flush_out;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
@@ -4,11 +4,15 @@ use ieee.numeric_std.all;
|
||||
|
||||
entity gpr_hazard is
|
||||
generic (
|
||||
PIPELINE_DEPTH : natural := 2
|
||||
PIPELINE_DEPTH : natural := 1
|
||||
);
|
||||
port(
|
||||
clk : in std_ulogic;
|
||||
stall_in : in std_ulogic;
|
||||
busy_in : in std_ulogic;
|
||||
deferred : in std_ulogic;
|
||||
complete_in : in std_ulogic;
|
||||
flush_in : in std_ulogic;
|
||||
issuing : in std_ulogic;
|
||||
|
||||
gpr_write_valid_in : in std_ulogic;
|
||||
gpr_write_in : in std_ulogic_vector(5 downto 0);
|
||||
@@ -16,6 +20,9 @@ entity gpr_hazard is
|
||||
gpr_read_valid_in : in std_ulogic;
|
||||
gpr_read_in : in std_ulogic_vector(5 downto 0);
|
||||
|
||||
ugpr_write_valid : in std_ulogic;
|
||||
ugpr_write_reg : in std_ulogic_vector(5 downto 0);
|
||||
|
||||
stall_out : out std_ulogic;
|
||||
use_bypass : out std_ulogic
|
||||
);
|
||||
@@ -25,10 +32,13 @@ architecture behaviour of gpr_hazard is
|
||||
valid : std_ulogic;
|
||||
bypass : std_ulogic;
|
||||
gpr : std_ulogic_vector(5 downto 0);
|
||||
ugpr_valid : std_ulogic;
|
||||
ugpr : std_ulogic_vector(5 downto 0);
|
||||
end record;
|
||||
constant pipeline_entry_init : pipeline_entry_type := (valid => '0', bypass => '0', gpr => (others => '0'));
|
||||
constant pipeline_entry_init : pipeline_entry_type := (valid => '0', bypass => '0', gpr => (others => '0'),
|
||||
ugpr_valid => '0', ugpr => (others => '0'));
|
||||
|
||||
type pipeline_t is array(0 to PIPELINE_DEPTH-1) of pipeline_entry_type;
|
||||
type pipeline_t is array(0 to PIPELINE_DEPTH) of pipeline_entry_type;
|
||||
constant pipeline_t_init : pipeline_t := (others => pipeline_entry_init);
|
||||
|
||||
signal r, rin : pipeline_t := pipeline_t_init;
|
||||
@@ -45,50 +55,46 @@ begin
|
||||
begin
|
||||
v := r;
|
||||
|
||||
if complete_in = '1' then
|
||||
v(PIPELINE_DEPTH).valid := '0';
|
||||
v(PIPELINE_DEPTH).ugpr_valid := '0';
|
||||
end if;
|
||||
|
||||
stall_out <= '0';
|
||||
use_bypass <= '0';
|
||||
if gpr_read_valid_in = '1' then
|
||||
if r(0).valid = '1' and r(0).gpr = gpr_read_in then
|
||||
if r(0).bypass = '1' and stall_in = '0' then
|
||||
use_bypass <= '1';
|
||||
else
|
||||
stall_out <= '1';
|
||||
end if;
|
||||
end if;
|
||||
loop_0: for i in 1 to PIPELINE_DEPTH-1 loop
|
||||
if r(i).valid = '1' and r(i).gpr = gpr_read_in then
|
||||
loop_0: for i in 0 to PIPELINE_DEPTH loop
|
||||
if v(i).valid = '1' and r(i).gpr = gpr_read_in then
|
||||
if r(i).bypass = '1' then
|
||||
use_bypass <= '1';
|
||||
else
|
||||
stall_out <= '1';
|
||||
end if;
|
||||
end if;
|
||||
if v(i).ugpr_valid = '1' and r(i).ugpr = gpr_read_in then
|
||||
stall_out <= '1';
|
||||
end if;
|
||||
end loop;
|
||||
end if;
|
||||
|
||||
if stall_in = '0' then
|
||||
-- XXX assumes PIPELINE_DEPTH = 1
|
||||
if busy_in = '0' then
|
||||
v(1) := v(0);
|
||||
v(0).valid := '0';
|
||||
v(0).ugpr_valid := '0';
|
||||
end if;
|
||||
if deferred = '0' and issuing = '1' then
|
||||
v(0).valid := gpr_write_valid_in;
|
||||
v(0).bypass := bypass_avail;
|
||||
v(0).gpr := gpr_write_in;
|
||||
loop_1: for i in 1 to PIPELINE_DEPTH-1 loop
|
||||
-- propagate to next slot
|
||||
v(i).valid := r(i-1).valid;
|
||||
v(i).bypass := r(i-1).bypass;
|
||||
v(i).gpr := r(i-1).gpr;
|
||||
end loop;
|
||||
|
||||
else
|
||||
-- stage 0 stalled, so stage 1 becomes empty
|
||||
loop_1b: for i in 1 to PIPELINE_DEPTH-1 loop
|
||||
-- propagate to next slot
|
||||
if i = 1 then
|
||||
v(i).valid := '0';
|
||||
else
|
||||
v(i).valid := r(i-1).valid;
|
||||
v(i).bypass := r(i-1).bypass;
|
||||
v(i).gpr := r(i-1).gpr;
|
||||
end if;
|
||||
end loop;
|
||||
v(0).ugpr_valid := ugpr_write_valid;
|
||||
v(0).ugpr := ugpr_write_reg;
|
||||
end if;
|
||||
if flush_in = '1' then
|
||||
v(0).valid := '0';
|
||||
v(0).ugpr_valid := '0';
|
||||
v(1).valid := '0';
|
||||
v(1).ugpr_valid := '0';
|
||||
end if;
|
||||
|
||||
-- update registers
|
||||
|
||||
@@ -25,7 +25,6 @@ entity loadstore1 is
|
||||
m_in : in MmuToLoadstore1Type;
|
||||
|
||||
dc_stall : in std_ulogic;
|
||||
stall_out : out std_ulogic;
|
||||
|
||||
log_out : out std_ulogic_vector(9 downto 0)
|
||||
);
|
||||
@@ -47,6 +46,7 @@ architecture behave of loadstore1 is
|
||||
);
|
||||
|
||||
type reg_stage_t is record
|
||||
busy : std_ulogic;
|
||||
-- latch most of the input request
|
||||
load : std_ulogic;
|
||||
tlbie : std_ulogic;
|
||||
@@ -123,6 +123,7 @@ begin
|
||||
if rising_edge(clk) then
|
||||
if rst = '1' then
|
||||
r.state <= IDLE;
|
||||
r.busy <= '0';
|
||||
else
|
||||
r <= rin;
|
||||
end if;
|
||||
@@ -499,6 +500,7 @@ begin
|
||||
l_out.store_done <= d_in.store_done;
|
||||
|
||||
-- update exception info back to execute1
|
||||
e_out.busy <= r.busy;
|
||||
e_out.exception <= exception;
|
||||
e_out.instr_fault <= r.instr_fault;
|
||||
e_out.invalid <= m_in.invalid;
|
||||
@@ -513,7 +515,7 @@ begin
|
||||
end if;
|
||||
end if;
|
||||
|
||||
stall_out <= stall;
|
||||
v.busy := stall;
|
||||
|
||||
-- Update registers
|
||||
rin <= v;
|
||||
@@ -523,7 +525,7 @@ begin
|
||||
ls1_log: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
log_data <= stall_out &
|
||||
log_data <= r.busy &
|
||||
e_out.exception &
|
||||
l_out.valid &
|
||||
m_out.valid &
|
||||
|
||||
@@ -22,27 +22,33 @@ end entity writeback;
|
||||
|
||||
architecture behaviour of writeback is
|
||||
begin
|
||||
writeback_1: process(all)
|
||||
writeback_0: process(clk)
|
||||
variable x : std_ulogic_vector(0 downto 0);
|
||||
variable y : std_ulogic_vector(0 downto 0);
|
||||
variable w : std_ulogic_vector(0 downto 0);
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
-- Do consistency checks only on the clock edge
|
||||
x(0) := e_in.valid;
|
||||
y(0) := l_in.valid;
|
||||
assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure;
|
||||
|
||||
x(0) := e_in.write_enable or e_in.exc_write_enable;
|
||||
y(0) := l_in.write_enable;
|
||||
assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure;
|
||||
|
||||
w(0) := e_in.write_cr_enable;
|
||||
x(0) := (e_in.write_enable and e_in.rc);
|
||||
assert (to_integer(unsigned(w)) + to_integer(unsigned(x))) <= 1 severity failure;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
writeback_1: process(all)
|
||||
variable cf: std_ulogic_vector(3 downto 0);
|
||||
variable zero : std_ulogic;
|
||||
variable sign : std_ulogic;
|
||||
variable scf : std_ulogic_vector(3 downto 0);
|
||||
begin
|
||||
x(0) := e_in.valid;
|
||||
y(0) := l_in.valid;
|
||||
assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure;
|
||||
|
||||
x(0) := e_in.write_enable or e_in.exc_write_enable;
|
||||
y(0) := l_in.write_enable;
|
||||
assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure;
|
||||
|
||||
w(0) := e_in.write_cr_enable;
|
||||
x(0) := (e_in.write_enable and e_in.rc);
|
||||
assert (to_integer(unsigned(w)) + to_integer(unsigned(x))) <= 1 severity failure;
|
||||
|
||||
w_out <= WritebackToRegisterFileInit;
|
||||
c_out <= WritebackToCrFileInit;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user