mirror of
https://github.com/antonblanchard/microwatt.git
synced 2026-02-26 16:53:16 +00:00
core: Allow multiple loadstore instructions to be in flight
The idea here is that we can have multiple instructions in progress at the same time as long as they all go to the same unit, because that unit will keep them in order. If we get an instruction for a different unit, we wait for all the previous instructions to finish before executing it. Since the loadstore unit is the only one that is currently pipelined, this boils down to saying that loadstore instructions can go ahead while l_in.in_progress = 1, but other instructions have to wait until it is 0. This gives a 2% increase in CoreMark performance on the Arty A7-100 (from ~190 to ~194).

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
@@ -368,6 +368,7 @@ package common is
|
||||
|
||||
type Loadstore1ToExecute1Type is record
|
||||
busy : std_ulogic;
|
||||
in_progress : std_ulogic;
|
||||
end record;
|
||||
|
||||
type Loadstore1ToDcacheType is record
|
||||
|
||||
@@ -7,7 +7,7 @@ use work.common.all;
|
||||
entity control is
|
||||
generic (
|
||||
EX1_BYPASS : boolean := true;
|
||||
PIPELINE_DEPTH : natural := 2
|
||||
PIPELINE_DEPTH : natural := 3
|
||||
);
|
||||
port (
|
||||
clk : in std_ulogic;
|
||||
@@ -239,6 +239,10 @@ begin
|
||||
elsif complete_in.valid = '1' then
|
||||
v_int.outstanding := r_int.outstanding - 1;
|
||||
end if;
|
||||
if r_int.outstanding >= PIPELINE_DEPTH + 1 then
|
||||
valid_tmp := '0';
|
||||
stall_tmp := '1';
|
||||
end if;
|
||||
|
||||
if rst = '1' then
|
||||
v_int := reg_internal_init;
|
||||
|
||||
@@ -300,8 +300,7 @@ architecture behaviour of decode2 is
|
||||
begin
|
||||
control_0: entity work.control
|
||||
generic map (
|
||||
EX1_BYPASS => EX1_BYPASS,
|
||||
PIPELINE_DEPTH => 1
|
||||
EX1_BYPASS => EX1_BYPASS
|
||||
)
|
||||
port map (
|
||||
clk => clk,
|
||||
|
||||
@@ -293,7 +293,10 @@ begin
|
||||
-- writeback.
|
||||
xerc_in <= r.e.xerc when r.e.write_xerc_enable = '1' or r.busy = '1' else e_in.xerc;
|
||||
|
||||
busy_out <= l_in.busy or r.busy or fp_in.busy;
|
||||
with e_in.unit select busy_out <=
|
||||
l_in.busy or r.busy or fp_in.busy when LDST,
|
||||
l_in.busy or l_in.in_progress or r.busy or fp_in.busy when others;
|
||||
|
||||
valid_in <= e_in.valid and not busy_out and not flush_in;
|
||||
|
||||
terminate_out <= r.terminate;
|
||||
@@ -744,7 +747,7 @@ begin
|
||||
|
||||
-- Determine if there is any exception to be taken
|
||||
-- before/instead of executing this instruction
|
||||
if valid_in = '1' and e_in.second = '0' then
|
||||
if valid_in = '1' and e_in.second = '0' and l_in.in_progress = '0' then
|
||||
if HAS_FPU and r.fp_exception_next = '1' then
|
||||
-- This is used for FP-type program interrupts that
|
||||
-- become pending due to MSR[FE0,FE1] changing from 00 to non-zero.
|
||||
|
||||
@@ -155,6 +155,7 @@ architecture behave of loadstore1 is
|
||||
|
||||
signal busy : std_ulogic;
|
||||
signal complete : std_ulogic;
|
||||
signal in_progress : std_ulogic;
|
||||
signal flushing : std_ulogic;
|
||||
|
||||
signal store_sp_data : std_ulogic_vector(31 downto 0);
|
||||
@@ -494,13 +495,13 @@ begin
|
||||
req_in <= v;
|
||||
end process;
|
||||
|
||||
--busy <= r1.req.valid and ((r1.req.dc_req and not r1.issued) or
|
||||
-- (r1.issued and d_in.error) or
|
||||
-- stage2_busy_next or
|
||||
-- (r1.req.dc_req and r1.req.two_dwords and not r1.req.dword_index));
|
||||
busy <= r1.req.valid and ((r1.req.dc_req and not r1.issued) or
|
||||
(r1.issued and d_in.error) or
|
||||
stage2_busy_next or
|
||||
(r1.req.dc_req and r1.req.two_dwords and not r1.req.dword_index));
|
||||
complete <= r2.one_cycle or (r2.wait_dc and d_in.valid) or
|
||||
(r2.wait_mmu and m_in.done) or r3.convert_lfs;
|
||||
busy <= r1.req.valid or (r2.req.valid and not complete);
|
||||
in_progress <= r1.req.valid or (r2.req.valid and not complete);
|
||||
|
||||
stage1_issue_enable <= r3.stage1_en and not (r1.req.valid and r1.req.mmu_op) and
|
||||
not (r2.req.valid and r2.req.mmu_op);
|
||||
@@ -940,6 +941,7 @@ begin
|
||||
|
||||
-- update busy signal back to execute1
|
||||
e_out.busy <= busy;
|
||||
e_out.in_progress <= in_progress;
|
||||
|
||||
-- Busy calculation.
|
||||
stage3_busy_next <= r2.req.valid and not (complete or part_done or exception);
|
||||
|
||||
Reference in New Issue
Block a user