mirror of
https://github.com/antonblanchard/microwatt.git
synced 2026-04-13 23:23:55 +00:00
core: Crack branches that update both CTR and LR
This uses the instruction doubling machinery to convert conditional branch instructions that update both CTR and LR (e.g., bdnzl, bdnzlrl) into two instructions, of which the first updates CTR and determines whether the branch is taken, and the second updates LR and does the redirect if necessary. Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
12
common.vhdl
12
common.vhdl
@@ -176,14 +176,15 @@ package common is
|
||||
insn: std_ulogic_vector(31 downto 0);
|
||||
ispr1: gspr_index_t; -- (G)SPR used for branch condition (CTR) or mfspr
|
||||
ispr2: gspr_index_t; -- (G)SPR used for branch target (CTR, LR, TAR)
|
||||
ispro: gspr_index_t; -- (G)SPR written with LR or CTR
|
||||
decode: decode_rom_t;
|
||||
br_pred: std_ulogic; -- Branch was predicted to be taken
|
||||
big_endian: std_ulogic;
|
||||
end record;
|
||||
constant Decode1ToDecode2Init : Decode1ToDecode2Type :=
|
||||
(valid => '0', stop_mark => '0', nia => (others => '0'), insn => (others => '0'),
|
||||
ispr1 => (others => '0'), ispr2 => (others => '0'), decode => decode_rom_init,
|
||||
br_pred => '0', big_endian => '0');
|
||||
ispr1 => (others => '0'), ispr2 => (others => '0'), ispro => (others => '0'),
|
||||
decode => decode_rom_init, br_pred => '0', big_endian => '0');
|
||||
|
||||
type Decode1ToFetch1Type is record
|
||||
redirect : std_ulogic;
|
||||
@@ -210,6 +211,7 @@ package common is
|
||||
bypass_cr : std_ulogic;
|
||||
xerc: xer_common_t;
|
||||
lr: std_ulogic;
|
||||
br_abs: std_ulogic;
|
||||
rc: std_ulogic;
|
||||
oe: std_ulogic;
|
||||
invert_a: std_ulogic;
|
||||
@@ -236,7 +238,7 @@ package common is
|
||||
constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
|
||||
(valid => '0', unit => NONE, fac => NONE, insn_type => OP_ILLEGAL,
|
||||
write_reg_enable => '0', bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0',
|
||||
bypass_cr => '0', lr => '0', rc => '0', oe => '0', invert_a => '0', addm1 => '0',
|
||||
bypass_cr => '0', lr => '0', br_abs => '0', rc => '0', oe => '0', invert_a => '0', addm1 => '0',
|
||||
invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0',
|
||||
is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', br_pred => '0',
|
||||
byte_reverse => '0', sign_extend => '0', update => '0', nia => (others => '0'),
|
||||
@@ -552,9 +554,9 @@ package body common is
|
||||
begin
|
||||
case spr is
|
||||
when SPR_LR =>
|
||||
n := 0;
|
||||
n := 0; -- N.B. decode2 relies on this specific value
|
||||
when SPR_CTR =>
|
||||
n:= 1;
|
||||
n := 1; -- N.B. decode2 relies on this specific value
|
||||
when SPR_SRR0 =>
|
||||
n := 2;
|
||||
when SPR_SRR1 =>
|
||||
|
||||
17
decode1.vhdl
17
decode1.vhdl
@@ -79,7 +79,7 @@ architecture behaviour of decode1 is
|
||||
28 => (ALU, NONE, OP_AND, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0', NONE), -- andi.
|
||||
29 => (ALU, NONE, OP_AND, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0', NONE), -- andis.
|
||||
0 => (ALU, NONE, OP_ATTN, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), -- attn
|
||||
18 => (ALU, NONE, OP_B, NONE, CONST_LI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE), -- b
|
||||
18 => (ALU, NONE, OP_B, NONE, CONST_LI, NONE, SPR, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE), -- b
|
||||
16 => (ALU, NONE, OP_BC, SPR, CONST_BD, NONE, SPR , '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE), -- bc
|
||||
11 => (ALU, NONE, OP_CMP, RA, CONST_SI, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE), -- cmpi
|
||||
10 => (ALU, NONE, OP_CMP, RA, CONST_UI, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- cmpli
|
||||
@@ -597,9 +597,10 @@ begin
|
||||
-- major opcode 31, lots of things
|
||||
v.decode := decode_op_31_array(to_integer(unsigned(f_in.insn(10 downto 1))));
|
||||
|
||||
-- Work out ispr1/ispr2 independent of v.decode since they seem to be critical path
|
||||
-- Work out ispr1/ispro independent of v.decode since they seem to be critical path
|
||||
sprn := decode_spr_num(f_in.insn);
|
||||
v.ispr1 := fast_spr_num(sprn);
|
||||
v.ispro := fast_spr_num(sprn);
|
||||
|
||||
if std_match(f_in.insn(10 downto 1), "01-1010011") then
|
||||
-- mfspr or mtspr
|
||||
@@ -627,6 +628,9 @@ begin
|
||||
-- CTR may be needed as input to bc
|
||||
if f_in.insn(23) = '0' then
|
||||
v.ispr1 := fast_spr_num(SPR_CTR);
|
||||
v.ispro := fast_spr_num(SPR_CTR);
|
||||
elsif f_in.insn(0) = '1' then
|
||||
v.ispro := fast_spr_num(SPR_LR);
|
||||
end if;
|
||||
-- Predict backward branches as taken, forward as untaken
|
||||
v.br_pred := f_in.insn(15);
|
||||
@@ -636,6 +640,9 @@ begin
|
||||
-- Unconditional branches are always taken
|
||||
v.br_pred := '1';
|
||||
br_offset := signed(f_in.insn(25 downto 2));
|
||||
if f_in.insn(0) = '1' then
|
||||
v.ispro := fast_spr_num(SPR_LR);
|
||||
end if;
|
||||
|
||||
when 19 =>
|
||||
vi.override := not decode_op_19_valid(to_integer(unsigned(f_in.insn(5 downto 1) & f_in.insn(10 downto 6))));
|
||||
@@ -648,8 +655,12 @@ begin
|
||||
-- Branch uses CTR as condition when BO(2) is 0. This is
|
||||
-- also used to indicate that CTR is modified (they go
|
||||
-- together).
|
||||
if f_in.insn(23) = '0' then
|
||||
-- bcctr doesn't update CTR or use it in the branch condition
|
||||
if f_in.insn(23) = '0' and (f_in.insn(10) = '0' or f_in.insn(6) = '1') then
|
||||
v.ispr1 := fast_spr_num(SPR_CTR);
|
||||
v.ispro := fast_spr_num(SPR_CTR);
|
||||
elsif f_in.insn(0) = '1' then
|
||||
v.ispro := fast_spr_num(SPR_LR);
|
||||
end if;
|
||||
if f_in.insn(10) = '0' then
|
||||
v.ispr2 := fast_spr_num(SPR_LR);
|
||||
|
||||
55
decode2.vhdl
55
decode2.vhdl
@@ -249,6 +249,9 @@ architecture behaviour of decode2 is
|
||||
OP_MOD => "011",
|
||||
OP_CNTZ => "100", -- countzero_result
|
||||
OP_MFSPR => "101", -- spr_result
|
||||
OP_B => "110", -- next_nia
|
||||
OP_BC => "110",
|
||||
OP_BCREG => "110",
|
||||
OP_ADDG6S => "111", -- misc_result
|
||||
OP_ISEL => "111",
|
||||
OP_DARN => "111",
|
||||
@@ -284,9 +287,6 @@ architecture behaviour of decode2 is
|
||||
signal gpr_write : gspr_index_t;
|
||||
signal gpr_bypassable : std_ulogic;
|
||||
|
||||
signal update_gpr_write_valid : std_ulogic;
|
||||
signal update_gpr_write_reg : gspr_index_t;
|
||||
|
||||
signal gpr_a_read_valid : std_ulogic;
|
||||
signal gpr_a_read :gspr_index_t;
|
||||
signal gpr_a_bypass : std_ulogic;
|
||||
@@ -325,8 +325,8 @@ begin
|
||||
gpr_write_in => gpr_write,
|
||||
gpr_bypassable => gpr_bypassable,
|
||||
|
||||
update_gpr_write_valid => update_gpr_write_valid,
|
||||
update_gpr_write_reg => update_gpr_write_reg,
|
||||
update_gpr_write_valid => '0',
|
||||
update_gpr_write_reg => 7x"00",
|
||||
|
||||
gpr_a_read_valid_in => gpr_a_read_valid,
|
||||
gpr_a_read_in => gpr_a_read,
|
||||
@@ -376,6 +376,7 @@ begin
|
||||
variable decoded_reg_c : decode_input_reg_t;
|
||||
variable decoded_reg_o : decode_output_reg_t;
|
||||
variable length : std_ulogic_vector(3 downto 0);
|
||||
variable op : insn_type_t;
|
||||
begin
|
||||
v := r;
|
||||
|
||||
@@ -391,7 +392,14 @@ begin
|
||||
d_in.nia);
|
||||
decoded_reg_b := decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn, r_in.read2_data, d_in.ispr2);
|
||||
decoded_reg_c := decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn, r_in.read3_data);
|
||||
decoded_reg_o := decode_output_reg (d_in.decode.output_reg_a, d_in.insn, d_in.ispr1);
|
||||
decoded_reg_o := decode_output_reg (d_in.decode.output_reg_a, d_in.insn, d_in.ispro);
|
||||
|
||||
if d_in.decode.lr = '1' then
|
||||
v.e.lr := insn_lk(d_in.insn);
|
||||
-- b and bc have even major opcodes; bcreg is considered absolute
|
||||
v.e.br_abs := insn_aa(d_in.insn) or d_in.insn(26);
|
||||
end if;
|
||||
op := d_in.decode.insn_type;
|
||||
|
||||
if d_in.decode.repeat /= NONE then
|
||||
v.e.repeat := '1';
|
||||
@@ -414,6 +422,12 @@ begin
|
||||
end if;
|
||||
when others =>
|
||||
end case;
|
||||
elsif v.e.lr = '1' and decoded_reg_a.reg_valid = '1' then
|
||||
-- bcl/bclrl/bctarl that needs to write both CTR and LR has to be doubled
|
||||
v.e.repeat := '1';
|
||||
v.e.second := r.repeat;
|
||||
-- first one does CTR, second does LR
|
||||
decoded_reg_o.reg(0) := not r.repeat;
|
||||
end if;
|
||||
|
||||
r_out.read1_enable <= decoded_reg_a.reg_valid and d_in.valid;
|
||||
@@ -440,7 +454,6 @@ begin
|
||||
v.e.nia := d_in.nia;
|
||||
v.e.unit := d_in.decode.unit;
|
||||
v.e.fac := d_in.decode.facility;
|
||||
v.e.insn_type := d_in.decode.insn_type;
|
||||
v.e.read_reg1 := decoded_reg_a.reg;
|
||||
v.e.read_data1 := decoded_reg_a.data;
|
||||
v.e.bypass_data1 := gpr_a_bypass;
|
||||
@@ -460,23 +473,12 @@ begin
|
||||
v.e.xerc := c_in.read_xerc_data;
|
||||
v.e.invert_a := d_in.decode.invert_a;
|
||||
v.e.addm1 := '0';
|
||||
if d_in.decode.insn_type = OP_BC or d_in.decode.insn_type = OP_BCREG then
|
||||
-- add -1 to CTR
|
||||
v.e.addm1 := '1';
|
||||
if d_in.insn(23) = '1' or
|
||||
(d_in.decode.insn_type = OP_BCREG and d_in.insn(10) = '0') then
|
||||
-- don't write decremented CTR if BO(2) = 1 or bcctr
|
||||
v.e.write_reg_enable := '0';
|
||||
end if;
|
||||
end if;
|
||||
v.e.insn_type := op;
|
||||
v.e.invert_out := d_in.decode.invert_out;
|
||||
v.e.input_carry := d_in.decode.input_carry;
|
||||
v.e.output_carry := d_in.decode.output_carry;
|
||||
v.e.is_32bit := d_in.decode.is_32bit;
|
||||
v.e.is_signed := d_in.decode.is_signed;
|
||||
if d_in.decode.lr = '1' then
|
||||
v.e.lr := insn_lk(d_in.insn);
|
||||
end if;
|
||||
v.e.insn := d_in.insn;
|
||||
v.e.data_len := length;
|
||||
v.e.byte_reverse := d_in.decode.byte_reverse;
|
||||
@@ -484,8 +486,16 @@ begin
|
||||
v.e.update := d_in.decode.update;
|
||||
v.e.reserve := d_in.decode.reserve;
|
||||
v.e.br_pred := d_in.br_pred;
|
||||
v.e.result_sel := result_select(d_in.decode.insn_type);
|
||||
v.e.sub_select := subresult_select(d_in.decode.insn_type);
|
||||
v.e.result_sel := result_select(op);
|
||||
v.e.sub_select := subresult_select(op);
|
||||
if op = OP_BC or op = OP_BCREG then
|
||||
if d_in.insn(23) = '0' and r.repeat = '0' and
|
||||
not (d_in.decode.insn_type = OP_BCREG and d_in.insn(10) = '0') then
|
||||
-- decrement CTR if BO(2) = 0 and not bcctr
|
||||
v.e.addm1 := '1';
|
||||
v.e.result_sel := "000"; -- select adder output
|
||||
end if;
|
||||
end if;
|
||||
|
||||
-- issue control
|
||||
control_valid_in <= d_in.valid;
|
||||
@@ -498,9 +508,6 @@ begin
|
||||
gpr_bypassable <= '1';
|
||||
end if;
|
||||
|
||||
update_gpr_write_valid <= v.e.lr;
|
||||
update_gpr_write_reg <= fast_spr_num(SPR_LR);
|
||||
|
||||
gpr_a_read_valid <= decoded_reg_a.reg_valid;
|
||||
gpr_a_read <= decoded_reg_a.reg;
|
||||
|
||||
|
||||
@@ -59,8 +59,8 @@ architecture behaviour of execute1 is
|
||||
fp_exception_next : std_ulogic;
|
||||
trace_next : std_ulogic;
|
||||
prev_op : insn_type_t;
|
||||
lr_update : std_ulogic;
|
||||
next_lr : std_ulogic_vector(63 downto 0);
|
||||
br_taken : std_ulogic;
|
||||
mul_in_progress : std_ulogic;
|
||||
mul_finish : std_ulogic;
|
||||
div_in_progress : std_ulogic;
|
||||
@@ -79,8 +79,8 @@ architecture behaviour of execute1 is
|
||||
constant reg_type_init : reg_type :=
|
||||
(e => Execute1ToWritebackInit,
|
||||
cur_instr => Decode2ToExecute1Init,
|
||||
busy => '0', lr_update => '0', terminate => '0',
|
||||
fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL,
|
||||
busy => '0', terminate => '0',
|
||||
fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL, br_taken => '0',
|
||||
mul_in_progress => '0', mul_finish => '0', div_in_progress => '0', cntz_in_progress => '0',
|
||||
next_lr => (others => '0'), last_nia => (others => '0'),
|
||||
redirect => '0', abs_br => '0', taken_br => '0', br_last => '0', do_intr => '0', vector => 0,
|
||||
@@ -108,6 +108,7 @@ architecture behaviour of execute1 is
|
||||
signal spr_result: std_ulogic_vector(63 downto 0);
|
||||
signal result_mux_sel: std_ulogic_vector(2 downto 0);
|
||||
signal sub_mux_sel: std_ulogic_vector(2 downto 0);
|
||||
signal next_nia : std_ulogic_vector(63 downto 0);
|
||||
signal current: Decode2ToExecute1Type;
|
||||
|
||||
-- multiply signals
|
||||
@@ -301,6 +302,7 @@ begin
|
||||
muldiv_result when "011",
|
||||
countzero_result when "100",
|
||||
spr_result when "101",
|
||||
next_nia when "110",
|
||||
misc_result when others;
|
||||
|
||||
execute1_0: process(clk)
|
||||
@@ -315,11 +317,9 @@ begin
|
||||
else
|
||||
r <= rin;
|
||||
ctrl <= ctrl_tmp;
|
||||
assert not (r.lr_update = '1' and valid_in = '1')
|
||||
report "LR update collision with valid in EX1"
|
||||
severity failure;
|
||||
if r.lr_update = '1' then
|
||||
report "LR update to " & to_hstring(r.next_lr);
|
||||
if valid_in = '1' then
|
||||
report "execute " & to_hstring(e_in.nia) & " op=" & insn_type_t'image(e_in.insn_type) &
|
||||
" wr=" & to_hstring(rin.e.write_reg);
|
||||
end if;
|
||||
end if;
|
||||
end if;
|
||||
@@ -350,7 +350,6 @@ begin
|
||||
variable btnum, banum, bbnum : integer range 0 to 31;
|
||||
variable crresult : std_ulogic;
|
||||
variable l : std_ulogic;
|
||||
variable next_nia : std_ulogic_vector(63 downto 0);
|
||||
variable carry_32, carry_64 : std_ulogic;
|
||||
variable sign1, sign2 : std_ulogic;
|
||||
variable abs1, abs2 : signed(63 downto 0);
|
||||
@@ -421,7 +420,6 @@ begin
|
||||
end loop;
|
||||
end if;
|
||||
|
||||
v.lr_update := '0';
|
||||
v.mul_in_progress := '0';
|
||||
v.div_in_progress := '0';
|
||||
v.cntz_in_progress := '0';
|
||||
@@ -673,7 +671,7 @@ begin
|
||||
v.busy := '0';
|
||||
|
||||
-- Next insn adder used in a couple of places
|
||||
next_nia := std_ulogic_vector(unsigned(e_in.nia) + 4);
|
||||
next_nia <= std_ulogic_vector(unsigned(e_in.nia) + 4);
|
||||
|
||||
-- rotator control signals
|
||||
right_shift <= '1' when e_in.insn_type = OP_SHR else '0';
|
||||
@@ -846,35 +844,39 @@ begin
|
||||
newcrf & newcrf & newcrf & newcrf;
|
||||
when OP_AND | OP_OR | OP_XOR | OP_POPCNT | OP_PRTY | OP_CMPB | OP_EXTS |
|
||||
OP_BPERM | OP_BCD =>
|
||||
|
||||
when OP_B =>
|
||||
is_branch := '1';
|
||||
taken_branch := '1';
|
||||
is_direct_branch := '1';
|
||||
abs_branch := insn_aa(e_in.insn);
|
||||
abs_branch := e_in.br_abs;
|
||||
if ctrl.msr(MSR_BE) = '1' then
|
||||
do_trace := '1';
|
||||
end if;
|
||||
when OP_BC =>
|
||||
-- read_data1 is CTR
|
||||
when OP_BC | OP_BCREG =>
|
||||
-- read_data1 is CTR
|
||||
-- for OP_BCREG, read_data2 is target register (CTR, LR or TAR)
|
||||
-- If this instruction updates both CTR and LR, then it is
|
||||
-- doubled; the first instruction decrements CTR and determines
|
||||
-- whether the branch is taken, and the second does the
|
||||
-- redirect and the LR update.
|
||||
bo := insn_bo(e_in.insn);
|
||||
bi := insn_bi(e_in.insn);
|
||||
is_branch := '1';
|
||||
is_direct_branch := '1';
|
||||
taken_branch := ppc_bc_taken(bo, bi, cr_in, a_in);
|
||||
abs_branch := insn_aa(e_in.insn);
|
||||
if ctrl.msr(MSR_BE) = '1' then
|
||||
do_trace := '1';
|
||||
if e_in.second = '0' then
|
||||
taken_branch := ppc_bc_taken(bo, bi, cr_in, a_in);
|
||||
else
|
||||
taken_branch := r.br_taken;
|
||||
end if;
|
||||
when OP_BCREG =>
|
||||
-- read_data1 is CTR
|
||||
-- read_data2 is target register (CTR, LR or TAR)
|
||||
bo := insn_bo(e_in.insn);
|
||||
bi := insn_bi(e_in.insn);
|
||||
is_branch := '1';
|
||||
taken_branch := ppc_bc_taken(bo, bi, cr_in, a_in);
|
||||
abs_branch := '1';
|
||||
if ctrl.msr(MSR_BE) = '1' then
|
||||
do_trace := '1';
|
||||
v.br_taken := taken_branch;
|
||||
abs_branch := e_in.br_abs;
|
||||
if e_in.repeat = '0' or e_in.second = '1' then
|
||||
is_branch := '1';
|
||||
if e_in.insn_type = OP_BC then
|
||||
is_direct_branch := '1';
|
||||
end if;
|
||||
if ctrl.msr(MSR_BE) = '1' then
|
||||
do_trace := '1';
|
||||
end if;
|
||||
end if;
|
||||
|
||||
when OP_RFID =>
|
||||
@@ -1197,11 +1199,6 @@ begin
|
||||
end if;
|
||||
v.e.valid := '1';
|
||||
end if;
|
||||
-- When doing delayed LR update, keep r.e.write_data unchanged
|
||||
-- next cycle in case it is needed for a forwarded result (e.g. CTR).
|
||||
if r.lr_update = '1' then
|
||||
hold_wr_data := '1';
|
||||
end if;
|
||||
|
||||
-- Generate FP-type program interrupt. fp_in.interrupt will only
|
||||
-- be set during the execution of a FP instruction.
|
||||
@@ -1274,30 +1271,6 @@ begin
|
||||
v.e.write_enable := current.write_reg_enable and v.e.valid and not exception;
|
||||
v.e.rc := current.rc and v.e.valid and not exception;
|
||||
|
||||
-- Update LR on the next cycle after a branch link
|
||||
-- If we're not writing back anything else, we can write back LR
|
||||
-- this cycle, otherwise we take an extra cycle. We use the
|
||||
-- exc_write path since next_nia is written through that path
|
||||
-- in other places.
|
||||
if v.e.valid = '1' and exception = '0' and current.lr = '1' then
|
||||
if current.write_reg_enable = '0' then
|
||||
v.e.exc_write_enable := '1';
|
||||
v.e.exc_write_data := next_nia;
|
||||
v.e.exc_write_reg := fast_spr_num(SPR_LR);
|
||||
else
|
||||
v.lr_update := '1';
|
||||
v.e.valid := '0';
|
||||
report "Delayed LR update to " & to_hstring(next_nia);
|
||||
v.busy := '1';
|
||||
end if;
|
||||
end if;
|
||||
if r.lr_update = '1' then
|
||||
v.e.exc_write_enable := '1';
|
||||
v.e.exc_write_data := r.next_lr;
|
||||
v.e.exc_write_reg := fast_spr_num(SPR_LR);
|
||||
v.e.valid := '1';
|
||||
end if;
|
||||
|
||||
-- Defer completion for one cycle when redirecting.
|
||||
-- This also ensures r.busy = 1 when ctrl.irq_state = WRITE_SRR1
|
||||
if v.redirect = '1' then
|
||||
|
||||
Reference in New Issue
Block a user