mirror of
https://github.com/antonblanchard/microwatt.git
synced 2026-03-01 17:35:38 +00:00
core: Change bperm to a simpler and slower implementation
This does bperm in the bitsort unit instead of the logical unit, and no longer tries to do it in a single cycle with eight 64-to-1 multiplexers. Instead it is now a state machine in the bitsort unit, takes 8 cycles, and only has one 64-to-1 multiplexer. This helps improve timing and reduces LUT usage. Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
47
bitsort.vhdl
47
bitsort.vhdl
@@ -1,5 +1,6 @@
|
|||||||
-- Implements instructions that involve sorting bits,
|
-- Implements instructions that involve sorting bits,
|
||||||
-- that is, cfuged, pextd and pdepd.
|
-- that is, cfuged, pextd and pdepd.
|
||||||
|
-- Also does bperm, which is somewhat different.
|
||||||
--
|
--
|
||||||
-- cfuged: Sort the bits in the mask in RB into 0s at the left, 1s at the right
|
-- cfuged: Sort the bits in the mask in RB into 0s at the left, 1s at the right
|
||||||
-- and move the bits in RS in the same fashion to give the result
|
-- and move the bits in RS in the same fashion to give the result
|
||||||
@@ -7,6 +8,7 @@
|
|||||||
-- corresponding bit in RB is 1
|
-- corresponding bit in RB is 1
|
||||||
-- pdepd: Inverse of pextd; take the low-order bits of RS and spread them out
|
-- pdepd: Inverse of pextd; take the low-order bits of RS and spread them out
|
||||||
-- to the bit positions which have a 1 in RB
|
-- to the bit positions which have a 1 in RB
|
||||||
|
-- bperm: Select 8 arbitrary bits
|
||||||
|
|
||||||
-- NB opc is bits 7-6 of the instruction:
|
-- NB opc is bits 7-6 of the instruction:
|
||||||
-- 00 = pdepd, 01 = pextd, 10 = cfuged
|
-- 00 = pdepd, 01 = pextd, 10 = cfuged
|
||||||
@@ -27,6 +29,8 @@ entity bit_sorter is
|
|||||||
go : in std_ulogic;
|
go : in std_ulogic;
|
||||||
opc : in std_ulogic_vector(1 downto 0);
|
opc : in std_ulogic_vector(1 downto 0);
|
||||||
done : out std_ulogic;
|
done : out std_ulogic;
|
||||||
|
do_bperm : in std_ulogic;
|
||||||
|
bperm_done : out std_ulogic;
|
||||||
result : out std_ulogic_vector(63 downto 0)
|
result : out std_ulogic_vector(63 downto 0)
|
||||||
);
|
);
|
||||||
end entity bit_sorter;
|
end entity bit_sorter;
|
||||||
@@ -45,6 +49,13 @@ architecture behaviour of bit_sorter is
|
|||||||
signal sr_vl : std_ulogic_vector(63 downto 0);
|
signal sr_vl : std_ulogic_vector(63 downto 0);
|
||||||
signal sr_vr : std_ulogic_vector(63 downto 0);
|
signal sr_vr : std_ulogic_vector(63 downto 0);
|
||||||
|
|
||||||
|
signal is_bperm : std_ulogic;
|
||||||
|
signal bpc : unsigned(2 downto 0);
|
||||||
|
signal bp_done : std_ulogic;
|
||||||
|
signal bperm_res : std_ulogic_vector(7 downto 0);
|
||||||
|
signal rs_sr : std_ulogic_vector(63 downto 0);
|
||||||
|
signal rb_bp : std_ulogic_vector(63 downto 0);
|
||||||
|
|
||||||
begin
|
begin
|
||||||
bsort_r: process(clk)
|
bsort_r: process(clk)
|
||||||
begin
|
begin
|
||||||
@@ -96,7 +107,41 @@ begin
|
|||||||
end if;
|
end if;
|
||||||
end process;
|
end process;
|
||||||
|
|
||||||
|
-- bit permutation
|
||||||
|
-- Select the RB bit addressed by the current index byte (low byte of rs_sr).
-- bperm numbers bits big-endian, hence the inverted 6-bit index.
-- An index byte of 64 or more (either of bits 7:6 set) selects no bit and
-- must give 0; without this check the index would wrap modulo 64.
-- Metavalues anywhere in rs_sr propagate as 'X' (simulation only).
bperm_res(7) <= 'X' when is_X(rs_sr) else
                '0' when rs_sr(7 downto 6) /= "00" else
                rb_bp(to_integer(unsigned(not rs_sr(5 downto 0))));
|
||||||
|
|
||||||
|
-- bperm_r: clocked state machine that sequences the bperm operation.
-- On do_bperm it latches rs and rb; on each following clock it shifts the
-- combinationally computed bperm_res(7) down into bperm_res(6 downto 0) and
-- shifts rs_sr right by one byte so the next index byte is in bits 7:0.
-- bp_done is raised after seven such shift steps and cleared one clock later.
bperm_r: process(clk)
|
||||||
|
begin
|
||||||
|
if rising_edge(clk) then
|
||||||
|
-- Synchronous reset: return to idle with result bits and counter cleared.
if rst = '1' then
|
||||||
|
is_bperm <= '0';
|
||||||
|
bp_done <= '0';
|
||||||
|
bperm_res(6 downto 0) <= (others => '0');
|
||||||
|
bpc <= to_unsigned(0, 3);
|
||||||
|
-- Start: capture the operands and restart the step counter.
elsif do_bperm = '1' then
|
||||||
|
is_bperm <= '1';
|
||||||
|
bp_done <= '0';
|
||||||
|
bperm_res(6 downto 0) <= (others => '0');
|
||||||
|
bpc <= to_unsigned(0, 3);
|
||||||
|
rs_sr <= rs;
|
||||||
|
rb_bp <= rb;
|
||||||
|
-- Done was signalled during this cycle: go back to idle.
elsif bp_done = '1' then
|
||||||
|
is_bperm <= '0';
|
||||||
|
bp_done <= '0';
|
||||||
|
-- Busy: perform one shift step per clock.
elsif is_bperm = '1' then
|
||||||
|
-- bperm_res(7) is driven outside this process; shift it and the
-- previously collected bits down by one position.
bperm_res(6 downto 0) <= bperm_res(7 downto 1);
|
||||||
|
-- Move the next index byte into rs_sr(7 downto 0), filling with zeros.
rs_sr <= x"00" & rs_sr(63 downto 8);
|
||||||
|
-- bpc = 6 marks the seventh shift step; flag completion for the next cycle.
if bpc = "110" then
|
||||||
|
bp_done <= '1';
|
||||||
|
end if;
|
||||||
|
bpc <= bpc + 1;
|
||||||
|
end if;
|
||||||
|
end if;
|
||||||
|
end process;
|
||||||
|
|
||||||
done <= sd;
|
done <= sd;
|
||||||
result <= val;
|
bperm_done <= bp_done;
|
||||||
|
result <= val when is_bperm = '0' else (56x"0" & bperm_res);
|
||||||
|
|
||||||
end behaviour;
|
end behaviour;
|
||||||
|
|||||||
@@ -227,7 +227,6 @@ architecture behaviour of decode2 is
|
|||||||
OP_PRTY => "001",
|
OP_PRTY => "001",
|
||||||
OP_CMPB => "001",
|
OP_CMPB => "001",
|
||||||
OP_EXTS => "001",
|
OP_EXTS => "001",
|
||||||
OP_BPERM => "001",
|
|
||||||
OP_BREV => "001",
|
OP_BREV => "001",
|
||||||
OP_BCD => "001",
|
OP_BCD => "001",
|
||||||
OP_MTSPR => "001",
|
OP_MTSPR => "001",
|
||||||
@@ -256,6 +255,7 @@ architecture behaviour of decode2 is
|
|||||||
OP_DIVE => "101",
|
OP_DIVE => "101",
|
||||||
OP_MOD => "101",
|
OP_MOD => "101",
|
||||||
OP_BSORT => "100",
|
OP_BSORT => "100",
|
||||||
|
OP_BPERM => "100",
|
||||||
OP_ADDG6S => "001", -- misc_result
|
OP_ADDG6S => "001", -- misc_result
|
||||||
OP_ISEL => "010",
|
OP_ISEL => "010",
|
||||||
OP_DARN => "011",
|
OP_DARN => "011",
|
||||||
|
|||||||
@@ -116,6 +116,7 @@ architecture behaviour of execute1 is
|
|||||||
start_mul : std_ulogic;
|
start_mul : std_ulogic;
|
||||||
start_div : std_ulogic;
|
start_div : std_ulogic;
|
||||||
start_bsort : std_ulogic;
|
start_bsort : std_ulogic;
|
||||||
|
start_bperm : std_ulogic;
|
||||||
do_trace : std_ulogic;
|
do_trace : std_ulogic;
|
||||||
ciabr_trace : std_ulogic;
|
ciabr_trace : std_ulogic;
|
||||||
fp_intr : std_ulogic;
|
fp_intr : std_ulogic;
|
||||||
@@ -150,6 +151,7 @@ architecture behaviour of execute1 is
|
|||||||
mul_finish : std_ulogic;
|
mul_finish : std_ulogic;
|
||||||
div_in_progress : std_ulogic;
|
div_in_progress : std_ulogic;
|
||||||
bsort_in_progress : std_ulogic;
|
bsort_in_progress : std_ulogic;
|
||||||
|
bperm_in_progress : std_ulogic;
|
||||||
no_instr_avail : std_ulogic;
|
no_instr_avail : std_ulogic;
|
||||||
instr_dispatch : std_ulogic;
|
instr_dispatch : std_ulogic;
|
||||||
ext_interrupt : std_ulogic;
|
ext_interrupt : std_ulogic;
|
||||||
@@ -174,7 +176,7 @@ architecture behaviour of execute1 is
|
|||||||
spr_select => spr_id_init, pmu_spr_num => 5x"0",
|
spr_select => spr_id_init, pmu_spr_num => 5x"0",
|
||||||
redir_to_next => '0', advance_nia => '0', lr_from_next => '0',
|
redir_to_next => '0', advance_nia => '0', lr_from_next => '0',
|
||||||
mul_in_progress => '0', mul_finish => '0', div_in_progress => '0',
|
mul_in_progress => '0', mul_finish => '0', div_in_progress => '0',
|
||||||
bsort_in_progress => '0',
|
bsort_in_progress => '0', bperm_in_progress => '0',
|
||||||
no_instr_avail => '0', instr_dispatch => '0', ext_interrupt => '0',
|
no_instr_avail => '0', instr_dispatch => '0', ext_interrupt => '0',
|
||||||
taken_branch_event => '0', br_mispredict => '0',
|
taken_branch_event => '0', br_mispredict => '0',
|
||||||
msr => 64x"0",
|
msr => 64x"0",
|
||||||
@@ -245,6 +247,8 @@ architecture behaviour of execute1 is
|
|||||||
-- bit-sort unit signals
|
-- bit-sort unit signals
|
||||||
signal bsort_start : std_ulogic;
|
signal bsort_start : std_ulogic;
|
||||||
signal bsort_done : std_ulogic;
|
signal bsort_done : std_ulogic;
|
||||||
|
signal bperm_start : std_ulogic;
|
||||||
|
signal bperm_done : std_ulogic;
|
||||||
|
|
||||||
-- random number generator signals
|
-- random number generator signals
|
||||||
signal random_raw : std_ulogic_vector(63 downto 0);
|
signal random_raw : std_ulogic_vector(63 downto 0);
|
||||||
@@ -515,6 +519,8 @@ begin
|
|||||||
go => bsort_start,
|
go => bsort_start,
|
||||||
opc => e_in.insn(7 downto 6),
|
opc => e_in.insn(7 downto 6),
|
||||||
done => bsort_done,
|
done => bsort_done,
|
||||||
|
do_bperm => bperm_start,
|
||||||
|
bperm_done => bperm_done,
|
||||||
result => bsort_result
|
result => bsort_result
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -1228,7 +1234,7 @@ begin
|
|||||||
when OP_CMPRB =>
|
when OP_CMPRB =>
|
||||||
when OP_CMPEQB =>
|
when OP_CMPEQB =>
|
||||||
when OP_LOGIC | OP_XOR | OP_PRTY | OP_CMPB | OP_EXTS |
|
when OP_LOGIC | OP_XOR | OP_PRTY | OP_CMPB | OP_EXTS |
|
||||||
OP_BPERM | OP_BREV | OP_BCD =>
|
OP_BREV | OP_BCD =>
|
||||||
|
|
||||||
when OP_B =>
|
when OP_B =>
|
||||||
v.take_branch := '1';
|
v.take_branch := '1';
|
||||||
@@ -1433,6 +1439,11 @@ begin
|
|||||||
slow_op := '1';
|
slow_op := '1';
|
||||||
owait := '1';
|
owait := '1';
|
||||||
|
|
||||||
|
when OP_BPERM =>
|
||||||
|
v.start_bperm := '1';
|
||||||
|
slow_op := '1';
|
||||||
|
owait := '1';
|
||||||
|
|
||||||
when OP_MUL_L64 =>
|
when OP_MUL_L64 =>
|
||||||
if e_in.is_32bit = '1' then
|
if e_in.is_32bit = '1' then
|
||||||
v.se.mult_32s := '1';
|
v.se.mult_32s := '1';
|
||||||
@@ -1718,6 +1729,7 @@ begin
|
|||||||
x_to_divider.valid <= actions.start_div;
|
x_to_divider.valid <= actions.start_div;
|
||||||
v.div_in_progress := actions.start_div;
|
v.div_in_progress := actions.start_div;
|
||||||
v.bsort_in_progress := actions.start_bsort;
|
v.bsort_in_progress := actions.start_bsort;
|
||||||
|
v.bperm_in_progress := actions.start_bperm;
|
||||||
v.br_mispredict := v.e.redirect and actions.direct_branch;
|
v.br_mispredict := v.e.redirect and actions.direct_branch;
|
||||||
v.advance_nia := actions.advance_nia;
|
v.advance_nia := actions.advance_nia;
|
||||||
v.redir_to_next := actions.redir_to_next;
|
v.redir_to_next := actions.redir_to_next;
|
||||||
@@ -1728,7 +1740,8 @@ begin
|
|||||||
-- multiply is happening in order to stop following
|
-- multiply is happening in order to stop following
|
||||||
-- instructions from using the wrong XER value
|
-- instructions from using the wrong XER value
|
||||||
-- (and for simplicity in the OE=0 case).
|
-- (and for simplicity in the OE=0 case).
|
||||||
v.busy := actions.start_div or actions.start_mul or actions.start_bsort;
|
v.busy := actions.start_div or actions.start_mul or
|
||||||
|
actions.start_bsort or actions.start_bperm;
|
||||||
|
|
||||||
-- instruction for other units, i.e. LDST
|
-- instruction for other units, i.e. LDST
|
||||||
if e_in.unit = LDST then
|
if e_in.unit = LDST then
|
||||||
@@ -1740,6 +1753,7 @@ begin
|
|||||||
end if;
|
end if;
|
||||||
is_scv := go and actions.se.scv_trap;
|
is_scv := go and actions.se.scv_trap;
|
||||||
bsort_start <= go and actions.start_bsort;
|
bsort_start <= go and actions.start_bsort;
|
||||||
|
bperm_start <= go and actions.start_bperm;
|
||||||
pmu_trace <= go and actions.do_trace;
|
pmu_trace <= go and actions.do_trace;
|
||||||
|
|
||||||
if not HAS_FPU and ex1.div_in_progress = '1' then
|
if not HAS_FPU and ex1.div_in_progress = '1' then
|
||||||
@@ -1780,6 +1794,13 @@ begin
|
|||||||
v.e.write_data := alu_result;
|
v.e.write_data := alu_result;
|
||||||
bypass_valid := bsort_done;
|
bypass_valid := bsort_done;
|
||||||
end if;
|
end if;
|
||||||
|
if ex1.bperm_in_progress = '1' then
|
||||||
|
v.bperm_in_progress := not bperm_done;
|
||||||
|
v.e.valid := bperm_done;
|
||||||
|
v.busy := not bperm_done;
|
||||||
|
v.e.write_data := alu_result;
|
||||||
|
bypass_valid := bperm_done;
|
||||||
|
end if;
|
||||||
|
|
||||||
if v.e.write_xerc_enable = '1' and v.e.valid = '1' then
|
if v.e.write_xerc_enable = '1' and v.e.valid = '1' then
|
||||||
v.xerc := v.e.xerc;
|
v.xerc := v.e.xerc;
|
||||||
|
|||||||
13
logical.vhdl
13
logical.vhdl
@@ -23,7 +23,6 @@ architecture behaviour of logical is
|
|||||||
|
|
||||||
signal par0, par1 : std_ulogic;
|
signal par0, par1 : std_ulogic;
|
||||||
signal parity : std_ulogic_vector(63 downto 0);
|
signal parity : std_ulogic_vector(63 downto 0);
|
||||||
signal permute : std_ulogic_vector(7 downto 0);
|
|
||||||
|
|
||||||
function bcd_to_dpd(bcd: std_ulogic_vector(11 downto 0)) return std_ulogic_vector is
|
function bcd_to_dpd(bcd: std_ulogic_vector(11 downto 0)) return std_ulogic_vector is
|
||||||
variable dpd: std_ulogic_vector(9 downto 0);
|
variable dpd: std_ulogic_vector(9 downto 0);
|
||||||
@@ -109,16 +108,6 @@ begin
|
|||||||
parity(32) <= par1;
|
parity(32) <= par1;
|
||||||
end if;
|
end if;
|
||||||
|
|
||||||
-- bit permutation
|
|
||||||
for i in 0 to 7 loop
|
|
||||||
j := i * 8;
|
|
||||||
if rs(j+7 downto j+6) = "00" then
|
|
||||||
permute(i) <= rb(to_integer(unsigned(not rs(j+5 downto j))));
|
|
||||||
else
|
|
||||||
permute(i) <= '0';
|
|
||||||
end if;
|
|
||||||
end loop;
|
|
||||||
|
|
||||||
rb_adj := rb;
|
rb_adj := rb;
|
||||||
if invert_in = '1' then
|
if invert_in = '1' then
|
||||||
rb_adj := not rb;
|
rb_adj := not rb;
|
||||||
@@ -157,8 +146,6 @@ begin
|
|||||||
tmp := parity;
|
tmp := parity;
|
||||||
when OP_CMPB =>
|
when OP_CMPB =>
|
||||||
tmp := ppc_cmpb(rs, rb);
|
tmp := ppc_cmpb(rs, rb);
|
||||||
when OP_BPERM =>
|
|
||||||
tmp := std_ulogic_vector(resize(unsigned(permute), 64));
|
|
||||||
when OP_BCD =>
|
when OP_BCD =>
|
||||||
-- invert_in is abused to indicate direction of conversion
|
-- invert_in is abused to indicate direction of conversion
|
||||||
if invert_in = '0' then
|
if invert_in = '0' then
|
||||||
|
|||||||
Reference in New Issue
Block a user