mirror of
https://github.com/antonblanchard/microwatt.git
synced 2026-01-11 23:43:15 +00:00
execute: Move popcnt and prty instructions into the logical unit
This implements logic in the logical entity to calculate the results of the popcnt* and prty* instructions. We now have one insn_type_t value for the three popcnt variants and one for the two prty variants, using the length field of the decode_rom_t to distinguish between them. The implementations in logical.vhdl use recursive algorithms rather than the simple functions in ppc_fx_insns.vhdl. This gives a saving of about 140 slice LUTs on the A7-100 and improves timing slightly. Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
parent
d2ca625b3b
commit
0c714f1be6
10
decode1.vhdl
10
decode1.vhdl
@ -263,11 +263,11 @@ architecture behaviour of decode1 is
|
||||
2#0001111100# => (ALU, OP_OR, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- nor
|
||||
2#0110111100# => (ALU, OP_OR, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- or
|
||||
2#0110011100# => (ALU, OP_OR, NONE, RB, RS, RA, '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- orc
|
||||
2#0001111010# => (ALU, OP_POPCNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntb
|
||||
2#0111111010# => (ALU, OP_POPCNTD, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntd
|
||||
2#0101111010# => (ALU, OP_POPCNTW, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntw
|
||||
2#0010111010# => (ALU, OP_PRTYD, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- prtyd
|
||||
2#0010011010# => (ALU, OP_PRTYW, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- prtyw
|
||||
2#0001111010# => (ALU, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntb
|
||||
2#0111111010# => (ALU, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntd
|
||||
2#0101111010# => (ALU, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntw
|
||||
2#0010111010# => (ALU, OP_PRTY, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- prtyd
|
||||
2#0010011010# => (ALU, OP_PRTY, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- prtyw
|
||||
-- 2#0010000000# setb
|
||||
2#0000011011# => (ALU, OP_SHL, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- sld
|
||||
2#0000011000# => (ALU, OP_SHL, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- slw
|
||||
|
||||
@ -14,8 +14,8 @@ package decode_types is
|
||||
OP_MCRXR, OP_MCRXRX, OP_MFCR, OP_MFSPR, OP_MOD,
|
||||
OP_MTCRF, OP_MTSPR, OP_MUL_L64,
|
||||
OP_MUL_H64, OP_MUL_H32, OP_OR,
|
||||
OP_POPCNTB, OP_POPCNTD, OP_POPCNTW, OP_PRTYD,
|
||||
OP_PRTYW, OP_RLC, OP_RLCL, OP_RLCR, OP_SETB,
|
||||
OP_POPCNT, OP_PRTY,
|
||||
OP_RLC, OP_RLCL, OP_RLCR, OP_SETB,
|
||||
OP_SHL, OP_SHR,
|
||||
OP_SYNC, OP_TD, OP_TDI, OP_TW,
|
||||
OP_TWI, OP_XOR, OP_SIM_CONFIG
|
||||
|
||||
@ -54,6 +54,8 @@ architecture behaviour of execute1 is
|
||||
signal rotator_carry: std_ulogic;
|
||||
signal logical_result: std_ulogic_vector(63 downto 0);
|
||||
signal countzero_result: std_ulogic_vector(63 downto 0);
|
||||
signal popcnt_result: std_ulogic_vector(63 downto 0);
|
||||
signal parity_result: std_ulogic_vector(63 downto 0);
|
||||
|
||||
-- multiply signals
|
||||
signal x_to_multiply: Execute1ToMultiplyType;
|
||||
@ -127,7 +129,10 @@ begin
|
||||
op => e_in.insn_type,
|
||||
invert_in => e_in.invert_a,
|
||||
invert_out => e_in.invert_out,
|
||||
result => logical_result
|
||||
result => logical_result,
|
||||
datalen => e_in.data_len,
|
||||
popcnt => popcnt_result,
|
||||
parity => parity_result
|
||||
);
|
||||
|
||||
countzero_0: entity work.zero_counter
|
||||
@ -612,20 +617,11 @@ begin
|
||||
-- when others =>
|
||||
-- end case;
|
||||
end if;
|
||||
when OP_POPCNTB =>
|
||||
result := ppc_popcntb(e_in.read_data3);
|
||||
when OP_POPCNT =>
|
||||
result := popcnt_result;
|
||||
result_en := '1';
|
||||
when OP_POPCNTW =>
|
||||
result := ppc_popcntw(e_in.read_data3);
|
||||
result_en := '1';
|
||||
when OP_POPCNTD =>
|
||||
result := ppc_popcntd(e_in.read_data3);
|
||||
result_en := '1';
|
||||
when OP_PRTYD =>
|
||||
result := ppc_prtyd(e_in.read_data3);
|
||||
result_en := '1';
|
||||
when OP_PRTYW =>
|
||||
result := ppc_prtyw(e_in.read_data3);
|
||||
when OP_PRTY =>
|
||||
result := parity_result;
|
||||
result_en := '1';
|
||||
when OP_RLC | OP_RLCL | OP_RLCR | OP_SHL | OP_SHR =>
|
||||
result := rotator_result;
|
||||
|
||||
60
logical.vhdl
60
logical.vhdl
@ -12,11 +12,29 @@ entity logical is
|
||||
op : in insn_type_t;
|
||||
invert_in : in std_ulogic;
|
||||
invert_out : in std_ulogic;
|
||||
result : out std_ulogic_vector(63 downto 0)
|
||||
result : out std_ulogic_vector(63 downto 0);
|
||||
datalen : in std_logic_vector(3 downto 0);
|
||||
popcnt : out std_ulogic_vector(63 downto 0);
|
||||
parity : out std_ulogic_vector(63 downto 0)
|
||||
);
|
||||
end entity logical;
|
||||
|
||||
architecture behaviour of logical is
|
||||
|
||||
subtype twobit is unsigned(1 downto 0);
|
||||
type twobit32 is array(0 to 31) of twobit;
|
||||
signal pc2 : twobit32;
|
||||
subtype threebit is unsigned(2 downto 0);
|
||||
type threebit16 is array(0 to 15) of threebit;
|
||||
signal pc4 : threebit16;
|
||||
subtype fourbit is unsigned(3 downto 0);
|
||||
type fourbit8 is array(0 to 7) of fourbit;
|
||||
signal pc8 : fourbit8;
|
||||
subtype sixbit is unsigned(5 downto 0);
|
||||
type sixbit2 is array(0 to 1) of sixbit;
|
||||
signal pc32 : sixbit2;
|
||||
signal par0, par1 : std_ulogic;
|
||||
|
||||
begin
|
||||
logical_0: process(all)
|
||||
variable rb_adj, tmp : std_ulogic_vector(63 downto 0);
|
||||
@ -40,5 +58,45 @@ begin
|
||||
result <= not tmp;
|
||||
end if;
|
||||
|
||||
-- population counts
|
||||
for i in 0 to 31 loop
|
||||
pc2(i) <= unsigned("0" & rs(i * 2 downto i * 2)) + unsigned("0" & rs(i * 2 + 1 downto i * 2 + 1));
|
||||
end loop;
|
||||
for i in 0 to 15 loop
|
||||
pc4(i) <= ('0' & pc2(i * 2)) + ('0' & pc2(i * 2 + 1));
|
||||
end loop;
|
||||
for i in 0 to 7 loop
|
||||
pc8(i) <= ('0' & pc4(i * 2)) + ('0' & pc4(i * 2 + 1));
|
||||
end loop;
|
||||
for i in 0 to 1 loop
|
||||
pc32(i) <= ("00" & pc8(i * 4)) + ("00" & pc8(i * 4 + 1)) +
|
||||
("00" & pc8(i * 4 + 2)) + ("00" & pc8(i * 4 + 3));
|
||||
end loop;
|
||||
popcnt <= (others => '0');
|
||||
if datalen(3 downto 2) = "00" then
|
||||
-- popcntb
|
||||
for i in 0 to 7 loop
|
||||
popcnt(i * 8 + 3 downto i * 8) <= std_ulogic_vector(pc8(i));
|
||||
end loop;
|
||||
elsif datalen(3) = '0' then
|
||||
-- popcntw
|
||||
for i in 0 to 1 loop
|
||||
popcnt(i * 32 + 5 downto i * 32) <= std_ulogic_vector(pc32(i));
|
||||
end loop;
|
||||
else
|
||||
popcnt(6 downto 0) <= std_ulogic_vector(('0' & pc32(0)) + ('0' & pc32(1)));
|
||||
end if;
|
||||
|
||||
-- parity calculations
|
||||
par0 <= rs(0) xor rs(8) xor rs(16) xor rs(24);
|
||||
par1 <= rs(32) xor rs(40) xor rs(48) xor rs(56);
|
||||
parity <= (others => '0');
|
||||
if datalen(3) = '1' then
|
||||
parity(0) <= par0 xor par1;
|
||||
else
|
||||
parity(0) <= par0;
|
||||
parity(32) <= par1;
|
||||
end if;
|
||||
|
||||
end process;
|
||||
end behaviour;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user