mirror of
https://github.com/antonblanchard/microwatt.git
synced 2026-01-29 13:01:15 +00:00
execute: Consolidate count-leading/trailing-zeroes implementations
This adds combinational logic that performs 32-bit and 64-bit count-leading-zeroes and count-trailing-zeroes in a single unit, and consolidates the four instructions (cntlzd, cntlzw, cnttzd, cnttzw) under a single OP_CNTZ opcode. This saves 84 slice LUTs on the Arty A7-100. Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
3
Makefile
3
Makefile
@@ -18,12 +18,13 @@ sim_jtag.o: sim_jtag_socket.o
|
||||
core_tb.o: common.o wishbone_types.o core.o soc.o sim_jtag.o
|
||||
core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o execute2.o loadstore1.o loadstore2.o multiply.o writeback.o core_debug.o divider.o
|
||||
core_debug.o: common.o
|
||||
countzero.o:
|
||||
cr_file.o: common.o
|
||||
crhelpers.o: common.o
|
||||
decode1.o: common.o decode_types.o
|
||||
decode2.o: decode_types.o common.o helpers.o insn_helpers.o
|
||||
decode_types.o:
|
||||
execute1.o: decode_types.o common.o helpers.o crhelpers.o insn_helpers.o ppc_fx_insns.o rotator.o logical.o
|
||||
execute1.o: decode_types.o common.o helpers.o crhelpers.o insn_helpers.o ppc_fx_insns.o rotator.o logical.o countzero.o
|
||||
execute2.o: common.o crhelpers.o ppc_fx_insns.o
|
||||
fetch1.o: common.o
|
||||
fetch2.o: common.o wishbone_types.o
|
||||
|
||||
103
countzero.vhdl
Normal file
103
countzero.vhdl
Normal file
@@ -0,0 +1,103 @@
|
||||
library ieee;
|
||||
use ieee.std_logic_1164.all;
|
||||
use ieee.numeric_std.all;
|
||||
|
||||
library work;
|
||||
|
||||
-- Combinational count-leading-zeroes / count-trailing-zeroes unit.
--
-- Ports:
--   rs          : 64-bit operand.
--   count_right : '0' counts zeroes from the most-significant end (leading),
--                 '1' counts zeroes from the least-significant end (trailing).
--   is_32bit    : '1' restricts the operation to rs(31 downto 0).
--   result      : the count, zero-extended to 64 bits.  An all-zero operand
--                 yields 64 (or 32 when is_32bit = '1').
entity zero_counter is
    port (
        rs          : in std_ulogic_vector(63 downto 0);
        count_right : in std_ulogic;
        is_32bit    : in std_ulogic;
        result      : out std_ulogic_vector(63 downto 0)
        );
end entity zero_counter;

architecture behaviour of zero_counter is
begin
    -- The search is a binary narrowing 64 -> 32 -> 16 -> 8 -> 4 bits; each
    -- stage records in sel(n) which half holds the bit of interest.
    --
    -- All intermediates are process variables that are assigned on every
    -- evaluation.  Keeping them as signals that are only assigned in the
    -- non-zero branch would leave them unassigned on the zero-operand path
    -- (inferring latches) and would need several delta cycles to settle.
    zerocounter0: process(all)
        variable l32, r32 : std_ulogic;
        variable v32      : std_ulogic_vector(31 downto 0);
        variable v16      : std_ulogic_vector(15 downto 0);
        variable v8       : std_ulogic_vector(7 downto 0);
        variable v4       : std_ulogic_vector(3 downto 0);
        variable sel      : std_ulogic_vector(5 downto 0);
    begin
        l32 := or (rs(63 downto 32));
        r32 := or (rs(31 downto 0));

        -- 64 -> 32: pick the upper word only for 64-bit operations
        if count_right = '0' then
            sel(5) := l32 and (not is_32bit);
        else
            sel(5) := (not r32) and (not is_32bit);
        end if;
        if sel(5) = '1' then
            v32 := rs(63 downto 32);
        else
            v32 := rs(31 downto 0);
        end if;

        -- 32 -> 16
        if count_right = '0' then
            sel(4) := or (v32(31 downto 16));
        else
            sel(4) := not (or (v32(15 downto 0)));
        end if;
        if sel(4) = '1' then
            v16 := v32(31 downto 16);
        else
            v16 := v32(15 downto 0);
        end if;

        -- 16 -> 8
        if count_right = '0' then
            sel(3) := or (v16(15 downto 8));
        else
            sel(3) := not (or (v16(7 downto 0)));
        end if;
        if sel(3) = '1' then
            v8 := v16(15 downto 8);
        else
            v8 := v16(7 downto 0);
        end if;

        -- 8 -> 4
        if count_right = '0' then
            sel(2) := or (v8(7 downto 4));
        else
            sel(2) := not (or (v8(3 downto 0)));
        end if;
        if sel(2) = '1' then
            v4 := v8(7 downto 4);
        else
            v4 := v8(3 downto 0);
        end if;

        -- Final nibble: priority-encode from the end being counted from
        if count_right = '0' then
            if v4(3) = '1' then
                sel(1 downto 0) := "11";
            elsif v4(2) = '1' then
                sel(1 downto 0) := "10";
            elsif v4(1) = '1' then
                sel(1 downto 0) := "01";
            else
                sel(1 downto 0) := "00";
            end if;
        else
            if v4(0) = '1' then
                sel(1 downto 0) := "00";
            elsif v4(1) = '1' then
                sel(1 downto 0) := "01";
            elsif v4(2) = '1' then
                sel(1 downto 0) := "10";
            else
                sel(1 downto 0) := "11";
            end if;
        end if;

        if (l32 = '0' or is_32bit = '1') and r32 = '0' then
            -- operand is zero, return 32 for 32-bit, else 64
            result <= x"00000000000000" & '0' & not is_32bit & is_32bit & "00000";
        elsif count_right = '0' then
            -- sel holds the index of the highest set bit; the number of
            -- leading zeroes is its complement within the operand width
            result <= x"00000000000000" & "00" & (not sel(5) and not is_32bit) & not sel(4 downto 0);
        else
            -- sel holds the index of the lowest set bit, which is the count
            result <= x"00000000000000" & "00" & sel;
        end if;
    end process;
end behaviour;
|
||||
@@ -145,10 +145,10 @@ architecture behaviour of decode1 is
|
||||
-- 2#0011100000# cmpeqb
|
||||
2#0000100000# => (ALU, OP_CMPL, RA, RB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- cmpl
|
||||
-- 2#0011000000# cmprb
|
||||
2#0000111010# => (ALU, OP_CNTLZD, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- cntlzd
|
||||
2#0000011010# => (ALU, OP_CNTLZW, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- cntlzw
|
||||
2#1000111010# => (ALU, OP_CNTTZD, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- cnttzd
|
||||
2#1000011010# => (ALU, OP_CNTTZW, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- cnttzw
|
||||
2#0000111010# => (ALU, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- cntlzd
|
||||
2#0000011010# => (ALU, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '1'), -- cntlzw
|
||||
2#1000111010# => (ALU, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- cnttzd
|
||||
2#1000011010# => (ALU, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '1'), -- cnttzw
|
||||
-- 2#1011110011# darn
|
||||
2#0001010110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbf
|
||||
2#0000110110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbst
|
||||
|
||||
@@ -5,7 +5,7 @@ package decode_types is
|
||||
type insn_type_t is (OP_ILLEGAL, OP_NOP, OP_ADD,
|
||||
OP_ADDPCIS, OP_AND, OP_ATTN, OP_B, OP_BC, OP_BCREG,
|
||||
OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPL, OP_CMPRB,
|
||||
OP_CNTLZD, OP_CNTLZW, OP_CNTTZD, OP_CNTTZW, OP_CRAND,
|
||||
OP_CNTZ, OP_CRAND,
|
||||
OP_CRANDC, OP_CREQV, OP_CRNAND, OP_CRNOR, OP_CROR, OP_CRORC,
|
||||
OP_CRXOR, OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST,
|
||||
OP_DCBZ, OP_DIV, OP_EXTSB, OP_EXTSH, OP_EXTSW,
|
||||
|
||||
@@ -46,6 +46,7 @@ architecture behaviour of execute1 is
|
||||
signal rotator_result: std_ulogic_vector(63 downto 0);
|
||||
signal rotator_carry: std_ulogic;
|
||||
signal logical_result: std_ulogic_vector(63 downto 0);
|
||||
signal countzero_result: std_ulogic_vector(63 downto 0);
|
||||
|
||||
function decode_input_carry (carry_sel : carry_in_t; ca_in : std_ulogic) return std_ulogic is
|
||||
begin
|
||||
@@ -85,6 +86,14 @@ begin
|
||||
result => logical_result
|
||||
);
|
||||
|
||||
countzero_0: entity work.zero_counter
|
||||
port map (
|
||||
rs => e_in.read_data3,
|
||||
count_right => e_in.insn(10),
|
||||
is_32bit => e_in.is_32bit,
|
||||
result => countzero_result
|
||||
);
|
||||
|
||||
execute1_0: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
@@ -217,17 +226,8 @@ begin
|
||||
hi := lo + 3;
|
||||
v.e.write_cr_data(hi downto lo) := ppc_cmpl(l, e_in.read_data1, e_in.read_data2);
|
||||
end loop;
|
||||
when OP_CNTLZW =>
|
||||
result := ppc_cntlzw(e_in.read_data3);
|
||||
result_en := 1;
|
||||
when OP_CNTTZW =>
|
||||
result := ppc_cnttzw(e_in.read_data3);
|
||||
result_en := 1;
|
||||
when OP_CNTLZD =>
|
||||
result := ppc_cntlzd(e_in.read_data3);
|
||||
result_en := 1;
|
||||
when OP_CNTTZD =>
|
||||
result := ppc_cnttzd(e_in.read_data3);
|
||||
when OP_CNTZ =>
|
||||
result := countzero_result;
|
||||
result_en := 1;
|
||||
when OP_EXTSB =>
|
||||
result := ppc_extsb(e_in.read_data3);
|
||||
|
||||
@@ -19,6 +19,7 @@ filesets:
|
||||
- ppc_fx_insns.vhdl
|
||||
- sim_console.vhdl
|
||||
- logical.vhdl
|
||||
- countzero.vhdl
|
||||
- execute1.vhdl
|
||||
- execute2.vhdl
|
||||
- loadstore1.vhdl
|
||||
|
||||
Reference in New Issue
Block a user