mirror of
https://github.com/antonblanchard/microwatt.git
synced 2026-01-29 21:21:03 +00:00
execute: Consolidate count-leading/trailing-zeroes implementations
This adds combinational logic that performs 32-bit and 64-bit count-leading-zeroes and count-trailing-zeroes in a single unit, and consolidates the four instructions under a single OP_CNTZ opcode. This saves 84 slice LUTs on the Arty A7-100.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
3
Makefile
3
Makefile
@@ -18,12 +18,13 @@ sim_jtag.o: sim_jtag_socket.o
|
|||||||
core_tb.o: common.o wishbone_types.o core.o soc.o sim_jtag.o
|
core_tb.o: common.o wishbone_types.o core.o soc.o sim_jtag.o
|
||||||
core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o execute2.o loadstore1.o loadstore2.o multiply.o writeback.o core_debug.o divider.o
|
core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o execute2.o loadstore1.o loadstore2.o multiply.o writeback.o core_debug.o divider.o
|
||||||
core_debug.o: common.o
|
core_debug.o: common.o
|
||||||
|
countzero.o:
|
||||||
cr_file.o: common.o
|
cr_file.o: common.o
|
||||||
crhelpers.o: common.o
|
crhelpers.o: common.o
|
||||||
decode1.o: common.o decode_types.o
|
decode1.o: common.o decode_types.o
|
||||||
decode2.o: decode_types.o common.o helpers.o insn_helpers.o
|
decode2.o: decode_types.o common.o helpers.o insn_helpers.o
|
||||||
decode_types.o:
|
decode_types.o:
|
||||||
execute1.o: decode_types.o common.o helpers.o crhelpers.o insn_helpers.o ppc_fx_insns.o rotator.o logical.o
|
execute1.o: decode_types.o common.o helpers.o crhelpers.o insn_helpers.o ppc_fx_insns.o rotator.o logical.o countzero.o
|
||||||
execute2.o: common.o crhelpers.o ppc_fx_insns.o
|
execute2.o: common.o crhelpers.o ppc_fx_insns.o
|
||||||
fetch1.o: common.o
|
fetch1.o: common.o
|
||||||
fetch2.o: common.o wishbone_types.o
|
fetch2.o: common.o wishbone_types.o
|
||||||
|
|||||||
103
countzero.vhdl
Normal file
103
countzero.vhdl
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
library ieee;
|
||||||
|
use ieee.std_logic_1164.all;
|
||||||
|
use ieee.numeric_std.all;
|
||||||
|
|
||||||
|
library work;
|
||||||
|
|
||||||
|
-- zero_counter: purely combinational count-leading-zeroes /
-- count-trailing-zeroes unit for 32-bit and 64-bit operands.
--
--   rs          : operand
--   count_right : '0' = count leading zeroes (from the most significant end),
--                 '1' = count trailing zeroes (from the least significant end)
--   is_32bit    : '1' = only rs(31 downto 0) is examined
--   result      : zero count, zero-extended to 64 bits; an all-zero operand
--                 yields 32 in 32-bit mode and 64 otherwise
entity zero_counter is
    port (
        rs          : in std_ulogic_vector(63 downto 0);
        count_right : in std_ulogic;
        is_32bit    : in std_ulogic;
        result      : out std_ulogic_vector(63 downto 0)
        );
end entity zero_counter;

architecture behaviour of zero_counter is
begin
    zerocounter0: process(all)
        -- All intermediates are variables that are assigned on every pass
        -- through the process, so no path leaves them undriven (no storage
        -- is implied) and the output settles in a single evaluation.
        variable l32, r32 : std_ulogic;
        variable v32 : std_ulogic_vector(31 downto 0);
        variable v16 : std_ulogic_vector(15 downto 0);
        variable v8  : std_ulogic_vector(7 downto 0);
        variable v4  : std_ulogic_vector(3 downto 0);
        -- sel accumulates the bit index (0..63) of the first '1' bit found
        -- from the end selected by count_right.
        variable sel : std_ulogic_vector(5 downto 0);
    begin
        -- Non-zero flags for the upper and lower words.
        l32 := or (rs(63 downto 32));
        r32 := or (rs(31 downto 0));

        -- Stage 1: pick the 32-bit half that holds the bit of interest.
        -- In 32-bit mode the lower word is always used.
        if count_right = '0' then
            sel(5) := l32 and (not is_32bit);
        else
            sel(5) := (not r32) and (not is_32bit);
        end if;
        if sel(5) = '1' then
            v32 := rs(63 downto 32);
        else
            v32 := rs(31 downto 0);
        end if;

        -- Stage 2: narrow 32 -> 16 bits.
        if count_right = '0' then
            sel(4) := or (v32(31 downto 16));
        else
            sel(4) := not (or (v32(15 downto 0)));
        end if;
        if sel(4) = '1' then
            v16 := v32(31 downto 16);
        else
            v16 := v32(15 downto 0);
        end if;

        -- Stage 3: narrow 16 -> 8 bits.
        if count_right = '0' then
            sel(3) := or (v16(15 downto 8));
        else
            sel(3) := not (or (v16(7 downto 0)));
        end if;
        if sel(3) = '1' then
            v8 := v16(15 downto 8);
        else
            v8 := v16(7 downto 0);
        end if;

        -- Stage 4: narrow 8 -> 4 bits.
        if count_right = '0' then
            sel(2) := or (v8(7 downto 4));
        else
            sel(2) := not (or (v8(3 downto 0)));
        end if;
        if sel(2) = '1' then
            v4 := v8(7 downto 4);
        else
            v4 := v8(3 downto 0);
        end if;

        -- Stage 5: priority-encode the remaining nibble.
        if count_right = '0' then
            -- highest set bit wins
            if v4(3) = '1' then
                sel(1 downto 0) := "11";
            elsif v4(2) = '1' then
                sel(1 downto 0) := "10";
            elsif v4(1) = '1' then
                sel(1 downto 0) := "01";
            else
                sel(1 downto 0) := "00";
            end if;
        else
            -- lowest set bit wins
            if v4(0) = '1' then
                sel(1 downto 0) := "00";
            elsif v4(1) = '1' then
                sel(1 downto 0) := "01";
            elsif v4(2) = '1' then
                sel(1 downto 0) := "10";
            else
                sel(1 downto 0) := "11";
            end if;
        end if;

        -- Final result selection.
        if (l32 = '0' or is_32bit = '1') and r32 = '0' then
            -- operand is zero, return 32 for 32-bit, else 64
            result <= x"00000000000000" & '0' & not is_32bit & is_32bit & "00000";
        elsif count_right = '0' then
            -- leading zeroes = (width - 1) - index of the highest set bit,
            -- i.e. the bitwise complement of sel within the operand width
            result <= x"00000000000000" & "00" & (not sel(5) and not is_32bit) & not sel(4 downto 0);
        else
            -- trailing zeroes = index of the lowest set bit
            result <= x"00000000000000" & "00" & sel;
        end if;
    end process;
end behaviour;
|
||||||
@@ -145,10 +145,10 @@ architecture behaviour of decode1 is
|
|||||||
-- 2#0011100000# cmpeqb
|
-- 2#0011100000# cmpeqb
|
||||||
2#0000100000# => (ALU, OP_CMPL, RA, RB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- cmpl
|
2#0000100000# => (ALU, OP_CMPL, RA, RB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- cmpl
|
||||||
-- 2#0011000000# cmprb
|
-- 2#0011000000# cmprb
|
||||||
2#0000111010# => (ALU, OP_CNTLZD, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- cntlzd
|
2#0000111010# => (ALU, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- cntlzd
|
||||||
2#0000011010# => (ALU, OP_CNTLZW, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- cntlzw
|
2#0000011010# => (ALU, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '1'), -- cntlzw
|
||||||
2#1000111010# => (ALU, OP_CNTTZD, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- cnttzd
|
2#1000111010# => (ALU, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- cnttzd
|
||||||
2#1000011010# => (ALU, OP_CNTTZW, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- cnttzw
|
2#1000011010# => (ALU, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '1'), -- cnttzw
|
||||||
-- 2#1011110011# darn
|
-- 2#1011110011# darn
|
||||||
2#0001010110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbf
|
2#0001010110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbf
|
||||||
2#0000110110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbst
|
2#0000110110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbst
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ package decode_types is
|
|||||||
type insn_type_t is (OP_ILLEGAL, OP_NOP, OP_ADD,
|
type insn_type_t is (OP_ILLEGAL, OP_NOP, OP_ADD,
|
||||||
OP_ADDPCIS, OP_AND, OP_ATTN, OP_B, OP_BC, OP_BCREG,
|
OP_ADDPCIS, OP_AND, OP_ATTN, OP_B, OP_BC, OP_BCREG,
|
||||||
OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPL, OP_CMPRB,
|
OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPL, OP_CMPRB,
|
||||||
OP_CNTLZD, OP_CNTLZW, OP_CNTTZD, OP_CNTTZW, OP_CRAND,
|
OP_CNTZ, OP_CRAND,
|
||||||
OP_CRANDC, OP_CREQV, OP_CRNAND, OP_CRNOR, OP_CROR, OP_CRORC,
|
OP_CRANDC, OP_CREQV, OP_CRNAND, OP_CRNOR, OP_CROR, OP_CRORC,
|
||||||
OP_CRXOR, OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST,
|
OP_CRXOR, OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST,
|
||||||
OP_DCBZ, OP_DIV, OP_EXTSB, OP_EXTSH, OP_EXTSW,
|
OP_DCBZ, OP_DIV, OP_EXTSB, OP_EXTSH, OP_EXTSW,
|
||||||
|
|||||||
@@ -46,6 +46,7 @@ architecture behaviour of execute1 is
|
|||||||
signal rotator_result: std_ulogic_vector(63 downto 0);
|
signal rotator_result: std_ulogic_vector(63 downto 0);
|
||||||
signal rotator_carry: std_ulogic;
|
signal rotator_carry: std_ulogic;
|
||||||
signal logical_result: std_ulogic_vector(63 downto 0);
|
signal logical_result: std_ulogic_vector(63 downto 0);
|
||||||
|
signal countzero_result: std_ulogic_vector(63 downto 0);
|
||||||
|
|
||||||
function decode_input_carry (carry_sel : carry_in_t; ca_in : std_ulogic) return std_ulogic is
|
function decode_input_carry (carry_sel : carry_in_t; ca_in : std_ulogic) return std_ulogic is
|
||||||
begin
|
begin
|
||||||
@@ -85,6 +86,14 @@ begin
|
|||||||
result => logical_result
|
result => logical_result
|
||||||
);
|
);
|
||||||
|
|
||||||
|
countzero_0: entity work.zero_counter
|
||||||
|
port map (
|
||||||
|
rs => e_in.read_data3,
|
||||||
|
count_right => e_in.insn(10),
|
||||||
|
is_32bit => e_in.is_32bit,
|
||||||
|
result => countzero_result
|
||||||
|
);
|
||||||
|
|
||||||
execute1_0: process(clk)
|
execute1_0: process(clk)
|
||||||
begin
|
begin
|
||||||
if rising_edge(clk) then
|
if rising_edge(clk) then
|
||||||
@@ -217,17 +226,8 @@ begin
|
|||||||
hi := lo + 3;
|
hi := lo + 3;
|
||||||
v.e.write_cr_data(hi downto lo) := ppc_cmpl(l, e_in.read_data1, e_in.read_data2);
|
v.e.write_cr_data(hi downto lo) := ppc_cmpl(l, e_in.read_data1, e_in.read_data2);
|
||||||
end loop;
|
end loop;
|
||||||
when OP_CNTLZW =>
|
when OP_CNTZ =>
|
||||||
result := ppc_cntlzw(e_in.read_data3);
|
result := countzero_result;
|
||||||
result_en := 1;
|
|
||||||
when OP_CNTTZW =>
|
|
||||||
result := ppc_cnttzw(e_in.read_data3);
|
|
||||||
result_en := 1;
|
|
||||||
when OP_CNTLZD =>
|
|
||||||
result := ppc_cntlzd(e_in.read_data3);
|
|
||||||
result_en := 1;
|
|
||||||
when OP_CNTTZD =>
|
|
||||||
result := ppc_cnttzd(e_in.read_data3);
|
|
||||||
result_en := 1;
|
result_en := 1;
|
||||||
when OP_EXTSB =>
|
when OP_EXTSB =>
|
||||||
result := ppc_extsb(e_in.read_data3);
|
result := ppc_extsb(e_in.read_data3);
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ filesets:
|
|||||||
- ppc_fx_insns.vhdl
|
- ppc_fx_insns.vhdl
|
||||||
- sim_console.vhdl
|
- sim_console.vhdl
|
||||||
- logical.vhdl
|
- logical.vhdl
|
||||||
|
- countzero.vhdl
|
||||||
- execute1.vhdl
|
- execute1.vhdl
|
||||||
- execute2.vhdl
|
- execute2.vhdl
|
||||||
- loadstore1.vhdl
|
- loadstore1.vhdl
|
||||||
|
|||||||
Reference in New Issue
Block a user