mirror of
https://github.com/antonblanchard/microwatt.git
synced 2026-01-11 23:43:15 +00:00
Implement cfuged, pdepd and pextd
This implements the cfuged, pdepd and pextd instructions in a new unit called bit_sorter (so called because cfuged and pextd can be viewed as sorting the bits of the mask). The cnt* instructions and the popcnt* instructions now use the same OP_COUNTB insn_type so as to free up an insn_type value to use for the new instructions. The new instructions are implemented using a slow and simple algorithm that takes 64 cycles to compute the result. The ex1 stage is stalled while this happens, as for a 64-bit multiply, or for a divide when there is no FPU. Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
parent
d7d7a3afd4
commit
fa9df33f7e
2
Makefile
2
Makefile
@ -74,7 +74,7 @@ core_files = decode_types.vhdl common.vhdl wishbone_types.vhdl fetch1.vhdl \
|
||||
cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \
|
||||
logical.vhdl countbits.vhdl multiply.vhdl multiply-32s.vhdl divider.vhdl \
|
||||
execute1.vhdl loadstore1.vhdl mmu.vhdl dcache.vhdl writeback.vhdl \
|
||||
core_debug.vhdl core.vhdl fpu.vhdl pmu.vhdl
|
||||
core_debug.vhdl core.vhdl fpu.vhdl pmu.vhdl bitsort.vhdl
|
||||
|
||||
soc_files = wishbone_arbiter.vhdl wishbone_bram_wrapper.vhdl sync_fifo.vhdl \
|
||||
wishbone_debug_master.vhdl xics.vhdl syscon.vhdl gpio.vhdl soc.vhdl \
|
||||
|
||||
102
bitsort.vhdl
Normal file
102
bitsort.vhdl
Normal file
@ -0,0 +1,102 @@
|
||||
-- Implements instructions that involve sorting bits,
|
||||
-- that is, cfuged, pextd and pdepd.
|
||||
--
|
||||
-- cfuged: Sort the bits in the mask in RB into 0s at the left, 1s at the right
|
||||
-- and move the bits in RS in the same fashion to give the result
|
||||
-- pextd: Like cfuged but only use the bits of RS where the
|
||||
-- corresponding bit in RB is 1
|
||||
-- pdepd: Inverse of pextd; take the low-order bits of RS and spread them out
|
||||
-- to the bit positions which have a 1 in RB
|
||||
|
||||
-- NB opc is bits 7-6 of the instruction:
|
||||
-- 00 = pdepd, 01 = pextd, 10 = cfuged
|
||||
|
||||
library ieee;
|
||||
use ieee.std_logic_1164.all;
|
||||
use ieee.numeric_std.all;
|
||||
|
||||
library work;
|
||||
use work.helpers.all;
|
||||
|
||||
-- Port interface of the bit-sort unit (cfuged / pextd / pdepd).
-- A computation is started by asserting go; the unit then iterates once per
-- clock over all 64 bit positions and pulses done for one cycle when the
-- final bit has been written into result.
entity bit_sorter is
|
||||
port (
|
||||
-- Clock; all state in the unit is updated on the rising edge.
clk : in std_ulogic;
|
||||
-- Synchronous reset: stops any operation in progress and clears the result.
rst : in std_ulogic;
|
||||
-- Source value (RS operand) whose bits are moved.
rs : in std_ulogic_vector(63 downto 0);
|
||||
-- Mask (RB operand) that decides where each bit goes.
rb : in std_ulogic_vector(63 downto 0);
|
||||
-- Start strobe: rs, rb and opc are captured on the clock edge where go = '1'.
go : in std_ulogic;
|
||||
-- Operation select, bits 7-6 of the instruction:
-- 00 = pdepd, 01 = pextd, 10 = cfuged.
opc : in std_ulogic_vector(1 downto 0);
|
||||
-- High for a single cycle when result holds the completed value.
done : out std_ulogic;
|
||||
-- Result register; built up one bit per cycle while an operation runs.
result : out std_ulogic_vector(63 downto 0)
|
||||
);
|
||||
end entity bit_sorter;
|
||||
|
||||
-- Bit-serial implementation: one mask bit is examined from each end of the
-- mask per clock, so an operation takes 64 iterations after the go cycle.
architecture behaviour of bit_sorter is
|
||||
|
||||
-- Result accumulator; drives the result port.
signal val : std_ulogic_vector(63 downto 0);
|
||||
-- '1' while an operation is in progress.
signal st : std_ulogic;
|
||||
-- Registered done pulse; drives the done port.
signal sd : std_ulogic;
|
||||
-- Copy of opc latched when the operation was started.
signal opr : std_ulogic_vector(1 downto 0);
|
||||
-- Iteration counter (0 to 63); also the destination bit index for pdepd.
signal bc : unsigned(5 downto 0);
|
||||
-- Next destination index for bits placed at the left; starts at 63, counts down.
signal jl : unsigned(5 downto 0);
|
||||
-- Next destination index for bits placed at the right; starts at 0, counts up.
signal jr : unsigned(5 downto 0);
|
||||
-- Mask shift register scanned from the most significant end (bit 63 examined).
signal sr_ml : std_ulogic_vector(63 downto 0);
|
||||
-- Mask shift register scanned from the least significant end (bit 0 examined).
signal sr_mr : std_ulogic_vector(63 downto 0);
|
||||
-- Value shift register scanned from the most significant end.
signal sr_vl : std_ulogic_vector(63 downto 0);
|
||||
-- Value shift register scanned from the least significant end.
signal sr_vr : std_ulogic_vector(63 downto 0);
|
||||
|
||||
begin
|
||||
-- Sequential state machine. Priority is rst, then go, then the running state,
-- so asserting go while an operation is in progress restarts it.
bsort_r: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
-- Default: done is low; overridden below on the final iteration only.
sd <= '0';
|
||||
if rst = '1' then
|
||||
-- Reset clears the control state and result; the shift registers and
-- counters are not reset here, they are loaded when go is asserted.
st <= '0';
|
||||
opr <= "00";
|
||||
val <= (others => '0');
|
||||
elsif go = '1' then
|
||||
-- Capture operands and opcode, clear the result and initialise counters.
st <= '1';
|
||||
sr_ml <= rb;
|
||||
sr_mr <= rb;
|
||||
sr_vl <= rs;
|
||||
sr_vr <= rs;
|
||||
opr <= opc;
|
||||
val <= (others => '0');
|
||||
bc <= to_unsigned(0, 6);
|
||||
jl <= to_unsigned(63, 6);
|
||||
jr <= to_unsigned(0, 6);
|
||||
elsif st = '1' then
|
||||
-- Last iteration: leave the running state and raise done for one cycle.
-- The final bit is written to val on this same edge.
if bc = 6x"3f" then
|
||||
st <= '0';
|
||||
sd <= '1';
|
||||
end if;
|
||||
bc <= bc + 1;
|
||||
-- Left-hand scan, only when opr(1) = '1': a source bit whose mask bit is 0
-- is placed at the next free position counting down from bit 63.
if sr_ml(63) = '0' and opr(1) = '1' then
|
||||
-- cfuged
|
||||
val(to_integer(jl)) <= sr_vl(63);
|
||||
jl <= jl - 1;
|
||||
end if;
|
||||
-- Right-hand scan: act only where the current low mask bit is 1.
if sr_mr(0) = '1' then
|
||||
if opr = "00" then
|
||||
-- pdepd
-- Deposit the next low-order source bit at the mask bit's own position.
val(to_integer(bc)) <= sr_vr(0);
|
||||
else
|
||||
-- cfuged or pextd
-- Pack the selected source bit at the next free position from bit 0 up.
val(to_integer(jr)) <= sr_vr(0);
|
||||
end if;
|
||||
jr <= jr + 1;
|
||||
end if;
|
||||
sr_vl <= sr_vl(62 downto 0) & '0';
|
||||
-- For pdepd the source advances only when a bit was deposited; for the
-- other operations it advances every cycle in step with the mask.
if opr /= "00" or sr_mr(0) = '1' then
|
||||
sr_vr <= '0' & sr_vr(63 downto 1);
|
||||
end if;
|
||||
sr_ml <= sr_ml(62 downto 0) & '0';
|
||||
sr_mr <= '0' & sr_mr(63 downto 1);
|
||||
end if;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
-- Outputs come straight from the registers.
done <= sd;
|
||||
result <= val;
|
||||
|
||||
end behaviour;
|
||||
17
decode1.vhdl
17
decode1.vhdl
@ -106,6 +106,7 @@ architecture behaviour of decode1 is
|
||||
INSN_brd => (ALU, NONE, OP_BREV, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
|
||||
INSN_cbcdtd => (ALU, NONE, OP_BCD, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
|
||||
INSN_cdtbcd => (ALU, NONE, OP_BCD, NONE, NONE, RS, RA, '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
|
||||
INSN_cfuged => (ALU, NONE, OP_BSORT, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
|
||||
INSN_cmp => (ALU, NONE, OP_CMP, RA, RB, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE),
|
||||
INSN_cmpb => (ALU, NONE, OP_CMPB, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
|
||||
INSN_cmpeqb => (ALU, NONE, OP_CMPEQB, RA, RB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
|
||||
@ -113,10 +114,10 @@ architecture behaviour of decode1 is
|
||||
INSN_cmpl => (ALU, NONE, OP_CMP, RA, RB, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
|
||||
INSN_cmpli => (ALU, NONE, OP_CMP, RA, CONST_UI, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
|
||||
INSN_cmprb => (ALU, NONE, OP_CMPRB, RA, RB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
|
||||
INSN_cntlzd => (ALU, NONE, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE),
|
||||
INSN_cntlzw => (ALU, NONE, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE),
|
||||
INSN_cnttzd => (ALU, NONE, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE),
|
||||
INSN_cnttzw => (ALU, NONE, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE),
|
||||
INSN_cntlzd => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE),
|
||||
INSN_cntlzw => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE),
|
||||
INSN_cnttzd => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE),
|
||||
INSN_cnttzw => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE),
|
||||
INSN_crand => (ALU, NONE, OP_CROP, NONE, NONE, NONE, NONE, '1', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
|
||||
INSN_crandc => (ALU, NONE, OP_CROP, NONE, NONE, NONE, NONE, '1', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
|
||||
INSN_creqv => (ALU, NONE, OP_CROP, NONE, NONE, NONE, NONE, '1', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
|
||||
@ -281,6 +282,8 @@ architecture behaviour of decode1 is
|
||||
INSN_ori => (ALU, NONE, OP_LOGIC, NONE, CONST_UI, RS, RA, '0', '0', '1', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE),
|
||||
INSN_oris => (ALU, NONE, OP_LOGIC, NONE, CONST_UI_HI, RS, RA, '0', '0', '1', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE),
|
||||
INSN_paddi => (ALU, NONE, OP_ADD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
|
||||
INSN_pdepd => (ALU, NONE, OP_BSORT, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
|
||||
INSN_pextd => (ALU, NONE, OP_BSORT, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
|
||||
INSN_plbz => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
|
||||
INSN_pld => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
|
||||
INSN_plfd => (LDST, FPU, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
|
||||
@ -296,9 +299,9 @@ architecture behaviour of decode1 is
|
||||
INSN_pstfs => (LDST, FPU, OP_STORE, RA0_OR_CIA, CONST_PSI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE),
|
||||
INSN_psth => (LDST, NONE, OP_STORE, RA0_OR_CIA, CONST_PSI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
|
||||
INSN_pstw => (LDST, NONE, OP_STORE, RA0_OR_CIA, CONST_PSI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
|
||||
INSN_popcntb => (ALU, NONE, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
|
||||
INSN_popcntd => (ALU, NONE, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
|
||||
INSN_popcntw => (ALU, NONE, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
|
||||
INSN_popcntb => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
|
||||
INSN_popcntd => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
|
||||
INSN_popcntw => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
|
||||
INSN_prtyd => (ALU, NONE, OP_PRTY, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
|
||||
INSN_prtyw => (ALU, NONE, OP_PRTY, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
|
||||
INSN_rfid => (ALU, NONE, OP_RFID, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
|
||||
|
||||
13
decode2.vhdl
13
decode2.vhdl
@ -232,12 +232,13 @@ architecture behaviour of decode2 is
|
||||
);
|
||||
|
||||
constant subresult_select : mux_select_array_t := (
|
||||
OP_MUL_L64 => "000", -- muldiv_result
|
||||
OP_MUL_H64 => "001",
|
||||
OP_MUL_H32 => "010",
|
||||
OP_DIV => "011",
|
||||
OP_DIVE => "011",
|
||||
OP_MOD => "011",
|
||||
OP_MUL_L64 => "000", -- multicyc_result
|
||||
OP_MUL_H64 => "010",
|
||||
OP_MUL_H32 => "001",
|
||||
OP_DIV => "101",
|
||||
OP_DIVE => "101",
|
||||
OP_MOD => "101",
|
||||
OP_BSORT => "100",
|
||||
OP_ADDG6S => "001", -- misc_result
|
||||
OP_ISEL => "010",
|
||||
OP_DARN => "011",
|
||||
|
||||
@ -6,7 +6,7 @@ package decode_types is
|
||||
OP_ATTN, OP_B, OP_BC, OP_BCREG,
|
||||
OP_BCD, OP_BPERM, OP_BREV,
|
||||
OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB,
|
||||
OP_CNTZ, OP_CROP,
|
||||
OP_COUNTB, OP_CROP,
|
||||
OP_DARN, OP_DCBF, OP_DCBST, OP_XCBT, OP_DCBTST,
|
||||
OP_DCBZ, OP_ICBI,
|
||||
OP_FP_CMP, OP_FP_ARITH, OP_FP_MOVE, OP_FP_MISC,
|
||||
@ -18,7 +18,8 @@ package decode_types is
|
||||
OP_MCRXRX, OP_MFCR, OP_MFMSR, OP_MFSPR,
|
||||
OP_MTCRF, OP_MTMSRD, OP_MTSPR, OP_MUL_L64,
|
||||
OP_MUL_H64, OP_MUL_H32,
|
||||
OP_POPCNT, OP_PRTY, OP_RFID,
|
||||
OP_BSORT,
|
||||
OP_PRTY, OP_RFID,
|
||||
OP_RLC, OP_RLCL, OP_RLCR, OP_SC, OP_SETB,
|
||||
OP_SHL, OP_SHR,
|
||||
OP_SYNC, OP_TLBIE, OP_TRAP,
|
||||
@ -179,11 +180,12 @@ package decode_types is
|
||||
INSN_and,
|
||||
INSN_andc,
|
||||
INSN_bperm,
|
||||
INSN_cfuged,
|
||||
INSN_cmp,
|
||||
INSN_cmpb,
|
||||
INSN_cmpeqb,
|
||||
INSN_cmpl,
|
||||
INSN_cmprb, -- 140
|
||||
INSN_cmpl, -- 140
|
||||
INSN_cmprb,
|
||||
INSN_dcbf,
|
||||
INSN_dcbst,
|
||||
INSN_dcbt,
|
||||
@ -192,8 +194,8 @@ package decode_types is
|
||||
INSN_divd,
|
||||
INSN_divdu,
|
||||
INSN_divde,
|
||||
INSN_divdeu,
|
||||
INSN_divw, -- 150
|
||||
INSN_divdeu, -- 150
|
||||
INSN_divw,
|
||||
INSN_divwu,
|
||||
INSN_divwe,
|
||||
INSN_divweu,
|
||||
@ -202,8 +204,8 @@ package decode_types is
|
||||
INSN_icbt,
|
||||
INSN_isel,
|
||||
INSN_lbarx,
|
||||
INSN_lbzcix,
|
||||
INSN_lbzux, -- 160
|
||||
INSN_lbzcix, -- 160
|
||||
INSN_lbzux,
|
||||
INSN_lbzx,
|
||||
INSN_ldarx,
|
||||
INSN_ldbrx,
|
||||
@ -212,8 +214,8 @@ package decode_types is
|
||||
INSN_ldux,
|
||||
INSN_lharx,
|
||||
INSN_lhax,
|
||||
INSN_lhaux,
|
||||
INSN_lhbrx, -- 170
|
||||
INSN_lhaux, -- 170
|
||||
INSN_lhbrx,
|
||||
INSN_lhzcix,
|
||||
INSN_lhzx,
|
||||
INSN_lhzux,
|
||||
@ -222,8 +224,8 @@ package decode_types is
|
||||
INSN_lwaux,
|
||||
INSN_lwbrx,
|
||||
INSN_lwzcix,
|
||||
INSN_lwzx,
|
||||
INSN_lwzux, -- 180
|
||||
INSN_lwzx, -- 180
|
||||
INSN_lwzux,
|
||||
INSN_modsd,
|
||||
INSN_modsw,
|
||||
INSN_moduw,
|
||||
@ -232,51 +234,54 @@ package decode_types is
|
||||
INSN_mulhwu,
|
||||
INSN_mulhd,
|
||||
INSN_mulhdu,
|
||||
INSN_mullw,
|
||||
INSN_mulld, -- 190
|
||||
INSN_mullw, -- 190
|
||||
INSN_mulld,
|
||||
INSN_nand,
|
||||
INSN_nor,
|
||||
INSN_or,
|
||||
INSN_orc,
|
||||
INSN_pdepd,
|
||||
INSN_pextd,
|
||||
INSN_rldcl,
|
||||
INSN_rldcr,
|
||||
INSN_rlwnm,
|
||||
INSN_rlwnm, -- 200
|
||||
INSN_slw,
|
||||
INSN_sld,
|
||||
INSN_sraw, -- 200
|
||||
INSN_sraw,
|
||||
INSN_srad,
|
||||
INSN_srw,
|
||||
INSN_srd,
|
||||
INSN_stbcix,
|
||||
INSN_stbcx,
|
||||
INSN_stbx,
|
||||
INSN_stbux,
|
||||
INSN_stbux, -- 210
|
||||
INSN_stdbrx,
|
||||
INSN_stdcix,
|
||||
INSN_stdcx, -- 210
|
||||
INSN_stdcx,
|
||||
INSN_stdx,
|
||||
INSN_stdux,
|
||||
INSN_sthbrx,
|
||||
INSN_sthcix,
|
||||
INSN_sthcx,
|
||||
INSN_sthx,
|
||||
INSN_sthux,
|
||||
INSN_sthux, -- 220
|
||||
INSN_stwbrx,
|
||||
INSN_stwcix,
|
||||
INSN_stwcx, -- 220
|
||||
INSN_stwcx,
|
||||
INSN_stwx,
|
||||
INSN_stwux,
|
||||
INSN_subf,
|
||||
INSN_subfc,
|
||||
INSN_subfe,
|
||||
INSN_td,
|
||||
INSN_tlbie,
|
||||
INSN_tlbie, -- 230
|
||||
INSN_tlbiel,
|
||||
INSN_tw,
|
||||
INSN_xor, -- 230
|
||||
INSN_xor,
|
||||
|
||||
-- pad to 232 to simplify comparison logic
|
||||
INSN_231,
|
||||
-- pad to 240 to simplify comparison logic
|
||||
INSN_234, INSN_235,
|
||||
INSN_236, INSN_237, INSN_238, INSN_239,
|
||||
|
||||
-- The following instructions have a third input addressed by RC
|
||||
INSN_maddld,
|
||||
@ -284,9 +289,7 @@ package decode_types is
|
||||
INSN_maddhdu,
|
||||
|
||||
-- pad to 256 to simplify comparison logic
|
||||
INSN_235,
|
||||
INSN_236, INSN_237, INSN_238, INSN_239,
|
||||
INSN_240, INSN_241, INSN_242, INSN_243,
|
||||
INSN_243,
|
||||
INSN_244, INSN_245, INSN_246, INSN_247,
|
||||
INSN_248, INSN_249, INSN_250, INSN_251,
|
||||
INSN_252, INSN_253, INSN_254, INSN_255,
|
||||
|
||||
@ -113,6 +113,7 @@ architecture behaviour of execute1 is
|
||||
direct_branch : std_ulogic;
|
||||
start_mul : std_ulogic;
|
||||
start_div : std_ulogic;
|
||||
start_bsort : std_ulogic;
|
||||
do_trace : std_ulogic;
|
||||
fp_intr : std_ulogic;
|
||||
res2_sel : std_ulogic_vector(1 downto 0);
|
||||
@ -134,7 +135,7 @@ architecture behaviour of execute1 is
|
||||
prev_op : insn_type_t;
|
||||
prev_prefixed : std_ulogic;
|
||||
oe : std_ulogic;
|
||||
mul_select : std_ulogic_vector(1 downto 0);
|
||||
mul_select : std_ulogic_vector(2 downto 0);
|
||||
res2_sel : std_ulogic_vector(1 downto 0);
|
||||
spr_select : spr_id;
|
||||
pmu_spr_num : std_ulogic_vector(4 downto 0);
|
||||
@ -144,6 +145,7 @@ architecture behaviour of execute1 is
|
||||
mul_in_progress : std_ulogic;
|
||||
mul_finish : std_ulogic;
|
||||
div_in_progress : std_ulogic;
|
||||
bsort_in_progress : std_ulogic;
|
||||
no_instr_avail : std_ulogic;
|
||||
instr_dispatch : std_ulogic;
|
||||
ext_interrupt : std_ulogic;
|
||||
@ -164,10 +166,11 @@ architecture behaviour of execute1 is
|
||||
busy => '0',
|
||||
fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL,
|
||||
prev_prefixed => '0',
|
||||
oe => '0', mul_select => "00", res2_sel => "00",
|
||||
oe => '0', mul_select => "000", res2_sel => "00",
|
||||
spr_select => spr_id_init, pmu_spr_num => 5x"0",
|
||||
redir_to_next => '0', advance_nia => '0', lr_from_next => '0',
|
||||
mul_in_progress => '0', mul_finish => '0', div_in_progress => '0',
|
||||
bsort_in_progress => '0',
|
||||
no_instr_avail => '0', instr_dispatch => '0', ext_interrupt => '0',
|
||||
taken_branch_event => '0', br_mispredict => '0',
|
||||
msr => 64x"0",
|
||||
@ -209,7 +212,8 @@ architecture behaviour of execute1 is
|
||||
signal alu_result: std_ulogic_vector(63 downto 0);
|
||||
signal adder_result: std_ulogic_vector(63 downto 0);
|
||||
signal misc_result: std_ulogic_vector(63 downto 0);
|
||||
signal muldiv_result: std_ulogic_vector(63 downto 0);
|
||||
signal multicyc_result: std_ulogic_vector(63 downto 0);
|
||||
signal bsort_result: std_ulogic_vector(63 downto 0);
|
||||
signal spr_result: std_ulogic_vector(63 downto 0);
|
||||
signal next_nia : std_ulogic_vector(63 downto 0);
|
||||
signal s1_sel : std_ulogic_vector(2 downto 0);
|
||||
@ -234,6 +238,10 @@ architecture behaviour of execute1 is
|
||||
signal x_to_divider: Execute1ToDividerType;
|
||||
signal divider_to_x: DividerToExecute1Type := DividerToExecute1Init;
|
||||
|
||||
-- bit-sort unit signals
|
||||
signal bsort_start : std_ulogic;
|
||||
signal bsort_done : std_ulogic;
|
||||
|
||||
-- random number generator signals
|
||||
signal random_raw : std_ulogic_vector(63 downto 0);
|
||||
signal random_cond : std_ulogic_vector(63 downto 0);
|
||||
@ -493,6 +501,18 @@ begin
|
||||
);
|
||||
end generate;
|
||||
|
||||
bsort_0: entity work.bit_sorter
|
||||
port map (
|
||||
clk => clk,
|
||||
rst => rst,
|
||||
rs => c_in,
|
||||
rb => b_in,
|
||||
go => bsort_start,
|
||||
opc => e_in.insn(7 downto 6),
|
||||
done => bsort_done,
|
||||
result => bsort_result
|
||||
);
|
||||
|
||||
random_0: entity work.random
|
||||
port map (
|
||||
clk => clk,
|
||||
@ -664,7 +684,7 @@ begin
|
||||
adder_result when "000",
|
||||
logical_result when "001",
|
||||
rotator_result when "010",
|
||||
muldiv_result when "100",
|
||||
multicyc_result when "100",
|
||||
ramspr_result when "101",
|
||||
misc_result when others;
|
||||
|
||||
@ -845,17 +865,21 @@ begin
|
||||
x_to_mult_32s.subtract <= '0';
|
||||
x_to_mult_32s.addend <= (others => '0');
|
||||
|
||||
case ex1.mul_select is
|
||||
when "00" =>
|
||||
muldiv_result <= multiply_to_x.result(63 downto 0);
|
||||
when "01" =>
|
||||
muldiv_result <= multiply_to_x.result(127 downto 64);
|
||||
when "10" =>
|
||||
muldiv_result <= multiply_to_x.result(63 downto 32) &
|
||||
multiply_to_x.result(63 downto 32);
|
||||
when others =>
|
||||
muldiv_result <= divider_to_x.write_reg_data;
|
||||
end case;
|
||||
if ex1.mul_select(2) = '0' then
|
||||
case ex1.mul_select(1 downto 0) is
|
||||
when "00" =>
|
||||
multicyc_result <= multiply_to_x.result(63 downto 0);
|
||||
when "01" =>
|
||||
multicyc_result <= multiply_to_x.result(63 downto 32) &
|
||||
multiply_to_x.result(63 downto 32);
|
||||
when others =>
|
||||
multicyc_result <= multiply_to_x.result(127 downto 64);
|
||||
end case;
|
||||
elsif ex1.mul_select(0) = '1' and not HAS_FPU then
|
||||
multicyc_result <= divider_to_x.write_reg_data;
|
||||
else
|
||||
multicyc_result <= bsort_result;
|
||||
end if;
|
||||
|
||||
-- Compute misc_result
|
||||
case e_in.sub_select is
|
||||
@ -1266,7 +1290,7 @@ begin
|
||||
end if;
|
||||
v.do_trace := '0';
|
||||
|
||||
when OP_CNTZ | OP_POPCNT =>
|
||||
when OP_COUNTB =>
|
||||
v.res2_sel := "01";
|
||||
slow_op := '1';
|
||||
when OP_ISEL =>
|
||||
@ -1388,6 +1412,11 @@ begin
|
||||
when OP_ICBI =>
|
||||
v.se.icache_inval := '1';
|
||||
|
||||
when OP_BSORT =>
|
||||
v.start_bsort := '1';
|
||||
slow_op := '1';
|
||||
owait := '1';
|
||||
|
||||
when OP_MUL_L64 =>
|
||||
if e_in.is_32bit = '1' then
|
||||
v.se.mult_32s := '1';
|
||||
@ -1565,7 +1594,7 @@ begin
|
||||
v.oe := e_in.oe;
|
||||
v.spr_select := e_in.spr_select;
|
||||
v.pmu_spr_num := e_in.insn(20 downto 16);
|
||||
v.mul_select := e_in.sub_select(1 downto 0);
|
||||
v.mul_select := e_in.sub_select;
|
||||
v.se := side_effect_init;
|
||||
v.ramspr_wraddr := e_in.ramspr_wraddr;
|
||||
v.lr_from_next := e_in.lr;
|
||||
@ -1596,7 +1625,7 @@ begin
|
||||
rot_clear_right <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCR else '0';
|
||||
rot_sign_ext <= '1' when e_in.insn_type = OP_EXTSWSLI else '0';
|
||||
|
||||
do_popcnt <= '1' when e_in.insn_type = OP_POPCNT else '0';
|
||||
do_popcnt <= '1' when e_in.insn_type = OP_COUNTB and e_in.insn(7 downto 6) = "11" else '0';
|
||||
|
||||
if valid_in = '1' then
|
||||
v.prev_op := e_in.insn_type;
|
||||
@ -1671,6 +1700,7 @@ begin
|
||||
v.mul_in_progress := actions.start_mul;
|
||||
x_to_divider.valid <= actions.start_div;
|
||||
v.div_in_progress := actions.start_div;
|
||||
v.bsort_in_progress := actions.start_bsort;
|
||||
v.br_mispredict := v.e.redirect and actions.direct_branch;
|
||||
v.advance_nia := actions.advance_nia;
|
||||
v.redir_to_next := actions.redir_to_next;
|
||||
@ -1681,7 +1711,7 @@ begin
|
||||
-- multiply is happening in order to stop following
|
||||
-- instructions from using the wrong XER value
|
||||
-- (and for simplicity in the OE=0 case).
|
||||
v.busy := actions.start_div or actions.start_mul;
|
||||
v.busy := actions.start_div or actions.start_mul or actions.start_bsort;
|
||||
|
||||
-- instruction for other units, i.e. LDST
|
||||
if e_in.unit = LDST then
|
||||
@ -1692,6 +1722,7 @@ begin
|
||||
end if;
|
||||
end if;
|
||||
is_scv := go and actions.se.scv_trap;
|
||||
bsort_start <= go and actions.start_bsort;
|
||||
|
||||
if not HAS_FPU and ex1.div_in_progress = '1' then
|
||||
v.div_in_progress := not divider_to_x.valid;
|
||||
@ -1724,6 +1755,13 @@ begin
|
||||
end if;
|
||||
v.e.valid := '1';
|
||||
end if;
|
||||
if ex1.bsort_in_progress = '1' then
|
||||
v.bsort_in_progress := not bsort_done;
|
||||
v.e.valid := bsort_done;
|
||||
v.busy := not bsort_done;
|
||||
v.e.write_data := alu_result;
|
||||
bypass_valid := bsort_done;
|
||||
end if;
|
||||
|
||||
if v.e.write_xerc_enable = '1' and v.e.valid = '1' then
|
||||
v.xerc := v.e.xerc;
|
||||
|
||||
@ -20,6 +20,7 @@ filesets:
|
||||
- sim_console.vhdl
|
||||
- logical.vhdl
|
||||
- countbits.vhdl
|
||||
- bitsort.vhdl
|
||||
- control.vhdl
|
||||
- execute1.vhdl
|
||||
- fpu.vhdl
|
||||
|
||||
@ -219,6 +219,7 @@ architecture behaviour of predecoder is
|
||||
2#0_00101_11011# => INSN_brd,
|
||||
2#0_01001_11010# => INSN_cbcdtd,
|
||||
2#0_01000_11010# => INSN_cdtbcd,
|
||||
2#0_00110_11100# => INSN_cfuged,
|
||||
2#0_00000_00000# => INSN_cmp,
|
||||
2#0_01111_11100# => INSN_cmpb,
|
||||
2#0_00111_00000# => INSN_cmpeqb,
|
||||
@ -363,6 +364,8 @@ architecture behaviour of predecoder is
|
||||
2#0_00011_11100# => INSN_nor,
|
||||
2#0_01101_11100# => INSN_or,
|
||||
2#0_01100_11100# => INSN_orc,
|
||||
2#0_00100_11100# => INSN_pdepd,
|
||||
2#0_00101_11100# => INSN_pextd,
|
||||
2#0_00011_11010# => INSN_popcntb,
|
||||
2#0_01111_11010# => INSN_popcntd,
|
||||
2#0_01011_11010# => INSN_popcntw,
|
||||
|
||||
@ -87,11 +87,11 @@ const char *units[4] = { "al", "ls", "fp", "3?" };
|
||||
const char *ops[64] =
|
||||
{
|
||||
"illegal", "nop ", "add ", "attn ", "b ", "bc ", "bcreg ", "bcd ",
|
||||
"bperm ", "brev ", "cmp ", "cmpb ", "cmpeqb ", "cmprb ", "cntz ", "crop ",
|
||||
"bperm ", "brev ", "cmp ", "cmpb ", "cmpeqb ", "cmprb ", "countb ", "crop ",
|
||||
"darn ", "dcbf ", "dcbst ", "xcbt ", "dcbtst ", "dcbz ", "icbi ", "fpcmp ",
|
||||
"fparith", "fpmove ", "fpmisc ", "div ", "dive ", "mod ", "exts ", "extswsl",
|
||||
"isel ", "isync ", "logic ", "ld ", "st ", "mcrxrx ", "mfcr ", "mfmsr ",
|
||||
"mfspr ", "mtcrf ", "mtmsr ", "mtspr ", "mull64 ", "mulh64 ", "mulh32 ", "popcnt ",
|
||||
"mfspr ", "mtcrf ", "mtmsr ", "mtspr ", "mull64 ", "mulh64 ", "mulh32 ", "bsort ",
|
||||
"prty ", "rfid ", "rlc ", "rlcl ", "rlcr ", "sc ", "setb ", "shl ",
|
||||
"shr ", "sync ", "tlbie ", "trap ", "xor ", "addg6s ", "wait ", "ffail ",
|
||||
};
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user