mirror of
https://github.com/antonblanchard/microwatt.git
synced 2026-04-26 12:27:28 +00:00
Merge pull request #134 from paulusmack/master
Add bypass from execute1 output to input
This commit is contained in:
4
Makefile
4
Makefile
@@ -31,7 +31,7 @@ common.o: decode_types.o
|
||||
control.o: gpr_hazard.o cr_hazard.o common.o
|
||||
sim_jtag.o: sim_jtag_socket.o
|
||||
core_tb.o: common.o wishbone_types.o core.o soc.o sim_jtag.o
|
||||
core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o loadstore1.o dcache.o multiply.o writeback.o core_debug.o divider.o
|
||||
core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o loadstore1.o dcache.o writeback.o core_debug.o
|
||||
core_debug.o: common.o
|
||||
countzero.o:
|
||||
countzero_tb.o: common.o glibc_random.o countzero.o
|
||||
@@ -40,7 +40,7 @@ crhelpers.o: common.o
|
||||
decode1.o: common.o decode_types.o
|
||||
decode2.o: decode_types.o common.o helpers.o insn_helpers.o control.o
|
||||
decode_types.o:
|
||||
execute1.o: decode_types.o common.o helpers.o crhelpers.o insn_helpers.o ppc_fx_insns.o rotator.o logical.o countzero.o
|
||||
execute1.o: decode_types.o common.o helpers.o crhelpers.o insn_helpers.o ppc_fx_insns.o rotator.o logical.o countzero.o multiply.o divider.o
|
||||
fetch1.o: common.o
|
||||
fetch2.o: common.o wishbone_types.o
|
||||
glibc_random_helpers.o:
|
||||
|
||||
86
common.vhdl
86
common.vhdl
@@ -109,6 +109,9 @@ package common is
|
||||
read_data1: std_ulogic_vector(63 downto 0);
|
||||
read_data2: std_ulogic_vector(63 downto 0);
|
||||
read_data3: std_ulogic_vector(63 downto 0);
|
||||
bypass_data1: std_ulogic;
|
||||
bypass_data2: std_ulogic;
|
||||
bypass_data3: std_ulogic;
|
||||
cr: std_ulogic_vector(31 downto 0);
|
||||
xerc: xer_common_t;
|
||||
lr: std_ulogic;
|
||||
@@ -124,44 +127,41 @@ package common is
|
||||
is_signed: std_ulogic;
|
||||
insn: std_ulogic_vector(31 downto 0);
|
||||
data_len: std_ulogic_vector(3 downto 0);
|
||||
byte_reverse : std_ulogic;
|
||||
sign_extend : std_ulogic; -- do we need to sign extend?
|
||||
update : std_ulogic; -- is this an update instruction?
|
||||
end record;
|
||||
constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
|
||||
(valid => '0', insn_type => OP_ILLEGAL, lr => '0', rc => '0', oe => '0', invert_a => '0',
|
||||
(valid => '0', insn_type => OP_ILLEGAL, bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0',
|
||||
lr => '0', rc => '0', oe => '0', invert_a => '0',
|
||||
invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0',
|
||||
is_32bit => '0', is_signed => '0', xerc => xerc_init, others => (others => '0'));
|
||||
is_32bit => '0', is_signed => '0', xerc => xerc_init,
|
||||
byte_reverse => '0', sign_extend => '0', update => '0', others => (others => '0'));
|
||||
|
||||
type Decode2ToMultiplyType is record
|
||||
type Execute1ToMultiplyType is record
|
||||
valid: std_ulogic;
|
||||
insn_type: insn_type_t;
|
||||
write_reg: gpr_index_t;
|
||||
data1: std_ulogic_vector(64 downto 0);
|
||||
data2: std_ulogic_vector(64 downto 0);
|
||||
rc: std_ulogic;
|
||||
oe: std_ulogic;
|
||||
is_32bit: std_ulogic;
|
||||
xerc: xer_common_t;
|
||||
end record;
|
||||
constant Decode2ToMultiplyInit : Decode2ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL, rc => '0',
|
||||
oe => '0', is_32bit => '0', xerc => xerc_init,
|
||||
others => (others => '0'));
|
||||
constant Execute1ToMultiplyInit : Execute1ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL,
|
||||
is_32bit => '0',
|
||||
others => (others => '0'));
|
||||
|
||||
type Decode2ToDividerType is record
|
||||
type Execute1ToDividerType is record
|
||||
valid: std_ulogic;
|
||||
write_reg: gpr_index_t;
|
||||
dividend: std_ulogic_vector(63 downto 0);
|
||||
divisor: std_ulogic_vector(63 downto 0);
|
||||
is_signed: std_ulogic;
|
||||
is_32bit: std_ulogic;
|
||||
is_extended: std_ulogic;
|
||||
is_modulus: std_ulogic;
|
||||
rc: std_ulogic;
|
||||
oe: std_ulogic;
|
||||
xerc: xer_common_t;
|
||||
neg_result: std_ulogic;
|
||||
end record;
|
||||
constant Decode2ToDividerInit: Decode2ToDividerType := (valid => '0', is_signed => '0', is_32bit => '0',
|
||||
is_extended => '0', is_modulus => '0',
|
||||
rc => '0', oe => '0', xerc => xerc_init,
|
||||
others => (others => '0'));
|
||||
constant Execute1ToDividerInit: Execute1ToDividerType := (valid => '0', is_signed => '0', is_32bit => '0',
|
||||
is_extended => '0', is_modulus => '0',
|
||||
neg_result => '0', others => (others => '0'));
|
||||
|
||||
type Decode2ToRegisterFileType is record
|
||||
read1_enable : std_ulogic;
|
||||
@@ -193,7 +193,7 @@ package common is
|
||||
end record;
|
||||
constant Execute1ToFetch1TypeInit : Execute1ToFetch1Type := (redirect => '0', others => (others => '0'));
|
||||
|
||||
type Decode2ToLoadstore1Type is record
|
||||
type Execute1ToLoadstore1Type is record
|
||||
valid : std_ulogic;
|
||||
load : std_ulogic; -- is this a load or store
|
||||
addr1 : std_ulogic_vector(63 downto 0);
|
||||
@@ -207,9 +207,9 @@ package common is
|
||||
update_reg : gpr_index_t; -- if so, the register to update
|
||||
xerc : xer_common_t;
|
||||
end record;
|
||||
constant Decode2ToLoadstore1Init : Decode2ToLoadstore1Type := (valid => '0', load => '0', byte_reverse => '0',
|
||||
sign_extend => '0', update => '0', xerc => xerc_init,
|
||||
others => (others => '0'));
|
||||
constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', load => '0', byte_reverse => '0',
|
||||
sign_extend => '0', update => '0', xerc => xerc_init,
|
||||
others => (others => '0'));
|
||||
|
||||
type Loadstore1ToDcacheType is record
|
||||
valid : std_ulogic;
|
||||
@@ -248,49 +248,33 @@ package common is
|
||||
write_enable : std_ulogic;
|
||||
write_reg: gspr_index_t;
|
||||
write_data: std_ulogic_vector(63 downto 0);
|
||||
write_len : std_ulogic_vector(3 downto 0);
|
||||
write_cr_enable : std_ulogic;
|
||||
write_cr_mask : std_ulogic_vector(7 downto 0);
|
||||
write_cr_data : std_ulogic_vector(31 downto 0);
|
||||
write_xerc_enable : std_ulogic;
|
||||
xerc : xer_common_t;
|
||||
sign_extend: std_ulogic;
|
||||
end record;
|
||||
constant Execute1ToWritebackInit : Execute1ToWritebackType := (valid => '0', rc => '0', write_enable => '0',
|
||||
write_cr_enable => '0', sign_extend => '0',
|
||||
write_cr_enable => '0',
|
||||
write_xerc_enable => '0', xerc => xerc_init,
|
||||
others => (others => '0'));
|
||||
|
||||
type MultiplyToWritebackType is record
|
||||
type MultiplyToExecute1Type is record
|
||||
valid: std_ulogic;
|
||||
|
||||
write_reg_enable : std_ulogic;
|
||||
write_reg_nr: gpr_index_t;
|
||||
write_reg_data: std_ulogic_vector(63 downto 0);
|
||||
write_xerc_enable : std_ulogic;
|
||||
xerc : xer_common_t;
|
||||
rc: std_ulogic;
|
||||
overflow : std_ulogic;
|
||||
end record;
|
||||
constant MultiplyToWritebackInit : MultiplyToWritebackType := (valid => '0', write_reg_enable => '0',
|
||||
rc => '0', write_xerc_enable => '0',
|
||||
xerc => xerc_init,
|
||||
others => (others => '0'));
|
||||
|
||||
type DividerToWritebackType is record
|
||||
valid: std_ulogic;
|
||||
|
||||
write_reg_enable : std_ulogic;
|
||||
write_reg_nr: gpr_index_t;
|
||||
write_reg_data: std_ulogic_vector(63 downto 0);
|
||||
write_xerc_enable : std_ulogic;
|
||||
xerc : xer_common_t;
|
||||
rc: std_ulogic;
|
||||
end record;
|
||||
constant DividerToWritebackInit : DividerToWritebackType := (valid => '0', write_reg_enable => '0',
|
||||
rc => '0', write_xerc_enable => '0',
|
||||
xerc => xerc_init,
|
||||
constant MultiplyToExecute1Init : MultiplyToExecute1Type := (valid => '0', overflow => '0',
|
||||
others => (others => '0'));
|
||||
|
||||
type DividerToExecute1Type is record
|
||||
valid: std_ulogic;
|
||||
write_reg_data: std_ulogic_vector(63 downto 0);
|
||||
overflow : std_ulogic;
|
||||
end record;
|
||||
constant DividerToExecute1Init : DividerToExecute1Type := (valid => '0', overflow => '0',
|
||||
others => (others => '0'));
|
||||
|
||||
type WritebackToRegisterFileType is record
|
||||
write_reg : gspr_index_t;
|
||||
write_data : std_ulogic_vector(63 downto 0);
|
||||
|
||||
19
control.vhdl
19
control.vhdl
@@ -21,6 +21,7 @@ entity control is
|
||||
|
||||
gpr_write_valid_in : in std_ulogic;
|
||||
gpr_write_in : in gspr_index_t;
|
||||
gpr_bypassable : in std_ulogic;
|
||||
|
||||
gpr_a_read_valid_in : in std_ulogic;
|
||||
gpr_a_read_in : in gspr_index_t;
|
||||
@@ -36,7 +37,11 @@ entity control is
|
||||
|
||||
valid_out : out std_ulogic;
|
||||
stall_out : out std_ulogic;
|
||||
stopped_out : out std_ulogic
|
||||
stopped_out : out std_ulogic;
|
||||
|
||||
gpr_bypass_a : out std_ulogic;
|
||||
gpr_bypass_b : out std_ulogic;
|
||||
gpr_bypass_c : out std_ulogic
|
||||
);
|
||||
end entity control;
|
||||
|
||||
@@ -71,10 +76,12 @@ begin
|
||||
|
||||
gpr_write_valid_in => gpr_write_valid,
|
||||
gpr_write_in => gpr_write_in,
|
||||
bypass_avail => gpr_bypassable,
|
||||
gpr_read_valid_in => gpr_a_read_valid_in,
|
||||
gpr_read_in => gpr_a_read_in,
|
||||
|
||||
stall_out => stall_a_out
|
||||
stall_out => stall_a_out,
|
||||
use_bypass => gpr_bypass_a
|
||||
);
|
||||
|
||||
gpr_hazard1: entity work.gpr_hazard
|
||||
@@ -87,10 +94,12 @@ begin
|
||||
|
||||
gpr_write_valid_in => gpr_write_valid,
|
||||
gpr_write_in => gpr_write_in,
|
||||
bypass_avail => gpr_bypassable,
|
||||
gpr_read_valid_in => gpr_b_read_valid_in,
|
||||
gpr_read_in => gpr_b_read_in,
|
||||
|
||||
stall_out => stall_b_out
|
||||
stall_out => stall_b_out,
|
||||
use_bypass => gpr_bypass_b
|
||||
);
|
||||
|
||||
gpr_c_read_in_fmt <= "0" & gpr_c_read_in;
|
||||
@@ -105,10 +114,12 @@ begin
|
||||
|
||||
gpr_write_valid_in => gpr_write_valid,
|
||||
gpr_write_in => gpr_write_in,
|
||||
bypass_avail => gpr_bypassable,
|
||||
gpr_read_valid_in => gpr_c_read_valid_in,
|
||||
gpr_read_in => gpr_c_read_in_fmt,
|
||||
|
||||
stall_out => stall_c_out
|
||||
stall_out => stall_c_out,
|
||||
use_bypass => gpr_bypass_c
|
||||
);
|
||||
|
||||
cr_hazard0: entity work.cr_hazard
|
||||
|
||||
45
core.vhdl
45
core.vhdl
@@ -9,7 +9,8 @@ use work.wishbone_types.all;
|
||||
entity core is
|
||||
generic (
|
||||
SIM : boolean := false;
|
||||
DISABLE_FLATTEN : boolean := false
|
||||
DISABLE_FLATTEN : boolean := false;
|
||||
EX1_BYPASS : boolean := true
|
||||
);
|
||||
port (
|
||||
clk : in std_logic;
|
||||
@@ -59,18 +60,10 @@ architecture behave of core is
|
||||
signal execute1_to_fetch1: Execute1ToFetch1Type;
|
||||
|
||||
-- load store signals
|
||||
signal decode2_to_loadstore1: Decode2ToLoadstore1Type;
|
||||
signal execute1_to_loadstore1: Execute1ToLoadstore1Type;
|
||||
signal loadstore1_to_dcache: Loadstore1ToDcacheType;
|
||||
signal dcache_to_writeback: DcacheToWritebackType;
|
||||
|
||||
-- multiply signals
|
||||
signal decode2_to_multiply: Decode2ToMultiplyType;
|
||||
signal multiply_to_writeback: MultiplyToWritebackType;
|
||||
|
||||
-- divider signals
|
||||
signal decode2_to_divider: Decode2ToDividerType;
|
||||
signal divider_to_writeback: DividerToWritebackType;
|
||||
|
||||
-- local signals
|
||||
signal fetch1_stall_in : std_ulogic;
|
||||
signal icache_stall_out : std_ulogic;
|
||||
@@ -115,8 +108,6 @@ architecture behave of core is
|
||||
attribute keep_hierarchy of register_file_0 : label is keep_h(DISABLE_FLATTEN);
|
||||
attribute keep_hierarchy of cr_file_0 : label is keep_h(DISABLE_FLATTEN);
|
||||
attribute keep_hierarchy of execute1_0 : label is keep_h(DISABLE_FLATTEN);
|
||||
attribute keep_hierarchy of multiply_0 : label is keep_h(DISABLE_FLATTEN);
|
||||
attribute keep_hierarchy of divider_0 : label is keep_h(DISABLE_FLATTEN);
|
||||
attribute keep_hierarchy of loadstore1_0 : label is keep_h(DISABLE_FLATTEN);
|
||||
attribute keep_hierarchy of dcache_0 : label is keep_h(DISABLE_FLATTEN);
|
||||
attribute keep_hierarchy of writeback_0 : label is keep_h(DISABLE_FLATTEN);
|
||||
@@ -186,6 +177,9 @@ begin
|
||||
decode1_stall_in <= decode2_stall_out;
|
||||
|
||||
decode2_0: entity work.decode2
|
||||
generic map (
|
||||
EX1_BYPASS => EX1_BYPASS
|
||||
)
|
||||
port map (
|
||||
clk => clk,
|
||||
rst => core_rst,
|
||||
@@ -196,9 +190,6 @@ begin
|
||||
stopped_out => dbg_core_is_stopped,
|
||||
d_in => decode1_to_decode2,
|
||||
e_out => decode2_to_execute1,
|
||||
l_out => decode2_to_loadstore1,
|
||||
m_out => decode2_to_multiply,
|
||||
d_out => decode2_to_divider,
|
||||
r_in => register_file_to_decode2,
|
||||
r_out => decode2_to_register_file,
|
||||
c_in => cr_file_to_decode2,
|
||||
@@ -232,11 +223,16 @@ begin
|
||||
);
|
||||
|
||||
execute1_0: entity work.execute1
|
||||
generic map (
|
||||
EX1_BYPASS => EX1_BYPASS
|
||||
)
|
||||
port map (
|
||||
clk => clk,
|
||||
rst => core_rst,
|
||||
flush_out => flush,
|
||||
stall_out => ex1_stall_out,
|
||||
e_in => decode2_to_execute1,
|
||||
l_out => execute1_to_loadstore1,
|
||||
f_out => execute1_to_fetch1,
|
||||
e_out => execute1_to_writeback,
|
||||
icache_inval => ex1_icache_inval,
|
||||
@@ -246,7 +242,7 @@ begin
|
||||
loadstore1_0: entity work.loadstore1
|
||||
port map (
|
||||
clk => clk,
|
||||
l_in => decode2_to_loadstore1,
|
||||
l_in => execute1_to_loadstore1,
|
||||
l_out => loadstore1_to_dcache
|
||||
);
|
||||
|
||||
@@ -265,28 +261,11 @@ begin
|
||||
wishbone_out => wishbone_data_out
|
||||
);
|
||||
|
||||
multiply_0: entity work.multiply
|
||||
port map (
|
||||
clk => clk,
|
||||
m_in => decode2_to_multiply,
|
||||
m_out => multiply_to_writeback
|
||||
);
|
||||
|
||||
divider_0: entity work.divider
|
||||
port map (
|
||||
clk => clk,
|
||||
rst => core_rst,
|
||||
d_in => decode2_to_divider,
|
||||
d_out => divider_to_writeback
|
||||
);
|
||||
|
||||
writeback_0: entity work.writeback
|
||||
port map (
|
||||
clk => clk,
|
||||
e_in => execute1_to_writeback,
|
||||
l_in => dcache_to_writeback,
|
||||
m_in => multiply_to_writeback,
|
||||
d_in => divider_to_writeback,
|
||||
w_out => writeback_to_register_file,
|
||||
c_out => writeback_to_cr_file,
|
||||
complete_out => complete
|
||||
|
||||
@@ -6,6 +6,7 @@ library work;
|
||||
|
||||
entity zero_counter is
|
||||
port (
|
||||
clk : in std_logic;
|
||||
rs : in std_ulogic_vector(63 downto 0);
|
||||
count_right : in std_ulogic;
|
||||
is_32bit : in std_ulogic;
|
||||
@@ -14,10 +15,14 @@ entity zero_counter is
|
||||
end entity zero_counter;
|
||||
|
||||
architecture behaviour of zero_counter is
|
||||
signal y, z : std_ulogic_vector(3 downto 0);
|
||||
signal v16 : std_ulogic_vector(15 downto 0);
|
||||
signal v4 : std_ulogic_vector(3 downto 0);
|
||||
signal sel : std_ulogic_vector(5 downto 0);
|
||||
type intermediate_result is record
|
||||
v16: std_ulogic_vector(15 downto 0);
|
||||
sel_hi: std_ulogic_vector(1 downto 0);
|
||||
is_32bit: std_ulogic;
|
||||
count_right: std_ulogic;
|
||||
end record;
|
||||
|
||||
signal r, r_in : intermediate_result;
|
||||
|
||||
-- Return the index of the leftmost or rightmost 1 in a set of 4 bits.
|
||||
-- Assumes v is not "0000"; if it is, return (right ? "11" : "00").
|
||||
@@ -47,65 +52,83 @@ architecture behaviour of zero_counter is
|
||||
end;
|
||||
|
||||
begin
|
||||
zerocounter0: process(all)
|
||||
zerocounter_0: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
r <= r_in;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
zerocounter_1: process(all)
|
||||
variable v: intermediate_result;
|
||||
variable y, z: std_ulogic_vector(3 downto 0);
|
||||
variable sel: std_ulogic_vector(5 downto 0);
|
||||
variable v4: std_ulogic_vector(3 downto 0);
|
||||
|
||||
begin
|
||||
-- Test 4 groups of 16 bits each.
|
||||
-- The top 2 groups are considered to be zero in 32-bit mode.
|
||||
z(0) <= or (rs(15 downto 0));
|
||||
z(1) <= or (rs(31 downto 16));
|
||||
z(2) <= or (rs(47 downto 32));
|
||||
z(3) <= or (rs(63 downto 48));
|
||||
z(0) := or (rs(15 downto 0));
|
||||
z(1) := or (rs(31 downto 16));
|
||||
z(2) := or (rs(47 downto 32));
|
||||
z(3) := or (rs(63 downto 48));
|
||||
if is_32bit = '0' then
|
||||
sel(5 downto 4) <= encoder(z, count_right);
|
||||
v.sel_hi := encoder(z, count_right);
|
||||
else
|
||||
sel(5) <= '0';
|
||||
v.sel_hi(1) := '0';
|
||||
if count_right = '0' then
|
||||
sel(4) <= z(1);
|
||||
v.sel_hi(0) := z(1);
|
||||
else
|
||||
sel(4) <= not z(0);
|
||||
v.sel_hi(0) := not z(0);
|
||||
end if;
|
||||
end if;
|
||||
|
||||
-- Select the leftmost/rightmost non-zero group of 16 bits
|
||||
case sel(5 downto 4) is
|
||||
case v.sel_hi is
|
||||
when "00" =>
|
||||
v16 <= rs(15 downto 0);
|
||||
v.v16 := rs(15 downto 0);
|
||||
when "01" =>
|
||||
v16 <= rs(31 downto 16);
|
||||
v.v16 := rs(31 downto 16);
|
||||
when "10" =>
|
||||
v16 <= rs(47 downto 32);
|
||||
v.v16 := rs(47 downto 32);
|
||||
when others =>
|
||||
v16 <= rs(63 downto 48);
|
||||
v.v16 := rs(63 downto 48);
|
||||
end case;
|
||||
|
||||
-- Latch this and do the rest in the next cycle, for the sake of timing
|
||||
v.is_32bit := is_32bit;
|
||||
v.count_right := count_right;
|
||||
r_in <= v;
|
||||
sel(5 downto 4) := r.sel_hi;
|
||||
|
||||
-- Test 4 groups of 4 bits
|
||||
y(0) <= or (v16(3 downto 0));
|
||||
y(1) <= or (v16(7 downto 4));
|
||||
y(2) <= or (v16(11 downto 8));
|
||||
y(3) <= or (v16(15 downto 12));
|
||||
sel(3 downto 2) <= encoder(y, count_right);
|
||||
y(0) := or (r.v16(3 downto 0));
|
||||
y(1) := or (r.v16(7 downto 4));
|
||||
y(2) := or (r.v16(11 downto 8));
|
||||
y(3) := or (r.v16(15 downto 12));
|
||||
sel(3 downto 2) := encoder(y, r.count_right);
|
||||
|
||||
-- Select the leftmost/rightmost non-zero group of 4 bits
|
||||
case sel(3 downto 2) is
|
||||
when "00" =>
|
||||
v4 <= v16(3 downto 0);
|
||||
v4 := r.v16(3 downto 0);
|
||||
when "01" =>
|
||||
v4 <= v16(7 downto 4);
|
||||
v4 := r.v16(7 downto 4);
|
||||
when "10" =>
|
||||
v4 <= v16(11 downto 8);
|
||||
v4 := r.v16(11 downto 8);
|
||||
when others =>
|
||||
v4 <= v16(15 downto 12);
|
||||
v4 := r.v16(15 downto 12);
|
||||
end case;
|
||||
|
||||
sel(1 downto 0) <= encoder(v4, count_right);
|
||||
sel(1 downto 0) := encoder(v4, r.count_right);
|
||||
|
||||
-- sel is now the index of the leftmost/rightmost 1 bit in rs
|
||||
if v4 = "0000" then
|
||||
-- operand is zero, return 32 for 32-bit, else 64
|
||||
result <= x"00000000000000" & '0' & not is_32bit & is_32bit & "00000";
|
||||
elsif count_right = '0' then
|
||||
result <= x"00000000000000" & '0' & not r.is_32bit & r.is_32bit & "00000";
|
||||
elsif r.count_right = '0' then
|
||||
-- return (63 - sel), trimmed to 5 bits in 32-bit mode
|
||||
result <= x"00000000000000" & "00" & (not sel(5) and not is_32bit) & not sel(4 downto 0);
|
||||
result <= x"00000000000000" & "00" & (not sel(5) and not r.is_32bit) & not sel(4 downto 0);
|
||||
else
|
||||
result <= x"00000000000000" & "00" & sel;
|
||||
end if;
|
||||
|
||||
@@ -15,16 +15,26 @@ architecture behave of countzero_tb is
|
||||
signal is_32bit, count_right: std_ulogic := '0';
|
||||
signal result: std_ulogic_vector(63 downto 0);
|
||||
signal randno: std_ulogic_vector(63 downto 0);
|
||||
signal clk: std_ulogic;
|
||||
|
||||
begin
|
||||
zerocounter_0: entity work.zero_counter
|
||||
port map (
|
||||
clk => clk,
|
||||
rs => rs,
|
||||
result => result,
|
||||
count_right => count_right,
|
||||
is_32bit => is_32bit
|
||||
);
|
||||
|
||||
clk_process: process
|
||||
begin
|
||||
clk <= '0';
|
||||
wait for clk_period/2;
|
||||
clk <= '1';
|
||||
wait for clk_period/2;
|
||||
end process;
|
||||
|
||||
stim_process: process
|
||||
variable r: std_ulogic_vector(63 downto 0);
|
||||
begin
|
||||
|
||||
86
decode1.vhdl
86
decode1.vhdl
@@ -44,8 +44,8 @@ architecture behaviour of decode1 is
|
||||
29 => (ALU, OP_AND, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0'), -- andis.
|
||||
18 => (ALU, OP_B, NONE, CONST_LI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0'), -- b
|
||||
16 => (ALU, OP_BC, SPR, CONST_BD, NONE, SPR , '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0'), -- bc
|
||||
11 => (ALU, OP_CMP, RA, CONST_SI, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpi
|
||||
10 => (ALU, OP_CMPL, RA, CONST_UI, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpli
|
||||
11 => (ALU, OP_CMP, RA, CONST_SI, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0'), -- cmpi
|
||||
10 => (ALU, OP_CMP, RA, CONST_UI, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpli
|
||||
34 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- lbz
|
||||
35 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '1'), -- lbzu
|
||||
42 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '1'), -- lha
|
||||
@@ -54,7 +54,7 @@ architecture behaviour of decode1 is
|
||||
41 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '1'), -- lhzu
|
||||
32 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- lwz
|
||||
33 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '1'), -- lwzu
|
||||
7 => (MUL, OP_MUL_L64, RA, CONST_SI, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '1'), -- mulli
|
||||
7 => (ALU, OP_MUL_L64, RA, CONST_SI, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0'), -- mulli
|
||||
24 => (ALU, OP_OR, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- ori
|
||||
25 => (ALU, OP_OR, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- oris
|
||||
20 => (ALU, OP_RLC, RA, CONST_SH32, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- rlwimi
|
||||
@@ -66,7 +66,7 @@ architecture behaviour of decode1 is
|
||||
45 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '1'), -- sthu
|
||||
36 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- stw
|
||||
37 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '1'), -- stwu
|
||||
8 => (ALU, OP_ADD, RA, CONST_SI, NONE, RT, '0', '0', '1', '0', ONE, '1', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- subfic
|
||||
8 => (ALU, OP_ADD, RA, CONST_SI, NONE, RT, '0', '0', '1', '0', ONE, '1', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- subfic
|
||||
2 => (ALU, OP_TDI, RA, CONST_SI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- tdi
|
||||
--PPC_TWI 3
|
||||
26 => (ALU, OP_XOR, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- xori
|
||||
@@ -145,10 +145,10 @@ architecture behaviour of decode1 is
|
||||
2#0000011100# => (ALU, OP_AND, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- and
|
||||
2#0000111100# => (ALU, OP_AND, NONE, RB, RS, RA, '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- andc
|
||||
-- 2#0011111100# bperm
|
||||
2#0000000000# => (ALU, OP_CMP, RA, RB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmp
|
||||
2#0000000000# => (ALU, OP_CMP, RA, RB, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0'), -- cmp
|
||||
2#0111111100# => (ALU, OP_CMPB, NONE, RB, RS, RA, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpb
|
||||
-- 2#0011100000# cmpeqb
|
||||
2#0000100000# => (ALU, OP_CMPL, RA, RB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpl
|
||||
2#0000100000# => (ALU, OP_CMP, RA, RB, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpl
|
||||
-- 2#0011000000# cmprb
|
||||
2#0000111010# => (ALU, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- cntlzd
|
||||
2#0000011010# => (ALU, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- cntlzw
|
||||
@@ -160,22 +160,22 @@ architecture behaviour of decode1 is
|
||||
2#0100010110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbt
|
||||
2#0011110110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbtst
|
||||
-- 2#1111110110# dcbz
|
||||
2#0110001001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdeu
|
||||
2#1110001001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdeuo
|
||||
2#0110001011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divweu
|
||||
2#1110001011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divweuo
|
||||
2#0110101001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divde
|
||||
2#1110101001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdeo
|
||||
2#0110101011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divwe
|
||||
2#1110101011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divweo
|
||||
2#0111001001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdu
|
||||
2#1111001001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divduo
|
||||
2#0111001011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divwu
|
||||
2#1111001011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divwuo
|
||||
2#0111101001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divd
|
||||
2#1111101001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdo
|
||||
2#0111101011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divw
|
||||
2#1111101011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divwo
|
||||
2#0110001001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divdeu
|
||||
2#1110001001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divdeuo
|
||||
2#0110001011# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- divweu
|
||||
2#1110001011# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- divweuo
|
||||
2#0110101001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- divde
|
||||
2#1110101001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- divdeo
|
||||
2#0110101011# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- divwe
|
||||
2#1110101011# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- divweo
|
||||
2#0111001001# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divdu
|
||||
2#1111001001# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divduo
|
||||
2#0111001011# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- divwu
|
||||
2#1111001011# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- divwuo
|
||||
2#0111101001# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- divd
|
||||
2#1111101001# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- divdo
|
||||
2#0111101011# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- divw
|
||||
2#1111101011# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- divwo
|
||||
2#0100011100# => (ALU, OP_XOR, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- eqv
|
||||
2#1110111010# => (ALU, OP_EXTS, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- extsb
|
||||
2#1110011010# => (ALU, OP_EXTS, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- extsh
|
||||
@@ -238,36 +238,36 @@ architecture behaviour of decode1 is
|
||||
-- 2#1001000000# mcrxrx
|
||||
2#0000010011# => (ALU, OP_MFCR, NONE, NONE, NONE, RT, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mfcr/mfocrf
|
||||
2#0101010011# => (ALU, OP_MFSPR, SPR, NONE, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mfspr
|
||||
2#0100001001# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- modud
|
||||
2#0100001011# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- moduw
|
||||
2#1100001001# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- modsd
|
||||
2#1100001011# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- modsw
|
||||
2#0100001001# => (ALU, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- modud
|
||||
2#0100001011# => (ALU, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- moduw
|
||||
2#1100001001# => (ALU, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0'), -- modsd
|
||||
2#1100001011# => (ALU, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', NONE, '0', '0'), -- modsw
|
||||
2#0010010000# => (ALU, OP_MTCRF, NONE, NONE, RS, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mtcrf/mtocrf
|
||||
2#0111010011# => (ALU, OP_MTSPR, NONE, NONE, RS, SPR, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mtspr
|
||||
2#0001001001# => (MUL, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- mulhd
|
||||
2#0000001001# => (MUL, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- mulhdu
|
||||
2#0001001011# => (MUL, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- mulhw
|
||||
2#0000001011# => (MUL, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '1'), -- mulhwu
|
||||
2#0001001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulhd
|
||||
2#0000001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- mulhdu
|
||||
2#0001001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mulhw
|
||||
2#0000001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- mulhwu
|
||||
-- next 4 have reserved bit set
|
||||
2#1001001001# => (MUL, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- mulhd
|
||||
2#1000001001# => (MUL, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- mulhdu
|
||||
2#1001001011# => (MUL, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- mulhw
|
||||
2#1000001011# => (MUL, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '1'), -- mulhwu
|
||||
2#0011101001# => (MUL, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- mulld
|
||||
2#1011101001# => (MUL, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- mulldo
|
||||
2#0011101011# => (MUL, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- mullw
|
||||
2#1011101011# => (MUL, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- mullwo
|
||||
2#1001001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulhd
|
||||
2#1000001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- mulhdu
|
||||
2#1001001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mulhw
|
||||
2#1000001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- mulhwu
|
||||
2#0011101001# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulld
|
||||
2#1011101001# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulldo
|
||||
2#0011101011# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mullw
|
||||
2#1011101011# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mullwo
|
||||
2#0111011100# => (ALU, OP_AND, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- nand
|
||||
2#0001101000# => (ALU, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- neg
|
||||
2#1001101000# => (ALU, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- nego
|
||||
2#0001111100# => (ALU, OP_OR, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- nor
|
||||
2#0110111100# => (ALU, OP_OR, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- or
|
||||
2#0110011100# => (ALU, OP_OR, NONE, RB, RS, RA, '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- orc
|
||||
2#0001111010# => (ALU, OP_POPCNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntb
|
||||
2#0111111010# => (ALU, OP_POPCNTD, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntd
|
||||
2#0101111010# => (ALU, OP_POPCNTW, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntw
|
||||
2#0010111010# => (ALU, OP_PRTYD, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- prtyd
|
||||
2#0010011010# => (ALU, OP_PRTYW, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- prtyw
|
||||
2#0001111010# => (ALU, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntb
|
||||
2#0111111010# => (ALU, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntd
|
||||
2#0101111010# => (ALU, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntw
|
||||
2#0010111010# => (ALU, OP_PRTY, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- prtyd
|
||||
2#0010011010# => (ALU, OP_PRTY, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- prtyw
|
||||
-- 2#0010000000# setb
|
||||
2#0000011011# => (ALU, OP_SHL, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- sld
|
||||
2#0000011000# => (ALU, OP_SHL, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- slw
|
||||
|
||||
162
decode2.vhdl
162
decode2.vhdl
@@ -9,6 +9,9 @@ use work.helpers.all;
|
||||
use work.insn_helpers.all;
|
||||
|
||||
entity decode2 is
|
||||
generic (
|
||||
EX1_BYPASS : boolean := true
|
||||
);
|
||||
port (
|
||||
clk : in std_ulogic;
|
||||
rst : in std_ulogic;
|
||||
@@ -24,9 +27,6 @@ entity decode2 is
|
||||
d_in : in Decode1ToDecode2Type;
|
||||
|
||||
e_out : out Decode2ToExecute1Type;
|
||||
m_out : out Decode2ToMultiplyType;
|
||||
d_out : out Decode2ToDividerType;
|
||||
l_out : out Decode2ToLoadstore1Type;
|
||||
|
||||
r_in : in RegisterFileToDecode2Type;
|
||||
r_out : out Decode2ToRegisterFileType;
|
||||
@@ -39,9 +39,6 @@ end entity decode2;
|
||||
architecture behaviour of decode2 is
|
||||
type reg_type is record
|
||||
e : Decode2ToExecute1Type;
|
||||
m : Decode2ToMultiplyType;
|
||||
d : Decode2ToDividerType;
|
||||
l : Decode2ToLoadstore1Type;
|
||||
end record;
|
||||
|
||||
signal r, rin : reg_type;
|
||||
@@ -188,15 +185,19 @@ architecture behaviour of decode2 is
|
||||
|
||||
signal gpr_write_valid : std_ulogic;
|
||||
signal gpr_write : gspr_index_t;
|
||||
signal gpr_bypassable : std_ulogic;
|
||||
|
||||
signal gpr_a_read_valid : std_ulogic;
|
||||
signal gpr_a_read :gspr_index_t;
|
||||
signal gpr_a_bypass : std_ulogic;
|
||||
|
||||
signal gpr_b_read_valid : std_ulogic;
|
||||
signal gpr_b_read : gspr_index_t;
|
||||
signal gpr_b_bypass : std_ulogic;
|
||||
|
||||
signal gpr_c_read_valid : std_ulogic;
|
||||
signal gpr_c_read : gpr_index_t;
|
||||
signal gpr_c_bypass : std_ulogic;
|
||||
|
||||
signal cr_write_valid : std_ulogic;
|
||||
begin
|
||||
@@ -217,6 +218,7 @@ begin
|
||||
|
||||
gpr_write_valid_in => gpr_write_valid,
|
||||
gpr_write_in => gpr_write,
|
||||
gpr_bypassable => gpr_bypassable,
|
||||
|
||||
gpr_a_read_valid_in => gpr_a_read_valid,
|
||||
gpr_a_read_in => gpr_a_read,
|
||||
@@ -232,13 +234,17 @@ begin
|
||||
|
||||
valid_out => control_valid_out,
|
||||
stall_out => stall_out,
|
||||
stopped_out => stopped_out
|
||||
stopped_out => stopped_out,
|
||||
|
||||
gpr_bypass_a => gpr_a_bypass,
|
||||
gpr_bypass_b => gpr_b_bypass,
|
||||
gpr_bypass_c => gpr_c_bypass
|
||||
);
|
||||
|
||||
decode2_0: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
if rin.e.valid = '1' or rin.l.valid = '1' or rin.m.valid = '1' or rin.d.valid = '1' then
|
||||
if rin.e.valid = '1' then
|
||||
report "execute " & to_hstring(rin.e.nia);
|
||||
end if;
|
||||
r <= rin;
|
||||
@@ -259,21 +265,16 @@ begin
|
||||
variable decoded_reg_b : decode_input_reg_t;
|
||||
variable decoded_reg_c : decode_input_reg_t;
|
||||
variable decoded_reg_o : decode_output_reg_t;
|
||||
variable signed_division: std_ulogic;
|
||||
variable length : std_ulogic_vector(3 downto 0);
|
||||
begin
|
||||
v := r;
|
||||
|
||||
v.e := Decode2ToExecute1Init;
|
||||
v.l := Decode2ToLoadStore1Init;
|
||||
v.m := Decode2ToMultiplyInit;
|
||||
v.d := Decode2ToDividerInit;
|
||||
|
||||
mul_a := (others => '0');
|
||||
mul_b := (others => '0');
|
||||
|
||||
--v.e.input_cr := d_in.decode.input_cr;
|
||||
--v.m.input_cr := d_in.decode.input_cr;
|
||||
--v.e.output_cr := d_in.decode.output_cr;
|
||||
|
||||
decoded_reg_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, r_in.read1_data, d_in.ispr1);
|
||||
@@ -303,12 +304,17 @@ begin
|
||||
v.e.insn_type := d_in.decode.insn_type;
|
||||
v.e.read_reg1 := decoded_reg_a.reg;
|
||||
v.e.read_data1 := decoded_reg_a.data;
|
||||
v.e.bypass_data1 := gpr_a_bypass;
|
||||
v.e.read_reg2 := decoded_reg_b.reg;
|
||||
v.e.read_data2 := decoded_reg_b.data;
|
||||
v.e.bypass_data2 := gpr_b_bypass;
|
||||
v.e.read_data3 := decoded_reg_c.data;
|
||||
v.e.bypass_data3 := gpr_c_bypass;
|
||||
v.e.write_reg := decoded_reg_o.reg;
|
||||
v.e.rc := decode_rc(d_in.decode.rc, d_in.insn);
|
||||
v.e.oe := decode_oe(d_in.decode.rc, d_in.insn);
|
||||
if not (d_in.decode.insn_type = OP_MUL_H32 or d_in.decode.insn_type = OP_MUL_H64) then
|
||||
v.e.oe := decode_oe(d_in.decode.rc, d_in.insn);
|
||||
end if;
|
||||
v.e.cr := c_in.read_cr_data;
|
||||
v.e.xerc := c_in.read_xerc_data;
|
||||
v.e.invert_a := d_in.decode.invert_a;
|
||||
@@ -322,102 +328,9 @@ begin
|
||||
end if;
|
||||
v.e.insn := d_in.insn;
|
||||
v.e.data_len := length;
|
||||
|
||||
-- multiply unit
|
||||
v.m.insn_type := d_in.decode.insn_type;
|
||||
mul_a := decoded_reg_a.data;
|
||||
mul_b := decoded_reg_b.data;
|
||||
v.m.write_reg := gspr_to_gpr(decoded_reg_o.reg);
|
||||
v.m.rc := decode_rc(d_in.decode.rc, d_in.insn);
|
||||
v.m.xerc := c_in.read_xerc_data;
|
||||
if v.m.insn_type = OP_MUL_L64 then
|
||||
v.m.oe := decode_oe(d_in.decode.rc, d_in.insn);
|
||||
end if;
|
||||
v.m.is_32bit := d_in.decode.is_32bit;
|
||||
|
||||
if d_in.decode.is_32bit = '1' then
|
||||
if d_in.decode.is_signed = '1' then
|
||||
v.m.data1 := (others => mul_a(31));
|
||||
v.m.data1(31 downto 0) := mul_a(31 downto 0);
|
||||
v.m.data2 := (others => mul_b(31));
|
||||
v.m.data2(31 downto 0) := mul_b(31 downto 0);
|
||||
else
|
||||
v.m.data1 := '0' & x"00000000" & mul_a(31 downto 0);
|
||||
v.m.data2 := '0' & x"00000000" & mul_b(31 downto 0);
|
||||
end if;
|
||||
else
|
||||
if d_in.decode.is_signed = '1' then
|
||||
v.m.data1 := mul_a(63) & mul_a;
|
||||
v.m.data2 := mul_b(63) & mul_b;
|
||||
else
|
||||
v.m.data1 := '0' & mul_a;
|
||||
v.m.data2 := '0' & mul_b;
|
||||
end if;
|
||||
end if;
|
||||
|
||||
-- divide unit
|
||||
-- PPC divide and modulus instruction words have these bits in
|
||||
-- the bottom 11 bits: o1dns 010t1 r
|
||||
-- where o = OE for div instrs, signedness for mod instrs
|
||||
-- d = 1 for div*, 0 for mod*
|
||||
-- n = 1 for normal, 0 for extended (dividend << 32/64)
|
||||
-- s = 1 for signed, 0 for unsigned (for div*)
|
||||
-- t = 1 for 32-bit, 0 for 64-bit
|
||||
-- r = RC bit (record condition code)
|
||||
v.d.write_reg := gspr_to_gpr(decoded_reg_o.reg);
|
||||
v.d.is_modulus := not d_in.insn(8);
|
||||
v.d.is_32bit := d_in.insn(2);
|
||||
if d_in.insn(8) = '1' then
|
||||
signed_division := d_in.insn(6);
|
||||
else
|
||||
signed_division := d_in.insn(10);
|
||||
end if;
|
||||
v.d.is_signed := signed_division;
|
||||
if d_in.insn(2) = '0' then
|
||||
-- 64-bit forms
|
||||
if d_in.insn(8) = '1' and d_in.insn(7) = '0' then
|
||||
v.d.is_extended := '1';
|
||||
end if;
|
||||
v.d.dividend := decoded_reg_a.data;
|
||||
v.d.divisor := decoded_reg_b.data;
|
||||
else
|
||||
-- 32-bit forms
|
||||
if d_in.insn(8) = '1' and d_in.insn(7) = '0' then -- extended forms
|
||||
v.d.dividend := decoded_reg_a.data(31 downto 0) & x"00000000";
|
||||
elsif signed_division = '1' and decoded_reg_a.data(31) = '1' then
|
||||
-- sign extend to 64 bits
|
||||
v.d.dividend := x"ffffffff" & decoded_reg_a.data(31 downto 0);
|
||||
else
|
||||
v.d.dividend := x"00000000" & decoded_reg_a.data(31 downto 0);
|
||||
end if;
|
||||
if signed_division = '1' and decoded_reg_b.data(31) = '1' then
|
||||
v.d.divisor := x"ffffffff" & decoded_reg_b.data(31 downto 0);
|
||||
else
|
||||
v.d.divisor := x"00000000" & decoded_reg_b.data(31 downto 0);
|
||||
end if;
|
||||
end if;
|
||||
v.d.rc := decode_rc(d_in.decode.rc, d_in.insn);
|
||||
v.d.xerc := c_in.read_xerc_data;
|
||||
v.d.oe := decode_oe(d_in.decode.rc, d_in.insn);
|
||||
|
||||
-- load/store unit
|
||||
v.l.update_reg := gspr_to_gpr(decoded_reg_a.reg);
|
||||
v.l.addr1 := decoded_reg_a.data;
|
||||
v.l.addr2 := decoded_reg_b.data;
|
||||
v.l.data := decoded_reg_c.data;
|
||||
v.l.write_reg := gspr_to_gpr(decoded_reg_o.reg);
|
||||
|
||||
if d_in.decode.insn_type = OP_LOAD then
|
||||
v.l.load := '1';
|
||||
else
|
||||
v.l.load := '0';
|
||||
end if;
|
||||
|
||||
v.l.length := length;
|
||||
v.l.byte_reverse := d_in.decode.byte_reverse;
|
||||
v.l.sign_extend := d_in.decode.sign_extend;
|
||||
v.l.update := d_in.decode.update;
|
||||
v.l.xerc := c_in.read_xerc_data;
|
||||
v.e.byte_reverse := d_in.decode.byte_reverse;
|
||||
v.e.sign_extend := d_in.decode.sign_extend;
|
||||
v.e.update := d_in.decode.update;
|
||||
|
||||
-- issue control
|
||||
control_valid_in <= d_in.valid;
|
||||
@@ -425,6 +338,10 @@ begin
|
||||
|
||||
gpr_write_valid <= decoded_reg_o.reg_valid;
|
||||
gpr_write <= decoded_reg_o.reg;
|
||||
gpr_bypassable <= '0';
|
||||
if EX1_BYPASS and d_in.decode.unit = ALU then
|
||||
gpr_bypassable <= '1';
|
||||
end if;
|
||||
|
||||
gpr_a_read_valid <= decoded_reg_a.reg_valid;
|
||||
gpr_a_read <= decoded_reg_a.reg;
|
||||
@@ -437,29 +354,13 @@ begin
|
||||
|
||||
cr_write_valid <= d_in.decode.output_cr or decode_rc(d_in.decode.rc, d_in.insn);
|
||||
|
||||
v.e.valid := '0';
|
||||
v.m.valid := '0';
|
||||
v.d.valid := '0';
|
||||
v.l.valid := '0';
|
||||
case d_in.decode.unit is
|
||||
when ALU =>
|
||||
v.e.valid := control_valid_out;
|
||||
when LDST =>
|
||||
v.l.valid := control_valid_out;
|
||||
when MUL =>
|
||||
v.m.valid := control_valid_out;
|
||||
when DIV =>
|
||||
v.d.valid := control_valid_out;
|
||||
when NONE =>
|
||||
v.e.valid := control_valid_out;
|
||||
v.e.valid := control_valid_out;
|
||||
if d_in.decode.unit = NONE then
|
||||
v.e.insn_type := OP_ILLEGAL;
|
||||
end case;
|
||||
end if;
|
||||
|
||||
if rst = '1' then
|
||||
v.e := Decode2ToExecute1Init;
|
||||
v.l := Decode2ToLoadStore1Init;
|
||||
v.m := Decode2ToMultiplyInit;
|
||||
v.d := Decode2ToDividerInit;
|
||||
end if;
|
||||
|
||||
-- Update registers
|
||||
@@ -467,8 +368,5 @@ begin
|
||||
|
||||
-- Update outputs
|
||||
e_out <= r.e;
|
||||
l_out <= r.l;
|
||||
m_out <= r.m;
|
||||
d_out <= r.d;
|
||||
end process;
|
||||
end architecture behaviour;
|
||||
|
||||
@@ -4,18 +4,18 @@ use ieee.std_logic_1164.all;
|
||||
package decode_types is
|
||||
type insn_type_t is (OP_ILLEGAL, OP_NOP, OP_ADD,
|
||||
OP_ADDPCIS, OP_AND, OP_ATTN, OP_B, OP_BC, OP_BCREG,
|
||||
OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPL, OP_CMPRB,
|
||||
OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB,
|
||||
OP_CNTZ, OP_CRAND,
|
||||
OP_CRANDC, OP_CREQV, OP_CRNAND, OP_CRNOR, OP_CROR, OP_CRORC,
|
||||
OP_CRXOR, OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST,
|
||||
OP_DCBZ, OP_DIV, OP_EXTS,
|
||||
OP_DCBZ, OP_DIV, OP_DIVE, OP_EXTS,
|
||||
OP_EXTSWSLI, OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC,
|
||||
OP_LOAD, OP_STORE, OP_MADDHD, OP_MADDHDU, OP_MADDLD, OP_MCRF,
|
||||
OP_MCRXR, OP_MCRXRX, OP_MFCR, OP_MFSPR, OP_MOD,
|
||||
OP_MTCRF, OP_MTSPR, OP_MUL_L64,
|
||||
OP_MUL_H64, OP_MUL_H32, OP_OR,
|
||||
OP_POPCNTB, OP_POPCNTD, OP_POPCNTW, OP_PRTYD,
|
||||
OP_PRTYW, OP_RLC, OP_RLCL, OP_RLCR, OP_SETB,
|
||||
OP_POPCNT, OP_PRTY,
|
||||
OP_RLC, OP_RLCL, OP_RLCR, OP_SETB,
|
||||
OP_SHL, OP_SHR,
|
||||
OP_SYNC, OP_TD, OP_TDI, OP_TW,
|
||||
OP_TWI, OP_XOR, OP_SIM_CONFIG
|
||||
@@ -46,7 +46,7 @@ package decode_types is
|
||||
|
||||
constant TOO_OFFSET : integer := 0;
|
||||
|
||||
type unit_t is (NONE, ALU, LDST, MUL, DIV);
|
||||
type unit_t is (NONE, ALU, LDST);
|
||||
type length_t is (NONE, is1B, is2B, is4B, is8B);
|
||||
|
||||
type decode_rom_t is record
|
||||
|
||||
50
divider.vhdl
50
divider.vhdl
@@ -10,8 +10,8 @@ entity divider is
|
||||
port (
|
||||
clk : in std_logic;
|
||||
rst : in std_logic;
|
||||
d_in : in Decode2ToDividerType;
|
||||
d_out : out DividerToWritebackType
|
||||
d_in : in Execute1ToDividerType;
|
||||
d_out : out DividerToExecute1Type
|
||||
);
|
||||
end entity divider;
|
||||
|
||||
@@ -23,20 +23,15 @@ architecture behaviour of divider is
|
||||
signal sresult : std_ulogic_vector(64 downto 0);
|
||||
signal oresult : std_ulogic_vector(63 downto 0);
|
||||
signal running : std_ulogic;
|
||||
signal signcheck : std_ulogic;
|
||||
signal count : unsigned(6 downto 0);
|
||||
signal neg_result : std_ulogic;
|
||||
signal is_modulus : std_ulogic;
|
||||
signal is_32bit : std_ulogic;
|
||||
signal extended : std_ulogic;
|
||||
signal is_signed : std_ulogic;
|
||||
signal rc : std_ulogic;
|
||||
signal write_reg : std_ulogic_vector(4 downto 0);
|
||||
signal overflow : std_ulogic;
|
||||
signal ovf32 : std_ulogic;
|
||||
signal did_ovf : std_ulogic;
|
||||
signal oe : std_ulogic;
|
||||
signal xerc : xer_common_t;
|
||||
begin
|
||||
divider_0: process(clk)
|
||||
begin
|
||||
@@ -48,40 +43,22 @@ begin
|
||||
running <= '0';
|
||||
count <= "0000000";
|
||||
elsif d_in.valid = '1' then
|
||||
if d_in.is_extended = '1' and not (d_in.is_signed = '1' and d_in.dividend(63) = '1') then
|
||||
if d_in.is_extended = '1' then
|
||||
dend <= '0' & d_in.dividend & x"0000000000000000";
|
||||
else
|
||||
dend <= '0' & x"0000000000000000" & d_in.dividend;
|
||||
end if;
|
||||
div <= unsigned(d_in.divisor);
|
||||
quot <= (others => '0');
|
||||
write_reg <= d_in.write_reg;
|
||||
neg_result <= '0';
|
||||
neg_result <= d_in.neg_result;
|
||||
is_modulus <= d_in.is_modulus;
|
||||
extended <= d_in.is_extended;
|
||||
is_32bit <= d_in.is_32bit;
|
||||
is_signed <= d_in.is_signed;
|
||||
rc <= d_in.rc;
|
||||
oe <= d_in.oe;
|
||||
xerc <= d_in.xerc;
|
||||
count <= "1111111";
|
||||
running <= '1';
|
||||
overflow <= '0';
|
||||
ovf32 <= '0';
|
||||
signcheck <= d_in.is_signed and (d_in.dividend(63) or d_in.divisor(63));
|
||||
elsif signcheck = '1' then
|
||||
signcheck <= '0';
|
||||
neg_result <= dend(63) xor (div(63) and not is_modulus);
|
||||
if dend(63) = '1' then
|
||||
if extended = '1' then
|
||||
dend <= '0' & std_ulogic_vector(- signed(dend(63 downto 0))) & x"0000000000000000";
|
||||
else
|
||||
dend <= '0' & x"0000000000000000" & std_ulogic_vector(- signed(dend(63 downto 0)));
|
||||
end if;
|
||||
end if;
|
||||
if div(63) = '1' then
|
||||
div <= unsigned(- signed(div));
|
||||
end if;
|
||||
elsif running = '1' then
|
||||
if count = "0111111" then
|
||||
running <= '0';
|
||||
@@ -113,9 +90,6 @@ begin
|
||||
|
||||
divider_1: process(all)
|
||||
begin
|
||||
d_out.write_reg_nr <= write_reg;
|
||||
d_out.rc <= rc;
|
||||
|
||||
if is_modulus = '1' then
|
||||
result <= dend(128 downto 65);
|
||||
else
|
||||
@@ -151,23 +125,9 @@ begin
|
||||
if rising_edge(clk) then
|
||||
d_out.valid <= '0';
|
||||
d_out.write_reg_data <= oresult;
|
||||
d_out.write_reg_enable <= '0';
|
||||
d_out.write_xerc_enable <= '0';
|
||||
d_out.xerc <= xerc;
|
||||
d_out.overflow <= did_ovf;
|
||||
if count = "1000000" then
|
||||
d_out.valid <= '1';
|
||||
d_out.write_reg_enable <= '1';
|
||||
d_out.write_xerc_enable <= oe;
|
||||
|
||||
-- We must test oe because the RC update code in writeback
|
||||
-- will use the xerc value to set CR0:SO so we must not clobber
|
||||
-- xerc if OE wasn't set.
|
||||
--
|
||||
if oe = '1' then
|
||||
d_out.xerc.ov <= did_ovf;
|
||||
d_out.xerc.ov32 <= did_ovf;
|
||||
d_out.xerc.so <= xerc.so or did_ovf;
|
||||
end if;
|
||||
end if;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
@@ -16,8 +16,8 @@ architecture behave of divider_tb is
|
||||
signal rst : std_ulogic;
|
||||
constant clk_period : time := 10 ns;
|
||||
|
||||
signal d1 : Decode2ToDividerType;
|
||||
signal d2 : DividerToWritebackType;
|
||||
signal d1 : Execute1ToDividerType;
|
||||
signal d2 : DividerToExecute1Type;
|
||||
begin
|
||||
divider_0: entity work.divider
|
||||
port map (clk => clk, rst => rst, d_in => d1, d_out => d2);
|
||||
@@ -43,14 +43,13 @@ begin
|
||||
rst <= '0';
|
||||
|
||||
d1.valid <= '1';
|
||||
d1.write_reg <= "10001";
|
||||
d1.dividend <= x"0000000010001000";
|
||||
d1.divisor <= x"0000000000001111";
|
||||
d1.is_signed <= '0';
|
||||
d1.is_32bit <= '0';
|
||||
d1.is_extended <= '0';
|
||||
d1.is_modulus <= '0';
|
||||
d1.rc <= '0';
|
||||
d1.neg_result <= '0';
|
||||
|
||||
wait for clk_period;
|
||||
assert d2.valid = '0';
|
||||
@@ -65,16 +64,12 @@ begin
|
||||
end loop;
|
||||
|
||||
assert d2.valid = '1';
|
||||
assert d2.write_reg_enable = '1';
|
||||
assert d2.write_reg_nr = "10001";
|
||||
assert d2.write_reg_data = x"000000000000f001" report "result " & to_hstring(d2.write_reg_data);
|
||||
assert d2.rc = '0';
|
||||
|
||||
wait for clk_period;
|
||||
assert d2.valid = '0' report "valid";
|
||||
|
||||
d1.valid <= '1';
|
||||
d1.rc <= '1';
|
||||
|
||||
wait for clk_period;
|
||||
assert d2.valid = '0' report "valid";
|
||||
@@ -89,10 +84,7 @@ begin
|
||||
end loop;
|
||||
|
||||
assert d2.valid = '1';
|
||||
assert d2.write_reg_enable = '1';
|
||||
assert d2.write_reg_nr = "10001";
|
||||
assert d2.write_reg_data = x"000000000000f001" report "result " & to_hstring(d2.write_reg_data);
|
||||
assert d2.rc = '1';
|
||||
|
||||
wait for clk_period;
|
||||
assert d2.valid = '0';
|
||||
@@ -105,9 +97,10 @@ begin
|
||||
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
|
||||
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));
|
||||
|
||||
d1.dividend <= ra;
|
||||
d1.divisor <= rb;
|
||||
d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra));
|
||||
d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb));
|
||||
d1.is_signed <= '1';
|
||||
d1.neg_result <= ra(63) xor rb(63);
|
||||
d1.valid <= '1';
|
||||
|
||||
wait for clk_period;
|
||||
@@ -142,6 +135,7 @@ begin
|
||||
d1.dividend <= ra;
|
||||
d1.divisor <= rb;
|
||||
d1.is_signed <= '0';
|
||||
d1.neg_result <= '0';
|
||||
d1.valid <= '1';
|
||||
|
||||
wait for clk_period;
|
||||
@@ -173,9 +167,10 @@ begin
|
||||
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
|
||||
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));
|
||||
|
||||
d1.dividend <= ra;
|
||||
d1.divisor <= rb;
|
||||
d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra));
|
||||
d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb));
|
||||
d1.is_signed <= '1';
|
||||
d1.neg_result <= ra(63) xor rb(63);
|
||||
d1.is_extended <= '1';
|
||||
d1.valid <= '1';
|
||||
|
||||
@@ -216,6 +211,7 @@ begin
|
||||
d1.dividend <= ra;
|
||||
d1.divisor <= rb;
|
||||
d1.is_signed <= '0';
|
||||
d1.neg_result <= '0';
|
||||
d1.is_extended <= '1';
|
||||
d1.valid <= '1';
|
||||
|
||||
@@ -250,9 +246,10 @@ begin
|
||||
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
|
||||
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));
|
||||
|
||||
d1.dividend <= ra;
|
||||
d1.divisor <= rb;
|
||||
d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra));
|
||||
d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb));
|
||||
d1.is_signed <= '1';
|
||||
d1.neg_result <= ra(63) xor rb(63);
|
||||
d1.is_extended <= '0';
|
||||
d1.is_32bit <= '1';
|
||||
d1.valid <= '1';
|
||||
@@ -289,6 +286,7 @@ begin
|
||||
d1.dividend <= ra;
|
||||
d1.divisor <= rb;
|
||||
d1.is_signed <= '0';
|
||||
d1.neg_result <= '0';
|
||||
d1.is_extended <= '0';
|
||||
d1.is_32bit <= '1';
|
||||
d1.valid <= '1';
|
||||
@@ -322,9 +320,10 @@ begin
|
||||
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 32)) & x"00000000";
|
||||
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));
|
||||
|
||||
d1.dividend <= ra;
|
||||
d1.divisor <= rb;
|
||||
d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra));
|
||||
d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb));
|
||||
d1.is_signed <= '1';
|
||||
d1.neg_result <= ra(63) xor rb(63);
|
||||
d1.is_extended <= '0';
|
||||
d1.is_32bit <= '1';
|
||||
d1.valid <= '1';
|
||||
@@ -365,6 +364,7 @@ begin
|
||||
d1.dividend <= ra;
|
||||
d1.divisor <= rb;
|
||||
d1.is_signed <= '0';
|
||||
d1.neg_result <= '0';
|
||||
d1.is_extended <= '0';
|
||||
d1.is_32bit <= '1';
|
||||
d1.valid <= '1';
|
||||
@@ -398,9 +398,10 @@ begin
|
||||
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
|
||||
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));
|
||||
|
||||
d1.dividend <= ra;
|
||||
d1.divisor <= rb;
|
||||
d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra));
|
||||
d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb));
|
||||
d1.is_signed <= '1';
|
||||
d1.neg_result <= ra(63);
|
||||
d1.is_extended <= '0';
|
||||
d1.is_32bit <= '0';
|
||||
d1.is_modulus <= '1';
|
||||
@@ -438,6 +439,7 @@ begin
|
||||
d1.dividend <= ra;
|
||||
d1.divisor <= rb;
|
||||
d1.is_signed <= '0';
|
||||
d1.neg_result <= '0';
|
||||
d1.is_extended <= '0';
|
||||
d1.is_32bit <= '0';
|
||||
d1.is_modulus <= '1';
|
||||
@@ -472,9 +474,10 @@ begin
|
||||
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
|
||||
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));
|
||||
|
||||
d1.dividend <= ra;
|
||||
d1.divisor <= rb;
|
||||
d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra));
|
||||
d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb));
|
||||
d1.is_signed <= '1';
|
||||
d1.neg_result <= ra(63);
|
||||
d1.is_extended <= '0';
|
||||
d1.is_32bit <= '1';
|
||||
d1.is_modulus <= '1';
|
||||
@@ -517,6 +520,7 @@ begin
|
||||
d1.dividend <= ra;
|
||||
d1.divisor <= rb;
|
||||
d1.is_signed <= '0';
|
||||
d1.neg_result <= '0';
|
||||
d1.is_extended <= '0';
|
||||
d1.is_32bit <= '1';
|
||||
d1.is_modulus <= '1';
|
||||
|
||||
407
execute1.vhdl
407
execute1.vhdl
@@ -11,8 +11,12 @@ use work.insn_helpers.all;
|
||||
use work.ppc_fx_insns.all;
|
||||
|
||||
entity execute1 is
|
||||
generic (
|
||||
EX1_BYPASS : boolean := true
|
||||
);
|
||||
port (
|
||||
clk : in std_ulogic;
|
||||
rst : in std_ulogic;
|
||||
|
||||
-- asynchronous
|
||||
flush_out : out std_ulogic;
|
||||
@@ -21,6 +25,7 @@ entity execute1 is
|
||||
e_in : in Decode2ToExecute1Type;
|
||||
|
||||
-- asynchronous
|
||||
l_out : out Execute1ToLoadstore1Type;
|
||||
f_out : out Execute1ToFetch1Type;
|
||||
|
||||
e_out : out Execute1ToWritebackType;
|
||||
@@ -35,10 +40,19 @@ architecture behaviour of execute1 is
|
||||
e : Execute1ToWritebackType;
|
||||
lr_update : std_ulogic;
|
||||
next_lr : std_ulogic_vector(63 downto 0);
|
||||
mul_in_progress : std_ulogic;
|
||||
div_in_progress : std_ulogic;
|
||||
cntz_in_progress : std_ulogic;
|
||||
slow_op_dest : gpr_index_t;
|
||||
slow_op_rc : std_ulogic;
|
||||
slow_op_oe : std_ulogic;
|
||||
slow_op_xerc : xer_common_t;
|
||||
end record;
|
||||
|
||||
signal r, rin : reg_type;
|
||||
|
||||
signal a_in, b_in, c_in : std_ulogic_vector(63 downto 0);
|
||||
|
||||
signal ctrl: ctrl_t := (others => (others => '0'));
|
||||
signal ctrl_tmp: ctrl_t := (others => (others => '0'));
|
||||
|
||||
@@ -47,6 +61,16 @@ architecture behaviour of execute1 is
|
||||
signal rotator_carry: std_ulogic;
|
||||
signal logical_result: std_ulogic_vector(63 downto 0);
|
||||
signal countzero_result: std_ulogic_vector(63 downto 0);
|
||||
signal popcnt_result: std_ulogic_vector(63 downto 0);
|
||||
signal parity_result: std_ulogic_vector(63 downto 0);
|
||||
|
||||
-- multiply signals
|
||||
signal x_to_multiply: Execute1ToMultiplyType;
|
||||
signal multiply_to_x: MultiplyToExecute1Type;
|
||||
|
||||
-- divider signals
|
||||
signal x_to_divider: Execute1ToDividerType;
|
||||
signal divider_to_x: DividerToExecute1Type;
|
||||
|
||||
procedure set_carry(e: inout Execute1ToWritebackType;
|
||||
carry32 : in std_ulogic;
|
||||
@@ -92,9 +116,9 @@ begin
|
||||
|
||||
rotator_0: entity work.rotator
|
||||
port map (
|
||||
rs => e_in.read_data3,
|
||||
ra => e_in.read_data1,
|
||||
shift => e_in.read_data2(6 downto 0),
|
||||
rs => c_in,
|
||||
ra => a_in,
|
||||
shift => b_in(6 downto 0),
|
||||
insn => e_in.insn,
|
||||
is_32bit => e_in.is_32bit,
|
||||
right_shift => right_shift,
|
||||
@@ -107,22 +131,45 @@ begin
|
||||
|
||||
logical_0: entity work.logical
|
||||
port map (
|
||||
rs => e_in.read_data3,
|
||||
rb => e_in.read_data2,
|
||||
rs => c_in,
|
||||
rb => b_in,
|
||||
op => e_in.insn_type,
|
||||
invert_in => e_in.invert_a,
|
||||
invert_out => e_in.invert_out,
|
||||
result => logical_result
|
||||
result => logical_result,
|
||||
datalen => e_in.data_len,
|
||||
popcnt => popcnt_result,
|
||||
parity => parity_result
|
||||
);
|
||||
|
||||
countzero_0: entity work.zero_counter
|
||||
port map (
|
||||
rs => e_in.read_data3,
|
||||
clk => clk,
|
||||
rs => c_in,
|
||||
count_right => e_in.insn(10),
|
||||
is_32bit => e_in.is_32bit,
|
||||
result => countzero_result
|
||||
);
|
||||
|
||||
multiply_0: entity work.multiply
|
||||
port map (
|
||||
clk => clk,
|
||||
m_in => x_to_multiply,
|
||||
m_out => multiply_to_x
|
||||
);
|
||||
|
||||
divider_0: entity work.divider
|
||||
port map (
|
||||
clk => clk,
|
||||
rst => rst,
|
||||
d_in => x_to_divider,
|
||||
d_out => divider_to_x
|
||||
);
|
||||
|
||||
a_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data1 = '1' else e_in.read_data1;
|
||||
b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2;
|
||||
c_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data3 = '1' else e_in.read_data3;
|
||||
|
||||
execute1_0: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
@@ -159,6 +206,14 @@ begin
|
||||
variable l : std_ulogic;
|
||||
variable next_nia : std_ulogic_vector(63 downto 0);
|
||||
variable carry_32, carry_64 : std_ulogic;
|
||||
variable sign1, sign2 : std_ulogic;
|
||||
variable abs1, abs2 : signed(63 downto 0);
|
||||
variable overflow : std_ulogic;
|
||||
variable negative : std_ulogic;
|
||||
variable zerohi, zerolo : std_ulogic;
|
||||
variable msb_a, msb_b : std_ulogic;
|
||||
variable a_lt : std_ulogic;
|
||||
variable lv : Execute1ToLoadstore1Type;
|
||||
begin
|
||||
result := (others => '0');
|
||||
result_with_carry := (others => '0');
|
||||
@@ -204,6 +259,83 @@ begin
|
||||
end if;
|
||||
|
||||
v.lr_update := '0';
|
||||
v.mul_in_progress := '0';
|
||||
v.div_in_progress := '0';
|
||||
v.cntz_in_progress := '0';
|
||||
|
||||
-- signals to multiply unit
|
||||
x_to_multiply <= Execute1ToMultiplyInit;
|
||||
x_to_multiply.insn_type <= e_in.insn_type;
|
||||
x_to_multiply.is_32bit <= e_in.is_32bit;
|
||||
|
||||
if e_in.is_32bit = '1' then
|
||||
if e_in.is_signed = '1' then
|
||||
x_to_multiply.data1 <= (others => a_in(31));
|
||||
x_to_multiply.data1(31 downto 0) <= a_in(31 downto 0);
|
||||
x_to_multiply.data2 <= (others => b_in(31));
|
||||
x_to_multiply.data2(31 downto 0) <= b_in(31 downto 0);
|
||||
else
|
||||
x_to_multiply.data1 <= '0' & x"00000000" & a_in(31 downto 0);
|
||||
x_to_multiply.data2 <= '0' & x"00000000" & b_in(31 downto 0);
|
||||
end if;
|
||||
else
|
||||
if e_in.is_signed = '1' then
|
||||
x_to_multiply.data1 <= a_in(63) & a_in;
|
||||
x_to_multiply.data2 <= b_in(63) & b_in;
|
||||
else
|
||||
x_to_multiply.data1 <= '0' & a_in;
|
||||
x_to_multiply.data2 <= '0' & b_in;
|
||||
end if;
|
||||
end if;
|
||||
|
||||
-- signals to divide unit
|
||||
sign1 := '0';
|
||||
sign2 := '0';
|
||||
if e_in.is_signed = '1' then
|
||||
if e_in.is_32bit = '1' then
|
||||
sign1 := a_in(31);
|
||||
sign2 := b_in(31);
|
||||
else
|
||||
sign1 := a_in(63);
|
||||
sign2 := b_in(63);
|
||||
end if;
|
||||
end if;
|
||||
-- take absolute values
|
||||
if sign1 = '0' then
|
||||
abs1 := signed(a_in);
|
||||
else
|
||||
abs1 := - signed(a_in);
|
||||
end if;
|
||||
if sign2 = '0' then
|
||||
abs2 := signed(b_in);
|
||||
else
|
||||
abs2 := - signed(b_in);
|
||||
end if;
|
||||
|
||||
x_to_divider <= Execute1ToDividerInit;
|
||||
x_to_divider.is_signed <= e_in.is_signed;
|
||||
x_to_divider.is_32bit <= e_in.is_32bit;
|
||||
if e_in.insn_type = OP_MOD then
|
||||
x_to_divider.is_modulus <= '1';
|
||||
end if;
|
||||
x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus);
|
||||
if e_in.is_32bit = '0' then
|
||||
-- 64-bit forms
|
||||
if e_in.insn_type = OP_DIVE then
|
||||
x_to_divider.is_extended <= '1';
|
||||
end if;
|
||||
x_to_divider.dividend <= std_ulogic_vector(abs1);
|
||||
x_to_divider.divisor <= std_ulogic_vector(abs2);
|
||||
else
|
||||
-- 32-bit forms
|
||||
x_to_divider.is_extended <= '0';
|
||||
if e_in.insn_type = OP_DIVE then -- extended forms
|
||||
x_to_divider.dividend <= std_ulogic_vector(abs1(31 downto 0)) & x"00000000";
|
||||
else
|
||||
x_to_divider.dividend <= x"00000000" & std_ulogic_vector(abs1(31 downto 0));
|
||||
end if;
|
||||
x_to_divider.divisor <= x"00000000" & std_ulogic_vector(abs2(31 downto 0));
|
||||
end if;
|
||||
|
||||
ctrl_tmp <= ctrl;
|
||||
-- FIXME: run at 512MHz not core freq
|
||||
@@ -226,8 +358,10 @@ begin
|
||||
|
||||
v.e.valid := '1';
|
||||
v.e.write_reg := e_in.write_reg;
|
||||
v.e.write_len := x"8";
|
||||
v.e.sign_extend := '0';
|
||||
v.slow_op_dest := gspr_to_gpr(e_in.write_reg);
|
||||
v.slow_op_rc := e_in.rc;
|
||||
v.slow_op_oe := e_in.oe;
|
||||
v.slow_op_xerc := v.e.xerc;
|
||||
|
||||
case_0: case e_in.insn_type is
|
||||
|
||||
@@ -236,51 +370,93 @@ begin
|
||||
report "illegal";
|
||||
when OP_NOP =>
|
||||
-- Do nothing
|
||||
when OP_ADD =>
|
||||
when OP_ADD | OP_CMP =>
|
||||
if e_in.invert_a = '0' then
|
||||
a_inv := e_in.read_data1;
|
||||
a_inv := a_in;
|
||||
else
|
||||
a_inv := not e_in.read_data1;
|
||||
a_inv := not a_in;
|
||||
end if;
|
||||
result_with_carry := ppc_adde(a_inv, e_in.read_data2,
|
||||
result_with_carry := ppc_adde(a_inv, b_in,
|
||||
decode_input_carry(e_in.input_carry, v.e.xerc));
|
||||
result := result_with_carry(63 downto 0);
|
||||
carry_32 := result(32) xor a_inv(32) xor e_in.read_data2(32);
|
||||
carry_32 := result(32) xor a_inv(32) xor b_in(32);
|
||||
carry_64 := result_with_carry(64);
|
||||
if e_in.output_carry = '1' then
|
||||
set_carry(v.e, carry_32, carry_64);
|
||||
end if;
|
||||
if e_in.oe = '1' then
|
||||
set_ov(v.e,
|
||||
calc_ov(a_inv(63), e_in.read_data2(63), carry_64, result_with_carry(63)),
|
||||
calc_ov(a_inv(31), e_in.read_data2(31), carry_32, result_with_carry(31)));
|
||||
end if;
|
||||
result_en := '1';
|
||||
if e_in.insn_type = OP_ADD then
|
||||
if e_in.output_carry = '1' then
|
||||
set_carry(v.e, carry_32, carry_64);
|
||||
end if;
|
||||
if e_in.oe = '1' then
|
||||
set_ov(v.e,
|
||||
calc_ov(a_inv(63), b_in(63), carry_64, result_with_carry(63)),
|
||||
calc_ov(a_inv(31), b_in(31), carry_32, result_with_carry(31)));
|
||||
end if;
|
||||
result_en := '1';
|
||||
else
|
||||
-- CMP and CMPL instructions
|
||||
-- Note, we have done RB - RA, not RA - RB
|
||||
bf := insn_bf(e_in.insn);
|
||||
l := insn_l(e_in.insn);
|
||||
v.e.write_cr_enable := '1';
|
||||
crnum := to_integer(unsigned(bf));
|
||||
v.e.write_cr_mask := num_to_fxm(crnum);
|
||||
zerolo := not (or (a_in(31 downto 0) xor b_in(31 downto 0)));
|
||||
zerohi := not (or (a_in(63 downto 32) xor b_in(63 downto 32)));
|
||||
if zerolo = '1' and (l = '0' or zerohi = '1') then
|
||||
-- values are equal
|
||||
newcrf := "001" & v.e.xerc.so;
|
||||
else
|
||||
if l = '1' then
|
||||
-- 64-bit comparison
|
||||
msb_a := a_in(63);
|
||||
msb_b := b_in(63);
|
||||
else
|
||||
-- 32-bit comparison
|
||||
msb_a := a_in(31);
|
||||
msb_b := b_in(31);
|
||||
end if;
|
||||
if msb_a /= msb_b then
|
||||
-- Subtraction might overflow, but
|
||||
-- comparison is clear from MSB difference.
|
||||
-- for signed, 0 is greater; for unsigned, 1 is greater
|
||||
a_lt := msb_a xnor e_in.is_signed;
|
||||
else
|
||||
-- Subtraction cannot overflow since MSBs are equal.
|
||||
-- carry = 1 indicates RA is smaller (signed or unsigned)
|
||||
a_lt := (not l and carry_32) or (l and carry_64);
|
||||
end if;
|
||||
newcrf := a_lt & not a_lt & '0' & v.e.xerc.so;
|
||||
end if;
|
||||
for i in 0 to 7 loop
|
||||
lo := i*4;
|
||||
hi := lo + 3;
|
||||
v.e.write_cr_data(hi downto lo) := newcrf;
|
||||
end loop;
|
||||
end if;
|
||||
when OP_AND | OP_OR | OP_XOR =>
|
||||
result := logical_result;
|
||||
result_en := '1';
|
||||
when OP_B =>
|
||||
f_out.redirect <= '1';
|
||||
if (insn_aa(e_in.insn)) then
|
||||
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.read_data2));
|
||||
f_out.redirect_nia <= std_ulogic_vector(signed(b_in));
|
||||
else
|
||||
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(e_in.read_data2));
|
||||
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in));
|
||||
end if;
|
||||
when OP_BC =>
|
||||
-- read_data1 is CTR
|
||||
bo := insn_bo(e_in.insn);
|
||||
bi := insn_bi(e_in.insn);
|
||||
if bo(4-2) = '0' then
|
||||
result := std_ulogic_vector(unsigned(e_in.read_data1) - 1);
|
||||
result := std_ulogic_vector(unsigned(a_in) - 1);
|
||||
result_en := '1';
|
||||
v.e.write_reg := fast_spr_num(SPR_CTR);
|
||||
end if;
|
||||
if ppc_bc_taken(bo, bi, e_in.cr, e_in.read_data1) = 1 then
|
||||
if ppc_bc_taken(bo, bi, e_in.cr, a_in) = 1 then
|
||||
f_out.redirect <= '1';
|
||||
if (insn_aa(e_in.insn)) then
|
||||
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.read_data2));
|
||||
f_out.redirect_nia <= std_ulogic_vector(signed(b_in));
|
||||
else
|
||||
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(e_in.read_data2));
|
||||
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in));
|
||||
end if;
|
||||
end if;
|
||||
when OP_BCREG =>
|
||||
@@ -289,53 +465,41 @@ begin
|
||||
bo := insn_bo(e_in.insn);
|
||||
bi := insn_bi(e_in.insn);
|
||||
if bo(4-2) = '0' and e_in.insn(10) = '0' then
|
||||
result := std_ulogic_vector(unsigned(e_in.read_data1) - 1);
|
||||
result := std_ulogic_vector(unsigned(a_in) - 1);
|
||||
result_en := '1';
|
||||
v.e.write_reg := fast_spr_num(SPR_CTR);
|
||||
end if;
|
||||
if ppc_bc_taken(bo, bi, e_in.cr, e_in.read_data1) = 1 then
|
||||
if ppc_bc_taken(bo, bi, e_in.cr, a_in) = 1 then
|
||||
f_out.redirect <= '1';
|
||||
f_out.redirect_nia <= e_in.read_data2(63 downto 2) & "00";
|
||||
f_out.redirect_nia <= b_in(63 downto 2) & "00";
|
||||
end if;
|
||||
when OP_CMPB =>
|
||||
result := ppc_cmpb(e_in.read_data3, e_in.read_data2);
|
||||
result := ppc_cmpb(c_in, b_in);
|
||||
result_en := '1';
|
||||
when OP_CMP =>
|
||||
bf := insn_bf(e_in.insn);
|
||||
l := insn_l(e_in.insn);
|
||||
v.e.write_cr_enable := '1';
|
||||
crnum := to_integer(unsigned(bf));
|
||||
v.e.write_cr_mask := num_to_fxm(crnum);
|
||||
for i in 0 to 7 loop
|
||||
lo := i*4;
|
||||
hi := lo + 3;
|
||||
v.e.write_cr_data(hi downto lo) := ppc_cmp(l, e_in.read_data1, e_in.read_data2, v.e.xerc.so);
|
||||
end loop;
|
||||
when OP_CMPL =>
|
||||
bf := insn_bf(e_in.insn);
|
||||
l := insn_l(e_in.insn);
|
||||
v.e.write_cr_enable := '1';
|
||||
crnum := to_integer(unsigned(bf));
|
||||
v.e.write_cr_mask := num_to_fxm(crnum);
|
||||
for i in 0 to 7 loop
|
||||
lo := i*4;
|
||||
hi := lo + 3;
|
||||
v.e.write_cr_data(hi downto lo) := ppc_cmpl(l, e_in.read_data1, e_in.read_data2, v.e.xerc.so);
|
||||
end loop;
|
||||
when OP_CNTZ =>
|
||||
result := countzero_result;
|
||||
result_en := '1';
|
||||
when OP_EXTS =>
|
||||
v.e.write_len := e_in.data_len;
|
||||
v.e.sign_extend := '1';
|
||||
result := e_in.read_data3;
|
||||
when OP_CNTZ =>
|
||||
v.e.valid := '0';
|
||||
v.cntz_in_progress := '1';
|
||||
stall_out <= '1';
|
||||
when OP_EXTS =>
|
||||
-- note data_len is a 1-hot encoding
|
||||
negative := (e_in.data_len(0) and c_in(7)) or
|
||||
(e_in.data_len(1) and c_in(15)) or
|
||||
(e_in.data_len(2) and c_in(31));
|
||||
result := (others => negative);
|
||||
if e_in.data_len(2) = '1' then
|
||||
result(31 downto 16) := c_in(31 downto 16);
|
||||
end if;
|
||||
if e_in.data_len(2) = '1' or e_in.data_len(1) = '1' then
|
||||
result(15 downto 8) := c_in(15 downto 8);
|
||||
end if;
|
||||
result(7 downto 0) := c_in(7 downto 0);
|
||||
result_en := '1';
|
||||
when OP_ISEL =>
|
||||
crbit := to_integer(unsigned(insn_bc(e_in.insn)));
|
||||
if e_in.cr(31-crbit) = '1' then
|
||||
result := e_in.read_data1;
|
||||
result := a_in;
|
||||
else
|
||||
result := e_in.read_data2;
|
||||
result := b_in;
|
||||
end if;
|
||||
result_en := '1';
|
||||
when OP_MCRF =>
|
||||
@@ -400,7 +564,7 @@ begin
|
||||
end if;
|
||||
when OP_MFSPR =>
|
||||
if is_fast_spr(e_in.read_reg1) then
|
||||
result := e_in.read_data1;
|
||||
result := a_in;
|
||||
if decode_spr_num(e_in.insn) = SPR_XER then
|
||||
-- bits 0:31 and 35:43 are treated as reserved and return 0s when read using mfxer
|
||||
result(63 downto 32) := (others => '0');
|
||||
@@ -447,19 +611,19 @@ begin
|
||||
crnum := fxm_to_num(insn_fxm(e_in.insn));
|
||||
v.e.write_cr_mask := num_to_fxm(crnum);
|
||||
end if;
|
||||
v.e.write_cr_data := e_in.read_data3(31 downto 0);
|
||||
v.e.write_cr_data := c_in(31 downto 0);
|
||||
when OP_MTSPR =>
|
||||
report "MTSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) &
|
||||
"=" & to_hstring(e_in.read_data3);
|
||||
"=" & to_hstring(c_in);
|
||||
if is_fast_spr(e_in.write_reg) then
|
||||
result := e_in.read_data3;
|
||||
result := c_in;
|
||||
result_en := '1';
|
||||
if decode_spr_num(e_in.insn) = SPR_XER then
|
||||
v.e.xerc.so := e_in.read_data3(63-32);
|
||||
v.e.xerc.ov := e_in.read_data3(63-33);
|
||||
v.e.xerc.ca := e_in.read_data3(63-34);
|
||||
v.e.xerc.ov32 := e_in.read_data3(63-44);
|
||||
v.e.xerc.ca32 := e_in.read_data3(63-45);
|
||||
v.e.xerc.so := c_in(63-32);
|
||||
v.e.xerc.ov := c_in(63-33);
|
||||
v.e.xerc.ca := c_in(63-34);
|
||||
v.e.xerc.ov32 := c_in(63-44);
|
||||
v.e.xerc.ca32 := c_in(63-45);
|
||||
v.e.write_xerc_enable := '1';
|
||||
end if;
|
||||
else
|
||||
@@ -468,20 +632,11 @@ begin
|
||||
-- when others =>
|
||||
-- end case;
|
||||
end if;
|
||||
when OP_POPCNTB =>
|
||||
result := ppc_popcntb(e_in.read_data3);
|
||||
when OP_POPCNT =>
|
||||
result := popcnt_result;
|
||||
result_en := '1';
|
||||
when OP_POPCNTW =>
|
||||
result := ppc_popcntw(e_in.read_data3);
|
||||
result_en := '1';
|
||||
when OP_POPCNTD =>
|
||||
result := ppc_popcntd(e_in.read_data3);
|
||||
result_en := '1';
|
||||
when OP_PRTYD =>
|
||||
result := ppc_prtyd(e_in.read_data3);
|
||||
result_en := '1';
|
||||
when OP_PRTYW =>
|
||||
result := ppc_prtyw(e_in.read_data3);
|
||||
when OP_PRTY =>
|
||||
result := parity_result;
|
||||
result_en := '1';
|
||||
when OP_RLC | OP_RLCL | OP_RLCR | OP_SHL | OP_SHR =>
|
||||
result := rotator_result;
|
||||
@@ -506,11 +661,29 @@ begin
|
||||
when OP_ICBI =>
|
||||
icache_inval <= '1';
|
||||
|
||||
when others =>
|
||||
when OP_MUL_L64 | OP_MUL_H64 | OP_MUL_H32 =>
|
||||
v.e.valid := '0';
|
||||
v.mul_in_progress := '1';
|
||||
stall_out <= '1';
|
||||
x_to_multiply.valid <= '1';
|
||||
|
||||
when OP_DIV | OP_DIVE | OP_MOD =>
|
||||
v.e.valid := '0';
|
||||
v.div_in_progress := '1';
|
||||
stall_out <= '1';
|
||||
x_to_divider.valid <= '1';
|
||||
|
||||
when OP_LOAD | OP_STORE =>
|
||||
-- loadstore/dcache has its own port to writeback
|
||||
v.e.valid := '0';
|
||||
|
||||
when others =>
|
||||
terminate_out <= '1';
|
||||
report "illegal";
|
||||
end case;
|
||||
|
||||
v.e.rc := e_in.rc and e_in.valid;
|
||||
|
||||
-- Update LR on the next cycle after a branch link
|
||||
--
|
||||
-- WARNING: The LR update isn't tracked by our hazard tracker. This
|
||||
@@ -533,20 +706,74 @@ begin
|
||||
result_en := '1';
|
||||
result := r.next_lr;
|
||||
v.e.write_reg := fast_spr_num(SPR_LR);
|
||||
v.e.write_len := x"8";
|
||||
v.e.sign_extend := '0';
|
||||
v.e.valid := '1';
|
||||
elsif r.cntz_in_progress = '1' then
|
||||
-- cnt[lt]z always takes two cycles
|
||||
result := countzero_result;
|
||||
result_en := '1';
|
||||
v.e.write_reg := gpr_to_gspr(v.slow_op_dest);
|
||||
v.e.rc := v.slow_op_rc;
|
||||
v.e.xerc := v.slow_op_xerc;
|
||||
v.e.valid := '1';
|
||||
elsif r.mul_in_progress = '1' or r.div_in_progress = '1' then
|
||||
if (r.mul_in_progress = '1' and multiply_to_x.valid = '1') or
|
||||
(r.div_in_progress = '1' and divider_to_x.valid = '1') then
|
||||
if r.mul_in_progress = '1' then
|
||||
result := multiply_to_x.write_reg_data;
|
||||
overflow := multiply_to_x.overflow;
|
||||
else
|
||||
result := divider_to_x.write_reg_data;
|
||||
overflow := divider_to_x.overflow;
|
||||
end if;
|
||||
result_en := '1';
|
||||
v.e.write_reg := gpr_to_gspr(v.slow_op_dest);
|
||||
v.e.rc := v.slow_op_rc;
|
||||
v.e.xerc := v.slow_op_xerc;
|
||||
v.e.write_xerc_enable := v.slow_op_oe;
|
||||
-- We must test oe because the RC update code in writeback
|
||||
-- will use the xerc value to set CR0:SO so we must not clobber
|
||||
-- xerc if OE wasn't set.
|
||||
if v.slow_op_oe = '1' then
|
||||
v.e.xerc.ov := overflow;
|
||||
v.e.xerc.ov32 := overflow;
|
||||
v.e.xerc.so := v.slow_op_xerc.so or overflow;
|
||||
end if;
|
||||
v.e.valid := '1';
|
||||
else
|
||||
stall_out <= '1';
|
||||
v.mul_in_progress := r.mul_in_progress;
|
||||
v.div_in_progress := r.div_in_progress;
|
||||
end if;
|
||||
end if;
|
||||
|
||||
v.e.write_data := result;
|
||||
v.e.write_enable := result_en;
|
||||
v.e.rc := e_in.rc and e_in.valid;
|
||||
|
||||
-- Outputs to loadstore1 (async)
|
||||
lv := Execute1ToLoadstore1Init;
|
||||
if e_in.valid = '1' and (e_in.insn_type = OP_LOAD or e_in.insn_type = OP_STORE) then
|
||||
lv.valid := '1';
|
||||
end if;
|
||||
if e_in.insn_type = OP_LOAD then
|
||||
lv.load := '1';
|
||||
end if;
|
||||
lv.addr1 := a_in;
|
||||
lv.addr2 := b_in;
|
||||
lv.data := c_in;
|
||||
lv.write_reg := gspr_to_gpr(e_in.write_reg);
|
||||
lv.length := e_in.data_len;
|
||||
lv.byte_reverse := e_in.byte_reverse;
|
||||
lv.sign_extend := e_in.sign_extend;
|
||||
lv.update := e_in.update;
|
||||
lv.update_reg := gspr_to_gpr(e_in.read_reg1);
|
||||
lv.xerc := v.e.xerc;
|
||||
|
||||
-- Update registers
|
||||
rin <= v;
|
||||
|
||||
-- update outputs
|
||||
--f_out <= r.f;
|
||||
l_out <= lv;
|
||||
e_out <= r.e;
|
||||
flush_out <= f_out.redirect;
|
||||
end process;
|
||||
|
||||
@@ -12,18 +12,21 @@ entity gpr_hazard is
|
||||
|
||||
gpr_write_valid_in : in std_ulogic;
|
||||
gpr_write_in : in std_ulogic_vector(5 downto 0);
|
||||
bypass_avail : in std_ulogic;
|
||||
gpr_read_valid_in : in std_ulogic;
|
||||
gpr_read_in : in std_ulogic_vector(5 downto 0);
|
||||
|
||||
stall_out : out std_ulogic
|
||||
stall_out : out std_ulogic;
|
||||
use_bypass : out std_ulogic
|
||||
);
|
||||
end entity gpr_hazard;
|
||||
architecture behaviour of gpr_hazard is
|
||||
type pipeline_entry_type is record
|
||||
valid : std_ulogic;
|
||||
gpr : std_ulogic_vector(5 downto 0);
|
||||
valid : std_ulogic;
|
||||
bypass : std_ulogic;
|
||||
gpr : std_ulogic_vector(5 downto 0);
|
||||
end record;
|
||||
constant pipeline_entry_init : pipeline_entry_type := (valid => '0', gpr => (others => '0'));
|
||||
constant pipeline_entry_init : pipeline_entry_type := (valid => '0', bypass => '0', gpr => (others => '0'));
|
||||
|
||||
type pipeline_t is array(0 to PIPELINE_DEPTH-1) of pipeline_entry_type;
|
||||
constant pipeline_t_init : pipeline_t := (others => pipeline_entry_init);
|
||||
@@ -33,9 +36,7 @@ begin
|
||||
gpr_hazard0: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
if stall_in = '0' then
|
||||
r <= rin;
|
||||
end if;
|
||||
r <= rin;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
@@ -45,22 +46,49 @@ begin
|
||||
v := r;
|
||||
|
||||
stall_out <= '0';
|
||||
loop_0: for i in 0 to PIPELINE_DEPTH-1 loop
|
||||
if ((r(i).valid = gpr_read_valid_in) and r(i).gpr = gpr_read_in) then
|
||||
stall_out <= '1';
|
||||
use_bypass <= '0';
|
||||
if gpr_read_valid_in = '1' then
|
||||
if r(0).valid = '1' and r(0).gpr = gpr_read_in then
|
||||
if r(0).bypass = '1' and stall_in = '0' then
|
||||
use_bypass <= '1';
|
||||
else
|
||||
stall_out <= '1';
|
||||
end if;
|
||||
end if;
|
||||
end loop;
|
||||
loop_0: for i in 1 to PIPELINE_DEPTH-1 loop
|
||||
if r(i).valid = '1' and r(i).gpr = gpr_read_in then
|
||||
if r(i).bypass = '1' then
|
||||
use_bypass <= '1';
|
||||
else
|
||||
stall_out <= '1';
|
||||
end if;
|
||||
end if;
|
||||
end loop;
|
||||
end if;
|
||||
|
||||
v(0).valid := gpr_write_valid_in;
|
||||
v(0).gpr := gpr_write_in;
|
||||
loop_1: for i in 0 to PIPELINE_DEPTH-2 loop
|
||||
-- propagate to next slot
|
||||
v(i+1) := r(i);
|
||||
end loop;
|
||||
if stall_in = '0' then
|
||||
v(0).valid := gpr_write_valid_in;
|
||||
v(0).bypass := bypass_avail;
|
||||
v(0).gpr := gpr_write_in;
|
||||
loop_1: for i in 1 to PIPELINE_DEPTH-1 loop
|
||||
-- propagate to next slot
|
||||
v(i).valid := r(i-1).valid;
|
||||
v(i).bypass := r(i-1).bypass;
|
||||
v(i).gpr := r(i-1).gpr;
|
||||
end loop;
|
||||
|
||||
-- asynchronous output
|
||||
if gpr_read_valid_in = '0' then
|
||||
stall_out <= '0';
|
||||
else
|
||||
-- stage 0 stalled, so stage 1 becomes empty
|
||||
loop_1b: for i in 1 to PIPELINE_DEPTH-1 loop
|
||||
-- propagate to next slot
|
||||
if i = 1 then
|
||||
v(i).valid := '0';
|
||||
else
|
||||
v(i).valid := r(i-1).valid;
|
||||
v(i).bypass := r(i-1).bypass;
|
||||
v(i).gpr := r(i-1).gpr;
|
||||
end if;
|
||||
end loop;
|
||||
end if;
|
||||
|
||||
-- update registers
|
||||
|
||||
@@ -13,7 +13,7 @@ entity loadstore1 is
|
||||
port (
|
||||
clk : in std_ulogic;
|
||||
|
||||
l_in : in Decode2ToLoadstore1Type;
|
||||
l_in : in Execute1ToLoadstore1Type;
|
||||
|
||||
l_out : out Loadstore1ToDcacheType
|
||||
);
|
||||
|
||||
60
logical.vhdl
60
logical.vhdl
@@ -12,11 +12,29 @@ entity logical is
|
||||
op : in insn_type_t;
|
||||
invert_in : in std_ulogic;
|
||||
invert_out : in std_ulogic;
|
||||
result : out std_ulogic_vector(63 downto 0)
|
||||
result : out std_ulogic_vector(63 downto 0);
|
||||
datalen : in std_logic_vector(3 downto 0);
|
||||
popcnt : out std_ulogic_vector(63 downto 0);
|
||||
parity : out std_ulogic_vector(63 downto 0)
|
||||
);
|
||||
end entity logical;
|
||||
|
||||
architecture behaviour of logical is
|
||||
|
||||
subtype twobit is unsigned(1 downto 0);
|
||||
type twobit32 is array(0 to 31) of twobit;
|
||||
signal pc2 : twobit32;
|
||||
subtype threebit is unsigned(2 downto 0);
|
||||
type threebit16 is array(0 to 15) of threebit;
|
||||
signal pc4 : threebit16;
|
||||
subtype fourbit is unsigned(3 downto 0);
|
||||
type fourbit8 is array(0 to 7) of fourbit;
|
||||
signal pc8 : fourbit8;
|
||||
subtype sixbit is unsigned(5 downto 0);
|
||||
type sixbit2 is array(0 to 1) of sixbit;
|
||||
signal pc32 : sixbit2;
|
||||
signal par0, par1 : std_ulogic;
|
||||
|
||||
begin
|
||||
logical_0: process(all)
|
||||
variable rb_adj, tmp : std_ulogic_vector(63 downto 0);
|
||||
@@ -40,5 +58,45 @@ begin
|
||||
result <= not tmp;
|
||||
end if;
|
||||
|
||||
-- population counts
|
||||
for i in 0 to 31 loop
|
||||
pc2(i) <= unsigned("0" & rs(i * 2 downto i * 2)) + unsigned("0" & rs(i * 2 + 1 downto i * 2 + 1));
|
||||
end loop;
|
||||
for i in 0 to 15 loop
|
||||
pc4(i) <= ('0' & pc2(i * 2)) + ('0' & pc2(i * 2 + 1));
|
||||
end loop;
|
||||
for i in 0 to 7 loop
|
||||
pc8(i) <= ('0' & pc4(i * 2)) + ('0' & pc4(i * 2 + 1));
|
||||
end loop;
|
||||
for i in 0 to 1 loop
|
||||
pc32(i) <= ("00" & pc8(i * 4)) + ("00" & pc8(i * 4 + 1)) +
|
||||
("00" & pc8(i * 4 + 2)) + ("00" & pc8(i * 4 + 3));
|
||||
end loop;
|
||||
popcnt <= (others => '0');
|
||||
if datalen(3 downto 2) = "00" then
|
||||
-- popcntb
|
||||
for i in 0 to 7 loop
|
||||
popcnt(i * 8 + 3 downto i * 8) <= std_ulogic_vector(pc8(i));
|
||||
end loop;
|
||||
elsif datalen(3) = '0' then
|
||||
-- popcntw
|
||||
for i in 0 to 1 loop
|
||||
popcnt(i * 32 + 5 downto i * 32) <= std_ulogic_vector(pc32(i));
|
||||
end loop;
|
||||
else
|
||||
popcnt(6 downto 0) <= std_ulogic_vector(('0' & pc32(0)) + ('0' & pc32(1)));
|
||||
end if;
|
||||
|
||||
-- parity calculations
|
||||
par0 <= rs(0) xor rs(8) xor rs(16) xor rs(24);
|
||||
par1 <= rs(32) xor rs(40) xor rs(48) xor rs(56);
|
||||
parity <= (others => '0');
|
||||
if datalen(3) = '1' then
|
||||
parity(0) <= par0 xor par1;
|
||||
else
|
||||
parity(0) <= par0;
|
||||
parity(32) <= par1;
|
||||
end if;
|
||||
|
||||
end process;
|
||||
end behaviour;
|
||||
|
||||
@@ -13,31 +13,24 @@ entity multiply is
|
||||
port (
|
||||
clk : in std_logic;
|
||||
|
||||
m_in : in Decode2ToMultiplyType;
|
||||
m_out : out MultiplyToWritebackType
|
||||
m_in : in Execute1ToMultiplyType;
|
||||
m_out : out MultiplyToExecute1Type
|
||||
);
|
||||
end entity multiply;
|
||||
|
||||
architecture behaviour of multiply is
|
||||
signal m: Decode2ToMultiplyType;
|
||||
signal m: Execute1ToMultiplyType;
|
||||
|
||||
type multiply_pipeline_stage is record
|
||||
valid : std_ulogic;
|
||||
insn_type : insn_type_t;
|
||||
data : signed(129 downto 0);
|
||||
write_reg : std_ulogic_vector(4 downto 0);
|
||||
rc : std_ulogic;
|
||||
oe : std_ulogic;
|
||||
is_32bit : std_ulogic;
|
||||
xerc : xer_common_t;
|
||||
end record;
|
||||
constant MultiplyPipelineStageInit : multiply_pipeline_stage := (valid => '0',
|
||||
insn_type => OP_ILLEGAL,
|
||||
rc => '0', oe => '0',
|
||||
is_32bit => '0',
|
||||
xerc => xerc_init,
|
||||
data => (others => '0'),
|
||||
others => (others => '0'));
|
||||
data => (others => '0'));
|
||||
|
||||
type multiply_pipeline_type is array(0 to PIPELINE_DEPTH-1) of multiply_pipeline_stage;
|
||||
constant MultiplyPipelineInit : multiply_pipeline_type := (others => MultiplyPipelineStageInit);
|
||||
@@ -64,16 +57,12 @@ begin
|
||||
begin
|
||||
v := r;
|
||||
|
||||
m_out <= MultiplyToWritebackInit;
|
||||
m_out <= MultiplyToExecute1Init;
|
||||
|
||||
v.multiply_pipeline(0).valid := m.valid;
|
||||
v.multiply_pipeline(0).insn_type := m.insn_type;
|
||||
v.multiply_pipeline(0).data := signed(m.data1) * signed(m.data2);
|
||||
v.multiply_pipeline(0).write_reg := m.write_reg;
|
||||
v.multiply_pipeline(0).rc := m.rc;
|
||||
v.multiply_pipeline(0).oe := m.oe;
|
||||
v.multiply_pipeline(0).is_32bit := m.is_32bit;
|
||||
v.multiply_pipeline(0).xerc := m.xerc;
|
||||
|
||||
loop_0: for i in 1 to PIPELINE_DEPTH-1 loop
|
||||
v.multiply_pipeline(i) := r.multiply_pipeline(i-1);
|
||||
@@ -101,25 +90,10 @@ begin
|
||||
end case;
|
||||
|
||||
m_out.write_reg_data <= d2;
|
||||
m_out.write_reg_nr <= v.multiply_pipeline(PIPELINE_DEPTH-1).write_reg;
|
||||
m_out.xerc <= v.multiply_pipeline(PIPELINE_DEPTH-1).xerc;
|
||||
m_out.overflow <= ov;
|
||||
|
||||
-- Generate OV/OV32/SO when OE=1
|
||||
if v.multiply_pipeline(PIPELINE_DEPTH-1).valid = '1' then
|
||||
m_out.valid <= '1';
|
||||
m_out.write_reg_enable <= '1';
|
||||
m_out.rc <= v.multiply_pipeline(PIPELINE_DEPTH-1).rc;
|
||||
m_out.write_xerc_enable <= v.multiply_pipeline(PIPELINE_DEPTH-1).oe;
|
||||
|
||||
-- We must test oe because the RC update code in writeback
|
||||
-- will use the xerc value to set CR0:SO so we must not clobber
|
||||
-- xerc if OE wasn't set.
|
||||
--
|
||||
if v.multiply_pipeline(PIPELINE_DEPTH-1).oe = '1' then
|
||||
m_out.xerc.ov <= ov;
|
||||
m_out.xerc.ov32 <= ov;
|
||||
m_out.xerc.so <= v.multiply_pipeline(PIPELINE_DEPTH-1).xerc.so or ov;
|
||||
end if;
|
||||
end if;
|
||||
|
||||
rin <= v;
|
||||
|
||||
@@ -17,8 +17,8 @@ architecture behave of multiply_tb is
|
||||
|
||||
constant pipeline_depth : integer := 4;
|
||||
|
||||
signal m1 : Decode2ToMultiplyType;
|
||||
signal m2 : MultiplyToWritebackType;
|
||||
signal m1 : Execute1ToMultiplyType;
|
||||
signal m2 : MultiplyToExecute1Type;
|
||||
begin
|
||||
multiply_0: entity work.multiply
|
||||
generic map (PIPELINE_DEPTH => pipeline_depth)
|
||||
@@ -40,10 +40,8 @@ begin
|
||||
|
||||
m1.valid <= '1';
|
||||
m1.insn_type <= OP_MUL_L64;
|
||||
m1.write_reg <= "10001";
|
||||
m1.data1 <= '0' & x"0000000000001000";
|
||||
m1.data2 <= '0' & x"0000000000001111";
|
||||
m1.rc <= '0';
|
||||
|
||||
wait for clk_period;
|
||||
assert m2.valid = '0';
|
||||
@@ -58,16 +56,12 @@ begin
|
||||
|
||||
wait for clk_period;
|
||||
assert m2.valid = '1';
|
||||
assert m2.write_reg_enable = '1';
|
||||
assert m2.write_reg_nr = "10001";
|
||||
assert m2.write_reg_data = x"0000000001111000";
|
||||
assert m2.rc = '0';
|
||||
|
||||
wait for clk_period;
|
||||
assert m2.valid = '0';
|
||||
|
||||
m1.valid <= '1';
|
||||
m1.rc <= '1';
|
||||
|
||||
wait for clk_period;
|
||||
assert m2.valid = '0';
|
||||
@@ -76,10 +70,7 @@ begin
|
||||
|
||||
wait for clk_period * (pipeline_depth-1);
|
||||
assert m2.valid = '1';
|
||||
assert m2.write_reg_enable = '1';
|
||||
assert m2.write_reg_nr = "10001";
|
||||
assert m2.write_reg_data = x"0000000001111000";
|
||||
assert m2.rc = '1';
|
||||
|
||||
-- test mulld
|
||||
mulld_loop : for i in 0 to 1000 loop
|
||||
|
||||
@@ -12,8 +12,6 @@ entity writeback is
|
||||
|
||||
e_in : in Execute1ToWritebackType;
|
||||
l_in : in DcacheToWritebackType;
|
||||
m_in : in MultiplyToWritebackType;
|
||||
d_in : in DividerToWritebackType;
|
||||
|
||||
w_out : out WritebackToRegisterFileType;
|
||||
c_out : out WritebackToCrFileType;
|
||||
@@ -44,7 +42,6 @@ architecture behaviour of writeback is
|
||||
signal sign_extend : std_ulogic;
|
||||
signal negative : std_ulogic;
|
||||
signal second_word : std_ulogic;
|
||||
signal zero : std_ulogic;
|
||||
begin
|
||||
writeback_0: process(clk)
|
||||
begin
|
||||
@@ -64,44 +61,32 @@ begin
|
||||
variable k : unsigned(3 downto 0);
|
||||
variable cf: std_ulogic_vector(3 downto 0);
|
||||
variable xe: xer_common_t;
|
||||
variable zero : std_ulogic;
|
||||
variable sign : std_ulogic;
|
||||
begin
|
||||
x := "" & e_in.valid;
|
||||
y := "" & l_in.valid;
|
||||
z := "" & m_in.valid;
|
||||
w := "" & d_in.valid;
|
||||
assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z)) + to_integer(unsigned(w))) <= 1 severity failure;
|
||||
assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure;
|
||||
|
||||
x := "" & e_in.write_enable;
|
||||
y := "" & l_in.write_enable;
|
||||
z := "" & m_in.write_reg_enable;
|
||||
w := "" & d_in.write_reg_enable;
|
||||
assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z)) + to_integer(unsigned(w))) <= 1 severity failure;
|
||||
assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure;
|
||||
|
||||
w := "" & e_in.write_cr_enable;
|
||||
x := "" & (e_in.write_enable and e_in.rc);
|
||||
y := "" & (m_in.valid and m_in.rc);
|
||||
z := "" & (d_in.valid and d_in.rc);
|
||||
assert (to_integer(unsigned(w)) + to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z))) <= 1 severity failure;
|
||||
|
||||
x := "" & e_in.write_xerc_enable;
|
||||
y := "" & m_in.write_xerc_enable;
|
||||
z := "" & D_in.write_xerc_enable;
|
||||
assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z))) <= 1 severity failure;
|
||||
assert (to_integer(unsigned(w)) + to_integer(unsigned(x))) <= 1 severity failure;
|
||||
|
||||
w_out <= WritebackToRegisterFileInit;
|
||||
c_out <= WritebackToCrFileInit;
|
||||
|
||||
complete_out <= '0';
|
||||
if e_in.valid = '1' or l_in.valid = '1' or m_in.valid = '1' or d_in.valid = '1' then
|
||||
if e_in.valid = '1' or l_in.valid = '1' then
|
||||
complete_out <= '1';
|
||||
end if;
|
||||
|
||||
rc <= '0';
|
||||
brev_lenm1 <= "000";
|
||||
byte_offset <= "000";
|
||||
data_len <= x"8";
|
||||
partial_write <= '0';
|
||||
sign_extend <= '0';
|
||||
second_word <= '0';
|
||||
xe := e_in.xerc;
|
||||
data_in <= (others => '0');
|
||||
@@ -109,9 +94,6 @@ begin
|
||||
if e_in.write_enable = '1' then
|
||||
w_out.write_reg <= e_in.write_reg;
|
||||
w_out.write_enable <= '1';
|
||||
data_in <= e_in.write_data;
|
||||
data_len <= unsigned(e_in.write_len);
|
||||
sign_extend <= e_in.sign_extend;
|
||||
rc <= e_in.rc;
|
||||
end if;
|
||||
|
||||
@@ -126,12 +108,11 @@ begin
|
||||
c_out.write_xerc_data <= e_in.xerc;
|
||||
end if;
|
||||
|
||||
sign_extend <= l_in.sign_extend;
|
||||
data_len <= unsigned(l_in.write_len);
|
||||
byte_offset <= unsigned(l_in.write_shift);
|
||||
if l_in.write_enable = '1' then
|
||||
w_out.write_reg <= gpr_to_gspr(l_in.write_reg);
|
||||
data_in <= l_in.write_data;
|
||||
data_len <= unsigned(l_in.write_len);
|
||||
byte_offset <= unsigned(l_in.write_shift);
|
||||
sign_extend <= l_in.sign_extend;
|
||||
if l_in.byte_reverse = '1' then
|
||||
brev_lenm1 <= unsigned(l_in.write_len(2 downto 0)) - 1;
|
||||
end if;
|
||||
@@ -143,32 +124,6 @@ begin
|
||||
xe := l_in.xerc;
|
||||
end if;
|
||||
|
||||
if m_in.write_reg_enable = '1' then
|
||||
w_out.write_enable <= '1';
|
||||
w_out.write_reg <= gpr_to_gspr(m_in.write_reg_nr);
|
||||
data_in <= m_in.write_reg_data;
|
||||
rc <= m_in.rc;
|
||||
xe := m_in.xerc;
|
||||
end if;
|
||||
|
||||
if m_in.write_xerc_enable = '1' then
|
||||
c_out.write_xerc_enable <= '1';
|
||||
c_out.write_xerc_data <= m_in.xerc;
|
||||
end if;
|
||||
|
||||
if d_in.write_reg_enable = '1' then
|
||||
w_out.write_enable <= '1';
|
||||
w_out.write_reg <= gpr_to_gspr(d_in.write_reg_nr);
|
||||
data_in <= d_in.write_reg_data;
|
||||
rc <= d_in.rc;
|
||||
xe := d_in.xerc;
|
||||
end if;
|
||||
|
||||
if d_in.write_xerc_enable = '1' then
|
||||
c_out.write_xerc_enable <= '1';
|
||||
c_out.write_xerc_data <= d_in.xerc;
|
||||
end if;
|
||||
|
||||
-- shift and byte-reverse data bytes
|
||||
for i in 0 to 7 loop
|
||||
k := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset);
|
||||
@@ -177,7 +132,7 @@ begin
|
||||
end loop;
|
||||
for i in 0 to 7 loop
|
||||
j := to_integer(perm(i)) * 8;
|
||||
data_permuted(i * 8 + 7 downto i * 8) <= data_in(j + 7 downto j);
|
||||
data_permuted(i * 8 + 7 downto i * 8) <= l_in.write_data(j + 7 downto j);
|
||||
end loop;
|
||||
|
||||
-- If the data can arrive split over two cycles, this will be correct
|
||||
@@ -199,16 +154,12 @@ begin
|
||||
trim_ctl(i) <= '0' & (negative and sign_extend);
|
||||
end if;
|
||||
end loop;
|
||||
zero <= not negative;
|
||||
for i in 0 to 7 loop
|
||||
case trim_ctl(i) is
|
||||
when "11" =>
|
||||
data_trimmed(i * 8 + 7 downto i * 8) <= data_latched(i * 8 + 7 downto i * 8);
|
||||
when "10" =>
|
||||
data_trimmed(i * 8 + 7 downto i * 8) <= data_permuted(i * 8 + 7 downto i * 8);
|
||||
if or data_permuted(i * 8 + 7 downto i * 8) /= '0' then
|
||||
zero <= '0';
|
||||
end if;
|
||||
when "01" =>
|
||||
data_trimmed(i * 8 + 7 downto i * 8) <= x"FF";
|
||||
when others =>
|
||||
@@ -217,14 +168,21 @@ begin
|
||||
end loop;
|
||||
|
||||
-- deliver to regfile
|
||||
w_out.write_data <= data_trimmed;
|
||||
if l_in.write_enable = '1' then
|
||||
w_out.write_data <= data_trimmed;
|
||||
else
|
||||
w_out.write_data <= e_in.write_data;
|
||||
end if;
|
||||
|
||||
-- Perform CR0 update for RC forms
|
||||
-- Note that loads never have a form with an RC bit, therefore this can test e_in.write_data
|
||||
if rc = '1' then
|
||||
sign := e_in.write_data(63);
|
||||
zero := not (or e_in.write_data);
|
||||
c_out.write_cr_enable <= '1';
|
||||
c_out.write_cr_mask <= num_to_fxm(0);
|
||||
cf(3) := negative;
|
||||
cf(2) := not negative and not zero;
|
||||
cf(3) := sign;
|
||||
cf(2) := not sign and not zero;
|
||||
cf(1) := zero;
|
||||
cf(0) := xe.so;
|
||||
c_out.write_cr_data(31 downto 28) <= cf;
|
||||
|
||||
Reference in New Issue
Block a user