mirror of
https://github.com/antonblanchard/microwatt.git
synced 2026-02-27 00:59:41 +00:00
Merge pull request #208 from paulusmack/faster
Make the core go faster. Several major improvements in here: - Simple branch predictor - Reduced latency for mispredicted branches and interrupts by removing the fetch2 stage - Cache improvements o Request critical dword first on refill o Handle hits while refilling, including on the line being refilled o Sizes doubled for both D and I - Loadstore improvements: can now do one load or store every two cycles in most cases - Optimized 2-cycle multiplier for Xilinx 7-series parts using DSP slices - Timing improvements, including: o Stash buffer in decode1 o Reduced width of execute1 result mux o Improved SPR decode in decode1 o Some non-critical operations take a cycle longer so we can break some long combinatorial chains - Core logging: logs 256 bits of info every cycle into a ring buffer, to help with debugging and performance analysis. This increases the LUT usage for the "synth" + A35 target from 9182 to 10297, an increase of 12%.
This commit is contained in:
2
Makefile
2
Makefile
@@ -42,7 +42,7 @@ all = core_tb icache_tb dcache_tb multiply_tb dmi_dtm_tb divider_tb \
|
||||
all: $(all)
|
||||
|
||||
core_files = decode_types.vhdl common.vhdl wishbone_types.vhdl fetch1.vhdl \
|
||||
fetch2.vhdl utils.vhdl plru.vhdl cache_ram.vhdl icache.vhdl \
|
||||
utils.vhdl plru.vhdl cache_ram.vhdl icache.vhdl \
|
||||
decode1.vhdl helpers.vhdl insn_helpers.vhdl gpr_hazard.vhdl \
|
||||
cr_hazard.vhdl control.vhdl decode2.vhdl register_file.vhdl \
|
||||
cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \
|
||||
|
||||
39
common.vhdl
39
common.vhdl
@@ -93,10 +93,11 @@ package common is
|
||||
virt_mode : std_ulogic;
|
||||
priv_mode : std_ulogic;
|
||||
stop_mark: std_ulogic;
|
||||
sequential: std_ulogic;
|
||||
nia: std_ulogic_vector(63 downto 0);
|
||||
end record;
|
||||
|
||||
type IcacheToFetch2Type is record
|
||||
type IcacheToDecode1Type is record
|
||||
valid: std_ulogic;
|
||||
stop_mark: std_ulogic;
|
||||
fetch_failed: std_ulogic;
|
||||
@@ -104,16 +105,6 @@ package common is
|
||||
insn: std_ulogic_vector(31 downto 0);
|
||||
end record;
|
||||
|
||||
type Fetch2ToDecode1Type is record
|
||||
valid: std_ulogic;
|
||||
stop_mark : std_ulogic;
|
||||
fetch_failed: std_ulogic;
|
||||
nia: std_ulogic_vector(63 downto 0);
|
||||
insn: std_ulogic_vector(31 downto 0);
|
||||
end record;
|
||||
constant Fetch2ToDecode1Init : Fetch2ToDecode1Type := (valid => '0', stop_mark => '0', fetch_failed => '0',
|
||||
nia => (others => '0'), insn => (others => '0'));
|
||||
|
||||
type Decode1ToDecode2Type is record
|
||||
valid: std_ulogic;
|
||||
stop_mark : std_ulogic;
|
||||
@@ -122,8 +113,16 @@ package common is
|
||||
ispr1: gspr_index_t; -- (G)SPR used for branch condition (CTR) or mfspr
|
||||
ispr2: gspr_index_t; -- (G)SPR used for branch target (CTR, LR, TAR)
|
||||
decode: decode_rom_t;
|
||||
br_pred: std_ulogic; -- Branch was predicted to be taken
|
||||
end record;
|
||||
constant Decode1ToDecode2Init : Decode1ToDecode2Type :=
|
||||
(valid => '0', stop_mark => '0', nia => (others => '0'), insn => (others => '0'),
|
||||
ispr1 => (others => '0'), ispr2 => (others => '0'), decode => decode_rom_init, br_pred => '0');
|
||||
|
||||
type Decode1ToFetch1Type is record
|
||||
redirect : std_ulogic;
|
||||
redirect_nia : std_ulogic_vector(63 downto 0);
|
||||
end record;
|
||||
constant Decode1ToDecode2Init : Decode1ToDecode2Type := (valid => '0', stop_mark => '0', nia => (others => '0'), insn => (others => '0'), ispr1 => (others => '0'), ispr2 => (others => '0'), decode => decode_rom_init);
|
||||
|
||||
type Decode2ToExecute1Type is record
|
||||
valid: std_ulogic;
|
||||
@@ -158,23 +157,24 @@ package common is
|
||||
sign_extend : std_ulogic; -- do we need to sign extend?
|
||||
update : std_ulogic; -- is this an update instruction?
|
||||
reserve : std_ulogic; -- set for larx/stcx
|
||||
br_pred : std_ulogic;
|
||||
end record;
|
||||
constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
|
||||
(valid => '0', unit => NONE, insn_type => OP_ILLEGAL, bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0',
|
||||
lr => '0', rc => '0', oe => '0', invert_a => '0',
|
||||
invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0',
|
||||
is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0',
|
||||
is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', br_pred => '0',
|
||||
byte_reverse => '0', sign_extend => '0', update => '0', nia => (others => '0'), read_data1 => (others => '0'), read_data2 => (others => '0'), read_data3 => (others => '0'), cr => (others => '0'), insn => (others => '0'), data_len => (others => '0'), others => (others => '0'));
|
||||
|
||||
type Execute1ToMultiplyType is record
|
||||
valid: std_ulogic;
|
||||
insn_type: insn_type_t;
|
||||
data1: std_ulogic_vector(64 downto 0);
|
||||
data2: std_ulogic_vector(64 downto 0);
|
||||
data1: std_ulogic_vector(63 downto 0);
|
||||
data2: std_ulogic_vector(63 downto 0);
|
||||
is_32bit: std_ulogic;
|
||||
neg_result: std_ulogic;
|
||||
end record;
|
||||
constant Execute1ToMultiplyInit : Execute1ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL,
|
||||
is_32bit => '0',
|
||||
constant Execute1ToMultiplyInit : Execute1ToMultiplyType := (valid => '0',
|
||||
is_32bit => '0', neg_result => '0',
|
||||
others => (others => '0'));
|
||||
|
||||
type Execute1ToDividerType is record
|
||||
@@ -253,6 +253,7 @@ package common is
|
||||
others => (others => '0'));
|
||||
|
||||
type Loadstore1ToExecute1Type is record
|
||||
busy : std_ulogic;
|
||||
exception : std_ulogic;
|
||||
invalid : std_ulogic;
|
||||
perm_error : std_ulogic;
|
||||
@@ -366,7 +367,7 @@ package common is
|
||||
|
||||
type MultiplyToExecute1Type is record
|
||||
valid: std_ulogic;
|
||||
write_reg_data: std_ulogic_vector(63 downto 0);
|
||||
result: std_ulogic_vector(127 downto 0);
|
||||
overflow : std_ulogic;
|
||||
end record;
|
||||
constant MultiplyToExecute1Init : MultiplyToExecute1Type := (valid => '0', overflow => '0',
|
||||
|
||||
61
control.vhdl
61
control.vhdl
@@ -15,7 +15,8 @@ entity control is
|
||||
complete_in : in std_ulogic;
|
||||
valid_in : in std_ulogic;
|
||||
flush_in : in std_ulogic;
|
||||
stall_in : in std_ulogic;
|
||||
busy_in : in std_ulogic;
|
||||
deferred : in std_ulogic;
|
||||
sgl_pipe_in : in std_ulogic;
|
||||
stop_mark_in : in std_ulogic;
|
||||
|
||||
@@ -23,6 +24,9 @@ entity control is
|
||||
gpr_write_in : in gspr_index_t;
|
||||
gpr_bypassable : in std_ulogic;
|
||||
|
||||
update_gpr_write_valid : in std_ulogic;
|
||||
update_gpr_write_reg : in gspr_index_t;
|
||||
|
||||
gpr_a_read_valid_in : in std_ulogic;
|
||||
gpr_a_read_in : in gspr_index_t;
|
||||
|
||||
@@ -72,7 +76,11 @@ begin
|
||||
)
|
||||
port map (
|
||||
clk => clk,
|
||||
stall_in => stall_in,
|
||||
busy_in => busy_in,
|
||||
deferred => deferred,
|
||||
complete_in => complete_in,
|
||||
flush_in => flush_in,
|
||||
issuing => valid_out,
|
||||
|
||||
gpr_write_valid_in => gpr_write_valid,
|
||||
gpr_write_in => gpr_write_in,
|
||||
@@ -80,6 +88,9 @@ begin
|
||||
gpr_read_valid_in => gpr_a_read_valid_in,
|
||||
gpr_read_in => gpr_a_read_in,
|
||||
|
||||
ugpr_write_valid => update_gpr_write_valid,
|
||||
ugpr_write_reg => update_gpr_write_reg,
|
||||
|
||||
stall_out => stall_a_out,
|
||||
use_bypass => gpr_bypass_a
|
||||
);
|
||||
@@ -90,7 +101,11 @@ begin
|
||||
)
|
||||
port map (
|
||||
clk => clk,
|
||||
stall_in => stall_in,
|
||||
busy_in => busy_in,
|
||||
deferred => deferred,
|
||||
complete_in => complete_in,
|
||||
flush_in => flush_in,
|
||||
issuing => valid_out,
|
||||
|
||||
gpr_write_valid_in => gpr_write_valid,
|
||||
gpr_write_in => gpr_write_in,
|
||||
@@ -98,6 +113,9 @@ begin
|
||||
gpr_read_valid_in => gpr_b_read_valid_in,
|
||||
gpr_read_in => gpr_b_read_in,
|
||||
|
||||
ugpr_write_valid => update_gpr_write_valid,
|
||||
ugpr_write_reg => update_gpr_write_reg,
|
||||
|
||||
stall_out => stall_b_out,
|
||||
use_bypass => gpr_bypass_b
|
||||
);
|
||||
@@ -110,7 +128,11 @@ begin
|
||||
)
|
||||
port map (
|
||||
clk => clk,
|
||||
stall_in => stall_in,
|
||||
busy_in => busy_in,
|
||||
deferred => deferred,
|
||||
complete_in => complete_in,
|
||||
flush_in => flush_in,
|
||||
issuing => valid_out,
|
||||
|
||||
gpr_write_valid_in => gpr_write_valid,
|
||||
gpr_write_in => gpr_write_in,
|
||||
@@ -118,6 +140,9 @@ begin
|
||||
gpr_read_valid_in => gpr_c_read_valid_in,
|
||||
gpr_read_in => gpr_c_read_in_fmt,
|
||||
|
||||
ugpr_write_valid => update_gpr_write_valid,
|
||||
ugpr_write_reg => update_gpr_write_reg,
|
||||
|
||||
stall_out => stall_c_out,
|
||||
use_bypass => gpr_bypass_c
|
||||
);
|
||||
@@ -128,7 +153,11 @@ begin
|
||||
)
|
||||
port map (
|
||||
clk => clk,
|
||||
stall_in => stall_in,
|
||||
busy_in => busy_in,
|
||||
deferred => deferred,
|
||||
complete_in => complete_in,
|
||||
flush_in => flush_in,
|
||||
issuing => valid_out,
|
||||
|
||||
cr_read_in => cr_read_in,
|
||||
cr_write_in => cr_write_valid,
|
||||
@@ -139,7 +168,8 @@ begin
|
||||
control0: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
assert r_int.outstanding >= 0 and r_int.outstanding <= (PIPELINE_DEPTH+1) report "Outstanding bad " & integer'image(r_int.outstanding) severity failure;
|
||||
assert rin_int.outstanding >= 0 and rin_int.outstanding <= (PIPELINE_DEPTH+1)
|
||||
report "Outstanding bad " & integer'image(rin_int.outstanding) severity failure;
|
||||
r_int <= rin_int;
|
||||
end if;
|
||||
end process;
|
||||
@@ -152,17 +182,18 @@ begin
|
||||
v_int := r_int;
|
||||
|
||||
-- asynchronous
|
||||
valid_tmp := valid_in and not flush_in and not stall_in;
|
||||
stall_tmp := stall_in;
|
||||
valid_tmp := valid_in and not flush_in;
|
||||
stall_tmp := '0';
|
||||
|
||||
if complete_in = '1' then
|
||||
if flush_in = '1' then
|
||||
-- expect to see complete_in next cycle
|
||||
v_int.outstanding := 1;
|
||||
elsif complete_in = '1' then
|
||||
v_int.outstanding := r_int.outstanding - 1;
|
||||
end if;
|
||||
|
||||
if rst = '1' then
|
||||
v_int.state := IDLE;
|
||||
v_int.outstanding := 0;
|
||||
stall_tmp := '0';
|
||||
v_int := reg_internal_init;
|
||||
valid_tmp := '0';
|
||||
end if;
|
||||
|
||||
@@ -227,7 +258,9 @@ begin
|
||||
end if;
|
||||
|
||||
if valid_tmp = '1' then
|
||||
v_int.outstanding := v_int.outstanding + 1;
|
||||
if deferred = '0' then
|
||||
v_int.outstanding := v_int.outstanding + 1;
|
||||
end if;
|
||||
gpr_write_valid <= gpr_write_valid_in;
|
||||
cr_write_valid <= cr_write_in;
|
||||
else
|
||||
@@ -237,7 +270,7 @@ begin
|
||||
|
||||
-- update outputs
|
||||
valid_out <= valid_tmp;
|
||||
stall_out <= stall_tmp;
|
||||
stall_out <= stall_tmp or deferred;
|
||||
|
||||
-- update registers
|
||||
rin_int <= v_int;
|
||||
|
||||
103
core.vhdl
103
core.vhdl
@@ -11,7 +11,8 @@ entity core is
|
||||
SIM : boolean := false;
|
||||
DISABLE_FLATTEN : boolean := false;
|
||||
EX1_BYPASS : boolean := true;
|
||||
ALT_RESET_ADDRESS : std_ulogic_vector(63 downto 0) := (others => '0')
|
||||
ALT_RESET_ADDRESS : std_ulogic_vector(63 downto 0) := (others => '0');
|
||||
LOG_LENGTH : natural := 512
|
||||
);
|
||||
port (
|
||||
clk : in std_ulogic;
|
||||
@@ -41,16 +42,14 @@ entity core is
|
||||
end core;
|
||||
|
||||
architecture behave of core is
|
||||
-- fetch signals
|
||||
signal fetch2_to_decode1: Fetch2ToDecode1Type;
|
||||
|
||||
-- icache signals
|
||||
signal fetch1_to_icache : Fetch1ToIcacheType;
|
||||
signal icache_to_fetch2 : IcacheToFetch2Type;
|
||||
signal icache_to_decode1 : IcacheToDecode1Type;
|
||||
signal mmu_to_icache : MmuToIcacheType;
|
||||
|
||||
-- decode signals
|
||||
signal decode1_to_decode2: Decode1ToDecode2Type;
|
||||
signal decode1_to_fetch1: Decode1ToFetch1Type;
|
||||
signal decode2_to_execute1: Decode2ToExecute1Type;
|
||||
|
||||
-- register file signals
|
||||
@@ -83,16 +82,18 @@ architecture behave of core is
|
||||
-- local signals
|
||||
signal fetch1_stall_in : std_ulogic;
|
||||
signal icache_stall_out : std_ulogic;
|
||||
signal fetch2_stall_in : std_ulogic;
|
||||
signal icache_stall_in : std_ulogic;
|
||||
signal decode1_stall_in : std_ulogic;
|
||||
signal decode2_stall_in : std_ulogic;
|
||||
signal decode1_busy : std_ulogic;
|
||||
signal decode2_busy_in : std_ulogic;
|
||||
signal decode2_stall_out : std_ulogic;
|
||||
signal ex1_icache_inval: std_ulogic;
|
||||
signal ex1_stall_out: std_ulogic;
|
||||
signal ls1_stall_out: std_ulogic;
|
||||
signal ex1_busy_out: std_ulogic;
|
||||
signal dcache_stall_out: std_ulogic;
|
||||
|
||||
signal flush: std_ulogic;
|
||||
signal decode1_flush: std_ulogic;
|
||||
signal fetch1_flush: std_ulogic;
|
||||
|
||||
signal complete: std_ulogic;
|
||||
signal terminate: std_ulogic;
|
||||
@@ -128,6 +129,12 @@ architecture behave of core is
|
||||
-- Debug status
|
||||
signal dbg_core_is_stopped: std_ulogic;
|
||||
|
||||
-- Logging signals
|
||||
signal log_data : std_ulogic_vector(255 downto 0);
|
||||
signal log_rd_addr : std_ulogic_vector(31 downto 0);
|
||||
signal log_wr_addr : std_ulogic_vector(31 downto 0);
|
||||
signal log_rd_data : std_ulogic_vector(63 downto 0);
|
||||
|
||||
function keep_h(disable : boolean) return string is
|
||||
begin
|
||||
if disable then
|
||||
@@ -139,7 +146,6 @@ architecture behave of core is
|
||||
attribute keep_hierarchy : string;
|
||||
attribute keep_hierarchy of fetch1_0 : label is keep_h(DISABLE_FLATTEN);
|
||||
attribute keep_hierarchy of icache_0 : label is keep_h(DISABLE_FLATTEN);
|
||||
attribute keep_hierarchy of fetch2_0 : label is keep_h(DISABLE_FLATTEN);
|
||||
attribute keep_hierarchy of decode1_0 : label is keep_h(DISABLE_FLATTEN);
|
||||
attribute keep_hierarchy of decode2_0 : label is keep_h(DISABLE_FLATTEN);
|
||||
attribute keep_hierarchy of register_file_0 : label is keep_h(DISABLE_FLATTEN);
|
||||
@@ -180,45 +186,40 @@ begin
|
||||
rst => rst_fetch1,
|
||||
alt_reset_in => alt_reset_d,
|
||||
stall_in => fetch1_stall_in,
|
||||
flush_in => flush,
|
||||
flush_in => fetch1_flush,
|
||||
stop_in => dbg_core_stop,
|
||||
d_in => decode1_to_fetch1,
|
||||
e_in => execute1_to_fetch1,
|
||||
i_out => fetch1_to_icache
|
||||
i_out => fetch1_to_icache,
|
||||
log_out => log_data(42 downto 0)
|
||||
);
|
||||
|
||||
fetch1_stall_in <= icache_stall_out or decode2_stall_out;
|
||||
fetch1_stall_in <= icache_stall_out or decode1_busy;
|
||||
fetch1_flush <= flush or decode1_flush;
|
||||
|
||||
icache_0: entity work.icache
|
||||
generic map(
|
||||
SIM => SIM,
|
||||
LINE_SIZE => 64,
|
||||
NUM_LINES => 32,
|
||||
NUM_LINES => 64,
|
||||
NUM_WAYS => 2
|
||||
)
|
||||
port map(
|
||||
clk => clk,
|
||||
rst => rst_icache,
|
||||
i_in => fetch1_to_icache,
|
||||
i_out => icache_to_fetch2,
|
||||
i_out => icache_to_decode1,
|
||||
m_in => mmu_to_icache,
|
||||
flush_in => flush,
|
||||
flush_in => fetch1_flush,
|
||||
inval_in => dbg_icache_rst or ex1_icache_inval,
|
||||
stall_in => icache_stall_in,
|
||||
stall_out => icache_stall_out,
|
||||
wishbone_out => wishbone_insn_out,
|
||||
wishbone_in => wishbone_insn_in
|
||||
wishbone_in => wishbone_insn_in,
|
||||
log_out => log_data(96 downto 43)
|
||||
);
|
||||
|
||||
fetch2_0: entity work.fetch2
|
||||
port map (
|
||||
clk => clk,
|
||||
rst => rst_fetch2,
|
||||
stall_in => fetch2_stall_in,
|
||||
flush_in => flush,
|
||||
i_in => icache_to_fetch2,
|
||||
f_out => fetch2_to_decode1
|
||||
);
|
||||
|
||||
fetch2_stall_in <= decode2_stall_out;
|
||||
icache_stall_in <= decode1_busy;
|
||||
|
||||
decode1_0: entity work.decode1
|
||||
port map (
|
||||
@@ -226,8 +227,12 @@ begin
|
||||
rst => rst_dec1,
|
||||
stall_in => decode1_stall_in,
|
||||
flush_in => flush,
|
||||
f_in => fetch2_to_decode1,
|
||||
d_out => decode1_to_decode2
|
||||
flush_out => decode1_flush,
|
||||
busy_out => decode1_busy,
|
||||
f_in => icache_to_decode1,
|
||||
d_out => decode1_to_decode2,
|
||||
f_out => decode1_to_fetch1,
|
||||
log_out => log_data(109 downto 97)
|
||||
);
|
||||
|
||||
decode1_stall_in <= decode2_stall_out;
|
||||
@@ -239,7 +244,7 @@ begin
|
||||
port map (
|
||||
clk => clk,
|
||||
rst => rst_dec2,
|
||||
stall_in => decode2_stall_in,
|
||||
busy_in => decode2_busy_in,
|
||||
stall_out => decode2_stall_out,
|
||||
flush_in => flush,
|
||||
complete_in => complete,
|
||||
@@ -249,9 +254,10 @@ begin
|
||||
r_in => register_file_to_decode2,
|
||||
r_out => decode2_to_register_file,
|
||||
c_in => cr_file_to_decode2,
|
||||
c_out => decode2_to_cr_file
|
||||
c_out => decode2_to_cr_file,
|
||||
log_out => log_data(119 downto 110)
|
||||
);
|
||||
decode2_stall_in <= ex1_stall_out or ls1_stall_out;
|
||||
decode2_busy_in <= ex1_busy_out;
|
||||
|
||||
register_file_0: entity work.register_file
|
||||
generic map (
|
||||
@@ -267,7 +273,8 @@ begin
|
||||
dbg_gpr_addr => dbg_gpr_addr,
|
||||
dbg_gpr_data => dbg_gpr_data,
|
||||
sim_dump => terminate,
|
||||
sim_dump_done => sim_cr_dump
|
||||
sim_dump_done => sim_cr_dump,
|
||||
log_out => log_data(255 downto 185)
|
||||
);
|
||||
|
||||
cr_file_0: entity work.cr_file
|
||||
@@ -279,7 +286,8 @@ begin
|
||||
d_in => decode2_to_cr_file,
|
||||
d_out => cr_file_to_decode2,
|
||||
w_in => writeback_to_cr_file,
|
||||
sim_dump => sim_cr_dump
|
||||
sim_dump => sim_cr_dump,
|
||||
log_out => log_data(184 downto 172)
|
||||
);
|
||||
|
||||
execute1_0: entity work.execute1
|
||||
@@ -290,7 +298,7 @@ begin
|
||||
clk => clk,
|
||||
rst => rst_ex1,
|
||||
flush_out => flush,
|
||||
stall_out => ex1_stall_out,
|
||||
busy_out => ex1_busy_out,
|
||||
e_in => decode2_to_execute1,
|
||||
l_in => loadstore1_to_execute1,
|
||||
ext_irq_in => ext_irq,
|
||||
@@ -299,7 +307,11 @@ begin
|
||||
e_out => execute1_to_writeback,
|
||||
icache_inval => ex1_icache_inval,
|
||||
dbg_msr_out => msr,
|
||||
terminate_out => terminate
|
||||
terminate_out => terminate,
|
||||
log_out => log_data(134 downto 120),
|
||||
log_rd_addr => log_rd_addr,
|
||||
log_rd_data => log_rd_data,
|
||||
log_wr_addr => log_wr_addr
|
||||
);
|
||||
|
||||
loadstore1_0: entity work.loadstore1
|
||||
@@ -314,7 +326,7 @@ begin
|
||||
m_out => loadstore1_to_mmu,
|
||||
m_in => mmu_to_loadstore1,
|
||||
dc_stall => dcache_stall_out,
|
||||
stall_out => ls1_stall_out
|
||||
log_out => log_data(149 downto 140)
|
||||
);
|
||||
|
||||
mmu_0: entity work.mmu
|
||||
@@ -331,7 +343,7 @@ begin
|
||||
dcache_0: entity work.dcache
|
||||
generic map(
|
||||
LINE_SIZE => 64,
|
||||
NUM_LINES => 32,
|
||||
NUM_LINES => 64,
|
||||
NUM_WAYS => 2
|
||||
)
|
||||
port map (
|
||||
@@ -343,7 +355,8 @@ begin
|
||||
m_out => dcache_to_mmu,
|
||||
stall_out => dcache_stall_out,
|
||||
wishbone_in => wishbone_data_in,
|
||||
wishbone_out => wishbone_data_out
|
||||
wishbone_out => wishbone_data_out,
|
||||
log_out => log_data(171 downto 152)
|
||||
);
|
||||
|
||||
writeback_0: entity work.writeback
|
||||
@@ -356,7 +369,13 @@ begin
|
||||
complete_out => complete
|
||||
);
|
||||
|
||||
log_data(151 downto 150) <= "00";
|
||||
log_data(139 downto 135) <= "00000";
|
||||
|
||||
debug_0: entity work.core_debug
|
||||
generic map (
|
||||
LOG_LENGTH => LOG_LENGTH
|
||||
)
|
||||
port map (
|
||||
clk => clk,
|
||||
rst => rst_dbg,
|
||||
@@ -377,6 +396,10 @@ begin
|
||||
dbg_gpr_ack => dbg_gpr_ack,
|
||||
dbg_gpr_addr => dbg_gpr_addr,
|
||||
dbg_gpr_data => dbg_gpr_data,
|
||||
log_data => log_data,
|
||||
log_read_addr => log_rd_addr,
|
||||
log_read_data => log_rd_data,
|
||||
log_write_addr => log_wr_addr,
|
||||
terminated_out => terminated_out
|
||||
);
|
||||
|
||||
|
||||
123
core_debug.vhdl
123
core_debug.vhdl
@@ -3,9 +3,14 @@ use ieee.std_logic_1164.all;
|
||||
use ieee.numeric_std.all;
|
||||
|
||||
library work;
|
||||
use work.utils.all;
|
||||
use work.common.all;
|
||||
|
||||
entity core_debug is
|
||||
generic (
|
||||
-- Length of log buffer
|
||||
LOG_LENGTH : natural := 512
|
||||
);
|
||||
port (
|
||||
clk : in std_logic;
|
||||
rst : in std_logic;
|
||||
@@ -34,6 +39,12 @@ entity core_debug is
|
||||
dbg_gpr_addr : out gspr_index_t;
|
||||
dbg_gpr_data : in std_ulogic_vector(63 downto 0);
|
||||
|
||||
-- Core logging data
|
||||
log_data : in std_ulogic_vector(255 downto 0);
|
||||
log_read_addr : in std_ulogic_vector(31 downto 0);
|
||||
log_read_data : out std_ulogic_vector(63 downto 0);
|
||||
log_write_addr : out std_ulogic_vector(31 downto 0);
|
||||
|
||||
-- Misc
|
||||
terminated_out : out std_ulogic
|
||||
);
|
||||
@@ -77,6 +88,12 @@ architecture behave of core_debug is
|
||||
-- GSPR register data
|
||||
constant DBG_CORE_GSPR_DATA : std_ulogic_vector(3 downto 0) := "0101";
|
||||
|
||||
-- Log buffer address and data registers
|
||||
constant DBG_CORE_LOG_ADDR : std_ulogic_vector(3 downto 0) := "0110";
|
||||
constant DBG_CORE_LOG_DATA : std_ulogic_vector(3 downto 0) := "0111";
|
||||
|
||||
constant LOG_INDEX_BITS : natural := log2(LOG_LENGTH);
|
||||
|
||||
-- Some internal wires
|
||||
signal stat_reg : std_ulogic_vector(63 downto 0);
|
||||
|
||||
@@ -89,6 +106,12 @@ architecture behave of core_debug is
|
||||
signal do_gspr_rd : std_ulogic;
|
||||
signal gspr_index : gspr_index_t;
|
||||
|
||||
signal log_dmi_addr : std_ulogic_vector(31 downto 0) := (others => '0');
|
||||
signal log_dmi_data : std_ulogic_vector(63 downto 0) := (others => '0');
|
||||
signal do_dmi_log_rd : std_ulogic;
|
||||
signal dmi_read_log_data : std_ulogic;
|
||||
signal dmi_read_log_data_1 : std_ulogic;
|
||||
|
||||
begin
|
||||
-- Single cycle register accesses on DMI except for GSPR data
|
||||
dmi_ack <= dmi_req when dmi_addr /= DBG_CORE_GSPR_DATA
|
||||
@@ -108,6 +131,8 @@ begin
|
||||
nia when DBG_CORE_NIA,
|
||||
msr when DBG_CORE_MSR,
|
||||
dbg_gpr_data when DBG_CORE_GSPR_DATA,
|
||||
log_write_addr & log_dmi_addr when DBG_CORE_LOG_ADDR,
|
||||
log_dmi_data when DBG_CORE_LOG_DATA,
|
||||
(others => '0') when others;
|
||||
|
||||
-- DMI writes
|
||||
@@ -118,6 +143,7 @@ begin
|
||||
do_step <= '0';
|
||||
do_reset <= '0';
|
||||
do_icreset <= '0';
|
||||
do_dmi_log_rd <= '0';
|
||||
|
||||
if (rst) then
|
||||
stopping <= '0';
|
||||
@@ -151,11 +177,26 @@ begin
|
||||
end if;
|
||||
elsif dmi_addr = DBG_CORE_GSPR_INDEX then
|
||||
gspr_index <= dmi_din(gspr_index_t'left downto 0);
|
||||
elsif dmi_addr = DBG_CORE_LOG_ADDR then
|
||||
log_dmi_addr <= dmi_din(31 downto 0);
|
||||
do_dmi_log_rd <= '1';
|
||||
end if;
|
||||
else
|
||||
report("DMI read from " & to_string(dmi_addr));
|
||||
end if;
|
||||
|
||||
elsif dmi_read_log_data = '0' and dmi_read_log_data_1 = '1' then
|
||||
-- Increment log_dmi_addr after the end of a read from DBG_CORE_LOG_DATA
|
||||
log_dmi_addr(LOG_INDEX_BITS + 1 downto 0) <=
|
||||
std_ulogic_vector(unsigned(log_dmi_addr(LOG_INDEX_BITS+1 downto 0)) + 1);
|
||||
do_dmi_log_rd <= '1';
|
||||
end if;
|
||||
dmi_read_log_data_1 <= dmi_read_log_data;
|
||||
if dmi_req = '1' and dmi_addr = DBG_CORE_LOG_DATA then
|
||||
dmi_read_log_data <= '1';
|
||||
else
|
||||
dmi_read_log_data <= '0';
|
||||
end if;
|
||||
|
||||
-- Set core stop on terminate. We'll be stopping some time *after*
|
||||
-- the offending instruction, at least until we can do back flushes
|
||||
@@ -175,5 +216,87 @@ begin
|
||||
core_rst <= do_reset;
|
||||
icache_rst <= do_icreset;
|
||||
terminated_out <= terminated;
|
||||
|
||||
-- Logging RAM
|
||||
maybe_log: if LOG_LENGTH > 0 generate
|
||||
subtype log_ptr_t is unsigned(LOG_INDEX_BITS - 1 downto 0);
|
||||
type log_array_t is array(0 to LOG_LENGTH - 1) of std_ulogic_vector(255 downto 0);
|
||||
signal log_array : log_array_t;
|
||||
signal log_rd_ptr : log_ptr_t;
|
||||
signal log_wr_ptr : log_ptr_t;
|
||||
signal log_toggle : std_ulogic;
|
||||
signal log_wr_enable : std_ulogic;
|
||||
signal log_rd_ptr_latched : log_ptr_t;
|
||||
signal log_rd : std_ulogic_vector(255 downto 0);
|
||||
signal log_dmi_reading : std_ulogic;
|
||||
signal log_dmi_read_done : std_ulogic;
|
||||
|
||||
function select_dword(data : std_ulogic_vector(255 downto 0);
|
||||
addr : std_ulogic_vector(31 downto 0)) return std_ulogic_vector is
|
||||
variable firstbit : integer;
|
||||
begin
|
||||
firstbit := to_integer(unsigned(addr(1 downto 0))) * 64;
|
||||
return data(firstbit + 63 downto firstbit);
|
||||
end;
|
||||
|
||||
attribute ram_style : string;
|
||||
attribute ram_style of log_array : signal is "block";
|
||||
attribute ram_decomp : string;
|
||||
attribute ram_decomp of log_array : signal is "power";
|
||||
|
||||
begin
|
||||
-- Use MSB of read addresses to stop the logging
|
||||
log_wr_enable <= not (log_read_addr(31) or log_dmi_addr(31));
|
||||
|
||||
log_ram: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
if log_wr_enable = '1' then
|
||||
log_array(to_integer(log_wr_ptr)) <= log_data;
|
||||
end if;
|
||||
log_rd <= log_array(to_integer(log_rd_ptr_latched));
|
||||
end if;
|
||||
end process;
|
||||
|
||||
|
||||
log_buffer: process(clk)
|
||||
variable b : integer;
|
||||
variable data : std_ulogic_vector(255 downto 0);
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
if rst = '1' then
|
||||
log_wr_ptr <= (others => '0');
|
||||
log_toggle <= '0';
|
||||
elsif log_wr_enable = '1' then
|
||||
if log_wr_ptr = to_unsigned(LOG_LENGTH - 1, LOG_INDEX_BITS) then
|
||||
log_toggle <= not log_toggle;
|
||||
end if;
|
||||
log_wr_ptr <= log_wr_ptr + 1;
|
||||
end if;
|
||||
if do_dmi_log_rd = '1' then
|
||||
log_rd_ptr_latched <= unsigned(log_dmi_addr(LOG_INDEX_BITS + 1 downto 2));
|
||||
else
|
||||
log_rd_ptr_latched <= unsigned(log_read_addr(LOG_INDEX_BITS + 1 downto 2));
|
||||
end if;
|
||||
if log_dmi_read_done = '1' then
|
||||
log_dmi_data <= select_dword(log_rd, log_dmi_addr);
|
||||
else
|
||||
log_read_data <= select_dword(log_rd, log_read_addr);
|
||||
end if;
|
||||
log_dmi_read_done <= log_dmi_reading;
|
||||
log_dmi_reading <= do_dmi_log_rd;
|
||||
end if;
|
||||
end process;
|
||||
log_write_addr(LOG_INDEX_BITS - 1 downto 0) <= std_ulogic_vector(log_wr_ptr);
|
||||
log_write_addr(LOG_INDEX_BITS) <= '1';
|
||||
log_write_addr(31 downto LOG_INDEX_BITS + 1) <= (others => '0');
|
||||
end generate;
|
||||
|
||||
no_log: if LOG_LENGTH = 0 generate
|
||||
begin
|
||||
log_read_data <= (others => '0');
|
||||
log_write_addr <= x"00000001";
|
||||
end generate;
|
||||
|
||||
end behave;
|
||||
|
||||
|
||||
15
cr_file.vhdl
15
cr_file.vhdl
@@ -18,7 +18,9 @@ entity cr_file is
|
||||
w_in : in WritebackToCrFileType;
|
||||
|
||||
-- debug
|
||||
sim_dump : in std_ulogic
|
||||
sim_dump : in std_ulogic;
|
||||
|
||||
log_out : out std_ulogic_vector(12 downto 0)
|
||||
);
|
||||
end entity cr_file;
|
||||
|
||||
@@ -27,6 +29,7 @@ architecture behaviour of cr_file is
|
||||
signal crs_updated : std_ulogic_vector(31 downto 0);
|
||||
signal xerc : xer_common_t := xerc_init;
|
||||
signal xerc_updated : xer_common_t;
|
||||
signal log_data : std_ulogic_vector(12 downto 0);
|
||||
begin
|
||||
cr_create_0: process(all)
|
||||
variable hi, lo : integer := 0;
|
||||
@@ -88,4 +91,14 @@ begin
|
||||
end process;
|
||||
end generate;
|
||||
|
||||
cr_log: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
log_data <= w_in.write_cr_enable &
|
||||
w_in.write_cr_data(31 downto 28) &
|
||||
w_in.write_cr_mask;
|
||||
end if;
|
||||
end process;
|
||||
log_out <= log_data;
|
||||
|
||||
end architecture behaviour;
|
||||
|
||||
@@ -4,11 +4,15 @@ use ieee.numeric_std.all;
|
||||
|
||||
entity cr_hazard is
|
||||
generic (
|
||||
PIPELINE_DEPTH : natural := 2
|
||||
PIPELINE_DEPTH : natural := 1
|
||||
);
|
||||
port(
|
||||
clk : in std_ulogic;
|
||||
stall_in : in std_ulogic;
|
||||
busy_in : in std_ulogic;
|
||||
deferred : in std_ulogic;
|
||||
complete_in : in std_ulogic;
|
||||
flush_in : in std_ulogic;
|
||||
issuing : in std_ulogic;
|
||||
|
||||
cr_read_in : in std_ulogic;
|
||||
cr_write_in : in std_ulogic;
|
||||
@@ -22,7 +26,7 @@ architecture behaviour of cr_hazard is
|
||||
end record;
|
||||
constant pipeline_entry_init : pipeline_entry_type := (valid => '0');
|
||||
|
||||
type pipeline_t is array(0 to PIPELINE_DEPTH-1) of pipeline_entry_type;
|
||||
type pipeline_t is array(0 to PIPELINE_DEPTH) of pipeline_entry_type;
|
||||
constant pipeline_t_init : pipeline_t := (others => pipeline_entry_init);
|
||||
|
||||
signal r, rin : pipeline_t := pipeline_t_init;
|
||||
@@ -30,9 +34,7 @@ begin
|
||||
cr_hazard0: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
if stall_in = '0' then
|
||||
r <= rin;
|
||||
end if;
|
||||
r <= rin;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
@@ -41,22 +43,23 @@ begin
|
||||
begin
|
||||
v := r;
|
||||
|
||||
stall_out <= '0';
|
||||
loop_0: for i in 0 to PIPELINE_DEPTH-1 loop
|
||||
if (r(i).valid = cr_read_in) then
|
||||
stall_out <= '1';
|
||||
end if;
|
||||
end loop;
|
||||
-- XXX assumes PIPELINE_DEPTH = 1
|
||||
if complete_in = '1' then
|
||||
v(1).valid := '0';
|
||||
end if;
|
||||
stall_out <= cr_read_in and (v(0).valid or v(1).valid);
|
||||
|
||||
v(0).valid := cr_write_in;
|
||||
loop_1: for i in 0 to PIPELINE_DEPTH-2 loop
|
||||
-- propagate to next slot
|
||||
v(i+1) := r(i);
|
||||
end loop;
|
||||
|
||||
-- asynchronous output
|
||||
if cr_read_in = '0' then
|
||||
stall_out <= '0';
|
||||
-- XXX assumes PIPELINE_DEPTH = 1
|
||||
if busy_in = '0' then
|
||||
v(1) := r(0);
|
||||
v(0).valid := '0';
|
||||
end if;
|
||||
if deferred = '0' and issuing = '1' then
|
||||
v(0).valid := cr_write_in;
|
||||
end if;
|
||||
if flush_in = '1' then
|
||||
v(0).valid := '0';
|
||||
v(1).valid := '0';
|
||||
end if;
|
||||
|
||||
-- update registers
|
||||
|
||||
728
dcache.vhdl
728
dcache.vhdl
File diff suppressed because it is too large
Load Diff
167
decode1.vhdl
167
decode1.vhdl
@@ -8,19 +8,24 @@ use work.decode_types.all;
|
||||
|
||||
entity decode1 is
|
||||
port (
|
||||
clk : in std_ulogic;
|
||||
rst : in std_ulogic;
|
||||
clk : in std_ulogic;
|
||||
rst : in std_ulogic;
|
||||
|
||||
stall_in : in std_ulogic;
|
||||
flush_in : in std_ulogic;
|
||||
stall_in : in std_ulogic;
|
||||
flush_in : in std_ulogic;
|
||||
busy_out : out std_ulogic;
|
||||
flush_out : out std_ulogic;
|
||||
|
||||
f_in : in Fetch2ToDecode1Type;
|
||||
d_out : out Decode1ToDecode2Type
|
||||
f_in : in IcacheToDecode1Type;
|
||||
f_out : out Decode1ToFetch1Type;
|
||||
d_out : out Decode1ToDecode2Type;
|
||||
log_out : out std_ulogic_vector(12 downto 0)
|
||||
);
|
||||
end entity decode1;
|
||||
|
||||
architecture behaviour of decode1 is
|
||||
signal r, rin : Decode1ToDecode2Type;
|
||||
signal s : Decode1ToDecode2Type;
|
||||
|
||||
subtype major_opcode_t is unsigned(5 downto 0);
|
||||
type major_rom_array_t is array(0 to 63) of decode_rom_t;
|
||||
@@ -352,24 +357,45 @@ architecture behaviour of decode1 is
|
||||
constant nop_instr : decode_rom_t := (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0');
|
||||
constant fetch_fail_inst: decode_rom_t := (LDST, OP_FETCH_FAILED, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0');
|
||||
|
||||
signal log_data : std_ulogic_vector(12 downto 0);
|
||||
|
||||
begin
|
||||
decode1_0: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
-- Output state remains unchanged on stall, unless we are flushing
|
||||
if rst = '1' or flush_in = '1' or stall_in = '0' then
|
||||
r <= rin;
|
||||
if rst = '1' then
|
||||
r <= Decode1ToDecode2Init;
|
||||
s <= Decode1ToDecode2Init;
|
||||
elsif flush_in = '1' then
|
||||
r.valid <= '0';
|
||||
s.valid <= '0';
|
||||
elsif s.valid = '1' then
|
||||
if stall_in = '0' then
|
||||
r <= s;
|
||||
s.valid <= '0';
|
||||
end if;
|
||||
else
|
||||
s <= rin;
|
||||
s.valid <= rin.valid and r.valid and stall_in;
|
||||
if r.valid = '0' or stall_in = '0' then
|
||||
r <= rin;
|
||||
end if;
|
||||
end if;
|
||||
end if;
|
||||
end process;
|
||||
busy_out <= s.valid;
|
||||
|
||||
decode1_1: process(all)
|
||||
variable v : Decode1ToDecode2Type;
|
||||
variable f : Decode1ToFetch1Type;
|
||||
variable majorop : major_opcode_t;
|
||||
variable op_19_bits: std_ulogic_vector(2 downto 0);
|
||||
variable sprn : spr_num_t;
|
||||
variable br_nia : std_ulogic_vector(61 downto 0);
|
||||
variable br_target : std_ulogic_vector(61 downto 0);
|
||||
variable br_offset : signed(23 downto 0);
|
||||
begin
|
||||
v := r;
|
||||
v := Decode1ToDecode2Init;
|
||||
|
||||
v.valid := f_in.valid;
|
||||
v.nia := f_in.nia;
|
||||
@@ -395,6 +421,31 @@ begin
|
||||
-- major opcode 31, lots of things
|
||||
v.decode := decode_op_31_array(to_integer(unsigned(f_in.insn(10 downto 1))));
|
||||
|
||||
-- Work out ispr1/ispr2 independent of v.decode since they seem to be critical path
|
||||
sprn := decode_spr_num(f_in.insn);
|
||||
v.ispr1 := fast_spr_num(sprn);
|
||||
|
||||
if std_match(f_in.insn(10 downto 1), "01-1010011") then
|
||||
-- mfspr or mtspr
|
||||
-- Make slow SPRs single issue
|
||||
if is_fast_spr(v.ispr1) = '0' then
|
||||
v.decode.sgl_pipe := '1';
|
||||
-- send MMU-related SPRs to loadstore1
|
||||
case sprn is
|
||||
when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PRTBL =>
|
||||
v.decode.unit := LDST;
|
||||
when others =>
|
||||
end case;
|
||||
end if;
|
||||
end if;
|
||||
|
||||
elsif majorop = "010000" then
|
||||
-- CTR may be needed as input to bc
|
||||
v.decode := major_decode_rom_array(to_integer(majorop));
|
||||
if f_in.insn(23) = '0' then
|
||||
v.ispr1 := fast_spr_num(SPR_CTR);
|
||||
end if;
|
||||
|
||||
elsif majorop = "010011" then
|
||||
if decode_op_19_valid(to_integer(unsigned(f_in.insn(10 downto 1)))) = '0' then
|
||||
report "op 19 illegal subcode";
|
||||
@@ -405,6 +456,27 @@ begin
|
||||
report "op 19 sub " & to_hstring(op_19_bits);
|
||||
end if;
|
||||
|
||||
-- Work out ispr1/ispr2 independent of v.decode since they seem to be critical path
|
||||
if f_in.insn(2) = '0' then
|
||||
-- Could be OP_BCREG: bclr, bcctr, bctar
|
||||
-- Branch uses CTR as condition when BO(2) is 0. This is
|
||||
-- also used to indicate that CTR is modified (they go
|
||||
-- together).
|
||||
if f_in.insn(23) = '0' then
|
||||
v.ispr1 := fast_spr_num(SPR_CTR);
|
||||
end if;
|
||||
-- TODO: Add TAR
|
||||
if f_in.insn(10) = '0' then
|
||||
v.ispr2 := fast_spr_num(SPR_LR);
|
||||
else
|
||||
v.ispr2 := fast_spr_num(SPR_CTR);
|
||||
end if;
|
||||
else
|
||||
-- Could be OP_RFID
|
||||
v.ispr1 := fast_spr_num(SPR_SRR0);
|
||||
v.ispr2 := fast_spr_num(SPR_SRR1);
|
||||
end if;
|
||||
|
||||
elsif majorop = "011110" then
|
||||
v.decode := decode_op_30_array(to_integer(unsigned(f_in.insn(4 downto 1))));
|
||||
|
||||
@@ -422,56 +494,45 @@ begin
|
||||
v.decode := major_decode_rom_array(to_integer(majorop));
|
||||
end if;
|
||||
|
||||
-- Set ISPR1/ISPR2 when needed
|
||||
if v.decode.insn_type = OP_BC or v.decode.insn_type = OP_BCREG then
|
||||
-- Branch uses CTR as condition when BO(2) is 0. This is
|
||||
-- also used to indicate that CTR is modified (they go
|
||||
-- together).
|
||||
--
|
||||
if f_in.insn(23) = '0' then
|
||||
v.ispr1 := fast_spr_num(SPR_CTR);
|
||||
end if;
|
||||
|
||||
-- Branch source register is an SPR
|
||||
if v.decode.insn_type = OP_BCREG then
|
||||
-- TODO: Add TAR
|
||||
if f_in.insn(10) = '0' then
|
||||
v.ispr2 := fast_spr_num(SPR_LR);
|
||||
else
|
||||
v.ispr2 := fast_spr_num(SPR_CTR);
|
||||
end if;
|
||||
end if;
|
||||
elsif v.decode.insn_type = OP_MFSPR or v.decode.insn_type = OP_MTSPR then
|
||||
sprn := decode_spr_num(f_in.insn);
|
||||
v.ispr1 := fast_spr_num(sprn);
|
||||
-- Make slow SPRs single issue
|
||||
if is_fast_spr(v.ispr1) = '0' then
|
||||
v.decode.sgl_pipe := '1';
|
||||
-- send MMU-related SPRs to loadstore1
|
||||
case sprn is
|
||||
when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PRTBL =>
|
||||
v.decode.unit := LDST;
|
||||
when others =>
|
||||
end case;
|
||||
end if;
|
||||
elsif v.decode.insn_type = OP_RFID then
|
||||
report "PPC RFID";
|
||||
v.ispr1 := fast_spr_num(SPR_SRR0);
|
||||
v.ispr2 := fast_spr_num(SPR_SRR1);
|
||||
-- Branch predictor
|
||||
-- Note bclr, bcctr and bctar are predicted not taken as we have no
|
||||
-- count cache or link stack.
|
||||
br_offset := (others => '0');
|
||||
if majorop = 18 then
|
||||
-- Unconditional branches are always taken
|
||||
v.br_pred := '1';
|
||||
br_offset := signed(f_in.insn(25 downto 2));
|
||||
elsif majorop = 16 then
|
||||
-- Predict backward branches as taken, forward as untaken
|
||||
v.br_pred := f_in.insn(15);
|
||||
br_offset := resize(signed(f_in.insn(15 downto 2)), 24);
|
||||
end if;
|
||||
|
||||
if flush_in = '1' then
|
||||
v.valid := '0';
|
||||
end if;
|
||||
|
||||
if rst = '1' then
|
||||
v := Decode1ToDecode2Init;
|
||||
br_nia := f_in.nia(63 downto 2);
|
||||
if f_in.insn(1) = '1' then
|
||||
br_nia := (others => '0');
|
||||
end if;
|
||||
br_target := std_ulogic_vector(signed(br_nia) + br_offset);
|
||||
f.redirect := v.br_pred and f_in.valid and not flush_in and not s.valid;
|
||||
f.redirect_nia := br_target & "00";
|
||||
|
||||
-- Update registers
|
||||
rin <= v;
|
||||
|
||||
-- Update outputs
|
||||
d_out <= r;
|
||||
f_out <= f;
|
||||
flush_out <= f.redirect;
|
||||
end process;
|
||||
|
||||
dec1_log : process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
log_data <= std_ulogic_vector(to_unsigned(insn_type_t'pos(r.decode.insn_type), 6)) &
|
||||
r.nia(5 downto 2) &
|
||||
std_ulogic_vector(to_unsigned(unit_t'pos(r.decode.unit), 2)) &
|
||||
r.valid;
|
||||
end if;
|
||||
end process;
|
||||
log_out <= log_data;
|
||||
|
||||
end architecture behaviour;
|
||||
|
||||
64
decode2.vhdl
64
decode2.vhdl
@@ -17,7 +17,7 @@ entity decode2 is
|
||||
rst : in std_ulogic;
|
||||
|
||||
complete_in : in std_ulogic;
|
||||
stall_in : in std_ulogic;
|
||||
busy_in : in std_ulogic;
|
||||
stall_out : out std_ulogic;
|
||||
|
||||
stopped_out : out std_ulogic;
|
||||
@@ -32,7 +32,9 @@ entity decode2 is
|
||||
r_out : out Decode2ToRegisterFileType;
|
||||
|
||||
c_in : in CrFileToDecode2Type;
|
||||
c_out : out Decode2ToCrFileType
|
||||
c_out : out Decode2ToCrFileType;
|
||||
|
||||
log_out : out std_ulogic_vector(9 downto 0)
|
||||
);
|
||||
end entity decode2;
|
||||
|
||||
@@ -43,6 +45,10 @@ architecture behaviour of decode2 is
|
||||
|
||||
signal r, rin : reg_type;
|
||||
|
||||
signal deferred : std_ulogic;
|
||||
|
||||
signal log_data : std_ulogic_vector(9 downto 0);
|
||||
|
||||
type decode_input_reg_t is record
|
||||
reg_valid : std_ulogic;
|
||||
reg : gspr_index_t;
|
||||
@@ -61,8 +67,6 @@ architecture behaviour of decode2 is
|
||||
return decode_input_reg_t is
|
||||
begin
|
||||
if t = RA or (t = RA_OR_ZERO and insn_ra(insn_in) /= "00000") then
|
||||
assert is_fast_spr(ispr) = '0' report "Decode A says GPR but ISPR says SPR:" &
|
||||
to_hstring(ispr) severity failure;
|
||||
return ('1', gpr_to_gspr(insn_ra(insn_in)), reg_data);
|
||||
elsif t = SPR then
|
||||
-- ISPR must be either a valid fast SPR number or all 0 for a slow SPR.
|
||||
@@ -87,8 +91,6 @@ architecture behaviour of decode2 is
|
||||
begin
|
||||
case t is
|
||||
when RB =>
|
||||
assert is_fast_spr(ispr) = '0' report "Decode B says GPR but ISPR says SPR:" &
|
||||
to_hstring(ispr) severity failure;
|
||||
ret := ('1', gpr_to_gspr(insn_rb(insn_in)), reg_data);
|
||||
when CONST_UI =>
|
||||
ret := ('0', (others => '0'), std_ulogic_vector(resize(unsigned(insn_ui(insn_in)), 64)));
|
||||
@@ -196,6 +198,9 @@ architecture behaviour of decode2 is
|
||||
signal gpr_write : gspr_index_t;
|
||||
signal gpr_bypassable : std_ulogic;
|
||||
|
||||
signal update_gpr_write_valid : std_ulogic;
|
||||
signal update_gpr_write_reg : gspr_index_t;
|
||||
|
||||
signal gpr_a_read_valid : std_ulogic;
|
||||
signal gpr_a_read :gspr_index_t;
|
||||
signal gpr_a_bypass : std_ulogic;
|
||||
@@ -220,7 +225,8 @@ begin
|
||||
|
||||
complete_in => complete_in,
|
||||
valid_in => control_valid_in,
|
||||
stall_in => stall_in,
|
||||
busy_in => busy_in,
|
||||
deferred => deferred,
|
||||
flush_in => flush_in,
|
||||
sgl_pipe_in => control_sgl_pipe,
|
||||
stop_mark_in => d_in.stop_mark,
|
||||
@@ -229,6 +235,9 @@ begin
|
||||
gpr_write_in => gpr_write,
|
||||
gpr_bypassable => gpr_bypassable,
|
||||
|
||||
update_gpr_write_valid => update_gpr_write_valid,
|
||||
update_gpr_write_reg => update_gpr_write_reg,
|
||||
|
||||
gpr_a_read_valid_in => gpr_a_read_valid,
|
||||
gpr_a_read_in => gpr_a_read,
|
||||
|
||||
@@ -250,18 +259,24 @@ begin
|
||||
gpr_bypass_c => gpr_c_bypass
|
||||
);
|
||||
|
||||
deferred <= r.e.valid and busy_in;
|
||||
|
||||
decode2_0: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
if rin.e.valid = '1' then
|
||||
report "execute " & to_hstring(rin.e.nia);
|
||||
if rst = '1' or flush_in = '1' or deferred = '0' then
|
||||
if rin.e.valid = '1' then
|
||||
report "execute " & to_hstring(rin.e.nia);
|
||||
end if;
|
||||
r <= rin;
|
||||
end if;
|
||||
r <= rin;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
r_out.read1_reg <= gpr_or_spr_to_gspr(insn_ra(d_in.insn), d_in.ispr1);
|
||||
r_out.read2_reg <= gpr_or_spr_to_gspr(insn_rb(d_in.insn), d_in.ispr2);
|
||||
r_out.read1_reg <= d_in.ispr1 when d_in.decode.input_reg_a = SPR
|
||||
else gpr_to_gspr(insn_ra(d_in.insn));
|
||||
r_out.read2_reg <= d_in.ispr2 when d_in.decode.input_reg_b = SPR
|
||||
else gpr_to_gspr(insn_rb(d_in.insn));
|
||||
r_out.read3_reg <= insn_rs(d_in.insn);
|
||||
|
||||
c_out.read <= d_in.decode.input_cr;
|
||||
@@ -343,6 +358,7 @@ begin
|
||||
v.e.sign_extend := d_in.decode.sign_extend;
|
||||
v.e.update := d_in.decode.update;
|
||||
v.e.reserve := d_in.decode.reserve;
|
||||
v.e.br_pred := d_in.br_pred;
|
||||
|
||||
-- issue control
|
||||
control_valid_in <= d_in.valid;
|
||||
@@ -354,6 +370,13 @@ begin
|
||||
if EX1_BYPASS and d_in.decode.unit = ALU then
|
||||
gpr_bypassable <= '1';
|
||||
end if;
|
||||
update_gpr_write_valid <= d_in.decode.update;
|
||||
update_gpr_write_reg <= decoded_reg_a.reg;
|
||||
if v.e.lr = '1' then
|
||||
-- there are no instructions that have both update=1 and lr=1
|
||||
update_gpr_write_valid <= '1';
|
||||
update_gpr_write_reg <= fast_spr_num(SPR_LR);
|
||||
end if;
|
||||
|
||||
gpr_a_read_valid <= decoded_reg_a.reg_valid;
|
||||
gpr_a_read <= decoded_reg_a.reg;
|
||||
@@ -371,7 +394,7 @@ begin
|
||||
v.e.insn_type := OP_ILLEGAL;
|
||||
end if;
|
||||
|
||||
if rst = '1' then
|
||||
if rst = '1' or flush_in = '1' then
|
||||
v.e := Decode2ToExecute1Init;
|
||||
end if;
|
||||
|
||||
@@ -381,4 +404,19 @@ begin
|
||||
-- Update outputs
|
||||
e_out <= r.e;
|
||||
end process;
|
||||
|
||||
dec2_log : process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
log_data <= r.e.nia(5 downto 2) &
|
||||
r.e.valid &
|
||||
stopped_out &
|
||||
stall_out &
|
||||
r.e.bypass_data3 &
|
||||
r.e.bypass_data2 &
|
||||
r.e.bypass_data1;
|
||||
end if;
|
||||
end process;
|
||||
log_out <= log_data;
|
||||
|
||||
end architecture behaviour;
|
||||
|
||||
273
execute1.vhdl
273
execute1.vhdl
@@ -20,7 +20,7 @@ entity execute1 is
|
||||
|
||||
-- asynchronous
|
||||
flush_out : out std_ulogic;
|
||||
stall_out : out std_ulogic;
|
||||
busy_out : out std_ulogic;
|
||||
|
||||
e_in : in Decode2ToExecute1Type;
|
||||
l_in : in Loadstore1ToExecute1Type;
|
||||
@@ -36,34 +36,44 @@ entity execute1 is
|
||||
dbg_msr_out : out std_ulogic_vector(63 downto 0);
|
||||
|
||||
icache_inval : out std_ulogic;
|
||||
terminate_out : out std_ulogic
|
||||
terminate_out : out std_ulogic;
|
||||
|
||||
log_out : out std_ulogic_vector(14 downto 0);
|
||||
log_rd_addr : out std_ulogic_vector(31 downto 0);
|
||||
log_rd_data : in std_ulogic_vector(63 downto 0);
|
||||
log_wr_addr : in std_ulogic_vector(31 downto 0)
|
||||
);
|
||||
end entity execute1;
|
||||
|
||||
architecture behaviour of execute1 is
|
||||
type reg_type is record
|
||||
e : Execute1ToWritebackType;
|
||||
busy: std_ulogic;
|
||||
terminate: std_ulogic;
|
||||
lr_update : std_ulogic;
|
||||
next_lr : std_ulogic_vector(63 downto 0);
|
||||
mul_in_progress : std_ulogic;
|
||||
div_in_progress : std_ulogic;
|
||||
cntz_in_progress : std_ulogic;
|
||||
slow_op_insn : insn_type_t;
|
||||
slow_op_dest : gpr_index_t;
|
||||
slow_op_rc : std_ulogic;
|
||||
slow_op_oe : std_ulogic;
|
||||
slow_op_xerc : xer_common_t;
|
||||
ldst_nia : std_ulogic_vector(63 downto 0);
|
||||
last_nia : std_ulogic_vector(63 downto 0);
|
||||
log_addr_spr : std_ulogic_vector(31 downto 0);
|
||||
end record;
|
||||
constant reg_type_init : reg_type :=
|
||||
(e => Execute1ToWritebackInit, lr_update => '0',
|
||||
(e => Execute1ToWritebackInit, busy => '0', lr_update => '0', terminate => '0',
|
||||
mul_in_progress => '0', div_in_progress => '0', cntz_in_progress => '0',
|
||||
slow_op_rc => '0', slow_op_oe => '0', slow_op_xerc => xerc_init,
|
||||
next_lr => (others => '0'), ldst_nia => (others => '0'), others => (others => '0'));
|
||||
slow_op_insn => OP_ILLEGAL, slow_op_rc => '0', slow_op_oe => '0', slow_op_xerc => xerc_init,
|
||||
next_lr => (others => '0'), last_nia => (others => '0'), others => (others => '0'));
|
||||
|
||||
signal r, rin : reg_type;
|
||||
|
||||
signal a_in, b_in, c_in : std_ulogic_vector(63 downto 0);
|
||||
|
||||
signal valid_in : std_ulogic;
|
||||
signal ctrl: ctrl_t := (irq_state => WRITE_SRR0, others => (others => '0'));
|
||||
signal ctrl_tmp: ctrl_t := (irq_state => WRITE_SRR0, others => (others => '0'));
|
||||
signal right_shift, rot_clear_left, rot_clear_right: std_ulogic;
|
||||
@@ -72,8 +82,6 @@ architecture behaviour of execute1 is
|
||||
signal rotator_carry: std_ulogic;
|
||||
signal logical_result: std_ulogic_vector(63 downto 0);
|
||||
signal countzero_result: std_ulogic_vector(63 downto 0);
|
||||
signal popcnt_result: std_ulogic_vector(63 downto 0);
|
||||
signal parity_result: std_ulogic_vector(63 downto 0);
|
||||
|
||||
-- multiply signals
|
||||
signal x_to_multiply: Execute1ToMultiplyType;
|
||||
@@ -83,6 +91,11 @@ architecture behaviour of execute1 is
|
||||
signal x_to_divider: Execute1ToDividerType;
|
||||
signal divider_to_x: DividerToExecute1Type;
|
||||
|
||||
-- signals for logging
|
||||
signal exception_log : std_ulogic;
|
||||
signal irq_valid_log : std_ulogic;
|
||||
signal log_data : std_ulogic_vector(14 downto 0);
|
||||
|
||||
type privilege_level is (USER, SUPER);
|
||||
type op_privilege_array is array(insn_type_t) of privilege_level;
|
||||
constant op_privilege: op_privilege_array := (
|
||||
@@ -193,9 +206,7 @@ begin
|
||||
invert_in => e_in.invert_a,
|
||||
invert_out => e_in.invert_out,
|
||||
result => logical_result,
|
||||
datalen => e_in.data_len,
|
||||
popcnt => popcnt_result,
|
||||
parity => parity_result
|
||||
datalen => e_in.data_len
|
||||
);
|
||||
|
||||
countzero_0: entity work.zero_counter
|
||||
@@ -223,11 +234,17 @@ begin
|
||||
);
|
||||
|
||||
dbg_msr_out <= ctrl.msr;
|
||||
log_rd_addr <= r.log_addr_spr;
|
||||
|
||||
a_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data1 = '1' else e_in.read_data1;
|
||||
b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2;
|
||||
c_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data3 = '1' else e_in.read_data3;
|
||||
|
||||
busy_out <= l_in.busy or r.busy;
|
||||
valid_in <= e_in.valid and not busy_out;
|
||||
|
||||
terminate_out <= r.terminate;
|
||||
|
||||
execute1_0: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
@@ -238,7 +255,7 @@ begin
|
||||
else
|
||||
r <= rin;
|
||||
ctrl <= ctrl_tmp;
|
||||
assert not (r.lr_update = '1' and e_in.valid = '1')
|
||||
assert not (r.lr_update = '1' and valid_in = '1')
|
||||
report "LR update collision with valid in EX1"
|
||||
severity failure;
|
||||
if r.lr_update = '1' then
|
||||
@@ -274,7 +291,6 @@ begin
|
||||
variable sign1, sign2 : std_ulogic;
|
||||
variable abs1, abs2 : signed(63 downto 0);
|
||||
variable overflow : std_ulogic;
|
||||
variable negative : std_ulogic;
|
||||
variable zerohi, zerolo : std_ulogic;
|
||||
variable msb_a, msb_b : std_ulogic;
|
||||
variable a_lt : std_ulogic;
|
||||
@@ -284,11 +300,18 @@ begin
|
||||
variable exception_nextpc : std_ulogic;
|
||||
variable trapval : std_ulogic_vector(4 downto 0);
|
||||
variable illegal : std_ulogic;
|
||||
variable is_branch : std_ulogic;
|
||||
variable taken_branch : std_ulogic;
|
||||
variable abs_branch : std_ulogic;
|
||||
variable spr_val : std_ulogic_vector(63 downto 0);
|
||||
begin
|
||||
result := (others => '0');
|
||||
result_with_carry := (others => '0');
|
||||
result_en := '0';
|
||||
newcrf := (others => '0');
|
||||
is_branch := '0';
|
||||
taken_branch := '0';
|
||||
abs_branch := '0';
|
||||
|
||||
v := r;
|
||||
v.e := Execute1ToWritebackInit;
|
||||
@@ -334,32 +357,7 @@ begin
|
||||
v.div_in_progress := '0';
|
||||
v.cntz_in_progress := '0';
|
||||
|
||||
-- signals to multiply unit
|
||||
x_to_multiply <= Execute1ToMultiplyInit;
|
||||
x_to_multiply.insn_type <= e_in.insn_type;
|
||||
x_to_multiply.is_32bit <= e_in.is_32bit;
|
||||
|
||||
if e_in.is_32bit = '1' then
|
||||
if e_in.is_signed = '1' then
|
||||
x_to_multiply.data1 <= (others => a_in(31));
|
||||
x_to_multiply.data1(31 downto 0) <= a_in(31 downto 0);
|
||||
x_to_multiply.data2 <= (others => b_in(31));
|
||||
x_to_multiply.data2(31 downto 0) <= b_in(31 downto 0);
|
||||
else
|
||||
x_to_multiply.data1 <= '0' & x"00000000" & a_in(31 downto 0);
|
||||
x_to_multiply.data2 <= '0' & x"00000000" & b_in(31 downto 0);
|
||||
end if;
|
||||
else
|
||||
if e_in.is_signed = '1' then
|
||||
x_to_multiply.data1 <= a_in(63) & a_in;
|
||||
x_to_multiply.data2 <= b_in(63) & b_in;
|
||||
else
|
||||
x_to_multiply.data1 <= '0' & a_in;
|
||||
x_to_multiply.data2 <= '0' & b_in;
|
||||
end if;
|
||||
end if;
|
||||
|
||||
-- signals to divide unit
|
||||
-- signals to multiply and divide units
|
||||
sign1 := '0';
|
||||
sign2 := '0';
|
||||
if e_in.is_signed = '1' then
|
||||
@@ -383,15 +381,22 @@ begin
|
||||
abs2 := - signed(b_in);
|
||||
end if;
|
||||
|
||||
x_to_multiply <= Execute1ToMultiplyInit;
|
||||
x_to_multiply.is_32bit <= e_in.is_32bit;
|
||||
|
||||
x_to_divider <= Execute1ToDividerInit;
|
||||
x_to_divider.is_signed <= e_in.is_signed;
|
||||
x_to_divider.is_32bit <= e_in.is_32bit;
|
||||
if e_in.insn_type = OP_MOD then
|
||||
x_to_divider.is_modulus <= '1';
|
||||
end if;
|
||||
|
||||
x_to_multiply.neg_result <= sign1 xor sign2;
|
||||
x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus);
|
||||
if e_in.is_32bit = '0' then
|
||||
-- 64-bit forms
|
||||
x_to_multiply.data1 <= std_ulogic_vector(abs1);
|
||||
x_to_multiply.data2 <= std_ulogic_vector(abs2);
|
||||
if e_in.insn_type = OP_DIVE then
|
||||
x_to_divider.is_extended <= '1';
|
||||
end if;
|
||||
@@ -399,6 +404,8 @@ begin
|
||||
x_to_divider.divisor <= std_ulogic_vector(abs2);
|
||||
else
|
||||
-- 32-bit forms
|
||||
x_to_multiply.data1 <= x"00000000" & std_ulogic_vector(abs1(31 downto 0));
|
||||
x_to_multiply.data2 <= x"00000000" & std_ulogic_vector(abs2(31 downto 0));
|
||||
x_to_divider.is_extended <= '0';
|
||||
if e_in.insn_type = OP_DIVE then -- extended forms
|
||||
x_to_divider.dividend <= std_ulogic_vector(abs1(31 downto 0)) & x"00000000";
|
||||
@@ -426,9 +433,9 @@ begin
|
||||
end if;
|
||||
end if;
|
||||
|
||||
terminate_out <= '0';
|
||||
v.terminate := '0';
|
||||
icache_inval <= '0';
|
||||
stall_out <= '0';
|
||||
v.busy := '0';
|
||||
f_out <= Execute1ToFetch1TypeInit;
|
||||
-- send MSR[IR] and ~MSR[PR] up to fetch1
|
||||
f_out.virt_mode <= ctrl.msr(MSR_IR);
|
||||
@@ -450,6 +457,9 @@ begin
|
||||
v.e.exc_write_enable := '0';
|
||||
v.e.exc_write_reg := fast_spr_num(SPR_SRR0);
|
||||
v.e.exc_write_data := e_in.nia;
|
||||
if valid_in = '1' then
|
||||
v.last_nia := e_in.nia;
|
||||
end if;
|
||||
|
||||
if ctrl.irq_state = WRITE_SRR1 then
|
||||
v.e.exc_write_reg := fast_spr_num(SPR_SRR1);
|
||||
@@ -466,10 +476,10 @@ begin
|
||||
f_out.virt_mode <= '0';
|
||||
f_out.priv_mode <= '1';
|
||||
f_out.redirect_nia <= ctrl.irq_nia;
|
||||
v.e.valid := e_in.valid;
|
||||
v.e.valid := '1';
|
||||
report "Writing SRR1: " & to_hstring(ctrl.srr1);
|
||||
|
||||
elsif irq_valid = '1' and e_in.valid = '1' then
|
||||
elsif irq_valid = '1' and valid_in = '1' then
|
||||
-- we need two cycles to write srr0 and 1
|
||||
-- will need more when we have to write HEIR
|
||||
-- Don't deliver the interrupt until we have a valid instruction
|
||||
@@ -477,7 +487,7 @@ begin
|
||||
exception := '1';
|
||||
ctrl_tmp.srr1 <= msr_copy(ctrl.msr);
|
||||
|
||||
elsif e_in.valid = '1' and ctrl.msr(MSR_PR) = '1' and
|
||||
elsif valid_in = '1' and ctrl.msr(MSR_PR) = '1' and
|
||||
instr_is_privileged(e_in.insn_type, e_in.insn) then
|
||||
-- generate a program interrupt
|
||||
exception := '1';
|
||||
@@ -487,12 +497,13 @@ begin
|
||||
ctrl_tmp.srr1(63 - 45) <= '1';
|
||||
report "privileged instruction";
|
||||
|
||||
elsif e_in.valid = '1' and e_in.unit = ALU then
|
||||
elsif valid_in = '1' and e_in.unit = ALU then
|
||||
|
||||
report "execute nia " & to_hstring(e_in.nia);
|
||||
|
||||
v.e.valid := '1';
|
||||
v.e.write_reg := e_in.write_reg;
|
||||
v.slow_op_insn := e_in.insn_type;
|
||||
v.slow_op_dest := gspr_to_gpr(e_in.write_reg);
|
||||
v.slow_op_rc := e_in.rc;
|
||||
v.slow_op_oe := e_in.oe;
|
||||
@@ -521,7 +532,7 @@ begin
|
||||
-- check bits 1-10 of the instruction to make sure it's attn
|
||||
-- if not then it is illegal
|
||||
if e_in.insn(10 downto 1) = "0100000000" then
|
||||
terminate_out <= '1';
|
||||
v.terminate := '1';
|
||||
report "ATTN";
|
||||
else
|
||||
illegal := '1';
|
||||
@@ -612,16 +623,13 @@ begin
|
||||
end if;
|
||||
end if;
|
||||
end if;
|
||||
when OP_AND | OP_OR | OP_XOR =>
|
||||
when OP_AND | OP_OR | OP_XOR | OP_POPCNT | OP_PRTY | OP_CMPB | OP_EXTS =>
|
||||
result := logical_result;
|
||||
result_en := '1';
|
||||
when OP_B =>
|
||||
f_out.redirect <= '1';
|
||||
if (insn_aa(e_in.insn)) then
|
||||
f_out.redirect_nia <= b_in;
|
||||
else
|
||||
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in));
|
||||
end if;
|
||||
is_branch := '1';
|
||||
taken_branch := '1';
|
||||
abs_branch := insn_aa(e_in.insn);
|
||||
when OP_BC =>
|
||||
-- read_data1 is CTR
|
||||
bo := insn_bo(e_in.insn);
|
||||
@@ -631,14 +639,9 @@ begin
|
||||
result_en := '1';
|
||||
v.e.write_reg := fast_spr_num(SPR_CTR);
|
||||
end if;
|
||||
if ppc_bc_taken(bo, bi, e_in.cr, a_in) = 1 then
|
||||
f_out.redirect <= '1';
|
||||
if (insn_aa(e_in.insn)) then
|
||||
f_out.redirect_nia <= b_in;
|
||||
else
|
||||
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in));
|
||||
end if;
|
||||
end if;
|
||||
is_branch := '1';
|
||||
taken_branch := ppc_bc_taken(bo, bi, e_in.cr, a_in);
|
||||
abs_branch := insn_aa(e_in.insn);
|
||||
when OP_BCREG =>
|
||||
-- read_data1 is CTR
|
||||
-- read_data2 is target register (CTR, LR or TAR)
|
||||
@@ -649,7 +652,7 @@ begin
|
||||
result_en := '1';
|
||||
v.e.write_reg := fast_spr_num(SPR_CTR);
|
||||
end if;
|
||||
if ppc_bc_taken(bo, bi, e_in.cr, a_in) = 1 then
|
||||
if ppc_bc_taken(bo, bi, e_in.cr, a_in) = '1' then
|
||||
f_out.redirect <= '1';
|
||||
f_out.redirect_nia <= b_in(63 downto 2) & "00";
|
||||
end if;
|
||||
@@ -670,27 +673,10 @@ begin
|
||||
ctrl_tmp.msr(MSR_DR) <= '1';
|
||||
end if;
|
||||
|
||||
when OP_CMPB =>
|
||||
result := ppc_cmpb(c_in, b_in);
|
||||
result_en := '1';
|
||||
when OP_CNTZ =>
|
||||
v.e.valid := '0';
|
||||
v.cntz_in_progress := '1';
|
||||
stall_out <= '1';
|
||||
when OP_EXTS =>
|
||||
-- note data_len is a 1-hot encoding
|
||||
negative := (e_in.data_len(0) and c_in(7)) or
|
||||
(e_in.data_len(1) and c_in(15)) or
|
||||
(e_in.data_len(2) and c_in(31));
|
||||
result := (others => negative);
|
||||
if e_in.data_len(2) = '1' then
|
||||
result(31 downto 16) := c_in(31 downto 16);
|
||||
end if;
|
||||
if e_in.data_len(2) = '1' or e_in.data_len(1) = '1' then
|
||||
result(15 downto 8) := c_in(15 downto 8);
|
||||
end if;
|
||||
result(7 downto 0) := c_in(7 downto 0);
|
||||
result_en := '1';
|
||||
v.busy := '1';
|
||||
when OP_ISEL =>
|
||||
crbit := to_integer(unsigned(insn_bc(e_in.insn)));
|
||||
if e_in.cr(31-crbit) = '1' then
|
||||
@@ -762,19 +748,25 @@ begin
|
||||
result(63-45) := v.e.xerc.ca32;
|
||||
end if;
|
||||
else
|
||||
spr_val := c_in;
|
||||
case decode_spr_num(e_in.insn) is
|
||||
when SPR_TB =>
|
||||
result := ctrl.tb;
|
||||
spr_val := ctrl.tb;
|
||||
when SPR_DEC =>
|
||||
result := ctrl.dec;
|
||||
spr_val := ctrl.dec;
|
||||
when 724 => -- LOG_ADDR SPR
|
||||
spr_val := log_wr_addr & r.log_addr_spr;
|
||||
when 725 => -- LOG_DATA SPR
|
||||
spr_val := log_rd_data;
|
||||
v.log_addr_spr := std_ulogic_vector(unsigned(r.log_addr_spr) + 1);
|
||||
when others =>
|
||||
-- mfspr from unimplemented SPRs should be a nop in
|
||||
-- supervisor mode and a program interrupt for user mode
|
||||
result := c_in;
|
||||
if ctrl.msr(MSR_PR) = '1' then
|
||||
illegal := '1';
|
||||
end if;
|
||||
end case;
|
||||
result := spr_val;
|
||||
end if;
|
||||
when OP_MFCR =>
|
||||
if e_in.insn(20) = '0' then
|
||||
@@ -840,6 +832,8 @@ begin
|
||||
case decode_spr_num(e_in.insn) is
|
||||
when SPR_DEC =>
|
||||
ctrl_tmp.dec <= c_in;
|
||||
when 724 => -- LOG_ADDR SPR
|
||||
v.log_addr_spr := c_in(31 downto 0);
|
||||
when others =>
|
||||
-- mtspr to unimplemented SPRs should be a nop in
|
||||
-- supervisor mode and a program interrupt for user mode
|
||||
@@ -848,12 +842,6 @@ begin
|
||||
end if;
|
||||
end case;
|
||||
end if;
|
||||
when OP_POPCNT =>
|
||||
result := popcnt_result;
|
||||
result_en := '1';
|
||||
when OP_PRTY =>
|
||||
result := parity_result;
|
||||
result_en := '1';
|
||||
when OP_RLC | OP_RLCL | OP_RLCR | OP_SHL | OP_SHR | OP_EXTSWSLI =>
|
||||
result := rotator_result;
|
||||
if e_in.output_carry = '1' then
|
||||
@@ -871,53 +859,65 @@ begin
|
||||
when OP_MUL_L64 | OP_MUL_H64 | OP_MUL_H32 =>
|
||||
v.e.valid := '0';
|
||||
v.mul_in_progress := '1';
|
||||
stall_out <= '1';
|
||||
v.busy := '1';
|
||||
x_to_multiply.valid <= '1';
|
||||
|
||||
when OP_DIV | OP_DIVE | OP_MOD =>
|
||||
v.e.valid := '0';
|
||||
v.div_in_progress := '1';
|
||||
stall_out <= '1';
|
||||
v.busy := '1';
|
||||
x_to_divider.valid <= '1';
|
||||
|
||||
when others =>
|
||||
terminate_out <= '1';
|
||||
v.terminate := '1';
|
||||
report "illegal";
|
||||
end case;
|
||||
|
||||
v.e.rc := e_in.rc and e_in.valid;
|
||||
v.e.rc := e_in.rc and valid_in;
|
||||
|
||||
-- Mispredicted branches cause a redirect
|
||||
if is_branch = '1' and taken_branch /= e_in.br_pred then
|
||||
f_out.redirect <= '1';
|
||||
if taken_branch = '1' then
|
||||
if abs_branch = '1' then
|
||||
f_out.redirect_nia <= b_in;
|
||||
else
|
||||
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in));
|
||||
end if;
|
||||
else
|
||||
f_out.redirect_nia <= next_nia;
|
||||
end if;
|
||||
end if;
|
||||
|
||||
-- Update LR on the next cycle after a branch link
|
||||
--
|
||||
-- WARNING: The LR update isn't tracked by our hazard tracker. This
|
||||
-- will work (well I hope) because it only happens on branches
|
||||
-- which will flush all decoded instructions. By the time
|
||||
-- fetch catches up, we'll have the new LR. This will
|
||||
-- *not* work properly however if we have a branch predictor,
|
||||
-- in which case the solution would probably be to keep a
|
||||
-- local cache of the updated LR in execute1 (flushed on
|
||||
-- exceptions) that is used instead of the value from
|
||||
-- decode when its content is valid.
|
||||
-- If we're not writing back anything else, we can write back LR
|
||||
-- this cycle, otherwise we take an extra cycle. We use the
|
||||
-- exc_write path since next_nia is written through that path
|
||||
-- in other places.
|
||||
if e_in.lr = '1' then
|
||||
v.lr_update := '1';
|
||||
v.next_lr := next_nia;
|
||||
v.e.valid := '0';
|
||||
report "Delayed LR update to " & to_hstring(next_nia);
|
||||
stall_out <= '1';
|
||||
if result_en = '0' then
|
||||
v.e.exc_write_enable := '1';
|
||||
v.e.exc_write_data := next_nia;
|
||||
v.e.exc_write_reg := fast_spr_num(SPR_LR);
|
||||
else
|
||||
v.lr_update := '1';
|
||||
v.next_lr := next_nia;
|
||||
v.e.valid := '0';
|
||||
report "Delayed LR update to " & to_hstring(next_nia);
|
||||
v.busy := '1';
|
||||
end if;
|
||||
end if;
|
||||
|
||||
elsif e_in.valid = '1' then
|
||||
elsif valid_in = '1' then
|
||||
-- instruction for other units, i.e. LDST
|
||||
v.ldst_nia := e_in.nia;
|
||||
v.e.valid := '0';
|
||||
if e_in.unit = LDST then
|
||||
lv.valid := '1';
|
||||
end if;
|
||||
|
||||
elsif r.lr_update = '1' then
|
||||
result_en := '1';
|
||||
result := r.next_lr;
|
||||
v.e.write_reg := fast_spr_num(SPR_LR);
|
||||
v.e.exc_write_enable := '1';
|
||||
v.e.exc_write_data := r.next_lr;
|
||||
v.e.exc_write_reg := fast_spr_num(SPR_LR);
|
||||
v.e.valid := '1';
|
||||
elsif r.cntz_in_progress = '1' then
|
||||
-- cnt[lt]z always takes two cycles
|
||||
@@ -931,8 +931,18 @@ begin
|
||||
if (r.mul_in_progress = '1' and multiply_to_x.valid = '1') or
|
||||
(r.div_in_progress = '1' and divider_to_x.valid = '1') then
|
||||
if r.mul_in_progress = '1' then
|
||||
result := multiply_to_x.write_reg_data;
|
||||
overflow := multiply_to_x.overflow;
|
||||
overflow := '0';
|
||||
case r.slow_op_insn is
|
||||
when OP_MUL_H32 =>
|
||||
result := multiply_to_x.result(63 downto 32) &
|
||||
multiply_to_x.result(63 downto 32);
|
||||
when OP_MUL_H64 =>
|
||||
result := multiply_to_x.result(127 downto 64);
|
||||
when others =>
|
||||
-- i.e. OP_MUL_L64
|
||||
result := multiply_to_x.result(63 downto 0);
|
||||
overflow := multiply_to_x.overflow;
|
||||
end case;
|
||||
else
|
||||
result := divider_to_x.write_reg_data;
|
||||
overflow := divider_to_x.overflow;
|
||||
@@ -952,7 +962,7 @@ begin
|
||||
end if;
|
||||
v.e.valid := '1';
|
||||
else
|
||||
stall_out <= '1';
|
||||
v.busy := '1';
|
||||
v.mul_in_progress := r.mul_in_progress;
|
||||
v.div_in_progress := r.div_in_progress;
|
||||
end if;
|
||||
@@ -973,7 +983,8 @@ begin
|
||||
v.e.exc_write_data := next_nia;
|
||||
end if;
|
||||
ctrl_tmp.irq_state <= WRITE_SRR1;
|
||||
v.e.valid := '1';
|
||||
v.busy := '1';
|
||||
v.e.valid := '0';
|
||||
end if;
|
||||
|
||||
v.e.write_data := result;
|
||||
@@ -1002,10 +1013,9 @@ begin
|
||||
end if;
|
||||
v.e.exc_write_enable := '1';
|
||||
v.e.exc_write_reg := fast_spr_num(SPR_SRR0);
|
||||
v.e.exc_write_data := r.ldst_nia;
|
||||
report "ldst exception writing srr0=" & to_hstring(r.ldst_nia);
|
||||
v.e.exc_write_data := r.last_nia;
|
||||
report "ldst exception writing srr0=" & to_hstring(r.last_nia);
|
||||
ctrl_tmp.irq_state <= WRITE_SRR1;
|
||||
v.e.valid := '1'; -- complete the original load or store
|
||||
end if;
|
||||
|
||||
-- Outputs to loadstore1 (async)
|
||||
@@ -1040,5 +1050,26 @@ begin
|
||||
l_out <= lv;
|
||||
e_out <= r.e;
|
||||
flush_out <= f_out.redirect;
|
||||
|
||||
exception_log <= exception;
|
||||
irq_valid_log <= irq_valid;
|
||||
end process;
|
||||
|
||||
ex1_log : process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
log_data <= ctrl.msr(MSR_EE) & ctrl.msr(MSR_PR) &
|
||||
ctrl.msr(MSR_IR) & ctrl.msr(MSR_DR) &
|
||||
exception_log &
|
||||
irq_valid_log &
|
||||
std_ulogic_vector(to_unsigned(irq_state_t'pos(ctrl.irq_state), 1)) &
|
||||
"000" &
|
||||
r.e.write_enable &
|
||||
r.e.valid &
|
||||
f_out.redirect &
|
||||
r.busy &
|
||||
flush_out;
|
||||
end if;
|
||||
end process;
|
||||
log_out <= log_data;
|
||||
end architecture behaviour;
|
||||
|
||||
17
fetch1.vhdl
17
fetch1.vhdl
@@ -23,8 +23,14 @@ entity fetch1 is
|
||||
-- redirect from execution unit
|
||||
e_in : in Execute1ToFetch1Type;
|
||||
|
||||
-- redirect from decode1
|
||||
d_in : in Decode1ToFetch1Type;
|
||||
|
||||
-- Request to icache
|
||||
i_out : out Fetch1ToIcacheType
|
||||
i_out : out Fetch1ToIcacheType;
|
||||
|
||||
-- outputs to logger
|
||||
log_out : out std_ulogic_vector(42 downto 0)
|
||||
);
|
||||
end entity fetch1;
|
||||
|
||||
@@ -35,16 +41,18 @@ architecture behaviour of fetch1 is
|
||||
end record;
|
||||
signal r, r_next : Fetch1ToIcacheType;
|
||||
signal r_int, r_next_int : reg_internal_t;
|
||||
signal log_nia : std_ulogic_vector(42 downto 0);
|
||||
begin
|
||||
|
||||
regs : process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
log_nia <= r.nia(63) & r.nia(43 downto 2);
|
||||
if r /= r_next then
|
||||
report "fetch1 rst:" & std_ulogic'image(rst) &
|
||||
" IR:" & std_ulogic'image(e_in.virt_mode) &
|
||||
" P:" & std_ulogic'image(e_in.priv_mode) &
|
||||
" R:" & std_ulogic'image(e_in.redirect) &
|
||||
" R:" & std_ulogic'image(e_in.redirect) & std_ulogic'image(d_in.redirect) &
|
||||
" S:" & std_ulogic'image(stall_in) &
|
||||
" T:" & std_ulogic'image(stop_in) &
|
||||
" nia:" & to_hstring(r_next.nia) &
|
||||
@@ -54,6 +62,7 @@ begin
|
||||
r_int <= r_next_int;
|
||||
end if;
|
||||
end process;
|
||||
log_out <= log_nia;
|
||||
|
||||
comb : process(all)
|
||||
variable v : Fetch1ToIcacheType;
|
||||
@@ -62,6 +71,7 @@ begin
|
||||
begin
|
||||
v := r;
|
||||
v_int := r_int;
|
||||
v.sequential := '0';
|
||||
|
||||
if rst = '1' then
|
||||
if alt_reset_in = '1' then
|
||||
@@ -76,6 +86,8 @@ begin
|
||||
v.nia := e_in.redirect_nia;
|
||||
v.virt_mode := e_in.virt_mode;
|
||||
v.priv_mode := e_in.priv_mode;
|
||||
elsif d_in.redirect = '1' then
|
||||
v.nia := d_in.redirect_nia;
|
||||
elsif stall_in = '0' then
|
||||
|
||||
-- For debug stop/step to work properly we need a little bit of
|
||||
@@ -122,6 +134,7 @@ begin
|
||||
|
||||
if increment then
|
||||
v.nia := std_logic_vector(unsigned(v.nia) + 4);
|
||||
v.sequential := '1';
|
||||
end if;
|
||||
end if;
|
||||
|
||||
|
||||
123
fetch2.vhdl
123
fetch2.vhdl
@@ -1,123 +0,0 @@
|
||||
library ieee;
|
||||
use ieee.std_logic_1164.all;
|
||||
use ieee.numeric_std.all;
|
||||
|
||||
library work;
|
||||
use work.common.all;
|
||||
use work.wishbone_types.all;
|
||||
|
||||
entity fetch2 is
|
||||
port(
|
||||
clk : in std_ulogic;
|
||||
rst : in std_ulogic;
|
||||
|
||||
stall_in : in std_ulogic;
|
||||
flush_in : in std_ulogic;
|
||||
|
||||
-- Results from icache
|
||||
i_in : in IcacheToFetch2Type;
|
||||
|
||||
-- Output to decode
|
||||
f_out : out Fetch2ToDecode1Type
|
||||
);
|
||||
end entity fetch2;
|
||||
|
||||
architecture behaviour of fetch2 is
|
||||
|
||||
-- The icache cannot stall, so we need to stash a cycle
|
||||
-- of output from it when we stall.
|
||||
type reg_internal_type is record
|
||||
stash : IcacheToFetch2Type;
|
||||
stash_valid : std_ulogic;
|
||||
stopped : std_ulogic;
|
||||
end record;
|
||||
|
||||
signal r_int, rin_int : reg_internal_type;
|
||||
signal r, rin : Fetch2ToDecode1Type;
|
||||
|
||||
begin
|
||||
regs : process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
|
||||
if (r /= rin) then
|
||||
report "fetch2 rst:" & std_ulogic'image(rst) &
|
||||
" S:" & std_ulogic'image(stall_in) &
|
||||
" F:" & std_ulogic'image(flush_in) &
|
||||
" T:" & std_ulogic'image(rin.stop_mark) &
|
||||
" V:" & std_ulogic'image(rin.valid) &
|
||||
" FF:" & std_ulogic'image(rin.fetch_failed) &
|
||||
" nia:" & to_hstring(rin.nia);
|
||||
end if;
|
||||
|
||||
-- Output state remains unchanged on stall, unless we are flushing
|
||||
if rst = '1' or flush_in = '1' or stall_in = '0' then
|
||||
r <= rin;
|
||||
end if;
|
||||
|
||||
-- Internal state is updated on every clock
|
||||
r_int <= rin_int;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
comb : process(all)
|
||||
variable v : Fetch2ToDecode1Type;
|
||||
variable v_int : reg_internal_type;
|
||||
variable v_i_in : IcacheToFetch2Type;
|
||||
begin
|
||||
v := r;
|
||||
v_int := r_int;
|
||||
|
||||
-- If stalling, stash away the current input from the icache
|
||||
if stall_in = '1' and v_int.stash_valid = '0' then
|
||||
v_int.stash := i_in;
|
||||
v_int.stash_valid := '1';
|
||||
end if;
|
||||
|
||||
-- If unstalling, source input from the stash and invalidate it,
|
||||
-- otherwise source normally from the icache.
|
||||
--
|
||||
v_i_in := i_in;
|
||||
if v_int.stash_valid = '1' and stall_in = '0' then
|
||||
v_i_in := v_int.stash;
|
||||
v_int.stash_valid := '0';
|
||||
end if;
|
||||
|
||||
v.valid := v_i_in.valid;
|
||||
v.stop_mark := v_i_in.stop_mark;
|
||||
v.fetch_failed := v_i_in.fetch_failed;
|
||||
v.nia := v_i_in.nia;
|
||||
v.insn := v_i_in.insn;
|
||||
|
||||
-- Clear stash internal valid bit on flush. We still mark
|
||||
-- the stash itself as valid since we still want to override
|
||||
-- whatever comes from icache when unstalling, but we'll
|
||||
-- override it with something invalid.
|
||||
--
|
||||
if flush_in = '1' then
|
||||
v_int.stash.valid := '0';
|
||||
v_int.stash.fetch_failed := '0';
|
||||
end if;
|
||||
|
||||
-- If we are flushing or the instruction comes with a stop mark
|
||||
-- we tag it as invalid so it doesn't get decoded and executed
|
||||
if flush_in = '1' or v.stop_mark = '1' then
|
||||
v.valid := '0';
|
||||
v.fetch_failed := '0';
|
||||
end if;
|
||||
|
||||
-- Clear stash on reset
|
||||
if rst = '1' then
|
||||
v_int.stash_valid := '0';
|
||||
v.valid := '0';
|
||||
end if;
|
||||
|
||||
-- Update registers
|
||||
rin <= v;
|
||||
rin_int <= v_int;
|
||||
|
||||
-- Update outputs
|
||||
f_out <= r;
|
||||
end process;
|
||||
|
||||
end architecture behaviour;
|
||||
@@ -20,7 +20,8 @@ entity toplevel is
|
||||
SCLK_STARTUPE2 : boolean := false;
|
||||
SPI_FLASH_OFFSET : integer := 4194304;
|
||||
SPI_FLASH_DEF_CKDV : natural := 1;
|
||||
SPI_FLASH_DEF_QUAD : boolean := true
|
||||
SPI_FLASH_DEF_QUAD : boolean := true;
|
||||
LOG_LENGTH : natural := 512
|
||||
);
|
||||
port(
|
||||
ext_clk : in std_ulogic;
|
||||
@@ -140,7 +141,8 @@ begin
|
||||
SPI_FLASH_DLINES => 4,
|
||||
SPI_FLASH_OFFSET => SPI_FLASH_OFFSET,
|
||||
SPI_FLASH_DEF_CKDV => SPI_FLASH_DEF_CKDV,
|
||||
SPI_FLASH_DEF_QUAD => SPI_FLASH_DEF_QUAD
|
||||
SPI_FLASH_DEF_QUAD => SPI_FLASH_DEF_QUAD,
|
||||
LOG_LENGTH => LOG_LENGTH
|
||||
)
|
||||
port map (
|
||||
-- System signals
|
||||
|
||||
@@ -4,11 +4,15 @@ use ieee.numeric_std.all;
|
||||
|
||||
entity gpr_hazard is
|
||||
generic (
|
||||
PIPELINE_DEPTH : natural := 2
|
||||
PIPELINE_DEPTH : natural := 1
|
||||
);
|
||||
port(
|
||||
clk : in std_ulogic;
|
||||
stall_in : in std_ulogic;
|
||||
busy_in : in std_ulogic;
|
||||
deferred : in std_ulogic;
|
||||
complete_in : in std_ulogic;
|
||||
flush_in : in std_ulogic;
|
||||
issuing : in std_ulogic;
|
||||
|
||||
gpr_write_valid_in : in std_ulogic;
|
||||
gpr_write_in : in std_ulogic_vector(5 downto 0);
|
||||
@@ -16,6 +20,9 @@ entity gpr_hazard is
|
||||
gpr_read_valid_in : in std_ulogic;
|
||||
gpr_read_in : in std_ulogic_vector(5 downto 0);
|
||||
|
||||
ugpr_write_valid : in std_ulogic;
|
||||
ugpr_write_reg : in std_ulogic_vector(5 downto 0);
|
||||
|
||||
stall_out : out std_ulogic;
|
||||
use_bypass : out std_ulogic
|
||||
);
|
||||
@@ -25,10 +32,13 @@ architecture behaviour of gpr_hazard is
|
||||
valid : std_ulogic;
|
||||
bypass : std_ulogic;
|
||||
gpr : std_ulogic_vector(5 downto 0);
|
||||
ugpr_valid : std_ulogic;
|
||||
ugpr : std_ulogic_vector(5 downto 0);
|
||||
end record;
|
||||
constant pipeline_entry_init : pipeline_entry_type := (valid => '0', bypass => '0', gpr => (others => '0'));
|
||||
constant pipeline_entry_init : pipeline_entry_type := (valid => '0', bypass => '0', gpr => (others => '0'),
|
||||
ugpr_valid => '0', ugpr => (others => '0'));
|
||||
|
||||
type pipeline_t is array(0 to PIPELINE_DEPTH-1) of pipeline_entry_type;
|
||||
type pipeline_t is array(0 to PIPELINE_DEPTH) of pipeline_entry_type;
|
||||
constant pipeline_t_init : pipeline_t := (others => pipeline_entry_init);
|
||||
|
||||
signal r, rin : pipeline_t := pipeline_t_init;
|
||||
@@ -45,50 +55,46 @@ begin
|
||||
begin
|
||||
v := r;
|
||||
|
||||
if complete_in = '1' then
|
||||
v(PIPELINE_DEPTH).valid := '0';
|
||||
v(PIPELINE_DEPTH).ugpr_valid := '0';
|
||||
end if;
|
||||
|
||||
stall_out <= '0';
|
||||
use_bypass <= '0';
|
||||
if gpr_read_valid_in = '1' then
|
||||
if r(0).valid = '1' and r(0).gpr = gpr_read_in then
|
||||
if r(0).bypass = '1' and stall_in = '0' then
|
||||
use_bypass <= '1';
|
||||
else
|
||||
stall_out <= '1';
|
||||
end if;
|
||||
end if;
|
||||
loop_0: for i in 1 to PIPELINE_DEPTH-1 loop
|
||||
if r(i).valid = '1' and r(i).gpr = gpr_read_in then
|
||||
loop_0: for i in 0 to PIPELINE_DEPTH loop
|
||||
if v(i).valid = '1' and r(i).gpr = gpr_read_in then
|
||||
if r(i).bypass = '1' then
|
||||
use_bypass <= '1';
|
||||
else
|
||||
stall_out <= '1';
|
||||
end if;
|
||||
end if;
|
||||
if v(i).ugpr_valid = '1' and r(i).ugpr = gpr_read_in then
|
||||
stall_out <= '1';
|
||||
end if;
|
||||
end loop;
|
||||
end if;
|
||||
|
||||
if stall_in = '0' then
|
||||
-- XXX assumes PIPELINE_DEPTH = 1
|
||||
if busy_in = '0' then
|
||||
v(1) := v(0);
|
||||
v(0).valid := '0';
|
||||
v(0).ugpr_valid := '0';
|
||||
end if;
|
||||
if deferred = '0' and issuing = '1' then
|
||||
v(0).valid := gpr_write_valid_in;
|
||||
v(0).bypass := bypass_avail;
|
||||
v(0).gpr := gpr_write_in;
|
||||
loop_1: for i in 1 to PIPELINE_DEPTH-1 loop
|
||||
-- propagate to next slot
|
||||
v(i).valid := r(i-1).valid;
|
||||
v(i).bypass := r(i-1).bypass;
|
||||
v(i).gpr := r(i-1).gpr;
|
||||
end loop;
|
||||
|
||||
else
|
||||
-- stage 0 stalled, so stage 1 becomes empty
|
||||
loop_1b: for i in 1 to PIPELINE_DEPTH-1 loop
|
||||
-- propagate to next slot
|
||||
if i = 1 then
|
||||
v(i).valid := '0';
|
||||
else
|
||||
v(i).valid := r(i-1).valid;
|
||||
v(i).bypass := r(i-1).bypass;
|
||||
v(i).gpr := r(i-1).gpr;
|
||||
end if;
|
||||
end loop;
|
||||
v(0).ugpr_valid := ugpr_write_valid;
|
||||
v(0).ugpr := ugpr_write_reg;
|
||||
end if;
|
||||
if flush_in = '1' then
|
||||
v(0).valid := '0';
|
||||
v(0).ugpr_valid := '0';
|
||||
v(1).valid := '0';
|
||||
v(1).ugpr_valid := '0';
|
||||
end if;
|
||||
|
||||
-- update registers
|
||||
|
||||
151
icache.vhdl
151
icache.vhdl
@@ -48,16 +48,19 @@ entity icache is
|
||||
rst : in std_ulogic;
|
||||
|
||||
i_in : in Fetch1ToIcacheType;
|
||||
i_out : out IcacheToFetch2Type;
|
||||
i_out : out IcacheToDecode1Type;
|
||||
|
||||
m_in : in MmuToIcacheType;
|
||||
|
||||
stall_in : in std_ulogic;
|
||||
stall_out : out std_ulogic;
|
||||
flush_in : in std_ulogic;
|
||||
inval_in : in std_ulogic;
|
||||
|
||||
wishbone_out : out wishbone_master_out;
|
||||
wishbone_in : in wishbone_slave_out
|
||||
wishbone_in : in wishbone_slave_out;
|
||||
|
||||
log_out : out std_ulogic_vector(53 downto 0)
|
||||
);
|
||||
end entity icache;
|
||||
|
||||
@@ -112,6 +115,7 @@ architecture rtl of icache is
|
||||
subtype row_t is integer range 0 to BRAM_ROWS-1;
|
||||
subtype index_t is integer range 0 to NUM_LINES-1;
|
||||
subtype way_t is integer range 0 to NUM_WAYS-1;
|
||||
subtype row_in_line_t is unsigned(ROW_LINEBITS-1 downto 0);
|
||||
|
||||
-- The cache data BRAM organized as described above for each way
|
||||
subtype cache_row_t is std_ulogic_vector(wishbone_data_bits-1 downto 0);
|
||||
@@ -129,6 +133,7 @@ architecture rtl of icache is
|
||||
-- The cache valid bits
|
||||
subtype cache_way_valids_t is std_ulogic_vector(NUM_WAYS-1 downto 0);
|
||||
type cache_valids_t is array(index_t) of cache_way_valids_t;
|
||||
type row_per_line_valid_t is array(0 to ROW_PER_LINE - 1) of std_ulogic;
|
||||
|
||||
-- Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
|
||||
signal cache_tags : cache_tags_array_t;
|
||||
@@ -176,6 +181,8 @@ architecture rtl of icache is
|
||||
store_row : row_t;
|
||||
store_tag : cache_tag_t;
|
||||
store_valid : std_ulogic;
|
||||
end_row_ix : row_in_line_t;
|
||||
rows_valid : row_per_line_valid_t;
|
||||
|
||||
-- TLB miss state
|
||||
fetch_failed : std_ulogic;
|
||||
@@ -197,6 +204,10 @@ architecture rtl of icache is
|
||||
signal ra_valid : std_ulogic;
|
||||
signal priv_fault : std_ulogic;
|
||||
signal access_ok : std_ulogic;
|
||||
signal use_previous : std_ulogic;
|
||||
|
||||
-- Output data to logger
|
||||
signal log_data : std_ulogic_vector(53 downto 0);
|
||||
|
||||
-- Cache RAM interface
|
||||
type cache_ram_out_t is array(way_t) of cache_row_t;
|
||||
@@ -219,20 +230,24 @@ architecture rtl of icache is
|
||||
return to_integer(unsigned(addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS)));
|
||||
end;
|
||||
|
||||
-- Returns whether this is the last row of a line
|
||||
function is_last_row_addr(addr: wishbone_addr_type) return boolean is
|
||||
constant ones : std_ulogic_vector(ROW_LINEBITS-1 downto 0) := (others => '1');
|
||||
-- Return the index of a row within a line
|
||||
function get_row_of_line(row: row_t) return row_in_line_t is
|
||||
variable row_v : unsigned(ROW_BITS-1 downto 0);
|
||||
begin
|
||||
return addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS) = ones;
|
||||
row_v := to_unsigned(row, ROW_BITS);
|
||||
return row_v(ROW_LINEBITS-1 downto 0);
|
||||
end;
|
||||
|
||||
-- Returns whether this is the last row of a line
|
||||
function is_last_row(row: row_t) return boolean is
|
||||
variable row_v : std_ulogic_vector(ROW_BITS-1 downto 0);
|
||||
constant ones : std_ulogic_vector(ROW_LINEBITS-1 downto 0) := (others => '1');
|
||||
function is_last_row_addr(addr: wishbone_addr_type; last: row_in_line_t) return boolean is
|
||||
begin
|
||||
row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
|
||||
return row_v(ROW_LINEBITS-1 downto 0) = ones;
|
||||
return unsigned(addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS)) = last;
|
||||
end;
|
||||
|
||||
-- Returns whether this is the last row of a line
|
||||
function is_last_row(row: row_t; last: row_in_line_t) return boolean is
|
||||
begin
|
||||
return get_row_of_line(row) = last;
|
||||
end;
|
||||
|
||||
-- Return the address of the next row in the current cache line
|
||||
@@ -361,7 +376,7 @@ begin
|
||||
);
|
||||
process(all)
|
||||
begin
|
||||
do_read <= '1';
|
||||
do_read <= not (stall_in or use_previous);
|
||||
do_write <= '0';
|
||||
if wishbone_in.ack = '1' and r.store_way = i then
|
||||
do_write <= '1';
|
||||
@@ -466,23 +481,38 @@ begin
|
||||
variable is_hit : std_ulogic;
|
||||
variable hit_way : way_t;
|
||||
begin
|
||||
-- i_in.sequential means that i_in.nia this cycle is 4 more than
|
||||
-- last cycle. If we read more than 32 bits at a time, had a cache hit
|
||||
-- last cycle, and we don't want the first 32-bit chunk, then we can
|
||||
-- keep the data we read last cycle and just use that.
|
||||
if unsigned(i_in.nia(INSN_BITS+2-1 downto 2)) /= 0 then
|
||||
use_previous <= i_in.sequential and r.hit_valid;
|
||||
else
|
||||
use_previous <= '0';
|
||||
end if;
|
||||
|
||||
-- Extract line, row and tag from request
|
||||
req_index <= get_index(i_in.nia);
|
||||
req_row <= get_row(i_in.nia);
|
||||
req_tag <= get_tag(real_addr);
|
||||
|
||||
-- Calculate address of beginning of cache line, will be
|
||||
-- Calculate address of beginning of cache row, will be
|
||||
-- used for cache miss processing if needed
|
||||
--
|
||||
req_laddr <= (63 downto REAL_ADDR_BITS => '0') &
|
||||
real_addr(REAL_ADDR_BITS - 1 downto LINE_OFF_BITS) &
|
||||
(LINE_OFF_BITS-1 downto 0 => '0');
|
||||
real_addr(REAL_ADDR_BITS - 1 downto ROW_OFF_BITS) &
|
||||
(ROW_OFF_BITS-1 downto 0 => '0');
|
||||
|
||||
-- Test if pending request is a hit on any way
|
||||
hit_way := 0;
|
||||
is_hit := '0';
|
||||
for i in way_t loop
|
||||
if i_in.req = '1' and cache_valids(req_index)(i) = '1' then
|
||||
if i_in.req = '1' and
|
||||
(cache_valids(req_index)(i) = '1' or
|
||||
(r.state = WAIT_ACK and
|
||||
req_index = r.store_index and
|
||||
i = r.store_way and
|
||||
r.rows_valid(req_row mod ROW_PER_LINE) = '1')) then
|
||||
if read_tag(i, cache_tags(req_index)) = req_tag then
|
||||
hit_way := i;
|
||||
is_hit := '1';
|
||||
@@ -528,25 +558,35 @@ begin
|
||||
icache_hit : process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
-- On a hit, latch the request for the next cycle, when the BRAM data
|
||||
-- will be available on the cache_out output of the corresponding way
|
||||
--
|
||||
r.hit_valid <= req_is_hit;
|
||||
-- Send stop marks and NIA down regardless of validity
|
||||
r.hit_smark <= i_in.stop_mark;
|
||||
r.hit_nia <= i_in.nia;
|
||||
if req_is_hit = '1' then
|
||||
r.hit_way <= req_hit_way;
|
||||
r.hit_smark <= i_in.stop_mark;
|
||||
-- keep outputs to fetch2 unchanged on a stall
|
||||
-- except that flush or reset sets valid to 0
|
||||
-- If use_previous, keep the same data as last cycle and use the second half
|
||||
if stall_in = '1' or use_previous = '1' then
|
||||
if rst = '1' or flush_in = '1' then
|
||||
r.hit_valid <= '0';
|
||||
end if;
|
||||
else
|
||||
-- On a hit, latch the request for the next cycle, when the BRAM data
|
||||
-- will be available on the cache_out output of the corresponding way
|
||||
--
|
||||
r.hit_valid <= req_is_hit;
|
||||
if req_is_hit = '1' then
|
||||
r.hit_way <= req_hit_way;
|
||||
|
||||
report "cache hit nia:" & to_hstring(i_in.nia) &
|
||||
" IR:" & std_ulogic'image(i_in.virt_mode) &
|
||||
" SM:" & std_ulogic'image(i_in.stop_mark) &
|
||||
" idx:" & integer'image(req_index) &
|
||||
" tag:" & to_hstring(req_tag) &
|
||||
" way:" & integer'image(req_hit_way) &
|
||||
" RA:" & to_hstring(real_addr);
|
||||
report "cache hit nia:" & to_hstring(i_in.nia) &
|
||||
" IR:" & std_ulogic'image(i_in.virt_mode) &
|
||||
" SM:" & std_ulogic'image(i_in.stop_mark) &
|
||||
" idx:" & integer'image(req_index) &
|
||||
" tag:" & to_hstring(req_tag) &
|
||||
" way:" & integer'image(req_hit_way) &
|
||||
" RA:" & to_hstring(real_addr);
|
||||
end if;
|
||||
end if;
|
||||
if stall_in = '0' then
|
||||
-- Send stop marks and NIA down regardless of validity
|
||||
r.hit_smark <= i_in.stop_mark;
|
||||
r.hit_nia <= i_in.nia;
|
||||
end if;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
@@ -584,6 +624,11 @@ begin
|
||||
-- Main state machine
|
||||
case r.state is
|
||||
when IDLE =>
|
||||
-- Reset per-row valid flags, only used in WAIT_ACK
|
||||
for i in 0 to ROW_PER_LINE - 1 loop
|
||||
r.rows_valid(i) <= '0';
|
||||
end loop;
|
||||
|
||||
-- We need to read a cache line
|
||||
if req_is_miss = '1' then
|
||||
report "cache miss nia:" & to_hstring(i_in.nia) &
|
||||
@@ -600,6 +645,7 @@ begin
|
||||
r.store_row <= get_row(req_laddr);
|
||||
r.store_tag <= req_tag;
|
||||
r.store_valid <= '1';
|
||||
r.end_row_ix <= get_row_of_line(get_row(req_laddr)) - 1;
|
||||
|
||||
-- Prep for first wishbone read. We calculate the address of
|
||||
-- the start of the cache line and start the WB cycle.
|
||||
@@ -637,7 +683,7 @@ begin
|
||||
-- stb and set stbs_done so we can handle an eventual last
|
||||
-- ack on the same cycle.
|
||||
--
|
||||
if is_last_row_addr(r.wb.adr) then
|
||||
if is_last_row_addr(r.wb.adr, r.end_row_ix) then
|
||||
r.wb.stb <= '0';
|
||||
stbs_done := true;
|
||||
end if;
|
||||
@@ -648,8 +694,9 @@ begin
|
||||
|
||||
-- Incoming acks processing
|
||||
if wishbone_in.ack = '1' then
|
||||
r.rows_valid(r.store_row mod ROW_PER_LINE) <= '1';
|
||||
-- Check for completion
|
||||
if stbs_done and is_last_row(r.store_row) then
|
||||
if stbs_done and is_last_row(r.store_row, r.end_row_ix) then
|
||||
-- Complete wishbone cycle
|
||||
r.wb.cyc <= '0';
|
||||
|
||||
@@ -669,9 +716,41 @@ begin
|
||||
-- TLB miss and protection fault processing
|
||||
if rst = '1' or flush_in = '1' or m_in.tlbld = '1' then
|
||||
r.fetch_failed <= '0';
|
||||
elsif i_in.req = '1' and access_ok = '0' then
|
||||
elsif i_in.req = '1' and access_ok = '0' and stall_in = '0' then
|
||||
r.fetch_failed <= '1';
|
||||
end if;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
data_log: process(clk)
|
||||
variable lway: way_t;
|
||||
variable wstate: std_ulogic;
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
if req_is_hit then
|
||||
lway := req_hit_way;
|
||||
else
|
||||
lway := replace_way;
|
||||
end if;
|
||||
wstate := '0';
|
||||
if r.state /= IDLE then
|
||||
wstate := '1';
|
||||
end if;
|
||||
log_data <= i_out.valid &
|
||||
i_out.insn &
|
||||
wishbone_in.ack &
|
||||
r.wb.adr(5 downto 3) &
|
||||
r.wb.stb & r.wb.cyc &
|
||||
wishbone_in.stall &
|
||||
stall_out &
|
||||
r.fetch_failed &
|
||||
r.hit_nia(5 downto 2) &
|
||||
wstate &
|
||||
std_ulogic_vector(to_unsigned(lway, 3)) &
|
||||
req_is_hit & req_is_miss &
|
||||
access_ok &
|
||||
ra_valid;
|
||||
end if;
|
||||
end process;
|
||||
log_out <= log_data;
|
||||
end;
|
||||
|
||||
@@ -13,7 +13,7 @@ architecture behave of icache_tb is
|
||||
signal rst : std_ulogic;
|
||||
|
||||
signal i_out : Fetch1ToIcacheType;
|
||||
signal i_in : IcacheToFetch2Type;
|
||||
signal i_in : IcacheToDecode1Type;
|
||||
|
||||
signal m_out : MmuToIcacheType;
|
||||
|
||||
@@ -33,6 +33,7 @@ begin
|
||||
i_in => i_out,
|
||||
i_out => i_in,
|
||||
m_in => m_out,
|
||||
stall_in => '0',
|
||||
flush_in => '0',
|
||||
inval_in => '0',
|
||||
wishbone_out => wb_bram_in,
|
||||
|
||||
279
loadstore1.vhdl
279
loadstore1.vhdl
@@ -25,7 +25,8 @@ entity loadstore1 is
|
||||
m_in : in MmuToLoadstore1Type;
|
||||
|
||||
dc_stall : in std_ulogic;
|
||||
stall_out : out std_ulogic
|
||||
|
||||
log_out : out std_ulogic_vector(9 downto 0)
|
||||
);
|
||||
end loadstore1;
|
||||
|
||||
@@ -41,7 +42,8 @@ architecture behave of loadstore1 is
|
||||
ACK_WAIT, -- waiting for ack from dcache
|
||||
LD_UPDATE, -- writing rA with computed addr on load
|
||||
MMU_LOOKUP, -- waiting for MMU to look up translation
|
||||
TLBIE_WAIT -- waiting for MMU to finish doing a tlbie
|
||||
TLBIE_WAIT, -- waiting for MMU to finish doing a tlbie
|
||||
SPR_CMPLT -- complete a mf/tspr operation
|
||||
);
|
||||
|
||||
type reg_stage_t is record
|
||||
@@ -49,6 +51,7 @@ architecture behave of loadstore1 is
|
||||
load : std_ulogic;
|
||||
tlbie : std_ulogic;
|
||||
dcbz : std_ulogic;
|
||||
mfspr : std_ulogic;
|
||||
addr : std_ulogic_vector(63 downto 0);
|
||||
store_data : std_ulogic_vector(63 downto 0);
|
||||
load_data : std_ulogic_vector(63 downto 0);
|
||||
@@ -71,6 +74,7 @@ architecture behave of loadstore1 is
|
||||
dar : std_ulogic_vector(63 downto 0);
|
||||
dsisr : std_ulogic_vector(31 downto 0);
|
||||
instr_fault : std_ulogic;
|
||||
sprval : std_ulogic_vector(63 downto 0);
|
||||
end record;
|
||||
|
||||
type byte_sel_t is array(0 to 7) of std_ulogic;
|
||||
@@ -80,6 +84,8 @@ architecture behave of loadstore1 is
|
||||
signal r, rin : reg_stage_t;
|
||||
signal lsu_sum : std_ulogic_vector(63 downto 0);
|
||||
|
||||
signal log_data : std_ulogic_vector(9 downto 0);
|
||||
|
||||
-- Generate byte enables from sizes
|
||||
function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is
|
||||
begin
|
||||
@@ -135,7 +141,7 @@ begin
|
||||
variable long_sel : std_ulogic_vector(15 downto 0);
|
||||
variable byte_sel : std_ulogic_vector(7 downto 0);
|
||||
variable req : std_ulogic;
|
||||
variable stall : std_ulogic;
|
||||
variable busy : std_ulogic;
|
||||
variable addr : std_ulogic_vector(63 downto 0);
|
||||
variable wdata : std_ulogic_vector(63 downto 0);
|
||||
variable write_enable : std_ulogic;
|
||||
@@ -147,9 +153,7 @@ begin
|
||||
variable use_second : byte_sel_t;
|
||||
variable trim_ctl : trim_ctl_t;
|
||||
variable negative : std_ulogic;
|
||||
variable mfspr : std_ulogic;
|
||||
variable sprn : std_ulogic_vector(9 downto 0);
|
||||
variable sprval : std_ulogic_vector(63 downto 0);
|
||||
variable exception : std_ulogic;
|
||||
variable next_addr : std_ulogic_vector(63 downto 0);
|
||||
variable mmureq : std_ulogic;
|
||||
@@ -159,16 +163,12 @@ begin
|
||||
begin
|
||||
v := r;
|
||||
req := '0';
|
||||
stall := '0';
|
||||
done := '0';
|
||||
byte_sel := (others => '0');
|
||||
addr := lsu_sum;
|
||||
mfspr := '0';
|
||||
v.mfspr := '0';
|
||||
mmu_mtspr := '0';
|
||||
itlb_fault := '0';
|
||||
sprn := std_ulogic_vector(to_unsigned(decode_spr_num(l_in.insn), 10));
|
||||
sprval := (others => '0'); -- avoid inferred latches
|
||||
exception := '0';
|
||||
dsisr := (others => '0');
|
||||
mmureq := '0';
|
||||
|
||||
@@ -227,130 +227,18 @@ begin
|
||||
-- compute (addr + 8) & ~7 for the second doubleword when unaligned
|
||||
next_addr := std_ulogic_vector(unsigned(r.addr(63 downto 3)) + 1) & "000";
|
||||
|
||||
done := '0';
|
||||
exception := '0';
|
||||
case r.state is
|
||||
when IDLE =>
|
||||
if l_in.valid = '1' then
|
||||
v.addr := lsu_sum;
|
||||
v.load := '0';
|
||||
v.dcbz := '0';
|
||||
v.tlbie := '0';
|
||||
v.instr_fault := '0';
|
||||
v.dwords_done := '0';
|
||||
case l_in.op is
|
||||
when OP_STORE =>
|
||||
req := '1';
|
||||
when OP_LOAD =>
|
||||
req := '1';
|
||||
v.load := '1';
|
||||
when OP_DCBZ =>
|
||||
req := '1';
|
||||
v.dcbz := '1';
|
||||
when OP_TLBIE =>
|
||||
mmureq := '1';
|
||||
stall := '1';
|
||||
v.tlbie := '1';
|
||||
v.state := TLBIE_WAIT;
|
||||
when OP_MFSPR =>
|
||||
done := '1';
|
||||
mfspr := '1';
|
||||
-- partial decode on SPR number should be adequate given
|
||||
-- the restricted set that gets sent down this path
|
||||
if sprn(9) = '0' and sprn(5) = '0' then
|
||||
if sprn(0) = '0' then
|
||||
sprval := x"00000000" & r.dsisr;
|
||||
else
|
||||
sprval := r.dar;
|
||||
end if;
|
||||
else
|
||||
-- reading one of the SPRs in the MMU
|
||||
sprval := m_in.sprval;
|
||||
end if;
|
||||
when OP_MTSPR =>
|
||||
if sprn(9) = '0' and sprn(5) = '0' then
|
||||
if sprn(0) = '0' then
|
||||
v.dsisr := l_in.data(31 downto 0);
|
||||
else
|
||||
v.dar := l_in.data;
|
||||
end if;
|
||||
done := '1';
|
||||
else
|
||||
-- writing one of the SPRs in the MMU
|
||||
mmu_mtspr := '1';
|
||||
stall := '1';
|
||||
v.state := TLBIE_WAIT;
|
||||
end if;
|
||||
when OP_FETCH_FAILED =>
|
||||
-- send it to the MMU to do the radix walk
|
||||
addr := l_in.nia;
|
||||
v.addr := l_in.nia;
|
||||
v.instr_fault := '1';
|
||||
mmureq := '1';
|
||||
stall := '1';
|
||||
v.state := MMU_LOOKUP;
|
||||
when others =>
|
||||
assert false report "unknown op sent to loadstore1";
|
||||
end case;
|
||||
|
||||
v.write_reg := l_in.write_reg;
|
||||
v.length := l_in.length;
|
||||
v.byte_reverse := l_in.byte_reverse;
|
||||
v.sign_extend := l_in.sign_extend;
|
||||
v.update := l_in.update;
|
||||
v.update_reg := l_in.update_reg;
|
||||
v.xerc := l_in.xerc;
|
||||
v.reserve := l_in.reserve;
|
||||
v.rc := l_in.rc;
|
||||
v.nc := l_in.ci;
|
||||
v.virt_mode := l_in.virt_mode;
|
||||
v.priv_mode := l_in.priv_mode;
|
||||
|
||||
-- XXX Temporary hack. Mark the op as non-cachable if the address
|
||||
-- is the form 0xc------- for a real-mode access.
|
||||
--
|
||||
-- This will have to be replaced by a combination of implementing the
|
||||
-- proper HV CI load/store instructions and having an MMU to get the I
|
||||
-- bit otherwise.
|
||||
if lsu_sum(31 downto 28) = "1100" and l_in.virt_mode = '0' then
|
||||
v.nc := '1';
|
||||
end if;
|
||||
|
||||
-- Do length_to_sel and work out if we are doing 2 dwords
|
||||
long_sel := xfer_data_sel(l_in.length, v.addr(2 downto 0));
|
||||
byte_sel := long_sel(7 downto 0);
|
||||
v.first_bytes := byte_sel;
|
||||
v.second_bytes := long_sel(15 downto 8);
|
||||
|
||||
-- Do byte reversing and rotating for stores in the first cycle
|
||||
byte_offset := unsigned(lsu_sum(2 downto 0));
|
||||
brev_lenm1 := "000";
|
||||
if l_in.byte_reverse = '1' then
|
||||
brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1;
|
||||
end if;
|
||||
for i in 0 to 7 loop
|
||||
k := (to_unsigned(i, 3) xor brev_lenm1) + byte_offset;
|
||||
j := to_integer(k) * 8;
|
||||
v.store_data(j + 7 downto j) := l_in.data(i * 8 + 7 downto i * 8);
|
||||
end loop;
|
||||
|
||||
if req = '1' then
|
||||
stall := '1';
|
||||
if long_sel(15 downto 8) = "00000000" then
|
||||
v.state := ACK_WAIT;
|
||||
else
|
||||
v.state := SECOND_REQ;
|
||||
end if;
|
||||
end if;
|
||||
end if;
|
||||
|
||||
when SECOND_REQ =>
|
||||
addr := next_addr;
|
||||
byte_sel := r.second_bytes;
|
||||
req := '1';
|
||||
stall := '1';
|
||||
v.state := ACK_WAIT;
|
||||
|
||||
when ACK_WAIT =>
|
||||
stall := '1';
|
||||
if d_in.valid = '1' then
|
||||
if d_in.error = '1' then
|
||||
-- dcache will discard the second request if it
|
||||
@@ -388,7 +276,6 @@ begin
|
||||
else
|
||||
-- stores write back rA update in this cycle
|
||||
do_update := r.update;
|
||||
stall := '0';
|
||||
done := '1';
|
||||
v.state := IDLE;
|
||||
end if;
|
||||
@@ -397,7 +284,6 @@ begin
|
||||
end if;
|
||||
|
||||
when MMU_LOOKUP =>
|
||||
stall := '1';
|
||||
if r.dwords_done = '1' then
|
||||
addr := next_addr;
|
||||
byte_sel := r.second_bytes;
|
||||
@@ -418,7 +304,6 @@ begin
|
||||
end if;
|
||||
else
|
||||
-- nothing to do, the icache retries automatically
|
||||
stall := '0';
|
||||
done := '1';
|
||||
v.state := IDLE;
|
||||
end if;
|
||||
@@ -434,10 +319,8 @@ begin
|
||||
end if;
|
||||
|
||||
when TLBIE_WAIT =>
|
||||
stall := '1';
|
||||
if m_in.done = '1' then
|
||||
-- tlbie is finished
|
||||
stall := '0';
|
||||
done := '1';
|
||||
v.state := IDLE;
|
||||
end if;
|
||||
@@ -447,8 +330,123 @@ begin
|
||||
v.state := IDLE;
|
||||
done := '1';
|
||||
|
||||
when SPR_CMPLT =>
|
||||
done := '1';
|
||||
v.state := IDLE;
|
||||
|
||||
end case;
|
||||
|
||||
busy := '1';
|
||||
if r.state = IDLE or done = '1' then
|
||||
busy := '0';
|
||||
end if;
|
||||
|
||||
-- Note that l_in.valid is gated with busy inside execute1
|
||||
if l_in.valid = '1' then
|
||||
v.addr := lsu_sum;
|
||||
v.load := '0';
|
||||
v.dcbz := '0';
|
||||
v.tlbie := '0';
|
||||
v.instr_fault := '0';
|
||||
v.dwords_done := '0';
|
||||
v.write_reg := l_in.write_reg;
|
||||
v.length := l_in.length;
|
||||
v.byte_reverse := l_in.byte_reverse;
|
||||
v.sign_extend := l_in.sign_extend;
|
||||
v.update := l_in.update;
|
||||
v.update_reg := l_in.update_reg;
|
||||
v.xerc := l_in.xerc;
|
||||
v.reserve := l_in.reserve;
|
||||
v.rc := l_in.rc;
|
||||
v.nc := l_in.ci;
|
||||
v.virt_mode := l_in.virt_mode;
|
||||
v.priv_mode := l_in.priv_mode;
|
||||
|
||||
-- XXX Temporary hack. Mark the op as non-cachable if the address
|
||||
-- is the form 0xc------- for a real-mode access.
|
||||
if lsu_sum(31 downto 28) = "1100" and l_in.virt_mode = '0' then
|
||||
v.nc := '1';
|
||||
end if;
|
||||
|
||||
-- Do length_to_sel and work out if we are doing 2 dwords
|
||||
long_sel := xfer_data_sel(l_in.length, v.addr(2 downto 0));
|
||||
byte_sel := long_sel(7 downto 0);
|
||||
v.first_bytes := byte_sel;
|
||||
v.second_bytes := long_sel(15 downto 8);
|
||||
|
||||
-- Do byte reversing and rotating for stores in the first cycle
|
||||
byte_offset := unsigned(lsu_sum(2 downto 0));
|
||||
brev_lenm1 := "000";
|
||||
if l_in.byte_reverse = '1' then
|
||||
brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1;
|
||||
end if;
|
||||
for i in 0 to 7 loop
|
||||
k := (to_unsigned(i, 3) xor brev_lenm1) + byte_offset;
|
||||
j := to_integer(k) * 8;
|
||||
v.store_data(j + 7 downto j) := l_in.data(i * 8 + 7 downto i * 8);
|
||||
end loop;
|
||||
|
||||
case l_in.op is
|
||||
when OP_STORE =>
|
||||
req := '1';
|
||||
when OP_LOAD =>
|
||||
req := '1';
|
||||
v.load := '1';
|
||||
when OP_DCBZ =>
|
||||
req := '1';
|
||||
v.dcbz := '1';
|
||||
when OP_TLBIE =>
|
||||
mmureq := '1';
|
||||
v.tlbie := '1';
|
||||
v.state := TLBIE_WAIT;
|
||||
when OP_MFSPR =>
|
||||
v.mfspr := '1';
|
||||
-- partial decode on SPR number should be adequate given
|
||||
-- the restricted set that gets sent down this path
|
||||
if sprn(9) = '0' and sprn(5) = '0' then
|
||||
if sprn(0) = '0' then
|
||||
v.sprval := x"00000000" & r.dsisr;
|
||||
else
|
||||
v.sprval := r.dar;
|
||||
end if;
|
||||
else
|
||||
-- reading one of the SPRs in the MMU
|
||||
v.sprval := m_in.sprval;
|
||||
end if;
|
||||
v.state := SPR_CMPLT;
|
||||
when OP_MTSPR =>
|
||||
if sprn(9) = '0' and sprn(5) = '0' then
|
||||
if sprn(0) = '0' then
|
||||
v.dsisr := l_in.data(31 downto 0);
|
||||
else
|
||||
v.dar := l_in.data;
|
||||
end if;
|
||||
v.state := SPR_CMPLT;
|
||||
else
|
||||
-- writing one of the SPRs in the MMU
|
||||
mmu_mtspr := '1';
|
||||
v.state := TLBIE_WAIT;
|
||||
end if;
|
||||
when OP_FETCH_FAILED =>
|
||||
-- send it to the MMU to do the radix walk
|
||||
addr := l_in.nia;
|
||||
v.addr := l_in.nia;
|
||||
v.instr_fault := '1';
|
||||
mmureq := '1';
|
||||
v.state := MMU_LOOKUP;
|
||||
when others =>
|
||||
assert false report "unknown op sent to loadstore1";
|
||||
end case;
|
||||
|
||||
if req = '1' then
|
||||
if long_sel(15 downto 8) = "00000000" then
|
||||
v.state := ACK_WAIT;
|
||||
else
|
||||
v.state := SECOND_REQ;
|
||||
end if;
|
||||
end if;
|
||||
end if;
|
||||
|
||||
-- Update outputs to dcache
|
||||
d_out.valid <= req;
|
||||
d_out.load <= v.load;
|
||||
@@ -477,10 +475,10 @@ begin
|
||||
-- Multiplex either cache data to the destination GPR or
|
||||
-- the address for the rA update.
|
||||
l_out.valid <= done;
|
||||
if mfspr = '1' then
|
||||
if r.mfspr = '1' then
|
||||
l_out.write_enable <= '1';
|
||||
l_out.write_reg <= l_in.write_reg;
|
||||
l_out.write_data <= sprval;
|
||||
l_out.write_reg <= r.write_reg;
|
||||
l_out.write_data <= r.sprval;
|
||||
elsif do_update = '1' then
|
||||
l_out.write_enable <= '1';
|
||||
l_out.write_reg <= r.update_reg;
|
||||
@@ -495,6 +493,7 @@ begin
|
||||
l_out.store_done <= d_in.store_done;
|
||||
|
||||
-- update exception info back to execute1
|
||||
e_out.busy <= busy;
|
||||
e_out.exception <= exception;
|
||||
e_out.instr_fault <= r.instr_fault;
|
||||
e_out.invalid <= m_in.invalid;
|
||||
@@ -509,11 +508,23 @@ begin
|
||||
end if;
|
||||
end if;
|
||||
|
||||
stall_out <= stall;
|
||||
|
||||
-- Update registers
|
||||
rin <= v;
|
||||
|
||||
end process;
|
||||
|
||||
-- Registered snapshot of loadstore1 status for the core log.
-- One 10-bit sample is captured on every rising clock edge.  The
-- concatenation below is MSB-first, so the resulting layout is:
--   bit 9     e_out.busy
--   bit 8     e_out.exception
--   bit 7     l_out.valid
--   bit 6     m_out.valid
--   bit 5     d_out.valid
--   bit 4     m_in.done
--   bit 3     r.dwords_done
--   bits 2:0  encoded position of r.state within state_t
-- NOTE(review): the 3-bit state field assumes state_t has at most
-- eight values -- confirm if states are added.
ls1_log: process(clk)
begin
    if rising_edge(clk) then
        log_data <= e_out.busy &
                    e_out.exception &
                    l_out.valid &
                    m_out.valid &
                    d_out.valid &
                    m_in.done &
                    r.dwords_done &
                    std_ulogic_vector(to_unsigned(state_t'pos(r.state), 3));
    end if;
end process;
|
||||
log_out <= log_data;
|
||||
end;
|
||||
|
||||
66
logical.vhdl
66
logical.vhdl
@@ -4,6 +4,7 @@ use ieee.numeric_std.all;
|
||||
|
||||
library work;
|
||||
use work.decode_types.all;
|
||||
use work.ppc_fx_insns.all;
|
||||
|
||||
entity logical is
|
||||
port (
|
||||
@@ -13,9 +14,7 @@ entity logical is
|
||||
invert_in : in std_ulogic;
|
||||
invert_out : in std_ulogic;
|
||||
result : out std_ulogic_vector(63 downto 0);
|
||||
datalen : in std_logic_vector(3 downto 0);
|
||||
popcnt : out std_ulogic_vector(63 downto 0);
|
||||
parity : out std_ulogic_vector(63 downto 0)
|
||||
datalen : in std_logic_vector(3 downto 0)
|
||||
);
|
||||
end entity logical;
|
||||
|
||||
@@ -34,30 +33,14 @@ architecture behaviour of logical is
|
||||
type sixbit2 is array(0 to 1) of sixbit;
|
||||
signal pc32 : sixbit2;
|
||||
signal par0, par1 : std_ulogic;
|
||||
signal popcnt : std_ulogic_vector(63 downto 0);
|
||||
signal parity : std_ulogic_vector(63 downto 0);
|
||||
|
||||
begin
|
||||
logical_0: process(all)
|
||||
variable rb_adj, tmp : std_ulogic_vector(63 downto 0);
|
||||
variable negative : std_ulogic;
|
||||
begin
|
||||
rb_adj := rb;
|
||||
if invert_in = '1' then
|
||||
rb_adj := not rb;
|
||||
end if;
|
||||
|
||||
case op is
|
||||
when OP_AND =>
|
||||
tmp := rs and rb_adj;
|
||||
when OP_OR =>
|
||||
tmp := rs or rb_adj;
|
||||
when others =>
|
||||
tmp := rs xor rb_adj;
|
||||
end case;
|
||||
|
||||
result <= tmp;
|
||||
if invert_out = '1' then
|
||||
result <= not tmp;
|
||||
end if;
|
||||
|
||||
-- population counts
|
||||
for i in 0 to 31 loop
|
||||
pc2(i) <= unsigned("0" & rs(i * 2 downto i * 2)) + unsigned("0" & rs(i * 2 + 1 downto i * 2 + 1));
|
||||
@@ -98,5 +81,44 @@ begin
|
||||
parity(32) <= par1;
|
||||
end if;
|
||||
|
||||
rb_adj := rb;
|
||||
if invert_in = '1' then
|
||||
rb_adj := not rb;
|
||||
end if;
|
||||
|
||||
case op is
|
||||
when OP_AND =>
|
||||
tmp := rs and rb_adj;
|
||||
when OP_OR =>
|
||||
tmp := rs or rb_adj;
|
||||
when OP_XOR =>
|
||||
tmp := rs xor rb_adj;
|
||||
when OP_POPCNT =>
|
||||
tmp := popcnt;
|
||||
when OP_PRTY =>
|
||||
tmp := parity;
|
||||
when OP_CMPB =>
|
||||
tmp := ppc_cmpb(rs, rb);
|
||||
when others =>
|
||||
-- EXTS
|
||||
-- note datalen is a 1-hot encoding
|
||||
negative := (datalen(0) and rs(7)) or
|
||||
(datalen(1) and rs(15)) or
|
||||
(datalen(2) and rs(31));
|
||||
tmp := (others => negative);
|
||||
if datalen(2) = '1' then
|
||||
tmp(31 downto 16) := rs(31 downto 16);
|
||||
end if;
|
||||
if datalen(2) = '1' or datalen(1) = '1' then
|
||||
tmp(15 downto 8) := rs(15 downto 8);
|
||||
end if;
|
||||
tmp(7 downto 0) := rs(7 downto 0);
|
||||
end case;
|
||||
|
||||
if invert_out = '1' then
|
||||
tmp := not tmp;
|
||||
end if;
|
||||
result <= tmp;
|
||||
|
||||
end process;
|
||||
end behaviour;
|
||||
|
||||
@@ -9,7 +9,6 @@ filesets:
|
||||
- wishbone_types.vhdl
|
||||
- common.vhdl
|
||||
- fetch1.vhdl
|
||||
- fetch2.vhdl
|
||||
- decode1.vhdl
|
||||
- helpers.vhdl
|
||||
- decode2.vhdl
|
||||
@@ -27,7 +26,6 @@ filesets:
|
||||
- loadstore1.vhdl
|
||||
- mmu.vhdl
|
||||
- dcache.vhdl
|
||||
- multiply.vhdl
|
||||
- divider.vhdl
|
||||
- rotator.vhdl
|
||||
- writeback.vhdl
|
||||
@@ -63,6 +61,10 @@ filesets:
|
||||
- fpga/firmware.hex : {copyto : firmware.hex, file_type : user}
|
||||
file_type : vhdlSource-2008
|
||||
|
||||
xilinx_specific:
|
||||
files:
|
||||
- xilinx-mult.vhdl : {file_type : vhdlSource-2008}
|
||||
|
||||
debug_xilinx:
|
||||
files:
|
||||
- dmi_dtm_xilinx.vhdl : {file_type : vhdlSource-2008}
|
||||
@@ -101,20 +103,21 @@ filesets:
|
||||
targets:
|
||||
nexys_a7:
|
||||
default_tool: vivado
|
||||
filesets: [core, nexys_a7, soc, fpga, debug_xilinx]
|
||||
filesets: [core, nexys_a7, soc, fpga, debug_xilinx, xilinx_specific]
|
||||
parameters :
|
||||
- memory_size
|
||||
- ram_init_file
|
||||
- clk_input
|
||||
- clk_frequency
|
||||
- disable_flatten_core
|
||||
- log_length=2048
|
||||
tools:
|
||||
vivado: {part : xc7a100tcsg324-1}
|
||||
toplevel : toplevel
|
||||
|
||||
nexys_video-nodram:
|
||||
default_tool: vivado
|
||||
filesets: [core, nexys_video, soc, fpga, debug_xilinx]
|
||||
filesets: [core, nexys_video, soc, fpga, debug_xilinx, xilinx_specific]
|
||||
parameters :
|
||||
- memory_size
|
||||
- ram_init_file
|
||||
@@ -122,13 +125,14 @@ targets:
|
||||
- clk_frequency
|
||||
- disable_flatten_core
|
||||
- spi_flash_offset=10485760
|
||||
- log_length=2048
|
||||
tools:
|
||||
vivado: {part : xc7a200tsbg484-1}
|
||||
toplevel : toplevel
|
||||
|
||||
nexys_video:
|
||||
default_tool: vivado
|
||||
filesets: [core, nexys_video, soc, fpga, debug_xilinx, litedram]
|
||||
filesets: [core, nexys_video, soc, fpga, debug_xilinx, litedram, xilinx_specific]
|
||||
parameters:
|
||||
- memory_size
|
||||
- ram_init_file
|
||||
@@ -136,6 +140,7 @@ targets:
|
||||
- disable_flatten_core
|
||||
- no_bram
|
||||
- spi_flash_offset=10485760
|
||||
- log_length=2048
|
||||
generate: [dram_nexys_video]
|
||||
tools:
|
||||
vivado: {part : xc7a200tsbg484-1}
|
||||
@@ -143,7 +148,7 @@ targets:
|
||||
|
||||
arty_a7-35-nodram:
|
||||
default_tool: vivado
|
||||
filesets: [core, arty_a7, soc, fpga, debug_xilinx]
|
||||
filesets: [core, arty_a7, soc, fpga, debug_xilinx, xilinx_specific]
|
||||
parameters :
|
||||
- memory_size
|
||||
- ram_init_file
|
||||
@@ -151,13 +156,14 @@ targets:
|
||||
- clk_frequency
|
||||
- disable_flatten_core
|
||||
- spi_flash_offset=3145728
|
||||
- log_length=512
|
||||
tools:
|
||||
vivado: {part : xc7a35ticsg324-1L}
|
||||
toplevel : toplevel
|
||||
|
||||
arty_a7-35:
|
||||
default_tool: vivado
|
||||
filesets: [core, arty_a7, soc, fpga, debug_xilinx, litedram]
|
||||
filesets: [core, arty_a7, soc, fpga, debug_xilinx, litedram, xilinx_specific]
|
||||
parameters :
|
||||
- memory_size
|
||||
- ram_init_file
|
||||
@@ -165,6 +171,7 @@ targets:
|
||||
- disable_flatten_core
|
||||
- no_bram
|
||||
- spi_flash_offset=3145728
|
||||
- log_length=512
|
||||
generate: [dram_arty]
|
||||
tools:
|
||||
vivado: {part : xc7a35ticsg324-1L}
|
||||
@@ -172,7 +179,7 @@ targets:
|
||||
|
||||
arty_a7-100-nodram:
|
||||
default_tool: vivado
|
||||
filesets: [core, arty_a7, soc, fpga, debug_xilinx]
|
||||
filesets: [core, arty_a7, soc, fpga, debug_xilinx, xilinx_specific]
|
||||
parameters :
|
||||
- memory_size
|
||||
- ram_init_file
|
||||
@@ -180,13 +187,14 @@ targets:
|
||||
- clk_frequency
|
||||
- disable_flatten_core
|
||||
- spi_flash_offset=4194304
|
||||
- log_length=2048
|
||||
tools:
|
||||
vivado: {part : xc7a100ticsg324-1L}
|
||||
toplevel : toplevel
|
||||
|
||||
arty_a7-100:
|
||||
default_tool: vivado
|
||||
filesets: [core, arty_a7, soc, fpga, debug_xilinx, litedram]
|
||||
filesets: [core, arty_a7, soc, fpga, debug_xilinx, litedram, xilinx_specific]
|
||||
parameters:
|
||||
- memory_size
|
||||
- ram_init_file
|
||||
@@ -194,6 +202,7 @@ targets:
|
||||
- disable_flatten_core
|
||||
- no_bram
|
||||
- spi_flash_offset=4194304
|
||||
- log_length=2048
|
||||
generate: [dram_arty]
|
||||
tools:
|
||||
vivado: {part : xc7a100ticsg324-1L}
|
||||
@@ -201,7 +210,7 @@ targets:
|
||||
|
||||
cmod_a7-35:
|
||||
default_tool: vivado
|
||||
filesets: [core, cmod_a7-35, soc, fpga, debug_xilinx]
|
||||
filesets: [core, cmod_a7-35, soc, fpga, debug_xilinx, xilinx_specific]
|
||||
parameters :
|
||||
- memory_size
|
||||
- ram_init_file
|
||||
@@ -209,12 +218,13 @@ targets:
|
||||
- clk_input=12000000
|
||||
- clk_frequency
|
||||
- disable_flatten_core
|
||||
- log_length=512
|
||||
tools:
|
||||
vivado: {part : xc7a35tcpg236-1}
|
||||
toplevel : toplevel
|
||||
|
||||
synth:
|
||||
filesets: [core, soc]
|
||||
filesets: [core, soc, xilinx_specific]
|
||||
tools:
|
||||
vivado: {pnr : none}
|
||||
toplevel: core
|
||||
@@ -279,3 +289,8 @@ parameters:
|
||||
datatype : int
|
||||
description : Offset (in bytes) in the SPI flash of the code payload to run
|
||||
paramtype : generic
|
||||
|
||||
log_length:
|
||||
datatype : int
|
||||
description : Length of the core log buffer in entries (32 bytes each)
|
||||
paramtype : generic
|
||||
|
||||
32
mmu.vhdl
32
mmu.vhdl
@@ -27,6 +27,7 @@ end mmu;
|
||||
architecture behave of mmu is
|
||||
|
||||
type state_t is (IDLE,
|
||||
DO_TLBIE,
|
||||
TLB_WAIT,
|
||||
PROC_TBL_READ,
|
||||
PROC_TBL_WAIT,
|
||||
@@ -44,6 +45,7 @@ architecture behave of mmu is
|
||||
store : std_ulogic;
|
||||
priv : std_ulogic;
|
||||
addr : std_ulogic_vector(63 downto 0);
|
||||
inval_all : std_ulogic;
|
||||
-- config SPRs
|
||||
prtbl : std_ulogic_vector(63 downto 0);
|
||||
pid : std_ulogic_vector(31 downto 0);
|
||||
@@ -178,7 +180,6 @@ begin
|
||||
variable tlb_load : std_ulogic;
|
||||
variable itlb_load : std_ulogic;
|
||||
variable tlbie_req : std_ulogic;
|
||||
variable inval_all : std_ulogic;
|
||||
variable prtbl_rd : std_ulogic;
|
||||
variable pt_valid : std_ulogic;
|
||||
variable effpid : std_ulogic_vector(31 downto 0);
|
||||
@@ -207,7 +208,7 @@ begin
|
||||
tlb_load := '0';
|
||||
itlb_load := '0';
|
||||
tlbie_req := '0';
|
||||
inval_all := '0';
|
||||
v.inval_all := '0';
|
||||
prtbl_rd := '0';
|
||||
|
||||
-- Radix tree data structures in memory are big-endian,
|
||||
@@ -240,19 +241,17 @@ begin
|
||||
v.store := not (l_in.load or l_in.iside);
|
||||
v.priv := l_in.priv;
|
||||
if l_in.tlbie = '1' then
|
||||
dcreq := '1';
|
||||
tlbie_req := '1';
|
||||
-- Invalidate all iTLB/dTLB entries for tlbie with
|
||||
-- RB[IS] != 0 or RB[AP] != 0, or for slbia
|
||||
inval_all := l_in.slbia or l_in.addr(11) or l_in.addr(10) or
|
||||
l_in.addr(7) or l_in.addr(6) or l_in.addr(5);
|
||||
v.inval_all := l_in.slbia or l_in.addr(11) or l_in.addr(10) or
|
||||
l_in.addr(7) or l_in.addr(6) or l_in.addr(5);
|
||||
-- The RIC field of the tlbie instruction comes across on the
|
||||
-- sprn bus as bits 2--3. RIC=2 flushes process table caches.
|
||||
if l_in.sprn(3) = '1' then
|
||||
v.pt0_valid := '0';
|
||||
v.pt3_valid := '0';
|
||||
end if;
|
||||
v.state := TLB_WAIT;
|
||||
v.state := DO_TLBIE;
|
||||
else
|
||||
v.valid := '1';
|
||||
if pt_valid = '0' then
|
||||
@@ -281,12 +280,15 @@ begin
|
||||
v.pt3_valid := '0';
|
||||
end if;
|
||||
v.pt0_valid := '0';
|
||||
dcreq := '1';
|
||||
tlbie_req := '1';
|
||||
inval_all := '1';
|
||||
v.state := TLB_WAIT;
|
||||
v.inval_all := '1';
|
||||
v.state := DO_TLBIE;
|
||||
end if;
|
||||
|
||||
when DO_TLBIE =>
|
||||
dcreq := '1';
|
||||
tlbie_req := '1';
|
||||
v.state := TLB_WAIT;
|
||||
|
||||
when TLB_WAIT =>
|
||||
if d_in.done = '1' then
|
||||
done := '1';
|
||||
@@ -436,8 +438,8 @@ begin
|
||||
|
||||
-- drive outputs
|
||||
if tlbie_req = '1' then
|
||||
addr := l_in.addr;
|
||||
tlb_data := l_in.rs;
|
||||
addr := r.addr;
|
||||
tlb_data := (others => '0');
|
||||
elsif tlb_load = '1' then
|
||||
addr := r.addr(63 downto 12) & x"000";
|
||||
tlb_data := pte;
|
||||
@@ -458,14 +460,14 @@ begin
|
||||
|
||||
d_out.valid <= dcreq;
|
||||
d_out.tlbie <= tlbie_req;
|
||||
d_out.doall <= inval_all;
|
||||
d_out.doall <= r.inval_all;
|
||||
d_out.tlbld <= tlb_load;
|
||||
d_out.addr <= addr;
|
||||
d_out.pte <= tlb_data;
|
||||
|
||||
i_out.tlbld <= itlb_load;
|
||||
i_out.tlbie <= tlbie_req;
|
||||
i_out.doall <= inval_all;
|
||||
i_out.doall <= r.inval_all;
|
||||
i_out.addr <= addr;
|
||||
i_out.pte <= tlb_data;
|
||||
|
||||
|
||||
@@ -4,11 +4,10 @@ use ieee.numeric_std.all;
|
||||
|
||||
library work;
|
||||
use work.common.all;
|
||||
use work.decode_types.all;
|
||||
|
||||
entity multiply is
|
||||
generic (
|
||||
PIPELINE_DEPTH : natural := 16
|
||||
PIPELINE_DEPTH : natural := 4
|
||||
);
|
||||
port (
|
||||
clk : in std_logic;
|
||||
@@ -19,17 +18,16 @@ entity multiply is
|
||||
end entity multiply;
|
||||
|
||||
architecture behaviour of multiply is
|
||||
signal m: Execute1ToMultiplyType;
|
||||
signal m: Execute1ToMultiplyType := Execute1ToMultiplyInit;
|
||||
|
||||
type multiply_pipeline_stage is record
|
||||
valid : std_ulogic;
|
||||
insn_type : insn_type_t;
|
||||
data : signed(129 downto 0);
|
||||
data : unsigned(127 downto 0);
|
||||
is_32bit : std_ulogic;
|
||||
neg_res : std_ulogic;
|
||||
end record;
|
||||
constant MultiplyPipelineStageInit : multiply_pipeline_stage := (valid => '0',
|
||||
insn_type => OP_ILLEGAL,
|
||||
is_32bit => '0',
|
||||
is_32bit => '0', neg_res => '0',
|
||||
data => (others => '0'));
|
||||
|
||||
type multiply_pipeline_type is array(0 to PIPELINE_DEPTH-1) of multiply_pipeline_stage;
|
||||
@@ -51,51 +49,36 @@ begin
|
||||
|
||||
multiply_1: process(all)
|
||||
variable v : reg_type;
|
||||
variable d : std_ulogic_vector(129 downto 0);
|
||||
variable d : std_ulogic_vector(127 downto 0);
|
||||
variable d2 : std_ulogic_vector(63 downto 0);
|
||||
variable ov : std_ulogic;
|
||||
begin
|
||||
v := r;
|
||||
|
||||
m_out <= MultiplyToExecute1Init;
|
||||
|
||||
v.multiply_pipeline(0).valid := m.valid;
|
||||
v.multiply_pipeline(0).insn_type := m.insn_type;
|
||||
v.multiply_pipeline(0).data := signed(m.data1) * signed(m.data2);
|
||||
v.multiply_pipeline(0).data := unsigned(m.data1) * unsigned(m.data2);
|
||||
v.multiply_pipeline(0).is_32bit := m.is_32bit;
|
||||
v.multiply_pipeline(0).neg_res := m.neg_result;
|
||||
|
||||
loop_0: for i in 1 to PIPELINE_DEPTH-1 loop
|
||||
v.multiply_pipeline(i) := r.multiply_pipeline(i-1);
|
||||
end loop;
|
||||
|
||||
d := std_ulogic_vector(v.multiply_pipeline(PIPELINE_DEPTH-1).data);
|
||||
ov := '0';
|
||||
|
||||
-- TODO: Handle overflows
|
||||
case_0: case v.multiply_pipeline(PIPELINE_DEPTH-1).insn_type is
|
||||
when OP_MUL_L64 =>
|
||||
d2 := d(63 downto 0);
|
||||
if v.multiply_pipeline(PIPELINE_DEPTH-1).is_32bit = '1' then
|
||||
ov := (or d(63 downto 31)) and not (and d(63 downto 31));
|
||||
else
|
||||
ov := (or d(127 downto 63)) and not (and d(127 downto 63));
|
||||
end if;
|
||||
when OP_MUL_H32 =>
|
||||
d2 := d(63 downto 32) & d(63 downto 32);
|
||||
when OP_MUL_H64 =>
|
||||
d2 := d(127 downto 64);
|
||||
when others =>
|
||||
--report "Illegal insn type in multiplier";
|
||||
d2 := (others => '0');
|
||||
end case;
|
||||
|
||||
m_out.write_reg_data <= d2;
|
||||
m_out.overflow <= ov;
|
||||
|
||||
if v.multiply_pipeline(PIPELINE_DEPTH-1).valid = '1' then
|
||||
m_out.valid <= '1';
|
||||
if v.multiply_pipeline(PIPELINE_DEPTH-1).neg_res = '0' then
|
||||
d := std_ulogic_vector(v.multiply_pipeline(PIPELINE_DEPTH-1).data);
|
||||
else
|
||||
d := std_ulogic_vector(- signed(v.multiply_pipeline(PIPELINE_DEPTH-1).data));
|
||||
end if;
|
||||
|
||||
ov := '0';
|
||||
if v.multiply_pipeline(PIPELINE_DEPTH-1).is_32bit = '1' then
|
||||
ov := (or d(63 downto 31)) and not (and d(63 downto 31));
|
||||
else
|
||||
ov := (or d(127 downto 63)) and not (and d(127 downto 63));
|
||||
end if;
|
||||
|
||||
m_out.result <= d;
|
||||
m_out.overflow <= ov;
|
||||
m_out.valid <= v.multiply_pipeline(PIPELINE_DEPTH-1).valid;
|
||||
|
||||
rin <= v;
|
||||
end process;
|
||||
end architecture behaviour;
|
||||
|
||||
@@ -17,8 +17,18 @@ architecture behave of multiply_tb is
|
||||
|
||||
constant pipeline_depth : integer := 4;
|
||||
|
||||
signal m1 : Execute1ToMultiplyType;
|
||||
signal m1 : Execute1ToMultiplyType := Execute1ToMultiplyInit;
|
||||
signal m2 : MultiplyToExecute1Type;
|
||||
|
||||
-- Return the magnitude of x interpreted as a two's-complement number.
-- The leftmost element (x'left) is taken as the sign bit: when it is
-- '1' the two's-complement negation of x is returned, otherwise x is
-- returned unchanged.  The result has the same length as x.
-- For the most negative value (sign bit set, all other bits clear) the
-- negation yields the same bit pattern, which is the correct magnitude
-- when the result is subsequently read as unsigned.
function absval(x: std_ulogic_vector) return std_ulogic_vector is
begin
    if x(x'left) = '1' then
        return std_ulogic_vector(- signed(x));
    else
        return x;
    end if;
end;
|
||||
|
||||
begin
|
||||
multiply_0: entity work.multiply
|
||||
generic map (PIPELINE_DEPTH => pipeline_depth)
|
||||
@@ -39,9 +49,8 @@ begin
|
||||
wait for clk_period;
|
||||
|
||||
m1.valid <= '1';
|
||||
m1.insn_type <= OP_MUL_L64;
|
||||
m1.data1 <= '0' & x"0000000000001000";
|
||||
m1.data2 <= '0' & x"0000000000001111";
|
||||
m1.data1 <= x"0000000000001000";
|
||||
m1.data2 <= x"0000000000001111";
|
||||
|
||||
wait for clk_period;
|
||||
assert m2.valid = '0';
|
||||
@@ -56,7 +65,7 @@ begin
|
||||
|
||||
wait for clk_period;
|
||||
assert m2.valid = '1';
|
||||
assert m2.write_reg_data = x"0000000001111000";
|
||||
assert m2.result = x"00000000000000000000000001111000";
|
||||
|
||||
wait for clk_period;
|
||||
assert m2.valid = '0';
|
||||
@@ -70,7 +79,7 @@ begin
|
||||
|
||||
wait for clk_period * (pipeline_depth-1);
|
||||
assert m2.valid = '1';
|
||||
assert m2.write_reg_data = x"0000000001111000";
|
||||
assert m2.result = x"00000000000000000000000001111000";
|
||||
|
||||
-- test mulld
|
||||
mulld_loop : for i in 0 to 1000 loop
|
||||
@@ -79,10 +88,10 @@ begin
|
||||
|
||||
behave_rt := ppc_mulld(ra, rb);
|
||||
|
||||
m1.data1 <= '0' & ra;
|
||||
m1.data2 <= '0' & rb;
|
||||
m1.data1 <= absval(ra);
|
||||
m1.data2 <= absval(rb);
|
||||
m1.neg_result <= ra(63) xor rb(63);
|
||||
m1.valid <= '1';
|
||||
m1.insn_type <= OP_MUL_L64;
|
||||
|
||||
wait for clk_period;
|
||||
|
||||
@@ -92,8 +101,8 @@ begin
|
||||
|
||||
assert m2.valid = '1';
|
||||
|
||||
assert to_hstring(behave_rt) = to_hstring(m2.write_reg_data)
|
||||
report "bad mulld expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.write_reg_data);
|
||||
assert to_hstring(behave_rt) = to_hstring(m2.result(63 downto 0))
|
||||
report "bad mulld expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.result(63 downto 0));
|
||||
end loop;
|
||||
|
||||
-- test mulhdu
|
||||
@@ -103,10 +112,10 @@ begin
|
||||
|
||||
behave_rt := ppc_mulhdu(ra, rb);
|
||||
|
||||
m1.data1 <= '0' & ra;
|
||||
m1.data2 <= '0' & rb;
|
||||
m1.data1 <= ra;
|
||||
m1.data2 <= rb;
|
||||
m1.neg_result <= '0';
|
||||
m1.valid <= '1';
|
||||
m1.insn_type <= OP_MUL_H64;
|
||||
|
||||
wait for clk_period;
|
||||
|
||||
@@ -116,8 +125,8 @@ begin
|
||||
|
||||
assert m2.valid = '1';
|
||||
|
||||
assert to_hstring(behave_rt) = to_hstring(m2.write_reg_data)
|
||||
report "bad mulhdu expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.write_reg_data);
|
||||
assert to_hstring(behave_rt) = to_hstring(m2.result(127 downto 64))
|
||||
report "bad mulhdu expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.result(127 downto 64));
|
||||
end loop;
|
||||
|
||||
-- test mulhd
|
||||
@@ -127,10 +136,10 @@ begin
|
||||
|
||||
behave_rt := ppc_mulhd(ra, rb);
|
||||
|
||||
m1.data1 <= ra(63) & ra;
|
||||
m1.data2 <= rb(63) & rb;
|
||||
m1.data1 <= absval(ra);
|
||||
m1.data2 <= absval(rb);
|
||||
m1.neg_result <= ra(63) xor rb(63);
|
||||
m1.valid <= '1';
|
||||
m1.insn_type <= OP_MUL_H64;
|
||||
|
||||
wait for clk_period;
|
||||
|
||||
@@ -140,8 +149,8 @@ begin
|
||||
|
||||
assert m2.valid = '1';
|
||||
|
||||
assert to_hstring(behave_rt) = to_hstring(m2.write_reg_data)
|
||||
report "bad mulhd expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.write_reg_data);
|
||||
assert to_hstring(behave_rt) = to_hstring(m2.result(127 downto 64))
|
||||
report "bad mulhd expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.result(127 downto 64));
|
||||
end loop;
|
||||
|
||||
-- test mullw
|
||||
@@ -151,12 +160,12 @@ begin
|
||||
|
||||
behave_rt := ppc_mullw(ra, rb);
|
||||
|
||||
m1.data1 <= (others => ra(31));
|
||||
m1.data1(31 downto 0) <= ra(31 downto 0);
|
||||
m1.data2 <= (others => rb(31));
|
||||
m1.data2(31 downto 0) <= rb(31 downto 0);
|
||||
m1.data1 <= (others => '0');
|
||||
m1.data1(31 downto 0) <= absval(ra(31 downto 0));
|
||||
m1.data2 <= (others => '0');
|
||||
m1.data2(31 downto 0) <= absval(rb(31 downto 0));
|
||||
m1.neg_result <= ra(31) xor rb(31);
|
||||
m1.valid <= '1';
|
||||
m1.insn_type <= OP_MUL_L64;
|
||||
|
||||
wait for clk_period;
|
||||
|
||||
@@ -166,8 +175,8 @@ begin
|
||||
|
||||
assert m2.valid = '1';
|
||||
|
||||
assert to_hstring(behave_rt) = to_hstring(m2.write_reg_data)
|
||||
report "bad mullw expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.write_reg_data);
|
||||
assert to_hstring(behave_rt) = to_hstring(m2.result(63 downto 0))
|
||||
report "bad mullw expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.result(63 downto 0));
|
||||
end loop;
|
||||
|
||||
-- test mulhw
|
||||
@@ -177,12 +186,12 @@ begin
|
||||
|
||||
behave_rt := ppc_mulhw(ra, rb);
|
||||
|
||||
m1.data1 <= (others => ra(31));
|
||||
m1.data1(31 downto 0) <= ra(31 downto 0);
|
||||
m1.data2 <= (others => rb(31));
|
||||
m1.data2(31 downto 0) <= rb(31 downto 0);
|
||||
m1.data1 <= (others => '0');
|
||||
m1.data1(31 downto 0) <= absval(ra(31 downto 0));
|
||||
m1.data2 <= (others => '0');
|
||||
m1.data2(31 downto 0) <= absval(rb(31 downto 0));
|
||||
m1.neg_result <= ra(31) xor rb(31);
|
||||
m1.valid <= '1';
|
||||
m1.insn_type <= OP_MUL_H32;
|
||||
|
||||
wait for clk_period;
|
||||
|
||||
@@ -192,8 +201,9 @@ begin
|
||||
|
||||
assert m2.valid = '1';
|
||||
|
||||
assert to_hstring(behave_rt) = to_hstring(m2.write_reg_data)
|
||||
report "bad mulhw expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.write_reg_data);
|
||||
assert to_hstring(behave_rt) = to_hstring(m2.result(63 downto 32) & m2.result(63 downto 32))
|
||||
report "bad mulhw expected " & to_hstring(behave_rt) & " got " &
|
||||
to_hstring(m2.result(63 downto 32) & m2.result(63 downto 32));
|
||||
end loop;
|
||||
|
||||
-- test mulhwu
|
||||
@@ -207,8 +217,8 @@ begin
|
||||
m1.data1(31 downto 0) <= ra(31 downto 0);
|
||||
m1.data2 <= (others => '0');
|
||||
m1.data2(31 downto 0) <= rb(31 downto 0);
|
||||
m1.neg_result <= '0';
|
||||
m1.valid <= '1';
|
||||
m1.insn_type <= OP_MUL_H32;
|
||||
|
||||
wait for clk_period;
|
||||
|
||||
@@ -218,8 +228,9 @@ begin
|
||||
|
||||
assert m2.valid = '1';
|
||||
|
||||
assert to_hstring(behave_rt) = to_hstring(m2.write_reg_data)
|
||||
report "bad mulhwu expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.write_reg_data);
|
||||
assert to_hstring(behave_rt) = to_hstring(m2.result(63 downto 32) & m2.result(63 downto 32))
|
||||
report "bad mulhwu expected " & to_hstring(behave_rt) & " got " &
|
||||
to_hstring(m2.result(63 downto 32) & m2.result(63 downto 32));
|
||||
end loop;
|
||||
|
||||
-- test mulli
|
||||
@@ -229,11 +240,11 @@ begin
|
||||
|
||||
behave_rt := ppc_mulli(ra, si);
|
||||
|
||||
m1.data1 <= ra(63) & ra;
|
||||
m1.data2 <= (others => si(15));
|
||||
m1.data2(15 downto 0) <= si;
|
||||
m1.data1 <= absval(ra);
|
||||
m1.data2 <= (others => '0');
|
||||
m1.data2(15 downto 0) <= absval(si);
|
||||
m1.neg_result <= ra(63) xor si(15);
|
||||
m1.valid <= '1';
|
||||
m1.insn_type <= OP_MUL_L64;
|
||||
|
||||
wait for clk_period;
|
||||
|
||||
@@ -243,8 +254,8 @@ begin
|
||||
|
||||
assert m2.valid = '1';
|
||||
|
||||
assert to_hstring(behave_rt) = to_hstring(m2.write_reg_data)
|
||||
report "bad mulli expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.write_reg_data);
|
||||
assert to_hstring(behave_rt) = to_hstring(m2.result(63 downto 0))
|
||||
report "bad mulli expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.result(63 downto 0));
|
||||
end loop;
|
||||
|
||||
std.env.finish;
|
||||
|
||||
@@ -93,7 +93,7 @@ package ppc_fx_insns is
|
||||
function ppc_divd (ra, rb: std_ulogic_vector(63 downto 0)) return std_ulogic_vector;
|
||||
function ppc_divwu (ra, rb: std_ulogic_vector(63 downto 0)) return std_ulogic_vector;
|
||||
|
||||
function ppc_bc_taken(bo, bi: std_ulogic_vector(4 downto 0); cr: std_ulogic_vector(31 downto 0); ctr: std_ulogic_vector(63 downto 0)) return integer;
|
||||
function ppc_bc_taken(bo, bi: std_ulogic_vector(4 downto 0); cr: std_ulogic_vector(31 downto 0); ctr: std_ulogic_vector(63 downto 0)) return std_ulogic;
|
||||
end package ppc_fx_insns;
|
||||
|
||||
package body ppc_fx_insns is
|
||||
@@ -785,13 +785,12 @@ package body ppc_fx_insns is
|
||||
return std_ulogic_vector(resize(tmp, ra'length));
|
||||
end;
|
||||
|
||||
function ppc_bc_taken(bo, bi: std_ulogic_vector(4 downto 0); cr: std_ulogic_vector(31 downto 0); ctr: std_ulogic_vector(63 downto 0)) return integer is
|
||||
function ppc_bc_taken(bo, bi: std_ulogic_vector(4 downto 0); cr: std_ulogic_vector(31 downto 0); ctr: std_ulogic_vector(63 downto 0)) return std_ulogic is
|
||||
variable crfield: integer;
|
||||
variable crbit_match: std_ulogic;
|
||||
variable ctr_not_zero: std_ulogic;
|
||||
variable ctr_ok: std_ulogic;
|
||||
variable cond_ok: std_ulogic;
|
||||
variable ret: integer;
|
||||
begin
|
||||
crfield := to_integer(unsigned(bi));
|
||||
-- BE bit numbering
|
||||
@@ -800,12 +799,7 @@ package body ppc_fx_insns is
|
||||
ctr_not_zero := '1' when ctr /= x"0000000000000001" else '0';
|
||||
ctr_ok := bo(4-2) or (ctr_not_zero xor bo(4-3));
|
||||
cond_ok := bo(4-0) or crbit_match;
|
||||
if ctr_ok = '1' and cond_ok = '1' then
|
||||
ret := 1;
|
||||
else
|
||||
ret := 0;
|
||||
end if;
|
||||
return ret;
|
||||
return ctr_ok and cond_ok;
|
||||
end;
|
||||
|
||||
end package body ppc_fx_insns;
|
||||
|
||||
@@ -24,7 +24,9 @@ entity register_file is
|
||||
|
||||
-- debug
|
||||
sim_dump : in std_ulogic;
|
||||
sim_dump_done : out std_ulogic
|
||||
sim_dump_done : out std_ulogic;
|
||||
|
||||
log_out : out std_ulogic_vector(70 downto 0)
|
||||
);
|
||||
end entity register_file;
|
||||
|
||||
@@ -34,18 +36,19 @@ architecture behaviour of register_file is
|
||||
signal rd_port_b : std_ulogic_vector(63 downto 0);
|
||||
signal dbg_data : std_ulogic_vector(63 downto 0);
|
||||
signal dbg_ack : std_ulogic;
|
||||
signal log_data : std_ulogic_vector(70 downto 0);
|
||||
begin
|
||||
-- synchronous writes
|
||||
register_write_0: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
if w_in.write_enable = '1' then
|
||||
assert not(is_x(w_in.write_data)) and not(is_x(w_in.write_reg)) severity failure;
|
||||
if w_in.write_reg(5) = '0' then
|
||||
report "Writing GPR " & to_hstring(w_in.write_reg) & " " & to_hstring(w_in.write_data);
|
||||
else
|
||||
report "Writing GSPR " & to_hstring(w_in.write_reg) & " " & to_hstring(w_in.write_data);
|
||||
end if;
|
||||
assert not(is_x(w_in.write_data)) and not(is_x(w_in.write_reg)) severity failure;
|
||||
registers(to_integer(unsigned(w_in.write_reg))) <= w_in.write_data;
|
||||
end if;
|
||||
end if;
|
||||
@@ -131,4 +134,13 @@ begin
|
||||
sim_dump_done <= '0';
|
||||
end generate;
|
||||
|
||||
-- Registered snapshot of the register-file write port for the core log.
-- Captured on every rising clock edge, whether or not a write happens.
-- The concatenation is MSB-first: write_data, then write_enable, then
-- write_reg in the least significant bits.
-- NOTE(review): log_data is 71 bits wide, which implies 64 data bits,
-- 1 enable bit and a 6-bit register number -- confirm against the
-- record declaration of w_in.
reg_log: process(clk)
begin
    if rising_edge(clk) then
        log_data <= w_in.write_data &
                    w_in.write_enable &
                    w_in.write_reg;
    end if;
end process;
|
||||
log_out <= log_data;
|
||||
end architecture behaviour;
|
||||
|
||||
12
scripts/fmt_log/Makefile
Normal file
12
scripts/fmt_log/Makefile
Normal file
@@ -0,0 +1,12 @@
|
||||
# Build rules for fmt_log, the core-log formatter in fmt_log.c.
CFLAGS = -O2 -g -Wall -std=c99

# None of these targets name real files, so declare them phony so that a
# stray file called "all", "clean" or "distclean" cannot mask the rule.
.PHONY: all clean distclean

all: fmt_log

fmt_log: fmt_log.c
	$(CC) -o $@ $^ $(CFLAGS)

# Remove the built binary.
clean:
	rm -f fmt_log

# Remove editor backup files.
distclean:
	rm -f *~
|
||||
|
||||
235
scripts/fmt_log/fmt_log.c
Normal file
235
scripts/fmt_log/fmt_log.c
Normal file
@@ -0,0 +1,235 @@
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
typedef unsigned long long u64;

/*
 * One core log record: four 64-bit words, 32 bytes in total.
 *
 * main() fills this structure with a raw fread(), so the bit-field
 * widths and their order are the file format.  Field prefixes follow
 * the pipeline stages shown in the report header: ic = icache,
 * d1 = decode1, d2 = decode2, e1 = execute1, ls = loadstore,
 * dc = dcache, cr = CR write port, reg = register-file write port.
 *
 * NOTE(review): this relies on the compiler allocating bit-fields from
 * the least significant bit of each u64 and on a little-endian host
 * (the behaviour of GCC/Clang on x86-64 and ppc64le) -- confirm before
 * building on anything else.
 */
struct log_entry {
	/* word 0: fetch address and icache state */
	u64	nia_lo: 42;		/* fetch address bits, shifted left by 2 when printed */
	u64	nia_hi: 1;		/* set => top address bits printed as 0xc... */
	u64	ic_ra_valid: 1;
	u64	ic_access_ok: 1;
	u64	ic_is_miss: 1;
	u64	ic_is_hit: 1;
	u64	ic_way: 3;
	u64	ic_state: 1;
	u64	ic_part_nia: 4;		/* index into the recent-NIA table */
	u64	ic_fetch_failed: 1;
	u64	ic_stall_out: 1;
	u64	ic_wb_stall: 1;
	u64	ic_wb_cyc: 1;
	u64	ic_wb_stb: 1;
	u64	ic_wb_adr: 3;
	u64	ic_wb_ack: 1;

	/* word 1: icache output, decode1, decode2, start of execute1 */
	u64	ic_insn: 32;
	u64	ic_valid: 1;
	u64	d1_valid: 1;
	u64	d1_unit: 2;		/* index into units[] */
	u64	d1_part_nia: 4;
	u64	d1_insn_type: 6;	/* index into ops[] */
	u64	d2_bypass_a: 1;
	u64	d2_bypass_b: 1;
	u64	d2_bypass_c: 1;
	u64	d2_stall_out: 1;
	u64	d2_stopped_out: 1;
	u64	d2_valid: 1;
	u64	d2_part_nia: 4;
	u64	e1_flush_out: 1;
	u64	e1_stall_out: 1;
	u64	e1_redirect: 1;
	u64	e1_valid: 1;
	u64	e1_write_enable: 1;
	u64	e1_unused: 3;

	/* word 2: rest of execute1, loadstore, dcache, CR and GPR write ports */
	u64	e1_irq_state: 1;
	u64	e1_irq: 1;
	u64	e1_exception: 1;
	u64	e1_msr_dr: 1;
	u64	e1_msr_ir: 1;
	u64	e1_msr_pr: 1;
	u64	e1_msr_ee: 1;
	u64	pad1: 5;
	u64	ls_state: 3;
	u64	ls_dw_done: 1;
	u64	ls_min_done: 1;
	u64	ls_do_valid: 1;
	u64	ls_mo_valid: 1;
	u64	ls_lo_valid: 1;
	u64	ls_eo_except: 1;
	u64	ls_stall_out: 1;
	u64	pad2: 2;
	u64	dc_state: 3;
	u64	dc_ra_valid: 1;
	u64	dc_tlb_way: 3;
	u64	dc_stall_out: 1;
	u64	dc_op: 3;
	u64	dc_do_valid: 1;
	u64	dc_do_error: 1;
	u64	dc_wb_cyc: 1;
	u64	dc_wb_stb: 1;
	u64	dc_wb_ack: 1;
	u64	dc_wb_stall: 1;
	u64	dc_wb_adr: 3;
	u64	cr_wr_mask: 8;
	u64	cr_wr_data: 4;
	u64	cr_wr_enable: 1;
	u64	reg_wr_reg: 6;
	u64	reg_wr_enable: 1;

	/* word 3: value written to the register file */
	u64	reg_wr_data;
};

/*
 * Compile-time guard (C99-compatible): the array size is negative, and
 * the build fails, if the compiler does not lay the record out in
 * exactly 32 bytes.
 */
typedef char log_entry_size_check[(sizeof(struct log_entry) == 32) ? 1 : -1];
|
||||
|
||||
/*
 * Formatting helpers.  All three expand in a scope that has a record
 * named `log`; PNIA additionally needs an array named `full_nia`.
 *
 * FLAG(i, y)    - y if field i of log is non-zero, otherwise a space.
 * FLGA(i, y, z) - y if field i of log is non-zero, otherwise z.
 * PNIA(f)       - low 8 bits of the full_nia entry selected by field f.
 *
 * Value arguments are parenthesised so that any expression (including
 * a nested FLGA) can be passed safely.
 */
#define FLAG(i, y)	(log.i ? (y) : ' ')
#define FLGA(i, y, z)	(log.i ? (y) : (z))
#define PNIA(f)		(full_nia[log.f] & 0xff)
|
||||
|
||||
/* Execution-unit names, indexed by the 2-bit d1_unit field. */
const char *units[4] = { "--", "al", "ls", "?3" };

/*
 * Operation names, indexed by the 6-bit d1_insn_type field; 64 entries
 * so that every possible field value is in range.  Every name is padded
 * to exactly 7 characters so the op column keeps a fixed width (the
 * placeholder printed for an invalid decode1 slot is 7 dashes).
 */
const char *ops[64] =
{
	"illegal", "nop    ", "add    ", "and    ", "attn   ", "b      ", "bc     ", "bcreg  ",
	"bperm  ", "cmp    ", "cmpb   ", "cmpeqb ", "cmprb  ", "cntz   ", "crop   ", "darn   ",
	"dcbf   ", "dcbst  ", "dcbt   ", "dcbtst ", "dcbz   ", "div    ", "dive   ", "exts   ",
	"extswsl", "icbi   ", "icbt   ", "isel   ", "isync  ", "ld     ", "st     ", "maddhd ",
	"maddhdu", "maddld ", "mcrxr  ", "mcrxrx ", "mfcr   ", "mfmsr  ", "mfspr  ", "mod    ",
	"mtcrf  ", "mtmsr  ", "mtspr  ", "mull64 ", "mulh64 ", "mulh32 ", "or     ", "popcnt ",
	"prty   ", "rfid   ", "rlc    ", "rlcl   ", "rlcr   ", "sc     ", "setb   ", "shl    ",
	"shr    ", "sync   ", "tlbie  ", "trap   ", "xor    ", "ffail  ", "?62    ", "?63    "
};
|
||||
|
||||
/*
 * Three-character names for 13 special-purpose register slots.
 * NOTE(review): presumably indexed by the low bits of a GSPR register
 * number when decoding reg_wr_reg -- confirm against the user in main().
 */
const char *spr_names[13] =
{
	"lr ", "ctr", "sr0", "sr1", "hr0", "hr1", "sg0", "sg1",
	"sg2", "sg3", "hg0", "hg1", "xer"
};
|
||||
|
||||
int main(int ac, char **av)
|
||||
{
|
||||
struct log_entry log;
|
||||
u64 full_nia[16];
|
||||
long int lineno = 1;
|
||||
FILE *f;
|
||||
const char *filename;
|
||||
int i;
|
||||
long int ncompl = 0;
|
||||
|
||||
if (ac != 1 && ac != 2) {
|
||||
fprintf(stderr, "Usage: %s [filename]\n", av[0]);
|
||||
exit(1);
|
||||
}
|
||||
f = stdin;
|
||||
if (ac == 2) {
|
||||
filename = av[1];
|
||||
f = fopen(filename, "rb");
|
||||
if (f == NULL) {
|
||||
perror(filename);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 15; ++i)
|
||||
full_nia[i] = i << 2;
|
||||
|
||||
while (fread(&log, sizeof(log), 1, f) == 1) {
|
||||
full_nia[log.nia_lo & 0xf] = (log.nia_hi? 0xc000000000000000: 0) |
|
||||
(log.nia_lo << 2);
|
||||
if (lineno % 20 == 1) {
|
||||
printf(" fetch1 NIA icache decode1 decode2 execute1 loadstore dcache CR GSPR\n");
|
||||
printf(" ---------------- TAHW S -WB-- pN --insn-- pN un op pN byp FR IIE MSR WC SD MM CE SRTO DE -WB-- c ms reg val\n");
|
||||
printf(" LdMy t csnSa IA IA it IA abc le srx EPID em tw rd mx tAwp vr csnSa 0 k\n");
|
||||
}
|
||||
printf("%4ld %c0000%.11llx %c ", lineno,
|
||||
(log.nia_hi? 'c': '0'),
|
||||
(unsigned long long)log.nia_lo << 2,
|
||||
FLAG(ic_stall_out, '|'));
|
||||
printf("%c%c%c%d %c %c%c%d%c%c %.2llx ",
|
||||
FLGA(ic_ra_valid, ' ', 'T'),
|
||||
FLGA(ic_access_ok, ' ', 'X'),
|
||||
FLGA(ic_is_hit, 'H', FLGA(ic_is_miss, 'M', ' ')),
|
||||
log.ic_way,
|
||||
FLAG(ic_state, 'W'),
|
||||
FLAG(ic_wb_cyc, 'c'),
|
||||
FLAG(ic_wb_stb, 's'),
|
||||
log.ic_wb_adr,
|
||||
FLAG(ic_wb_stall, 'S'),
|
||||
FLAG(ic_wb_ack, 'a'),
|
||||
PNIA(ic_part_nia));
|
||||
if (log.ic_valid)
|
||||
printf("%.8x", log.ic_insn);
|
||||
else if (log.ic_fetch_failed)
|
||||
printf("!!!!!!!!");
|
||||
else
|
||||
printf("--------");
|
||||
printf(" %c%c %.2llx ",
|
||||
FLAG(ic_valid, '>'),
|
||||
FLAG(d2_stall_out, '|'),
|
||||
PNIA(d1_part_nia));
|
||||
if (log.d1_valid)
|
||||
printf("%s %s",
|
||||
units[log.d1_unit],
|
||||
ops[log.d1_insn_type]);
|
||||
else
|
||||
printf("-- -------");
|
||||
printf(" %c%c ",
|
||||
FLAG(d1_valid, '>'),
|
||||
FLAG(d2_stall_out, '|'));
|
||||
printf("%.2llx %c%c%c %c%c ",
|
||||
PNIA(d2_part_nia),
|
||||
FLAG(d2_bypass_a, 'a'),
|
||||
FLAG(d2_bypass_b, 'b'),
|
||||
FLAG(d2_bypass_c, 'c'),
|
||||
FLAG(d2_valid, '>'),
|
||||
FLAG(e1_stall_out, '|'));
|
||||
printf("%c%c %c%c%c %c%c%c%c %c%c ",
|
||||
FLAG(e1_flush_out, 'F'),
|
||||
FLAG(e1_redirect, 'R'),
|
||||
FLAG(e1_irq_state, 'w'),
|
||||
FLAG(e1_irq, 'I'),
|
||||
FLAG(e1_exception, 'X'),
|
||||
FLAG(e1_msr_ee, 'E'),
|
||||
FLGA(e1_msr_pr, 'u', 's'),
|
||||
FLAG(e1_msr_ir, 'I'),
|
||||
FLAG(e1_msr_dr, 'D'),
|
||||
FLAG(e1_write_enable, 'W'),
|
||||
FLAG(e1_valid, 'C'));
|
||||
printf("%c %d%d %c%c %c%c %c ",
|
||||
FLAG(ls_stall_out, '|'),
|
||||
log.ls_state,
|
||||
log.ls_dw_done,
|
||||
FLAG(ls_mo_valid, 'M'),
|
||||
FLAG(ls_min_done, 'm'),
|
||||
FLAG(ls_lo_valid, 'C'),
|
||||
FLAG(ls_eo_except, 'X'),
|
||||
FLAG(ls_do_valid, '>'));
|
||||
printf("%d%c%d%d %c%c %c%c%d%c%c ",
|
||||
log.dc_state,
|
||||
FLAG(dc_ra_valid, 'R'),
|
||||
log.dc_tlb_way,
|
||||
log.dc_op,
|
||||
FLAG(dc_do_valid, 'V'),
|
||||
FLAG(dc_do_error, 'E'),
|
||||
FLAG(dc_wb_cyc, 'c'),
|
||||
FLAG(dc_wb_stb, 's'),
|
||||
log.dc_wb_adr,
|
||||
FLAG(dc_wb_stall, 'S'),
|
||||
FLAG(dc_wb_ack, 'a'));
|
||||
if (log.cr_wr_enable)
|
||||
printf("%x>%.2x ", log.cr_wr_data, log.cr_wr_mask);
|
||||
else
|
||||
printf(" ");
|
||||
if (log.reg_wr_enable) {
|
||||
if (log.reg_wr_reg < 32 || log.reg_wr_reg > 44)
|
||||
printf("r%02d", log.reg_wr_reg);
|
||||
else
|
||||
printf("%s", spr_names[log.reg_wr_reg - 32]);
|
||||
printf("=%.16llx", log.reg_wr_data);
|
||||
}
|
||||
printf("\n");
|
||||
++lineno;
|
||||
if (log.ls_lo_valid || log.e1_valid)
|
||||
++ncompl;
|
||||
}
|
||||
printf("%ld instructions completed, %.2f CPI\n", ncompl,
|
||||
(double)(lineno - 1) / ncompl);
|
||||
exit(0);
|
||||
}
|
||||
@@ -42,6 +42,9 @@
|
||||
#define DBG_CORE_GSPR_INDEX 0x14
|
||||
#define DBG_CORE_GSPR_DATA 0x15
|
||||
|
||||
#define DBG_LOG_ADDR 0x16
|
||||
#define DBG_LOG_DATA 0x17
|
||||
|
||||
static bool debug;
|
||||
|
||||
struct backend {
|
||||
@@ -507,8 +510,10 @@ static void load(const char *filename, uint64_t addr)
|
||||
// if (rc < 8) XXX fixup endian ?
|
||||
check(dmi_write(DBG_WB_DATA, data), "writing WB_DATA");
|
||||
count += 8;
|
||||
if (!(count % 1024))
|
||||
printf("%x...\n", count);
|
||||
if (!(count % 1024)) {
|
||||
printf("%x...\r", count);
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
close(fd);
|
||||
printf("%x done.\n", count);
|
||||
@@ -535,8 +540,10 @@ static void save(const char *filename, uint64_t addr, uint64_t size)
|
||||
break;
|
||||
}
|
||||
count += 8;
|
||||
if (!(count % 1024))
|
||||
printf("%x...\n", count);
|
||||
if (!(count % 1024)) {
|
||||
printf("%x...\r", count);
|
||||
fflush(stdout);
|
||||
}
|
||||
if (count >= size)
|
||||
break;
|
||||
}
|
||||
@@ -544,6 +551,73 @@ static void save(const char *filename, uint64_t addr, uint64_t size)
|
||||
printf("%x done.\n", count);
|
||||
}
|
||||
|
||||
#define LOG_STOP 0x80000000ull
|
||||
|
||||
static void log_start(void)
|
||||
{
|
||||
check(dmi_write(DBG_LOG_ADDR, 0), "writing LOG_ADDR");
|
||||
}
|
||||
|
||||
static void log_stop(void)
|
||||
{
|
||||
uint64_t lsize, laddr, waddr;
|
||||
|
||||
check(dmi_write(DBG_LOG_ADDR, LOG_STOP), "writing LOG_ADDR");
|
||||
check(dmi_read(DBG_LOG_ADDR, &laddr), "reading LOG_ADDR");
|
||||
waddr = laddr >> 32;
|
||||
for (lsize = 1; lsize; lsize <<= 1)
|
||||
if ((waddr >> 1) < lsize)
|
||||
break;
|
||||
waddr &= ~lsize;
|
||||
printf("Log size = %" PRIu64 " entries, ", lsize);
|
||||
printf("write ptr = %" PRIx64 "\n", waddr);
|
||||
}
|
||||
|
||||
static void log_dump(const char *filename)
|
||||
{
|
||||
FILE *f;
|
||||
uint64_t lsize, laddr, waddr;
|
||||
uint64_t orig_laddr;
|
||||
uint64_t i, ldata;
|
||||
|
||||
f = fopen(filename, "w");
|
||||
if (f == NULL) {
|
||||
fprintf(stderr, "Failed to create '%s': %s\n", filename,
|
||||
strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
check(dmi_read(DBG_LOG_ADDR, &orig_laddr), "reading LOG_ADDR");
|
||||
if (!(orig_laddr & LOG_STOP))
|
||||
check(dmi_write(DBG_LOG_ADDR, LOG_STOP), "writing LOG_ADDR");
|
||||
|
||||
waddr = orig_laddr >> 32;
|
||||
for (lsize = 1; lsize; lsize <<= 1)
|
||||
if ((waddr >> 1) < lsize)
|
||||
break;
|
||||
waddr &= ~lsize;
|
||||
printf("Log size = %" PRIu64 " entries\n", lsize);
|
||||
|
||||
laddr = LOG_STOP | (waddr << 2);
|
||||
check(dmi_write(DBG_LOG_ADDR, laddr), "writing LOG_ADDR");
|
||||
|
||||
for (i = 0; i < lsize * 4; ++i) {
|
||||
check(dmi_read(DBG_LOG_DATA, &ldata), "reading LOG_DATA");
|
||||
if (fwrite(&ldata, sizeof(ldata), 1, f) != 1) {
|
||||
fprintf(stderr, "Write error on %s\n", filename);
|
||||
exit(1);
|
||||
}
|
||||
if (!(i % 128)) {
|
||||
printf("%" PRIu64 "...\r", i * 8);
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
fclose(f);
|
||||
printf("%" PRIu64 " done\n", lsize * 32);
|
||||
|
||||
check(dmi_write(DBG_LOG_ADDR, orig_laddr), "writing LOG_ADDR");
|
||||
}
|
||||
|
||||
static void usage(const char *cmd)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s -b <jtag|sim> <command> <args>\n", cmd);
|
||||
@@ -568,6 +642,12 @@ static void usage(const char *cmd)
|
||||
fprintf(stderr, " gpr <reg> [count]\n");
|
||||
fprintf(stderr, " status\n");
|
||||
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, " Core logging:\n");
|
||||
fprintf(stderr, " lstart start logging\n");
|
||||
fprintf(stderr, " lstop stop logging\n");
|
||||
fprintf(stderr, " ldump <file> dump log to file\n");
|
||||
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, " JTAG:\n");
|
||||
fprintf(stderr, " dmiread <hex addr>\n");
|
||||
@@ -706,6 +786,17 @@ int main(int argc, char *argv[])
|
||||
if (((i+1) < argc) && isdigit(argv[i+1][0]))
|
||||
count = strtoul(argv[++i], NULL, 10);
|
||||
gpr_read(reg, count);
|
||||
} else if (strcmp(argv[i], "lstart") == 0) {
|
||||
log_start();
|
||||
} else if (strcmp(argv[i], "lstop") == 0) {
|
||||
log_stop();
|
||||
} else if (strcmp(argv[i], "ldump") == 0) {
|
||||
const char *filename;
|
||||
|
||||
if ((i+1) >= argc)
|
||||
usage(argv[0]);
|
||||
filename = argv[++i];
|
||||
log_dump(filename);
|
||||
} else {
|
||||
fprintf(stderr, "Unknown command %s\n", argv[i]);
|
||||
exit(1);
|
||||
|
||||
6
soc.vhdl
6
soc.vhdl
@@ -51,7 +51,8 @@ entity soc is
|
||||
SPI_FLASH_DLINES : positive := 1;
|
||||
SPI_FLASH_OFFSET : integer := 0;
|
||||
SPI_FLASH_DEF_CKDV : natural := 2;
|
||||
SPI_FLASH_DEF_QUAD : boolean := false
|
||||
SPI_FLASH_DEF_QUAD : boolean := false;
|
||||
LOG_LENGTH : natural := 512
|
||||
);
|
||||
port(
|
||||
rst : in std_ulogic;
|
||||
@@ -198,7 +199,8 @@ begin
|
||||
generic map(
|
||||
SIM => SIM,
|
||||
DISABLE_FLATTEN => DISABLE_FLATTEN_CORE,
|
||||
ALT_RESET_ADDRESS => (23 downto 0 => '0', others => '1')
|
||||
ALT_RESET_ADDRESS => (23 downto 0 => '0', others => '1'),
|
||||
LOG_LENGTH => LOG_LENGTH
|
||||
)
|
||||
port map(
|
||||
clk => system_clk,
|
||||
|
||||
Binary file not shown.
@@ -9,6 +9,14 @@
|
||||
#undef DEBUG
|
||||
//#define DEBUG 1
|
||||
|
||||
/*
 * Short busy-wait: counts a volatile variable from 0 up to 10 so that the
 * loop is not optimised away.
 */
void delay(void)
{
	static volatile int spin;

	spin = 0;
	while (spin < 10)
		++spin;
}
|
||||
|
||||
void print_number(unsigned int i) // only for i = 0-999
|
||||
{
|
||||
unsigned int j, k, m;
|
||||
@@ -148,14 +156,17 @@ int xics_test_0(void)
|
||||
xics_write8(XICS_MFRR, 0x05); // cause 0x500 interrupt
|
||||
|
||||
// still masked, so shouldn't happen yet
|
||||
delay();
|
||||
assert(isrs_run == 0);
|
||||
|
||||
// unmask IPI only
|
||||
xics_write8(XICS_XIRR, 0x40);
|
||||
delay();
|
||||
assert(isrs_run == ISR_IPI);
|
||||
|
||||
// unmask UART
|
||||
xics_write8(XICS_XIRR, 0xc0);
|
||||
delay();
|
||||
assert(isrs_run == (ISR_IPI | ISR_UART));
|
||||
|
||||
// cleanup
|
||||
@@ -174,12 +185,14 @@ int xics_test_1(void)
|
||||
xics_write8(XICS_XIRR, 0xff); // allow all interrupts
|
||||
|
||||
// should be none pending
|
||||
delay();
|
||||
assert(isrs_run == 0);
|
||||
|
||||
// trigger both
|
||||
potato_uart_irq_en(); // cause 0x500 interrupt
|
||||
xics_write8(XICS_MFRR, 0x05); // cause 0x500 interrupt
|
||||
|
||||
delay();
|
||||
assert(isrs_run == (ISR_IPI | ISR_UART));
|
||||
|
||||
// cleanup
|
||||
@@ -208,9 +221,11 @@ int xics_test_2(void)
|
||||
// trigger an IPI
|
||||
xics_write8(XICS_MFRR, 0x05); // cause 0x500 interrupt
|
||||
|
||||
delay();
|
||||
assert(isrs_run == 0);
|
||||
|
||||
mtmsrd(0x9000000000008003); // EE on
|
||||
delay();
|
||||
assert(isrs_run == ISR_IPI);
|
||||
|
||||
// cleanup
|
||||
|
||||
@@ -22,27 +22,33 @@ end entity writeback;
|
||||
|
||||
architecture behaviour of writeback is
|
||||
begin
|
||||
writeback_1: process(all)
|
||||
writeback_0: process(clk)
|
||||
variable x : std_ulogic_vector(0 downto 0);
|
||||
variable y : std_ulogic_vector(0 downto 0);
|
||||
variable w : std_ulogic_vector(0 downto 0);
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
-- Do consistency checks only on the clock edge
|
||||
x(0) := e_in.valid;
|
||||
y(0) := l_in.valid;
|
||||
assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure;
|
||||
|
||||
x(0) := e_in.write_enable or e_in.exc_write_enable;
|
||||
y(0) := l_in.write_enable;
|
||||
assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure;
|
||||
|
||||
w(0) := e_in.write_cr_enable;
|
||||
x(0) := (e_in.write_enable and e_in.rc);
|
||||
assert (to_integer(unsigned(w)) + to_integer(unsigned(x))) <= 1 severity failure;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
writeback_1: process(all)
|
||||
variable cf: std_ulogic_vector(3 downto 0);
|
||||
variable zero : std_ulogic;
|
||||
variable sign : std_ulogic;
|
||||
variable scf : std_ulogic_vector(3 downto 0);
|
||||
begin
|
||||
x(0) := e_in.valid;
|
||||
y(0) := l_in.valid;
|
||||
assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure;
|
||||
|
||||
x(0) := e_in.write_enable or e_in.exc_write_enable;
|
||||
y(0) := l_in.write_enable;
|
||||
assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure;
|
||||
|
||||
w(0) := e_in.write_cr_enable;
|
||||
x(0) := (e_in.write_enable and e_in.rc);
|
||||
assert (to_integer(unsigned(w)) + to_integer(unsigned(x))) <= 1 severity failure;
|
||||
|
||||
w_out <= WritebackToRegisterFileInit;
|
||||
c_out <= WritebackToCrFileInit;
|
||||
|
||||
|
||||
985
xilinx-mult.vhdl
Normal file
985
xilinx-mult.vhdl
Normal file
@@ -0,0 +1,985 @@
|
||||
library ieee;
|
||||
use ieee.std_logic_1164.all;
|
||||
use ieee.numeric_std.all;
|
||||
|
||||
library work;
|
||||
use work.common.all;
|
||||
|
||||
library unisim;
|
||||
use unisim.vcomponents.all;
|
||||
|
||||
-- Multiplier entity for the Xilinx DSP48E1-based implementation.
--   clk   : clock input
--   m_in  : request record from execute1 (Execute1ToMultiplyType)
--   m_out : result record back to execute1 (MultiplyToExecute1Type)
entity multiply is
|
||||
port (
|
||||
clk : in std_logic;
|
||||
|
||||
m_in : in Execute1ToMultiplyType;
|
||||
m_out : out MultiplyToExecute1Type
|
||||
);
|
||||
end entity multiply;
|
||||
|
||||
architecture behaviour of multiply is
|
||||
signal m00_p, m01_p, m02_p, m03_p : std_ulogic_vector(47 downto 0);
|
||||
signal m00_pc : std_ulogic_vector(47 downto 0);
|
||||
signal m10_p, m11_p, m12_p, m13_p : std_ulogic_vector(47 downto 0);
|
||||
signal m11_pc, m12_pc, m13_pc : std_ulogic_vector(47 downto 0);
|
||||
signal m20_p, m21_p, m22_p, m23_p : std_ulogic_vector(47 downto 0);
|
||||
signal s0_pc, s1_pc : std_ulogic_vector(47 downto 0);
|
||||
signal product_lo : std_ulogic_vector(31 downto 0);
|
||||
signal product : std_ulogic_vector(127 downto 0);
|
||||
signal addend : std_ulogic_vector(127 downto 0);
|
||||
signal s0_carry, p0_carry : std_ulogic_vector(3 downto 0);
|
||||
signal p0_mask : std_ulogic_vector(47 downto 0);
|
||||
signal p0_pat, p0_patb : std_ulogic;
|
||||
signal p1_pat, p1_patb : std_ulogic;
|
||||
|
||||
signal req_32bit, r32_1 : std_ulogic;
|
||||
signal req_neg, rneg_1 : std_ulogic;
|
||||
signal valid_1 : std_ulogic;
|
||||
|
||||
begin
|
||||
addend <= (others => m_in.neg_result);
|
||||
|
||||
m00: DSP48E1
|
||||
generic map (
|
||||
ACASCREG => 0,
|
||||
ALUMODEREG => 0,
|
||||
AREG => 0,
|
||||
BCASCREG => 0,
|
||||
BREG => 0,
|
||||
CARRYINREG => 0,
|
||||
CARRYINSELREG => 0,
|
||||
INMODEREG => 0,
|
||||
OPMODEREG => 0,
|
||||
PREG => 0
|
||||
)
|
||||
port map (
|
||||
A => "0000000" & m_in.data1(22 downto 0),
|
||||
ACIN => (others => '0'),
|
||||
ALUMODE => "0000",
|
||||
B => '0' & m_in.data2(16 downto 0),
|
||||
BCIN => (others => '0'),
|
||||
C => "00000000000000" & addend(33 downto 0),
|
||||
CARRYCASCIN => '0',
|
||||
CARRYIN => '0',
|
||||
CARRYINSEL => "000",
|
||||
CEA1 => '0',
|
||||
CEA2 => '0',
|
||||
CEAD => '0',
|
||||
CEALUMODE => '0',
|
||||
CEB1 => '0',
|
||||
CEB2 => '0',
|
||||
CEC => '1',
|
||||
CECARRYIN => '0',
|
||||
CECTRL => '0',
|
||||
CED => '0',
|
||||
CEINMODE => '0',
|
||||
CEM => '1',
|
||||
CEP => '0',
|
||||
CLK => clk,
|
||||
D => (others => '0'),
|
||||
INMODE => "00000",
|
||||
MULTSIGNIN => '0',
|
||||
OPMODE => "0110101",
|
||||
P => m00_p,
|
||||
PCIN => (others => '0'),
|
||||
PCOUT => m00_pc,
|
||||
RSTA => '0',
|
||||
RSTALLCARRYIN => '0',
|
||||
RSTALUMODE => '0',
|
||||
RSTB => '0',
|
||||
RSTC => '0',
|
||||
RSTCTRL => '0',
|
||||
RSTD => '0',
|
||||
RSTINMODE => '0',
|
||||
RSTM => '0',
|
||||
RSTP => '0'
|
||||
);
|
||||
|
||||
m01: DSP48E1
|
||||
generic map (
|
||||
ACASCREG => 0,
|
||||
ALUMODEREG => 0,
|
||||
AREG => 0,
|
||||
BCASCREG => 0,
|
||||
BREG => 0,
|
||||
CARRYINREG => 0,
|
||||
CARRYINSELREG => 0,
|
||||
INMODEREG => 0,
|
||||
OPMODEREG => 0,
|
||||
PREG => 0
|
||||
)
|
||||
port map (
|
||||
A => "0000000" & m_in.data1(22 downto 0),
|
||||
ACIN => (others => '0'),
|
||||
ALUMODE => "0000",
|
||||
B => '0' & m_in.data2(33 downto 17),
|
||||
BCIN => (others => '0'),
|
||||
C => (others => '0'),
|
||||
CARRYCASCIN => '0',
|
||||
CARRYIN => '0',
|
||||
CARRYINSEL => "000",
|
||||
CEA1 => '0',
|
||||
CEA2 => '0',
|
||||
CEAD => '0',
|
||||
CEALUMODE => '0',
|
||||
CEB1 => '0',
|
||||
CEB2 => '0',
|
||||
CEC => '1',
|
||||
CECARRYIN => '0',
|
||||
CECTRL => '0',
|
||||
CED => '0',
|
||||
CEINMODE => '0',
|
||||
CEM => '1',
|
||||
CEP => '0',
|
||||
CLK => clk,
|
||||
D => (others => '0'),
|
||||
INMODE => "00000",
|
||||
MULTSIGNIN => '0',
|
||||
OPMODE => "1010101",
|
||||
P => m01_p,
|
||||
PCIN => m00_pc,
|
||||
RSTA => '0',
|
||||
RSTALLCARRYIN => '0',
|
||||
RSTALUMODE => '0',
|
||||
RSTB => '0',
|
||||
RSTC => '0',
|
||||
RSTCTRL => '0',
|
||||
RSTD => '0',
|
||||
RSTINMODE => '0',
|
||||
RSTM => '0',
|
||||
RSTP => '0'
|
||||
);
|
||||
|
||||
m02: DSP48E1
|
||||
generic map (
|
||||
ACASCREG => 0,
|
||||
ALUMODEREG => 0,
|
||||
AREG => 0,
|
||||
BCASCREG => 0,
|
||||
BREG => 0,
|
||||
CARRYINREG => 0,
|
||||
CARRYINSELREG => 0,
|
||||
INMODEREG => 0,
|
||||
OPMODEREG => 0,
|
||||
PREG => 0
|
||||
)
|
||||
port map (
|
||||
A => "0000000" & m_in.data1(22 downto 0),
|
||||
ACIN => (others => '0'),
|
||||
ALUMODE => "0000",
|
||||
B => '0' & m_in.data2(50 downto 34),
|
||||
BCIN => (others => '0'),
|
||||
C => x"0000000" & "000" & addend(50 downto 34),
|
||||
CARRYCASCIN => '0',
|
||||
CARRYIN => '0',
|
||||
CARRYINSEL => "000",
|
||||
CEA1 => '0',
|
||||
CEA2 => '0',
|
||||
CEAD => '0',
|
||||
CEALUMODE => '0',
|
||||
CEB1 => '0',
|
||||
CEB2 => '0',
|
||||
CEC => '1',
|
||||
CECARRYIN => '0',
|
||||
CECTRL => '0',
|
||||
CED => '0',
|
||||
CEINMODE => '0',
|
||||
CEM => '1',
|
||||
CEP => '0',
|
||||
CLK => clk,
|
||||
D => (others => '0'),
|
||||
INMODE => "00000",
|
||||
MULTSIGNIN => '0',
|
||||
OPMODE => "0110101",
|
||||
P => m02_p,
|
||||
PCIN => (others => '0'),
|
||||
RSTA => '0',
|
||||
RSTALLCARRYIN => '0',
|
||||
RSTALUMODE => '0',
|
||||
RSTB => '0',
|
||||
RSTC => '0',
|
||||
RSTCTRL => '0',
|
||||
RSTD => '0',
|
||||
RSTINMODE => '0',
|
||||
RSTM => '0',
|
||||
RSTP => '0'
|
||||
);
|
||||
|
||||
m03: DSP48E1
|
||||
generic map (
|
||||
ACASCREG => 0,
|
||||
ALUMODEREG => 0,
|
||||
AREG => 0,
|
||||
BCASCREG => 0,
|
||||
BREG => 0,
|
||||
CARRYINREG => 0,
|
||||
CARRYINSELREG => 0,
|
||||
INMODEREG => 0,
|
||||
OPMODEREG => 0,
|
||||
PREG => 0
|
||||
)
|
||||
port map (
|
||||
A => "0000000" & m_in.data1(22 downto 0),
|
||||
ACIN => (others => '0'),
|
||||
ALUMODE => "0000",
|
||||
B => "00000" & m_in.data2(63 downto 51),
|
||||
BCIN => (others => '0'),
|
||||
C => x"000000" & '0' & addend(73 downto 51),
|
||||
CARRYCASCIN => '0',
|
||||
CARRYIN => '0',
|
||||
CARRYINSEL => "000",
|
||||
CEA1 => '0',
|
||||
CEA2 => '0',
|
||||
CEAD => '0',
|
||||
CEALUMODE => '0',
|
||||
CEB1 => '0',
|
||||
CEB2 => '0',
|
||||
CEC => '1',
|
||||
CECARRYIN => '0',
|
||||
CECTRL => '0',
|
||||
CED => '0',
|
||||
CEINMODE => '0',
|
||||
CEM => '1',
|
||||
CEP => '0',
|
||||
CLK => clk,
|
||||
D => (others => '0'),
|
||||
INMODE => "00000",
|
||||
MULTSIGNIN => '0',
|
||||
OPMODE => "0110101",
|
||||
P => m03_p,
|
||||
PCIN => (others => '0'),
|
||||
RSTA => '0',
|
||||
RSTALLCARRYIN => '0',
|
||||
RSTALUMODE => '0',
|
||||
RSTB => '0',
|
||||
RSTC => '0',
|
||||
RSTCTRL => '0',
|
||||
RSTD => '0',
|
||||
RSTINMODE => '0',
|
||||
RSTM => '0',
|
||||
RSTP => '0'
|
||||
);
|
||||
|
||||
m10: DSP48E1
|
||||
generic map (
|
||||
ACASCREG => 0,
|
||||
ALUMODEREG => 0,
|
||||
AREG => 0,
|
||||
BCASCREG => 0,
|
||||
BREG => 0,
|
||||
CARRYINREG => 0,
|
||||
CARRYINSELREG => 0,
|
||||
CREG => 0,
|
||||
INMODEREG => 0,
|
||||
OPMODEREG => 0,
|
||||
PREG => 0
|
||||
)
|
||||
port map (
|
||||
A => "0000000000000" & m_in.data1(39 downto 23),
|
||||
ACIN => (others => '0'),
|
||||
ALUMODE => "0000",
|
||||
B => '0' & m_in.data2(16 downto 0),
|
||||
BCIN => (others => '0'),
|
||||
C => x"000" & "00" & m01_p(39 downto 6),
|
||||
CARRYCASCIN => '0',
|
||||
CARRYIN => '0',
|
||||
CARRYINSEL => "000",
|
||||
CEA1 => '0',
|
||||
CEA2 => '0',
|
||||
CEAD => '0',
|
||||
CEALUMODE => '0',
|
||||
CEB1 => '0',
|
||||
CEB2 => '0',
|
||||
CEC => '0',
|
||||
CECARRYIN => '0',
|
||||
CECTRL => '0',
|
||||
CED => '0',
|
||||
CEINMODE => '0',
|
||||
CEM => '1',
|
||||
CEP => '0',
|
||||
CLK => clk,
|
||||
D => (others => '0'),
|
||||
INMODE => "00000",
|
||||
MULTSIGNIN => '0',
|
||||
OPMODE => "0110101",
|
||||
P => m10_p,
|
||||
PCIN => (others => '0'),
|
||||
RSTA => '0',
|
||||
RSTALLCARRYIN => '0',
|
||||
RSTALUMODE => '0',
|
||||
RSTB => '0',
|
||||
RSTC => '0',
|
||||
RSTCTRL => '0',
|
||||
RSTD => '0',
|
||||
RSTINMODE => '0',
|
||||
RSTM => '0',
|
||||
RSTP => '0'
|
||||
);
|
||||
|
||||
m11: DSP48E1
|
||||
generic map (
|
||||
ACASCREG => 0,
|
||||
ALUMODEREG => 0,
|
||||
AREG => 0,
|
||||
BCASCREG => 0,
|
||||
BREG => 0,
|
||||
CARRYINREG => 0,
|
||||
CARRYINSELREG => 0,
|
||||
CREG => 0,
|
||||
INMODEREG => 0,
|
||||
OPMODEREG => 0,
|
||||
PREG => 0
|
||||
)
|
||||
port map (
|
||||
A => "0000000000000" & m_in.data1(39 downto 23),
|
||||
ACIN => (others => '0'),
|
||||
ALUMODE => "0000",
|
||||
B => '0' & m_in.data2(33 downto 17),
|
||||
BCIN => (others => '0'),
|
||||
C => x"000" & "00" & m02_p(39 downto 6),
|
||||
CARRYCASCIN => '0',
|
||||
CARRYIN => '0',
|
||||
CARRYINSEL => "000",
|
||||
CEA1 => '0',
|
||||
CEA2 => '0',
|
||||
CEAD => '0',
|
||||
CEALUMODE => '0',
|
||||
CEB1 => '0',
|
||||
CEB2 => '0',
|
||||
CEC => '0',
|
||||
CECARRYIN => '0',
|
||||
CECTRL => '0',
|
||||
CED => '0',
|
||||
CEINMODE => '0',
|
||||
CEM => '1',
|
||||
CEP => '0',
|
||||
CLK => clk,
|
||||
D => (others => '0'),
|
||||
INMODE => "00000",
|
||||
MULTSIGNIN => '0',
|
||||
OPMODE => "0110101",
|
||||
P => m11_p,
|
||||
PCIN => (others => '0'),
|
||||
PCOUT => m11_pc,
|
||||
RSTA => '0',
|
||||
RSTALLCARRYIN => '0',
|
||||
RSTALUMODE => '0',
|
||||
RSTB => '0',
|
||||
RSTC => '0',
|
||||
RSTCTRL => '0',
|
||||
RSTD => '0',
|
||||
RSTINMODE => '0',
|
||||
RSTM => '0',
|
||||
RSTP => '0'
|
||||
);
|
||||
|
||||
m12: DSP48E1
|
||||
generic map (
|
||||
ACASCREG => 0,
|
||||
ALUMODEREG => 0,
|
||||
AREG => 0,
|
||||
BCASCREG => 0,
|
||||
BREG => 0,
|
||||
CARRYINREG => 0,
|
||||
CARRYINSELREG => 0,
|
||||
CREG => 0,
|
||||
INMODEREG => 0,
|
||||
OPMODEREG => 0,
|
||||
PREG => 0
|
||||
)
|
||||
port map (
|
||||
A => "0000000000000" & m_in.data1(39 downto 23),
|
||||
ACIN => (others => '0'),
|
||||
ALUMODE => "0000",
|
||||
B => '0' & m_in.data2(50 downto 34),
|
||||
BCIN => (others => '0'),
|
||||
C => x"0000" & '0' & m03_p(36 downto 6),
|
||||
CARRYCASCIN => '0',
|
||||
CARRYIN => '0',
|
||||
CARRYINSEL => "000",
|
||||
CEA1 => '0',
|
||||
CEA2 => '0',
|
||||
CEAD => '0',
|
||||
CEALUMODE => '0',
|
||||
CEB1 => '0',
|
||||
CEB2 => '0',
|
||||
CEC => '0',
|
||||
CECARRYIN => '0',
|
||||
CECTRL => '0',
|
||||
CED => '0',
|
||||
CEINMODE => '0',
|
||||
CEM => '1',
|
||||
CEP => '0',
|
||||
CLK => clk,
|
||||
D => (others => '0'),
|
||||
INMODE => "00000",
|
||||
MULTSIGNIN => '0',
|
||||
OPMODE => "0110101",
|
||||
P => m12_p,
|
||||
PCIN => (others => '0'),
|
||||
PCOUT => m12_pc,
|
||||
RSTA => '0',
|
||||
RSTALLCARRYIN => '0',
|
||||
RSTALUMODE => '0',
|
||||
RSTB => '0',
|
||||
RSTC => '0',
|
||||
RSTCTRL => '0',
|
||||
RSTD => '0',
|
||||
RSTINMODE => '0',
|
||||
RSTM => '0',
|
||||
RSTP => '0'
|
||||
);
|
||||
|
||||
m13: DSP48E1
|
||||
generic map (
|
||||
ACASCREG => 0,
|
||||
ALUMODEREG => 0,
|
||||
AREG => 0,
|
||||
BCASCREG => 0,
|
||||
BREG => 0,
|
||||
CARRYINREG => 0,
|
||||
CARRYINSELREG => 0,
|
||||
INMODEREG => 0,
|
||||
OPMODEREG => 0,
|
||||
PREG => 0
|
||||
)
|
||||
port map (
|
||||
A => "0000000000000" & m_in.data1(39 downto 23),
|
||||
ACIN => (others => '0'),
|
||||
ALUMODE => "0000",
|
||||
B => "00000" & m_in.data2(63 downto 51),
|
||||
BCIN => (others => '0'),
|
||||
C => x"0000000" & "000" & addend(90 downto 74),
|
||||
CARRYCASCIN => '0',
|
||||
CARRYIN => '0',
|
||||
CARRYINSEL => "000",
|
||||
CEA1 => '0',
|
||||
CEA2 => '0',
|
||||
CEAD => '0',
|
||||
CEALUMODE => '0',
|
||||
CEB1 => '0',
|
||||
CEB2 => '0',
|
||||
CEC => '1',
|
||||
CECARRYIN => '0',
|
||||
CECTRL => '0',
|
||||
CED => '0',
|
||||
CEINMODE => '0',
|
||||
CEM => '1',
|
||||
CEP => '0',
|
||||
CLK => clk,
|
||||
D => (others => '0'),
|
||||
INMODE => "00000",
|
||||
MULTSIGNIN => '0',
|
||||
OPMODE => "0110101",
|
||||
P => m13_p,
|
||||
PCIN => (others => '0'),
|
||||
PCOUT => m13_pc,
|
||||
RSTA => '0',
|
||||
RSTALLCARRYIN => '0',
|
||||
RSTALUMODE => '0',
|
||||
RSTB => '0',
|
||||
RSTC => '0',
|
||||
RSTCTRL => '0',
|
||||
RSTD => '0',
|
||||
RSTINMODE => '0',
|
||||
RSTM => '0',
|
||||
RSTP => '0'
|
||||
);
|
||||
|
||||
m20: DSP48E1
|
||||
generic map (
|
||||
ACASCREG => 0,
|
||||
ALUMODEREG => 0,
|
||||
AREG => 0,
|
||||
BCASCREG => 0,
|
||||
BREG => 0,
|
||||
CARRYINREG => 0,
|
||||
CARRYINSELREG => 0,
|
||||
INMODEREG => 0,
|
||||
OPMODEREG => 0,
|
||||
PREG => 0
|
||||
)
|
||||
port map (
|
||||
A => "000000" & m_in.data1(63 downto 40),
|
||||
ACIN => (others => '0'),
|
||||
ALUMODE => "0000",
|
||||
B => '0' & m_in.data2(16 downto 0),
|
||||
BCIN => (others => '0'),
|
||||
C => (others => '0'),
|
||||
CARRYCASCIN => '0',
|
||||
CARRYIN => '0',
|
||||
CARRYINSEL => "000",
|
||||
CEA1 => '0',
|
||||
CEA2 => '0',
|
||||
CEAD => '0',
|
||||
CEALUMODE => '0',
|
||||
CEB1 => '0',
|
||||
CEB2 => '0',
|
||||
CEC => '1',
|
||||
CECARRYIN => '0',
|
||||
CECTRL => '0',
|
||||
CED => '0',
|
||||
CEINMODE => '0',
|
||||
CEM => '1',
|
||||
CEP => '0',
|
||||
CLK => clk,
|
||||
D => (others => '0'),
|
||||
INMODE => "00000",
|
||||
MULTSIGNIN => '0',
|
||||
OPMODE => "0010101",
|
||||
P => m20_p,
|
||||
PCIN => m11_pc,
|
||||
RSTA => '0',
|
||||
RSTALLCARRYIN => '0',
|
||||
RSTALUMODE => '0',
|
||||
RSTB => '0',
|
||||
RSTC => '0',
|
||||
RSTCTRL => '0',
|
||||
RSTD => '0',
|
||||
RSTINMODE => '0',
|
||||
RSTM => '0',
|
||||
RSTP => '0'
|
||||
);
|
||||
|
||||
m21: DSP48E1
|
||||
generic map (
|
||||
ACASCREG => 0,
|
||||
ALUMODEREG => 0,
|
||||
AREG => 0,
|
||||
BCASCREG => 0,
|
||||
BREG => 0,
|
||||
CARRYINREG => 0,
|
||||
CARRYINSELREG => 0,
|
||||
INMODEREG => 0,
|
||||
OPMODEREG => 0,
|
||||
PREG => 0
|
||||
)
|
||||
port map (
|
||||
A => "000000" & m_in.data1(63 downto 40),
|
||||
ACIN => (others => '0'),
|
||||
ALUMODE => "0000",
|
||||
B => '0' & m_in.data2(33 downto 17),
|
||||
BCIN => (others => '0'),
|
||||
C => (others => '0'),
|
||||
CARRYCASCIN => '0',
|
||||
CARRYIN => '0',
|
||||
CARRYINSEL => "000",
|
||||
CEA1 => '0',
|
||||
CEA2 => '0',
|
||||
CEAD => '0',
|
||||
CEALUMODE => '0',
|
||||
CEB1 => '0',
|
||||
CEB2 => '0',
|
||||
CEC => '1',
|
||||
CECARRYIN => '0',
|
||||
CECTRL => '0',
|
||||
CED => '0',
|
||||
CEINMODE => '0',
|
||||
CEM => '1',
|
||||
CEP => '0',
|
||||
CLK => clk,
|
||||
D => (others => '0'),
|
||||
INMODE => "00000",
|
||||
MULTSIGNIN => '0',
|
||||
OPMODE => "0010101",
|
||||
P => m21_p,
|
||||
PCIN => m12_pc,
|
||||
RSTA => '0',
|
||||
RSTALLCARRYIN => '0',
|
||||
RSTALUMODE => '0',
|
||||
RSTB => '0',
|
||||
RSTC => '0',
|
||||
RSTCTRL => '0',
|
||||
RSTD => '0',
|
||||
RSTINMODE => '0',
|
||||
RSTM => '0',
|
||||
RSTP => '0'
|
||||
);
|
||||
|
||||
m22: DSP48E1
|
||||
generic map (
|
||||
ACASCREG => 0,
|
||||
ALUMODEREG => 0,
|
||||
AREG => 0,
|
||||
BCASCREG => 0,
|
||||
BREG => 0,
|
||||
CARRYINREG => 0,
|
||||
CARRYINSELREG => 0,
|
||||
INMODEREG => 0,
|
||||
OPMODEREG => 0,
|
||||
PREG => 0
|
||||
)
|
||||
port map (
|
||||
A => "000000" & m_in.data1(63 downto 40),
|
||||
ACIN => (others => '0'),
|
||||
ALUMODE => "0000",
|
||||
B => '0' & m_in.data2(50 downto 34),
|
||||
BCIN => (others => '0'),
|
||||
C => (others => '0'),
|
||||
CARRYCASCIN => '0',
|
||||
CARRYIN => '0',
|
||||
CARRYINSEL => "000",
|
||||
CEA1 => '0',
|
||||
CEA2 => '0',
|
||||
CEAD => '0',
|
||||
CEALUMODE => '0',
|
||||
CEB1 => '0',
|
||||
CEB2 => '0',
|
||||
CEC => '1',
|
||||
CECARRYIN => '0',
|
||||
CECTRL => '0',
|
||||
CED => '0',
|
||||
CEINMODE => '0',
|
||||
CEM => '1',
|
||||
CEP => '0',
|
||||
CLK => clk,
|
||||
D => (others => '0'),
|
||||
INMODE => "00000",
|
||||
MULTSIGNIN => '0',
|
||||
OPMODE => "0010101",
|
||||
P => m22_p,
|
||||
PCIN => m13_pc,
|
||||
RSTA => '0',
|
||||
RSTALLCARRYIN => '0',
|
||||
RSTALUMODE => '0',
|
||||
RSTB => '0',
|
||||
RSTC => '0',
|
||||
RSTCTRL => '0',
|
||||
RSTD => '0',
|
||||
RSTINMODE => '0',
|
||||
RSTM => '0',
|
||||
RSTP => '0'
|
||||
);
|
||||
|
||||
m23: DSP48E1
|
||||
generic map (
|
||||
ACASCREG => 0,
|
||||
ALUMODEREG => 0,
|
||||
AREG => 0,
|
||||
BCASCREG => 0,
|
||||
BREG => 0,
|
||||
CARRYINREG => 0,
|
||||
CARRYINSELREG => 0,
|
||||
INMODEREG => 0,
|
||||
OPMODEREG => 0,
|
||||
PREG => 0
|
||||
)
|
||||
port map (
|
||||
A => "000000" & m_in.data1(63 downto 40),
|
||||
ACIN => (others => '0'),
|
||||
ALUMODE => "0000",
|
||||
B => "00000" & m_in.data2(63 downto 51),
|
||||
BCIN => (others => '0'),
|
||||
C => x"00" & "000" & addend(127 downto 91),
|
||||
CARRYCASCIN => '0',
|
||||
CARRYIN => '0',
|
||||
CARRYINSEL => "000",
|
||||
CEA1 => '0',
|
||||
CEA2 => '0',
|
||||
CEAD => '0',
|
||||
CEALUMODE => '0',
|
||||
CEB1 => '0',
|
||||
CEB2 => '0',
|
||||
CEC => '1',
|
||||
CECARRYIN => '0',
|
||||
CECTRL => '0',
|
||||
CED => '0',
|
||||
CEINMODE => '0',
|
||||
CEM => '1',
|
||||
CEP => '0',
|
||||
CLK => clk,
|
||||
D => (others => '0'),
|
||||
INMODE => "00000",
|
||||
MULTSIGNIN => '0',
|
||||
OPMODE => "0110101",
|
||||
P => m23_p,
|
||||
PCIN => (others => '0'),
|
||||
RSTA => '0',
|
||||
RSTALLCARRYIN => '0',
|
||||
RSTALUMODE => '0',
|
||||
RSTB => '0',
|
||||
RSTC => '0',
|
||||
RSTCTRL => '0',
|
||||
RSTD => '0',
|
||||
RSTINMODE => '0',
|
||||
RSTM => '0',
|
||||
RSTP => '0'
|
||||
);
|
||||
|
||||
s0: DSP48E1
|
||||
generic map (
|
||||
ACASCREG => 1,
|
||||
ALUMODEREG => 0,
|
||||
AREG => 1,
|
||||
BCASCREG => 1,
|
||||
BREG => 1,
|
||||
CARRYINREG => 0,
|
||||
CARRYINSELREG => 0,
|
||||
CREG => 1,
|
||||
INMODEREG => 0,
|
||||
MREG => 0,
|
||||
OPMODEREG => 0,
|
||||
PREG => 0,
|
||||
USE_MULT => "none"
|
||||
)
|
||||
port map (
|
||||
A => m22_p(5 downto 0) & x"0000" & m10_p(34 downto 27),
|
||||
ACIN => (others => '0'),
|
||||
ALUMODE => "0000",
|
||||
B => m10_p(26 downto 9),
|
||||
BCIN => (others => '0'),
|
||||
C => m20_p(39 downto 0) & m02_p(5 downto 0) & "00",
|
||||
CARRYCASCIN => '0',
|
||||
CARRYIN => '0',
|
||||
CARRYINSEL => "000",
|
||||
CARRYOUT => s0_carry,
|
||||
CEA1 => '0',
|
||||
CEA2 => '1',
|
||||
CEAD => '0',
|
||||
CEALUMODE => '0',
|
||||
CEB1 => '0',
|
||||
CEB2 => '1',
|
||||
CEC => '1',
|
||||
CECARRYIN => '0',
|
||||
CECTRL => '0',
|
||||
CED => '0',
|
||||
CEINMODE => '0',
|
||||
CEM => '0',
|
||||
CEP => '0',
|
||||
CLK => clk,
|
||||
D => (others => '0'),
|
||||
INMODE => "00000",
|
||||
MULTSIGNIN => '0',
|
||||
OPMODE => "0001111",
|
||||
PCIN => (others => '0'),
|
||||
PCOUT => s0_pc,
|
||||
RSTA => '0',
|
||||
RSTALLCARRYIN => '0',
|
||||
RSTALUMODE => '0',
|
||||
RSTB => '0',
|
||||
RSTC => '0',
|
||||
RSTCTRL => '0',
|
||||
RSTD => '0',
|
||||
RSTINMODE => '0',
|
||||
RSTM => '0',
|
||||
RSTP => '0'
|
||||
);
|
||||
|
||||
-- s1: DSP48E1 slice with its multiplier disabled (USE_MULT => "none").
-- It takes slices of m22_p on A and B, a concatenation of m23_p and m20_p bits
-- on C, and bit 3 of s0's CARRYOUT as carry-in.  Its result leaves on the
-- PCOUT cascade as s1_pc.
-- NOTE(review): OPMODE "0001111" presumably selects X = A:B, Y = C, Z = 0
-- (i.e. P = A:B + C + CARRYIN) -- confirm against the DSP48E1 user guide.
s1: DSP48E1
    generic map (
        -- Input-side register selections
        AREG          => 1,
        ACASCREG      => 1,
        BREG          => 1,
        BCASCREG      => 1,
        CREG          => 1,
        -- Control, multiplier and output register selections
        ALUMODEREG    => 0,
        CARRYINREG    => 0,
        CARRYINSELREG => 0,
        INMODEREG     => 0,
        OPMODEREG     => 0,
        MREG          => 0,
        PREG          => 0,
        USE_MULT      => "none"
        )
    port map (
        CLK           => clk,

        -- Data operands
        A             => x"000" & m22_p(41 downto 24),
        B             => m22_p(23 downto 6),
        C             => m23_p(36 downto 0) & x"00" & "0" & m20_p(41 downto 40),
        D             => (others => '0'),

        -- Carry input taken from the s0 slice
        CARRYIN       => s0_carry(3),
        CARRYINSEL    => "000",

        -- Unused cascade inputs
        ACIN          => (others => '0'),
        BCIN          => (others => '0'),
        PCIN          => (others => '0'),
        CARRYCASCIN   => '0',
        MULTSIGNIN    => '0',

        -- Operating mode
        ALUMODE       => "0000",
        INMODE        => "00000",
        OPMODE        => "0001111",

        -- Clock enables: only A2, B2 and C are enabled
        CEA1          => '0',
        CEA2          => '1',
        CEB1          => '0',
        CEB2          => '1',
        CEC           => '1',
        CED           => '0',
        CEAD          => '0',
        CEM           => '0',
        CEP           => '0',
        CEALUMODE     => '0',
        CECARRYIN     => '0',
        CECTRL        => '0',
        CEINMODE      => '0',

        -- Resets: all tied inactive
        RSTA          => '0',
        RSTB          => '0',
        RSTC          => '0',
        RSTD          => '0',
        RSTM          => '0',
        RSTP          => '0',
        RSTALLCARRYIN => '0',
        RSTALUMODE    => '0',
        RSTCTRL       => '0',
        RSTINMODE     => '0',

        -- Result, forwarded on the P cascade
        PCOUT         => s1_pc
        );
|
||||
|
||||
-- mask is 0 for 32-bit ops, 0x00007fffffff for 64-bit
|
||||
-- Build p0_mask in one assignment: bits 30..0 each carry the inverse of
-- r32_1, every remaining (upper) bit is a constant '0'.
p0_mask <= (30 downto 0 => not r32_1, others => '0');
|
||||
|
||||
-- p0: DSP48E1 slice with its multiplier disabled (USE_MULT => "none") and the
-- pattern detector enabled, using the C input as the mask (SEL_MASK => "C").
-- It combines the s0_pc cascade input with m21_p/m03_p bits presented on A,
-- drives product(79 downto 32), and exports CARRYOUT (p0_carry) plus the two
-- pattern-detect flags p0_pat / p0_patb.
-- ALUMODE is "0000" or "0010" depending on rneg_1.
-- NOTE(review): OPMODE "0010011" presumably selects X = A:B, Y = 0, Z = PCIN
-- -- confirm against the DSP48E1 user guide.
p0: DSP48E1
    generic map (
        -- Input-side register selections
        AREG               => 1,
        ACASCREG           => 1,
        BREG               => 1,
        BCASCREG           => 1,
        CREG               => 1,
        -- Control, multiplier and output register selections
        ALUMODEREG         => 1,
        CARRYINREG         => 0,
        CARRYINSELREG      => 0,
        INMODEREG          => 0,
        OPMODEREG          => 0,
        MREG               => 0,
        PREG               => 0,
        USE_MULT           => "none",
        -- Pattern detector configuration
        SEL_MASK           => "C",
        USE_PATTERN_DETECT => "PATDET"
        )
    port map (
        CLK            => clk,

        -- Data operands; C carries the pattern-detect mask
        A              => m21_p(22 downto 0) & m03_p(5 downto 0) & '0',
        B              => (others => '0'),
        C              => p0_mask,
        D              => (others => '0'),

        -- Cascade input from the s0 slice
        PCIN           => s0_pc,

        -- No carry input on this slice
        CARRYIN        => '0',
        CARRYINSEL     => "000",

        -- Unused cascade inputs
        ACIN           => (others => '0'),
        BCIN           => (others => '0'),
        CARRYCASCIN    => '0',
        MULTSIGNIN     => '0',

        -- Operating mode; ALUMODE bit 1 follows rneg_1
        ALUMODE        => "00" & rneg_1 & '0',
        INMODE         => "00000",
        OPMODE         => "0010011",

        -- Clock enables: A2, B2, C and ALUMODE are enabled
        CEA1           => '0',
        CEA2           => '1',
        CEB1           => '0',
        CEB2           => '1',
        CEC            => '1',
        CED            => '0',
        CEAD           => '0',
        CEM            => '0',
        CEP            => '0',
        CEALUMODE      => '1',
        CECARRYIN      => '0',
        CECTRL         => '0',
        CEINMODE       => '0',

        -- Resets: all tied inactive
        RSTA           => '0',
        RSTB           => '0',
        RSTC           => '0',
        RSTD           => '0',
        RSTM           => '0',
        RSTP           => '0',
        RSTALLCARRYIN  => '0',
        RSTALUMODE     => '0',
        RSTCTRL        => '0',
        RSTINMODE      => '0',

        -- Outputs
        P              => product(79 downto 32),
        CARRYOUT       => p0_carry,
        PATTERNDETECT  => p0_pat,
        PATTERNBDETECT => p0_patb
        );
|
||||
|
||||
-- p1: DSP48E1 slice with its multiplier disabled (USE_MULT => "none") and the
-- pattern detector enabled with an all-zero MASK generic.
-- It combines the s1_pc cascade input with m21_p(41 downto 23) presented on
-- A/B, takes bit 3 of p0's CARRYOUT as carry-in, and drives
-- product(127 downto 80) together with the flags p1_pat / p1_patb.
-- ALUMODE is "0000" or "0010" depending on rneg_1.
-- NOTE(review): OPMODE "0010011" presumably selects X = A:B, Y = 0, Z = PCIN
-- -- confirm against the DSP48E1 user guide.
p1: DSP48E1
    generic map (
        -- Input-side register selections (C is unused and unregistered)
        AREG               => 1,
        ACASCREG           => 1,
        BREG               => 1,
        BCASCREG           => 1,
        CREG               => 0,
        -- Control, multiplier and output register selections
        ALUMODEREG         => 1,
        CARRYINREG         => 0,
        CARRYINSELREG      => 0,
        INMODEREG          => 0,
        OPMODEREG          => 0,
        MREG               => 0,
        PREG               => 0,
        USE_MULT           => "none",
        -- Pattern detector configuration
        MASK               => x"000000000000",
        USE_PATTERN_DETECT => "PATDET"
        )
    port map (
        CLK            => clk,

        -- Data operands
        A              => x"0000000" & '0' & m21_p(41),
        B              => m21_p(40 downto 23),
        C              => (others => '0'),
        D              => (others => '0'),

        -- Cascade input from the s1 slice
        PCIN           => s1_pc,

        -- Carry input taken from the p0 slice
        CARRYIN        => p0_carry(3),
        CARRYINSEL     => "000",

        -- Unused cascade inputs
        ACIN           => (others => '0'),
        BCIN           => (others => '0'),
        CARRYCASCIN    => '0',
        MULTSIGNIN     => '0',

        -- Operating mode; ALUMODE bit 1 follows rneg_1
        ALUMODE        => "00" & rneg_1 & '0',
        INMODE         => "00000",
        OPMODE         => "0010011",

        -- Clock enables: A2, B2 and ALUMODE are enabled
        CEA1           => '0',
        CEA2           => '1',
        CEB1           => '0',
        CEB2           => '1',
        CEC            => '0',
        CED            => '0',
        CEAD           => '0',
        CEM            => '0',
        CEP            => '0',
        CEALUMODE      => '1',
        CECARRYIN      => '0',
        CECTRL         => '0',
        CEINMODE       => '0',

        -- Resets: all tied inactive
        RSTA           => '0',
        RSTB           => '0',
        RSTC           => '0',
        RSTD           => '0',
        RSTM           => '0',
        RSTP           => '0',
        RSTALLCARRYIN  => '0',
        RSTALUMODE     => '0',
        RSTCTRL        => '0',
        RSTINMODE      => '0',

        -- Outputs
        P              => product(127 downto 80),
        PATTERNDETECT  => p1_pat,
        PATTERNBDETECT => p1_patb
        );
|
||||
|
||||
-- Low 32 bits of the result: the registered product_lo with every bit XORed
-- with req_neg, i.e. passed through unchanged when req_neg = '0' and
-- bitwise-inverted when req_neg = '1'.
product(31 downto 0) <= product_lo xor (31 downto 0 => req_neg);
|
||||
|
||||
-- Combinational output stage: forwards the assembled product and derives the
-- overflow flag from the pattern-detect outputs of the p0 and p1 slices.
mult_out: process(all)
    variable both_pat  : std_ulogic;  -- p1 and p0 both report PATTERNDETECT
    variable both_patb : std_ulogic;  -- p1 and p0 both report PATTERNBDETECT
    variable in_range  : std_ulogic;  -- '1' when no overflow is to be flagged
begin
    both_pat  := p1_pat and p0_pat;
    both_patb := p1_patb and p0_patb;

    -- Overflow is flagged when the high bits are neither all zeroes nor all
    -- ones.  For 32-bit operations, product(31) must additionally agree with
    -- them.
    if req_32bit = '0' then
        in_range := both_pat or both_patb;
    else
        in_range := (both_pat and not product(31)) or
                    (both_patb and product(31));
    end if;

    m_out.overflow <= not in_range;
    m_out.result <= product;
end process;
|
||||
|
||||
-- Pipeline registers, all updated on the rising edge of clk.  The request's
-- control bits pass through two register stages (*_1, then req_* /
-- m_out.valid); product_lo is assembled from the low partial products.
process(clk)
begin
    if rising_edge(clk) then
        -- First stage: capture the control bits of the incoming request.
        valid_1 <= m_in.valid;
        r32_1   <= m_in.is_32bit;
        rneg_1  <= m_in.neg_result;

        -- Second stage: move the first-stage values on by one cycle.
        m_out.valid <= valid_1;
        req_32bit   <= r32_1;
        req_neg     <= rneg_1;

        -- Low 32 bits of the product, taken from slices of m10_p, m01_p and m00_p.
        product_lo <= m10_p(8 downto 0) & m01_p(5 downto 0) & m00_p(16 downto 0);
    end if;
end process;
|
||||
|
||||
end architecture behaviour;
|
||||
Reference in New Issue
Block a user