mirror of
https://github.com/antonblanchard/microwatt.git
synced 2026-04-09 22:19:09 +00:00
47
bitsort.vhdl
47
bitsort.vhdl
@@ -1,5 +1,6 @@
|
||||
-- Implements instructions that involve sorting bits,
|
||||
-- that is, cfuged, pextd and pdepd.
|
||||
-- Also does bperm, which is somewhat different.
|
||||
--
|
||||
-- cfuged: Sort the bits in the mask in RB into 0s at the left, 1s at the right
|
||||
-- and move the bits in RS in the same fashion to give the result
|
||||
@@ -7,6 +8,7 @@
|
||||
-- corresponding bit in RB is 1
|
||||
-- pdepd: Inverse of pextd; take the low-order bits of RS and spread them out
|
||||
-- to the bit positions which have a 1 in RB
|
||||
-- bperm: Select 8 arbitrary bits
|
||||
|
||||
-- NB opc is bits 7-6 of the instruction:
|
||||
-- 00 = pdepd, 01 = pextd, 10 = cfuged
|
||||
@@ -27,6 +29,8 @@ entity bit_sorter is
|
||||
go : in std_ulogic;
|
||||
opc : in std_ulogic_vector(1 downto 0);
|
||||
done : out std_ulogic;
|
||||
do_bperm : in std_ulogic;
|
||||
bperm_done : out std_ulogic;
|
||||
result : out std_ulogic_vector(63 downto 0)
|
||||
);
|
||||
end entity bit_sorter;
|
||||
@@ -45,6 +49,13 @@ architecture behaviour of bit_sorter is
|
||||
signal sr_vl : std_ulogic_vector(63 downto 0);
|
||||
signal sr_vr : std_ulogic_vector(63 downto 0);
|
||||
|
||||
signal is_bperm : std_ulogic;
|
||||
signal bpc : unsigned(2 downto 0);
|
||||
signal bp_done : std_ulogic;
|
||||
signal bperm_res : std_ulogic_vector(7 downto 0);
|
||||
signal rs_sr : std_ulogic_vector(63 downto 0);
|
||||
signal rb_bp : std_ulogic_vector(63 downto 0);
|
||||
|
||||
begin
|
||||
bsort_r: process(clk)
|
||||
begin
|
||||
@@ -96,7 +107,41 @@ begin
|
||||
end if;
|
||||
end process;
|
||||
|
||||
-- bit permutation
|
||||
-- Combinational select of the permuted bit for the index byte currently held
-- in rs_sr(7 downto 0).  The low six index bits are complemented, so index 0
-- selects rb_bp(63) and index 63 selects rb_bp(0).
-- An index byte of 64 or more (bits 7..6 non-zero) must produce '0'; the
-- previous expression ignored bits 7..6 and returned rb_bp(index mod 64).
-- If rs_sr contains any metavalue, 'X' is driven instead of calling to_integer.
bperm_res(7) <= 'X' when is_X(rs_sr) else
                '0' when rs_sr(7 downto 6) /= "00" else
                rb_bp(to_integer(unsigned(not rs_sr(5 downto 0))));
|
||||
|
||||
-- Sequencer for the bperm operation.
-- Branch priority on each rising clock edge: rst, then do_bperm, then bp_done,
-- then is_bperm.  Only bperm_res(6 downto 0) is driven here; bperm_res(7) is
-- driven by a concurrent assignment outside this process.
bperm_r: process(clk)
|
||||
begin
|
||||
if rising_edge(clk) then
|
||||
-- Synchronous reset: return to idle, clear the collected bits and the step counter.
if rst = '1' then
|
||||
is_bperm <= '0';
|
||||
bp_done <= '0';
|
||||
bperm_res(6 downto 0) <= (others => '0');
|
||||
bpc <= to_unsigned(0, 3);
|
||||
-- Start: capture both operands and restart the step counter from zero.
elsif do_bperm = '1' then
|
||||
is_bperm <= '1';
|
||||
bp_done <= '0';
|
||||
bperm_res(6 downto 0) <= (others => '0');
|
||||
bpc <= to_unsigned(0, 3);
|
||||
rs_sr <= rs;
|
||||
rb_bp <= rb;
|
||||
-- bp_done was set on the previous edge: drop back to idle.  No shift happens
-- in this branch, so bperm_res(6 downto 0) and rs_sr keep their values.
elsif bp_done = '1' then
|
||||
is_bperm <= '0';
|
||||
bp_done <= '0';
|
||||
-- Active step: move the current bperm_res(7) down into bit 6 (older bits move
-- towards bit 0) and shift rs_sr right by one byte, zero-filling the top, so
-- the next index byte appears in rs_sr(7 downto 0).
elsif is_bperm = '1' then
|
||||
bperm_res(6 downto 0) <= bperm_res(7 downto 1);
|
||||
rs_sr <= x"00" & rs_sr(63 downto 8);
|
||||
-- bpc counts the shifts already done; the shift taken when bpc = 6 is the
-- seventh, so request bp_done for the following cycle.
if bpc = "110" then
|
||||
bp_done <= '1';
|
||||
end if;
|
||||
bpc <= bpc + 1;
|
||||
end if;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
done <= sd;
|
||||
result <= val;
|
||||
bperm_done <= bp_done;
|
||||
result <= val when is_bperm = '0' else (56x"0" & bperm_res);
|
||||
|
||||
end behaviour;
|
||||
|
||||
14
common.vhdl
14
common.vhdl
@@ -252,19 +252,20 @@ package common is
|
||||
|
||||
-- For now, fixed 16 sources, make this either a parametric
|
||||
-- package of some sort or an unconstrained array.
|
||||
-- We don't know NCPUS or SRC_NUM here, so make this
|
||||
-- large enough for 4 cpus and 16 interrupt sources for now.
|
||||
type ics_to_icp_t is record
|
||||
-- Level interrupts only, ICS just keeps presenting the
|
||||
-- highest priority interrupt. Once handling edge, something
|
||||
-- smarter involving handshake & reject support will be needed
|
||||
src : std_ulogic_vector(3 downto 0);
|
||||
pri : std_ulogic_vector(7 downto 0);
|
||||
src : std_ulogic_vector(15 downto 0); -- 4 bits each for 4 cpus
|
||||
pri : std_ulogic_vector(31 downto 0); -- 8 bits each for 4 cpus
|
||||
end record;
|
||||
|
||||
-- This needs to die...
|
||||
type ctrl_t is record
|
||||
wait_state: std_ulogic;
|
||||
run: std_ulogic;
|
||||
tb: std_ulogic_vector(63 downto 0);
|
||||
dec: std_ulogic_vector(63 downto 0);
|
||||
msr: std_ulogic_vector(63 downto 0);
|
||||
cfar: std_ulogic_vector(63 downto 0);
|
||||
@@ -439,6 +440,11 @@ package common is
|
||||
illegal_form : std_ulogic;
|
||||
uses_tar : std_ulogic;
|
||||
uses_dscr : std_ulogic;
|
||||
right_shift : std_ulogic;
|
||||
rot_clear_left : std_ulogic;
|
||||
rot_clear_right : std_ulogic;
|
||||
rot_sign_ext : std_ulogic;
|
||||
do_popcnt : std_ulogic;
|
||||
end record;
|
||||
constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
|
||||
(valid => '0', unit => ALU, fac => NONE, insn_type => OP_ILLEGAL, instr_tag => instr_tag_init,
|
||||
@@ -461,6 +467,8 @@ package common is
|
||||
dec_ctr => '0',
|
||||
prefixed => '0', prefix => (others => '0'), illegal_suffix => '0',
|
||||
misaligned_prefix => '0', illegal_form => '0', uses_tar => '0', uses_dscr => '0',
|
||||
right_shift => '0', rot_clear_left => '0', rot_clear_right => '0', rot_sign_ext => '0',
|
||||
do_popcnt => '0',
|
||||
others => (others => '0'));
|
||||
|
||||
type MultiplyInputType is record
|
||||
|
||||
73
control.vhdl
73
control.vhdl
@@ -45,9 +45,13 @@ entity control is
|
||||
valid_out : out std_ulogic;
|
||||
stopped_out : out std_ulogic;
|
||||
|
||||
gpr_bypass_a : out std_ulogic_vector(1 downto 0);
|
||||
gpr_bypass_b : out std_ulogic_vector(1 downto 0);
|
||||
gpr_bypass_c : out std_ulogic_vector(1 downto 0);
|
||||
-- Note on gpr_bypass_*: bits 1 to 3 are a 1-hot encoding of which
|
||||
-- bypass source we may possibly need to use; bit 0 is 1 if the bypass
|
||||
-- value should be used (i.e. any of bits 1-3 are 1 and the
|
||||
-- corresponding gpr_x_read_valid_in is also 1).
|
||||
gpr_bypass_a : out std_ulogic_vector(3 downto 0);
|
||||
gpr_bypass_b : out std_ulogic_vector(3 downto 0);
|
||||
gpr_bypass_c : out std_ulogic_vector(3 downto 0);
|
||||
cr_bypass : out std_ulogic_vector(1 downto 0);
|
||||
|
||||
instr_tag_out : out instr_tag_t
|
||||
@@ -152,9 +156,9 @@ begin
|
||||
variable tag_s : instr_tag_t;
|
||||
variable tag_t : instr_tag_t;
|
||||
variable incr_tag : tag_number_t;
|
||||
variable byp_a : std_ulogic_vector(1 downto 0);
|
||||
variable byp_b : std_ulogic_vector(1 downto 0);
|
||||
variable byp_c : std_ulogic_vector(1 downto 0);
|
||||
variable byp_a : std_ulogic_vector(3 downto 0);
|
||||
variable byp_b : std_ulogic_vector(3 downto 0);
|
||||
variable byp_c : std_ulogic_vector(3 downto 0);
|
||||
variable tag_cr : instr_tag_t;
|
||||
variable byp_cr : std_ulogic_vector(1 downto 0);
|
||||
variable tag_ov : instr_tag_t;
|
||||
@@ -163,57 +167,66 @@ begin
|
||||
tag_a := instr_tag_init;
|
||||
for i in tag_number_t loop
|
||||
if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_a_read_in then
|
||||
tag_a.valid := gpr_a_read_valid_in;
|
||||
tag_a.valid := '1';
|
||||
tag_a.tag := i;
|
||||
end if;
|
||||
end loop;
|
||||
tag_b := instr_tag_init;
|
||||
for i in tag_number_t loop
|
||||
if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_b_read_in then
|
||||
tag_b.valid := gpr_b_read_valid_in;
|
||||
tag_b.valid := '1';
|
||||
tag_b.tag := i;
|
||||
end if;
|
||||
end loop;
|
||||
tag_c := instr_tag_init;
|
||||
for i in tag_number_t loop
|
||||
if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_c_read_in then
|
||||
tag_c.valid := gpr_c_read_valid_in;
|
||||
tag_c.valid := '1';
|
||||
tag_c.tag := i;
|
||||
end if;
|
||||
end loop;
|
||||
|
||||
byp_a := "00";
|
||||
byp_a := "0000";
|
||||
if EX1_BYPASS and tag_match(execute_next_tag, tag_a) then
|
||||
byp_a := "01";
|
||||
elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_a) then
|
||||
byp_a := "10";
|
||||
elsif tag_match(complete_in, tag_a) then
|
||||
byp_a := "11";
|
||||
byp_a(1) := '1';
|
||||
end if;
|
||||
byp_b := "00";
|
||||
if EX1_BYPASS and tag_match(execute2_next_tag, tag_a) then
|
||||
byp_a(2) := '1';
|
||||
end if;
|
||||
if tag_match(complete_in, tag_a) then
|
||||
byp_a(3) := '1';
|
||||
end if;
|
||||
byp_a(0) := gpr_a_read_valid_in and (byp_a(1) or byp_a(2) or byp_a(3));
|
||||
byp_b := "0000";
|
||||
if EX1_BYPASS and tag_match(execute_next_tag, tag_b) then
|
||||
byp_b := "01";
|
||||
elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_b) then
|
||||
byp_b := "10";
|
||||
elsif tag_match(complete_in, tag_b) then
|
||||
byp_b := "11";
|
||||
byp_b(1) := '1';
|
||||
end if;
|
||||
byp_c := "00";
|
||||
if EX1_BYPASS and tag_match(execute2_next_tag, tag_b) then
|
||||
byp_b(2) := '1';
|
||||
end if;
|
||||
if tag_match(complete_in, tag_b) then
|
||||
byp_b(3) := '1';
|
||||
end if;
|
||||
byp_b(0) := gpr_b_read_valid_in and (byp_b(1) or byp_b(2) or byp_b(3));
|
||||
byp_c := "0000";
|
||||
if EX1_BYPASS and tag_match(execute_next_tag, tag_c) then
|
||||
byp_c := "01";
|
||||
elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_c) then
|
||||
byp_c := "10";
|
||||
elsif tag_match(complete_in, tag_c) then
|
||||
byp_c := "11";
|
||||
byp_c(1) := '1';
|
||||
end if;
|
||||
if EX1_BYPASS and tag_match(execute2_next_tag, tag_c) then
|
||||
byp_c(2) := '1';
|
||||
end if;
|
||||
if tag_match(complete_in, tag_c) then
|
||||
byp_c(3) := '1';
|
||||
end if;
|
||||
byp_c(0) := gpr_c_read_valid_in and (byp_c(1) or byp_c(2) or byp_c(3));
|
||||
|
||||
gpr_bypass_a <= byp_a;
|
||||
gpr_bypass_b <= byp_b;
|
||||
gpr_bypass_c <= byp_c;
|
||||
|
||||
gpr_tag_stall <= (tag_a.valid and not (or (byp_a))) or
|
||||
(tag_b.valid and not (or (byp_b))) or
|
||||
(tag_c.valid and not (or (byp_c)));
|
||||
gpr_tag_stall <= (tag_a.valid and gpr_a_read_valid_in and not byp_a(0)) or
|
||||
(tag_b.valid and gpr_b_read_valid_in and not byp_b(0)) or
|
||||
(tag_c.valid and gpr_c_read_valid_in and not byp_c(0));
|
||||
|
||||
incr_tag := curr_tag;
|
||||
instr_tag.tag <= curr_tag;
|
||||
|
||||
@@ -31,6 +31,9 @@ entity core is
|
||||
-- Alternate reset (0xffff0000) for use by DRAM init fw
|
||||
alt_reset : in std_ulogic;
|
||||
|
||||
-- Global timebase
|
||||
timebase : in std_ulogic_vector(63 downto 0);
|
||||
|
||||
-- Wishbone interface
|
||||
wishbone_insn_in : in wishbone_slave_out;
|
||||
wishbone_insn_out : out wishbone_master_out;
|
||||
@@ -373,6 +376,7 @@ begin
|
||||
port map (
|
||||
clk => clk,
|
||||
rst => rst_ex1,
|
||||
timebase => timebase,
|
||||
flush_in => flush,
|
||||
busy_out => ex1_busy_out,
|
||||
e_in => decode2_to_execute1,
|
||||
|
||||
108
decode2.vhdl
108
decode2.vhdl
@@ -201,6 +201,23 @@ architecture behaviour of decode2 is
|
||||
end case;
|
||||
end;
|
||||
|
||||
-- Masked merge of three 64-bit values.
-- Starts from val_a when mask_a = '1' (all zeros otherwise), then ORs in
-- val_b when mask_b = '1' and val_c when mask_c = '1'.  Returns all zeros
-- when no mask is '1'.  A mask value other than '1' leaves its operand out.
function andor (mask_a : std_ulogic; val_a : std_ulogic_vector(63 downto 0);
                mask_b : std_ulogic; val_b : std_ulogic_vector(63 downto 0);
                mask_c : std_ulogic; val_c : std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
    variable merged : std_ulogic_vector(63 downto 0);
begin
    -- First operand is taken as-is rather than ORed into zeros.
    case mask_a is
        when '1' =>
            merged := val_a;
        when others =>
            merged := (others => '0');
    end case;

    -- Remaining operands accumulate on top of whatever has been selected so far.
    if mask_b = '1' then
        merged := merged or val_b;
    end if;
    if mask_c = '1' then
        merged := merged or val_c;
    end if;

    return merged;
end;
|
||||
|
||||
-- control signals that are derived from insn_type
|
||||
type mux_select_array_t is array(insn_type_t) of std_ulogic_vector(2 downto 0);
|
||||
|
||||
@@ -210,7 +227,6 @@ architecture behaviour of decode2 is
|
||||
OP_PRTY => "001",
|
||||
OP_CMPB => "001",
|
||||
OP_EXTS => "001",
|
||||
OP_BPERM => "001",
|
||||
OP_BREV => "001",
|
||||
OP_BCD => "001",
|
||||
OP_MTSPR => "001",
|
||||
@@ -239,6 +255,7 @@ architecture behaviour of decode2 is
|
||||
OP_DIVE => "101",
|
||||
OP_MOD => "101",
|
||||
OP_BSORT => "100",
|
||||
OP_BPERM => "100",
|
||||
OP_ADDG6S => "001", -- misc_result
|
||||
OP_ISEL => "010",
|
||||
OP_DARN => "011",
|
||||
@@ -269,15 +286,15 @@ architecture behaviour of decode2 is
|
||||
|
||||
signal gpr_a_read_valid : std_ulogic;
|
||||
signal gpr_a_read : gspr_index_t;
|
||||
signal gpr_a_bypass : std_ulogic_vector(1 downto 0);
|
||||
signal gpr_a_bypass : std_ulogic_vector(3 downto 0);
|
||||
|
||||
signal gpr_b_read_valid : std_ulogic;
|
||||
signal gpr_b_read : gspr_index_t;
|
||||
signal gpr_b_bypass : std_ulogic_vector(1 downto 0);
|
||||
signal gpr_b_bypass : std_ulogic_vector(3 downto 0);
|
||||
|
||||
signal gpr_c_read_valid : std_ulogic;
|
||||
signal gpr_c_read : gspr_index_t;
|
||||
signal gpr_c_bypass : std_ulogic_vector(1 downto 0);
|
||||
signal gpr_c_bypass : std_ulogic_vector(3 downto 0);
|
||||
|
||||
signal cr_read_valid : std_ulogic;
|
||||
signal cr_write_valid : std_ulogic;
|
||||
@@ -656,6 +673,14 @@ begin
|
||||
v.e.illegal_suffix := d_in.illegal_suffix;
|
||||
v.e.misaligned_prefix := d_in.misaligned_prefix;
|
||||
|
||||
-- rotator control signals
|
||||
v.e.right_shift := '1' when op = OP_SHR else '0';
|
||||
v.e.rot_clear_left := '1' when op = OP_RLC or op = OP_RLCL else '0';
|
||||
v.e.rot_clear_right := '1' when op = OP_RLC or op = OP_RLCR else '0';
|
||||
v.e.rot_sign_ext := '1' when op = OP_EXTSWSLI else '0';
|
||||
|
||||
v.e.do_popcnt := '1' when op = OP_COUNTB and d_in.insn(7 downto 6) = "11" else '0';
|
||||
|
||||
-- check for invalid forms that cause an illegal instruction interrupt
|
||||
-- Does RA = RT for a load quadword instr, or RB = RT for lqarx?
|
||||
if d_in.decode.repeat = DRTP and
|
||||
@@ -694,53 +719,38 @@ begin
|
||||
ov_write_valid <= v.output_ov;
|
||||
|
||||
-- See if any of the operands can get their value via the bypass path.
|
||||
if dc2.busy = '0' or gpr_a_bypass /= "00" then
|
||||
case gpr_a_bypass is
|
||||
when "01" =>
|
||||
v.e.read_data1 := execute_bypass.data;
|
||||
when "10" =>
|
||||
v.e.read_data1 := execute2_bypass.data;
|
||||
when "11" =>
|
||||
v.e.read_data1 := writeback_bypass.data;
|
||||
when others =>
|
||||
if decoded_reg_a.reg_valid = '1' then
|
||||
v.e.read_data1 := r_in.read1_data;
|
||||
else
|
||||
v.e.read_data1 := decoded_reg_a.data;
|
||||
end if;
|
||||
end case;
|
||||
if gpr_a_bypass(0) = '1' then
|
||||
v.e.read_data1 := andor(gpr_a_bypass(1), execute_bypass.data,
|
||||
gpr_a_bypass(2), execute2_bypass.data,
|
||||
gpr_a_bypass(3), writeback_bypass.data);
|
||||
elsif dc2.busy = '0' then
|
||||
if decoded_reg_a.reg_valid = '1' then
|
||||
v.e.read_data1 := r_in.read1_data;
|
||||
else
|
||||
v.e.read_data1 := decoded_reg_a.data;
|
||||
end if;
|
||||
end if;
|
||||
if dc2.busy = '0' or gpr_b_bypass /= "00" then
|
||||
case gpr_b_bypass is
|
||||
when "01" =>
|
||||
v.e.read_data2 := execute_bypass.data;
|
||||
when "10" =>
|
||||
v.e.read_data2 := execute2_bypass.data;
|
||||
when "11" =>
|
||||
v.e.read_data2 := writeback_bypass.data;
|
||||
when others =>
|
||||
if decoded_reg_b.reg_valid = '1' then
|
||||
v.e.read_data2 := r_in.read2_data;
|
||||
else
|
||||
v.e.read_data2 := decoded_reg_b.data;
|
||||
end if;
|
||||
end case;
|
||||
if gpr_b_bypass(0) = '1' then
|
||||
v.e.read_data2 := andor(gpr_b_bypass(1), execute_bypass.data,
|
||||
gpr_b_bypass(2), execute2_bypass.data,
|
||||
gpr_b_bypass(3), writeback_bypass.data);
|
||||
elsif dc2.busy = '0' then
|
||||
if decoded_reg_b.reg_valid = '1' then
|
||||
v.e.read_data2 := r_in.read2_data;
|
||||
else
|
||||
v.e.read_data2 := decoded_reg_b.data;
|
||||
end if;
|
||||
end if;
|
||||
if dc2.busy = '0' or gpr_c_bypass /= "00" then
|
||||
case gpr_c_bypass is
|
||||
when "01" =>
|
||||
v.e.read_data3 := execute_bypass.data;
|
||||
when "10" =>
|
||||
v.e.read_data3 := execute2_bypass.data;
|
||||
when "11" =>
|
||||
v.e.read_data3 := writeback_bypass.data;
|
||||
when others =>
|
||||
if decoded_reg_c.reg_valid = '1' then
|
||||
v.e.read_data3 := r_in.read3_data;
|
||||
else
|
||||
v.e.read_data3 := decoded_reg_c.data;
|
||||
end if;
|
||||
end case;
|
||||
if gpr_c_bypass(0) = '1' then
|
||||
v.e.read_data3 := andor(gpr_c_bypass(1), execute_bypass.data,
|
||||
gpr_c_bypass(2), execute2_bypass.data,
|
||||
gpr_c_bypass(3), writeback_bypass.data);
|
||||
elsif dc2.busy = '0' then
|
||||
if decoded_reg_c.reg_valid = '1' then
|
||||
v.e.read_data3 := r_in.read3_data;
|
||||
else
|
||||
v.e.read_data3 := decoded_reg_c.data;
|
||||
end if;
|
||||
end if;
|
||||
|
||||
case cr_bypass is
|
||||
|
||||
@@ -34,6 +34,8 @@ entity execute1 is
|
||||
ext_irq_in : std_ulogic;
|
||||
interrupt_in : WritebackToExecute1Type;
|
||||
|
||||
timebase : std_ulogic_vector(63 downto 0);
|
||||
|
||||
-- asynchronous
|
||||
l_out : out Execute1ToLoadstore1Type;
|
||||
fp_out : out Execute1ToFPUType;
|
||||
@@ -116,6 +118,7 @@ architecture behaviour of execute1 is
|
||||
start_mul : std_ulogic;
|
||||
start_div : std_ulogic;
|
||||
start_bsort : std_ulogic;
|
||||
start_bperm : std_ulogic;
|
||||
do_trace : std_ulogic;
|
||||
ciabr_trace : std_ulogic;
|
||||
fp_intr : std_ulogic;
|
||||
@@ -150,6 +153,7 @@ architecture behaviour of execute1 is
|
||||
mul_finish : std_ulogic;
|
||||
div_in_progress : std_ulogic;
|
||||
bsort_in_progress : std_ulogic;
|
||||
bperm_in_progress : std_ulogic;
|
||||
no_instr_avail : std_ulogic;
|
||||
instr_dispatch : std_ulogic;
|
||||
ext_interrupt : std_ulogic;
|
||||
@@ -174,7 +178,7 @@ architecture behaviour of execute1 is
|
||||
spr_select => spr_id_init, pmu_spr_num => 5x"0",
|
||||
redir_to_next => '0', advance_nia => '0', lr_from_next => '0',
|
||||
mul_in_progress => '0', mul_finish => '0', div_in_progress => '0',
|
||||
bsort_in_progress => '0',
|
||||
bsort_in_progress => '0', bperm_in_progress => '0',
|
||||
no_instr_avail => '0', instr_dispatch => '0', ext_interrupt => '0',
|
||||
taken_branch_event => '0', br_mispredict => '0',
|
||||
msr => 64x"0",
|
||||
@@ -206,12 +210,9 @@ architecture behaviour of execute1 is
|
||||
signal valid_in : std_ulogic;
|
||||
signal ctrl: ctrl_t := ctrl_t_init;
|
||||
signal ctrl_tmp: ctrl_t := ctrl_t_init;
|
||||
signal right_shift, rot_clear_left, rot_clear_right: std_ulogic;
|
||||
signal rot_sign_ext: std_ulogic;
|
||||
signal rotator_result: std_ulogic_vector(63 downto 0);
|
||||
signal rotator_carry: std_ulogic;
|
||||
signal logical_result: std_ulogic_vector(63 downto 0);
|
||||
signal do_popcnt: std_ulogic;
|
||||
signal countbits_result: std_ulogic_vector(63 downto 0);
|
||||
signal alu_result: std_ulogic_vector(63 downto 0);
|
||||
signal adder_result: std_ulogic_vector(63 downto 0);
|
||||
@@ -245,6 +246,8 @@ architecture behaviour of execute1 is
|
||||
-- bit-sort unit signals
|
||||
signal bsort_start : std_ulogic;
|
||||
signal bsort_done : std_ulogic;
|
||||
signal bperm_start : std_ulogic;
|
||||
signal bperm_done : std_ulogic;
|
||||
|
||||
-- random number generator signals
|
||||
signal random_raw : std_ulogic_vector(63 downto 0);
|
||||
@@ -448,11 +451,11 @@ begin
|
||||
shift => b_in(6 downto 0),
|
||||
insn => e_in.insn,
|
||||
is_32bit => e_in.is_32bit,
|
||||
right_shift => right_shift,
|
||||
right_shift => e_in.right_shift,
|
||||
arith => e_in.is_signed,
|
||||
clear_left => rot_clear_left,
|
||||
clear_right => rot_clear_right,
|
||||
sign_ext_rs => rot_sign_ext,
|
||||
clear_left => e_in.rot_clear_left,
|
||||
clear_right => e_in.rot_clear_right,
|
||||
sign_ext_rs => e_in.rot_sign_ext,
|
||||
result => rotator_result,
|
||||
carry_out => rotator_carry
|
||||
);
|
||||
@@ -476,7 +479,7 @@ begin
|
||||
stall => stage2_stall,
|
||||
count_right => e_in.insn(10),
|
||||
is_32bit => e_in.is_32bit,
|
||||
do_popcnt => do_popcnt,
|
||||
do_popcnt => e_in.do_popcnt,
|
||||
datalen => e_in.data_len,
|
||||
result => countbits_result
|
||||
);
|
||||
@@ -515,6 +518,8 @@ begin
|
||||
go => bsort_start,
|
||||
opc => e_in.insn(7 downto 6),
|
||||
done => bsort_done,
|
||||
do_bperm => bperm_start,
|
||||
bperm_done => bperm_done,
|
||||
result => bsort_result
|
||||
);
|
||||
|
||||
@@ -1147,7 +1152,7 @@ begin
|
||||
-- side-effect flags or write enables when generating a trap).
|
||||
-- With v.trap = 1 we will assert both ex1.e.valid and ex1.e.interrupt
|
||||
-- to writeback, and it will complete the instruction and take
|
||||
-- and interrupt. It is OK for v.trap to depend on operand data.
|
||||
-- an interrupt. It is OK for v.trap to depend on operand data.
|
||||
|
||||
illegal := '0';
|
||||
privileged := '0';
|
||||
@@ -1228,7 +1233,7 @@ begin
|
||||
when OP_CMPRB =>
|
||||
when OP_CMPEQB =>
|
||||
when OP_LOGIC | OP_XOR | OP_PRTY | OP_CMPB | OP_EXTS |
|
||||
OP_BPERM | OP_BREV | OP_BCD =>
|
||||
OP_BREV | OP_BCD =>
|
||||
|
||||
when OP_B =>
|
||||
v.take_branch := '1';
|
||||
@@ -1433,6 +1438,11 @@ begin
|
||||
slow_op := '1';
|
||||
owait := '1';
|
||||
|
||||
when OP_BPERM =>
|
||||
v.start_bperm := '1';
|
||||
slow_op := '1';
|
||||
owait := '1';
|
||||
|
||||
when OP_MUL_L64 =>
|
||||
if e_in.is_32bit = '1' then
|
||||
v.se.mult_32s := '1';
|
||||
@@ -1585,7 +1595,7 @@ begin
|
||||
|
||||
if e_in.unit = ALU then
|
||||
v.complete := e_in.valid and not v.exception and not owait;
|
||||
v.bypass_valid := e_in.valid and not v.exception and not slow_op;
|
||||
v.bypass_valid := e_in.valid and not slow_op;
|
||||
end if;
|
||||
|
||||
actions <= v;
|
||||
@@ -1631,18 +1641,10 @@ begin
|
||||
v.taken_branch_event := '0';
|
||||
v.br_mispredict := '0';
|
||||
v.busy := '0';
|
||||
bypass_valid := '0';
|
||||
bypass_valid := actions.bypass_valid;
|
||||
|
||||
irq_valid := ex1.msr(MSR_EE) and (pmu_to_x.intr or ctrl.dec(63) or ext_irq_in);
|
||||
|
||||
-- rotator control signals
|
||||
right_shift <= '1' when e_in.insn_type = OP_SHR else '0';
|
||||
rot_clear_left <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCL else '0';
|
||||
rot_clear_right <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCR else '0';
|
||||
rot_sign_ext <= '1' when e_in.insn_type = OP_EXTSWSLI else '0';
|
||||
|
||||
do_popcnt <= '1' when e_in.insn_type = OP_COUNTB and e_in.insn(7 downto 6) = "11" else '0';
|
||||
|
||||
if valid_in = '1' then
|
||||
v.prev_op := e_in.insn_type;
|
||||
v.prev_prefixed := e_in.prefixed;
|
||||
@@ -1706,7 +1708,6 @@ begin
|
||||
if go = '1' then
|
||||
v.se := actions.se;
|
||||
v.e.valid := actions.complete;
|
||||
bypass_valid := actions.bypass_valid;
|
||||
v.taken_branch_event := actions.take_branch;
|
||||
v.trace_next := actions.do_trace or actions.ciabr_trace;
|
||||
v.trace_ciabr := actions.ciabr_trace;
|
||||
@@ -1719,6 +1720,7 @@ begin
|
||||
x_to_divider.valid <= actions.start_div;
|
||||
v.div_in_progress := actions.start_div;
|
||||
v.bsort_in_progress := actions.start_bsort;
|
||||
v.bperm_in_progress := actions.start_bperm;
|
||||
v.br_mispredict := v.e.redirect and actions.direct_branch;
|
||||
v.advance_nia := actions.advance_nia;
|
||||
v.redir_to_next := actions.redir_to_next;
|
||||
@@ -1729,7 +1731,8 @@ begin
|
||||
-- multiply is happening in order to stop following
|
||||
-- instructions from using the wrong XER value
|
||||
-- (and for simplicity in the OE=0 case).
|
||||
v.busy := actions.start_div or actions.start_mul or actions.start_bsort;
|
||||
v.busy := actions.start_div or actions.start_mul or
|
||||
actions.start_bsort or actions.start_bperm;
|
||||
|
||||
-- instruction for other units, i.e. LDST
|
||||
if e_in.unit = LDST then
|
||||
@@ -1741,6 +1744,7 @@ begin
|
||||
end if;
|
||||
is_scv := go and actions.se.scv_trap;
|
||||
bsort_start <= go and actions.start_bsort;
|
||||
bperm_start <= go and actions.start_bperm;
|
||||
pmu_trace <= go and actions.do_trace;
|
||||
|
||||
if not HAS_FPU and ex1.div_in_progress = '1' then
|
||||
@@ -1781,6 +1785,13 @@ begin
|
||||
v.e.write_data := alu_result;
|
||||
bypass_valid := bsort_done;
|
||||
end if;
|
||||
if ex1.bperm_in_progress = '1' then
|
||||
v.bperm_in_progress := not bperm_done;
|
||||
v.e.valid := bperm_done;
|
||||
v.busy := not bperm_done;
|
||||
v.e.write_data := alu_result;
|
||||
bypass_valid := bperm_done;
|
||||
end if;
|
||||
|
||||
if v.e.write_xerc_enable = '1' and v.e.valid = '1' then
|
||||
v.xerc := v.e.xerc;
|
||||
@@ -1814,13 +1825,13 @@ begin
|
||||
v.fp_exception_next := '0';
|
||||
end if;
|
||||
|
||||
bypass_data.tag.valid <= v.e.write_enable and bypass_valid;
|
||||
bypass_data.tag.tag <= v.e.instr_tag.tag;
|
||||
bypass_data.tag.valid <= e_in.write_reg_enable and bypass_valid;
|
||||
bypass_data.tag.tag <= e_in.instr_tag.tag;
|
||||
bypass_data.data <= alu_result;
|
||||
|
||||
bypass_cr_data.tag.valid <= v.e.write_cr_enable and bypass_valid;
|
||||
bypass_cr_data.tag.tag <= v.e.instr_tag.tag;
|
||||
bypass_cr_data.data <= v.e.write_cr_data;
|
||||
bypass_cr_data.tag.valid <= e_in.output_cr and bypass_valid;
|
||||
bypass_cr_data.tag.tag <= e_in.instr_tag.tag;
|
||||
bypass_cr_data.data <= write_cr_data;
|
||||
|
||||
-- Outputs to loadstore1 (async)
|
||||
lv.op := e_in.insn_type;
|
||||
@@ -1881,8 +1892,8 @@ begin
|
||||
|
||||
-- Slow SPR read mux
|
||||
with ex1.spr_select.sel select spr_result <=
|
||||
ctrl.tb when SPRSEL_TB,
|
||||
32x"0" & ctrl.tb(63 downto 32) when SPRSEL_TBU,
|
||||
timebase when SPRSEL_TB,
|
||||
32x"0" & timebase(63 downto 32) when SPRSEL_TBU,
|
||||
ctrl.dec when SPRSEL_DEC,
|
||||
32x"0" & PVR_MICROWATT when SPRSEL_PVR,
|
||||
log_wr_addr & ex2.log_addr_spr when SPRSEL_LOGA,
|
||||
@@ -1936,16 +1947,14 @@ begin
|
||||
end if;
|
||||
|
||||
ctrl_tmp <= ctrl;
|
||||
-- FIXME: run at 512MHz not core freq
|
||||
ctrl_tmp.tb <= std_ulogic_vector(unsigned(ctrl.tb) + 1);
|
||||
ctrl_tmp.dec <= std_ulogic_vector(unsigned(ctrl.dec) - 1);
|
||||
|
||||
x_to_pmu.mfspr <= '0';
|
||||
x_to_pmu.mtspr <= '0';
|
||||
x_to_pmu.tbbits(3) <= ctrl.tb(63 - 47);
|
||||
x_to_pmu.tbbits(2) <= ctrl.tb(63 - 51);
|
||||
x_to_pmu.tbbits(1) <= ctrl.tb(63 - 55);
|
||||
x_to_pmu.tbbits(0) <= ctrl.tb(63 - 63);
|
||||
x_to_pmu.tbbits(3) <= timebase(63 - 47);
|
||||
x_to_pmu.tbbits(2) <= timebase(63 - 51);
|
||||
x_to_pmu.tbbits(1) <= timebase(63 - 55);
|
||||
x_to_pmu.tbbits(0) <= timebase(63 - 63);
|
||||
x_to_pmu.pmm_msr <= ctrl.msr(MSR_PMM);
|
||||
x_to_pmu.pr_msr <= ctrl.msr(MSR_PR);
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ use work.wishbone_types.all;
|
||||
|
||||
entity toplevel is
|
||||
generic (
|
||||
CPUS : natural := 1;
|
||||
MEMORY_SIZE : integer := 16384;
|
||||
RAM_INIT_FILE : string := "firmware.hex";
|
||||
RESET_LOW : boolean := true;
|
||||
@@ -241,6 +242,7 @@ begin
|
||||
MEMORY_SIZE => BRAM_SIZE,
|
||||
RAM_INIT_FILE => RAM_INIT_FILE,
|
||||
SIM => false,
|
||||
NCPUS => CPUS,
|
||||
CLK_FREQ => CLK_FREQUENCY,
|
||||
HAS_FPU => HAS_FPU,
|
||||
HAS_BTC => HAS_BTC,
|
||||
|
||||
@@ -65,7 +65,8 @@
|
||||
#define SYS_REG_UART_IS_16550 (1ull << 32)
|
||||
#define SYS_REG_GIT_INFO 0x50
|
||||
#define SYS_REG_GIT_IS_DIRTY (1ull << 63)
|
||||
|
||||
#define SYS_REG_CPU_CTRL 0x58
|
||||
#define SYS_REG_CPU_CTRL_ENABLE 0xff
|
||||
|
||||
/*
|
||||
* Register definitions for the potato UART
|
||||
|
||||
@@ -52,6 +52,8 @@ architecture behave of loadstore1 is
|
||||
MMU_WAIT -- waiting for MMU to finish doing something
|
||||
);
|
||||
|
||||
constant num_dawr : positive := 2;
|
||||
|
||||
type byte_index_t is array(0 to 7) of unsigned(2 downto 0);
|
||||
subtype byte_trim_t is std_ulogic_vector(1 downto 0);
|
||||
type trim_ctl_t is array(0 to 7) of byte_trim_t;
|
||||
@@ -130,6 +132,9 @@ architecture behave of loadstore1 is
|
||||
busy : std_ulogic;
|
||||
issued : std_ulogic;
|
||||
addr0 : std_ulogic_vector(63 downto 0);
|
||||
dawr_ll : std_ulogic_vector(num_dawr-1 downto 0);
|
||||
dawr_ul : std_ulogic_vector(num_dawr-1 downto 0);
|
||||
dawr_ud : std_ulogic;
|
||||
end record;
|
||||
|
||||
type reg_stage2_t is record
|
||||
@@ -147,7 +152,6 @@ architecture behave of loadstore1 is
|
||||
dbg_spr_ack: std_ulogic;
|
||||
end record;
|
||||
|
||||
constant num_dawr : positive := 2;
|
||||
type dawr_array_t is array(0 to num_dawr - 1) of std_ulogic_vector(63 downto 3);
|
||||
type dawrx_array_t is array(0 to num_dawr - 1) of std_ulogic_vector(15 downto 0);
|
||||
|
||||
@@ -335,6 +339,9 @@ begin
|
||||
r1.req.sprsel <= "000";
|
||||
r1.req.ric <= "00";
|
||||
r1.req.xerc <= xerc_init;
|
||||
r1.dawr_ll <= (others => '0');
|
||||
r1.dawr_ul <= (others => '0');
|
||||
r1.dawr_ud <= '0';
|
||||
|
||||
r2.req.valid <= '0';
|
||||
r2.busy <= '0';
|
||||
@@ -617,6 +624,9 @@ begin
|
||||
variable req : request_t;
|
||||
variable dcreq : std_ulogic;
|
||||
variable issue : std_ulogic;
|
||||
variable addr : std_ulogic_vector(63 downto 3);
|
||||
variable addl : unsigned(64 downto 3);
|
||||
variable addu : unsigned(64 downto 3);
|
||||
begin
|
||||
v := r1;
|
||||
issue := '0';
|
||||
@@ -661,6 +671,20 @@ begin
|
||||
end if;
|
||||
end if;
|
||||
|
||||
-- Do subtractions for DAWR0/1 matches
|
||||
for i in 0 to 1 loop
|
||||
addr := req.addr(63 downto 3);
|
||||
if req.priv_mode = '1' and r3.dawrx(i)(7) = '1' then
|
||||
-- HRAMMC=1 => trim top bit from address
|
||||
addr(63) := '0';
|
||||
end if;
|
||||
addl := unsigned('0' & addr) - unsigned('0' & r3.dawr(i));
|
||||
addu := unsigned('0' & r3.dawr_uplim(i)) - unsigned('0' & addr);
|
||||
v.dawr_ll(i) := addl(64);
|
||||
v.dawr_ul(i) := addu(64);
|
||||
end loop;
|
||||
v.dawr_ud := r3.dawr_upd;
|
||||
|
||||
if flush = '1' then
|
||||
v.req.valid := '0';
|
||||
v.req.dc_req := '0';
|
||||
@@ -702,9 +726,6 @@ begin
|
||||
variable sprsel : std_ulogic_vector(2 downto 0);
|
||||
variable sprval : std_ulogic_vector(63 downto 0);
|
||||
variable dawr_match : std_ulogic;
|
||||
variable addr : std_ulogic_vector(63 downto 3);
|
||||
variable addl : unsigned(64 downto 3);
|
||||
variable addu : unsigned(64 downto 3);
|
||||
begin
|
||||
v := r2;
|
||||
|
||||
@@ -724,14 +745,7 @@ begin
|
||||
-- Test for DAWR0/1 matches
|
||||
dawr_match := '0';
|
||||
for i in 0 to 1 loop
|
||||
addr := r1.req.addr(63 downto 3);
|
||||
if r1.req.priv_mode = '1' and r3.dawrx(i)(7) = '1' then
|
||||
-- HRAMMC=1 => trim top bit from address
|
||||
addr(63) := '0';
|
||||
end if;
|
||||
addl := unsigned('0' & addr) - unsigned('0' & r3.dawr(i));
|
||||
addu := unsigned('0' & r3.dawr_uplim(i)) - unsigned('0' & addr);
|
||||
if addl(64) = '0' and addu(64) = '0' and
|
||||
if r1.dawr_ll(i) = '0' and r1.dawr_ul(i) = '0' and r1.dawr_ud = '0' and
|
||||
dawrx_match_enable(r3.dawrx(i), r1.req.virt_mode,
|
||||
r1.req.priv_mode, r1.req.store) then
|
||||
dawr_match := r1.req.valid and r1.req.dc_req and not r3.dawr_upd and
|
||||
|
||||
13
logical.vhdl
13
logical.vhdl
@@ -23,7 +23,6 @@ architecture behaviour of logical is
|
||||
|
||||
signal par0, par1 : std_ulogic;
|
||||
signal parity : std_ulogic_vector(63 downto 0);
|
||||
signal permute : std_ulogic_vector(7 downto 0);
|
||||
|
||||
function bcd_to_dpd(bcd: std_ulogic_vector(11 downto 0)) return std_ulogic_vector is
|
||||
variable dpd: std_ulogic_vector(9 downto 0);
|
||||
@@ -109,16 +108,6 @@ begin
|
||||
parity(32) <= par1;
|
||||
end if;
|
||||
|
||||
-- bit permutation
|
||||
for i in 0 to 7 loop
|
||||
j := i * 8;
|
||||
if rs(j+7 downto j+6) = "00" then
|
||||
permute(i) <= rb(to_integer(unsigned(not rs(j+5 downto j))));
|
||||
else
|
||||
permute(i) <= '0';
|
||||
end if;
|
||||
end loop;
|
||||
|
||||
rb_adj := rb;
|
||||
if invert_in = '1' then
|
||||
rb_adj := not rb;
|
||||
@@ -157,8 +146,6 @@ begin
|
||||
tmp := parity;
|
||||
when OP_CMPB =>
|
||||
tmp := ppc_cmpb(rs, rb);
|
||||
when OP_BPERM =>
|
||||
tmp := std_ulogic_vector(resize(unsigned(permute), 64));
|
||||
when OP_BCD =>
|
||||
-- invert_in is abused to indicate direction of conversion
|
||||
if invert_in = '0' then
|
||||
|
||||
@@ -335,6 +335,7 @@ targets:
|
||||
default_tool: vivado
|
||||
filesets: [core, arty_a7, soc, fpga, debug_xilinx, litedram, liteeth, uart16550, xilinx_specific, litesdcard]
|
||||
parameters:
|
||||
- cpus
|
||||
- memory_size
|
||||
- ram_init_file
|
||||
- use_litedram=true
|
||||
@@ -496,6 +497,12 @@ generate:
|
||||
parameters: {vendor : xilinx, frequency : 100e6}
|
||||
|
||||
parameters:
|
||||
cpus:
|
||||
datatype : int
|
||||
description : Number of CPU cores to include in the SoC.
|
||||
paramtype : generic
|
||||
default : 1
|
||||
|
||||
memory_size:
|
||||
datatype : int
|
||||
description : On-chip memory size (bytes). If no_bram is set, this is the size carved out for the DRAM payload
|
||||
|
||||
@@ -24,28 +24,30 @@
|
||||
#define DBG_WB_DATA 0x01
|
||||
#define DBG_WB_CTRL 0x02
|
||||
|
||||
#define DBG_CORE_CTRL 0x10
|
||||
unsigned int core;
|
||||
|
||||
#define DBG_CORE_CTRL (0x10 + (core << 4))
|
||||
#define DBG_CORE_CTRL_STOP (1 << 0)
|
||||
#define DBG_CORE_CTRL_RESET (1 << 1)
|
||||
#define DBG_CORE_CTRL_ICRESET (1 << 2)
|
||||
#define DBG_CORE_CTRL_STEP (1 << 3)
|
||||
#define DBG_CORE_CTRL_START (1 << 4)
|
||||
|
||||
#define DBG_CORE_STAT 0x11
|
||||
#define DBG_CORE_STAT (0x11 + (core << 4))
|
||||
#define DBG_CORE_STAT_STOPPING (1 << 0)
|
||||
#define DBG_CORE_STAT_STOPPED (1 << 1)
|
||||
#define DBG_CORE_STAT_TERM (1 << 2)
|
||||
|
||||
#define DBG_CORE_NIA 0x12
|
||||
#define DBG_CORE_MSR 0x13
|
||||
#define DBG_CORE_NIA (0x12 + (core << 4))
|
||||
#define DBG_CORE_MSR (0x13 + (core << 4))
|
||||
|
||||
#define DBG_CORE_GSPR_INDEX 0x14
|
||||
#define DBG_CORE_GSPR_DATA 0x15
|
||||
#define DBG_CORE_GSPR_INDEX (0x14 + (core << 4))
|
||||
#define DBG_CORE_GSPR_DATA (0x15 + (core << 4))
|
||||
|
||||
#define DBG_LOG_ADDR 0x16
|
||||
#define DBG_LOG_DATA 0x17
|
||||
#define DBG_LOG_TRIGGER 0x18
|
||||
#define DBG_LOG_MTRIGGER 0x19
|
||||
#define DBG_LOG_ADDR (0x16 + (core << 4))
|
||||
#define DBG_LOG_DATA (0x17 + (core << 4))
|
||||
#define DBG_LOG_TRIGGER (0x18 + (core << 4))
|
||||
#define DBG_LOG_MTRIGGER (0x19 + (core << 4))
|
||||
|
||||
static bool debug;
|
||||
|
||||
@@ -507,7 +509,7 @@ static void core_status(void)
|
||||
statstr2 = " (terminated)";
|
||||
} else if (stat & DBG_CORE_STAT_TERM)
|
||||
statstr = "odd state (TERM but no STOP)";
|
||||
printf("Core: %s%s\n", statstr, statstr2);
|
||||
printf("Core%u: %s%s\n", core, statstr, statstr2);
|
||||
printf(" NIA: %016" PRIx64 "\n", nia);
|
||||
printf(" MSR: %016" PRIx64 "\n", msr);
|
||||
}
|
||||
@@ -792,7 +794,7 @@ static void mtrig_set(uint64_t addr)
|
||||
|
||||
static void usage(const char *cmd)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s -b <jtag|ecp5|sim> <command> <args>\n", cmd);
|
||||
fprintf(stderr, "Usage: %s -b <jtag|ecp5|sim> [-c core#] <command> <args>\n", cmd);
|
||||
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, " CPU core:\n");
|
||||
@@ -851,12 +853,20 @@ int main(int argc, char *argv[])
|
||||
{ "target", required_argument, 0, 't' },
|
||||
{ "debug", no_argument, 0, 'd' },
|
||||
{ "frequency", no_argument, 0, 's' },
|
||||
{ "core", required_argument, 0, 'c' },
|
||||
{ 0, 0, 0, 0 }
|
||||
};
|
||||
c = getopt_long(argc, argv, "dhb:t:s:", lopts, &oindex);
|
||||
c = getopt_long(argc, argv, "dhb:t:s:c:", lopts, &oindex);
|
||||
if (c < 0)
|
||||
break;
|
||||
switch(c) {
|
||||
case 'c':
|
||||
core = atoi(optarg);
|
||||
if (core >= 15) {
|
||||
fprintf(stderr, "Core number out of range (max 14)\n");
|
||||
exit(1);
|
||||
}
|
||||
break;
|
||||
case 'h':
|
||||
usage(progname);
|
||||
break;
|
||||
|
||||
126
soc.vhdl
126
soc.vhdl
@@ -67,6 +67,7 @@ entity soc is
|
||||
RAM_INIT_FILE : string;
|
||||
CLK_FREQ : positive;
|
||||
SIM : boolean;
|
||||
NCPUS : positive := 1;
|
||||
HAS_FPU : boolean := true;
|
||||
HAS_BTC : boolean := true;
|
||||
DISABLE_FLATTEN_CORE : boolean := false;
|
||||
@@ -148,20 +149,18 @@ end entity soc;
|
||||
|
||||
architecture behaviour of soc is
|
||||
|
||||
subtype cpu_index_t is natural range 0 to NCPUS-1;
|
||||
type dword_percpu_array is array(cpu_index_t) of std_ulogic_vector(63 downto 0);
|
||||
|
||||
-- internal reset
|
||||
signal soc_reset : std_ulogic;
|
||||
|
||||
-- Wishbone master signals:
|
||||
signal wishbone_dcore_in : wishbone_slave_out;
|
||||
signal wishbone_dcore_out : wishbone_master_out;
|
||||
signal wishbone_icore_in : wishbone_slave_out;
|
||||
signal wishbone_icore_out : wishbone_master_out;
|
||||
signal wishbone_debug_in : wishbone_slave_out;
|
||||
signal wishbone_debug_out : wishbone_master_out;
|
||||
signal wishbone_debug_in : wishbone_slave_out;
|
||||
signal wishbone_debug_out : wishbone_master_out;
|
||||
|
||||
-- Arbiter array (ghdl doesnt' support assigning the array
|
||||
-- elements in the entity instantiation)
|
||||
constant NUM_WB_MASTERS : positive := 4;
|
||||
-- Arbiter array
|
||||
constant NUM_WB_MASTERS : positive := NCPUS * 2 + 2;
|
||||
signal wb_masters_out : wishbone_master_out_vector(0 to NUM_WB_MASTERS-1);
|
||||
signal wb_masters_in : wishbone_slave_out_vector(0 to NUM_WB_MASTERS-1);
|
||||
|
||||
@@ -180,7 +179,7 @@ architecture behaviour of soc is
|
||||
|
||||
-- Syscon signals
|
||||
signal dram_at_0 : std_ulogic;
|
||||
signal do_core_reset : std_ulogic;
|
||||
signal do_core_reset : std_ulogic_vector(NCPUS-1 downto 0);
|
||||
signal alt_reset : std_ulogic;
|
||||
signal wb_syscon_in : wb_io_master_out;
|
||||
signal wb_syscon_out : wb_io_slave_out;
|
||||
@@ -210,7 +209,7 @@ architecture behaviour of soc is
|
||||
signal wb_xics_ics_out : wb_io_slave_out;
|
||||
signal int_level_in : std_ulogic_vector(15 downto 0);
|
||||
signal ics_to_icp : ics_to_icp_t;
|
||||
signal core_ext_irq : std_ulogic;
|
||||
signal core_ext_irq : std_ulogic_vector(NCPUS-1 downto 0) := (others => '0');
|
||||
|
||||
-- GPIO signals:
|
||||
signal wb_gpio_in : wb_io_master_out;
|
||||
@@ -233,12 +232,12 @@ architecture behaviour of soc is
|
||||
signal dmi_wb_dout : std_ulogic_vector(63 downto 0);
|
||||
signal dmi_wb_req : std_ulogic;
|
||||
signal dmi_wb_ack : std_ulogic;
|
||||
signal dmi_core_dout : std_ulogic_vector(63 downto 0);
|
||||
signal dmi_core_req : std_ulogic;
|
||||
signal dmi_core_ack : std_ulogic;
|
||||
signal dmi_core_dout : dword_percpu_array;
|
||||
signal dmi_core_req : std_ulogic_vector(NCPUS-1 downto 0);
|
||||
signal dmi_core_ack : std_ulogic_vector(NCPUS-1 downto 0);
|
||||
|
||||
-- Delayed/latched resets and alt_reset
|
||||
signal rst_core : std_ulogic;
|
||||
signal rst_core : std_ulogic_vector(NCPUS-1 downto 0);
|
||||
signal rst_uart : std_ulogic;
|
||||
signal rst_xics : std_ulogic;
|
||||
signal rst_spi : std_ulogic;
|
||||
@@ -270,6 +269,10 @@ architecture behaviour of soc is
|
||||
signal io_cycle_gpio : std_ulogic;
|
||||
signal io_cycle_external : std_ulogic;
|
||||
|
||||
signal core_run_out : std_ulogic_vector(NCPUS-1 downto 0);
|
||||
|
||||
signal timebase : std_ulogic_vector(63 downto 0);
|
||||
|
||||
function wishbone_widen_data(wb : wb_io_master_out) return wishbone_master_out is
|
||||
variable wwb : wishbone_master_out;
|
||||
begin
|
||||
@@ -334,7 +337,9 @@ begin
|
||||
resets: process(system_clk)
|
||||
begin
|
||||
if rising_edge(system_clk) then
|
||||
rst_core <= soc_reset or do_core_reset;
|
||||
for i in 0 to NCPUS-1 loop
|
||||
rst_core(i) <= soc_reset or do_core_reset(i);
|
||||
end loop;
|
||||
rst_uart <= soc_reset;
|
||||
rst_spi <= soc_reset;
|
||||
rst_xics <= soc_reset;
|
||||
@@ -347,11 +352,27 @@ begin
|
||||
end if;
|
||||
end process;
|
||||
|
||||
-- Processor core
|
||||
processor: entity work.core
|
||||
-- Timebase just increments at the system clock frequency.
|
||||
-- There is currently no way to set it.
|
||||
-- Ideally it would (appear to) run at 512MHz like IBM POWER systems,
|
||||
-- but Linux seems to cope OK with it being 100MHz or whatever.
|
||||
tbase: process(system_clk)
|
||||
begin
|
||||
if rising_edge(system_clk) then
|
||||
if soc_reset = '1' then
|
||||
timebase <= (others => '0');
|
||||
else
|
||||
timebase <= std_ulogic_vector(unsigned(timebase) + 1);
|
||||
end if;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
-- Processor cores
|
||||
processors: for i in 0 to NCPUS-1 generate
|
||||
core: entity work.core
|
||||
generic map(
|
||||
SIM => SIM,
|
||||
CPU_INDEX => 0,
|
||||
CPU_INDEX => i,
|
||||
HAS_FPU => HAS_FPU,
|
||||
HAS_BTC => HAS_BTC,
|
||||
DISABLE_FLATTEN => DISABLE_FLATTEN_CORE,
|
||||
@@ -367,32 +388,32 @@ begin
|
||||
)
|
||||
port map(
|
||||
clk => system_clk,
|
||||
rst => rst_core,
|
||||
rst => rst_core(i),
|
||||
alt_reset => alt_reset_d,
|
||||
run_out => run_out,
|
||||
wishbone_insn_in => wishbone_icore_in,
|
||||
wishbone_insn_out => wishbone_icore_out,
|
||||
wishbone_data_in => wishbone_dcore_in,
|
||||
wishbone_data_out => wishbone_dcore_out,
|
||||
run_out => core_run_out(i),
|
||||
timebase => timebase,
|
||||
wishbone_insn_in => wb_masters_in(i + NCPUS),
|
||||
wishbone_insn_out => wb_masters_out(i + NCPUS),
|
||||
wishbone_data_in => wb_masters_in(i),
|
||||
wishbone_data_out => wb_masters_out(i),
|
||||
wb_snoop_in => wb_snoop,
|
||||
dmi_addr => dmi_addr(3 downto 0),
|
||||
dmi_dout => dmi_core_dout,
|
||||
dmi_dout => dmi_core_dout(i),
|
||||
dmi_din => dmi_dout,
|
||||
dmi_wr => dmi_wr,
|
||||
dmi_ack => dmi_core_ack,
|
||||
dmi_req => dmi_core_req,
|
||||
ext_irq => core_ext_irq
|
||||
dmi_ack => dmi_core_ack(i),
|
||||
dmi_req => dmi_core_req(i),
|
||||
ext_irq => core_ext_irq(i)
|
||||
);
|
||||
end generate;
|
||||
|
||||
run_out <= or (core_run_out);
|
||||
|
||||
-- Wishbone bus master arbiter & mux
|
||||
wb_masters_out <= (0 => wishbone_dcore_out,
|
||||
1 => wishbone_icore_out,
|
||||
2 => wishbone_widen_data(wishbone_dma_out),
|
||||
3 => wishbone_debug_out);
|
||||
wishbone_dcore_in <= wb_masters_in(0);
|
||||
wishbone_icore_in <= wb_masters_in(1);
|
||||
wishbone_dma_in <= wishbone_narrow_data(wb_masters_in(2), wishbone_dma_out.adr);
|
||||
wishbone_debug_in <= wb_masters_in(3);
|
||||
wb_masters_out(2*NCPUS) <= wishbone_widen_data(wishbone_dma_out);
|
||||
wb_masters_out(2*NCPUS + 1) <= wishbone_debug_out;
|
||||
wishbone_dma_in <= wishbone_narrow_data(wb_masters_in(2*NCPUS), wishbone_dma_out.adr);
|
||||
wishbone_debug_in <= wb_masters_in(2*NCPUS + 1);
|
||||
wishbone_arbiter_0: entity work.wishbone_arbiter
|
||||
generic map(
|
||||
NUM_MASTERS => NUM_WB_MASTERS
|
||||
@@ -780,6 +801,7 @@ begin
|
||||
-- Syscon slave
|
||||
syscon0: entity work.syscon
|
||||
generic map(
|
||||
NCPUS => NCPUS,
|
||||
HAS_UART => true,
|
||||
HAS_DRAM => HAS_DRAM,
|
||||
BRAM_SIZE => MEMORY_SIZE,
|
||||
@@ -944,6 +966,9 @@ begin
|
||||
end generate;
|
||||
|
||||
xics_icp: entity work.xics_icp
|
||||
generic map(
|
||||
NCPUS => NCPUS
|
||||
)
|
||||
port map(
|
||||
clk => system_clk,
|
||||
rst => rst_xics,
|
||||
@@ -955,6 +980,7 @@ begin
|
||||
|
||||
xics_ics: entity work.xics_ics
|
||||
generic map(
|
||||
NCPUS => NCPUS,
|
||||
SRC_NUM => 16,
|
||||
PRIO_BITS => 3
|
||||
)
|
||||
@@ -1034,15 +1060,15 @@ begin
|
||||
);
|
||||
|
||||
-- DMI interconnect
|
||||
dmi_intercon: process(dmi_addr, dmi_req,
|
||||
dmi_wb_ack, dmi_wb_dout,
|
||||
dmi_core_ack, dmi_core_dout)
|
||||
dmi_intercon: process(all)
|
||||
|
||||
-- DMI address map (each address is a full 64-bit register)
|
||||
--
|
||||
-- Offset: Size: Slave:
|
||||
-- 0 4 Wishbone
|
||||
-- 10 16 Core
|
||||
-- 10 16 Core 0
|
||||
-- 20 16 Core 1
|
||||
-- ... and so on for NCPUS cores
|
||||
|
||||
type slave_type is (SLAVE_WB,
|
||||
SLAVE_CORE,
|
||||
@@ -1053,25 +1079,29 @@ begin
|
||||
slave := SLAVE_NONE;
|
||||
if std_match(dmi_addr, "000000--") then
|
||||
slave := SLAVE_WB;
|
||||
elsif std_match(dmi_addr, "0001----") then
|
||||
elsif not is_X(dmi_addr) and to_integer(unsigned(dmi_addr(7 downto 4))) <= NCPUS then
|
||||
slave := SLAVE_CORE;
|
||||
end if;
|
||||
|
||||
-- DMI muxing
|
||||
dmi_wb_req <= '0';
|
||||
dmi_core_req <= '0';
|
||||
dmi_core_req <= (others => '0');
|
||||
dmi_din <= (others => '1');
|
||||
dmi_ack <= dmi_req;
|
||||
case slave is
|
||||
when SLAVE_WB =>
|
||||
dmi_wb_req <= dmi_req;
|
||||
dmi_ack <= dmi_wb_ack;
|
||||
dmi_din <= dmi_wb_dout;
|
||||
when SLAVE_CORE =>
|
||||
dmi_core_req <= dmi_req;
|
||||
dmi_ack <= dmi_core_ack;
|
||||
dmi_din <= dmi_core_dout;
|
||||
for i in 0 to NCPUS-1 loop
|
||||
if not is_X(dmi_addr) and to_integer(unsigned(dmi_addr(7 downto 4))) = i + 1 then
|
||||
dmi_core_req(i) <= dmi_req;
|
||||
dmi_ack <= dmi_core_ack(i);
|
||||
dmi_din <= dmi_core_dout(i);
|
||||
end if;
|
||||
end loop;
|
||||
when others =>
|
||||
dmi_ack <= dmi_req;
|
||||
dmi_din <= (others => '1');
|
||||
end case;
|
||||
|
||||
-- SIM magic exit
|
||||
|
||||
16
syscon.vhdl
16
syscon.vhdl
@@ -9,6 +9,7 @@ use work.wishbone_types.all;
|
||||
|
||||
entity syscon is
|
||||
generic (
|
||||
NCPUS : positive := 1;
|
||||
SIG_VALUE : std_ulogic_vector(63 downto 0) := x"f00daa5500010001";
|
||||
CLK_FREQ : integer;
|
||||
HAS_UART : boolean;
|
||||
@@ -33,7 +34,7 @@ entity syscon is
|
||||
|
||||
-- System control ports
|
||||
dram_at_0 : out std_ulogic;
|
||||
core_reset : out std_ulogic;
|
||||
core_reset : out std_ulogic_vector(NCPUS-1 downto 0);
|
||||
soc_reset : out std_ulogic;
|
||||
alt_reset : out std_ulogic
|
||||
);
|
||||
@@ -56,6 +57,7 @@ architecture behaviour of syscon is
|
||||
constant SYS_REG_UART0_INFO : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "001000";
|
||||
constant SYS_REG_UART1_INFO : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "001001";
|
||||
constant SYS_REG_GIT_INFO : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "001010";
|
||||
constant SYS_REG_CPU_CTRL : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "001011";
|
||||
|
||||
-- Muxed reg read signal
|
||||
signal reg_out : std_ulogic_vector(63 downto 0);
|
||||
@@ -116,6 +118,7 @@ architecture behaviour of syscon is
|
||||
signal reg_uart0info : std_ulogic_vector(63 downto 0);
|
||||
signal reg_uart1info : std_ulogic_vector(63 downto 0);
|
||||
signal reg_gitinfo : std_ulogic_vector(63 downto 0);
|
||||
signal reg_cpuctrl : std_ulogic_vector(63 downto 0);
|
||||
signal info_has_dram : std_ulogic;
|
||||
signal info_has_bram : std_ulogic;
|
||||
signal info_has_uart : std_ulogic;
|
||||
@@ -134,7 +137,8 @@ begin
|
||||
-- Generated output signals
|
||||
dram_at_0 <= '1' when BRAM_SIZE = 0 else reg_ctrl(SYS_REG_CTRL_DRAM_AT_0);
|
||||
soc_reset <= reg_ctrl(SYS_REG_CTRL_SOC_RESET);
|
||||
core_reset <= reg_ctrl(SYS_REG_CTRL_CORE_RESET);
|
||||
core_reset <= not reg_cpuctrl(NCPUS-1 downto 0) when reg_ctrl(SYS_REG_CTRL_CORE_RESET) = '0'
|
||||
else (others => '1');
|
||||
alt_reset <= reg_ctrl(SYS_REG_CTRL_ALT_RESET);
|
||||
|
||||
|
||||
@@ -187,6 +191,8 @@ begin
|
||||
55 downto 0 => GIT_HASH,
|
||||
others => '0');
|
||||
|
||||
reg_cpuctrl(63 downto 8) <= std_ulogic_vector(to_unsigned(NCPUS, 56));
|
||||
|
||||
-- Wishbone response
|
||||
wb_rsp.ack <= wishbone_in.cyc and wishbone_in.stb;
|
||||
with wishbone_in.adr(SYS_REG_BITS downto 1) select reg_out <=
|
||||
@@ -201,6 +207,7 @@ begin
|
||||
reg_uart0info when SYS_REG_UART0_INFO,
|
||||
reg_uart1info when SYS_REG_UART1_INFO,
|
||||
reg_gitinfo when SYS_REG_GIT_INFO,
|
||||
reg_cpuctrl when SYS_REG_CPU_CTRL,
|
||||
(others => '0') when others;
|
||||
wb_rsp.dat <= reg_out(63 downto 32) when wishbone_in.adr(0) = '1' else
|
||||
reg_out(31 downto 0);
|
||||
@@ -225,6 +232,7 @@ begin
|
||||
if (rst) then
|
||||
reg_ctrl <= (SYS_REG_CTRL_ALT_RESET => ctrl_init_alt_reset,
|
||||
others => '0');
|
||||
reg_cpuctrl(7 downto 0) <= x"01"; -- enable cpu 0 only
|
||||
else
|
||||
if wishbone_in.cyc and wishbone_in.stb and wishbone_in.we then
|
||||
-- Change this if CTRL ever has more than 32 bits
|
||||
@@ -233,6 +241,10 @@ begin
|
||||
reg_ctrl(SYS_REG_CTRL_BITS-1 downto 0) <=
|
||||
wishbone_in.dat(SYS_REG_CTRL_BITS-1 downto 0);
|
||||
end if;
|
||||
if wishbone_in.adr(SYS_REG_BITS downto 1) = SYS_REG_CPU_CTRL and
|
||||
wishbone_in.adr(0) = '0' and wishbone_in.sel(0) = '1' then
|
||||
reg_cpuctrl(7 downto 0) <= wishbone_in.dat(7 downto 0);
|
||||
end if;
|
||||
end if;
|
||||
|
||||
-- Reset auto-clear
|
||||
|
||||
@@ -4,7 +4,6 @@ use ieee.std_logic_1164.all;
|
||||
library work;
|
||||
use work.wishbone_types.all;
|
||||
|
||||
-- TODO: Use an array of master/slaves with parametric size
|
||||
entity wishbone_arbiter is
|
||||
generic(
|
||||
NUM_MASTERS : positive := 3
|
||||
@@ -28,18 +27,23 @@ begin
|
||||
|
||||
busy <= wb_masters_in(selected).cyc;
|
||||
|
||||
wishbone_muxes: process(selected, candidate, busy, wb_slave_in, wb_masters_in)
|
||||
wishbone_muxes: process(all)
|
||||
variable early_sel : wb_arb_master_t;
|
||||
begin
|
||||
early_sel := selected;
|
||||
if busy = '0' then
|
||||
if NUM_MASTERS <= 4 and busy = '0' then
|
||||
early_sel := candidate;
|
||||
end if;
|
||||
wb_slave_out <= wb_masters_in(early_sel);
|
||||
for i in 0 to NUM_MASTERS-1 loop
|
||||
wb_masters_out(i).dat <= wb_slave_in.dat;
|
||||
wb_masters_out(i).ack <= wb_slave_in.ack when early_sel = i else '0';
|
||||
wb_masters_out(i).stall <= wb_slave_in.stall when early_sel = i else '1';
|
||||
if early_sel = i and wb_masters_in(i).cyc = '1' then
|
||||
wb_masters_out(i).ack <= wb_slave_in.ack;
|
||||
wb_masters_out(i).stall <= wb_slave_in.stall;
|
||||
else
|
||||
wb_masters_out(i).ack <= '0';
|
||||
wb_masters_out(i).stall <= '1';
|
||||
end if;
|
||||
end loop;
|
||||
end process;
|
||||
|
||||
|
||||
293
xics.vhdl
293
xics.vhdl
@@ -25,6 +25,9 @@ use work.common.all;
|
||||
use work.wishbone_types.all;
|
||||
|
||||
entity xics_icp is
|
||||
generic (
|
||||
NCPUS : natural := 1
|
||||
);
|
||||
port (
|
||||
clk : in std_logic;
|
||||
rst : in std_logic;
|
||||
@@ -33,32 +36,41 @@ entity xics_icp is
|
||||
wb_out : out wb_io_slave_out;
|
||||
|
||||
ics_in : in ics_to_icp_t;
|
||||
core_irq_out : out std_ulogic
|
||||
core_irq_out : out std_ulogic_vector(NCPUS-1 downto 0)
|
||||
);
|
||||
end xics_icp;
|
||||
|
||||
architecture behaviour of xics_icp is
|
||||
type reg_internal_t is record
|
||||
type xics_presentation_t is record
|
||||
xisr : std_ulogic_vector(23 downto 0);
|
||||
cppr : std_ulogic_vector(7 downto 0);
|
||||
mfrr : std_ulogic_vector(7 downto 0);
|
||||
irq : std_ulogic;
|
||||
end record;
|
||||
constant xics_presentation_t_init : xics_presentation_t :=
|
||||
(mfrr => x"ff", -- mask everything on reset
|
||||
irq => '0',
|
||||
others => (others => '0'));
|
||||
subtype cpu_index_t is natural range 0 to NCPUS-1;
|
||||
type xicp_array_t is array(cpu_index_t) of xics_presentation_t;
|
||||
|
||||
type reg_internal_t is record
|
||||
icp : xicp_array_t;
|
||||
wb_rd_data : std_ulogic_vector(31 downto 0);
|
||||
wb_ack : std_ulogic;
|
||||
end record;
|
||||
constant reg_internal_init : reg_internal_t :=
|
||||
(wb_ack => '0',
|
||||
mfrr => x"ff", -- mask everything on reset
|
||||
irq => '0',
|
||||
others => (others => '0'));
|
||||
wb_rd_data => (others => '0'),
|
||||
icp => (others => xics_presentation_t_init));
|
||||
|
||||
signal r, r_next : reg_internal_t;
|
||||
|
||||
-- 8 bit offsets for each presentation
|
||||
constant XIRR_POLL : std_ulogic_vector(7 downto 0) := x"00";
|
||||
constant XIRR : std_ulogic_vector(7 downto 0) := x"04";
|
||||
constant RESV0 : std_ulogic_vector(7 downto 0) := x"08";
|
||||
constant MFRR : std_ulogic_vector(7 downto 0) := x"0c";
|
||||
-- 4 bit offsets for each presentation register
|
||||
constant XIRR_POLL : std_ulogic_vector(3 downto 0) := x"0";
|
||||
constant XIRR : std_ulogic_vector(3 downto 0) := x"4";
|
||||
constant RESV0 : std_ulogic_vector(3 downto 0) := x"8";
|
||||
constant MFRR : std_ulogic_vector(3 downto 0) := x"c";
|
||||
|
||||
begin
|
||||
|
||||
@@ -68,7 +80,9 @@ begin
|
||||
r <= r_next;
|
||||
|
||||
-- We delay core_irq_out by a cycle to help with timing
|
||||
core_irq_out <= r.irq;
|
||||
for i in 0 to NCPUS-1 loop
|
||||
core_irq_out(i) <= r.icp(i).irq;
|
||||
end loop;
|
||||
end if;
|
||||
end process;
|
||||
|
||||
@@ -99,94 +113,105 @@ begin
|
||||
|
||||
v.wb_ack := '0';
|
||||
|
||||
xirr_accept_rd := '0';
|
||||
|
||||
be_in := bswap(wb_in.dat);
|
||||
be_out := (others => '0');
|
||||
|
||||
if wb_in.cyc = '1' and wb_in.stb = '1' then
|
||||
v.wb_ack := '1'; -- always ack
|
||||
if wb_in.we = '1' then -- write
|
||||
-- writes to both XIRR are the same
|
||||
case wb_in.adr(5 downto 0) & "00" is
|
||||
when XIRR_POLL =>
|
||||
report "ICP XIRR_POLL write";
|
||||
v.cppr := be_in(31 downto 24);
|
||||
when XIRR =>
|
||||
v.cppr := be_in(31 downto 24);
|
||||
if wb_in.sel = x"f" then -- 4 byte
|
||||
report "ICP XIRR write word (EOI) :" & to_hstring(be_in);
|
||||
elsif wb_in.sel = x"1" then -- 1 byte
|
||||
report "ICP XIRR write byte (CPPR):" & to_hstring(be_in(31 downto 24));
|
||||
else
|
||||
report "ICP XIRR UNSUPPORTED write ! sel=" & to_hstring(wb_in.sel);
|
||||
end if;
|
||||
when MFRR =>
|
||||
v.mfrr := be_in(31 downto 24);
|
||||
if wb_in.sel = x"f" then -- 4 bytes
|
||||
report "ICP MFRR write word:" & to_hstring(be_in);
|
||||
elsif wb_in.sel = x"1" then -- 1 byte
|
||||
report "ICP MFRR write byte:" & to_hstring(be_in(31 downto 24));
|
||||
else
|
||||
report "ICP MFRR UNSUPPORTED write ! sel=" & to_hstring(wb_in.sel);
|
||||
end if;
|
||||
when others =>
|
||||
end case;
|
||||
end if;
|
||||
|
||||
else -- read
|
||||
for i in cpu_index_t loop
|
||||
xirr_accept_rd := '0';
|
||||
|
||||
case wb_in.adr(5 downto 0) & "00" is
|
||||
when XIRR_POLL =>
|
||||
report "ICP XIRR_POLL read";
|
||||
be_out := r.cppr & r.xisr;
|
||||
when XIRR =>
|
||||
report "ICP XIRR read";
|
||||
be_out := r.cppr & r.xisr;
|
||||
if wb_in.sel = x"f" then
|
||||
xirr_accept_rd := '1';
|
||||
end if;
|
||||
when MFRR =>
|
||||
report "ICP MFRR read";
|
||||
be_out(31 downto 24) := r.mfrr;
|
||||
when others =>
|
||||
end case;
|
||||
if wb_in.cyc = '1' and wb_in.stb = '1' and
|
||||
to_integer(unsigned(wb_in.adr(5 downto 2))) = i then
|
||||
if wb_in.we = '1' then -- write
|
||||
-- writes to both XIRR are the same
|
||||
case wb_in.adr(1 downto 0) & "00" is
|
||||
when XIRR_POLL =>
|
||||
report "ICP XIRR_POLL write";
|
||||
v.icp(i).cppr := be_in(31 downto 24);
|
||||
when XIRR =>
|
||||
v.icp(i).cppr := be_in(31 downto 24);
|
||||
if wb_in.sel = x"f" then -- 4 byte
|
||||
report "ICP " & natural'image(i) & " XIRR write word (EOI) :" &
|
||||
to_hstring(be_in);
|
||||
elsif wb_in.sel = x"1" then -- 1 byte
|
||||
report "ICP " & natural'image(i) & " XIRR write byte (CPPR):" &
|
||||
to_hstring(be_in(31 downto 24));
|
||||
else
|
||||
report "ICP " & natural'image(i) & " XIRR UNSUPPORTED write ! sel=" &
|
||||
to_hstring(wb_in.sel);
|
||||
end if;
|
||||
when MFRR =>
|
||||
v.icp(i).mfrr := be_in(31 downto 24);
|
||||
if wb_in.sel = x"f" then -- 4 bytes
|
||||
report "ICP " & natural'image(i) & " MFRR write word:" &
|
||||
to_hstring(be_in);
|
||||
elsif wb_in.sel = x"1" then -- 1 byte
|
||||
report "ICP " & natural'image(i) & " MFRR write byte:" &
|
||||
to_hstring(be_in(31 downto 24));
|
||||
else
|
||||
report "ICP " & natural'image(i) & " MFRR UNSUPPORTED write ! sel=" &
|
||||
to_hstring(wb_in.sel);
|
||||
end if;
|
||||
when others =>
|
||||
end case;
|
||||
|
||||
else -- read
|
||||
|
||||
case wb_in.adr(1 downto 0) & "00" is
|
||||
when XIRR_POLL =>
|
||||
report "ICP XIRR_POLL read";
|
||||
be_out := r.icp(i).cppr & r.icp(i).xisr;
|
||||
when XIRR =>
|
||||
report "ICP XIRR read";
|
||||
be_out := r.icp(i).cppr & r.icp(i).xisr;
|
||||
if wb_in.sel = x"f" then
|
||||
xirr_accept_rd := '1';
|
||||
end if;
|
||||
when MFRR =>
|
||||
report "ICP MFRR read";
|
||||
be_out(31 downto 24) := r.icp(i).mfrr;
|
||||
when others =>
|
||||
end case;
|
||||
end if;
|
||||
end if;
|
||||
end if;
|
||||
|
||||
pending_priority := x"ff";
|
||||
v.xisr := x"000000";
|
||||
v.irq := '0';
|
||||
pending_priority := x"ff";
|
||||
v.icp(i).xisr := x"000000";
|
||||
v.icp(i).irq := '0';
|
||||
|
||||
if ics_in.pri /= x"ff" then
|
||||
v.xisr := x"00001" & ics_in.src;
|
||||
pending_priority := ics_in.pri;
|
||||
end if;
|
||||
|
||||
-- Check MFRR
|
||||
if unsigned(r.mfrr) < unsigned(pending_priority) then --
|
||||
v.xisr := x"000002"; -- special XICS MFRR IRQ source number
|
||||
pending_priority := r.mfrr;
|
||||
end if;
|
||||
|
||||
-- Accept the interrupt
|
||||
if xirr_accept_rd = '1' then
|
||||
report "XICS: ICP ACCEPT" &
|
||||
" cppr:" & to_hstring(r.cppr) &
|
||||
" xisr:" & to_hstring(r.xisr) &
|
||||
" mfrr:" & to_hstring(r.mfrr);
|
||||
v.cppr := pending_priority;
|
||||
end if;
|
||||
|
||||
v.wb_rd_data := bswap(be_out);
|
||||
|
||||
if unsigned(pending_priority) < unsigned(v.cppr) then
|
||||
if r.irq = '0' then
|
||||
report "IRQ set";
|
||||
if ics_in.pri(8*i + 7 downto 8*i) /= x"ff" then
|
||||
v.icp(i).xisr := x"00001" & ics_in.src(4*i + 3 downto 4*i);
|
||||
pending_priority := ics_in.pri(8*i + 7 downto 8*i);
|
||||
end if;
|
||||
v.irq := '1';
|
||||
elsif r.irq = '1' then
|
||||
report "IRQ clr";
|
||||
end if;
|
||||
|
||||
-- Check MFRR
|
||||
if unsigned(r.icp(i).mfrr) < unsigned(pending_priority) then --
|
||||
v.icp(i).xisr := x"000002"; -- special XICS MFRR IRQ source number
|
||||
pending_priority := r.icp(i).mfrr;
|
||||
end if;
|
||||
|
||||
-- Accept the interrupt
|
||||
if xirr_accept_rd = '1' then
|
||||
report "XICS " & natural'image(i) & ": ICP ACCEPT" &
|
||||
" cppr:" & to_hstring(r.icp(i).cppr) &
|
||||
" xisr:" & to_hstring(r.icp(i).xisr) &
|
||||
" mfrr:" & to_hstring(r.icp(i).mfrr);
|
||||
v.icp(i).cppr := pending_priority;
|
||||
end if;
|
||||
|
||||
v.wb_rd_data := bswap(be_out);
|
||||
|
||||
if unsigned(pending_priority) < unsigned(v.icp(i).cppr) then
|
||||
if r.icp(i).irq = '0' then
|
||||
report "CPU " & natural'image(i) & " IRQ set";
|
||||
end if;
|
||||
v.icp(i).irq := '1';
|
||||
elsif r.icp(i).irq = '1' then
|
||||
report "CPU " & natural'image(i) & " IRQ clr";
|
||||
end if;
|
||||
end loop;
|
||||
|
||||
if rst = '1' then
|
||||
v := reg_internal_init;
|
||||
@@ -210,6 +235,7 @@ use work.helpers.all;
|
||||
|
||||
entity xics_ics is
|
||||
generic (
|
||||
NCPUS : natural := 1;
|
||||
SRC_NUM : integer range 1 to 256 := 16;
|
||||
PRIO_BITS : integer range 1 to 8 := 3
|
||||
);
|
||||
@@ -228,10 +254,13 @@ end xics_ics;
|
||||
architecture rtl of xics_ics is
|
||||
|
||||
constant SRC_NUM_BITS : natural := log2(SRC_NUM);
|
||||
constant SERVER_NUM_BITS : natural := 2;
|
||||
|
||||
subtype pri_t is std_ulogic_vector(PRIO_BITS-1 downto 0);
|
||||
subtype server_t is unsigned(SERVER_NUM_BITS-1 downto 0);
|
||||
type xive_t is record
|
||||
pri : pri_t;
|
||||
server : server_t;
|
||||
end record;
|
||||
constant pri_masked : pri_t := (others => '1');
|
||||
|
||||
@@ -308,6 +337,16 @@ architecture rtl of xics_ics is
|
||||
return p(nbits - 1 downto 0);
|
||||
end function;
|
||||
|
||||
function server_check(serv_in: std_ulogic_vector(7 downto 0)) return unsigned is
|
||||
variable srv : server_t;
|
||||
begin
|
||||
srv := to_unsigned(0, SERVER_NUM_BITS);
|
||||
if to_integer(unsigned(serv_in)) < NCPUS then
|
||||
srv := unsigned(serv_in(SERVER_NUM_BITS - 1 downto 0));
|
||||
end if;
|
||||
return srv;
|
||||
end;
|
||||
|
||||
-- Register map
|
||||
-- 0 : Config
|
||||
-- 4 : Debug/diagnostics
|
||||
@@ -366,16 +405,14 @@ begin
|
||||
be_out := (others => '0');
|
||||
|
||||
if reg_is_xive = '1' then
|
||||
be_out := int_level_l(reg_idx) &
|
||||
'0' &
|
||||
int_level_l(reg_idx) &
|
||||
'0' &
|
||||
x"00000" &
|
||||
prio_unpack(xives(reg_idx).pri);
|
||||
be_out(31) := int_level_l(reg_idx);
|
||||
be_out(29) := int_level_l(reg_idx);
|
||||
be_out(8 + SERVER_NUM_BITS - 1 downto 8) := std_ulogic_vector(xives(reg_idx).server);
|
||||
be_out(7 downto 0) := prio_unpack(xives(reg_idx).pri);
|
||||
elsif reg_is_config = '1' then
|
||||
be_out := get_config;
|
||||
elsif reg_is_debug = '1' then
|
||||
be_out := x"00000" & icp_out_next.src & icp_out_next.pri;
|
||||
be_out := icp_out_next.src & icp_out_next.pri(15 downto 0);
|
||||
end if;
|
||||
wb_out.dat <= bswap(be_out);
|
||||
wb_out.ack <= wb_valid;
|
||||
@@ -389,17 +426,20 @@ begin
|
||||
if rising_edge(clk) then
|
||||
if rst = '1' then
|
||||
for i in 0 to SRC_NUM - 1 loop
|
||||
xives(i) <= (pri => pri_masked);
|
||||
xives(i) <= (pri => pri_masked, server => to_unsigned(0, SERVER_NUM_BITS));
|
||||
end loop;
|
||||
elsif wb_valid = '1' and wb_in.we = '1' then
|
||||
-- Byteswapped input
|
||||
be_in := bswap(wb_in.dat);
|
||||
if reg_is_xive then
|
||||
-- TODO: When adding support for other bits, make sure to
|
||||
-- properly implement wb_in.sel to allow partial writes.
|
||||
xives(reg_idx).pri <= prio_pack(be_in(7 downto 0));
|
||||
report "ICS irq " & integer'image(reg_idx) &
|
||||
" set to:" & to_hstring(be_in(7 downto 0));
|
||||
if wb_in.sel(3) = '1' then
|
||||
xives(reg_idx).pri <= prio_pack(be_in(7 downto 0));
|
||||
report "ICS irq " & integer'image(reg_idx) &
|
||||
" set to pri:" & to_hstring(be_in(7 downto 0));
|
||||
end if;
|
||||
if wb_in.sel(2) = '1' then
|
||||
xives(reg_idx).server <= server_check(be_in(15 downto 8));
|
||||
end if;
|
||||
end if;
|
||||
end if;
|
||||
end if;
|
||||
@@ -424,29 +464,36 @@ begin
|
||||
variable pending_pri : pri_vector_t;
|
||||
variable pending_at_pri : std_ulogic_vector(SRC_NUM - 1 downto 0);
|
||||
begin
|
||||
-- Work out the most-favoured (lowest) priority of the pending interrupts
|
||||
pending_pri := (others => '0');
|
||||
for i in 0 to SRC_NUM - 1 loop
|
||||
if int_level_l(i) = '1' then
|
||||
pending_pri := pending_pri or prio_decode(xives(i).pri);
|
||||
end if;
|
||||
end loop;
|
||||
max_pri := priority_encoder(pending_pri, PRIO_BITS);
|
||||
icp_out_next.src <= (others => '0');
|
||||
icp_out_next.pri <= (others => '0');
|
||||
for cpu in 0 to NCPUS-1 loop
|
||||
-- Work out the most-favoured (lowest) priority of the interrupts
|
||||
-- that are pending and directed to this cpu
|
||||
pending_pri := (others => '0');
|
||||
for i in 0 to SRC_NUM - 1 loop
|
||||
if int_level_l(i) = '1' and to_integer(xives(i).server) = cpu then
|
||||
pending_pri := pending_pri or prio_decode(xives(i).pri);
|
||||
end if;
|
||||
end loop;
|
||||
max_pri := priority_encoder(pending_pri, PRIO_BITS);
|
||||
|
||||
-- Work out which interrupts are pending at that priority
|
||||
pending_at_pri := (others => '0');
|
||||
for i in 0 to SRC_NUM - 1 loop
|
||||
if int_level_l(i) = '1' and xives(i).pri = max_pri then
|
||||
pending_at_pri(i) := '1';
|
||||
end if;
|
||||
end loop;
|
||||
max_idx := priority_encoder(pending_at_pri, SRC_NUM_BITS);
|
||||
-- Work out which interrupts are pending at that priority
|
||||
pending_at_pri := (others => '0');
|
||||
for i in 0 to SRC_NUM - 1 loop
|
||||
if int_level_l(i) = '1' and xives(i).pri = max_pri and
|
||||
to_integer(xives(i).server) = cpu then
|
||||
pending_at_pri(i) := '1';
|
||||
end if;
|
||||
end loop;
|
||||
max_idx := priority_encoder(pending_at_pri, SRC_NUM_BITS);
|
||||
|
||||
if max_pri /= pri_masked then
|
||||
report "MFI: " & integer'image(to_integer(unsigned(max_idx))) & " pri=" & to_hstring(prio_unpack(max_pri));
|
||||
end if;
|
||||
icp_out_next.src <= max_idx;
|
||||
icp_out_next.pri <= prio_unpack(max_pri);
|
||||
if max_pri /= pri_masked then
|
||||
report "MFI: " & integer'image(to_integer(unsigned(max_idx))) & " pri=" & to_hstring(prio_unpack(max_pri)) &
|
||||
" srv=" & integer'image(cpu);
|
||||
end if;
|
||||
icp_out_next.src(4*cpu + 3 downto 4*cpu) <= max_idx;
|
||||
icp_out_next.pri(8*cpu + 7 downto 8*cpu) <= prio_unpack(max_pri);
|
||||
end loop;
|
||||
end process;
|
||||
|
||||
end architecture rtl;
|
||||
|
||||
Reference in New Issue
Block a user