-- Mirror of https://github.com/j-core/j-core-ice40.git (synced 2026-01-11 23:52:49 +00:00)
-- File: j-core.j-core-ice40/datapath.vhd (494 lines, 19 KiB, VHDL)

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use work.cpu2j0_pack.all;
use work.cpu2j0_components_pack.all;
use work.datapath_pack.all;
use work.decode_pack.all;
-- Datapath of the CPU: internal buses, register file, ALU/shifter glue,
-- PC/SR state and the data, instruction and debug bus interfaces.
-- Port order is part of the interface and must not be changed.
entity datapath is
  port (
    -- clock and asynchronous reset of the datapath state register
    clk                : in  std_logic;
    rst                : in  std_logic;
    -- debug mode handshake
    debug              : in  std_logic;
    enter_debug        : out std_logic;
    -- combinatorial "new CPU slot" indication
    slot               : out std_logic;
    -- control records (types declared in the imported work packages)
    reg                : in  reg_ctrl_t;
    func               : in  func_ctrl_t;
    sr_ctrl            : in  sr_ctrl_t;
    mac                : in  mac_ctrl_t;
    mem                : in  mem_ctrl_t;
    instr              : in  instr_ctrl_t;
    pc_ctrl            : in  pc_ctrl_t;
    buses              : in  buses_ctrl_t;
    -- data bus
    db_lock            : out std_logic;
    db_o               : out cpu_data_o_t;
    db_i               : in  cpu_data_i_t;
    -- instruction bus
    inst_o             : out cpu_instruction_o_t;
    inst_i             : in  cpu_instruction_i_t;
    -- debug bus
    debug_o            : out cpu_debug_o_t;
    debug_i            : in  cpu_debug_i_t;
    -- multiplier operands out, MACH/MACL values in
    macin1             : out std_logic_vector(31 downto 0);
    macin2             : out std_logic_vector(31 downto 0);
    mach               : in  std_logic_vector(31 downto 0);
    macl               : in  std_logic_vector(31 downto 0);
    mac_s              : out std_logic;
    -- next-state T bit, output combinatorially
    t_bcc              : out std_logic;
    -- interrupt mask field of SR
    ibit               : out std_logic_vector(3 downto 0);
    -- fetched instruction word
    if_dr              : out std_logic_vector(15 downto 0);
    -- driven with constant '0' by architecture stru
    if_stall           : out std_logic;
    mask_int           : out std_logic;
    illegal_delay_slot : out std_logic;
    illegal_instr      : out std_logic
  );
end entity datapath;
architecture stru of datapath is
subtype reg_t is std_logic_vector(31 downto 0);

-- program counter and status register views of the registered state
signal pc : std_logic_vector(31 downto 0);
signal sr : sr_t;

-- register file read ports and write-data paths
signal reg_x, reg_y, reg_0 : std_logic_vector(31 downto 0);
signal gpf_zwd             : std_logic_vector(31 downto 0);
signal reg_wr_data_o       : std_logic_vector(31 downto 0);

-- internal buses
signal xbus, ybus, ybus_temp, zbus, wbus : std_logic_vector(31 downto 0);
signal ybus_override                     : bus_val_t;

-- alu ports
signal aluiny, aluinx  : std_logic_vector(31 downto 0);
signal div1_arith_func : arith_func_t;
signal arith_func      : arith_func_t;
signal arith_out       : std_logic_vector(32 downto 0);
signal logic_out       : std_logic_vector(31 downto 0);
signal sfto            : std_logic;

-- combinatorial slot indication
signal slot_o : std_logic;

-- datapath state: this_c is the computed next state, this_r the register
signal this_c : datapath_reg_t;
signal this_r : datapath_reg_t := DATAPATH_RESET;
-- The functions to_sr and to_slv convert between the sr record
-- and its CPU register representation.
--
-- to_sr: pick the T, S, I3..I0, Q and M bit positions out of a 32-bit
-- register image. All other bits of the image are ignored.
function to_sr(a : std_logic_vector(31 downto 0))
  return sr_t
is
  variable fields : sr_t;
begin
  fields.t        := a(T);
  fields.s        := a(S);
  fields.int_mask := a(I3 downto I0);
  fields.q        := a(Q);
  fields.m        := a(M);
  return fields;
end to_sr;
-- Build the 32-bit register image of an sr record. Bit positions that
-- do not correspond to a field written below read back as '0'.
function to_slv(from : sr_t)
  return std_logic_vector
is
  variable word : std_logic_vector(31 downto 0);
begin
  word              := (others => '0');
  word(T)           := from.t;
  word(S)           := from.s;
  word(I3 downto I0) := from.int_mask;
  word(Q)           := from.q;
  word(M)           := from.m;
  return word;
end to_slv;
-- A bit vector from a single bit: returns a vector indexed
-- (s-1 downto 0) in which every element equals b.
function to_slv(b : std_logic; s : integer) return std_logic_vector is
  constant filled : std_logic_vector(s-1 downto 0) := (others => b);
begin
  return filled;
end to_slv;
-- Build a data bus request from the memory control record.
--   memc : memory control; issue, wr and size are read here
--   addr : address of the access
--   data : write data (only used when memc.wr = '1')
-- Returns NULL_DATA_O when no access is issued. For writes the data is
-- replicated across the 32-bit word and we selects the addressed
-- lane(s), with the lowest address in the most significant lane.
function to_data_o(memc : mem_ctrl_t; addr : std_logic_vector(31 downto 0);
                   data : std_logic_vector(31 downto 0))
  return cpu_data_o_t
is
  variable req : cpu_data_o_t := NULL_DATA_O;
begin
  -- nothing issued: hand back the idle request
  if memc.issue /= '1' then
    return req;
  end if;

  req.en := '1';
  req.a  := addr;
  req.wr := memc.wr;
  req.rd := not memc.wr;

  -- reads need neither write enables nor write data
  if memc.wr /= '1' then
    return req;
  end if;

  -- for writes, prepare we and d signals
  case memc.size is
    when LONG =>
      req.we := "1111";
      req.d  := data;
    when WORD =>
      req.d := data(15 downto 0) & data(15 downto 0);
      if addr(1) = '0' then
        req.we := "1100";
      else
        req.we := "0011";
      end if;
    when BYTE =>
      req.d := data(7 downto 0) & data(7 downto 0) &
               data(7 downto 0) & data(7 downto 0);
      -- TODO: Use shift or rotate operator instead of case?
      case addr(1 downto 0) is
        when "00"   => req.we := "1000";
        when "01"   => req.we := "0100";
        when "10"   => req.we := "0010";
        when others => req.we := "0001";
      end case;
  end case;
  return req;
end to_data_o;
-- Build an instruction bus request.
--   instrc : instruction control; only issue is read here
--   addr   : fetch address; bit 0 is dropped
--   jp     : forwarded to the jp field of the request
-- default to jump=1 unless caller knows address is incremented PC
-- Returns NULL_INST_O when no fetch is issued.
function to_inst_o(instrc : instr_ctrl_t;
                   addr   : std_logic_vector(31 downto 0);
                   jp     : std_logic := '1')
  return cpu_instruction_o_t
is
  variable req : cpu_instruction_o_t := NULL_INST_O;
begin
  if instrc.issue /= '1' then
    return req;
  end if;
  req.en := '1';
  req.jp := jp;
  req.a  := addr(31 downto 1);
  return req;
end to_inst_o;
-- Extract the addressed byte or word from a 32-bit read word and
-- sign-extend it to 32 bits.
--   d     : word returned by the data bus
--   bus_o : the request that was issued; only the low address bits are read
--   size  : access size; anything other than BYTE or WORD returns d as is
-- The lowest address selects the most significant lane.
function align_read_data(d : std_logic_vector(31 downto 0);
                         bus_o : cpu_data_o_t; size : mem_size_t)
  return std_logic_vector
is
  variable byte_lane : std_logic_vector(7 downto 0);
  variable word_lane : std_logic_vector(15 downto 0);
  variable aligned   : std_logic_vector(31 downto 0);
begin
  if size = BYTE then
    case bus_o.a(1 downto 0) is
      when "00"   => byte_lane := d(31 downto 24);
      when "01"   => byte_lane := d(23 downto 16);
      when "10"   => byte_lane := d(15 downto 8);
      when others => byte_lane := d( 7 downto 0);
    end case;
    -- replicate the lane's top bit into the upper 24 bits
    aligned := to_slv(byte_lane(7), 24) & byte_lane;
  elsif size = WORD then
    if bus_o.a(1) = '0' then
      word_lane := d(31 downto 16);
    else
      word_lane := d(15 downto 0);
    end if;
    -- replicate the lane's top bit into the upper 16 bits
    aligned := to_slv(word_lane(15), 16) & word_lane;
  else
    aligned := d;
  end if;
  return aligned;
end align_read_data;
-- body of datapath
begin
-- Multiplexors for the internal buses

-- X bus: register file port A, the PC, or the immediate value
with buses.x_sel select xbus <=
  reg_x         when SEL_REG,
  pc            when SEL_PC,
  buses.imm_val when others;

-- Y bus before the debug override is applied
-- TODO this one line is 128 cells in ice40
with buses.y_sel select ybus_temp <=
  reg_y         when SEL_REG,
  pc            when SEL_PC,
  mach          when SEL_MACH,
  macl          when SEL_MACL,
  to_slv(sr)    when SEL_SR,
  buses.imm_val when others;

-- an enabled override value replaces the selected Y bus source
ybus <= ybus_override.d when ybus_override.en = '1'
        else ybus_temp;

-- write data for the register file's ex write port: PC or Z bus
gpf_zwd <= pc when pc_ctrl.wrpr = '1'
           else zbus;
-- Register file: two addressed read ports (a, b), a dedicated dout_0
-- output (used as reg_0) and two write ports (wb, ex). The clock enable
-- is the combinatorial slot signal.
u_regfile : register_file
  generic map (
    ADDR_WIDTH => 5,
    NUM_REGS   => 21,
    REG_WIDTH  => 32
  )
  port map (
    clk       => clk,
    rst       => rst,
    ce        => slot_o,
    -- read ports
    addr_ra   => reg.num_x,
    dout_a    => reg_x,
    addr_rb   => reg.num_y,
    dout_b    => reg_y,
    dout_0    => reg_0,
    -- wb write port, fed from the W bus
    we_wb     => reg.wr_w,
    w_addr_wb => reg.num_w,
    din_wb    => wbus,
    -- ex write port, fed from the Z bus or the PC
    we_ex     => reg.wr_z,
    w_addr_ex => reg.num_z,
    din_ex    => gpf_zwd,
    -- write data made visible to the debug logic
    wr_data_o => reg_wr_data_o
  );
-- setup arithmetic inputs function

-- ALU X input: X bus, optionally with its two low bits cleared, rotated
-- left through T, or forced to zero
with func.alu.inx_sel select aluinx <=
  xbus(31 downto 2) & "00" when SEL_FC,
  xbus(30 downto 0) & sr.t when SEL_ROTCL, -- used for DIV1
  (others => '0')          when SEL_ZERO,
  xbus                     when others;

-- ALU Y input: immediate, reg_0 or the Y bus
with func.alu.iny_sel select aluiny <=
  buses.imm_val when SEL_IMM,
  reg_0         when SEL_R0,
  ybus          when others;

-- DIV1 decides the arith function at runtime based on m=q. Override
-- the arith func set by decoder when DIV1.
div1_arith_func <= SUB when sr.m = sr.q
                   else ADD;
arith_func <= div1_arith_func when func.arith.sr = DIV1
              else func.arith.func;

-- arith_out is 33 bits wide; the carry input is T gated by ci_en
arith_out <= arith_unit(aluinx,
                        aluiny,
                        arith_func,
                        func.arith.ci_en and sr.t);

logic_out <= logic_unit(aluinx, aluiny, func.logic_func);
-- Z bus: result selection. The shifter control is ybus(31) followed by
-- the low five bits of ybus.
with buses.z_sel select zbus <=
  arith_out(31 downto 0)             when SEL_ARITH,
  logic_out                          when SEL_LOGIC,
  bshifter(xbus,
           ybus(31) & ybus(4 downto 0),
           sr.t,
           func.shift)               when SEL_SHIFT,
  manip(xbus, ybus, func.alu.manip)  when SEL_MANIP,
  ybus                               when SEL_YBUS,
  wbus                               when SEL_WBUS;

-- bit offered to T for SEL_SHIFT: top bit of xbus when ybus(31) = '0',
-- bottom bit otherwise
sfto <= xbus(xbus'left) when ybus(31) = '0'
        else xbus(xbus'right);

-- multiplier operand selection
with mac.sel1 select macin1 <=
  xbus when SEL_XBUS,
  zbus when SEL_ZBUS,
  wbus when others;

with mac.sel2 select macin2 <=
  ybus when SEL_YBUS,
  zbus when SEL_ZBUS,
  wbus when others;

-- interrupt mask field of SR
ibit <= sr.int_mask;
-- Combinational next-state process for the datapath.
-- Copies the registered state this_r into the variable "this", applies
-- the debug state machine, bus transaction completion, slot generation,
-- new bus requests and the PC/SR updates to it, and drives the result
-- onto this_c (which datapath_r0 registers).
-- Statement order matters: "this" is a variable, so every read sees the
-- assignments made earlier in the same evaluation.
-- if_ad, ma_ad and ma_dw are scratch values that are always assigned
-- immediately before they are used.
datapath_p : process(this_r,pc_ctrl,wbus,zbus,sr_ctrl, xbus, ybus, mac,mem,
instr, db_i, inst_i, debug, debug_i,reg_wr_data_o,
logic_out, arith_out, arith_func, func, sfto)
variable this : datapath_reg_t;
variable if_ad : std_logic_vector(31 downto 0);
variable ma_ad, ma_dw : std_logic_vector(31 downto 0);
variable next_state : debug_state_t;
begin
this := this_r;
-- debug handling, from BREAK instruction
this.debug_o.ack := '0';
next_state := this.debug_state;
-- rising edge of the debug input (old_debug holds the previous value)
if this.old_debug = '0' and debug = '1' and
(this.debug_state = RUN or this.debug_state = AWAIT_BREAK)
then
next_state := AWAIT_IF;
-- stop requesting debug mode once we're in debug mode
this.enter_debug := (others => '0');
elsif this.debug_state = RUN and debug_i.en = '1' and debug_i.cmd = BRK
then
-- schedule entering debug mode
-- TODO: we could probably set enter_debug(0) = '1' to
-- immediately enter, but need to be careful that mask_int is
-- set early enough to avoid an interrupt during debugging.
this.enter_debug(this.enter_debug'left) := '1';
next_state := AWAIT_BREAK;
end if;
this.old_debug := debug;
-- check if data bus transaction finished
if this.data_o.en = '1' and db_i.ack = '1'
then
-- FIXME: Drop en, unless keep_cyc='1'
this.m_dr_next := align_read_data(db_i.d, this.data_o, this.data_o_size);
this.m_en := '1';
this.data_o := NULL_DATA_O;
end if;
-- check if instruction bus transaction finished
-- (debug commands are only examined when no fetch completes here)
if this.inst_o.en = '1' and inst_i.ack = '1'
then
this.if_dr_next := inst_i.d;
this.if_en := '1';
this.inst_o := NULL_INST_O;
elsif this.debug_state = READY and debug_i.en = '1' then
-- handle debug command
case debug_i.cmd is
when BRK =>
-- A BREAK cmd when already in the READY state does nothing
this.debug_o.ack := '1';
when INSERT =>
-- use the instruction from the debug register
this.if_dr_next := debug_i.ir;
this.if_en := '1';
this.stop_pc_inc := '1';
-- latch the y-bus override into start of pipeline
this.ybus_override(this.ybus_override'left) :=
( en => debug_i.d_en, d => debug_i.d );
-- await instruction fetch before processing next debug cmd
next_state := AWAIT_IF;
when STEP =>
-- fetch a real instruction to execute next
this.inst_o := to_inst_o(instr, this.pc);
-- leave debug mode but schedule an enter_debug to
-- get back into debug mode
this.enter_debug(this.enter_debug'left) := '1';
next_state := AWAIT_BREAK;
when CONTINUE =>
-- fetch a real instruction to execute next
this.inst_o := to_inst_o(instr, this.pc);
this.enter_debug(this.enter_debug'left) := '0';
next_state := RUN;
end case;
end if;
-- While stop_pc_inc is set, hold pc_inc at the current PC so that the
-- fetch address and PC update further down do not advance.
if this.stop_pc_inc = '1' then
this.pc_inc := this.pc;
end if;
-- this.slot has not been reassigned yet in this evaluation, so this
-- tests the registered slot value taken from this_r.
if this.slot = '1' then
-- Shift enter_debug pipeline along. The left-most bit is duplicated
-- The right-most bit becomes the enter_debug output.
this.enter_debug := this.enter_debug(this.enter_debug'left) &
this.enter_debug(this.enter_debug'left downto 1);
end if;
-- A new slot starts only when neither bus has a request outstanding
-- and the debug state is not READY.
if this.data_o.en = '0' and this.inst_o.en = '0'
and this.debug_state /= READY
then
-- present data read by completed transactions
if this.m_en = '1' then
this.m_dr := this.m_dr_next;
this.m_en := '0';
end if;
if this.if_en = '1' then
this.if_dr := this.if_dr_next;
-- both checks below run on the instruction word just loaded
this.illegal_delay_slot := check_illegal_delay_slot(this.if_dr);
this.illegal_instr := check_illegal_instruction(this.if_dr);
this.if_en := '0';
end if;
this.slot := '1';
else
-- Slot is output as a combinatorial signal. Other blocks use it to
-- determine if a rising clock edge is the start of a new CPU slot
-- or whether the current slot is stretched into the next cycle.
this.slot := '0';
end if;
-- from here on this.slot is the value computed just above
if this.slot = '1' then
-- start new memory transactions
if mem.issue = '1' and this.data_o.en = '0' then
-- start new data request
case mem.addr_sel is
when SEL_XBUS => ma_ad := xbus;
when SEL_YBUS => ma_ad := ybus;
when SEL_ZBUS => ma_ad := zbus;
end case;
case mem.wdata_sel is
when SEL_YBUS => ma_dw := ybus;
when SEL_ZBUS => ma_dw := zbus;
end case;
-- remember the access size for aligning the read data on completion
this.data_o_size := mem.size;
this.data_o := to_data_o(mem, ma_ad, ma_dw);
end if;
if instr.issue = '1' then
if this.debug_state = RUN or this.debug_state = AWAIT_BREAK then
if this.inst_o.en = '0' then
-- start new instruction request
-- addr_sel doubles as the jp flag: '0' fetches from pc_inc,
-- anything else fetches from the Z bus
if instr.addr_sel = '0'
then if_ad := this.pc_inc;
else if_ad := zbus;
end if;
this.inst_o := to_inst_o(instr, if_ad, instr.addr_sel);
end if;
elsif this.debug_state = AWAIT_IF or next_state = AWAIT_IF then
-- In debug mode. instruction fetch issue is our signal to
-- pause the CPU. Later we either allow the instruction
-- fetch from memory to proceed or insert an instruction.
-- Also check for next_state=AWAIT_IF to skip AWAIT_IF
-- state when decoder is already requesting an instruction.
next_state := READY;
-- Move y-bus override through its pipeline to use in EX
-- stage. Currently the pipeline is short so the INSERT
-- value used in an instruction comes in the subsequent
-- INSERT command. May increase pipeline size.
for i in 1 to this.ybus_override'left loop
this.ybus_override(i-1) := this.ybus_override(i);
end loop;
this.ybus_override(this.ybus_override'left) := BUS_VAL_RESET;
end if;
end if;
-- update PC
-- (a Z bus write takes priority over the increment)
if pc_ctrl.wr_z = '1' then this.pc := zbus;
elsif pc_ctrl.inc = '1' then this.pc := this.pc_inc; end if;
-- update SR
case sr_ctrl.sel is
when SEL_PREV => -- leave sr unchanged
when SEL_WBUS => this.sr := to_sr(wbus);
when SEL_ZBUS => this.sr := to_sr(zbus);
when SEL_DIV0U =>
this.sr.m := '0';
this.sr.q := '0';
this.sr.t := '0';
when SEL_ARITH =>
this.sr := arith_update_sr(this.sr,
-- for DIV1 aluinx was shifted left one and the MSB lost,
-- so use xbus and ybus instead
-- aluinx(aluinx'left), aluiny(aluiny'left),
xbus(xbus'left), ybus(ybus'left), arith_out(31 downto 0),
arith_out(arith_out'left), arith_func, func.arith.sr);
when SEL_LOGIC =>
this.sr := logic_update_sr(this.sr, logic_out, func.logic_sr);
when SEL_INT_MASK =>
this.sr.int_mask := sr_ctrl.ilevel;
when SEL_SET_T =>
-- leave most of sr unchanged, but set the T bit
case sr_ctrl.t is
when SEL_CLEAR => this.sr.t := '0';
when SEL_SET => this.sr.t := '1';
when SEL_SHIFT => this.sr.t := sfto;
when SEL_CARRY => this.sr.t := arith_out(arith_out'left);
end case;
end case;
-- mac_s samples the S bit after the SR update above
if mac.s_latch = '1' then this.mac_s := this.sr.s; end if;
this.data_o_lock := mem.lock;
end if;
-- Recompute pc_inc from the (possibly just updated) PC. This replaces
-- the stop_pc_inc hold applied above; the hold is reapplied at the top
-- of the next evaluation for as long as stop_pc_inc stays set.
this.pc_inc := std_logic_vector(unsigned(this.pc)+2);
-- all debug commands are ACKed when RUN or READY state reached.
if (next_state = RUN or next_state = READY) then
if this.debug_o.ack = '0' and debug_i.en = '1' then
if debug_i.cmd = INSERT then
-- latch the value being written to the register file for
-- the debug output.
this.debug_o.d := reg_wr_data_o;
else
-- latch the PC value to simplify debugging and profiling.
-- Without this multiple inserts, including a JSR and RTS
-- are needed to get the PC.
this.debug_o.d := this.pc;
end if;
end if;
this.debug_o.ack := debug_i.en;
this.stop_pc_inc := '0';
end if;
this.debug_state := next_state;
-- rdy reflects the state that was just selected as the next state
if this.debug_state = READY then
this.debug_o.rdy := '1';
else
this.debug_o.rdy := '0';
end if;
this_c <= this;
end process;
-- State register for the datapath.
-- rst = '1' asynchronously forces this_r to DATAPATH_RESET; otherwise the
-- next state this_c is captured on each rising edge of clk.
-- rising_edge() is used instead of "clk='1' and clk'event" so that a
-- transition from a metavalue ('U', 'X', ...) to '1' is not treated as
-- a clock edge in simulation.
datapath_r0 : process(clk, rst)
begin
  if rst = '1' then
    this_r <= DATAPATH_RESET;
  elsif rising_edge(clk) then
    this_r <= this_c;
  end if;
end process;
-- Registered state fed back into the datapath
pc            <= this_r.pc;
sr            <= this_r.sr;
wbus          <= this_r.m_dr;
ybus_override <= this_r.ybus_override(0);

-- Registered outputs
mac_s              <= this_r.mac_s;
db_lock            <= this_r.data_o_lock;
db_o               <= this_r.data_o;
inst_o             <= this_r.inst_o;
if_dr              <= this_r.if_dr;
illegal_delay_slot <= this_r.illegal_delay_slot;
illegal_instr      <= this_r.illegal_instr;
enter_debug        <= this_r.enter_debug(0);

-- interrupts are unmasked only in the RUN state with no enter_debug
-- request anywhere in the pipeline
mask_int <= '0'
  when this_r.debug_state = RUN
   and this_r.enter_debug = (this_r.enter_debug'range => '0')
  else '1';

-- Combinatorial outputs, taken from the next state this_c
slot_o  <= this_c.slot;
slot    <= slot_o;
debug_o <= this_c.debug_o;
-- Need to output T combinatorially so that decoder can make
-- conditional branch decisions
t_bcc   <= this_c.sr.t;

-- if_stall is never asserted by this architecture
if_stall <= '0';
end architecture stru;