-- Mirror of https://github.com/Gehstock/Mist_FPGA.git
-- (synced 2026-04-27 04:46:51 +00:00; mirror listing: 1823 lines, 71 KiB, VHDL)
----------------------------------------------------------------------------------
|
|
-- Company:
|
|
-- Engineer: Erik Piehl
|
|
--
|
|
-- Create Date: 09:53:30 04/02/2017
|
|
-- Design Name: TMS9900 CPU Core
|
|
-- Module Name: tms9900 - Behavioral
|
|
-- Project Name:
|
|
-- Target Devices: XC6SLX9
|
|
-- Tool versions: ISE 14.7
|
|
-- Description: Toplevel of the CPU core implementation
|
|
--
|
|
-- Dependencies:
|
|
--
|
|
-- Revision:
|
|
-- Revision 0.01 - File Created
|
|
-- Additional Comments:
|
|
--
|
|
-- Added CPU enable signal, so can clock at different frequency to CPU clock. Mike Coates
|
|
----------------------------------------------------------------------------------
|
|
library IEEE;
|
|
use IEEE.STD_LOGIC_1164.ALL;
|
|
-- Uncomment the following library declaration if using
|
|
-- arithmetic functions with Signed or Unsigned values
|
|
use IEEE.NUMERIC_STD.ALL;
|
|
-- Uncomment the following library declaration if instantiating
|
|
-- any Xilinx primitives in this code.
|
|
--library UNISIM;
|
|
--use UNISIM.VComponents.all;
|
|
|
|
-- simulation begin
|
|
--USE STD.TEXTIO.ALL;
|
|
--USE IEEE.STD_LOGIC_TEXTIO.ALL;
|
|
-- simulation end
|
|
|
|
|
|
-- Top-level interface of the TMS9900 CPU core.
entity tms9900 is
    generic (
        -- Multiplier applied to the nominal per-instruction / per-access cycle
        -- counts that the core adds to its instruction pacing counter
        -- (delay_ir_wait). With the default of 0 nothing is ever added.
        cycle_clks_g : integer := 0
    );
    port (
        -- Clocking and global control ------------------------------------
        clk            : in  std_logic;                      -- input clock
        enable         : in  std_logic;                      -- CPU enable: the state machine only advances on clock edges where this is '1'
        reset          : in  std_logic;                      -- reset, active high

        -- Memory bus ------------------------------------------------------
        addr_out       : out std_logic_vector(15 downto 0);
        data_in        : in  std_logic_vector(15 downto 0);
        data_out       : out std_logic_vector(15 downto 0);
        rd             : out std_logic;                      -- read strobe (author's note: working read with Pepino 40ns)
        wr             : out std_logic;                      -- write strobe (author's note: working write with Pepino 60ns)
        ready          : in  std_logic := '1';               -- memory ready; currently connected to speech ready
        iaq            : out std_logic;                      -- high while an instruction opcode is being fetched
        as             : out std_logic;                      -- address strobe, when high new address is valid, starts a memory cycle

        -- Debug outputs ---------------------------------------------------
        -- (test_out, alu_debug_out and alu_debug_oper existed in earlier
        --  versions and are disabled.)
        alu_debug_arg1 : out std_logic_vector(15 downto 0);  -- last destination operand given to the ALU by a dual-operand instruction
        alu_debug_arg2 : out std_logic_vector(15 downto 0);  -- last source operand given to the ALU by a dual-operand instruction
        cpu_debug_out  : out std_logic_vector(95 downto 0);  -- from MSB: last write data, last write address, ST, PC, PC at IR load, IR
        mult_debug_out : out std_logic_vector(35 downto 0);  -- raw multiplier product

        -- Interrupts ------------------------------------------------------
        int_req        : in  std_logic;                      -- interrupt request, active high
        ic03           : in  std_logic_vector(3 downto 0);   -- interrupt priority for the request, 0001 is the highest (0000 is reset)
        int_ack        : out std_logic;                      -- does not exist on the TMS9900, when high CPU vectors to interrupt

        -- CRU -------------------------------------------------------------
        cruin          : in  std_logic;
        cruout         : out std_logic;
        cruclk         : out std_logic;

        -- DMA -------------------------------------------------------------
        hold           : in  std_logic;                      -- DMA request, active high
        holda          : out std_logic;                      -- DMA ack, active high

        -- Configuration and status -----------------------------------------
        waits          : in  std_logic_vector(7 downto 0);   -- number of wait states per memory cycle
        scratch_en     : in  std_logic;                      -- when 1 in-core scratchpad RAM is enabled
        stuck          : out std_logic;                      -- when high the CPU is stuck
        turbo          : in  std_logic                       -- when '1' instruction fetch does not wait for the pacing counter
    );
end tms9900;
|
|
|
|
architecture Behavioral of tms9900 is
|
|
-- Address bus: drives addr_out, and bits 7..1 index the in-core scratchpad.
signal addr : std_logic_vector(15 downto 0);

-- CPU architecture registers: program counter, workspace pointer, status.
signal pc, w, st : std_logic_vector(15 downto 0);

-- Internal working registers.
signal ea                : std_logic_vector(15 downto 0); -- effective address
signal ir                : std_logic_vector(15 downto 0); -- instruction register
signal rd_dat            : std_logic_vector(15 downto 0); -- data read from memory
signal wr_dat            : std_logic_vector(15 downto 0); -- data written to memory
signal reg_t             : std_logic_vector(15 downto 0); -- temporary register
signal reg_t2            : std_logic_vector(15 downto 0); -- storage of source operand
signal reg_stcr          : std_logic_vector(15 downto 0); -- specific storage for STCR instruction - BUGBUG
signal read_byte_aligner : std_logic_vector(15 downto 0); -- align bytes to words for reads

-- Debug-only signals.
signal pc_ir      : std_logic_vector(15 downto 0); -- PC value captured when IR is loaded - debug BUGBUG
signal first_ir   : std_logic_vector(15 downto 0);
signal capture_ir : boolean := false;
signal alu_debug_src_arg, alu_debug_dst_arg : std_logic_vector(15 downto 0); -- operands exported on alu_debug_arg2 / alu_debug_arg1
|
|
|
|
-- States of the CPU control state machine.
-- The order of the literals is kept exactly as originally declared.
type cpu_state_type is (
    -- memory read starters, fetch / decode and misc
    do_pc_read,
    do_alu_read,
    do_fetch,
    do_decode,
    do_branch,
    do_stuck,
    -- read cycle: external memory phases and scratchpad phases
    do_read,
    do_read0, do_read1, do_read2, do_read3,
    do_read_pad, do_read_pad1,
    -- write cycle
    do_write,
    do_write0, do_write1, do_write2, do_write3,
    -- immediate-operand instructions (LWPI/LIMI, LI/AI/ANDI/ORI/CI)
    do_ir_imm, do_lwpi_limi,
    do_load_imm, do_load_imm2, do_load_imm3, do_load_imm4, do_load_imm5,
    -- operand fetch / store sequences
    do_read_operand0, do_read_operand1, do_read_operand2,
    do_read_operand3, do_read_operand4, do_read_operand5,
    do_write_operand0, do_write_operand1, do_write_operand2,
    do_write_operand3, do_write_operand4,
    do_alu_write,
    -- dual operand instructions
    do_dual_op, do_dual_op1, do_dual_op2, do_dual_op3,
    -- operand address calculation
    do_source_address0, do_source_address1, do_source_address2, do_source_address3,
    do_source_address4, do_source_address5, do_source_address6,
    -- single operand instructions
    do_branch_b_bl, do_single_op_read, do_single_op_writeback,
    -- RTWP
    do_rtwp0, do_rtwp1, do_rtwp2, do_rtwp3,
    -- shifts
    do_shifts0, do_shifts1, do_shifts2, do_shifts3, do_shifts4,
    -- BLWP-style context switch (also entered on reset and interrupt)
    do_blwp00, do_blwp0, do_blwp_xop, do_blwp1, do_blwp2, do_blwp3,
    -- SBO / SBZ / TB
    do_single_bit_cru0, do_single_bit_cru1, do_single_bit_cru2,
    -- external instructions and STST / STWP
    do_ext_instructions, do_store_instructions,
    -- COC, CZC, XOR, MPY, DIV
    do_coc_czc_etc0, do_coc_czc_etc1, do_coc_czc_etc2, do_coc_czc_etc3,
    -- XOP
    do_xop,
    -- LDCR
    do_ldcr0, do_ldcr00, do_ldcr1, do_ldcr2, do_ldcr3, do_ldcr4, do_ldcr5,
    -- STCR
    do_stcr0, do_stcr6, do_stcr7,
    do_stcr_delay0, do_stcr_delay1,
    -- idle, multiply store and divide steps
    do_idle_wait, do_mul_store0, do_mul_store1, do_mul_store2,
    do_div0, do_div1, do_div2, do_div3, do_div4, do_div5
);
|
|
-- State machine registers.
signal cpu_state                : cpu_state_type; -- current state
signal cpu_state_next           : cpu_state_type; -- state entered when the running read/write cycle completes
signal cpu_state_operand_return : cpu_state_type; -- set by decode before operand sequences; presumably the state they return to

-- ALU datapath.
signal arg1, arg2 : std_logic_vector(15 downto 0); -- ALU operands
signal alu_out    : std_logic_vector(16 downto 0); -- ALU output, one bit wider than the operands
signal alu_result : std_logic_vector(15 downto 0); -- alu_out(15 downto 0)

-- Counters.
signal shift_count    : std_logic_vector(4 downto 0);  -- shift amount, loaded by decode for the shift instructions
signal delay_count    : std_logic_vector(7 downto 0);  -- wait-state counter for memory cycles, loaded from waits
signal delay_ir_count : std_logic_vector(15 downto 0); -- instruction pacing: counter compared against delay_ir_wait in do_fetch
signal delay_ir_wait  : std_logic_vector(15 downto 0); -- instruction pacing: accumulated cycle budget of the current instruction
|
|
|
|
-- Operations the ALU can perform (selected by signal ope).
-- The order of the literals is kept exactly as originally declared.
type alu_operation_type is (
    -- pass-through of one operand
    alu_load1,
    alu_load2,
    -- arithmetic and logic
    alu_add,
    alu_or,
    alu_and,
    alu_sub,
    alu_compare,
    alu_and_not,
    alu_xor,
    -- compare ones / zeros corresponding
    alu_coc,
    alu_czc,
    -- byte swap and absolute value of arg2
    alu_swpb2,
    alu_abs,
    -- single-bit shifts of arg2
    alu_sla,
    alu_sra,
    alu_src,
    alu_srl
);
|
|
-- Currently selected ALU operation.
signal ope : alu_operation_type;

-- Combinational status flags derived from the ALU operands and result.
signal alu_flag_zero,
       alu_flag_overflow,
       alu_logical_gt,
       alu_arithmetic_gt,
       alu_flag_carry,
       alu_flag_parity,
       alu_flag_parity_source : std_logic;

signal i_am_xop         : boolean := False;
signal set_int_priority : boolean := False;

-- operand_mode controls fetching of operands, i.e. addressing modes:
--   operand_mode(5:4) is the mode R, *R, @ADDR, @ADDR(R), *R+
--   operand_mode(3:0) is the register number
signal operand_mode : std_logic_vector(5 downto 0);
-- True for 16-bit operations; if false, we have a byte (matters for autoinc).
signal operand_word : boolean;

-- NOTE(review): presumably the number of clocks used for CRU timing; its use
-- is outside this part of the file.
constant cru_delay_clocks : std_logic_vector(7 downto 0) := x"05";

-- Data and address of the most recent external write, exported on cpu_debug_out.
signal debug_wr_data : std_logic_vector(15 downto 0);
signal debug_wr_addr : std_logic_vector(15 downto 0);
|
|
|
|
-- Clocked 18 x 18 -> 36 bit multiplier, implemented outside this file.
component multiplier is
    port (
        clk : in  std_logic;
        a   : in  std_logic_vector(17 downto 0);
        b   : in  std_logic_vector(17 downto 0);
        p   : out std_logic_vector(35 downto 0)
    );
end component;

-- Multiplier operands and product.
signal mult_a, mult_b : std_logic_vector(17 downto 0);
signal mult_product   : std_logic_vector(35 downto 0);

-- Working registers for the divide instruction.
signal dividend    : std_logic_vector(31 downto 0);
signal divider_sub : std_logic_vector(16 downto 0);
|
|
|
|
|
|
-- In-core scratchpad RAM, implemented outside this file.
-- It is addressed with a 7-bit word address (address bits 7..1).
component scratchpad is
    port (
        addr : in  std_logic_vector(7 downto 1);
        din  : in  std_logic_vector(15 downto 0);
        dout : out std_logic_vector(15 downto 0);
        clk  : in  std_logic;
        wr   : in  std_logic
    );
end component;

-- Scratchpad write enable and read data.
-- (A separate scratchpad_en signal existed earlier and is disabled.)
signal scratchpad_wr  : std_logic;
signal scratchpad_out : std_logic_vector(15 downto 0);
|
|
|
|
-- Starts a read of the word at the program counter and advances the PC.
--
-- Used by the fetch state to begin the opcode read in the same clock instead
-- of spending an extra cycle in do_pc_read; it performs the same actions as
-- that state.
--
--   pc            : program counter; read, then scheduled to word-aligned pc + 2
--   addr          : receives the word-aligned pc (bit 0 forced to '0')
--   cpu_state     : set to do_read_pad for a scratchpad access, do_read0 otherwise
--   as, rd        : driven to '1' only for an external memory access
--   scratchpad_wr : driven to '0' only for a scratchpad access
--
-- The architecture-level input scratch_en is read directly (not a parameter).
procedure do_pc_read_quick(
    signal pc            : inout std_logic_vector(15 downto 0);
    signal addr          : out   std_logic_vector(15 downto 0);
    signal cpu_state     : out   cpu_state_type;
    signal as            : out   std_logic;
    signal rd            : out   std_logic;
    signal scratchpad_wr : out   std_logic
) is
    variable fetch_addr    : std_logic_vector(15 downto 0);
    variable in_scratchpad : boolean;
begin
    -- Only the top 15 bits of pc are significant; bit 0 is dropped.
    fetch_addr    := pc(15 downto 1) & '0';
    -- Page 0x83xx is served by the in-core scratchpad when it is enabled.
    in_scratchpad := (pc(15 downto 8) = x"83") and (scratch_en = '1');

    addr <= fetch_addr;
    pc   <= std_logic_vector(unsigned(fetch_addr) + 2);

    if not in_scratchpad then
        -- external memory: start a bus read cycle
        as        <= '1';
        rd        <= '1';
        cpu_state <= do_read0;
    else
        -- scratchpad: make sure the write enable is off and read the pad
        scratchpad_wr <= '0';
        cpu_state     <= do_read_pad;
    end if;
end do_pc_read_quick;
|
|
|
|
begin
|
|
|
|
-- Export the internal address bus.
addr_out <= addr;

-- Hardware multiplier; the raw product is also exported for debugging.
my_mult : multiplier
    port map (
        clk => clk,
        a   => mult_a,
        b   => mult_b,
        p   => mult_product
    );

mult_debug_out <= mult_product;

-- In-core scratchpad RAM, indexed by the word address addr(7 downto 1).
my_scratchpad : scratchpad
    port map (
        clk  => clk,
        addr => addr(7 downto 1),
        wr   => scratchpad_wr,
        din  => wr_dat,
        dout => scratchpad_out
    );

-- Debug bus, 6 x 16 bits, most significant word first:
-- last write data, last write address, ST, PC, PC at IR load, IR.
cpu_debug_out <= debug_wr_data & debug_wr_addr & st & pc & pc_ir & ir;
|
|
|
|
-- Combinational ALU.
--
-- Inputs : arg1, arg2 (16-bit operands) and ope (operation selector).
-- Output : alu_out (17 bits). Bits 15..0 carry the result. Bit 16 carries
--          the carry/borrow of the 17-bit add/subtract, or the bit shifted
--          out of arg2 for the shifts; for load, logic and byte-swap
--          operations it is '0'.
--
-- When the ALU is used for instruction execution arg1 is DA (destination
-- operand) and arg2 is SA (source operand).
--
-- Changes relative to the previous version (outputs are unchanged):
--   * the never-used process variable "t" and the commented-out code that
--     referred to it were removed;
--   * alu_sub and alu_compare, which computed the same expression in two
--     separate branches, now share one branch.
process(arg1, arg2, ope)
begin
    case ope is
        -- Pass one operand through unchanged.
        when alu_load1 =>
            alu_out <= '0' & arg1;
        when alu_load2 =>
            alu_out <= '0' & arg2;

        -- 17-bit unsigned add: bit 16 is the carry out.
        when alu_add =>
            alu_out <= std_logic_vector(unsigned('0' & arg1) + unsigned('0' & arg2));

        when alu_or =>
            alu_out <= '0' & arg1 or '0' & arg2;
        when alu_and =>
            alu_out <= '0' & arg1 and '0' & arg2;

        -- 17-bit unsigned subtract arg1 - arg2: bit 16 is the borrow.
        -- Compare uses exactly the same arithmetic as subtract.
        when alu_sub | alu_compare =>
            alu_out <= std_logic_vector(unsigned('0' & arg1) - unsigned('0' & arg2));

        when alu_and_not =>
            alu_out <= '0' & arg1 and not ('0' & arg2);
        when alu_xor =>
            alu_out <= '0' & arg1 xor '0' & arg2;

        -- Compare ones corresponding: result is arg1 with every bit that is
        -- also set in arg2 cleared.
        when alu_coc =>
            alu_out <= ('0' & arg1 xor ('0' & arg2)) and ('0' & arg1);
        -- Compare zeros corresponding: result is the bits set in both operands.
        when alu_czc =>
            alu_out <= ('0' & arg1 xor not ('0' & arg2)) and ('0' & arg1);

        -- Swap bytes of arg2.
        when alu_swpb2 =>
            alu_out <= '0' & arg2(7 downto 0) & arg2(15 downto 8);

        -- Absolute value of arg2.
        when alu_abs =>
            if arg2(15) = '0' then
                alu_out <= '0' & arg2;
            else
                -- Same as subtract, with sign-extended operands
                -- (arg1 must be zero; this is set elsewhere).
                alu_out <= std_logic_vector(unsigned(arg1(15) & arg1) - unsigned(arg2(15) & arg2));
            end if;

        -- Single-bit shifts of arg2. Bit 16 receives the bit shifted out.
        when alu_sla =>   -- shift left, '0' enters at bit 0
            alu_out <= arg2 & '0';
        when alu_sra =>   -- shift right arithmetic, sign bit is replicated
            alu_out <= arg2(0) & arg2(15) & arg2(15 downto 1);
        when alu_src =>   -- shift right circular, bit 0 re-enters at bit 15
            alu_out <= arg2(0) & arg2(0) & arg2(15 downto 1);
        when alu_srl =>   -- shift right logical, '0' enters at bit 15
            alu_out <= arg2(0) & '0' & arg2(15 downto 1);
    end case;
end process;
|
|
-- 16-bit ALU result (alu_out without its top bit).
alu_result <= alu_out(15 downto 0);

-- Debug outputs show the operands latched by the dual-operand instructions
-- (direct export of alu_out / arg1 / arg2 is disabled).
alu_debug_arg1 <= alu_debug_dst_arg;
alu_debug_arg2 <= alu_debug_src_arg;

-- Status flag equations. Flag positions:
--   ST0 ST1 ST2 ST3 ST4 ST5
--   L>  A>  =   C   O   P
-- When looking at the data sheet arg1 is (DA) and arg2 is (SA), sub is (DA)-(SA).

-- ST0, logical greater than.
-- Compare: arg2 is above arg1 as unsigned numbers. Otherwise: result is non-zero.
alu_logical_gt <=
    '1' when (ope = alu_compare and
              ((arg1(15) = '0' and arg2(15) = '1') or
               (arg1(15) = arg2(15) and alu_result(15) = '1')))
          or (ope /= alu_compare and alu_result /= x"0000")
    else '0';

-- ST1, arithmetic greater than.
-- Compare: arg2 is above arg1 as signed numbers. Abs: the operand itself is
-- positive and non-zero. Otherwise: result is positive and non-zero.
alu_arithmetic_gt <=
    '1' when (ope = alu_compare and
              ((arg1(15) = '1' and arg2(15) = '0') or
               (arg1(15) = arg2(15) and alu_result(15) = '1')))
          or (ope = alu_abs and arg2(15) = '0' and arg2 /= x"0000")
          or (ope /= alu_compare and ope /= alu_abs and
              alu_result(15) = '0' and alu_result /= x"0000")
    else '0';

-- ST2, equal / zero.
alu_flag_zero <= '1' when alu_result = x"0000" else '0';

-- ST3, carry. For sub the carry out is inverted.
alu_flag_carry <= not alu_out(16) when ope = alu_sub else alu_out(16);

-- ST4, overflow.
--   compare / sub / abs : operand signs differ and the result sign differs from arg1
--   sla                 : the MSB changes during the shift
--   everything else     : operand signs agree and the result sign differs from arg1
alu_flag_overflow <=
    '1' when ((ope = alu_compare or ope = alu_sub or ope = alu_abs) and
              arg1(15) /= arg2(15) and alu_result(15) /= arg1(15))
          or ((ope /= alu_sla and ope /= alu_compare and ope /= alu_sub and ope /= alu_abs) and
              arg1(15) = arg2(15) and alu_result(15) /= arg1(15))
          or (ope = alu_sla and alu_result(15) /= arg2(15))
    else '0';

-- ST5, parity: XOR of the eight bits of the high byte of the result.
alu_flag_parity <=
    alu_result(8)  xor alu_result(9)  xor alu_result(10) xor alu_result(11) xor
    alu_result(12) xor alu_result(13) xor alu_result(14) xor alu_result(15);

-- Source parity (high byte of arg2), used with CB and MOVB instructions.
alu_flag_parity_source <=
    arg2(8)  xor arg2(9)  xor arg2(10) xor arg2(11) xor
    arg2(12) xor arg2(13) xor arg2(14) xor arg2(15);
|
|
|
|
-- Byte aligner
|
|
-- Byte aligner (combinational).
--
-- Word operations (operand_word true) pass rd_dat through unchanged.
-- Byte operations pick the byte addressed by ea(0): rd_dat(15 downto 8) when
-- ea(0) = '0', rd_dat(7 downto 0) otherwise. The selected byte is placed in
-- the upper half of the word and the lower half is cleared.
read_byte_aligner <=
    rd_dat                      when operand_word else
    rd_dat(15 downto 8) & x"00" when ea(0) = '0'  else
    rd_dat(7 downto 0)  & x"00";
|
|
|
|
process(clk, reset, hold) is
|
|
variable offset : std_logic_vector(15 downto 0);
|
|
variable take_branch : boolean;
|
|
variable dec_shift_count : boolean := False;
|
|
variable inc_ir_count : boolean := True;
|
|
-- simulation begin
|
|
-- variable my_line : line; -- from textio
|
|
-- simulation end
|
|
begin
|
|
if reset = '1' then
|
|
st <= (others => '0');
|
|
pc <= (others => '0');
|
|
stuck <= '0';
|
|
rd <= '0';
|
|
wr <= '0';
|
|
cruclk <= '0';
|
|
-- Prepare for BLWP from 0
|
|
i_am_xop <= False;
|
|
arg2 <= x"0000"; -- pass pointer to WP via ALU as our EA
|
|
ope <= alu_load2;
|
|
cpu_state <= do_blwp00; -- do blwp from zero
|
|
delay_count <= "00000000";
|
|
holda <= hold; -- during reset hold is respected
|
|
capture_ir <= True;
|
|
set_int_priority <= False;
|
|
int_ack <= '0';
|
|
-- scratchpad_en <= '0';
|
|
scratchpad_wr <= '0';
|
|
delay_ir_count <= x"0000";
|
|
delay_ir_wait <= x"0000";
|
|
else
|
|
if rising_edge(clk) then
|
|
if enable = '1' then -- CPU Enable signal
|
|
|
|
dec_shift_count := False;
|
|
inc_ir_count := True;
|
|
|
|
-- CPU state changes
|
|
case cpu_state is
|
|
------------------------
|
|
-- memory operations  --
|
|
------------------------
|
|
when do_pc_read =>
|
|
-- pc is only top 15 bits
|
|
addr <= pc(15 downto 1) & "0";
|
|
pc <= std_logic_vector(unsigned(pc(15 downto 1) & "0") + to_unsigned(2,16));
|
|
if pc(15 downto 8) = x"83" and scratch_en='1' then
|
|
-- scratchpad support begin
|
|
scratchpad_wr <= '0';
|
|
-- scratchpad_en <= '1';
|
|
cpu_state <= do_read_pad;
|
|
else
|
|
as <= '1';
|
|
rd <= '1';
|
|
cpu_state <= do_read0;
|
|
end if;
|
|
when do_read => -- start memory read cycle
|
|
addr <= ea;
|
|
if ea(15 downto 8) = x"83" and scratch_en='1' then
|
|
-- scratchpad support begin
|
|
scratchpad_wr <= '0';
|
|
-- scratchpad_en <= '1';
|
|
cpu_state <= do_read_pad;
|
|
else
|
|
as <= '1';
|
|
rd <= '1';
|
|
cpu_state <= do_read0;
|
|
end if;
|
|
when do_alu_read =>
|
|
addr <= alu_result;
|
|
if alu_result(15 downto 8) = x"83" and scratch_en='1' then
|
|
scratchpad_wr <= '0';
|
|
-- scratchpad_en <= '1';
|
|
cpu_state <= do_read_pad;
|
|
else
|
|
as <= '1';
|
|
rd <= '1';
|
|
cpu_state <= do_read0;
|
|
end if;
|
|
when do_read0 =>
|
|
cpu_state <= do_read1;
|
|
as <= '0';
|
|
delay_count <= waits; -- used to be zero (i.e. not assigned)
|
|
when do_read1 =>
|
|
if delay_count = "00000000" then
|
|
cpu_state <= do_read2;
|
|
end if;
|
|
when do_read2 => cpu_state <= do_read3;
|
|
when do_read3 =>
|
|
if ready='1' then
|
|
if (addr(15 downto 10) /= "100000") and -- "100000" = 8000-83FF
|
|
(addr(15 downto 13) /= "000") then -- "000" = 0000-1FFF
|
|
delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(4*cycle_clks_g, 16));
|
|
end if;
|
|
cpu_state <= cpu_state_next;
|
|
rd <= '0';
|
|
rd_dat <= data_in;
|
|
end if;
|
|
when do_read_pad =>
|
|
cpu_state <= do_read_pad1;
|
|
when do_read_pad1 =>
|
|
-- read from scratchpad
|
|
-- scratchpad_en <= '0';
|
|
cpu_state <= cpu_state_next; -- do_read4; -- cpu_state_next;
|
|
data_out <= scratchpad_out; -- for debugging show what was read
|
|
rd_dat <= scratchpad_out;
|
|
|
|
-- write cycles --
|
|
when do_write =>
|
|
addr <= ea;
|
|
data_out <= wr_dat;
|
|
if ea(15 downto 8) = x"83" and scratch_en='1' then
|
|
scratchpad_wr <= '1';
|
|
-- scratchpad_en <= '1';
|
|
cpu_state <= do_write3;
|
|
else
|
|
as <= '1';
|
|
wr <= '1';
|
|
cpu_state <= do_write0;
|
|
end if;
|
|
when do_alu_write =>
|
|
-- scratchpad support begin
|
|
addr <= alu_result;
|
|
data_out <= wr_dat;
|
|
if alu_result(15 downto 8) = x"83" and scratch_en='1' then
|
|
scratchpad_wr <= '1';
|
|
-- scratchpad_en <= '1';
|
|
cpu_state <= do_write3;
|
|
else
|
|
-- external memory
|
|
as <= '1';
|
|
wr <= '1';
|
|
cpu_state <= do_write0;
|
|
end if;
|
|
when do_write0 =>
|
|
cpu_state <= do_write1;
|
|
as <= '0';
|
|
if waits(7 downto 1) = "0000000" then
|
|
delay_count <= "00000010"; -- minimum value
|
|
else
|
|
delay_count <= waits;
|
|
end if;
|
|
debug_wr_data <= wr_dat;
|
|
debug_wr_addr <= addr;
|
|
when do_write1 =>
|
|
if delay_count = "00000000" then
|
|
cpu_state <= do_write2;
|
|
end if;
|
|
when do_write2 => cpu_state <= do_write3;
|
|
when do_write3 =>
|
|
if ready='1' then
|
|
if (addr(15 downto 10) /= "100000") and -- "100000" = 8000-83FF
|
|
(addr(15 downto 13) /= "000") then -- "000" = 0000-1FFF
|
|
delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(4*cycle_clks_g, 16));
|
|
end if;
|
|
scratchpad_wr <= '0';
|
|
-- scratchpad_en <= '0';
|
|
cpu_state <= cpu_state_next; -- do_write4; -- cpu_state_next;
|
|
wr <= '0';
|
|
else
|
|
inc_ir_count := False;
|
|
end if;
|
|
----------------
|
|
-- operations --
|
|
----------------
|
|
when do_fetch => -- instruction opcode fetch
|
|
if hold='1' then
|
|
holda <= '1'; -- honor DMA requests here - stay in do_fetch state
|
|
elsif (delay_ir_wait <= delay_ir_count) or (turbo = '1') then -- wait for cycle counter
|
|
inc_ir_count := False;
|
|
delay_ir_count <= x"0000";
|
|
delay_ir_wait <= x"0000";
|
|
holda <= '0';
|
|
i_am_xop <= False;
|
|
-- check interrupt requests
|
|
if int_req = '1' and unsigned(ic03) <= unsigned(st(3 downto 0)) then
|
|
delay_ir_wait <= std_logic_vector(to_unsigned(26*cycle_clks_g, 16));
|
|
-- pass pointer to WP via ALU as our EA
|
|
set_int_priority <= True;
|
|
arg2 <= x"00" & "00" & ic03 & "00"; -- vector through interrupt priority
|
|
ope <= alu_load2;
|
|
cpu_state <= do_blwp00; -- do blwp from interrupt vector
|
|
int_ack <= '1';
|
|
else
|
|
iaq <= '1';
|
|
-- let's run faster in here and save one clock cycle by setting things up already here.
|
|
-- instead of going to do_pc_read let's inline that stuff here.
|
|
-- LEGACY code:
|
|
-- -- cpu_state <= do_pc_read;
|
|
-- do_pc_read_quick(pc=>pc, addr=>addr, cpu_state=>cpu_state, as=>as, rd=>rd, scratchpad_wr=>scratchpad_wr, scratchpad_en=>scratchpad_en);
|
|
do_pc_read_quick(pc=>pc, addr=>addr, cpu_state=>cpu_state, as=>as, rd=>rd, scratchpad_wr=>scratchpad_wr);
|
|
cpu_state_next <= do_decode;
|
|
-- addr <= pc;
|
|
-- pc <= std_logic_vector(unsigned(pc) + to_unsigned(2,16));
|
|
-- if pc(15 downto 8) = x"83" and scratch_en='1' then
|
|
-- -- scratchpad support begin
|
|
-- scratchpad_wr <= '0';
|
|
-- scratchpad_en <= '1';
|
|
-- cpu_state <= do_read_pad;
|
|
-- else
|
|
-- as <= '1';
|
|
-- rd <= '1';
|
|
-- cpu_state <= do_read0;
|
|
-- end if;
|
|
|
|
end if;
|
|
end if;
|
|
-- test_out <= x"0000";
|
|
-------------------------------------------------------------------------------
|
|
-- do_decode
|
|
-------------------------------------------------------------------------------
|
|
when do_decode =>
|
|
operand_word <= True; -- By default 16-bit operations.
|
|
ir <= rd_dat; -- read done, store to instruction register
|
|
pc_ir <= pc; -- store increment PC for debug purposes
|
|
iaq <= '0';
|
|
-- if capture_ir then
|
|
-- capture_ir <= False;
|
|
-- first_ir <= rd_dat;
|
|
-- end if;
|
|
-- Next analyze what we got
|
|
-- check for dual operand instructions with full addressing modes
|
|
if rd_dat(15 downto 13) = "101" or -- A, AB
|
|
rd_dat(15 downto 13) = "100" or -- C, CB
|
|
rd_dat(15 downto 13) = "011" or -- S, SB
|
|
rd_dat(15 downto 13) = "111" or -- SOC, SOCB
|
|
rd_dat(15 downto 13) = "010" or -- SZC, SZCB
|
|
rd_dat(15 downto 13) = "110" then -- MOV, MOVB
|
|
delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(14*cycle_clks_g, 16));
|
|
-- found dual operand instruction. Get source operand.
|
|
operand_mode <= rd_dat(5 downto 0); -- ir not set at this point yet
|
|
if rd_dat(12) = '1' then
|
|
operand_word <= False; -- byte operation
|
|
else
|
|
operand_word <= True;
|
|
end if;
|
|
cpu_state <= do_read_operand0;
|
|
cpu_state_operand_return <= do_dual_op;
|
|
elsif rd_dat(15 downto 12) = "0001" and
|
|
rd_dat(11 downto 8) /= x"D" and rd_dat(11 downto 8) /= x"E" and rd_dat(11 downto 8) /= x"F" then
|
|
delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(8*cycle_clks_g, 16));
|
|
cpu_state <= do_branch;
|
|
elsif rd_dat(15 downto 10) = "000010" then -- SLA, SRA, SRC, SRL
|
|
-- Do all the shifts SLA(10) SRA(00) SRC(11) SRL(01), OPCODE:6 INS:2 C:4 W:4
|
|
delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(12*cycle_clks_g, 16));
|
|
shift_count <= '0' & rd_dat(7 downto 4);
|
|
arg1 <= w;
|
|
arg2 <= x"00" & "000" & rd_dat(3 downto 0) & '0';
|
|
ope <= alu_add; -- calculate workspace address
|
|
cpu_state <= do_shifts0;
|
|
elsif rd_dat = x"0380" then -- RTWP
|
|
delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(14*cycle_clks_g, 16));
|
|
arg1 <= w;
|
|
arg2 <= x"00" & "000" & x"D" & '0'; -- calculate of register 13 (WP)
|
|
ope <= alu_add;
|
|
cpu_state <= do_rtwp0;
|
|
elsif
|
|
rd_dat(15 downto 8) = x"1D" or --SBO
|
|
rd_dat(15 downto 8) = x"1E" or -- SBZ
|
|
rd_dat(15 downto 8) = x"1F" then -- TB
|
|
delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(12*cycle_clks_g, 16));
|
|
-- test_out <= x"8877";
|
|
arg1 <= w;
|
|
arg2 <= x"00" & "000" & x"C" & '0';
|
|
ope <= alu_add;
|
|
cpu_state <= do_alu_read; -- Read WR12
|
|
cpu_state_next <= do_single_bit_cru0;
|
|
elsif rd_dat = x"0340" or rd_dat = x"0360" or rd_dat = x"03C0" or rd_dat = x"03A0" or rd_dat = x"03E0" then
|
|
delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(12*cycle_clks_g, 16));
|
|
-- external instructions IDLE, RSET, CKOF, CKON, LREX
|
|
cpu_state <= do_ext_instructions;
|
|
elsif rd_dat(15 downto 4) = x"02C" or rd_dat(15 downto 4) = x"02A" then -- STST, STWP
|
|
if rd_dat(15 downto 4) = x"02C" then
|
|
delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(8*cycle_clks_g, 16));
|
|
else
|
|
delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(10*cycle_clks_g, 16));
|
|
end if;
|
|
arg1 <= w;
|
|
arg2 <= x"00" & "000" & rd_dat(3 downto 0) & '0';
|
|
ope <= alu_add; -- calculate workspace address
|
|
cpu_state <= do_store_instructions;
|
|
elsif rd_dat(15 downto 13) = "001" and rd_dat(12 downto 10) /= "100" and rd_dat(12 downto 10) /= "101" then
|
|
-- COC, CZC, XOR, MPY, DIV, XOP
|
|
if rd_dat(12 downto 10) = "011" then -- XOP
|
|
delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(36*cycle_clks_g, 16));
|
|
operand_mode <= rd_dat(5 downto 0);
|
|
cpu_state <= do_source_address0;
|
|
cpu_state_operand_return <= do_xop;
|
|
else
|
|
--delay_ir_wait done elsewhere
|
|
operand_mode <= rd_dat(5 downto 0);
|
|
cpu_state <= do_read_operand0;
|
|
cpu_state_operand_return <= do_coc_czc_etc0;
|
|
end if;
|
|
elsif rd_dat(15 downto 11) = "00110" then -- LDCR, STCR
|
|
--delay_ir_wait done elsewhere
|
|
-- set operand_word to byte mode if count of bits is 1..8
|
|
if rd_dat(9 downto 6) = "1000" or (rd_dat(9) = '0' and rd_dat(8 downto 6) /= "000") then
|
|
operand_word <= False;
|
|
end if;
|
|
operand_mode <= rd_dat(5 downto 0);
|
|
if rd_dat(10) = '0' then
|
|
cpu_state <= do_read_operand0;
|
|
cpu_state_operand_return <= do_ldcr0; -- LDCR
|
|
else
|
|
cpu_state <= do_source_address0;
|
|
cpu_state_operand_return <= do_stcr0; -- STCR
|
|
end if;
|
|
elsif rd_dat(15 downto 4) = x"020" or rd_dat(15 downto 4) = x"022" or -- LI, AI
|
|
rd_dat(15 downto 4) = x"024" or rd_dat(15 downto 4) = x"026" or -- ANDI, ORI
|
|
rd_dat(15 downto 4) = x"028" -- CI
|
|
then -- ANDI, ORI
|
|
if rd_dat(15 downto 4) = x"020" then
|
|
delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(12*cycle_clks_g, 16));
|
|
else
|
|
delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(14*cycle_clks_g, 16));
|
|
end if;
|
|
cpu_state <= do_load_imm; -- LI or AI
|
|
elsif rd_dat(15 downto 9) = "0000001" and rd_dat(4 downto 0) = "00000" then
|
|
--delay_ir_wait done elsewhere
|
|
cpu_state <= do_ir_imm;
|
|
elsif rd_dat(15 downto 10) = "000001" then
|
|
--delay_ir_wait done elsewhere
|
|
-- Single operand instructions: BL, B, etc.
|
|
operand_word <= True;
|
|
operand_mode <= rd_dat(5 downto 0);
|
|
cpu_state <= do_source_address0;
|
|
cpu_state_operand_return <= do_branch_b_bl;
|
|
elsif
|
|
rd_dat(15 downto 9) = "0000000" or --illegal (0000-01FF)
|
|
rd_dat(15 downto 5) = "00000011001" or --illegal (0320-033F)
|
|
rd_dat(15 downto 7) = "000001111" or --illegal (0780-07FF)
|
|
rd_dat(15 downto 10) = "000011" then --illegal (0C00-0FFF)
|
|
delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(6*cycle_clks_g, 16));
|
|
cpu_state <= do_fetch; -- 6 cycles delay then next instruction
|
|
else
|
|
cpu_state <= do_stuck; -- unknown instruction, let's get stuck
|
|
end if;
|
|
when do_branch =>
|
|
-- do branching, we need to sign extend ir(7 downto 0) and add it to PC and continue.
|
|
cpu_state <= do_fetch; -- may be overwritten with do_stuck
|
|
take_branch := False;
|
|
case ir(11 downto 8) is
|
|
when "0000" => take_branch := True; -- JMP
|
|
when "0001" => if ST(14)='0' and ST(13)='0' then take_branch := True; end if; -- JLT
|
|
when "0010" => if ST(15)='0' or ST(13)='1' then take_branch := True; end if; -- JLE
|
|
when "0011" => if ST(13)='1' then take_branch := True; end if; -- JEQ
|
|
when "0100" => if ST(15)='1' or ST(13)='1' then take_branch := True; end if; -- JHE
|
|
when "0101" => if ST(14)='1' then take_branch := True; end if; -- JGT
|
|
when "0110" => if ST(13)='0' then take_branch := True; end if; -- JNE
|
|
when "0111" => if ST(12)='0' then take_branch := True; end if; -- JNC
|
|
when "1000" => if ST(12)='1' then take_branch := True; end if; -- JOC (on carry)
|
|
when "1001" => if ST(11)='0' then take_branch := True; end if; -- JNO (no overflow)
|
|
when "1010" => if ST(15)='0' and ST(13)='0' then take_branch := True; end if; -- JL
|
|
when "1011" => if ST(15)='1' and ST(13)='0' then take_branch := True; end if; -- JH
|
|
when "1100" => if ST(10)='1' then take_branch := True; end if; -- JOP (odd parity)
|
|
when others => cpu_state <= do_stuck;
|
|
end case;
|
|
if take_branch then
|
|
delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(2*cycle_clks_g, 16));
|
|
offset := ir(7) & ir(7) & ir(7) & ir(7) & ir(7) & ir(7) & ir(7) & ir(7 downto 0) & '0';
|
|
pc <= std_logic_vector(unsigned(offset) + unsigned(pc));
|
|
end if;
|
|
when do_ir_imm =>
|
|
-- test_out <= x"EE00";
|
|
if ir(8 downto 5) = "0111" or ir(8 downto 5) = "1000" then -- 4 LSBs don't care
|
|
cpu_state <= do_pc_read;
|
|
cpu_state_next <= do_lwpi_limi;
|
|
else
|
|
cpu_state <= do_stuck;
|
|
end if;
|
|
when do_lwpi_limi =>
|
|
cpu_state <= do_fetch;
|
|
if ir(8 downto 5) = "0111" then
|
|
delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(10*cycle_clks_g, 16));
|
|
w <= rd_dat; -- LWPI
|
|
else
|
|
delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(16*cycle_clks_g, 16));
|
|
st(3 downto 0) <= rd_dat(3 downto 0); -- LIMI
|
|
end if;
|
|
|
|
when do_load_imm => -- LI, AI, ANDI, ORI, CI instruction here
|
|
-- test_out <= x"0001";
|
|
cpu_state <= do_pc_read; -- read immediate value from instruction stream
|
|
cpu_state_next <= do_load_imm2;
|
|
when do_load_imm2 =>
|
|
-- test_out <= x"0002";
|
|
reg_t <= rd_dat; -- store the immediate to temp
|
|
arg1 <= w;
|
|
arg2 <= x"00" & "000" & ir(3 downto 0) & '0';
|
|
ope <= alu_add; -- calculate workspace address
|
|
cpu_state <= do_load_imm3;
|
|
when do_load_imm3 => -- read from workspace register
|
|
-- -- test_out <= x"0003";
|
|
ea <= alu_result;
|
|
cpu_state <= do_read;
|
|
cpu_state_next <= do_load_imm4;
|
|
when do_load_imm4 => -- do actual operation
|
|
-- test_out <= x"0004";
|
|
-- The order below is abit funny, but that's due to CI instruction (sub).
|
|
-- CI RX,IMM is defined as IMM-RX, and not RX-IMM
|
|
arg1 <= reg_t; -- temporary holds the immediate parameter
|
|
arg2 <= rd_dat; -- contents of workspace register
|
|
case ir(7 downto 4) is
|
|
when x"0" => ope <= alu_load1; -- LI
|
|
when x"2" => ope <= alu_add; -- AI
|
|
when x"4" => ope <= alu_and; -- ANDI
|
|
when x"6" => ope <= alu_or; -- ORI
|
|
when x"8" => ope <= alu_compare; -- CI
|
|
when others => cpu_state <= do_stuck;
|
|
end case;
|
|
cpu_state <= do_load_imm5;
|
|
when do_load_imm5 => -- write to workspace the result of ALU, ea still points to register
|
|
-- test_out <= x"0005";
|
|
-- let's write flags 0-2 for all instructions
|
|
st(15) <= alu_logical_gt;
|
|
st(14) <= alu_arithmetic_gt;
|
|
st(13) <= alu_flag_zero;
|
|
if ope = alu_add then
|
|
st(12) <= alu_flag_carry;
|
|
st(11) <= alu_flag_overflow;
|
|
end if;
|
|
|
|
if ope /= alu_compare then
|
|
wr_dat <= alu_result;
|
|
cpu_state <= do_write;
|
|
cpu_state_next <= do_fetch;
|
|
else
|
|
-- compare, skip result write altogether
|
|
cpu_state <= do_fetch;
|
|
end if;
|
|
|
|
-------------------------------------------------------------
|
|
-- Dual operand instructions
|
|
-------------------------------------------------------------
|
|
when do_dual_op =>
    -- Dual-operand instructions, entry: keep the (byte-aligned) source
    -- operand in reg_t2, then run the address-calculation subprogram for the
    -- destination field ir(11 downto 6). It returns to do_dual_op1.
    operand_mode             <= ir(11 downto 6);
    cpu_state_operand_return <= do_dual_op1;
    cpu_state                <= do_source_address0;
    reg_t2                   <= read_byte_aligner;

when do_dual_op1 =>
    -- The ALU output now holds the destination address (addressing-mode side
    -- effects already done). Save it in ea, then read the destination
    -- operand, unless this is a word MOV, which does not need the old value.
    ea <= alu_result;
    if ir(15 downto 13) /= "110" or not operand_word then
        -- Every instruction except word MOV: fetch the destination first.
        -- test_out <= x"DD10";
        cpu_state_next <= do_dual_op2;
        cpu_state      <= do_read;
    else
        -- Word MOV: skip the destination read, but still continue through
        -- do_dual_op2 because the flags must be set.
        -- test_out <= x"DD00";
        cpu_state <= do_dual_op2;
    end if;
|
|
when do_dual_op2 =>
    -- Dual-operand instructions: load the ALU inputs and select the
    -- operation from ir(15 downto 13).
    -- test_out <= x"DD02";

    -- Source operand (saved in do_dual_op) always goes to arg2.
    arg2              <= reg_t2;
    alu_debug_src_arg <= reg_t2;  -- kept for debug information

    -- Defaults: destination operand (byte-aligned) goes to arg1 and the next
    -- state is do_dual_op3. The MOV and "others" arms below override these.
    arg1              <= read_byte_aligner;
    alu_debug_dst_arg <= read_byte_aligner;
    cpu_state         <= do_dual_op3;

    case ir(15 downto 13) is
        when "010" => ope <= alu_and_not;
        when "011" => ope <= alu_sub;      -- S, subtract
        when "100" => ope <= alu_compare;  -- C, compare
        when "101" => ope <= alu_add;      -- A, add
        when "110" =>                      -- MOV
            ope <= alu_load2;
            -- Drive zero to arg1 so the flags behave properly for MOV.
            arg1              <= (others => '0');
            alu_debug_dst_arg <= (others => '0');
        when "111" => ope <= alu_or;
        when others => cpu_state <= do_stuck;
    end case;
|
|
when do_dual_op3 =>
    -- Dual-operand instructions, final step: latch the status flags and
    -- write the result back (compare only updates flags).

    -- Flags 0-2 are stored for every instruction.
    st(15) <= alu_logical_gt;
    st(14) <= alu_arithmetic_gt;
    st(13) <= alu_flag_zero;

    -- Add and subtract set two more flags.
    case ir(15 downto 13) is
        when "101" | "011" =>
            st(12) <= alu_flag_carry;
            st(11) <= alu_flag_overflow;
        when others =>
            null;
    end case;

    -- Byte operations set parity. For MOVB and CB it depends on the source
    -- operand only, so a separate ALU flag is used.
    if not operand_word then
        case ir(15 downto 13) is
            when "100" | "110" => st(10) <= alu_flag_parity_source;  -- CB, MOVB
            when others        => st(10) <= alu_flag_parity;
        end case;
    end if;

    if ir(15 downto 13) = "100" then
        -- Compare: already done, nothing is stored.
        -- test_out <= x"DD03";
        cpu_state <= do_fetch;
    else
        -- Write the result back, then fetch the next instruction.
        -- test_out <= x"DD13";
        cpu_state      <= do_write;
        cpu_state_next <= do_fetch;

        -- simulation debug start (byte path only in the original)
        -- write(my_line, STRING'("do_dual_op3 byte arg1 "));
        -- hwrite(my_line, arg1);
        -- write(my_line, STRING'(" arg2 "));
        -- hwrite(my_line, arg2);
        -- write(my_line, STRING'(" alu_result "));
        -- hwrite(my_line, alu_result);
        -- write(my_line, STRING'(" rd_dat "));
        -- hwrite(my_line, rd_dat);
        -- simulation debug end

        if operand_word then
            wr_dat <= alu_result;
        elsif operand_mode(5 downto 4) = "00" or ea(0) = '0' then
            -- Byte operation on a register, or on the high byte in memory:
            -- the result replaces the high byte, the low byte is preserved.
            wr_dat <= alu_result(15 downto 8) & rd_dat(7 downto 0);
            -- write(my_line, STRING'(" HIGH "));
        else
            -- Byte operation on the low byte in memory: the result (carried
            -- in the ALU's high byte) replaces the low byte only.
            wr_dat <= rd_dat(15 downto 8) & alu_result(15 downto 8);
            -- write(my_line, STRING'(" LOW "));
        end if;
        -- writeline(OUTPUT, my_line); -- simulation
    end if;
|
|
|
|
-------------------------------------------------------------
|
|
-- Single operand instructions
|
|
-------------------------------------------------------------
|
|
when do_branch_b_bl =>
    -- Dispatcher for the single-operand instructions. On entry the source
    -- address (SA) is at the ALU output. Each arm adds its cycle count
    -- (scaled by cycle_clks_g) to delay_ir_wait.
    case ir(9 downto 6) is

        when "0000" =>  -- BLWP: alu_result points to the new WP
            cpu_state     <= do_blwp00;
            delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(26*cycle_clks_g, 16));

        when "0001" =>  -- B: the source address is the new PC
            cpu_state     <= do_fetch;
            pc            <= alu_result;
            delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(8*cycle_clks_g, 16));

        when "0010" =>  -- X: read the word at SA; do_single_op_read executes it
            cpu_state      <= do_read;
            cpu_state_next <= do_single_op_read;
            ea             <= alu_result;
            delay_ir_wait  <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned((8-4)*cycle_clks_g, 16));

        when "0011" =>  -- CLR: write zero to SA
            cpu_state      <= do_alu_write;
            cpu_state_next <= do_fetch;
            wr_dat         <= x"0000";
            delay_ir_wait  <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(10*cycle_clks_g, 16));

        when "0100" =>  -- NEG: subtract with arg1 = 0
            -- test_out <= x"EEFF";
            cpu_state      <= do_read;
            cpu_state_next <= do_single_op_read;
            ea             <= alu_result;  -- save address SA
            ope            <= alu_sub;
            arg1           <= x"0000";
            delay_ir_wait  <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(12*cycle_clks_g, 16));

        when "0101" =>  -- INV: xor with all ones
            cpu_state      <= do_read;
            cpu_state_next <= do_single_op_read;
            ea             <= alu_result;  -- save address SA
            ope            <= alu_xor;
            arg1           <= x"FFFF";
            delay_ir_wait  <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(10*cycle_clks_g, 16));

        when "0110" | "0111" | "1000" | "1001" =>  -- INC, INCT, DEC, DECT
            -- All four are an add of a small constant to the operand.
            cpu_state      <= do_read;
            cpu_state_next <= do_single_op_read;
            ea             <= alu_result;  -- save address SA
            ope            <= alu_add;
            delay_ir_wait  <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(10*cycle_clks_g, 16));
            if ir(9 downto 6) = "0110" then
                arg1 <= x"0001";  -- INC
            elsif ir(9 downto 6) = "0111" then
                arg1 <= x"0002";  -- INCT
            elsif ir(9 downto 6) = "1000" then
                arg1 <= x"FFFF";  -- DEC: add -1
            else
                arg1 <= x"FFFE";  -- DECT: add -2
            end if;

        when "1010" =>  -- BL: store the old PC to R11, then branch
            cpu_state      <= do_alu_write;
            cpu_state_next <= do_fetch;
            wr_dat         <= pc;          -- old PC (pc is a signal, so this is the pre-branch value)
            pc             <= alu_result;  -- the source address is the new PC
            ope            <= alu_add;     -- ALU computes W + 22
            arg1           <= w;
            arg2           <= x"0016";     -- 2*11 = 22 = 0x16, offset to R11
            delay_ir_wait  <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(12*cycle_clks_g, 16));

        when "1011" =>  -- SWPB
            cpu_state      <= do_read;
            cpu_state_next <= do_single_op_read;
            ea             <= alu_result;  -- save address SA
            ope            <= alu_swpb2;
            arg1           <= x"0000";
            delay_ir_wait  <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(10*cycle_clks_g, 16));

        when "1100" =>  -- SETO: write all ones to SA
            cpu_state      <= do_alu_write;
            cpu_state_next <= do_fetch;
            wr_dat         <= x"FFFF";
            delay_ir_wait  <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(10*cycle_clks_g, 16));

        when "1101" =>  -- ABS
            -- test_out <= x"AABB";
            -- NOTE(review): the cycle count is chosen from arg2(15), but the
            -- operand is only loaded into arg2 later, in do_single_op_read.
            -- Presumably this tests a stale value - confirm intended timing.
            if arg2(15) = '0' then
                delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(12*cycle_clks_g, 16));
            else
                delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(14*cycle_clks_g, 16));
            end if;
            cpu_state      <= do_read;
            cpu_state_next <= do_single_op_read;
            ea             <= alu_result;  -- save address SA
            ope            <= alu_abs;
            arg1           <= x"0000";

        when others =>
            cpu_state <= do_stuck;
    end case;
|
|
when do_single_op_read =>
    -- The operand at SA has been read into rd_dat.
    if ir(9 downto 6) = "0010" then
        -- X instruction: the word just read becomes the instruction
        -- register and is decoded like a fetched instruction.
        cpu_state <= do_decode;
        ir        <= rd_dat;
    else
        -- Every other single-operand instruction: feed the data to the ALU
        -- (arg1 and ope were set up by do_branch_b_bl).
        cpu_state <= do_single_op_writeback;
        arg2      <= rd_dat;
    end if;
|
|
when do_single_op_writeback =>
    -- Store the ALU result of a single-operand instruction; ea still holds
    -- the operand address, so return to do_fetch via the write sequence.
    cpu_state_next <= do_fetch;
    cpu_state      <= do_write;
    wr_dat         <= alu_result;

    -- Flags 0-2 are set for INV, NEG, ABS, INC, INCT, DEC and DECT; SWPB
    -- leaves the status register untouched.
    if ope /= alu_swpb2 then
        st(15 downto 13) <= alu_logical_gt & alu_arithmetic_gt & alu_flag_zero;
    end if;

    -- Carry and overflow are only updated by add, subtract and abs.
    if ope = alu_add or ope = alu_sub or ope = alu_abs then
        st(12 downto 11) <= alu_flag_carry & alu_flag_overflow;
    end if;
|
|
|
|
-------------------------------------------------------------
|
|
-- BLWP
|
|
-- (SA) -> WP, (SA+2) -> PC
|
|
-- R13 -> old_WP, R14 -> old_PC, R15 -> ST
|
|
-------------------------------------------------------------
|
|
when do_blwp00 =>
    -- First BLWP step. This state is also entered from reset and from
    -- interrupt processing, so it keeps respecting HOLD: while hold is
    -- asserted, acknowledge it and stay here.
    if hold /= '1' then
        -- alu_result points to the vector entry holding the new WP.
        holda          <= '0';
        cpu_state_next <= do_blwp0;
        cpu_state      <= do_read;  -- read new WP
        ea             <= alu_result;
        -- Meanwhile let the ALU compute the address of the new PC (vector + 2).
        ope            <= alu_add;
        arg2           <= alu_result;
        arg1           <= x"0002";
    else
        holda <= '1';
    end if;
|
|
when do_blwp0 =>
    -- rd_dat is the new WP; alu_result is the address of the new PC.
    -- Read the new PC and let the ALU compute the address of the first
    -- register to be written in the new workspace.
    cpu_state <= do_read;
    ea        <= alu_result;
    reg_t     <= rd_dat;  -- keep the new WP in the temporary register
    ope       <= alu_add;
    arg1      <= rd_dat;
    if i_am_xop then
        -- XOP has an extra step that stores the effective address to R11.
        cpu_state_next <= do_blwp_xop;
        arg2           <= x"0016";  -- 2*11: offset of R11 in the new workspace
    else
        -- Normal BLWP: the old WP is stored to R13 first.
        cpu_state_next <= do_blwp1;
        arg2           <= x"001A";  -- 2*13: offset of R13 in the new workspace
    end if;
    -- If this was an interrupt vectoring event, clear the acknowledge flag.
    int_ack <= '0';
|
|
when do_blwp_xop =>
    -- ** This phase only exists for XOP **
    -- rd_dat is the new PC, reg_t the new WP, alu_result the address of the
    -- new R11. Write the effective address (saved in reg_t2 by do_xop) there.
    cpu_state_next <= do_blwp1;
    cpu_state      <= do_write;
    ea             <= alu_result;
    wr_dat         <= reg_t2;
    -- ope is not changed here (do_blwp0 left it at alu_add), so the ALU now
    -- computes R11 address + 4, i.e. it skips R12 and points to the new R13
    -- for the WP store in do_blwp1.
    arg2 <= alu_result;
    arg1 <= x"0004";
|
|
when do_blwp1 =>
    -- rd_dat is the new PC, reg_t the new WP, alu_result the address of the
    -- new R13. Write the old WP there.
    cpu_state_next <= do_blwp2;
    cpu_state      <= do_write;
    ea             <= alu_result;
    wr_dat         <= w;
    -- Prepare for the PC write: the ALU advances by 2 to the new R14.
    arg2 <= alu_result;
    arg1 <= x"0002";

when do_blwp2 =>
    -- alu_result is the address of the new R14. Write the old PC there.
    cpu_state_next <= do_blwp3;
    cpu_state      <= do_write;
    ea             <= alu_result;
    wr_dat         <= pc;
    -- Prepare for the ST write: arg1 is left unchanged from do_blwp1, so
    -- the ALU advances by another 2 to the new R15.
    arg2 <= alu_result;
|
|
when do_blwp3 =>
    -- Last BLWP/XOP/interrupt step: write the old ST to the new R15
    -- (alu_result) and perform the context switch. wr_dat receives the
    -- value st had on entry to this state, because the st updates below are
    -- signal assignments that only take effect after the process suspends.
    wr_dat         <= st;
    ea             <= alu_result;
    arg2           <= alu_result;
    cpu_state      <= do_write;  -- write old ST
    cpu_state_next <= do_fetch;

    -- For interrupts, now set the interrupt priority mask.
    -- BUGBUG: the priority may have changed since it was sampled...
    if set_int_priority then
        if ic03 = "0000" then
            -- Level 0 is the exception: the mask is loaded with zero.
            -- Computing "level - 1" here would wrap around to 15 and
            -- enable every interrupt level.
            st(3 downto 0) <= "0000";
        else
            -- All other levels: mask = level being serviced - 1.
            st(3 downto 0) <= std_logic_vector(unsigned(ic03) - 1);
        end if;
        set_int_priority <= False;
    end if;

    -- Now do the context switch.
    pc <= rd_dat;  -- new PC, read in do_blwp0
    w  <= reg_t;   -- new WP, saved in do_blwp0
    if i_am_xop then
        st(9) <= '1';  -- set the XOP flag
    end if;
|
|
|
|
-------------------------------------------------------------
|
|
-- RTWP
|
|
-- R13 -> WP, R14 -> PC, R15 -> ST
|
|
-------------------------------------------------------------
|
|
when do_rtwp0 =>
    -- RTWP step 0: alu_result is the address of R13. Start reading it and
    -- let the ALU compute the address of R14 (R13 + 2) in parallel.
    cpu_state_next <= do_rtwp1;
    cpu_state      <= do_read;
    ea             <= alu_result;
    ope            <= alu_add;
    arg2           <= alu_result;
    arg1           <= x"0002";

when do_rtwp1 =>
    -- rd_dat is the previous R13: restore W. Read R14 next and start the
    -- calculation of the R15 address.
    cpu_state_next <= do_rtwp2;
    cpu_state      <= do_read;
    w              <= rd_dat;
    ea             <= alu_result;  -- address of previous R14
    arg2           <= alu_result;

when do_rtwp2 =>
    -- rd_dat is the previous R14: restore PC. Read R15 next.
    cpu_state_next <= do_rtwp3;
    cpu_state      <= do_read;
    pc             <= rd_dat;
    ea             <= alu_result;  -- address of previous R15

when do_rtwp3 =>
    -- rd_dat is the previous R15: restore ST and continue with the next
    -- instruction.
    cpu_state <= do_fetch;
    st        <= rd_dat;
|
|
|
|
-------------------------------------------------------------
|
|
-- All shift instructions
|
|
-------------------------------------------------------------
|
|
when do_shifts0 =>
    -- Shift instructions, entry: alu_result is the address of the working
    -- register to be shifted.
    ea <= alu_result;
    if shift_count /= "00000" then
        -- The shift count came with the instruction: read the register.
        cpu_state_next <= do_shifts2;
        cpu_state      <= do_read;
    else
        -- Count field is zero: the count must be read from WR0. The ALU
        -- computes W + 0 and do_alu_read reads from that address.
        cpu_state_next <= do_shifts1;
        cpu_state      <= do_alu_read;
        ope            <= alu_add;
        arg2           <= x"0000";
        arg1           <= w;
        delay_ir_wait  <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(8*cycle_clks_g, 16));
    end if;

when do_shifts1 =>
    -- rd_dat is the contents of WR0. Its low four bits give the shift
    -- count, with zero meaning a count of 16. Then read the operand.
    shift_count <= '0' & rd_dat(3 downto 0);
    if rd_dat(3 downto 0) = "0000" then
        shift_count <= "10000";
    end if;
    cpu_state_next <= do_shifts2;
    cpu_state      <= do_read;
|
|
when do_shifts2 =>
    -- The shift count is ready and rd_dat is the operand: load it into the
    -- ALU and select the shift type from ir(9 downto 8).
    cpu_state <= do_shifts3;
    arg2      <= rd_dat;
    case ir(9 downto 8) is
        when "00" => ope <= alu_sra;
        when "01" => ope <= alu_srl;
        when "11" => ope <= alu_src;
        when "10" =>
            ope    <= alu_sla;
            st(11) <= '0';  -- no overflow (yet); do_shifts3 may set it
        when others =>
            null;
    end case;
|
|
when do_shifts3 =>
    -- Shifting loop: the machine stays in this state, feeding the ALU
    -- result back into arg2 and requesting a shift_count decrement on each
    -- visit.
    delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(2*cycle_clks_g, 16));
    arg2          <= alu_result;
    st(15 downto 12) <= alu_logical_gt & alu_arithmetic_gt & alu_flag_zero & alu_flag_carry;

    -- SLA overflow is sticky: during a multi-bit shift the last value of
    -- alu_flag_overflow alone cannot be trusted. st(11) was cleared in
    -- do_shifts2, so it only ever needs to be set here.
    if ir(9 downto 8) = "10" and alu_flag_overflow = '1' then
        st(11) <= '1';
    end if;

    dec_shift_count := True;

    cpu_state <= do_shifts3;      -- by default there is more shifting to do
    if shift_count = "00001" then
        cpu_state <= do_shifts4;  -- done with shifting altogether
        ope       <= alu_load2;   -- pass the previous result through
    end if;

when do_shifts4 =>
    -- Store the shifted value (ea was set in do_shifts0) and continue with
    -- the next instruction.
    cpu_state_next <= do_fetch;
    cpu_state      <= do_write;
    wr_dat         <= alu_result;
|
|
|
|
-------------------------------------------------------------
|
|
-- Single bit CRU instructions
|
|
-------------------------------------------------------------
|
|
when do_single_bit_cru0 =>
    -- Single-bit CRU instructions: rd_dat holds the contents of R12. Add the
    -- sign-extended 8-bit displacement from the instruction, shifted left
    -- by one bit, to it.
    cpu_state <= do_single_bit_cru1;
    ope       <= alu_add;
    arg2      <= rd_dat;
    -- 7 copies of the sign bit + 8 displacement bits + trailing '0'.
    arg1      <= std_logic_vector(resize(signed(ir(7 downto 0)), 15)) & '0';

when do_single_bit_cru1 =>
    -- Drive the CRU bit address and, for outputs (SBZ, SBO), the data bit
    -- taken from ir(8). Then hold for cru_delay_clocks in do_single_bit_cru2.
    cpu_state   <= do_single_bit_cru2;
    delay_count <= cru_delay_clocks;
    cruout      <= ir(8);
    addr        <= "000" & alu_result(12 downto 1) & '0';
|
|
when do_single_bit_cru2 =>
    -- Stay in this state until delay_count reaches zero. While waiting,
    -- CRUCLK is driven high for writes (SBO, SBZ and the external
    -- instructions); TB (opcode byte 0x1F) is a read and leaves it low.
    if delay_count = x"00" then
        -- Delay over: drive CRUCLK low regardless of read or write (for TB
        -- it was zero to begin with) and continue with the next instruction.
        cruclk    <= '0';
        cpu_state <= do_fetch;
        if ir(15 downto 8) = x"1F" then
            st(13) <= cruin;  -- TB: capture the input bit into the EQ flag
        end if;
    elsif ir(15 downto 8) /= x"1F" then
        cruclk <= '1';
    end if;
|
|
|
|
-------------------------------------------------------------
|
|
-- External instructions
|
|
-------------------------------------------------------------
|
|
when do_ext_instructions =>
    -- External instructions IDLE, RSET, CKOF, CKON, LREX. They all issue a
    -- CRUCLK pulse; the high bits of the address bus indicate which
    -- instruction it is.
    addr(15 downto 13) <= rd_dat(7 downto 5);
    delay_count        <= x"05";  -- 5 clock cycles, used as delay counter

    if ir = x"0340" then
        -- IDLE: go to the idle state instead of the plain CRU pulse.
        cpu_state <= do_idle_wait;
    else
        cpu_state <= do_single_bit_cru2;  -- issue CRUCLK pulse
    end if;

    if ir = x"0360" then
        st(3 downto 0) <= "0000";  -- RSET clears the interrupt mask
    end if;
|
|
|
|
when do_idle_wait =>
    -- IDLE: keep CRUCLK high while the delay counter runs, then wait with
    -- CRUCLK low until an interrupt that must be served arrives.
    if delay_count = x"00" then
        cruclk <= '0';
        -- Leave the idle state only for an interrupt request whose level
        -- is within the current interrupt mask.
        if int_req = '1' and unsigned(ic03) <= unsigned(st(3 downto 0)) then
            cpu_state <= do_fetch;
        end if;
    else
        cruclk <= '1';
    end if;
|
|
|
|
-------------------------------------------------------------
|
|
-- Store ST or W to workspace register
|
|
-------------------------------------------------------------
|
|
when do_store_instructions =>
    -- STST / STWP: write ST or W to the workspace register addressed by the
    -- ALU output, then fetch the next instruction.
    cpu_state_next <= do_fetch;
    cpu_state      <= do_alu_write;
    wr_dat         <= w;       -- STWP
    if ir(6 downto 5) = "10" then
        wr_dat <= st;          -- STST
    end if;
|
|
|
|
-------------------------------------------------------------
|
|
-- COC, CZC, XOR, MPY, DIV
|
|
-------------------------------------------------------------
|
|
when do_coc_czc_etc0 =>
    -- COC, CZC, XOR, MPY, DIV: the source operand is in rd_dat. Save it and
    -- compute the address of the destination, which is always a workspace
    -- register selected by ir(9 downto 6).
    cpu_state_operand_return <= do_coc_czc_etc1;
    cpu_state                <= do_source_address0;
    operand_mode             <= "00" & ir(9 downto 6);  -- register operand
    reg_t                    <= rd_dat;                 -- source operand

when do_coc_czc_etc1 =>
    -- Store the effective address and go read the destination operand.
    cpu_state_next <= do_coc_czc_etc2;
    cpu_state      <= do_read;
    ea             <= alu_result;
|
|
when do_coc_czc_etc2 =>
    -- Both operands are available: reg_t = source, rd_dat = destination.
    -- Dispatch on ir(12 downto 10).
    arg1 <= reg_t;   -- source
    arg2 <= rd_dat;  -- dest (DIV swaps the two below)

    case ir(12 downto 10) is
        when "000" | "001" | "010" =>  -- COC, CZC, XOR
            cpu_state     <= do_coc_czc_etc3;
            delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(14*cycle_clks_g, 16));
            if ir(12 downto 10) = "000" then
                ope <= alu_coc;
            elsif ir(12 downto 10) = "001" then
                ope <= alu_czc;
            else
                ope <= alu_xor;
            end if;

        when "110" =>  -- MPY: operands zero-extended for the multiplier
            cpu_state     <= do_mul_store0;
            mult_a        <= "00" & reg_t;
            mult_b        <= "00" & rd_dat;
            --delay_count <= "00000100";
            delay_count   <= x"14";
            delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(52*cycle_clks_g, 16));

        when "111" =>  -- DIV (delay_ir_wait is handled in do_div0)
            -- The initial comparison needs dest - source.
            cpu_state <= do_div0;
            ope       <= alu_sub;
            arg1      <= rd_dat;
            arg2      <= reg_t;

        when others =>
            -- Remaining encodings are not handled here and stick the CPU.
            cpu_state <= do_stuck;
    end case;
|
|
when do_coc_czc_etc3 =>
    -- COC and CZC set only flag 2 (EQ) and write nothing to the destination
    -- register. XOR sets flags 0-2 and stores the result.
    st(13) <= alu_flag_zero;
    case ir(12 downto 11) is
        when "00" =>  -- COC, CZC: done
            cpu_state <= do_fetch;
        when "01" =>  -- XOR
            cpu_state_next <= do_fetch;
            cpu_state      <= do_write;
            wr_dat         <= alu_result;
            st(15)         <= alu_logical_gt;
            st(14)         <= alu_arithmetic_gt;
        when others =>
            cpu_state <= do_stuck;
    end case;
|
|
when do_mul_store0 =>
    -- MPY: wait here until the delay counter loaded in do_coc_czc_etc2 has
    -- run down to zero.
    if delay_count = x"00" then
        cpu_state <= do_mul_store1;
    end if;

when do_mul_store1 =>
    -- Write the high word of the product to the destination register (ea)
    -- while the ALU computes the address of the following word (ea + 2).
    wr_dat         <= mult_product(31 downto 16);
    cpu_state_next <= do_mul_store2;
    cpu_state      <= do_write;
    ope            <= alu_add;
    arg2           <= ea;
    arg1           <= x"0002";

when do_mul_store2 =>
    -- Write the low word of the product to the next word, then fetch.
    wr_dat         <= mult_product(15 downto 0);
    cpu_state_next <= do_fetch;
    cpu_state      <= do_write;
    ea             <= alu_result;
|
|
|
|
when do_div0 =>
    -- DIV: alu_result is arg1 - arg2, i.e. destination - source, with
    -- reg_t = source (divisor) and rd_dat = destination (dividend high word).
    -- First check for the overflow condition (ST4, i.e. st(11)).
    if not ((reg_t(15) = '0' and rd_dat(15) = '1')
            or (reg_t(15) = rd_dat(15) and alu_result(15) = '0')) then
        -- No overflow: keep the high word of the dividend and fetch its
        -- second word, whose address (ea + 2) is computed by the ALU.
        st(11)                 <= '0';
        dividend(31 downto 16) <= rd_dat;
        cpu_state_next         <= do_div1;
        cpu_state              <= do_alu_read;
        ope                    <= alu_add;
        arg2                   <= ea;
        arg1                   <= x"0002";
        delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(92*cycle_clks_g, 16));
    else
        -- Overflow: set the flag and finish without dividing.
        st(11)        <= '1';
        cpu_state     <= do_fetch;
        delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(16*cycle_clks_g, 16));
    end if;
|
|
when do_div1 =>
    -- rd_dat is the low word of the dividend. Set up 16 iterations.
    cpu_state             <= do_div2;
    shift_count           <= "10000";  -- 16
    dividend(15 downto 0) <= rd_dat;

when do_div2 =>
    -- One division step: shift the dividend left and, in parallel, try a
    -- 17-bit subtraction of the divisor from the already-shifted upper
    -- bits, so the bit being shifted out is picked up too.
    cpu_state     <= do_div3;
    dec_shift_count := True;  -- decrement count
    divider_sub   <= std_logic_vector(unsigned(dividend(31 downto 15)) - unsigned('0' & reg_t));
    dividend(31 downto 0) <= dividend(30 downto 0) & '0';  -- shift left
    delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(2*cycle_clks_g, 16));

when do_div3 =>
    -- If the trial subtraction did not borrow, keep its result as the new
    -- partial remainder and shift a '1' into the quotient.
    if divider_sub(16) = '0' then
        dividend(0)            <= '1';
        dividend(31 downto 16) <= divider_sub(15 downto 0);
    end if;
    if shift_count = "00000" then
        cpu_state <= do_div4;
    else
        cpu_state <= do_div2;  -- loop back
    end if;
|
|
when do_div4 =>
    -- Division done. Store the quotient. This cannot be merged with the
    -- previous state, otherwise the LSB set there would not be captured.
    wr_dat         <= dividend(15 downto 0);
    cpu_state_next <= do_div5;
    cpu_state      <= do_write;
    -- Prepare the next address (ea + 2) in the ALU.
    ope  <= alu_add;
    arg2 <= ea;
    arg1 <= x"0002";

when do_div5 =>
    -- Write the remainder to the following word, then continue with the
    -- next instruction.
    wr_dat         <= dividend(31 downto 16);
    cpu_state_next <= do_fetch;
    cpu_state      <= do_write;
    ea             <= alu_result;
|
|
|
|
-------------------------------------------------------------
|
|
-- XOP - processed like BLWP but with a few extra steps
|
|
-------------------------------------------------------------
|
|
when do_xop =>
    -- XOP is processed like BLWP with a few extra steps. alu_result is the
    -- effective address here; it is on its way to R11, so park it in reg_t2.
    i_am_xop  <= True;
    cpu_state <= do_blwp00;
    reg_t2    <= alu_result;
    -- XOP vector address = 0x0040 + 4 * XOP number.
    ope  <= alu_add;
    arg1 <= x"0040";
    arg2 <= "0000000000" & ir(9 downto 6) & "00";
|
|
|
|
-------------------------------------------------------------
|
|
-- LDCR and STCR
|
|
-------------------------------------------------------------
|
|
when do_ldcr0 =>
    -- LDCR: the source operand has been read. Keep its byte-aligned form in
    -- reg_t and pass it through the ALU (alu_load1) so that the flags for
    -- the value at SA become available.
    cpu_state <= do_ldcr00;
    ope       <= alu_load1;
    arg1      <= read_byte_aligner;
    reg_t     <= read_byte_aligner;

when do_ldcr00 =>
    -- Update flags ST0-ST2, plus ST5 (parity) for byte-sized transfers.
    st(15 downto 13) <= alu_logical_gt & alu_arithmetic_gt & alu_flag_zero;
    if not operand_word then
        st(10) <= alu_flag_parity;
    end if;
    -- Next read R12 (register 12, direct addressing) via the operand-fetch
    -- subprogram, returning to do_ldcr1.
    cpu_state_operand_return <= do_ldcr1;
    cpu_state                <= do_read_operand0;
    operand_mode             <= "001100";
|
|
when do_stcr0 =>
    -- STCR: alu_result is the address of the destination operand; keep it
    -- in reg_t2. reg_t holds the single-bit mask that is ORed into reg_stcr
    -- for each received '1': it starts at bit 0 for word transfers and at
    -- bit 8 (LSB of the high byte) for byte transfers.
    reg_t2   <= alu_result;
    reg_stcr <= x"0000";
    reg_t    <= x"0100";
    if operand_word then
        reg_t <= x"0001";
    end if;
    -- Read R12 (register 12, direct addressing) via the operand-fetch
    -- subprogram, returning to do_ldcr1.
    cpu_state_operand_return <= do_ldcr1;
    cpu_state                <= do_read_operand0;
    operand_mode             <= "001100";
|
|
when do_ldcr1 =>
    -- Shared by LDCR and STCR (ir(10) = '0' selects LDCR). rd_dat is now
    -- R12; it becomes the running CRU address in ea.
    cpu_state <= do_ldcr2;
    ea        <= rd_dat;

    -- Base cycle count, depending on the instruction and the bit count C
    -- in ir(9 downto 6).
    if ir(10) = '0' then
        -- LDCR: same base for every count.
        delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(20*cycle_clks_g, 16));
    elsif ir(9 downto 6) = "0000" then
        -- STCR, C = 0 (16 bits)
        delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(60*cycle_clks_g, 16));
    elsif ir(9 downto 6) = "1000" then
        -- STCR, C = 8
        delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(44*cycle_clks_g, 16));
    elsif ir(9) = '1' then
        -- STCR, C = 9..15
        delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(58*cycle_clks_g, 16));
    else
        -- STCR, C = 1..7
        delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(42*cycle_clks_g, 16));
    end if;

    -- Number of bits to transfer; a count field of zero means 16.
    if ir(9 downto 6) = "0000" then
        shift_count <= "10000";
    else
        shift_count <= '0' & ir(9 downto 6);
    end if;
|
|
when do_ldcr2 =>
    -- Start of one CRU bit transfer: present the CRU address and shift
    -- reg_t by one position in the ALU.
    addr <= "000" & ea(12 downto 1) & '0';  -- "000" & alu_result(12 downto 1) & '0';
    arg2 <= reg_t;
    if ir(10) /= '0' then
        -- STCR: shift the bit mask left; wait a few cycles after the
        -- address change before the input is sampled.
        cpu_state <= do_stcr_delay0;
        ope       <= alu_sla;
    else
        -- LDCR: shift the data right.
        cpu_state     <= do_ldcr3;
        ope           <= alu_srl;
        delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(2*cycle_clks_g, 16));
    end if;

when do_stcr_delay0 =>
    -- STCR address settling delay, first cycle.
    cpu_state <= do_stcr_delay1;

when do_stcr_delay1 =>
    -- STCR address settling delay, second cycle.
    cpu_state <= do_ldcr3;
|
|
when do_ldcr3 =>
    -- Common to both directions: keep the shifted value, arm the delay
    -- counter and let the ALU compute the next CRU address (ea + 2).
    reg_t       <= alu_result;
    delay_count <= cru_delay_clocks;
    ope         <= alu_add;
    arg2        <= ea;
    arg1        <= x"0002";

    if ir(10) /= '0' then
        -- STCR: OR the current mask bit into the result when the input is
        -- high. This is done outside the ALU. No CRUCLK pulse is created.
        cpu_state <= do_ldcr5;
        if cruin = '1' then
            reg_stcr <= reg_stcr or reg_t;
        end if;
    else
        -- LDCR: drive the bit shifted out and go generate the CRUCLK pulse.
        cpu_state <= do_ldcr4;
        if operand_word then
            cruout <= alu_flag_carry;
        else
            cruout <= alu_result(7);  -- byte operand
        end if;
    end if;

when do_ldcr4 =>
    -- LDCR: raise CRUCLK; do_ldcr5 lowers it when the delay has elapsed.
    cpu_state <= do_ldcr5;
    cruclk    <= '1';
|
|
when do_ldcr5 =>
    -- Wait for the CRU delay to elapse, then end the bit cycle: lower
    -- CRUCLK, advance to the next CRU address and count the bit.
    if delay_count = x"00" then
        cruclk <= '0';
        ea     <= alu_result;
        dec_shift_count := True;
        if shift_count /= "00001" then
            cpu_state <= do_ldcr2;   -- more bits to transfer
        elsif ir(10) = '0' then
            cpu_state <= do_fetch;   -- LDCR: all done
        else
            cpu_state <= do_stcr6;   -- STCR: the result still has to be stored
        end if;
    end if;
|
|
when do_stcr6 =>
    -- STCR: set the flags from the collected value in reg_stcr and start
    -- the writeback. To support byte operations the destination is read
    -- before it is written; reg_t2 holds the destination address.
    if reg_stcr = x"0000" then
        st(15) <= '0';
        st(14) <= '0';
        st(13) <= '1';  -- equal / zero
    else
        st(15) <= '1';                -- logical greater than
        st(14) <= not reg_stcr(15);   -- arithmetic greater than if sign bit clear
        st(13) <= '0';
    end if;
    -- NOTE(review): st(12) is cleared unconditionally here - confirm this
    -- matches the intended STCR flag behaviour.
    st(12) <= '0';

    cpu_state_next <= do_stcr7;
    cpu_state      <= do_read;
    ea             <= reg_t2;
|
|
when do_stcr7 =>
    -- rd_dat now has the destination data from memory. Merge the collected
    -- CRU data from reg_stcr into it and write the word back.
    cpu_state      <= do_write;
    cpu_state_next <= do_fetch;
    if operand_word then
        wr_dat <= reg_stcr;
    elsif ea(0) = '0' then
        -- Byte operation, high byte impacted.
        wr_dat <= reg_stcr(15 downto 8) & rd_dat(7 downto 0);
    else
        -- Byte operation, low byte impacted (data is carried in the high
        -- byte of reg_stcr).
        wr_dat <= rd_dat(15 downto 8) & reg_stcr(15 downto 8);
    end if;
|
|
|
|
|
|
-------------------------------------------------------------
|
|
-- subprogram to calculate source operand address SA
|
|
-- This does not include reading the source operand, the address is
|
|
-- left at ALU output register alu_result
|
|
-------------------------------------------------------------
|
|
when do_source_address0 =>
    -- Subprogram entry: compute the source operand address SA for the
    -- addressing mode in operand_mode. The operand itself is not read; the
    -- address is left at the ALU output when control goes to
    -- cpu_state_operand_return.

    -- Always start with the workspace register address W + 2*register.
    ope  <= alu_add;
    arg1 <= w;
    arg2 <= x"00" & "000" & operand_mode(3 downto 0) & '0';

    case operand_mode(5 downto 4) is
        when "00" =>
            -- Workspace register: its address is the result, return now.
            cpu_state <= cpu_state_operand_return;

        when "01" =>
            -- Workspace register indirect: read the register.
            cpu_state_next <= do_source_address1;
            cpu_state      <= do_alu_read;
            delay_ir_wait  <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(4*cycle_clks_g, 16));

        when "10" =>
            -- Symbolic or indexed: read the word following the instruction.
            cpu_state     <= do_pc_read;
            delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(8*cycle_clks_g, 16));
            if operand_mode(3 downto 0) /= "0000" then
                cpu_state_next <= do_source_address2;  -- indexed
            else
                cpu_state_next <= do_source_address1;  -- symbolic
            end if;

        when "11" =>
            -- Workspace register indirect with autoincrement.
            cpu_state_next <= do_source_address4;
            cpu_state      <= do_alu_read;

        when others =>
            cpu_state <= do_stuck;
    end case;
|
|
when do_source_address1 =>
    -- Make the word just read visible at the ALU output. It is either the
    -- workspace register contents (*Rx) or the immediate operand (@LABEL).
    cpu_state <= cpu_state_operand_return;
    ope       <= alu_load2;
    arg2      <= rd_dat;

when do_source_address2 =>
    -- Indexed: rd_dat is the immediate parameter and alu_result is still
    -- the address of register Rx. Save the immediate and read the register
    -- so the two can be added.
    cpu_state_next <= do_source_address3;
    cpu_state      <= do_alu_read;
    reg_t          <= rd_dat;

when do_source_address3 =>
    -- Indexed: SA = contents of Rx + @TABLE.
    cpu_state <= cpu_state_operand_return;
    ope       <= alu_add;
    arg2      <= reg_t;   -- @TABLE
    arg1      <= rd_dat;  -- contents of Rx
|
|
when do_source_address4 =>
    -- Autoincrement: rd_dat is the value of Rx, which is the address to
    -- return. The ALU adds 2 (word) or 1 (byte) for the register writeback.
    cpu_state <= do_source_address5;
    ea        <= alu_result;  -- register address, saved before the ALU op destroys it
    reg_t     <= rd_dat;      -- return value
    ope       <= alu_add;
    arg1      <= rd_dat;
    if not operand_word then
        arg2          <= x"0001";
        delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(6*cycle_clks_g, 16));
    else
        arg2          <= x"0002";
        delay_ir_wait <= std_logic_vector(unsigned(delay_ir_wait) + to_unsigned(8*cycle_clks_g, 16));
    end if;

when do_source_address5 =>
    -- Write the autoincremented value back to the register.
    cpu_state_next <= do_source_address6;
    cpu_state      <= do_write;
    wr_dat         <= alu_result;

when do_source_address6 =>
    -- End of autoincrement handling: put the source address (the original
    -- register value) on the ALU output and return.
    cpu_state <= cpu_state_operand_return;
    ope       <= alu_load2;
    arg2      <= reg_t;
|
|
|
|
-------------------------------------------------------------
|
|
-- subprogram to do operand fetching, data returned in rd_dat.
|
|
-- operand address is left to EA (when appropriate)
|
|
when do_read_operand0 =>
    -- Subprogram entry: fetch an operand; the data is returned in rd_dat
    -- and the operand address is left in ea (when appropriate).
    -- The workspace register is always read first; for symbolic mode that
    -- read goes to waste.
    -- test_out <= x"EE00";
    cpu_state_next <= do_read_operand1;
    cpu_state      <= do_alu_read;  -- read from the address at the ALU output
    ope            <= alu_add;      -- W + 2*register
    arg2           <= x"00" & "000" & operand_mode(3 downto 0) & '0';
    arg1           <= w;
|
|
when do_read_operand1 =>
    -- The workspace register has been read (rd_dat) and alu_result is its
    -- address. Continue according to the addressing mode.
    -- test_out <= x"EE01";
    case operand_mode(5 downto 4) is
        when "00" =>
            -- Workspace register: done. The effective address must still be
            -- stored for byte selection to work.
            cpu_state <= cpu_state_operand_return;
            ea        <= alu_result;

        when "01" =>
            -- Workspace register indirect: the register holds the operand
            -- address. Return via the operand read.
            cpu_state_next <= cpu_state_operand_return;
            cpu_state      <= do_read;
            ea             <= rd_dat;

        when "10" =>
            -- Symbolic or indexed: read the immediate operand following the
            -- instruction; keep the register value for later.
            cpu_state_next <= do_read_operand2;
            cpu_state      <= do_pc_read;
            reg_t          <= rd_dat;

        when "11" =>
            -- Workspace register indirect auto-increment: add 2 (word) or
            -- 1 (byte) to the register value for the writeback.
            cpu_state <= do_read_operand3;
            ea        <= alu_result;  -- address of the register
            reg_t     <= rd_dat;      -- register value, to be left in ea
            ope       <= alu_add;
            arg1      <= rd_dat;
            if not operand_word then
                arg2 <= x"0001";
            else
                arg2 <= x"0002";
            end if;

        when others =>
            cpu_state <= do_stuck;  -- get stuck, should never happen
    end case;
|
|
when do_read_operand2 =>
|
|
-- symbolic or indexed mode here (the immediate word following the instruction is now in rd_dat)
|
|
-- test_out <= x"EE02";
|
|
if operand_mode(3 downto 0) = "0000" then
|
|
-- symbolic, read from rd_dat
|
|
ea <= rd_dat;
|
|
cpu_state <= do_read;
|
|
-- return after read
|
|
cpu_state_next <= cpu_state_operand_return;
|
|
else
|
|
-- indexed, need to compute the address
|
|
-- We need to return via an extra state (not with do_alu_read) since
|
|
-- EA needs to be setup.
|
|
arg1 <= rd_dat;
|
|
arg2 <= reg_t;
|
|
ope <= alu_add;
|
|
cpu_state <= do_read_operand5;
|
|
end if;
|
|
when do_read_operand3 =>
|
|
-- test_out <= x"EE03";
|
|
-- write back our result to the register
|
|
wr_dat <= alu_result;
|
|
cpu_state <= do_write;
|
|
cpu_state_next <= do_read_operand4;
|
|
when do_read_operand4 =>
|
|
-- Now we need to read the actual value. And return in EA where it came from.
|
|
ea <= reg_t;
|
|
cpu_state <= do_read;
|
|
cpu_state_next <= cpu_state_operand_return;
|
|
when do_read_operand5 =>
|
|
ea <= alu_result;
|
|
cpu_state <= do_read;
|
|
cpu_state_next <= cpu_state_operand_return; -- return via read
|
|
|
|
|
|
-- subprogram to do operand writing, data to write in wr_dat
|
|
when do_write_operand0 =>
|
|
-- read workspace register. Goes to waste if symbolic mode.
|
|
-- test_out <= x"AA00";
|
|
arg1 <= w;
|
|
arg2 <= x"00" & "000" & operand_mode(3 downto 0) & '0';
|
|
ope <= alu_add; -- calculate workspace address
|
|
if operand_mode(5 downto 4) = "00" then
|
|
-- write to workspace register directly, then done!
|
|
cpu_state <= do_alu_write;
|
|
cpu_state_next <= cpu_state_operand_return;
|
|
else
|
|
-- we have an indirect write, so need to first read the workspace register
|
|
cpu_state <= do_alu_read; -- read from addr of ALU output
|
|
cpu_state_next <= do_write_operand1;
|
|
end if;
|
|
when do_write_operand1 =>
|
|
-- test_out <= x"AA01";
|
|
case operand_mode(5 downto 4) is
|
|
when "01" =>
|
|
-- workspace register indirect
|
|
ea <= rd_dat;
|
|
cpu_state <= do_write;
|
|
-- return via operand write
|
|
cpu_state_next <= cpu_state_operand_return;
|
|
when "10" =>
|
|
-- read immediate operand for symbolic or indexed mode
|
|
reg_t <= rd_dat; -- save register value for later
|
|
cpu_state <= do_pc_read;
|
|
cpu_state_next <= do_write_operand2;
|
|
when "11" =>
|
|
-- workspace register indirect auto-increment
|
|
ea <= rd_dat;
|
|
reg_t <= rd_dat;
|
|
cpu_state <= do_write;
|
|
cpu_state_next <= do_write_operand3;
|
|
when others =>
|
|
cpu_state <= do_stuck; -- get stuck, should never happen
|
|
end case;
|
|
when do_write_operand2 =>
|
|
-- symbolic or indexed mode here (the immediate word following the instruction is now in rd_dat)
|
|
if operand_mode(3 downto 0) = "0000" then
|
|
-- symbolic, write to address rd_dat
|
|
-- test_out <= x"AA02";
|
|
ea <= rd_dat;
|
|
cpu_state <= do_write;
|
|
-- return after write
|
|
cpu_state_next <= cpu_state_operand_return;
|
|
else
|
|
-- indexed, need to compute the address
|
|
-- test_out <= x"AA12";
|
|
arg1 <= rd_dat;
|
|
arg2 <= reg_t;
|
|
ope <= alu_add;
|
|
cpu_state <= do_alu_write;
|
|
-- return after write
|
|
cpu_state_next <= cpu_state_operand_return;
|
|
end if;
|
|
when do_write_operand3 =>
|
|
-- need to autoincrement our register. rd_dat still contains our read data.
|
|
-- test_out <= x"AA03";
|
|
arg1 <= reg_t; -- register value
|
|
if operand_word then
|
|
arg2 <= x"0002"; -- word operation, inc by 2
|
|
else
|
|
arg2 <= x"0001";
|
|
end if;
|
|
ope <= alu_add;
|
|
ea <= alu_result; -- save address of register before alu op destroys it
|
|
cpu_state <= do_write_operand4;
|
|
when do_write_operand4 =>
|
|
-- writeback of autoincremented register
|
|
-- test_out <= x"AA04";
|
|
wr_dat <= alu_result;
|
|
cpu_state <= do_write;
|
|
cpu_state_next <= cpu_state_operand_return;
|
|
|
|
|
|
when do_stuck =>
|
|
stuck <= '1';
|
|
holda <= hold;
|
|
end case;
|
|
|
|
-- decrement shift count if necessary
|
|
if dec_shift_count then
|
|
shift_count <= std_logic_vector(unsigned(shift_count) - to_unsigned(1, 5));
|
|
end if;
|
|
|
|
if delay_count /= "00000000" then
|
|
delay_count <= std_logic_vector(unsigned(delay_count) - to_unsigned(1, 8));
|
|
end if;
|
|
|
|
if inc_ir_count then
|
|
delay_ir_count <= std_logic_vector(unsigned(delay_ir_count) + to_unsigned(1, 16));
|
|
end if;
|
|
|
|
|
|
end if; -- enable
|
|
end if; -- rising_edge
|
|
end if;
|
|
end process;
|
|
|
|
end Behavioral;
|
|
|