diff --git a/tests/tg68k/TG68K_ALU.vhd b/tests/tg68k/TG68K_ALU.vhd index 429a624..613ff87 100644 --- a/tests/tg68k/TG68K_ALU.vhd +++ b/tests/tg68k/TG68K_ALU.vhd @@ -60,8 +60,7 @@ entity TG68K_ALU is micro_state : in micro_states; bf_ext_in : in std_logic_vector(7 downto 0); bf_ext_out : out std_logic_vector(7 downto 0); - bf_shift : in std_logic_vector(5 downto 0); - bf_width : in std_logic_vector(5 downto 0); + bf_width : in std_logic_vector(4 downto 0); bf_loffset : in std_logic_vector(4 downto 0); bf_offset : in std_logic_vector(31 downto 0); set_V_Flag_out : out bit; @@ -155,24 +154,11 @@ architecture logic of TG68K_ALU IS signal result : std_logic_vector(39 downto 0); signal result_tmp : std_logic_vector(39 downto 0); signal sign : std_logic_vector(31 downto 0); - signal bf_set1 : std_logic_vector(39 downto 0); - signal inmux0 : std_logic_vector(39 downto 0); - signal inmux1 : std_logic_vector(39 downto 0); - signal inmux2 : std_logic_vector(39 downto 0); - signal inmux3 : std_logic_vector(39 downto 0); - signal inmux4 : std_logic_vector(39 downto 0); - signal copymux0 : std_logic_vector(39 downto 0); - signal copymux1 : std_logic_vector(39 downto 0); - signal copymux2 : std_logic_vector(39 downto 0); - signal copymux3 : std_logic_vector(39 downto 0); + signal bf_loff_dir : std_logic_vector(4 downto 0); signal bf_set2 : std_logic_vector(39 downto 0); - signal shift : std_logic_vector(39 downto 0); signal copy : std_logic_vector(39 downto 0); signal bf_firstbit : std_logic_vector(5 downto 0); - signal mux : std_logic_vector(3 downto 0); - signal bitnr : std_logic_vector(4 downto 0); - signal mask : std_logic_vector(31 downto 0); signal bf_bset : std_logic; signal bf_NFlag : std_logic; signal bf_bchg : std_logic; @@ -194,8 +180,8 @@ begin ----------------------------------------------------------------------------- -- set OP1in ----------------------------------------------------------------------------- - process (OP2out, reg_QB, opcode, OP1out, OP1in, 
exe_datatype, addsub_q, execOPC, exec, mux, - pack_out, bcd_a, bcd_s, result_mulu, result_div, exe_condition, bf_shift, bf_offset, bf_width, + process (OP2out, reg_QB, opcode, OP1out, OP1in, exe_datatype, addsub_q, execOPC, exec, + pack_out, bcd_a, bcd_s, result_mulu, result_div, exe_condition, bf_offset, bf_width, Flags, FlagsSR, bits_out, exec_tas, rot_out, exe_opcode, result, bf_fffo, bf_firstbit, bf_datareg) begin ALUout <= OP1in; @@ -203,13 +189,7 @@ begin if exec(opcBFwb) = '1' then ALUout <= result(31 downto 0); if bf_fffo = '1' then - ALUout <= (others => '0'); - -- mux = 0 means no bit was found at all - if mux = "0000" then - ALUout <= bf_offset + bf_width + 1; - else - ALUout <= bf_offset + bf_width - bf_firstbit; - end if; + ALUout <= bf_offset + bf_width + 1 - bf_firstbit; end if; end if; @@ -421,11 +401,20 @@ begin -- in the next cycle while the ALU is working since the tg68k can only read -- from two registers at once. -- + -- All bitfield operations can operate on registers or memory. There are + -- two fundamental differences which make the shifters quite complex: + -- 1. Memory content is delivered byte aligned to the ALU. Thus all shifting + -- is 7 bits far at most. Registers are 32 bit in size and may require + -- shifting of up to 31 bit positions + -- 2. Memory operations can affect 5 bytes. Thus all shifting is 40 bit in that + -- case. Registers are 32 bit in size and bitfield operations wrap. Shifts + -- are actually rotations for that reason + -- -- The destination operand is transfered via op1out and bf_ext into the ALU. -- - -- bfset, bfclr and bfchg - ------------------------- - -- bfset, bfclr and bfchg work very similar. A "sign" vector is generated + -- bftst, bfset, bfclr and bfchg + -------------------------------- + -- bftst, bfset, bfclr and bfchg work very similar. A "sign" vector is generated -- having "width" right aligned 0-bits and the rest ones. 
-- A "copy" vector is generated from this by shifting through copymux so -- this contains a 1 for all bits in bf_ext_in & op1out that will not be @@ -444,18 +433,27 @@ begin -------- -- bfins reuses most of the functionality of bfset, bfclr and bfchg. But it -- has another 32 bit parameter that's being used for the source. This is passed - -- to the ALU via op2out. This is moved to the shift register, bits 39-32 of - -- the shift register mirror bits 7-0. This is then shifted bf_shift bits to - -- the right. + -- to the ALU via op2out. This is moved to the shift register and shifted + -- into position by an amount derived from bf_loffset. -- The input valus is also store in datareg and the lowest "width" bits -- are masked. This is then forwarded to op1in which in turn uses the normal -- mechanisms to generate the flags. A special bf_NFlag is also generated -- from this. Z and N are set from these and not from the previous bitfield -- contents as with bfset, bfclr or bfchg + -- + -- bfextu/bfexts + ---------------- + -- bfexts and bfextu use the same shifter that is used by bfins to shift the + -- data to be inserted. It's using that same shifter to shift data in the + -- opposite direction. Flags are set from the extracted data + -- + -- bfffo + -------- + -- bfffo uses the same data path as bfext. 
But instead of directly returning + -- the extracted data it determines the highest bit set in the result - -process (clk, mux, mask, bitnr, bf_ins, bf_bchg, bf_bset, bf_exts, bf_extu, bf_shift, inmux0, inmux1, inmux2, inmux3, inmux4, bf_set2, OP1out, OP2out, result_tmp, bf_ext_in, - shift, datareg, bf_NFlag, result, reg_QB, sign, bf_d32, copy, bf_loffset, copymux0, copymux1, copymux2, copymux3, bf_width) + process (clk, bf_ins, bf_bchg, bf_bset, bf_exts, bf_extu, bf_set2, OP1out, OP2out, result_tmp, bf_ext_in, + datareg, bf_NFlag, result, reg_QB, sign, bf_d32, copy, bf_loffset, bf_width) begin if rising_edge(clk) then if clkena_lw = '1' then @@ -470,7 +468,7 @@ process (clk, mux, mask, bitnr, bf_ins, bf_bchg, bf_bset, bf_exts, bf_extu, bf_s when "010" => bf_bchg <= '1'; --BFCHG when "011" => bf_exts <= '1'; --BFEXTS when "001" => bf_extu <= '1'; --BFEXTU - -- when "100" => insert <= (others =>'0'); --BFCLR + -- when "100" => insert <= (others =>'0'); --BFCLR when "101" => bf_fffo <= '1'; --BFFFO when "110" => bf_bset <= '1'; --BFSET when "111" => bf_ins <= '1'; --BFinS @@ -486,137 +484,35 @@ process (clk, mux, mask, bitnr, bf_ins, bf_bchg, bf_bset, bf_exts, bf_extu, bf_s end if; end if; - shift <= bf_ext_in & OP2out; - - -- Rotate right by bf_shift bits. Rotate through 32 bits when - -- operating on a register (bf_d32 = 1), through 40 bits else. 
- - -- rotate 1 bit right if required - if bf_shift(0) = '1' then - if bf_d32 = '1' then - inmux0(31 downto 0) <= shift(0) & shift(31 downto 1); - else - inmux0 <= shift(0) & shift(39 downto 1); - end if; - else - inmux0 <= shift; - end if; + ------------- BF_SET2 -------------- + if bf_ins = '1' then + bf_loff_dir <= 32 - bf_loffset; + else + bf_loff_dir <= bf_loffset; + end if; - -- rotate 2 bits right if required - if bf_shift(1) = '1' then - if bf_d32 = '1' then - inmux1(31 downto 0) <= inmux0(1 downto 0) & inmux0(31 downto 2); + if bf_d32 = '1' then + -- 32bit: rotate 0..31 bits left or right, don't care for upper 8 bits + bf_set2 <= "--------" & std_logic_vector(unsigned(OP2out) ror to_integer(unsigned(bf_loff_dir))); + else + if bf_ins = '1' then + -- 40 bit: shift 0..7 bits left + bf_set2 <= std_logic_vector(unsigned(bf_ext_in & OP2out) sll to_integer(unsigned(bf_loffset(2 downto 0)))); else - inmux1 <= inmux0(1 downto 0) & inmux0(39 downto 2); + -- 40 bit: shift 0..7 bits right + bf_set2 <= std_logic_vector(unsigned(bf_ext_in & OP2out) srl to_integer(unsigned(bf_loffset(2 downto 0)))); end if; - else - inmux1 <= inmux0; - end if; - - -- rotate 4 bits right if required - if bf_shift(2) = '1' then - if bf_d32 = '1' then - inmux2(31 downto 0) <= inmux1(3 downto 0) & inmux1(31 downto 4); - else - inmux2 <= inmux1(3 downto 0) & inmux1(39 downto 4); - end if; - else - inmux2 <= inmux1; - end if; - - -- rotate 8 bits right if required - if bf_shift(3) = '1' then - if bf_d32 = '1' then - inmux3(31 downto 0) <= inmux2(7 downto 0) & inmux2(31 downto 8); - else - inmux3 <= inmux2(7 downto 0) & inmux2(39 downto 8); - end if; - else - inmux3 <= inmux2(39 downto 0); - end if; + end if; + + ------------- COPY -------------- + if bf_d32 = '1' then + -- 32bit: rotate 32 bits 0..31 bits left, don't care for upper 8 bits + copy <= "--------" & std_logic_vector(unsigned(sign) rol to_integer(unsigned(bf_loffset))); + else + -- 40 bit: shift 40 bits 0..7 bits left, fill with 
'1's (hence the two not's) + copy <= not std_logic_vector(unsigned(x"00" & (not sign)) sll to_integer(unsigned(bf_loffset(2 downto 0)))); + end if; - -- rotate 16 bits right if required - if bf_shift(4) = '1' then - if bf_d32 = '1' then - inmux4(31 downto 0) <= inmux3(15 downto 0) & inmux3(31 downto 16); - else - inmux4 <= inmux3(15 downto 0) & inmux3(39 downto 16); - end if; - else - inmux4 <= inmux3; - end if; - - -- rotate 32 bits right if required - if bf_shift(5) = '1' then - if bf_d32 = '1' then - bf_set2(31 downto 0) <= inmux4(31 downto 0); - else - bf_set2 <= inmux4(31 downto 0) & inmux3(39 downto 32); - end if; - else - bf_set2 <= inmux4; - end if; - - -- shift 16 bits left if required while expanding sign from 32 bits to 40 bits - --TH: Check if it's possible to shift 1 bits in from lsb instead of wrapping - if bf_loffset(4) = '1' then - if bf_d32 = '1' then - -- _ABCD -> _CDAB - copymux3(31 downto 0) <= sign(15 downto 0) & sign(31 downto 16); - else - -- _ABCD -> BCD1A - copymux3 <= sign(23 downto 0) & "11111111" & sign(31 downto 24); - end if; - else - copymux3 <= "11111111" & sign; - end if; - - -- shift 8 bits left if required - if bf_loffset(3) = '1' then - if bf_d32 = '1' then - -- _ABCD -> _BCDA - copymux2(31 downto 0) <= copymux3(23 downto 0) & copymux3(31 downto 24); - else - -- ABCDE -> BCDEA - copymux2 <= copymux3(31 downto 0) & copymux3(39 downto 32); - end if; - else - copymux2 <= copymux3; - end if; - - -- shift 4 bits left if required - if bf_loffset(2) = '1' then - if bf_d32 = '1' then - copymux1(31 downto 0) <= copymux2(27 downto 0) & copymux2(31 downto 28); - else - copymux1 <= copymux2(35 downto 0) & copymux2(39 downto 36); - end if; - else - copymux1 <= copymux2; - end if; - - -- shift 2 bits left if required - if bf_loffset(1) = '1' then - if bf_d32 = '1' then - copymux0(31 downto 0) <= copymux1(29 downto 0) & copymux1(31 downto 30); - else - copymux0 <= copymux1(37 downto 0) & copymux1(39 downto 38); - end if; - else - copymux0 <= 
copymux1; - end if; - - -- shift 1 bit left if required - if bf_loffset(0) = '1' then - if bf_d32 = '1' then - copy(31 downto 0) <= copymux0(30 downto 0) & copymux0(31); - else - copy <= copymux0(38 downto 0) & copymux0(39); - end if; - else - copy <= copymux0; - end if; - if bf_ins = '1' then datareg <= reg_QB; else @@ -627,8 +523,7 @@ process (clk, mux, mask, bitnr, bf_ins, bf_bchg, bf_bset, bf_exts, bf_extu, bf_s if bf_ins = '1' then result <= bf_set2; elsif bf_bchg = '1' then - result(31 downto 0) <= not OP1out; - result(39 downto 32) <= not bf_ext_in; + result <= not (bf_ext_in & OP1out); elsif bf_bset = '1' then result <= (others => '1'); else @@ -636,9 +531,9 @@ process (clk, mux, mask, bitnr, bf_ins, bf_bchg, bf_bset, bf_exts, bf_extu, bf_s end if; sign <= (others => '0'); - bf_NFlag <= datareg(to_integer(unsigned(bf_width(4 downto 0)))); + bf_NFlag <= datareg(to_integer(unsigned(bf_width))); for i in 0 TO 31 loop - if i > bf_width(4 downto 0) then + if i > bf_width then datareg(i) <= '0'; sign(i) <= '1'; end if; @@ -664,7 +559,7 @@ process (clk, mux, mask, bitnr, bf_ins, bf_bchg, bf_bset, bf_exts, bf_extu, bf_s end if; else --TH: TODO: check if this really does what it's supposed to - bf_flag_n <= result_tmp(to_integer(unsigned(bf_loffset)+unsigned(bf_width(4 downto 0)))); + bf_flag_n <= result_tmp(to_integer(unsigned(bf_loffset)+unsigned(bf_width))); end if; for i in 0 TO 39 loop if copy(i) = '1' then @@ -681,57 +576,41 @@ process (clk, mux, mask, bitnr, bf_ins, bf_bchg, bf_bset, bf_exts, bf_extu, bf_s end if; --BFFFO - mask <= datareg; - bf_firstbit <= '0' & bitnr; - bitnr <= "11111"; - if mask(31 downto 28) = "0000" then - if mask(27 downto 24) = "0000" then - if mask(23 downto 20) = "0000" then - if mask(19 downto 16) = "0000" then - bitnr(4) <= '0'; - if mask(15 downto 12) = "0000" then - if mask(11 downto 8) = "0000" then - bitnr(3) <= '0'; - if mask(7 downto 4) = "0000" then - bitnr(2) <= '0'; - mux <= mask(3 downto 0); - else - mux <= mask(7 downto 
4); - end if; - else - mux <= mask(11 downto 8); - bitnr(2) <= '0'; - end if; - else - mux <= mask(15 downto 12); - end if; - else - mux <= mask(19 downto 16); - bitnr(3) <= '0'; - bitnr(2) <= '0'; - end if; - else - mux <= mask(23 downto 20); - bitnr(3) <= '0'; - end if; - else - mux <= mask(27 downto 24); - bitnr(2) <= '0'; - end if; - else - mux <= mask(31 downto 28); - end if; + if datareg(31) = '1' then bf_firstbit <= "100000"; + elsif datareg(30) = '1' then bf_firstbit <= "011111"; + elsif datareg(29) = '1' then bf_firstbit <= "011110"; + elsif datareg(28) = '1' then bf_firstbit <= "011101"; + elsif datareg(27) = '1' then bf_firstbit <= "011100"; + elsif datareg(26) = '1' then bf_firstbit <= "011011"; + elsif datareg(25) = '1' then bf_firstbit <= "011010"; + elsif datareg(24) = '1' then bf_firstbit <= "011001"; + elsif datareg(23) = '1' then bf_firstbit <= "011000"; + elsif datareg(22) = '1' then bf_firstbit <= "010111"; + elsif datareg(21) = '1' then bf_firstbit <= "010110"; + elsif datareg(20) = '1' then bf_firstbit <= "010101"; + elsif datareg(19) = '1' then bf_firstbit <= "010100"; + elsif datareg(18) = '1' then bf_firstbit <= "010011"; + elsif datareg(17) = '1' then bf_firstbit <= "010010"; + elsif datareg(16) = '1' then bf_firstbit <= "010001"; + elsif datareg(15) = '1' then bf_firstbit <= "010000"; + elsif datareg(14) = '1' then bf_firstbit <= "001111"; + elsif datareg(13) = '1' then bf_firstbit <= "001110"; + elsif datareg(12) = '1' then bf_firstbit <= "001101"; + elsif datareg(11) = '1' then bf_firstbit <= "001100"; + elsif datareg(10) = '1' then bf_firstbit <= "001011"; + elsif datareg(9) = '1' then bf_firstbit <= "001010"; + elsif datareg(8) = '1' then bf_firstbit <= "001001"; + elsif datareg(7) = '1' then bf_firstbit <= "001000"; + elsif datareg(6) = '1' then bf_firstbit <= "000111"; + elsif datareg(5) = '1' then bf_firstbit <= "000110"; + elsif datareg(4) = '1' then bf_firstbit <= "000101"; + elsif datareg(3) = '1' then bf_firstbit <= "000100"; + 
elsif datareg(2) = '1' then bf_firstbit <= "000011"; + elsif datareg(1) = '1' then bf_firstbit <= "000010"; + elsif datareg(0) = '1' then bf_firstbit <= "000001"; + else bf_firstbit <= "000000"; + end if; - if mux(3 downto 2) = "00" then - bitnr(1) <= '0'; - if mux(1) = '0' then - bitnr(0) <= '0'; - end if; - else - if mux(3) = '0' then - bitnr(0) <= '0'; - end if; - end if; end process; ----------------------------------------------------------------------------- diff --git a/tests/tg68k/TG68K_Pack.vhd b/tests/tg68k/TG68K_Pack.vhd index 5fc4d1a..8a5cc40 100644 --- a/tests/tg68k/TG68K_Pack.vhd +++ b/tests/tg68k/TG68K_Pack.vhd @@ -230,8 +230,7 @@ package TG68K_Pack is micro_state : in micro_states; bf_ext_in : in std_logic_vector(7 downto 0); bf_ext_out : out std_logic_vector(7 downto 0); - bf_shift : in std_logic_vector(5 downto 0); - bf_width : in std_logic_vector(5 downto 0); + bf_width : in std_logic_vector(4 downto 0); bf_loffset : in std_logic_vector(4 downto 0); bf_offset : in std_logic_vector(31 downto 0); set_V_Flag_out : out bit; diff --git a/tests/tg68k/TG68KdotC_Kernel.vhd b/tests/tg68k/TG68KdotC_Kernel.vhd index 25f3398..5e43693 100644 --- a/tests/tg68k/TG68KdotC_Kernel.vhd +++ b/tests/tg68k/TG68KdotC_Kernel.vhd @@ -265,15 +265,13 @@ architecture logic of TG68KdotC_Kernel is signal last_data_read : std_logic_vector(31 downto 0); signal last_data_in : std_logic_vector(31 downto 0); - signal alu_bf_offset : std_logic_vector(31 downto 0); signal bf_offset : std_logic_vector(31 downto 0); signal bf_offset_l : std_logic_vector(4 downto 0); - signal bf_width : std_logic_vector(5 downto 0); - signal bf_bhits : std_logic_vector(5 downto 0); - signal bf_shift : std_logic_vector(5 downto 0); - signal alu_width : std_logic_vector(5 downto 0); - signal alu_bf_shift : std_logic_vector(5 downto 0); signal bf_loffset : std_logic_vector(4 downto 0); + signal bf_width : std_logic_vector(4 downto 0); + signal bf_bhits : std_logic_vector(5 downto 0); + signal alu_bf_width 
: std_logic_vector(4 downto 0); + signal alu_bf_offset : std_logic_vector(31 downto 0); signal alu_bf_loffset : std_logic_vector(4 downto 0); signal movec_data : std_logic_vector(31 downto 0); @@ -327,8 +325,7 @@ begin micro_state => micro_state, --: in micro_states; bf_ext_in => bf_ext_in, bf_ext_out => bf_ext_out, - bf_shift => alu_bf_shift, - bf_width => alu_width, + bf_width => alu_bf_width, bf_offset => alu_bf_offset, bf_loffset => alu_bf_loffset, set_V_Flag_out => set_V_Flag, --: buffer bit; @@ -1043,8 +1040,7 @@ PROCESS (clk, IPL, setstate, state, exec_write_back, set_direct_data, next_micro end if; if exec(get_bfoffset) = '1' then - alu_width <= bf_width; - alu_bf_shift <= bf_shift; + alu_bf_width <= bf_width; alu_bf_loffset <= bf_loffset; alu_bf_offset <= bf_offset; end if; @@ -1174,7 +1170,7 @@ PROCESS (clk, IPL, setstate, state, exec_write_back, set_direct_data, next_micro ------------------------------------------------------------------------------ --prepare Bitfield Parameters ------------------------------------------------------------------------------ - process (clk, Reset, sndOPC, reg_QA, reg_QB, bf_width, bf_offset, bf_offset_l, bf_bhits, opcode, setstate, bf_shift) + process (clk, Reset, sndOPC, reg_QA, reg_QB, bf_width, bf_offset, bf_offset_l, bf_bhits, opcode, setstate) begin -- the ALU needs the full real offset to return the correct result for -- bfffo @@ -1187,41 +1183,23 @@ PROCESS (clk, IPL, setstate, state, exec_write_back, set_direct_data, next_micro -- offset within long word bf_offset_l <= bf_offset(4 downto 0); - bf_width(5) <= '0'; if sndOPC(5) = '1' then - bf_width(4 downto 0) <= reg_QB(4 downto 0) - 1; + bf_width <= reg_QB(4 downto 0) - 1; else - bf_width(4 downto 0) <= sndOPC(4 downto 0) - 1; + bf_width <= sndOPC(4 downto 0) - 1; end if; - bf_bhits <= bf_width + bf_offset_l; + + bf_bhits <= ('0' & bf_width) + ('0' & bf_offset_l); set_oddout <= not bf_bhits(3); - if opcode(4 downto 3) = "00" then -- register target - if opcode(10 
downto 8) = "111" then --INS - bf_shift <= bf_bhits + 1; -- bf_shift = offset + width - else - bf_shift <= 31 - bf_bhits; -- bf_shift = 32 - (offset + width); - end if; - bf_loffset <= 31 - bf_bhits(4 downto 0); - bf_shift(5) <= '0'; - else - -- memory target - if opcode(10 downto 8) = "111" then --INS - -- bf_shift = 40 - (7 - (bf_bhits & 7)) - bf_shift <= 40 - ("000" & ("111" - bf_bhits(2 downto 0))); - else - -- bf_shift = 7 - (bf_bhits & 7) - bf_shift <= "000" & ("111" - bf_bhits(2 downto 0)); - end if; - -- bf_loffset = 7 - (bf_bhits & 7) - bf_loffset <= "00" & ("111" - bf_bhits(2 downto 0)); - + bf_loffset <= 31 - bf_bhits(4 downto 0); + if opcode(4 downto 3) /= "00" then -- memory is being read with byte precision, thus offset -- bit 2:0 are only used in the alu + bf_loffset(4 downto 3) <= "00"; bf_offset_l(4 downto 3) <= "00"; - end if; --- bf_loffset(5) <= '0'; - + end if; + case bf_bhits(5 downto 3) is when "000" => set_memmask <= "101111";