1
0
mirror of https://github.com/antonblanchard/microwatt.git synced 2026-01-13 15:18:09 +00:00
Paul Mackerras 374f4c536d writeback: Do data formatting and condition recording in writeback
This adds code to writeback to format data and test the result
against zero for the purpose of setting CR0.  The data formatter
is able to shift and mask by bytes and do byte reversal and sign
extension.  It can also put together bytes from two input
doublewords to support unaligned loads (including unaligned
byte-reversed loads).

The data formatter starts with an 8:1 multiplexer that is able
to direct any byte of the input to any byte of the output.  This
lets us rotate the data and simultaneously byte-reverse it.
The rotated/reversed data goes to a register for the unaligned
cases that overlap two doublewords.  Then there is per-byte logic
that does trimming, sign extension, and splicing together bytes
from a previous input doubleword (stored in data_latched) and the
current doubleword.  Finally the 64-bit result is tested to set
CR0 if rc = 1.

This removes the RC logic from the execute2, multiply and divide
units, and the shift/mask/byte-reverse/sign-extend logic from
loadstore2.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
2019-10-15 15:23:28 +11:00

162 lines
5.7 KiB
VHDL

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library work;
use work.common.all;
use work.decode_types.all;
entity divider is
port (
clk : in std_logic;
rst : in std_logic;
d_in : in Decode2ToDividerType;
d_out : out DividerToWritebackType
);
end entity divider;
architecture behaviour of divider is
signal dend : std_ulogic_vector(128 downto 0);
signal div : unsigned(63 downto 0);
signal quot : std_ulogic_vector(63 downto 0);
signal result : std_ulogic_vector(63 downto 0);
signal sresult : std_ulogic_vector(63 downto 0);
signal oresult : std_ulogic_vector(63 downto 0);
signal qbit : std_ulogic;
signal running : std_ulogic;
signal signcheck : std_ulogic;
signal count : unsigned(6 downto 0);
signal neg_result : std_ulogic;
signal is_modulus : std_ulogic;
signal is_32bit : std_ulogic;
signal extended : std_ulogic;
signal is_signed : std_ulogic;
signal rc : std_ulogic;
signal write_reg : std_ulogic_vector(4 downto 0);
signal overflow : std_ulogic;
signal ovf32 : std_ulogic;
signal did_ovf : std_ulogic;
begin
divider_0: process(clk)
begin
if rising_edge(clk) then
if rst = '1' then
dend <= (others => '0');
div <= (others => '0');
quot <= (others => '0');
running <= '0';
count <= "0000000";
elsif d_in.valid = '1' then
if d_in.is_extended = '1' and not (d_in.is_signed = '1' and d_in.dividend(63) = '1') then
dend <= '0' & d_in.dividend & x"0000000000000000";
else
dend <= '0' & x"0000000000000000" & d_in.dividend;
end if;
div <= unsigned(d_in.divisor);
quot <= (others => '0');
write_reg <= d_in.write_reg;
neg_result <= '0';
is_modulus <= d_in.is_modulus;
extended <= d_in.is_extended;
is_32bit <= d_in.is_32bit;
is_signed <= d_in.is_signed;
rc <= d_in.rc;
count <= "1111111";
running <= '1';
overflow <= '0';
ovf32 <= '0';
signcheck <= d_in.is_signed and (d_in.dividend(63) or d_in.divisor(63));
elsif signcheck = '1' then
signcheck <= '0';
neg_result <= dend(63) xor (div(63) and not is_modulus);
if dend(63) = '1' then
if extended = '1' then
dend <= '0' & std_ulogic_vector(- signed(dend(63 downto 0))) & x"0000000000000000";
else
dend <= '0' & x"0000000000000000" & std_ulogic_vector(- signed(dend(63 downto 0)));
end if;
end if;
if div(63) = '1' then
div <= unsigned(- signed(div));
end if;
elsif running = '1' then
if count = "0111111" then
running <= '0';
end if;
overflow <= quot(63);
if dend(128) = '1' or unsigned(dend(127 downto 64)) >= div then
ovf32 <= ovf32 or quot(31);
dend <= std_ulogic_vector(unsigned(dend(127 downto 64)) - div) &
dend(63 downto 0) & '0';
quot <= quot(62 downto 0) & '1';
count <= count + 1;
elsif dend(128 downto 57) = x"000000000000000000" and count(6 downto 3) /= "0111" then
-- consume 8 bits of zeroes in one cycle
ovf32 <= or (ovf32 & quot(31 downto 24));
dend <= dend(120 downto 0) & x"00";
quot <= quot(55 downto 0) & x"00";
count <= count + 8;
else
ovf32 <= ovf32 or quot(31);
dend <= dend(127 downto 0) & '0';
quot <= quot(62 downto 0) & '0';
count <= count + 1;
end if;
else
count <= "0000000";
end if;
end if;
end process;
divider_1: process(all)
begin
d_out.write_reg_nr <= write_reg;
d_out.rc <= rc;
if is_modulus = '1' then
result <= dend(128 downto 65);
else
result <= quot;
end if;
if neg_result = '1' then
sresult <= std_ulogic_vector(- signed(result));
else
sresult <= result;
end if;
did_ovf <= '0';
if is_32bit = '0' then
did_ovf <= overflow or (is_signed and (sresult(63) xor neg_result));
elsif is_signed = '1' then
if ovf32 = '1' or sresult(32) /= sresult(31) then
did_ovf <= '1';
end if;
else
did_ovf <= ovf32;
end if;
if did_ovf = '1' then
oresult <= (others => '0');
elsif (is_32bit = '1') and (is_modulus = '0') then
-- 32-bit divisions set the top 32 bits of the result to 0
oresult <= x"00000000" & sresult(31 downto 0);
else
oresult <= sresult;
end if;
end process;
divider_out: process(clk)
begin
if rising_edge(clk) then
d_out.write_reg_data <= oresult;
if count = "1000000" then
d_out.valid <= '1';
d_out.write_reg_enable <= '1';
else
d_out.valid <= '0';
d_out.write_reg_enable <= '0';
end if;
end if;
end process;
end architecture behaviour;