mirror of
https://github.com/antonblanchard/microwatt.git
synced 2026-03-30 19:05:04 +00:00
core: Add support for single-precision FP loads and stores
This adds code to loadstore1 to convert between single-precision and double-precision formats, and implements the lfs* and stfs* instructions. The conversion processes are described in Power ISA v3.1 Book 1 sections 4.6.2 and 4.6.3. These conversions take one cycle, so lfs* and stfs* are one cycle slower than lfd* and stfd*. Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
@@ -287,6 +287,7 @@ package common is
|
||||
virt_mode : std_ulogic; -- do translation through TLB
|
||||
priv_mode : std_ulogic; -- privileged mode (MSR[PR] = 0)
|
||||
mode_32bit : std_ulogic; -- trim addresses to 32 bits
|
||||
is_32bit : std_ulogic;
|
||||
end record;
|
||||
constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0',
|
||||
sign_extend => '0', update => '0', xerc => xerc_init,
|
||||
@@ -294,7 +295,7 @@ package common is
|
||||
nia => (others => '0'), insn => (others => '0'),
|
||||
addr1 => (others => '0'), addr2 => (others => '0'), data => (others => '0'),
|
||||
write_reg => (others => '0'), length => (others => '0'),
|
||||
mode_32bit => '0', others => (others => '0'));
|
||||
mode_32bit => '0', is_32bit => '0', others => (others => '0'));
|
||||
|
||||
type Loadstore1ToExecute1Type is record
|
||||
busy : std_ulogic;
|
||||
|
||||
@@ -3,6 +3,7 @@ use ieee.std_logic_1164.all;
|
||||
use ieee.numeric_std.all;
|
||||
|
||||
library work;
|
||||
use work.helpers.all;
|
||||
|
||||
entity zero_counter is
|
||||
port (
|
||||
@@ -15,42 +16,6 @@ entity zero_counter is
|
||||
end entity zero_counter;
|
||||
|
||||
architecture behaviour of zero_counter is
|
||||
-- Reverse the order of bits in a word
|
||||
function bit_reverse(a: std_ulogic_vector) return std_ulogic_vector is
|
||||
variable ret: std_ulogic_vector(a'left downto a'right);
|
||||
begin
|
||||
for i in a'right to a'left loop
|
||||
ret(a'left + a'right - i) := a(i);
|
||||
end loop;
|
||||
return ret;
|
||||
end;
|
||||
|
||||
-- If there is only one bit set in a doubleword, return its bit number
|
||||
-- (counting from the right). Each bit of the result is obtained by
|
||||
-- ORing together 32 bits of the input:
|
||||
-- bit 0 = a[1] or a[3] or a[5] or ...
|
||||
-- bit 1 = a[2] or a[3] or a[6] or a[7] or ...
|
||||
-- bit 2 = a[4..7] or a[12..15] or ...
|
||||
-- bit 5 = a[32..63] ORed together
|
||||
function bit_number(a: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
|
||||
variable ret: std_ulogic_vector(5 downto 0);
|
||||
variable stride: natural;
|
||||
variable bit: std_ulogic;
|
||||
variable k: natural;
|
||||
begin
|
||||
stride := 2;
|
||||
for i in 0 to 5 loop
|
||||
bit := '0';
|
||||
for j in 0 to (64 / stride) - 1 loop
|
||||
k := j * stride;
|
||||
bit := bit or (or a(k + stride - 1 downto k + (stride / 2)));
|
||||
end loop;
|
||||
ret(i) := bit;
|
||||
stride := stride * 2;
|
||||
end loop;
|
||||
return ret;
|
||||
end;
|
||||
|
||||
signal inp : std_ulogic_vector(63 downto 0);
|
||||
signal sum : std_ulogic_vector(64 downto 0);
|
||||
signal msb_r : std_ulogic;
|
||||
|
||||
16
decode1.vhdl
16
decode1.vhdl
@@ -74,8 +74,8 @@ architecture behaviour of decode1 is
|
||||
35 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- lbzu
|
||||
50 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lfd
|
||||
51 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- lfdu
|
||||
-- 48 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- lfs
|
||||
-- 49 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- lfsu
|
||||
48 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- lfs
|
||||
49 => (LDST, OP_FPLOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- lfsu
|
||||
42 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '0'), -- lha
|
||||
43 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '1', '0', '0', '0', NONE, '0', '0'), -- lhau
|
||||
40 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lhz
|
||||
@@ -93,8 +93,8 @@ architecture behaviour of decode1 is
|
||||
39 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stbu
|
||||
54 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stfd
|
||||
55 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stfdu
|
||||
-- 52 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- stfs
|
||||
-- 53 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- stfsu
|
||||
52 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- stfs
|
||||
53 => (LDST, OP_FPSTORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- stfsu
|
||||
44 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sth
|
||||
45 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- sthu
|
||||
36 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stw
|
||||
@@ -284,8 +284,8 @@ architecture behaviour of decode1 is
|
||||
2#1001110111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- lfdux
|
||||
2#1101010111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '0', '0', '0', '0', NONE, '0', '0'), -- lfiwax
|
||||
2#1101110111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lfiwzx
|
||||
-- 2#1000010111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- lfsx
|
||||
-- 2#1000110111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- lfsux
|
||||
2#1000010111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- lfsx
|
||||
2#1000110111# => (LDST, OP_FPLOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- lfsux
|
||||
2#0001110100# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '1', '0', '0', NONE, '0', '0'), -- lharx
|
||||
2#0101110111# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '1', '0', '0', '0', NONE, '0', '0'), -- lhaux
|
||||
2#0101010111# => (LDST, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '0'), -- lhax
|
||||
@@ -367,8 +367,8 @@ architecture behaviour of decode1 is
|
||||
2#1011010111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stfdx
|
||||
2#1011110111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- stfdux
|
||||
2#1111010111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- stfiwx
|
||||
-- 2#1010010111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- stfsx
|
||||
-- 2#1010110111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- stfsux
|
||||
2#1010010111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- stfsx
|
||||
2#1010110111# => (LDST, OP_FPSTORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0'), -- stfsux
|
||||
2#1110010110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '1', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sthbrx
|
||||
2#1110110101# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- sthcix
|
||||
2#1011010110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '1', '0', '0', ONE, '0', '0'), -- sthcx
|
||||
|
||||
@@ -1259,6 +1259,7 @@ begin
|
||||
lv.virt_mode := ctrl.msr(MSR_DR);
|
||||
lv.priv_mode := not ctrl.msr(MSR_PR);
|
||||
lv.mode_32bit := not ctrl.msr(MSR_SF);
|
||||
lv.is_32bit := e_in.is_32bit;
|
||||
|
||||
-- Update registers
|
||||
rin <= v;
|
||||
|
||||
53
helpers.vhdl
53
helpers.vhdl
@@ -25,6 +25,10 @@ package helpers is
|
||||
function byte_reverse(val: std_ulogic_vector(63 downto 0); size: integer) return std_ulogic_vector;
|
||||
|
||||
function sign_extend(val: std_ulogic_vector(63 downto 0); size: natural) return std_ulogic_vector;
|
||||
|
||||
function bit_reverse(a: std_ulogic_vector) return std_ulogic_vector;
|
||||
function bit_number(a: std_ulogic_vector(63 downto 0)) return std_ulogic_vector;
|
||||
function count_left_zeroes(val: std_ulogic_vector) return std_ulogic_vector;
|
||||
end package helpers;
|
||||
|
||||
package body helpers is
|
||||
@@ -206,4 +210,53 @@ package body helpers is
|
||||
return std_ulogic_vector(ret);
|
||||
|
||||
end;
|
||||
|
||||
-- Reverse the order of bits in a word.
-- The result is declared with the same bounds as the argument
-- (a'left downto a'right); input element a(i) is placed at index
-- a'left + a'right - i, i.e. mirrored around the middle of the range.
-- NOTE: the loop iterates from a'right up to a'left, so the argument is
-- expected to have a descending ("downto") range; for an ascending range
-- both the result range and the loop range would be null.
|
||||
function bit_reverse(a: std_ulogic_vector) return std_ulogic_vector is
|
||||
variable ret: std_ulogic_vector(a'left downto a'right);
|
||||
begin
|
||||
for i in a'right to a'left loop
|
||||
-- mirror index i within [a'right, a'left]
ret(a'left + a'right - i) := a(i);
|
||||
end loop;
|
||||
return ret;
|
||||
end;
|
||||
|
||||
-- If there is only one bit set in a doubleword, return its bit number
-- as a 6-bit vector
|
||||
-- (counting from the right). Each bit of the result is obtained by
|
||||
-- ORing together 32 bits of the input:
|
||||
-- bit 0 = a[1] or a[3] or a[5] or ...
|
||||
-- bit 1 = a[2] or a[3] or a[6] or a[7] or ...
|
||||
-- bit 2 = a[4..7] or a[12..15] or ...
|
||||
-- bit 5 = a[32..63] ORed together
-- In other words, result bit i is the OR of every a(k) whose index k has
-- bit i set.  Consequently, if several input bits are set the result is
-- the bitwise OR of their bit numbers, and an all-zero input yields 0.
|
||||
function bit_number(a: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
|
||||
variable ret: std_ulogic_vector(5 downto 0);
|
||||
variable stride: natural;
|
||||
variable bit: std_ulogic;
|
||||
variable k: natural;
|
||||
begin
|
||||
-- stride is 2**(i+1): the input is viewed as groups of 'stride' bits
stride := 2;
|
||||
for i in 0 to 5 loop
|
||||
bit := '0';
|
||||
for j in 0 to (64 / stride) - 1 loop
|
||||
-- k is the lowest index of group j
k := j * stride;
|
||||
-- OR-reduce the upper half of the group, i.e. the indices with bit i set
bit := bit or (or a(k + stride - 1 downto k + (stride / 2)));
|
||||
end loop;
|
||||
ret(i) := bit;
|
||||
stride := stride * 2;
|
||||
end loop;
|
||||
return ret;
|
||||
end;
|
||||
|
||||
-- Count leading zeroes operation
-- Returns, as the 6-bit vector produced by bit_number, the number of zero
-- bits to the left of the leftmost '1' in val.
|
||||
-- Assumes the value passed in is not zero (if it is, zero is returned)
-- NOTE: the one-hot intermediate is resized to 64 bits before being passed
-- to bit_number, so val is expected to be at most 64 bits wide.
|
||||
function count_left_zeroes(val: std_ulogic_vector) return std_ulogic_vector is
|
||||
variable rev: std_ulogic_vector(val'left downto val'right);
|
||||
variable sum: std_ulogic_vector(val'left downto val'right);
|
||||
variable onehot: std_ulogic_vector(val'left downto val'right);
|
||||
begin
|
||||
-- After reversal the leftmost '1' of val becomes the rightmost '1' of rev
rev := bit_reverse(val);
|
||||
-- Two's complement negation of rev ...
sum := std_ulogic_vector(- signed(rev));
|
||||
-- ... so that (-rev and rev) keeps only the rightmost '1' of rev
onehot := sum and rev;
|
||||
-- Zero-extend to 64 bits and convert the one-hot value to its bit index
return bit_number(std_ulogic_vector(resize(unsigned(onehot), 64)));
|
||||
end;
|
||||
end package body helpers;
|
||||
|
||||
210
loadstore1.vhdl
210
loadstore1.vhdl
@@ -45,10 +45,12 @@ architecture behave of loadstore1 is
|
||||
|
||||
-- State machine for unaligned loads/stores
-- (also sequences the extra cycles used by single-precision FP
-- stores (FPR_CONV) and loads (FINISH_LFS))
|
||||
type state_t is (IDLE, -- ready for instruction
|
||||
FPR_CONV, -- converting double to float for store
|
||||
SECOND_REQ, -- send 2nd request of unaligned xfer
|
||||
ACK_WAIT, -- waiting for ack from dcache
|
||||
MMU_LOOKUP, -- waiting for MMU to look up translation
|
||||
TLBIE_WAIT, -- waiting for MMU to finish doing a tlbie
|
||||
FINISH_LFS, -- write back converted SP data for lfs*
|
||||
COMPLETE -- extra cycle to complete an operation
|
||||
);
|
||||
|
||||
@@ -89,6 +91,11 @@ architecture behave of loadstore1 is
|
||||
do_update : std_ulogic;
|
||||
extra_cycle : std_ulogic;
|
||||
mode_32bit : std_ulogic;
|
||||
load_sp : std_ulogic;
|
||||
ld_sp_data : std_ulogic_vector(31 downto 0);
|
||||
ld_sp_nz : std_ulogic;
|
||||
ld_sp_lz : std_ulogic_vector(5 downto 0);
|
||||
st_sp_data : std_ulogic_vector(31 downto 0);
|
||||
end record;
|
||||
|
||||
type byte_sel_t is array(0 to 7) of std_ulogic;
|
||||
@@ -98,6 +105,9 @@ architecture behave of loadstore1 is
|
||||
signal r, rin : reg_stage_t;
|
||||
signal lsu_sum : std_ulogic_vector(63 downto 0);
|
||||
|
||||
signal store_sp_data : std_ulogic_vector(31 downto 0);
|
||||
signal load_dp_data : std_ulogic_vector(63 downto 0);
|
||||
|
||||
-- Generate byte enables from sizes
|
||||
function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is
|
||||
begin
|
||||
@@ -128,6 +138,72 @@ architecture behave of loadstore1 is
|
||||
to_integer(unsigned(address))));
|
||||
end function xfer_data_sel;
|
||||
|
||||
-- 23-bit right shifter for DP -> SP float conversions
-- Logical (zero-filling) right shift of frac, done in two stages:
--   stage 1 shifts by shift(1 downto 0)      (0 to 3 bit positions)
--   stage 2 shifts by 4 * shift(4 downto 2)  (0, 4, 8, 12, 16 or 20)
-- Any shift(4 downto 2) value above "100" takes the 20-position branch,
-- so shift amounts of 24..31 behave like 20..23.
-- Returns the shifted 23-bit value.
|
||||
function shifter_23r(frac: std_ulogic_vector(22 downto 0); shift: unsigned(4 downto 0))
|
||||
return std_ulogic_vector is
|
||||
variable fs1 : std_ulogic_vector(22 downto 0);
|
||||
variable fs2 : std_ulogic_vector(22 downto 0);
|
||||
begin
|
||||
-- Stage 1: shift right by 0..3
case shift(1 downto 0) is
|
||||
when "00" =>
|
||||
fs1 := frac;
|
||||
when "01" =>
|
||||
fs1 := '0' & frac(22 downto 1);
|
||||
when "10" =>
|
||||
fs1 := "00" & frac(22 downto 2);
|
||||
when others =>
|
||||
fs1 := "000" & frac(22 downto 3);
|
||||
end case;
|
||||
-- Stage 2: shift right by a multiple of 4
case shift(4 downto 2) is
|
||||
when "000" =>
|
||||
fs2 := fs1;
|
||||
when "001" =>
|
||||
fs2 := x"0" & fs1(22 downto 4);
|
||||
when "010" =>
|
||||
fs2 := x"00" & fs1(22 downto 8);
|
||||
when "011" =>
|
||||
fs2 := x"000" & fs1(22 downto 12);
|
||||
when "100" =>
|
||||
fs2 := x"0000" & fs1(22 downto 16);
|
||||
when others =>
|
||||
-- "101" and above: shift by 20, keeping only the top 3 bits
fs2 := x"00000" & fs1(22 downto 20);
|
||||
end case;
|
||||
return fs2;
|
||||
end;
|
||||
|
||||
-- 23-bit left shifter for SP -> DP float conversions
-- Logical (zero-filling) left shift of frac, done in two stages:
--   stage 1 shifts by shift(1 downto 0)      (0 to 3 bit positions)
--   stage 2 shifts by 4 * shift(4 downto 2)  (0, 4, 8, 12, 16 or 20)
-- Any shift(4 downto 2) value above "100" takes the 20-position branch,
-- so shift amounts of 24..31 behave like 20..23.
-- Bits shifted out of the top are discarded; returns the 23-bit result.
|
||||
function shifter_23l(frac: std_ulogic_vector(22 downto 0); shift: unsigned(4 downto 0))
|
||||
return std_ulogic_vector is
|
||||
variable fs1 : std_ulogic_vector(22 downto 0);
|
||||
variable fs2 : std_ulogic_vector(22 downto 0);
|
||||
begin
|
||||
-- Stage 1: shift left by 0..3
case shift(1 downto 0) is
|
||||
when "00" =>
|
||||
fs1 := frac;
|
||||
when "01" =>
|
||||
fs1 := frac(21 downto 0) & '0';
|
||||
when "10" =>
|
||||
fs1 := frac(20 downto 0) & "00";
|
||||
when others =>
|
||||
fs1 := frac(19 downto 0) & "000";
|
||||
end case;
|
||||
-- Stage 2: shift left by a multiple of 4
case shift(4 downto 2) is
|
||||
when "000" =>
|
||||
fs2 := fs1;
|
||||
when "001" =>
|
||||
fs2 := fs1(18 downto 0) & x"0" ;
|
||||
when "010" =>
|
||||
fs2 := fs1(14 downto 0) & x"00";
|
||||
when "011" =>
|
||||
fs2 := fs1(10 downto 0) & x"000";
|
||||
when "100" =>
|
||||
fs2 := fs1(6 downto 0) & x"0000";
|
||||
when others =>
|
||||
-- "101" and above: shift by 20, keeping only the low 3 bits
fs2 := fs1(2 downto 0) & x"00000";
|
||||
end case;
|
||||
return fs2;
|
||||
end;
|
||||
|
||||
begin
|
||||
-- Calculate the address in the first cycle
|
||||
lsu_sum <= std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2)) when l_in.valid = '1' else (others => '0');
|
||||
@@ -145,6 +221,59 @@ begin
|
||||
end if;
|
||||
end process;
|
||||
|
||||
ls_fp_conv: if HAS_FPU generate
|
||||
-- Convert DP data to SP for stfs
-- Combinational: derives the 32-bit store_sp_data from the 64-bit
-- double-precision value on l_in.data.  No rounding is done; low-order
-- fraction bits that do not fit are simply dropped.
--   exp > 896          : sign, top exponent bit and bits 58..29 are copied
--   874 <= exp <= 896  : result is an SP denormal (exponent field 0), with
--                        the fraction shifted right
--   exp < 874          : result is a signed zero
-- (896 is the offset between the SP and DP exponent fields that the
-- SP -> DP conversion in this file adds back.)
|
||||
dp_to_sp: process(all)
|
||||
variable exp : unsigned(10 downto 0);
|
||||
variable frac : std_ulogic_vector(22 downto 0);
|
||||
variable shift : unsigned(4 downto 0);
|
||||
begin
|
||||
-- Sign is always copied; everything else defaults to zero
store_sp_data(31) <= l_in.data(63);
|
||||
store_sp_data(30 downto 0) <= (others => '0');
|
||||
exp := unsigned(l_in.data(62 downto 52));
|
||||
if exp > 896 then
|
||||
-- Exponent MSB plus low 7 exponent bits and top 23 fraction bits
store_sp_data(30) <= l_in.data(62);
|
||||
store_sp_data(29 downto 0) <= l_in.data(58 downto 29);
|
||||
elsif exp >= 874 then
|
||||
-- denormalization required
|
||||
-- Make the implicit leading 1 explicit, followed by the top 22 fraction bits
frac := '1' & l_in.data(51 downto 30);
|
||||
-- (896 - exp) modulo 32: 0 when exp = 896, up to 22 when exp = 874
shift := 0 - exp(4 downto 0);
|
||||
store_sp_data(22 downto 0) <= shifter_23r(frac, shift);
|
||||
end if;
|
||||
end process;
|
||||
|
||||
-- Convert SP data to DP for lfs
-- Combinational: builds the 64-bit load_dp_data from the registered
-- single-precision word r.ld_sp_data, using the registered r.ld_sp_nz
-- (fraction non-zero) and r.ld_sp_lz (leading-zero count of the fraction)
-- to handle zero and denormalized inputs.
|
||||
sp_to_dp: process(all)
|
||||
variable exp : unsigned(7 downto 0);
|
||||
variable exp_dp : unsigned(10 downto 0);
|
||||
variable exp_nz : std_ulogic;
|
||||
variable exp_ao : std_ulogic;
|
||||
variable frac : std_ulogic_vector(22 downto 0);
|
||||
variable frac_shift : unsigned(4 downto 0);
|
||||
begin
|
||||
frac := r.ld_sp_data(22 downto 0);
|
||||
exp := unsigned(r.ld_sp_data(30 downto 23));
|
||||
-- exp_nz: SP exponent field is non-zero; exp_ao: SP exponent field is all ones
exp_nz := or (r.ld_sp_data(30 downto 23));
|
||||
exp_ao := and (r.ld_sp_data(30 downto 23));
|
||||
-- Fraction is passed through unshifted unless the input is denormalized
frac_shift := (others => '0');
|
||||
if exp_ao = '1' then
|
||||
exp_dp := to_unsigned(2047, 11); -- infinity or NaN
|
||||
elsif exp_nz = '1' then
|
||||
exp_dp := 896 + resize(exp, 11); -- finite normalized value
|
||||
elsif r.ld_sp_nz = '0' then
|
||||
exp_dp := to_unsigned(0, 11); -- zero
|
||||
else
|
||||
-- denormalized SP operand, need to normalize
|
||||
-- Each leading zero in the fraction lowers the DP exponent by one
exp_dp := 896 - resize(unsigned(r.ld_sp_lz), 11);
|
||||
-- Shift one position further than the leading zeroes so that the
-- leading 1 itself is shifted out and becomes the implicit bit
frac_shift := unsigned(r.ld_sp_lz(4 downto 0)) + 1;
|
||||
end if;
|
||||
load_dp_data(63) <= r.ld_sp_data(31);
|
||||
load_dp_data(62 downto 52) <= std_ulogic_vector(exp_dp);
|
||||
load_dp_data(51 downto 29) <= shifter_23l(frac, frac_shift);
|
||||
-- Low 29 fraction bits have no SP counterpart
load_dp_data(28 downto 0) <= (others => '0');
|
||||
end process;
|
||||
end generate;
|
||||
|
||||
loadstore1_1: process(all)
|
||||
variable v : reg_stage_t;
|
||||
variable brev_lenm1 : unsigned(2 downto 0);
|
||||
@@ -165,6 +294,9 @@ begin
|
||||
variable data_permuted : std_ulogic_vector(63 downto 0);
|
||||
variable data_trimmed : std_ulogic_vector(63 downto 0);
|
||||
variable store_data : std_ulogic_vector(63 downto 0);
|
||||
variable data_in : std_ulogic_vector(63 downto 0);
|
||||
variable byte_rev : std_ulogic;
|
||||
variable length : std_ulogic_vector(3 downto 0);
|
||||
variable use_second : byte_sel_t;
|
||||
variable trim_ctl : trim_ctl_t;
|
||||
variable negative : std_ulogic;
|
||||
@@ -176,6 +308,8 @@ begin
|
||||
variable mmu_mtspr : std_ulogic;
|
||||
variable itlb_fault : std_ulogic;
|
||||
variable misaligned : std_ulogic;
|
||||
variable fp_reg_conv : std_ulogic;
|
||||
variable lfs_done : std_ulogic;
|
||||
begin
|
||||
v := r;
|
||||
req := '0';
|
||||
@@ -185,8 +319,10 @@ begin
|
||||
sprn := std_ulogic_vector(to_unsigned(decode_spr_num(l_in.insn), 10));
|
||||
dsisr := (others => '0');
|
||||
mmureq := '0';
|
||||
fp_reg_conv := '0';
|
||||
|
||||
write_enable := '0';
|
||||
lfs_done := '0';
|
||||
|
||||
do_update := r.do_update;
|
||||
v.do_update := '0';
|
||||
@@ -245,19 +381,38 @@ begin
|
||||
end case;
|
||||
end loop;
|
||||
|
||||
-- Byte reversing and rotating for stores
|
||||
-- Done in the first cycle (when l_in.valid = 1)
|
||||
if HAS_FPU then
|
||||
-- Single-precision FP conversion
|
||||
v.st_sp_data := store_sp_data;
|
||||
v.ld_sp_data := data_trimmed(31 downto 0);
|
||||
v.ld_sp_nz := or (data_trimmed(22 downto 0));
|
||||
v.ld_sp_lz := count_left_zeroes(data_trimmed(22 downto 0));
|
||||
end if;
|
||||
|
||||
-- Byte reversing and rotating for stores.
|
||||
-- Done in the first cycle (when l_in.valid = 1) for integer stores
|
||||
-- and DP float stores, and in the second cycle for SP float stores.
|
||||
store_data := r.store_data;
|
||||
if l_in.valid = '1' then
|
||||
byte_offset := unsigned(lsu_sum(2 downto 0));
|
||||
if l_in.valid = '1' or (HAS_FPU and r.state = FPR_CONV) then
|
||||
if HAS_FPU and r.state = FPR_CONV then
|
||||
data_in := x"00000000" & r.st_sp_data;
|
||||
byte_offset := unsigned(r.addr(2 downto 0));
|
||||
byte_rev := r.byte_reverse;
|
||||
length := r.length;
|
||||
else
|
||||
data_in := l_in.data;
|
||||
byte_offset := unsigned(lsu_sum(2 downto 0));
|
||||
byte_rev := l_in.byte_reverse;
|
||||
length := l_in.length;
|
||||
end if;
|
||||
brev_lenm1 := "000";
|
||||
if l_in.byte_reverse = '1' then
|
||||
brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1;
|
||||
if byte_rev = '1' then
|
||||
brev_lenm1 := unsigned(length(2 downto 0)) - 1;
|
||||
end if;
|
||||
for i in 0 to 7 loop
|
||||
k := (to_unsigned(i, 3) - byte_offset) xor brev_lenm1;
|
||||
j := to_integer(k) * 8;
|
||||
store_data(i * 8 + 7 downto i * 8) := l_in.data(j + 7 downto j);
|
||||
store_data(i * 8 + 7 downto i * 8) := data_in(j + 7 downto j);
|
||||
end loop;
|
||||
end if;
|
||||
v.store_data := store_data;
|
||||
@@ -292,6 +447,14 @@ begin
|
||||
case r.state is
|
||||
when IDLE =>
|
||||
|
||||
when FPR_CONV =>
|
||||
req := '1';
|
||||
if r.second_bytes /= "00000000" then
|
||||
v.state := SECOND_REQ;
|
||||
else
|
||||
v.state := ACK_WAIT;
|
||||
end if;
|
||||
|
||||
when SECOND_REQ =>
|
||||
req := '1';
|
||||
v.state := ACK_WAIT;
|
||||
@@ -323,8 +486,13 @@ begin
|
||||
v.load_data := data_permuted;
|
||||
end if;
|
||||
else
|
||||
write_enable := r.load;
|
||||
if r.extra_cycle = '1' then
|
||||
write_enable := r.load and not r.load_sp;
|
||||
if HAS_FPU and r.load_sp = '1' then
|
||||
-- SP to DP conversion takes a cycle
|
||||
-- Write back rA update in this cycle if needed
|
||||
do_update := r.update;
|
||||
v.state := FINISH_LFS;
|
||||
elsif r.extra_cycle = '1' then
|
||||
-- loads with rA update need an extra cycle
|
||||
v.state := COMPLETE;
|
||||
v.do_update := r.update;
|
||||
@@ -362,6 +530,9 @@ begin
|
||||
|
||||
when TLBIE_WAIT =>
|
||||
|
||||
when FINISH_LFS =>
|
||||
lfs_done := '1';
|
||||
|
||||
when COMPLETE =>
|
||||
exception := r.align_intr;
|
||||
|
||||
@@ -395,6 +566,7 @@ begin
|
||||
v.nc := l_in.ci;
|
||||
v.virt_mode := l_in.virt_mode;
|
||||
v.priv_mode := l_in.priv_mode;
|
||||
v.load_sp := '0';
|
||||
v.wait_dcache := '0';
|
||||
v.wait_mmu := '0';
|
||||
v.do_update := '0';
|
||||
@@ -436,14 +608,24 @@ begin
|
||||
v.dcbz := '1';
|
||||
when OP_FPSTORE =>
|
||||
if HAS_FPU then
|
||||
req := '1';
|
||||
if l_in.is_32bit = '1' then
|
||||
v.state := FPR_CONV;
|
||||
fp_reg_conv := '1';
|
||||
else
|
||||
req := '1';
|
||||
end if;
|
||||
end if;
|
||||
when OP_FPLOAD =>
|
||||
if HAS_FPU then
|
||||
v.load := '1';
|
||||
req := '1';
|
||||
-- Allow an extra cycle for RA update
|
||||
-- Allow an extra cycle for SP->DP precision conversion
|
||||
-- or RA update
|
||||
v.extra_cycle := l_in.update;
|
||||
if l_in.is_32bit = '1' then
|
||||
v.load_sp := '1';
|
||||
v.extra_cycle := '1';
|
||||
end if;
|
||||
end if;
|
||||
when OP_TLBIE =>
|
||||
mmureq := '1';
|
||||
@@ -500,7 +682,7 @@ begin
|
||||
end if;
|
||||
end if;
|
||||
|
||||
v.busy := req or mmureq or mmu_mtspr;
|
||||
v.busy := req or mmureq or mmu_mtspr or fp_reg_conv;
|
||||
end if;
|
||||
|
||||
-- Update outputs to dcache
|
||||
@@ -539,6 +721,10 @@ begin
|
||||
l_out.write_enable <= '1';
|
||||
l_out.write_reg <= gpr_to_gspr(r.update_reg);
|
||||
l_out.write_data <= r.addr;
|
||||
elsif lfs_done = '1' then
|
||||
l_out.write_enable <= '1';
|
||||
l_out.write_reg <= r.write_reg;
|
||||
l_out.write_data <= load_dp_data;
|
||||
else
|
||||
l_out.write_enable <= write_enable;
|
||||
l_out.write_reg <= r.write_reg;
|
||||
|
||||
Reference in New Issue
Block a user