mirror of
https://github.com/antonblanchard/microwatt.git
synced 2026-03-09 20:18:27 +00:00
execute1: Reduce width of the result mux to help timing
This reduces the number of different things that are assigned to the result variable.

- The computations for the popcnt, prty, cmpb and exts instruction families are moved into the logical unit.
- The result of mfspr from the slow SPRs is computed in 'spr_val' before being assigned to 'result'.
- Writes to LR as a result of a blr or bclr instruction are done through the exc_write path to writeback.

This eases timing considerably.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
@@ -82,8 +82,6 @@ architecture behaviour of execute1 is
|
||||
signal rotator_carry: std_ulogic;
|
||||
signal logical_result: std_ulogic_vector(63 downto 0);
|
||||
signal countzero_result: std_ulogic_vector(63 downto 0);
|
||||
signal popcnt_result: std_ulogic_vector(63 downto 0);
|
||||
signal parity_result: std_ulogic_vector(63 downto 0);
|
||||
|
||||
-- multiply signals
|
||||
signal x_to_multiply: Execute1ToMultiplyType;
|
||||
@@ -208,9 +206,7 @@ begin
|
||||
invert_in => e_in.invert_a,
|
||||
invert_out => e_in.invert_out,
|
||||
result => logical_result,
|
||||
datalen => e_in.data_len,
|
||||
popcnt => popcnt_result,
|
||||
parity => parity_result
|
||||
datalen => e_in.data_len
|
||||
);
|
||||
|
||||
countzero_0: entity work.zero_counter
|
||||
@@ -295,7 +291,6 @@ begin
|
||||
variable sign1, sign2 : std_ulogic;
|
||||
variable abs1, abs2 : signed(63 downto 0);
|
||||
variable overflow : std_ulogic;
|
||||
variable negative : std_ulogic;
|
||||
variable zerohi, zerolo : std_ulogic;
|
||||
variable msb_a, msb_b : std_ulogic;
|
||||
variable a_lt : std_ulogic;
|
||||
@@ -308,6 +303,7 @@ begin
|
||||
variable is_branch : std_ulogic;
|
||||
variable taken_branch : std_ulogic;
|
||||
variable abs_branch : std_ulogic;
|
||||
variable spr_val : std_ulogic_vector(63 downto 0);
|
||||
begin
|
||||
result := (others => '0');
|
||||
result_with_carry := (others => '0');
|
||||
@@ -627,7 +623,7 @@ begin
|
||||
end if;
|
||||
end if;
|
||||
end if;
|
||||
when OP_AND | OP_OR | OP_XOR =>
|
||||
when OP_AND | OP_OR | OP_XOR | OP_POPCNT | OP_PRTY | OP_CMPB | OP_EXTS =>
|
||||
result := logical_result;
|
||||
result_en := '1';
|
||||
when OP_B =>
|
||||
@@ -677,27 +673,10 @@ begin
|
||||
ctrl_tmp.msr(MSR_DR) <= '1';
|
||||
end if;
|
||||
|
||||
when OP_CMPB =>
|
||||
result := ppc_cmpb(c_in, b_in);
|
||||
result_en := '1';
|
||||
when OP_CNTZ =>
|
||||
v.e.valid := '0';
|
||||
v.cntz_in_progress := '1';
|
||||
v.busy := '1';
|
||||
when OP_EXTS =>
|
||||
-- note data_len is a 1-hot encoding
|
||||
negative := (e_in.data_len(0) and c_in(7)) or
|
||||
(e_in.data_len(1) and c_in(15)) or
|
||||
(e_in.data_len(2) and c_in(31));
|
||||
result := (others => negative);
|
||||
if e_in.data_len(2) = '1' then
|
||||
result(31 downto 16) := c_in(31 downto 16);
|
||||
end if;
|
||||
if e_in.data_len(2) = '1' or e_in.data_len(1) = '1' then
|
||||
result(15 downto 8) := c_in(15 downto 8);
|
||||
end if;
|
||||
result(7 downto 0) := c_in(7 downto 0);
|
||||
result_en := '1';
|
||||
when OP_ISEL =>
|
||||
crbit := to_integer(unsigned(insn_bc(e_in.insn)));
|
||||
if e_in.cr(31-crbit) = '1' then
|
||||
@@ -769,24 +748,25 @@ begin
|
||||
result(63-45) := v.e.xerc.ca32;
|
||||
end if;
|
||||
else
|
||||
spr_val := c_in;
|
||||
case decode_spr_num(e_in.insn) is
|
||||
when SPR_TB =>
|
||||
result := ctrl.tb;
|
||||
spr_val := ctrl.tb;
|
||||
when SPR_DEC =>
|
||||
result := ctrl.dec;
|
||||
spr_val := ctrl.dec;
|
||||
when 724 => -- LOG_ADDR SPR
|
||||
result := log_wr_addr & r.log_addr_spr;
|
||||
spr_val := log_wr_addr & r.log_addr_spr;
|
||||
when 725 => -- LOG_DATA SPR
|
||||
result := log_rd_data;
|
||||
spr_val := log_rd_data;
|
||||
v.log_addr_spr := std_ulogic_vector(unsigned(r.log_addr_spr) + 1);
|
||||
when others =>
|
||||
-- mfspr from unimplemented SPRs should be a nop in
|
||||
-- supervisor mode and a program interrupt for user mode
|
||||
result := c_in;
|
||||
if ctrl.msr(MSR_PR) = '1' then
|
||||
illegal := '1';
|
||||
end if;
|
||||
end case;
|
||||
result := spr_val;
|
||||
end if;
|
||||
when OP_MFCR =>
|
||||
if e_in.insn(20) = '0' then
|
||||
@@ -862,12 +842,6 @@ begin
|
||||
end if;
|
||||
end case;
|
||||
end if;
|
||||
when OP_POPCNT =>
|
||||
result := popcnt_result;
|
||||
result_en := '1';
|
||||
when OP_PRTY =>
|
||||
result := parity_result;
|
||||
result_en := '1';
|
||||
when OP_RLC | OP_RLCL | OP_RLCR | OP_SHL | OP_SHR | OP_EXTSWSLI =>
|
||||
result := rotator_result;
|
||||
if e_in.output_carry = '1' then
|
||||
@@ -917,12 +891,14 @@ begin
|
||||
|
||||
-- Update LR on the next cycle after a branch link
|
||||
-- If we're not writing back anything else, we can write back LR
|
||||
-- this cycle, otherwise we take an extra cycle.
|
||||
-- this cycle, otherwise we take an extra cycle. We use the
|
||||
-- exc_write path since next_nia is written through that path
|
||||
-- in other places.
|
||||
if e_in.lr = '1' then
|
||||
if result_en = '0' then
|
||||
result_en := '1';
|
||||
result := next_nia;
|
||||
v.e.write_reg := fast_spr_num(SPR_LR);
|
||||
v.e.exc_write_enable := '1';
|
||||
v.e.exc_write_data := next_nia;
|
||||
v.e.exc_write_reg := fast_spr_num(SPR_LR);
|
||||
else
|
||||
v.lr_update := '1';
|
||||
v.next_lr := next_nia;
|
||||
@@ -939,9 +915,9 @@ begin
|
||||
end if;
|
||||
|
||||
elsif r.lr_update = '1' then
|
||||
result_en := '1';
|
||||
result := r.next_lr;
|
||||
v.e.write_reg := fast_spr_num(SPR_LR);
|
||||
v.e.exc_write_enable := '1';
|
||||
v.e.exc_write_data := r.next_lr;
|
||||
v.e.exc_write_reg := fast_spr_num(SPR_LR);
|
||||
v.e.valid := '1';
|
||||
elsif r.cntz_in_progress = '1' then
|
||||
-- cnt[lt]z always takes two cycles
|
||||
|
||||
66
logical.vhdl
66
logical.vhdl
@@ -4,6 +4,7 @@ use ieee.numeric_std.all;
|
||||
|
||||
library work;
|
||||
use work.decode_types.all;
|
||||
use work.ppc_fx_insns.all;
|
||||
|
||||
entity logical is
|
||||
port (
|
||||
@@ -13,9 +14,7 @@ entity logical is
|
||||
invert_in : in std_ulogic;
|
||||
invert_out : in std_ulogic;
|
||||
result : out std_ulogic_vector(63 downto 0);
|
||||
datalen : in std_logic_vector(3 downto 0);
|
||||
popcnt : out std_ulogic_vector(63 downto 0);
|
||||
parity : out std_ulogic_vector(63 downto 0)
|
||||
datalen : in std_logic_vector(3 downto 0)
|
||||
);
|
||||
end entity logical;
|
||||
|
||||
@@ -34,30 +33,14 @@ architecture behaviour of logical is
|
||||
type sixbit2 is array(0 to 1) of sixbit;
|
||||
signal pc32 : sixbit2;
|
||||
signal par0, par1 : std_ulogic;
|
||||
signal popcnt : std_ulogic_vector(63 downto 0);
|
||||
signal parity : std_ulogic_vector(63 downto 0);
|
||||
|
||||
begin
|
||||
logical_0: process(all)
|
||||
variable rb_adj, tmp : std_ulogic_vector(63 downto 0);
|
||||
variable negative : std_ulogic;
|
||||
begin
|
||||
rb_adj := rb;
|
||||
if invert_in = '1' then
|
||||
rb_adj := not rb;
|
||||
end if;
|
||||
|
||||
case op is
|
||||
when OP_AND =>
|
||||
tmp := rs and rb_adj;
|
||||
when OP_OR =>
|
||||
tmp := rs or rb_adj;
|
||||
when others =>
|
||||
tmp := rs xor rb_adj;
|
||||
end case;
|
||||
|
||||
result <= tmp;
|
||||
if invert_out = '1' then
|
||||
result <= not tmp;
|
||||
end if;
|
||||
|
||||
-- population counts
|
||||
for i in 0 to 31 loop
|
||||
pc2(i) <= unsigned("0" & rs(i * 2 downto i * 2)) + unsigned("0" & rs(i * 2 + 1 downto i * 2 + 1));
|
||||
@@ -98,5 +81,44 @@ begin
|
||||
parity(32) <= par1;
|
||||
end if;
|
||||
|
||||
rb_adj := rb;
|
||||
if invert_in = '1' then
|
||||
rb_adj := not rb;
|
||||
end if;
|
||||
|
||||
case op is
|
||||
when OP_AND =>
|
||||
tmp := rs and rb_adj;
|
||||
when OP_OR =>
|
||||
tmp := rs or rb_adj;
|
||||
when OP_XOR =>
|
||||
tmp := rs xor rb_adj;
|
||||
when OP_POPCNT =>
|
||||
tmp := popcnt;
|
||||
when OP_PRTY =>
|
||||
tmp := parity;
|
||||
when OP_CMPB =>
|
||||
tmp := ppc_cmpb(rs, rb);
|
||||
when others =>
|
||||
-- EXTS
|
||||
-- note datalen is a 1-hot encoding
|
||||
negative := (datalen(0) and rs(7)) or
|
||||
(datalen(1) and rs(15)) or
|
||||
(datalen(2) and rs(31));
|
||||
tmp := (others => negative);
|
||||
if datalen(2) = '1' then
|
||||
tmp(31 downto 16) := rs(31 downto 16);
|
||||
end if;
|
||||
if datalen(2) = '1' or datalen(1) = '1' then
|
||||
tmp(15 downto 8) := rs(15 downto 8);
|
||||
end if;
|
||||
tmp(7 downto 0) := rs(7 downto 0);
|
||||
end case;
|
||||
|
||||
if invert_out = '1' then
|
||||
tmp := not tmp;
|
||||
end if;
|
||||
result <= tmp;
|
||||
|
||||
end process;
|
||||
end behaviour;
|
||||
|
||||
Reference in New Issue
Block a user