From 571c8f96a501274168b10dcdba334dd92e83196d Mon Sep 17 00:00:00 2001 From: Seth Morabito Date: Sat, 29 Jun 2019 08:44:14 -0700 Subject: [PATCH] 3b2: WE32106 Math Acceleration Unit (MAU) This change adds support for the WE32106 Math Acceleration Unit (MAU). The WE32106 is an IEEE-754 1985 compatible floating point math acceleration unit that was an optional component on the 3B2/310 and 3B2/400. The MAU is implemented using software floating point routines. As always, there may be bugs, but the MAU currently passes extensive floating point tests with exactly the same results as a real 3B2/400 equipped with a physical MAU, so I hope these are few. --- 3B2/3b2_cpu.c | 92 +- 3B2/3b2_cpu.h | 3 +- 3B2/3b2_defs.h | 9 + 3B2/3b2_mau.c | 3470 +++++++++++++++++++++++++++++ 3B2/3b2_mau.h | 375 ++++ 3B2/3b2_mmu.c | 36 +- 3B2/3b2_mmu.h | 1 + 3B2/3b2_sys.c | 3 + Visual Studio Projects/3B2.vcproj | 8 + descrip.mms | 4 +- makefile | 3 +- 11 files changed, 3964 insertions(+), 40 deletions(-) create mode 100644 3B2/3b2_mau.c create mode 100644 3B2/3b2_mau.h diff --git a/3B2/3b2_cpu.c b/3B2/3b2_cpu.c index b4de1283..83e2a301 100644 --- a/3B2/3b2_cpu.c +++ b/3B2/3b2_cpu.c @@ -178,9 +178,6 @@ static DEBTAB cpu_deb_tab[] = { UNIT cpu_unit = { UDATA (NULL, UNIT_FIX|UNIT_BINK|UNIT_IDLE, MAXMEMSIZE) }; -#define UNIT_V_EXHALT (UNIT_V_UF + 0) /* halt to console */ -#define UNIT_EXHALT (1u << UNIT_V_EXHALT) - /* * TODO: This works fine for now, but the moment we want to emulate * SCSI (0x0100) or EPORTS (0x0102) we're in trouble! @@ -236,7 +233,11 @@ DEVICE cpu_dev = { 0, /* Debug control flags */ cpu_deb_tab, /* Debug flag names */ &cpu_set_size, /* Memory size change */ - NULL /* Logical names */ + NULL, /* Logical names */ + NULL, /* Help routine */ + NULL, /* Attach Help Routine */ + NULL, /* Help Context */ + &cpu_description /* Device Description */ }; #define HWORD_OP_COUNT 11 @@ -259,12 +260,12 @@ mnemonic hword_ops[HWORD_OP_COUNT] = { mnemonic ops[256] = { {0x00, 0, OP_NONE, NA, "halt", -1, -1, -1, -1}, {0x01, -1, OP_NONE, NA, "???", -1, -1, -1, -1}, - {0x02, 2, OP_COPR, WD, "SPOPRD", -1, -1, -1, -1}, - {0x03, 3, OP_COPR, WD, "SPOPD2", -1, -1, -1, -1}, + {0x02, 2, OP_COPR, WD, "SPOPRD", 1, -1, -1, -1}, + {0x03, 3, OP_COPR, WD, "SPOPD2", 1, -1, -1, 2}, {0x04, 2, OP_DESC, WD, "MOVAW", 0, -1, -1, 1}, {0x05, -1, OP_NONE, NA, "???", -1, -1, -1, -1}, - {0x06, 2, OP_COPR, WD, "SPOPRT", -1, -1, -1, -1}, - {0x07, 3, OP_COPR, WD, "SPOPT2", -1, -1, -1, -1}, + {0x06, 2, OP_COPR, WD, "SPOPRT", 1, -1, -1, -1}, + {0x07, 3, OP_COPR, WD, "SPOPT2", 1, -1, -1, 2}, {0x08, 0, OP_NONE, NA, "RET", -1, -1, -1, -1}, {0x09, -1, OP_NONE, NA, "???", -1, -1, -1, -1}, {0x0a, -1, OP_NONE, NA, "???", -1, -1, -1, -1}, @@ -276,11 +277,11 @@ mnemonic ops[256] = { {0x10, 1, OP_DESC, WD, "SAVE", 0, -1, -1, -1}, {0x11, -1, OP_NONE, NA, "???", -1, -1, -1, -1}, {0x12, -1, OP_NONE, NA, "???", -1, -1, -1, -1}, - {0x13, 2, OP_COPR, WD, "SPOPWD", -1, -1, -1, -1}, + {0x13, 2, OP_COPR, WD, "SPOPWD", -1, -1, -1, 1}, {0x14, 1, OP_BYTE, NA, "EXTOP", -1, -1, -1, -1}, {0x15, -1, OP_NONE, NA, "???", -1, -1, -1, -1}, {0x16, -1, OP_NONE, NA, "???", -1, -1, -1, -1}, - {0x17, 2, OP_COPR, WD, "SPOPWT", -1, -1, -1, -1}, + {0x17, 2, OP_COPR, WD, "SPOPWT", -1, -1, -1, 1}, {0x18, 1, OP_DESC, WD, "RESTORE", 0, -1, -1, -1}, {0x19, -1, OP_NONE, NA, "???", -1, -1, -1, -1}, {0x1a, -1, OP_NONE, NA, "???", -1, -1, -1, -1}, @@ -291,8 +292,8 @@ mnemonic ops[256] = { {0x1f, 1, OP_DESC, BT, "SWAPBI", -1, -1, -1, 0}, /* 3-122 252 */ {0x20, 1, OP_DESC, WD, "POPW", -1, -1, -1, 0}, 
{0x21, -1, OP_NONE, NA, "???", -1, -1, -1, -1}, - {0x22, 2, OP_COPR, WD, "SPOPRS", -1, -1, -1, -1}, - {0x23, 3, OP_COPR, WD, "SPOPS2", -1, -1, -1, -1}, + {0x22, 2, OP_COPR, WD, "SPOPRS", 1, -1, -1, -1}, + {0x23, 3, OP_COPR, WD, "SPOPS2", 1, -1, -1, 2}, {0x24, 1, OP_DESC, NA, "JMP", -1, -1, -1, 0}, {0x25, -1, OP_NONE, NA, "???", -1, -1, -1, -1}, {0x26, -1, OP_NONE, NA, "???", -1, -1, -1, -1}, @@ -308,7 +309,7 @@ mnemonic ops[256] = { {0x30, -1, OP_NONE, NA, "???", -1, -1, -1, -1}, /* Two-byte instructions */ {0x31, -1, OP_NONE, NA, "???", -1, -1, -1, -1}, {0x32, 1, OP_COPR, WD, "SPOP", -1, -1, -1, -1}, - {0x33, 2, OP_COPR, WD, "SPOPWS", -1, -1, -1, -1}, + {0x33, 2, OP_COPR, WD, "SPOPWS", -1, -1, -1, 1}, {0x34, 1, OP_DESC, WD, "JSB", -1, -1, -1, 0}, {0x35, -1, OP_NONE, NA, "???", -1, -1, -1, -1}, {0x36, 1, OP_HALF, NA, "BSBH", -1, -1, -1, 0}, @@ -1709,6 +1710,9 @@ t_stat sim_instr(void) /* Generic index */ uint32 i; + /* Used by oprocessor instructions */ + uint32 coprocessor_word; + operand *src1, *src2, *src3, *dst; stop_reason = 0; @@ -1868,6 +1872,11 @@ t_stat sim_instr(void) * Operate on the decoded instruction. */ + /* Special case for coprocessor instructions */ + if (cpu_instr->mn->mode == OP_COPR) { + coprocessor_word = cpu_instr->operands[0].embedded.w; + } + /* Get the operands */ if (cpu_instr->mn->src_op1 >= 0) { src1 = &cpu_instr->operands[cpu_instr->mn->src_op1]; @@ -2834,18 +2843,40 @@ t_stat sim_instr(void) pc_incr = 0; break; case SPOP: + sim_debug(TRACE_DBG, &cpu_dev, "SPOP\n"); + /* Memory fault is signaled when no support processor is + active */ + if (mau_broadcast(coprocessor_word, 0, 0) != SCPE_OK) { + cpu_abort(NORMAL_EXCEPTION, EXTERNAL_MEMORY_FAULT); + } + break; case SPOPD2: case SPOPS2: case SPOPT2: + sim_debug(TRACE_DBG, &cpu_dev, "SPOP{D|S|T}2\n"); + a = cpu_effective_address(src1); + b = cpu_effective_address(dst); + if (mau_broadcast(coprocessor_word, a, b) != SCPE_OK) { + cpu_abort(NORMAL_EXCEPTION, EXTERNAL_MEMORY_FAULT); + } + break; case SPOPRD: case SPOPRS: case SPOPRT: + sim_debug(TRACE_DBG, &cpu_dev, "SPOPR{D|S|T}\n"); + a = cpu_effective_address(src1); + if (mau_broadcast(coprocessor_word, a, 0) != SCPE_OK) { + cpu_abort(NORMAL_EXCEPTION, EXTERNAL_MEMORY_FAULT); + } + break; case SPOPWD: case SPOPWS: case SPOPWT: - /* Memory fault is signaled when no support processor is - active */ - cpu_abort(NORMAL_EXCEPTION, EXTERNAL_MEMORY_FAULT); + sim_debug(TRACE_DBG, &cpu_dev, "SPOPW{D|S|T}\n"); + a = cpu_effective_address(dst); + if (mau_broadcast(coprocessor_word, 0, a) != SCPE_OK) { + cpu_abort(NORMAL_EXCEPTION, EXTERNAL_MEMORY_FAULT); + } break; case SUBW2: case SUBH2: @@ -3682,20 +3713,19 @@ static SIM_INLINE void add(t_uint64 a, t_uint64 b, operand *dst) void cpu_abort(uint8 et, uint8 isc) { /* We don't trap Integer Overflow if the OE bit is not set */ - if ((R[NUM_PSW] & PSW_OE_MASK) || isc != INTEGER_OVERFLOW) { - R[NUM_PSW] &= ~(PSW_ET_MASK); /* Clear ET */ - R[NUM_PSW] &= ~(PSW_ISC_MASK); /* Clear ISC */ - R[NUM_PSW] |= et; /* Set ET */ - R[NUM_PSW] |= (uint32) (isc << PSW_ISC); /* Set ISC */ - - /* TODO: We no longer use ABORT_TRAP or ABORT_EXC, so - * it would be nice to clean this up. 
*/ - if (et == 3 && (isc == BREAKPOINT_TRAP || - isc == INTEGER_OVERFLOW || - isc == TRACE_TRAP)) { - longjmp(save_env, ABORT_TRAP); - } else { - longjmp(save_env, ABORT_EXC); - } + if ((R[NUM_PSW] & PSW_OE_MASK) == 0 && isc == INTEGER_OVERFLOW) { + return; } + + R[NUM_PSW] &= ~(PSW_ET_MASK); /* Clear ET */ + R[NUM_PSW] &= ~(PSW_ISC_MASK); /* Clear ISC */ + R[NUM_PSW] |= et; /* Set ET */ + R[NUM_PSW] |= (uint32) (isc << PSW_ISC); /* Set ISC */ + + longjmp(save_env, ABORT_EXC); +} + +CONST char *cpu_description(DEVICE *dptr) +{ + return "WE32100"; } diff --git a/3B2/3b2_cpu.h b/3B2/3b2_cpu.h index b4ffa25c..cd84c52e 100644 --- a/3B2/3b2_cpu.h +++ b/3B2/3b2_cpu.h @@ -377,7 +377,7 @@ typedef struct _operand { * An inst is a combination of a decoded instruction and * 0 to 4 operands. Also used for history record keeping. */ -typedef struct _instr { +typedef struct { mnemonic *mn; uint32 psw; uint32 sp; @@ -401,6 +401,7 @@ t_stat cpu_show_cio(FILE *st, UNIT *uptr, int32 val, CONST void *desc); t_stat cpu_set_halt(UNIT *uptr, int32 val, char *cptr, void *desc); t_stat cpu_clear_halt(UNIT *uptr, int32 val, char *cptr, void *desc); t_stat cpu_boot(int32 unit_num, DEVICE *dptr); +CONST char *cpu_description(DEVICE *dptr); t_bool cpu_is_pc_a_subroutine_call (t_addr **ret_addrs); diff --git a/3B2/3b2_defs.h b/3B2/3b2_defs.h index 375099d9..4bf9dd08 100644 --- a/3B2/3b2_defs.h +++ b/3B2/3b2_defs.h @@ -60,6 +60,9 @@ noret __libc_longjmp (jmp_buf buf, int val); #define UNUSED(x) ((void)((x))) #endif +#define UNIT_V_EXHALT (UNIT_V_UF + 0) +#define UNIT_EXHALT (1u << UNIT_V_EXHALT) + /* -t flag: Translate a virtual address */ #define EX_T_FLAG 1 << 19 /* -v flag for examine routine */ @@ -163,6 +166,7 @@ noret __libc_longjmp (jmp_buf buf, int val); #define PKT_DBG 0x0400 #define ERR_MSG 0x0800 #define CACHE_DBG 0x1000 +#define DECODE_DBG 0x2000 /* Data types operated on by instructions. NB: These integer values have meaning when decoding instructions, so this is not just an @@ -405,6 +409,11 @@ extern uint32 read_w(uint32 va, uint8 acc); extern void write_b(uint32 va, uint8 val); extern void write_h(uint32 va, uint16 val); extern void write_w(uint32 va, uint32 val); +extern void pwrite_w(uint32 pa, uint32 val); +extern uint32 pread_w(uint32 pa); + +/* global symbols from the MAU */ +extern t_stat mau_broadcast(uint32 cmd, uint32 src, uint32 dst); /* Globally scoped CPU functions */ extern void cpu_abort(uint8 et, uint8 isc); diff --git a/3B2/3b2_mau.c b/3B2/3b2_mau.c new file mode 100644 index 00000000..8c721aeb --- /dev/null +++ b/3B2/3b2_mau.c @@ -0,0 +1,3470 @@ +/* 3b2_mau.c: AT&T 3B2 Model 400 Math Acceleration Unit (WE32106 MAU) + Implementation + + Copyright (c) 2019, Seth J. Morabito + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, copy, + modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + + Except as contained in this notice, the name of the author shall + not be used in advertising or otherwise to promote the sale, use or + other dealings in this Software without prior written authorization + from the author. + + --------------------------------------------------------------------- + + This file is part of a simulation of the WE32106 Math Acceleration + Unit. The WE32106 MAU is an IEEE-754 compatible floating point + hardware math accelerator that was available as an optional + component on the AT&T 3B2/310 and 3B2/400, and a standard component + on the 3B2/500, 3B2/600, and 3B2/1000. + + Portions of this code are derived from the SoftFloat 2c library by + John R. Hauser. Functions derived from SoftFloat 2c are clearly + marked in the comments. + + Legal Notice + ============ + + SoftFloat was written by John R. Hauser. Release 2c of SoftFloat + was made possible in part by the International Computer Science + Institute, located at Suite 600, 1947 Center Street, Berkeley, + California 94704. Funding was partially provided by the National + Science Foundation under grant MIP-9311980. The original version + of this code was written as part of a project to build a + fixed-point vector processor in collaboration with the University + of California at Berkeley, overseen by Profs. Nelson Morgan and + John Wawrzynek. + + THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable + effort has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS + THAT WILL AT TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS + SOFTWARE IS RESTRICTED TO PERSONS AND ORGANIZATIONS WHO CAN AND + WILL TOLERATE ALL LOSSES, COSTS, OR OTHER PROBLEMS THEY INCUR DUE + TO THE SOFTWARE WITHOUT RECOMPENSE FROM JOHN HAUSER OR THE + INTERNATIONAL COMPUTER SCIENCE INSTITUTE, AND WHO FURTHERMORE + EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER + SCIENCE INSTITUTE (possibly via similar legal notice) AGAINST ALL + LOSSES, COSTS, OR OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND + CLIENTS DUE TO THE SOFTWARE, OR INCURRED BY ANYONE DUE TO A + DERIVATIVE WORK THEY CREATE USING ANY PART OF THE SOFTWARE. + + The following are expressly permitted, even for commercial + purposes: + + (1) distribution of SoftFloat in whole or in part, as long as this + and other legal notices remain and are prominent, and provided also + that, for a partial distribution, prominent notice is given that it + is a subset of the original; and + + (2) inclusion or use of SoftFloat in whole or in part in a + derivative work, provided that the use restrictions above are met + and the minimal documentation requirements stated in the source + code are satisfied.
+ --------------------------------------------------------------------- +*/ + +#include + +#include "3b2_mau.h" + +#define MAU_ID 0 /* Coprocessor ID of MAU */ + +#define TININESS_BEFORE_ROUNDING TRUE + +/* Static function declarations */ +static SIM_INLINE void mau_case_div_zero(XFP *op1, XFP *op2, XFP *result); + +static SIM_INLINE void mau_exc(uint32 flag, uint32 mask); +static SIM_INLINE void abort_on_fault(); +static SIM_INLINE void mau_decode(uint32 cmd, uint32 src, uint32 dst); +static SIM_INLINE t_bool le_128(t_uint64 a0, t_uint64 a1, t_uint64 b0, t_uint64 b1); +static SIM_INLINE t_bool eq_128(t_uint64 a0, t_uint64 a1, t_uint64 b0, t_uint64 b1); +static SIM_INLINE t_bool lt_128(t_uint64 a0, t_uint64 a1, t_uint64 b0, t_uint64 b1); +static uint8 leading_zeros(uint32 val); +static uint8 leading_zeros_64(t_int64 val); +static void shift_right_32_jamming(uint32 val, int16 count, uint32 *result); +static void shift_right_64_jamming(t_uint64 val, int16 count, t_uint64 *result); +static void shift_right_extra_64_jamming(t_uint64 val_a, t_uint64 val_b, int16 count, + t_uint64 *result_a, t_uint64 *result_b); +static void shift_right_128_jamming(t_uint64 val_a, t_uint64 val_b, int16 count, + t_uint64 *result_a, t_uint64 *result_b); +static void short_shift_left_128(t_uint64 val_a, t_uint64 val_b, int16 count, + t_uint64 *result_a, t_uint64 *result_b); +static void shift_right_128(t_uint64 val_a, t_uint64 val_b, int16 count, + t_uint64 *result_a, t_uint64 *result_b); +static void add_128(t_uint64 a0, t_uint64 a1, + t_uint64 b0, t_uint64 b1, + t_uint64 *r_low, t_uint64 *r_high); +static void sub_128(t_uint64 a0, t_uint64 a1, + t_uint64 b0, t_uint64 b1, + t_uint64 *r_low, t_uint64 *r_high); +static void add_192(t_uint64 a0, t_uint64 a1, t_uint64 a2, + t_uint64 b0, t_uint64 b1, t_uint64 b2, + t_uint64 *z0, t_uint64 *z1, t_uint64 *z2); +static void sub_192(t_uint64 a0, t_uint64 a1, t_uint64 a2, + t_uint64 b0, t_uint64 b1, t_uint64 b2, + t_uint64 *z0, t_uint64 *z1, t_uint64 *z2); +static void mul_64_to_128(t_uint64 a, t_uint64 b, t_uint64 *r_low, t_uint64 *r_high); +static t_uint64 estimate_div_128_to_64(t_uint64 a0, t_uint64 a1, t_uint64 b); +static uint32 estimate_sqrt_32(int16 a_exp, uint32 a); + +static uint32 round_pack_int(t_bool sign, t_uint64 frac, RM rounding_mode); +static t_int64 round_pack_int64(t_bool sign, + t_uint64 abs_0, t_uint64 abs_1, + RM rounding_mode); + +static SFP round_pack_sfp(t_bool sign, int16 exp, + uint32 frac, RM rounding_mode); +static DFP round_pack_dfp(t_bool sign, int16 exp, t_uint64 frac, + t_bool xfp_sticky, RM rounding_mode); +static void round_pack_xfp(t_bool sign, int32 exp, + t_uint64 frac_a, t_uint64 frac_b, + RM rounding_mode, XFP *result); +static void propagate_xfp_nan(XFP *a, XFP *b, XFP *result); +static void normalize_round_pack_xfp(t_bool sign, int32 exp, + t_uint64 frac_0, t_uint64 frac_1, + RM rounding_mode, XFP *result); +static void normalize_sfp_subnormal(uint32 in_frac, int16 *out_exp, uint32 *out_frac); +static void normalize_dfp_subnormal(t_uint64 in_frac, int16 *out_exp, t_uint64 *out_frac); +static void normalize_xfp_subnormal(t_uint64 in_frac, int32 *out_exp, t_uint64 *out_frac); + +static NAN_T sfp_to_common_nan(SFP val); +static NAN_T dfp_to_common_nan(DFP val); +static NAN_T xfp_to_common_nan(XFP *val); +static SFP common_nan_to_sfp(NAN_T nan); +static DFP common_nan_to_dfp(NAN_T nan); +static void common_nan_to_xfp(NAN_T nan, XFP *result); + +static void sfp_to_xfp(SFP val, XFP *result); +static void dfp_to_xfp(DFP val, XFP *result); 
+static SFP xfp_to_sfp(XFP *val, RM rounding_mode); +static DFP xfp_to_dfp(XFP *val, RM rounding_mode); + +static uint32 xfp_eq(XFP *a, XFP *b); +static uint32 xfp_lt(XFP *a, XFP *b); + +static void xfp_cmp(XFP *a, XFP *b); +static void xfp_cmpe(XFP *a, XFP *b); +static void xfp_cmps(XFP *a, XFP *b); +static void xfp_cmpes(XFP *a, XFP *b); +static void xfp_add(XFP *a, XFP *b, XFP *result, RM rounding_mode); +static void xfp_sub(XFP *a, XFP *b, XFP *result, RM rounding_mode); +static void xfp_mul(XFP *a, XFP *b, XFP *result, RM rounding_mode); +static void xfp_div(XFP *a, XFP *b, XFP *result, RM rounding_mode); +static void xfp_sqrt(XFP *a, XFP *result, RM rounding_mode); +static void xfp_remainder(XFP *a, XFP *b, XFP *result, RM rounding_mode); + +static void load_src_op(uint8 op, XFP *xfp); +static void load_op1_decimal(DEC *d); +static void store_op3_int(uint32 val); +static void store_op3_decimal(DEC *d); +static void store_op3(XFP *xfp); + +static void mau_rdasr(); +static void mau_wrasr(); +static void mau_move(); +static void mau_cmp(); +static void mau_cmps(); +static void mau_cmpe(); +static void mau_cmpes(); +static void mau_ldr(); +static void mau_erof(); +static void mau_rtoi(); +static void mau_ftoi(); +static void mau_dtof(); +static void mau_ftod(); +static void mau_add(); +static void mau_sub(); +static void mau_mul(); +static void mau_div(); +static void mau_neg(); +static void mau_abs(); +static void mau_sqrt(); +static void mau_itof(); +static void mau_remainder(); + +static void mau_execute(); + +extern volatile int32 stop_reason; + +UNIT mau_unit = { UDATA(NULL, 0, 0) }; + +MAU_STATE mau_state; + +BITFIELD asr_bits[] = { + BITNCF(5), + BIT(PR), + BIT(QS), + BIT(US), + BIT(OS), + BIT(IS), + BIT(PM), + BIT(QM), + BIT(UM), + BIT(OM), + BIT(IM), + BITNCF(1), + BIT(UO), + BIT(CSC), + BIT(PS), + BIT(IO), + BIT(Z), + BIT(N), + BITFFMT(RC,2,%d), + BIT(NTNC), + BIT(ECP), + BITNCF(5), + BIT(RA), + ENDBITS +}; + +REG mau_reg[] = { + { HRDATAD (CMD, mau_state.cmd, 32, "Command Word") }, + { HRDATADF (ASR, mau_state.asr, 32, "ASR", asr_bits) }, + { HRDATAD (OPCODE, mau_state.opcode, 8, "Opcode") }, + { HRDATAD (OP1, mau_state.op1, 8, "Operand 1") }, + { HRDATAD (OP2, mau_state.op2, 8, "Operand 2") }, + { HRDATAD (OP3, mau_state.op3, 8, "Operand 3") }, + { NULL } +}; + +MTAB mau_mod[] = { + { UNIT_EXHALT, UNIT_EXHALT, "Halt on Exception", "EXHALT", + NULL, NULL, NULL, "Enables Halt on floating point exceptions" }, + { UNIT_EXHALT, 0, "No halt on Exception", "NOEXHALT", + NULL, NULL, NULL, "Disables Halt on floating point exceptions" }, + { 0 } +}; + +static DEBTAB mau_debug[] = { + { "DECODE", DECODE_DBG, "Decode" }, + { "TRACE", TRACE_DBG, "Call Trace" }, + { NULL } +}; + +DEVICE mau_dev = { + "MAU", /* name */ + &mau_unit, /* units */ + mau_reg, /* registers */ + mau_mod, /* modifiers */ + 1, /* #units */ + 16, /* address radix */ + 32, /* address width */ + 1, /* address incr. 
*/ + 16, /* data radix */ + 8, /* data width */ + NULL, /* examine routine */ + NULL, /* deposit routine */ + &mau_reset, /* reset routine */ + NULL, /* boot routine */ + NULL, /* attach routine */ + NULL, /* detach routine */ + NULL, /* context */ + DEV_DISABLE|DEV_DIS|DEV_DEBUG, /* flags */ + 0, /* debug control flags */ + mau_debug, /* debug flag names */ + NULL, /* memory size change */ + NULL, /* logical name */ + NULL, /* help routine */ + NULL, /* attach help routine */ + NULL, /* help context */ + &mau_description /* device description */ +}; + +XFP INF = { + 0x7fff, + 0x0000000000000000ull, + 0 +}; + +XFP TRAPPING_NAN = { + 0x7fff, + 0x7fffffffffffffffull, + 0 +}; + +/* Generated Non-Trapping NaN + * p. 2-8 "When the MAU generates a nontrapping NaN, J+fraction + * contains all 1s. The MAU never generates a trapping NaN." + */ +XFP GEN_NONTRAPPING_NAN = { + 0x7fff, + 0xffffffffffffffffull, + 0 +}; + +CONST char *mau_op_names[32] = { + "0x00", "0x01", "ADD", "SUB", "DIV", "REM", "MUL", "MOVE", /* 00-07 */ + "RDASR", "WRASR", "CMP", "CMPE", "ABS", "SQRT", "RTOI", "FTOI", /* 08-0F */ + "ITOF", "DTOF", "FTOD", "NOP", "EROF", "0x15", "0x16", "NEG", /* 10-17 */ + "LDR", "0x19", "CMPS", "CMPES", "0x1C", "0x1D", "0x1E", "0x1F" /* 18-1F */ +}; + +CONST char *src_op_names[8] = { + "F0", "F1", "F2", "F3", + "MEM S", "MEM D", "MEM X", "N/A" +}; + +CONST char *dst_op_names[16] = { + "F0 S", "F1 S", "F2 S", "F3 S", + "F0 D", "F1 D", "F2 D", "F3 D", + "F0 X", "F1 X", "F2 X", "F3 X", + "MEM S", "MEM D", "MEM X", "N/A" +}; + +/* + * Special Cases + * ------------- + * + * The handling of combinations of special input values is specified + * in the "WE32106 Math Acceleration Unit Information Manual" + * pp. 5-3--5-5. + * + * Each of these "special case" routines can be called by math + * functions based on a combination of the input values. + * + * (At the moment, only divide-by-zero is explicitly called out here + * as a special case) + */ + +static SIM_INLINE void mau_case_div_zero(XFP *op1, XFP *op2, XFP *result) +{ + mau_state.asr |= MAU_ASR_QS; + + if (mau_state.asr & MAU_ASR_QM) { + mau_state.asr |= MAU_ASR_ECP; + PACK_XFP(0, 0x7fff, 0x8000000000000000ull, result); + } else { + if (XFP_SIGN(op1) ^ XFP_SIGN(op2)) { + PACK_XFP(1, INF.sign_exp, INF.frac, result); + } else { + PACK_XFP(0, INF.sign_exp, INF.frac, result); + } + } +} + +static SIM_INLINE void mau_exc(uint32 flag, uint32 mask) +{ + sim_debug(TRACE_DBG, &mau_dev, + "[%08x] [mau_exc] asr=%08x flag=%08x mask=%08x\n", + R[NUM_PC], mau_state.asr, flag, mask); + + mau_state.asr |= flag; + + /* + * page 2-14: NTNC bit is checked if an Invalid Operation + * exception occurs while the Invalid Operation Mask bit is + * clear. If NTNC is set to 1, an exception occurs and bit 9 + * (IS) is set. If NTNC is set to 0, no exception occurs, + * and a nontraping NaN is generated. + */ + if (flag == MAU_ASR_IS && (mau_state.asr & MAU_ASR_IM) == 0) { + if (mau_state.asr & MAU_ASR_NTNC) { + mau_state.asr |= MAU_ASR_ECP; + } else { + mau_state.ntnan = TRUE; + } + return; + } + + if (mau_state.asr & mask) { + mau_state.asr |= MAU_ASR_ECP; + } +} + +/* + * Returns true if an exceptional condition is present. 
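+ * + * An exception is reported only when the ECP bit is set and a sticky + * bit is set together with its corresponding mask bit (for example, + * underflow requires both US and UM); the invalid-operation case is + * also reported when NTNC is set.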
+ */ +static SIM_INLINE t_bool mau_exception_present() +{ + + return mau_state.asr & MAU_ASR_ECP && + (((mau_state.asr & MAU_ASR_IS) && ((mau_state.asr & MAU_ASR_IM) || + (mau_state.asr & MAU_ASR_NTNC))) || + ((mau_state.asr & MAU_ASR_US) && (mau_state.asr & MAU_ASR_UM)) || + ((mau_state.asr & MAU_ASR_OS) && (mau_state.asr & MAU_ASR_OM)) || + ((mau_state.asr & MAU_ASR_PS) && (mau_state.asr & MAU_ASR_PM)) || + ((mau_state.asr & MAU_ASR_QS) && (mau_state.asr & MAU_ASR_QM))); +} + +static SIM_INLINE void abort_on_fault() +{ + switch(mau_state.opcode) { + case M_NOP: + case M_RDASR: + case M_WRASR: + case M_EROF: + case M_LDR: + return; + default: + /* + * Integer overflow is non-maskable in the MAU, but generates an Integer + * Overflow exception to be handled by the WE32100 CPU (if not masked + * in the CPU's PSW). + */ + if ((mau_state.asr & MAU_ASR_IO) && (R[NUM_PSW] & PSW_OE_MASK)) { + if (mau_unit.flags & UNIT_EXHALT) { + stop_reason = STOP_EX; + } + sim_debug(TRACE_DBG, &mau_dev, + "[%08x] [abort_on_fault] Aborting on un-maskable overflow fault. ASR=%08x\n", + R[NUM_PC], mau_state.asr); + cpu_abort(NORMAL_EXCEPTION, INTEGER_OVERFLOW); + } + + /* Otherwise, check for other exceptions. */ + if (mau_exception_present()) { + if (mau_unit.flags & UNIT_EXHALT) { + stop_reason = STOP_EX; + } + sim_debug(TRACE_DBG, &mau_dev, + "[%08x] [abort_on_fault] Aborting on ECP fault. ASR=%08x\n", + R[NUM_PC], mau_state.asr); + cpu_abort(NORMAL_EXCEPTION, EXTERNAL_MEMORY_FAULT); + } + + break; + } +} + +/* + * Clears N and Z flags in the ASR if appropriate. + */ +static void clear_asr() +{ + mau_state.ntnan = FALSE; + + switch(mau_state.opcode) { + case M_NOP: + case M_RDASR: + case M_WRASR: + case M_EROF: + return; + default: + mau_state.asr &= ~(MAU_ASR_Z|MAU_ASR_N|MAU_ASR_ECP); + break; + } +} + +/* + * Returns true if the 'nz' flags should be set. + * + * Note: There is an undocumented feature of the WE32106 expressed + * here. If an exception has occurred, the Z and N flags are not to be + * set! + */ +static t_bool set_nz() +{ + + switch(mau_state.opcode) { + case M_NOP: + case M_RDASR: + case M_WRASR: + case M_EROF: + return FALSE; + default: + return (mau_state.asr & MAU_ASR_ECP) == 0; + } +} + +t_stat mau_reset(DEVICE *dptr) +{ + memset(&mau_state, 0, sizeof(MAU_STATE)); + return SCPE_OK; +} + +/************************************************************************* + * Utility Functions + ************************************************************************/ + +/* + * Compare two 128-bit values a and b. Returns true if a <= b + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static SIM_INLINE t_bool le_128(t_uint64 a0, t_uint64 a1, t_uint64 b0, t_uint64 b1) +{ + return (a0 < b0) || ((a0 == b0) && (a1 <= b1)); +} + +/* + * Compare two 128-bit values a and b. Returns true if a = b + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static SIM_INLINE t_bool eq_128(t_uint64 a0, t_uint64 a1, t_uint64 b0, t_uint64 b1) +{ + return (a0 == b0) && (a1 == b1); +} + +/* + * Compare two 128-bit values a and b. Returns true if a < b + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static SIM_INLINE t_bool lt_128(t_uint64 a0, t_uint64 a1, t_uint64 b0, t_uint64 b1) +{ + return (a0 < b0) || ((a0 == b0) && (a1 < b1)); +} + +/* + * Return the number of leading binary zeros in an unsigned 32-bit + * value. + * + * Algorithm courtesy of "Hacker's Delight" by Henry S. Warren.
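+ * + * For example, leading_zeros(0x00010000) returns 15 and + * leading_zeros(0xffffffff) returns 0.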
+ */ +static uint8 leading_zeros(uint32 val) +{ + unsigned n = 0; + + if (val <= 0x0000ffff) { + n += 16; + val <<= 16; + } + if (val <= 0x00ffffff) { + n += 8; + val <<= 8; + } + if (val <= 0x0fffffff) { + n += 4; + val <<= 4; + } + if (val <= 0x3fffffff) { + n += 2; + val <<= 2; + } + if (val <= 0x7fffffff) { + n++; + } + + return n; +} + +/* + * Return the number of leading binary zeros in a signed 64-bit + * value. + */ +static uint8 leading_zeros_64(t_int64 val) +{ + uint8 n = 0; + + if (val == 0) { + return 64; + } + + while (1) { + if (val < 0) break; + + n++; + + val <<= 1; + } + + return n; +} + +/* + * Shift a 32-bit unsigned value, 'val', right by 'count' bits. If any + * non-zero bits are shifted off, they are "jammed" into the least + * significant bit of the result by setting the least significant bit + * to 1. + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static void shift_right_32_jamming(uint32 val, int16 count, uint32 *result) +{ + uint32 tmp; + + if (count == 0) { + tmp = val; + } else if (count < 32) { + tmp = (val >> count) | ((val << ((-count) & 31)) != 0); + } else { + tmp = (val != 0); + } + + *result = tmp; +} + +/* + * Shift a 64-bit unsigned value, 'val', right by 'count' bits. If any + * non-zero bits are shifted off, they are "jammed" into the least + * significant bit of the result by setting the least significant bit + * to 1. + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static void shift_right_64_jamming(t_uint64 val, int16 count, t_uint64 *result) +{ + t_uint64 tmp; + + if (count == 0) { + tmp = val; + } else if (count < 64) { + tmp = (val >> count) | ((val << ((-count) & 63)) != 0); + } else { + tmp = (val != 0); + } + + *result = tmp; +} + +/* + * Shifts the 128-bit value formed by concatenating val_a and val_b + * right by 64 _plus_ the number of bits given in 'count'. + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static void shift_right_extra_64_jamming(t_uint64 val_a, t_uint64 val_b, int16 count, + t_uint64 *result_a, t_uint64 *result_b) +{ + t_uint64 a, b; + int8 neg_count = (-count) & 63; + + if (count == 0) { + b = val_b; + a = val_a; + } else if (count < 64) { + b = (val_a << neg_count) | (val_b != 0); + a = val_a >> count; + } else { + if (count == 64) { + b = val_a | (val_b != 0); + } else { + b = ((val_a | val_b) != 0); + } + a = 0; + } + + *result_a = a; + *result_b = b; +} + +/* + * Shift the 128-bit value formed by val_a and val_b right by + * 64 plus the number of bits given in count. + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static void shift_right_128_jamming(t_uint64 val_a, t_uint64 val_b, int16 count, + t_uint64 *result_a, t_uint64 *result_b) +{ + t_uint64 tmp_a, tmp_b; + int8 neg_count = (-count) & 63; + + if (count == 0) { + tmp_a = val_a; + tmp_b = val_b; + } else if (count < 64) { + tmp_a = (val_a >> count); + tmp_b = (val_a << neg_count) | (val_b != 0); + } else { + if (count == 64) { + tmp_b = val_a | (val_b != 0); + } else { + tmp_b = ((val_a | val_b) != 0); + } + tmp_a = 0; + } + + *result_a = tmp_a; + *result_b = tmp_b; +} + +/* + * Shifts the 128-bit value formed by val_a and val_b left by the + * number of bits given in count. 
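+ * The count is assumed to be less than 64 (hence "short"); any bits + * shifted out of the high half are lost.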
+ * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static void short_shift_left_128(t_uint64 val_a, t_uint64 val_b, int16 count, + t_uint64 *result_a, t_uint64 *result_b) +{ + *result_b = val_b << count; + if (count == 0) { + *result_a = val_a; + } else { + *result_a = (val_a << count) | (val_b >> ((-count) & 63)); + } +} + +/* + * Shifts the 128-bit value formed by val_a and val_b right by the + * number of bits given in 'count'. Any bits shifted off are lost. + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static void shift_right_128(t_uint64 val_a, t_uint64 val_b, int16 count, + t_uint64 *result_a, t_uint64 *result_b) +{ + t_uint64 tmp_a, tmp_b; + int8 neg_count; + + neg_count = (- count) & 63; + + if (count == 0) { + tmp_a = val_a; + tmp_b = val_b; + } else if (count < 64) { + tmp_a = val_a >> count; + tmp_b = (val_a << neg_count) | (val_b >> count); + } else { + tmp_a = 0; + tmp_b = (count < 128) ? (val_a >> (count & 63)) : 0; + } + + *result_a = tmp_a; + *result_b = tmp_b; +} + +/* + * Add two 128-bit values. + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static void add_128(t_uint64 a0, t_uint64 a1, + t_uint64 b0, t_uint64 b1, + t_uint64 *r_low, t_uint64 *r_high) +{ + t_uint64 tmp; + + tmp = a1 + b1; + *r_high = tmp; + *r_low = a0 + b0 + (tmp < a1); +} + +/* + * Subtract two 128-bit values. + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static void sub_128(t_uint64 a0, t_uint64 a1, + t_uint64 b0, t_uint64 b1, + t_uint64 *r_low, t_uint64 *r_high) +{ + *r_high = a1 - b1; + *r_low = a0 - b0 - (a1 < b1); +} + +/* + * Add two 192-bit values. + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static void add_192(t_uint64 a0, t_uint64 a1, t_uint64 a2, + t_uint64 b0, t_uint64 b1, t_uint64 b2, + t_uint64 *z0, t_uint64 *z1, t_uint64 *z2) +{ + int8 carry_0, carry_1; + + *z2 = a2 + b2; + carry_1 = (*z2 < a2); + *z1 = a1 + b1; + carry_0 = (*z1 < a1); + *z0 = a0 + b0; + *z1 += carry_1; + *z0 += (*z1 < (t_uint64) carry_1); + *z0 += carry_0; +} + +/* + * Subtract two 192-bit values. + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static void sub_192(t_uint64 a0, t_uint64 a1, t_uint64 a2, + t_uint64 b0, t_uint64 b1, t_uint64 b2, + t_uint64 *z0, t_uint64 *z1, t_uint64 *z2) +{ + int8 borrow_0, borrow_1; + + *z2 = a2 - b2; + borrow_1 = (a2 < b2); + *z1 = a1 - b1; + borrow_0 = (a1 < b1); + *z0 = a0 - b0; + *z0 -= (*z1 < (t_uint64) borrow_1); + *z1 -= borrow_1; + *z0 -= borrow_0; +} + +/* + * Multiplies a by b to obtain a 128-bit product. The product is + * broken into two 64-bit pieces which are stored at r_low and r_high. + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static void mul_64_to_128(t_uint64 a, t_uint64 b, t_uint64 *r_low, t_uint64 *r_high) +{ + uint32 a_high, a_low, b_high, b_low; + t_uint64 rl, rm_a, rm_b, rh; + + a_low = (uint32)a; + a_high = a >> 32; + + b_low = (uint32)b; + b_high = b >> 32; + + rh = ((t_uint64) a_low) * b_low; + rm_a = ((t_uint64) a_low) * b_high; + rm_b = ((t_uint64) a_high) * b_low; + rl = ((t_uint64) a_high) * b_high; + + rm_a += rm_b; + + rl += (((t_uint64)(rm_a < rm_b)) << 32) + (rm_a >> 32); + rm_a <<= 32; + rh += rm_a; + rl += (rh < rm_a); + + *r_high = rh; + *r_low = rl; +} + +/* + * Returns an approximation of the 64-bit integer value obtained by + * dividing 'b' into the 128-bit value 'a0' and 'a1'.
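+ * + * As in the SoftFloat original, the divisor 'b' is expected to be + * normalized (most significant bit set), and the estimate may be + * slightly larger than the true quotient, so callers must tolerate a + * small overestimate.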
+ * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static t_uint64 estimate_div_128_to_64(t_uint64 a0, t_uint64 a1, t_uint64 b) +{ + t_uint64 b0, b1; + t_uint64 rem0, rem1, term0, term1; + t_uint64 z; + + if (b <= a0) { + return 0xffffffffffffffffull; + } + + b0 = b >> 32; + z = (b0 << 32 <= a0) ? 0xffffffff00000000ull : (a0 / b0) << 32; + + mul_64_to_128( b, z, &term0, &term1 ); + + sub_128( a0, a1, term0, term1, &rem0, &rem1 ); + + while (((int64_t)rem0) < 0) { + z -= 0x100000000ull; + b1 = b << 32; + add_128(rem0, rem1, b0, b1, &rem0, &rem1); + } + + rem0 = (rem0 << 32) | (rem1 >> 32); + z |= (b0<<32 <= rem0) ? 0xffffffff : rem0 / b0; + + return z; +} + +/* + * Returns an approximation of the square root of the 32-bit + * value 'a'. + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static uint32 estimate_sqrt_32(int16 a_exp, uint32 a) +{ + static const uint16 sqrt_odd_adjust[] = { + 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, + 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 + }; + + static const uint16 sqrt_even_adjust[] = { + 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, + 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 + }; + + int8 index; + uint32 z; + + index = (a >> 27) & 0xf; + + if (a_exp & 1) { + z = 0x4000 + (a >> 17) - sqrt_odd_adjust[index]; + z = ((a / z) << 14) + (z << 15); + a >>= 1; + } + else { + z = 0x8000 + (a >> 17) - sqrt_even_adjust[index]; + z = a / z + z; + z = (0x20000 <= z) ? 0xFFFF8000 : ( z<<15 ); + if ( z <= a ) return (uint32) (((int32) a) >> 1); + } + + return ((uint32) ((((t_uint64) a )<<31 ) / z)) + (z >> 1); +} + +/* + * Return the properly rounded 32-bit integer corresponding to 'sign' + * and 'frac'. + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static uint32 round_pack_int(t_bool sign, t_uint64 frac, RM rounding_mode) +{ + int8 round_increment, round_bits; + int32 result; + + round_increment = 0x40; + + if (!(rounding_mode == ROUND_NEAREST)) { + if (rounding_mode == ROUND_ZERO) { + round_increment = 0; + } else { + round_increment = 0x7f; + if (sign) { + if (rounding_mode == ROUND_PLUS_INF) { + round_increment = 0; + } + } else { + if (rounding_mode == ROUND_MINUS_INF) { + round_increment = 0; + } + } + } + } + + round_bits = frac & 0x7f; + frac = (frac + round_increment) >> 7; + frac &= ~(((round_bits ^ 0x40) == 0) & (rounding_mode == ROUND_NEAREST)); + + result = (int32)frac; + + if (sign) { + result = -result; + } + + if ((frac >> 32) || (result && ((result < 0) ^ sign))) { + mau_exc(MAU_ASR_IO, MAU_ASR_OM); /* Integer overflow */ + mau_exc(MAU_ASR_PS, MAU_ASR_PM); /* Inexact */ + return sign ? (int32) 0x80000000 : 0x7fffffff; + } + + if (round_bits) { + mau_exc(MAU_ASR_PS, MAU_ASR_PM); + } + + return result; +} + +/* + * Return the properly rounded 64-bit integer corresponding to 'sign' + * and 'frac'. 
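+ * + * On overflow the result saturates to the most positive or most + * negative 64-bit integer and the overflow sticky bit is raised; any + * discarded nonzero bits raise the inexact (precision) sticky bit.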
+ * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static t_int64 round_pack_int64(t_bool sign, + t_uint64 abs_0, t_uint64 abs_1, + RM rounding_mode) +{ + t_bool increment; + int64_t z; + + increment = (t_int64)abs_1 < 0; + + if (rounding_mode != ROUND_NEAREST) { + if (rounding_mode == ROUND_ZERO) { + increment = 0; + } else { + if (sign) { + increment = (rounding_mode == ROUND_MINUS_INF) && abs_1; + } else { + increment = (rounding_mode == ROUND_PLUS_INF) && abs_1; + } + } + } + + if (increment) { + ++abs_0; + if (abs_0 == 0) { + /* Overflow */ + mau_exc(MAU_ASR_OS, MAU_ASR_OM); + return sign ? 0x8000000000000000ull : 0x7fffffffffffffffull; + } + abs_0 &= ~(((t_uint64)(abs_1 << 1) == 0) & (rounding_mode == ROUND_NEAREST)); + } + + z = abs_0; + if (sign) { + z = -z; + } + if (z && ((z < 0) ^ sign)) { + /* Overflow */ + mau_exc(MAU_ASR_OS, MAU_ASR_OM); + return sign ? 0x8000000000000000ull : 0x7fffffffffffffffull; + } + + if (abs_1) { + mau_exc(MAU_ASR_PS, MAU_ASR_PM); + } + + return z; +} + +/* + * Return a properly rounded 32-bit floating point value, given a sign + * bit, exponent, fractional part, and a rounding mode. + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static SFP round_pack_sfp(t_bool sign, int16 exp, uint32 frac, RM rounding_mode) +{ + int8 round_increment, round_bits; + uint8 is_tiny; + + is_tiny = 0; + round_increment = 0x40; + + if (rounding_mode != ROUND_NEAREST) { + if (rounding_mode == ROUND_ZERO) { + round_increment = 0; + } else { + if (sign) { + if (rounding_mode == ROUND_PLUS_INF) { + round_increment = 0; + } + } else { + if (rounding_mode == ROUND_MINUS_INF) { + round_increment = 0; + } + } + } + } + + round_bits = frac & 0x7f; + + if (0xfd < (uint16) exp) { + if ((0xfd < exp) || + (exp == 0xfd && (int32)(frac + round_increment) < 0)) { + mau_exc(MAU_ASR_OS, MAU_ASR_OM); + mau_exc(MAU_ASR_PS, MAU_ASR_PM); + return PACK_SFP(sign, 0xff, 0) - (round_increment == 0); + } + if (exp < 0) { + is_tiny = (TININESS_BEFORE_ROUNDING || + ((exp < -1) || + (frac + round_increment < 0x80000000))); + shift_right_32_jamming(frac, -exp, &frac); + exp = 0; + round_bits = frac & 0x7f; + if (is_tiny && round_bits) { + mau_exc(MAU_ASR_US, MAU_ASR_UM); + } + } + } + + if (round_bits) { + mau_exc(MAU_ASR_PS, MAU_ASR_PM); + } + + frac = (frac + round_increment) >> 7; + frac &= ~(((round_bits ^ 0x40) == 0) & (rounding_mode == ROUND_NEAREST)); + if (frac == 0) { + exp = 0; + } + + return PACK_SFP(sign, exp, frac); +} + +/* + * Return a properly rounded 64-bit floating point value, given a sign + * bit, exponent, fractional part, and a rounding mode. 
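+ * + * The 'xfp_sticky' argument carries bits already discarded when the + * 80-bit source fraction was narrowed, so they still take part in the + * round-to-nearest-even decision.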
+ * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static DFP round_pack_dfp(t_bool sign, int16 exp, t_uint64 frac, + t_bool xfp_sticky, RM rounding_mode) +{ + int16 round_increment, round_bits; + t_bool lsb, round, sticky; + uint8 is_tiny; + + is_tiny = 0; + round_increment = 0; + + if (rounding_mode != ROUND_NEAREST) { + if (rounding_mode == ROUND_ZERO) { + round_increment = 0; + } else { + round_increment = 0x7ff; + if (sign) { + if (rounding_mode == ROUND_PLUS_INF) { + round_increment = 0; + } + } else { + if (rounding_mode == ROUND_MINUS_INF) { + round_increment = 0; + } + } + } + } + + round_bits = frac & 0x7ff; + + if (0x7fd <= (uint16) exp) { + if (exp < 0) { + is_tiny = (TININESS_BEFORE_ROUNDING || + (exp < -1) || + ((frac + round_increment) < 0x8000000000000000ull)); + shift_right_64_jamming(frac, -exp, &frac); + exp = 0; + round_bits = frac & 0x7ff; + if (is_tiny && round_bits) { + mau_exc(MAU_ASR_US, MAU_ASR_UM); + } + } else if (0x7fd < exp) { + mau_exc(MAU_ASR_OS, MAU_ASR_OM); + mau_exc(MAU_ASR_PS, MAU_ASR_PM); + return (PACK_DFP(sign, 0x7ff, 0) - (round_increment == 0)); + } + } + + if (round_bits) { + mau_exc(MAU_ASR_PS, MAU_ASR_PM); + } + + if (rounding_mode == ROUND_NEAREST) { + frac >>= 11; + lsb = (frac & 1) != 0; + round = (round_bits & 0x400) != 0; + sticky = ((round_bits & 0x3ff) != 0) | xfp_sticky; + if (round & (sticky || lsb)) { + frac++; + if (frac == 0) { + exp++; + } + } + } else { + frac = (frac + round_increment) >> 11; + frac &= ~(t_uint64)(!(round_bits ^ 0x200)); + } + + return PACK_DFP(sign, exp, frac); +} + +/* + * Return a properly rounded 80-bit floating point value, given a sign + * bit, exponent, fractional part, and a rounding mode. + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static void round_pack_xfp(t_bool sign, int32 exp, + t_uint64 frac_a, t_uint64 frac_b, + RM rounding_mode, XFP *result) +{ + uint8 round_nearest_even, is_tiny; + t_int64 round_mask; + + round_nearest_even = (rounding_mode == ROUND_NEAREST); + + if (0x7ffd <= (uint32)(exp - 1)) { + if (0x7ffe < exp) { + round_mask = 0; + mau_exc(MAU_ASR_OS, MAU_ASR_OM); + mau_exc(MAU_ASR_PS, MAU_ASR_PM); + if ((rounding_mode == ROUND_ZERO) || + (sign && (rounding_mode == ROUND_PLUS_INF)) || + (!sign && (rounding_mode == ROUND_MINUS_INF))) { + PACK_XFP(sign, 0x7ffe, ~round_mask, result); + return; + } + PACK_XFP(sign, 0x7fff, 0x8000000000000000ull, result); + return; + } + if (exp <= 0) { + is_tiny = (TININESS_BEFORE_ROUNDING || + (exp < 0) || + (frac_a < 0xffffffffffffffffull)); + shift_right_extra_64_jamming(frac_a, frac_b, (int16)(1 - exp), &frac_a, &frac_b); + exp = 0; + if (is_tiny && frac_b) { + mau_exc(MAU_ASR_US, MAU_ASR_UM); + } + if (frac_b) { + mau_exc(MAU_ASR_PS, MAU_ASR_PM); + } + PACK_XFP(sign, exp, frac_a, result); + return; + } + } + if (frac_b) { + mau_exc(MAU_ASR_PS, MAU_ASR_PM); + } + if (frac_a == 0) { + exp = 0; + } + PACK_XFP_S(sign, exp, frac_a, frac_b, result); +} + +/* + * Given two 80-bit floating point values 'a' and 'b', one of which is + * a NaN, return the appropriate NaN result. 
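+ * + * A signaling (trapping) NaN in either operand raises the invalid + * operation exception; the NaN that is returned is quieted by forcing + * its top fraction bits on.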
+ * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static void propagate_xfp_nan(XFP *a, XFP *b, XFP *result) +{ + uint8 a_is_nan, a_is_signaling_nan; + uint8 b_is_nan, b_is_signaling_nan; + + a_is_nan = XFP_IS_NAN(a); + a_is_signaling_nan = XFP_IS_TRAPPING_NAN(a); + b_is_nan = XFP_IS_NAN(b); + b_is_signaling_nan = XFP_IS_TRAPPING_NAN(b); + + a->frac |= 0xc000000000000000ull; + b->frac |= 0xc000000000000000ull; + + if (a_is_signaling_nan | b_is_signaling_nan) { + mau_exc(MAU_ASR_IS, MAU_ASR_IM); + } + + if (a_is_nan) { + if (a_is_signaling_nan & b_is_nan) { + result->sign_exp = b->sign_exp; + result->frac = b->frac; + } else { + result->sign_exp = a->sign_exp; + result->frac = a->frac; + } + } else { + result->sign_exp = b->sign_exp; + result->frac = b->frac; + } +} + +/* + * Normalize and round an extended-precision floating point value. + * + * Partially derived from the SoftFloat 2c package (see copyright + * notice above) + */ +static void normalize_round_pack_xfp(t_bool sign, int32 exp, + t_uint64 frac_0, t_uint64 frac_1, + RM rounding_mode, XFP *result) +{ + int8 shift_count; + + if (frac_0 == 0) { + frac_0 = frac_1; + frac_1 = 0; + exp -= 64; + } + + shift_count = leading_zeros_64(frac_0); + short_shift_left_128(frac_0, frac_1, shift_count, &frac_0, &frac_1); + exp -= shift_count; + + round_pack_xfp(sign, exp, frac_0, frac_1, rounding_mode, result); +} + + +/* + * Normalize the subnormal 32-bit floating point value represented by + * the denormalized input fractional component. + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static void normalize_sfp_subnormal(uint32 in_frac, int16 *out_exp, uint32 *out_frac) +{ + int8 shift_count; + + shift_count = leading_zeros(in_frac) - 8; + *out_frac = in_frac << shift_count; + *out_exp = (uint16)(1 - shift_count); +} + +/* + * Normalize the subnormal 64-bit floating point value represented by + * the denormalized input fractional component. + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static void normalize_dfp_subnormal(t_uint64 in_frac, int16 *out_exp, t_uint64 *out_frac) +{ + int8 shift_count; + + shift_count = leading_zeros_64(in_frac) - 11; + *out_frac = in_frac << shift_count; + *out_exp = 1 - shift_count; +} + +/* + * Normalize the subnormal 80-bit floating point value represented by + * the denormalized input fractional component. + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static void normalize_xfp_subnormal(t_uint64 in_frac, int32 *out_exp, t_uint64 *out_frac) +{ + int8 shift_count; + + shift_count = leading_zeros_64(in_frac); + *out_frac = in_frac << shift_count; + *out_exp = 1 - shift_count; +} + +/* + * Returns the result of converting the 32-bit floating point NaN + * value to the canonical NaN format. + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static NAN_T sfp_to_common_nan(SFP val) +{ + NAN_T nan = {0}; + + if (SFP_IS_TRAPPING_NAN(val)) { + mau_state.trapping_nan = TRUE; + } + + nan.sign = val >> 31; + nan.low = 0; + nan.high = ((t_uint64) val) << 41; + + return nan; +} + +/* + * Returns the result of converting the 64-bit floating point NaN + * value to the canonical NaN format.
+ * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static NAN_T dfp_to_common_nan(DFP val) +{ + NAN_T nan = {0}; + + if (DFP_IS_TRAPPING_NAN(val)) { + mau_state.trapping_nan = TRUE; + } + + nan.sign = (val >> 63) & 1; + nan.low = 0; + nan.high = (t_uint64)(val << 12); + + return nan; +} + +/* + * Returns the result of converting the 80-bit floating point NaN + * value to the canonincal NaN format. + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static NAN_T xfp_to_common_nan(XFP *val) +{ + NAN_T nan = {0}; + + if (XFP_IS_TRAPPING_NAN(val)) { + mau_state.trapping_nan = TRUE; + } + + nan.sign = val->sign_exp >> 15; + nan.low = 0; + nan.high = val->frac << 1; + + return nan; +} + +/* + * Returns the result of converting a canonical NAN format value to a + * 32-bit floating point format. + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static SFP common_nan_to_sfp(NAN_T nan) +{ + return ((((uint32)nan.sign) << 31) + | 0x7fc00000 + | (nan.high >> 41)); +} + +/* + * Returns the result of converting a canonical NAN format value to a + * 64-bit floating point format. + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static DFP common_nan_to_dfp(NAN_T nan) +{ + return ((((t_uint64)nan.sign) << 63) + | 0x7ff8000000000000ull + | (nan.high >> 12)); +} + +/* + * Returns the result of converting a canonical NAN format value to an + * 80-bit floating point format. + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static void common_nan_to_xfp(NAN_T nan, XFP *result) +{ + result->frac = 0xc000000000000000ull | (nan.high >> 1); + result->sign_exp = (((uint16)nan.sign) << 15) | 0x7fff; +} + +/* + * Convert a 32-bit floating point value to an 80-bit floating point + * value. + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static void sfp_to_xfp(SFP val, XFP *result) +{ + t_bool sign; + int16 exp; + uint32 frac; + + sign = SFP_SIGN(val); + exp = SFP_EXP(val); + frac = SFP_FRAC(val); + + if (exp == 0xff) { + if (frac) { + common_nan_to_xfp(sfp_to_common_nan(val), result); + return; + } + } + + if (exp == 0) { + if (frac == 0) { + PACK_XFP(sign, 0, 0, result); + return; + } + normalize_sfp_subnormal(frac, &exp, &frac); + } + + frac |= 0x800000; + + PACK_XFP(sign, exp + 0x3f80, ((t_uint64) frac) << 40, result); +} + +/* + * Convert a 64-bit floating point value to an 80-bit floating point value. + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +void dfp_to_xfp(DFP val, XFP *result) +{ + t_bool sign; + int16 exp; + t_uint64 frac; + + sign = DFP_SIGN(val); + exp = DFP_EXP(val); + frac = DFP_FRAC(val); + + if (exp == 0x7ff) { + if (sign) { + common_nan_to_xfp(dfp_to_common_nan(val), result); + } + + PACK_XFP(sign, 0xff, 0, result); + return; + } + if (exp == 0) { + if (frac == 0) { + PACK_XFP(sign, 0, 0, result); + return; + } + normalize_dfp_subnormal(frac, &exp, &frac); + } + + PACK_XFP(sign, + exp + 0x3c00, + 0x8000000000000000ull | (frac << 11), + result); +} + +/* + * Convert an 80-bit floating point value to a 32-bit floating point + * value. 
+ * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static SFP xfp_to_sfp(XFP *val, RM rounding_mode) +{ + t_bool sign; + int32 exp; + t_uint64 frac; + uint32 dst_frac; + + sign = XFP_SIGN(val); + exp = XFP_EXP(val); + frac = XFP_FRAC(val); + + if (exp == 0x7fff) { + if ((t_uint64)(frac << 1)) { + return common_nan_to_sfp(xfp_to_common_nan(val)); + } + return PACK_SFP(sign, 0xff, 0); + } + + shift_right_64_jamming(frac, 33, &frac); + + dst_frac = (uint32)frac; + + if (exp || frac) { + exp -= 0x3f81; + } + + return round_pack_sfp(sign, exp, dst_frac, rounding_mode); +} + +/* + * Convert an 80-bit floating point value to a 64-bit floating point + * value. + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static DFP xfp_to_dfp(XFP *val, RM rounding_mode) +{ + t_bool sign; + int32 exp; + t_uint64 frac; + + sign = XFP_SIGN(val); + exp = XFP_EXP(val); + frac = XFP_FRAC(val); + + sim_debug(TRACE_DBG, &mau_dev, + "[xfp_to_dfp] input=%04x%016llx input_exp=%04x packed_exp=%04x\n", + val->sign_exp, val->frac, (uint16)exp, (uint16)(exp - 0x3c01)); + + if (exp == 0x7fff) { + if ((t_uint64)(frac << 1)) { + return common_nan_to_dfp(xfp_to_common_nan(val)); + } + return PACK_DFP(sign, 0x7ff, 0); + } + + if (exp || frac) { + exp -= 0x3c01; + } + + return round_pack_dfp(sign, exp, frac, val->s, rounding_mode); +} + +/***************************************************************************** + * Comparison Functions + ****************************************************************************/ + +/* + * Returns true if the two 80-bit floating point values are equal. + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static uint32 xfp_eq(XFP *a, XFP *b) +{ + if (((XFP_EXP(a) == 0x7fff) && (t_uint64)(XFP_FRAC(a) << 1)) || + ((XFP_EXP(b) == 0x7fff) && (t_uint64)(XFP_FRAC(b) << 1))) { + + /* Check for NAN and raise invalid exception */ + if (XFP_IS_TRAPPING_NAN(a) || XFP_IS_TRAPPING_NAN(b)) { + mau_exc(MAU_ASR_IS, MAU_ASR_IM); + } + + return 0; + } + + return ((a->frac == b->frac) && + ((a->sign_exp == b->sign_exp) || + ((a->frac == 0) && ((uint16)((a->sign_exp|b->sign_exp) << 1) == 0)))); +} + +/* + * Returns true if the 80-bit floating point value 'a' is less than + * the 80-bit floating point value 'b'. + * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +static uint32 xfp_lt(XFP *a, XFP *b) +{ + uint32 a_sign, b_sign; + + if (((XFP_EXP(a) == 0x7fff) && (t_uint64)(XFP_FRAC(a) << 1)) || + ((XFP_EXP(b) == 0x7fff) && (t_uint64)(XFP_FRAC(b) << 1))) { + return 0; + } + + a_sign = XFP_SIGN(a); + b_sign = XFP_SIGN(b); + + if (a_sign != b_sign) { + return(a_sign && + ((((uint16)((a->sign_exp|b->sign_exp) << 1)) | a->frac | b->frac) != 0)); + } + + if (a_sign) { + return (b->sign_exp < a->sign_exp) || ((b->sign_exp == a->sign_exp) && (b->frac < a->frac)); + } else { + return (a->sign_exp < b->sign_exp) || ((a->sign_exp == b->sign_exp) && (a->frac < b->frac)); + } +} + +/***************************************************************************** + * Conversion Functions + ****************************************************************************/ + +/* + * Convert a 32-bit signed integer value to an IEEE-754 extended + * precion (80-bit) floating point value. 
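+ * + * For example, the integer 1 converts to sign_exp = 0x3fff and + * frac = 0x8000000000000000 (1.0 x 2^0 with the explicit integer bit + * set).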
+ * + * Derived from the SoftFloat 2c package (see copyright notice above) + */ +void mau_int_to_xfp(int32 val, XFP *result) +{ + int32 shift_width; + t_bool sign; + uint32 abs_val; + uint16 sign_exp = 0; + t_uint64 frac = 0; + + if (val) { + sign = (val < 0); + abs_val = (uint32)(sign ? -val : val); + shift_width = leading_zeros(abs_val); + sign_exp = (sign << 15) | (0x401e - shift_width); + frac = (t_uint64) (abs_val << shift_width) << 32; + } + + result->sign_exp = sign_exp; + result->frac = frac; + + if (sign_exp & 0x8000) { + mau_state.asr |= MAU_ASR_N; + } + + if ((sign_exp & 0x7fff) == 0 && frac == 0) { + mau_state.asr |= MAU_ASR_Z; + } +} + +/* + * Convert a floating point value to a 64-bit integer. + */ +t_int64 xfp_to_int64(XFP *val, RM rounding_mode) +{ + t_bool sign; + int32 exp, shift_count; + t_uint64 frac, frac_extra; + + sign = XFP_SIGN(val); + exp = XFP_EXP(val); + frac = XFP_FRAC(val); + shift_count = 0x403e - exp; + if (shift_count <= 0) { + if (shift_count) { + mau_exc(MAU_ASR_IS, MAU_ASR_IM); + if (!sign || ((exp == 0x7fff) && (frac != 0x8000000000000000ull))) { + return 0x7fffffffffffffffull; + } + return 0x8000000000000000ull; + } + frac_extra = 0; + } else { + shift_right_extra_64_jamming(frac, 0, shift_count, &frac, &frac_extra); + } + + return round_pack_int64(sign, frac, frac_extra, rounding_mode); +} + +void mau_int64_to_xfp(t_uint64 val, XFP *result) +{ + t_bool sign; + t_uint64 abs; + int8 shift_count; + + if (val == 0) { + PACK_XFP(0, 0, 0, result); + return; + } + + sign = (val & 0x8000000000000000ull) != 0ull; + abs = val & 0x7fffffffffffffffull; + shift_count = leading_zeros_64(abs); + PACK_XFP(sign, 0x403e - shift_count, abs << shift_count, result); +} + +/* + * Convert a float value to a decimal value. + */ +void xfp_to_decimal(XFP *a, DEC *d, RM rounding_mode) +{ + t_int64 tmp; + int i; + t_bool sign; + uint16 digits[19] = {0}; + + tmp = xfp_to_int64(a, rounding_mode); + + if (tmp < 0) { + sign = 0xb; + } else { + sign = 0xa; + } + + for (i = 0; i < 19; i++) { + digits[i] = tmp % 10; + tmp /= 10; + } + + d->l = sign; + d->l |= (t_uint64)digits[0] << 4; + d->l |= (t_uint64)digits[1] << 8; + d->l |= (t_uint64)digits[2] << 12; + d->l |= (t_uint64)digits[3] << 16; + d->l |= (t_uint64)digits[4] << 20; + d->l |= (t_uint64)digits[5] << 24; + d->l |= (t_uint64)digits[6] << 28; + d->l |= (t_uint64)digits[7] << 32; + d->l |= (t_uint64)digits[8] << 36; + d->l |= (t_uint64)digits[9] << 40; + d->l |= (t_uint64)digits[10] << 44; + d->l |= (t_uint64)digits[11] << 48; + d->l |= (t_uint64)digits[12] << 52; + d->l |= (t_uint64)digits[13] << 56; + d->l |= (t_uint64)digits[14] << 60; + d->h = (uint32)digits[15]; + d->h |= (uint32)digits[16] << 4; + d->h |= (uint32)digits[17] << 8; + + sim_debug(TRACE_DBG, &mau_dev, + "[%08x] [xfp_to_decimal] " + "Digits: %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d 0x%x\n", + R[NUM_PC], + digits[17], digits[16], digits[15], digits[14], digits[13], digits[12], + digits[11], digits[10], digits[9], digits[8], digits[7], digits[6], + digits[5], digits[4], digits[3], digits[2], digits[1], digits[0], + sign); +} + +/* + * Convert a decimal value to a float value.
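+ * + * The packed decimal operand holds 18 BCD digits plus a sign nibble + * in the low four bits; sign nibbles 0xb and 0xd indicate a negative + * value, and all other sign values are treated as positive.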
+ */ +void mau_decimal_to_xfp(DEC *d, XFP *a, RM rounding_mode) +{ + int i; + t_bool sign; + uint16 digits[18] = {0}; + t_uint64 multiplier = 1; + t_uint64 tmp; + t_int64 signed_tmp; + + sim_debug(TRACE_DBG, &mau_dev, + "[%08x] [mau_decimal_to_xfp] DEC input: %08x %08x %08x\n", + R[NUM_PC], d->h, (uint32)(d->l >> 32), (uint32)(d->l)); + + sign = (d->l) & 15; + digits[0] = (d->l >> 4) & 15; + digits[1] = (d->l >> 8) & 15; + digits[2] = (d->l >> 12) & 15; + digits[3] = (d->l >> 16) & 15; + digits[4] = (d->l >> 20) & 15; + digits[5] = (d->l >> 24) & 15; + digits[6] = (d->l >> 28) & 15; + digits[7] = (d->l >> 32) & 15; + digits[8] = (d->l >> 36) & 15; + digits[9] = (d->l >> 40) & 15; + digits[10] = (d->l >> 44) & 15; + digits[11] = (d->l >> 48) & 15; + digits[12] = (d->l >> 52) & 15; + digits[13] = (d->l >> 56) & 15; + digits[14] = (d->l >> 60) & 15; + digits[15] = (d->h) & 15; + digits[16] = (d->h >> 4) & 15; + digits[17] = (d->h >> 8) & 15; + + sim_debug(TRACE_DBG, &mau_dev, + "[%08x] [mau_decimal_to_xfp] " + "Digits: %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d 0x%x\n", + R[NUM_PC], + digits[17], digits[16], digits[15], digits[14], digits[13], digits[12], + digits[11], digits[10], digits[9], digits[8], digits[7], digits[6], + digits[5], digits[4], digits[3], digits[2], digits[1], digits[0], + sign); + + tmp = 0; + + for (i = 0; i < 18; i++) { + tmp += digits[i] * multiplier; + multiplier *= 10; + } + + switch (sign) { + case 0xd: + case 0xb: + /* Negative number */ + signed_tmp = -((t_int64) tmp); + break; + /* TODO: HANDLE NAN AND INFINITY */ + default: + signed_tmp = (t_int64) tmp; + } + + sim_debug(TRACE_DBG, &mau_dev, + "[%08x] [mau_decimal_to_xfp] tmp val = %lld\n", + R[NUM_PC], signed_tmp); + + mau_int64_to_xfp((t_uint64) signed_tmp, a); + + sim_debug(TRACE_DBG, &mau_dev, + "[%08x] [mau_decimal_to_xfp] XFP = %04x%016llx\n", + R[NUM_PC], a->sign_exp, a->frac); + +} + +/* + * Convert a floating point value to a 32-bit integer. + */ +uint32 xfp_to_int(XFP *val, RM rounding_mode) +{ + t_bool sign; + int32 exp, shift_count; + t_uint64 frac; + + sign = XFP_SIGN(val); + exp = XFP_EXP(val); + frac = XFP_FRAC(val); + + if ((exp == 0x7fff) && (t_uint64)(frac << 1)) { + sign = 0; + } + + shift_count = 0x4037 - exp; + + if (shift_count <= 0) { + shift_count = 1; + } + + shift_right_64_jamming(frac, shift_count, &frac); + + return round_pack_int(sign, frac, rounding_mode); +} + +/* + * Round an 80-bit extended precission floating-point value + * to an integer. 
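+ * + * Values whose unbiased exponent is large enough to have no + * fractional bits are returned unchanged (NaNs are propagated), and + * very small magnitudes round to zero or to one according to the + * rounding mode, raising the inexact sticky bit.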
+ * + * Derived from the SoftFloat 2c library (see copyright notice above) + */ +void mau_round_xfp_to_int(XFP *val, XFP *result, RM rounding_mode) +{ + t_bool sign; + int32 exp; + t_uint64 last_bit_mask, round_bits_mask; + + exp = XFP_EXP(val); + + if (0x403e <= exp) { + if ((exp == 0x7fff) && (t_uint64)(XFP_FRAC(val) << 1)) { + propagate_xfp_nan(val, val, result); + return; + } + result->sign_exp = val->sign_exp; + result->frac = val->frac; + return; + } + if (exp < 0x3ff) { + if ((exp == 0) && ((t_uint64)(XFP_FRAC(val) << 1) == 0)) { + result->sign_exp = val->sign_exp; + result->frac = val->frac; + return; + } + mau_exc(MAU_ASR_PS, MAU_ASR_PM); + sign = XFP_SIGN(val); + switch (rounding_mode) { + case ROUND_NEAREST: + if (exp == 0x3ffe && (t_uint64)(XFP_FRAC(val) << 1)) { + PACK_XFP(sign, 0x3fff, 0x8000000000000000ull, result); + return; + } + break; + case ROUND_MINUS_INF: + if (sign) { + PACK_XFP(1, 0x3fff, 0x8000000000000000ull, result); + } else { + PACK_XFP(0, 0, 0, result); + } + return; + case ROUND_PLUS_INF: + if (sign) { + PACK_XFP(1, 0, 0, result); + } else { + PACK_XFP(0, 0x3fff, 0x8000000000000000ull, result); + } + return; + default: + // do nothing + break; + } + PACK_XFP(sign, 0, 0, result); + return; + } + + last_bit_mask = 1; + last_bit_mask <<= 0x403e - exp; + round_bits_mask = last_bit_mask - 1; + + result->sign_exp = val->sign_exp; + result->frac = val->frac; + + if (rounding_mode == ROUND_NEAREST) { + result->frac += last_bit_mask >> 1; + if ((result->frac & round_bits_mask) == 0) { + result->frac &= ~last_bit_mask; + } + } else if (rounding_mode != ROUND_ZERO) { + if (XFP_SIGN(result) ^ (rounding_mode == ROUND_PLUS_INF)) { + result->frac += round_bits_mask; + } + } + + result->frac &= ~round_bits_mask; + if (result->frac == 0) { + ++result->sign_exp; + result->frac = 0x8000000000000000ull; + } + + if (result->frac != val->frac) { + mau_exc(MAU_ASR_PS, MAU_ASR_PM); + } +} + +/***************************************************************************** + * Math Functions + ****************************************************************************/ + +static void xfp_add_fracs(XFP *a, XFP *b, t_bool sign, XFP *result, RM rounding_mode) +{ + int32 a_exp, b_exp, result_exp; + t_uint64 a_frac, b_frac, result_frac_0, result_frac_1; + int32 exp_diff; + + sim_debug(TRACE_DBG, &mau_dev, + "[%08x] [ADD_FRACS] a=%04x%016llx b=%04x%016llx\n", + R[NUM_PC], + a->sign_exp, a->frac, + b->sign_exp, b->frac); + + a_exp = XFP_EXP(a); + a_frac = XFP_FRAC(a); + b_exp = XFP_EXP(b); + b_frac = XFP_FRAC(b); + + exp_diff = a_exp - b_exp; + if (0 < exp_diff) { + if (a_exp == 0x7fff) { + if ((t_uint64) (a_frac << 1)) { + propagate_xfp_nan(a, b, result); + return; + } + result->sign_exp = a->sign_exp; + result->frac = a->frac; + return; + } + if (b_exp == 0) { + --exp_diff; + } + shift_right_extra_64_jamming(b_frac, 0, exp_diff, &b_frac, &result_frac_1); + result_exp = a_exp; + } else if (exp_diff < 0) { + if (b_exp == 0x7fff) { + if ((t_uint64) (b_frac << 1)) { + propagate_xfp_nan(a, b, result); + return; + } + PACK_XFP(sign, 0x7fff, 0x8000000000000000ull, result); + return; + } + if (a_exp == 0) { + ++exp_diff; + } + + shift_right_extra_64_jamming(a_frac, 0, -exp_diff, &a_frac, &result_frac_1); + result_exp = b_exp; + } else { + if (a_exp == 0x7fff) { + if ((t_uint64)((a_frac | b_frac) << 1)) { + propagate_xfp_nan(a, b, result); + return; + } + result->sign_exp = a->sign_exp; + result->frac = a->frac; + return; + } + result_frac_1 = 0; + result_frac_0 = a_frac + b_frac; + if (a_exp == 0) 
{ + normalize_xfp_subnormal(result_frac_0, &result_exp, &result_frac_0); + + round_pack_xfp(sign, result_exp, result_frac_0, result_frac_1, rounding_mode, result); + return; + } + result_exp = a_exp; + shift_right_extra_64_jamming(result_frac_0, result_frac_1, 1, &result_frac_0, &result_frac_1); + result_frac_0 |= 0x8000000000000000ull; + ++result_exp; + round_pack_xfp(sign, result_exp, result_frac_0, result_frac_1, rounding_mode, result); + return; + } + result_frac_0 = a_frac + b_frac; + if (((t_int64) result_frac_0) < 0) { + round_pack_xfp(sign, result_exp, result_frac_0, result_frac_1, rounding_mode, result); + return; + } + shift_right_extra_64_jamming(result_frac_0, result_frac_1, 1, &result_frac_0, &result_frac_1); + result_frac_0 |= 0x8000000000000000ull; + ++result_exp; + round_pack_xfp(sign, result_exp, result_frac_0, result_frac_1, rounding_mode, result); + return; +} + +static void xfp_sub_fracs(XFP *a, XFP *b, t_bool sign, XFP *result, RM rounding_mode) +{ + int32 a_exp, b_exp, result_exp; + t_uint64 a_frac, b_frac, result_frac_0, result_frac_1; + int32 exp_diff; + + a_exp = XFP_EXP(a); + a_frac = XFP_FRAC(a); + b_exp = XFP_EXP(b); + b_frac = XFP_FRAC(b); + exp_diff = a_exp - b_exp; + + if (0 < exp_diff) { + /* aExpBigger */ + if (a_exp == 0x7fff) { + if ((t_uint64)(a_frac << 1)) { + propagate_xfp_nan(a, b, result); + return; + } + result->sign_exp = a->sign_exp; + result->frac = a->frac; + return; + } + if (b_exp == 0) { + --exp_diff; + } + shift_right_128_jamming(b_frac, 0, exp_diff, &b_frac, &result_frac_1); + /* aBigger */ + sub_128(a_frac, 0, b_frac, result_frac_1, &result_frac_0, &result_frac_1); + result_exp = a_exp; + /* normalizeRoundAndPack */ + normalize_round_pack_xfp(sign, result_exp, result_frac_0, result_frac_1, rounding_mode, result); + return; + } + if (exp_diff < 0) { + /* bExpBigger */ + if (b_exp == 0x7fff) { + if ((t_uint64)(b_frac << 1)) { + propagate_xfp_nan(a, b, result); + return; + } + PACK_XFP(sign ? 0 : 1, 0x7fff, 0x8000000000000000ull, result); + return; + } + if (a_exp == 0) { + ++exp_diff; + } + shift_right_128_jamming(a_frac, 0, -exp_diff, &a_frac, &result_frac_1); + /* bBigger */ + sub_128(b_frac, 0, a_frac, result_frac_1, &result_frac_0, &result_frac_1); + result_exp = b_exp; + sign = sign ? 
0 : 1; + /* normalizeRoundAndPack */ + normalize_round_pack_xfp(sign, result_exp, + result_frac_0, result_frac_1, + rounding_mode, result); + return; + } + if (a_exp == 0x7fff) { + if ((t_uint64)((a_frac | b_frac) << 1)) { + propagate_xfp_nan(a, b, result); + return; + } + mau_exc(MAU_ASR_IS, MAU_ASR_IM); /* Invalid */ + result->sign_exp = DEFAULT_XFP_NAN_SIGN_EXP; + result->frac = DEFAULT_XFP_NAN_FRAC; + return; + } + if (a_exp == 0) { + a_exp = 1; + b_exp = 1; + } + result_frac_1 = 0; + if (b_frac < a_frac) { + /* aBigger */ + sub_128(a_frac, 0, b_frac, result_frac_1, &result_frac_0, &result_frac_1); + result_exp = a_exp; + /* normalizeRoundAndPack */ + normalize_round_pack_xfp(sign, result_exp, + result_frac_0, result_frac_1, + rounding_mode, result); + return; + } + if (a_frac < b_frac) { + /* bBigger */ + sub_128(b_frac, 0, a_frac, result_frac_1, &result_frac_0, &result_frac_1); + result_exp = b_exp; + sign ^= 1; + + /* normalizeRoundAndPack */ + normalize_round_pack_xfp(sign, result_exp, + result_frac_0, result_frac_1, + rounding_mode, result); + return; + } + + PACK_XFP(rounding_mode == ROUND_MINUS_INF, 0, 0, result); +} + +/************************************************************************* + * + * MAU-specific functions + * + *************************************************************************/ + +/* + * Set condition flags based on comparison of the two values A and B. + */ +static void xfp_cmp(XFP *a, XFP *b) +{ + mau_state.asr &= ~(MAU_ASR_N|MAU_ASR_Z|MAU_ASR_UO); + + /* Page 5-9: + * + * "An invalid operation exception condition exists if either or + * both source operands are trapping NaNs. If the exception is + * masked then the UO flag would be set. However, if this + * exception is enabled, and, if Op1 is a trapping NaN, it is + * converted to double-extended precision and stored in DR. Else, + * Op2 (converted to double-extended precision, if necessary) is + * stored in DR." 
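+ *
+ * (In this implementation, MAU_ASR_IM acts as the enable for the
+ * invalid operation exception: when it is clear, the exception is
+ * masked and only the UO flag is set; when it is set, the NaN
+ * operand is copied into DR as described above.)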
+ */ + + if (XFP_IS_NAN(a) || XFP_IS_NAN(b)) { + if ((mau_state.asr & MAU_ASR_IM) == 0) { + mau_state.asr |= MAU_ASR_UO; + } else if (XFP_IS_NAN(a)) { + mau_state.dr.sign_exp = a->sign_exp; + mau_state.dr.frac = a->frac; + } else { + mau_state.dr.sign_exp = b->sign_exp; + mau_state.dr.frac = b->frac; + } + return; + } + + if (xfp_lt(a, b)) { + mau_state.asr |= MAU_ASR_N; + } + + if (xfp_eq(a, b)) { + mau_state.asr |= MAU_ASR_Z; + } +} + +static void xfp_cmpe(XFP *a, XFP *b) +{ + mau_state.asr &= ~(MAU_ASR_N|MAU_ASR_Z|MAU_ASR_UO); + + /* Page 5-10: + * + * "When two unordered values are compared, then, in additon to + * the response specified below, the invalid operation exception + * sticky flag (ASR = 1) is set and the trap invoked if the + * invalid operation exceptionis enabled."" + */ + + if ((XFP_IS_NAN(a) || XFP_IS_NAN(b)) && (mau_state.asr & MAU_ASR_IM)) { + mau_state.asr |= MAU_ASR_UO; + return; + } + + if (xfp_lt(a, b)) { + mau_state.asr |= MAU_ASR_N; + } + + if (xfp_eq(a, b)) { + mau_state.asr |= MAU_ASR_Z; + } +} + +static void xfp_cmps(XFP *a, XFP *b) +{ + mau_state.asr &= ~(MAU_ASR_N|MAU_ASR_Z|MAU_ASR_UO); + + if (XFP_IS_NAN(a) || XFP_IS_NAN(b)) { + if ((mau_state.asr & MAU_ASR_IM) == 0) { + mau_state.asr |= MAU_ASR_UO; + } else if (XFP_IS_NAN(a)) { + mau_state.dr.sign_exp = a->sign_exp; + mau_state.dr.frac = a->frac; + } else { + mau_state.dr.sign_exp = b->sign_exp; + mau_state.dr.frac = b->frac; + } + return; + } + + if (xfp_lt(a, b)) { + mau_state.asr |= MAU_ASR_Z; + } else if (xfp_eq(a, b)) { + mau_state.asr |= MAU_ASR_N; + } +} + +static void xfp_cmpes(XFP *a, XFP *b) +{ + mau_state.asr &= ~(MAU_ASR_N|MAU_ASR_Z|MAU_ASR_UO); + + if ((XFP_IS_NAN(a) || XFP_IS_NAN(b)) && (mau_state.asr & MAU_ASR_IM)) { + mau_state.asr |= MAU_ASR_UO; + return; + } + + if (xfp_lt(a, b)) { + mau_state.asr |= MAU_ASR_Z; + } + + if (xfp_eq(a, b)) { + mau_state.asr |= MAU_ASR_N; + } +} + +static void xfp_add(XFP *a, XFP *b, XFP *result, RM rounding_mode) +{ + uint32 a_sign, b_sign; + + a_sign = XFP_SIGN(a); + b_sign = XFP_SIGN(b); + + if (a_sign == b_sign) { + xfp_add_fracs(a, b, a_sign, result, rounding_mode); + } else { + xfp_sub_fracs(a, b, a_sign, result, rounding_mode); + } +} + +static void xfp_sub(XFP *a, XFP *b, XFP *result, RM rounding_mode) +{ + uint32 a_sign, b_sign; + + a_sign = XFP_SIGN(a); + b_sign = XFP_SIGN(b); + + if (a_sign == b_sign) { + xfp_sub_fracs(a, b, a_sign, result, rounding_mode); + } else { + xfp_add_fracs(a, b, a_sign, result, rounding_mode); + } +} + +static void xfp_mul(XFP *a, XFP *b, XFP *result, RM rounding_mode) +{ + uint32 a_sign, b_sign, result_sign; + int32 a_exp, b_exp, result_exp; + t_uint64 a_frac, b_frac, result_frac_0, result_frac_1; + + sim_debug(TRACE_DBG, &mau_dev, + "[%08x] [MUL] op1=%04x%016llx op2=%04x%016llx\n", + R[NUM_PC], + a->sign_exp, a->frac, + b->sign_exp, b->frac); + + a_sign = XFP_SIGN(a); + a_exp = XFP_EXP(a); + a_frac = XFP_FRAC(a); + b_sign = XFP_SIGN(b); + b_exp = XFP_EXP(b); + b_frac = XFP_FRAC(b); + + result_sign = a_sign ^ b_sign; + + if (a_exp == 0x7fff) { + if ((t_uint64)(a_frac << 1) || ((b_exp == 0x7fff) && (t_uint64)(b_frac << 1))) { + propagate_xfp_nan(a, b, result); + return; + } + if ((b_exp | b_frac) == 0) { + /* invalid */ + mau_exc(MAU_ASR_IS, MAU_ASR_IM); + result->sign_exp = DEFAULT_XFP_NAN_SIGN_EXP; + result->frac = DEFAULT_XFP_NAN_FRAC; + return; + } + PACK_XFP(result_sign, 0x7fff, 0x8000000000000000ull, result); + return; + } + + if (b_exp == 0x7fff) { + if ((t_uint64)(b_frac << 1)) { + propagate_xfp_nan(a, b, 
result); + return; + } + if ((a_exp | a_frac) == 0) { + /* invalid */ + mau_exc(MAU_ASR_IS, MAU_ASR_IM); + result->sign_exp = DEFAULT_XFP_NAN_SIGN_EXP; + result->frac = DEFAULT_XFP_NAN_FRAC; + return; + } + PACK_XFP(result_sign, 0x7fff, 0x8000000000000000ull, result); + return; + } + + if (a_exp == 0) { + if (a_frac == 0) { + PACK_XFP(result_sign, 0, 0, result); + return; + } + normalize_xfp_subnormal(a_frac, &a_exp, &a_frac); + } + + if (b_exp == 0) { + if (b_frac == 0) { + PACK_XFP(result_sign, 0, 0, result); + return; + } + normalize_xfp_subnormal(b_frac, &b_exp, &b_frac); + } + + result_exp = a_exp + b_exp - 0x3ffe; + mul_64_to_128(a_frac, b_frac, &result_frac_0, &result_frac_1); + if (0 < (t_int64)result_frac_0) { + short_shift_left_128(result_frac_0, result_frac_1, 1, + &result_frac_0, &result_frac_1); + --result_exp; + } + + round_pack_xfp(result_sign, result_exp, result_frac_0, + result_frac_1, rounding_mode, result); +} + +static void xfp_div(XFP *a, XFP *b, XFP *result, RM rounding_mode) +{ + t_bool a_sign, b_sign, r_sign; + int32 a_exp, b_exp, r_exp; + t_uint64 a_frac, b_frac, r_frac0, r_frac1; + t_uint64 rem0, rem1, rem2, term0, term1, term2; + + sim_debug(TRACE_DBG, &mau_dev, + "[%08x] [DIV] op1=%04x%016llx op2=%04x%016llx\n", + R[NUM_PC], b->sign_exp, b->frac, a->sign_exp, a->frac); + + a_sign = XFP_SIGN(a); + a_exp = XFP_EXP(a); + a_frac = XFP_FRAC(a); + + b_sign = XFP_SIGN(b); + b_exp = XFP_EXP(b); + b_frac = XFP_FRAC(b); + + r_sign = a_sign ^ b_sign; + + if (a_exp == 0x7fff) { + if ((t_uint64)(a_frac << 1)) { + propagate_xfp_nan(a, b, result); + return; + } + + if (b_exp == 0x7fff) { + if ((t_uint64)(b_frac << 1)) { + propagate_xfp_nan(a, b, result); + return; + } + /* Invalid */ + mau_exc(MAU_ASR_IS, MAU_ASR_IM); + result->sign_exp = DEFAULT_XFP_NAN_SIGN_EXP; + result->frac = DEFAULT_XFP_NAN_FRAC; + return; + } + + PACK_XFP(r_sign, 0x7fff, 0x8000000000000000ull, result); + return; + } + + if (b_exp == 0x7fff) { + if ((t_uint64) (b_frac << 1)) { + propagate_xfp_nan(a, b, result); + return; + } + + PACK_XFP(r_sign, 0, 0, result); + return; + } + + if (b_exp == 0) { + if (b_frac == 0) { + if ((a_exp | b_frac) == 0) { + /* Invalid */ + mau_exc(MAU_ASR_IS, MAU_ASR_IM); + result->sign_exp = DEFAULT_XFP_NAN_SIGN_EXP; + result->frac = DEFAULT_XFP_NAN_FRAC; + return; + } + /* Divide by zero - SPECIAL CASE 4 */ + sim_debug(TRACE_DBG, &mau_dev, + "[%08x] [DIV] Divide by zero detected.\n", R[NUM_PC]); + mau_case_div_zero(a, b, result); + return; + } + normalize_xfp_subnormal(b_frac, &b_exp, &b_frac); + } + + if (a_exp == 0) { + if (a_frac == 0) { + PACK_XFP(r_sign, 0, 0, result); + return; + } + normalize_xfp_subnormal(a_frac, &a_exp, &a_frac); + } + + r_exp = a_exp - b_exp + 0x3ffe; + rem1 = 0; + if (b_frac <= a_frac) { + shift_right_128(a_frac, 0, 1, &a_frac, &rem1); + ++r_exp; + } + + r_frac0 = estimate_div_128_to_64(a_frac, rem1, b_frac); + mul_64_to_128(b_frac, r_frac0, &term0, &term1); + sub_128(a_frac, rem1, term0, term1, &rem0, &rem1); + + while ((t_int64) rem0 < 0) { + --r_frac0; + add_128(rem0, rem1, 0, b_frac, &rem0, &rem1); + } + + r_frac1 = estimate_div_128_to_64(rem1, 0, b_frac); + if ((t_uint64)(r_frac1 << 1) <= 8) { + mul_64_to_128(b_frac, r_frac1, &term1, &term2); + sub_128(rem1, 0, term1, term2, &rem1, &rem2); + while ((t_int64) rem1 < 0) { + --r_frac1; + add_128(rem1, rem2, 0, b_frac, &rem1, &rem2); + } + r_frac1 |= ((rem1 | rem2) != 0); + } + + round_pack_xfp(r_sign, r_exp, r_frac0, r_frac1, rounding_mode, result); +} + +static void xfp_sqrt(XFP *a, XFP *result, 
RM rounding_mode) +{ + uint32 a_sign; + int32 a_exp, result_exp; + t_uint64 a_frac_0, a_frac_1, result_frac_0, result_frac_1, double_result_frac_0; + t_uint64 rem_0, rem_1, rem_2, rem_3, term_0, term_1, term_2, term_3; + + a_sign = XFP_SIGN(a); + a_exp = XFP_EXP(a); + a_frac_0 = XFP_FRAC(a); + + if (a_exp == 0x7fff) { + if ((t_uint64)(a_frac_0 << 1)) { + propagate_xfp_nan(a, a, result); + return; + } + + if (!a_sign) { + result->sign_exp = a->sign_exp; + result->frac = a->frac; + return; + } + + /* invalid */ + mau_exc(MAU_ASR_IS, MAU_ASR_IM); + result->sign_exp = DEFAULT_XFP_NAN_SIGN_EXP; + result->frac = DEFAULT_XFP_NAN_FRAC; + return; + } + + if (a_sign) { + if ((a_exp | a_frac_0) == 0) { + result->sign_exp = a->sign_exp; + result->frac = a->frac; + return; + } + + /* invalid */ + mau_exc(MAU_ASR_IS, MAU_ASR_IM); + result->sign_exp = DEFAULT_XFP_NAN_SIGN_EXP; + result->frac = DEFAULT_XFP_NAN_FRAC; + return; + } + + if (a_exp == 0) { + if (a_frac_0 == 0) { + PACK_XFP(0, 0, 0, result); + return; + } + + normalize_xfp_subnormal(a_frac_0, &a_exp, &a_frac_0); + } + + result_exp = ((a_exp - 0x3fff) >> 1) + 0x3fff; + result_frac_0 = estimate_sqrt_32(a_exp, a_frac_0 >> 32); + shift_right_128(a_frac_0, 0, 2 + (a_exp & 1), &a_frac_0, &a_frac_1); + result_frac_0 = estimate_div_128_to_64(a_frac_0, a_frac_1, result_frac_0 << 32) + + (result_frac_0 << 30); + double_result_frac_0 = result_frac_0 << 1; + mul_64_to_128(result_frac_0, result_frac_0, &term_0, &term_1); + sub_128(a_frac_0, a_frac_1, term_0, term_1, &rem_0, &rem_1); + while ((t_int64) rem_0 < 0) { + --result_frac_0; + double_result_frac_0 -= 2; + add_128(rem_0, rem_1, result_frac_0 >> 63, + double_result_frac_0 | 1, &rem_0, &rem_1); + } + + result_frac_1 = estimate_div_128_to_64(rem_0, 0, double_result_frac_0); + + if ((result_frac_1 & 0x3fffffffffffffff) <= 5) { + if (result_frac_1 == 0) { + result_frac_1 = 1; + } + mul_64_to_128(double_result_frac_0, result_frac_1, &term_1, &term_2); + sub_128(rem_1, 0, term_1, term_2, &rem_1, &rem_2); + mul_64_to_128(result_frac_1, result_frac_1, &term_2, &term_3); + sub_192(rem_1, rem_2, 0, 0, term_2, term_3, &rem_1, &rem_2, &rem_3); + while ((t_int64) rem_1 < 0) { + --result_frac_1; + short_shift_left_128(0, result_frac_1, 1, &term_2, &term_3); + term_3 |= 1; + term_2 |= double_result_frac_0; + add_192(rem_1, rem_2, rem_3, + 0, term_2, term_3, + &rem_1, &rem_2, &rem_3); + } + result_frac_1 |= ((rem_1 | rem_2 | rem_3) != 0); + } + short_shift_left_128(0, result_frac_1, 1, &result_frac_0, &result_frac_1); + result_frac_0 |= double_result_frac_0; + + round_pack_xfp(0, result_exp, result_frac_0, result_frac_1, + rounding_mode, result); +} + +static void xfp_remainder(XFP *a, XFP *b, XFP *result, RM rounding_mode) +{ + uint32 a_sign, b_sign, result_sign; + int32 a_exp, b_exp, exp_diff; + t_uint64 a_frac_0, a_frac_1, b_frac; + t_uint64 q, term_0, term_1, alt_a_frac_0, alt_a_frac_1; + + a_sign = XFP_SIGN(a); + a_exp = XFP_EXP(a); + a_frac_0 = XFP_FRAC(a); + b_sign = XFP_SIGN(b); + b_exp = XFP_EXP(b); + b_frac = XFP_FRAC(b); + + if (a_exp == 0x7fff) { + if ((t_uint64)(a_frac_0 << 1) || + ((b_exp == 0x7fff) && (t_uint64)(b_frac << 1))) { + propagate_xfp_nan(a, b, result); + return; + } + /* invalid */ + mau_exc(MAU_ASR_IS, MAU_ASR_IM); + result->sign_exp = DEFAULT_XFP_NAN_SIGN_EXP; + result->frac = DEFAULT_XFP_NAN_FRAC; + return; + } + + if (b_exp == 0x7fff) { + if ((t_uint64)(b_frac << 1)) { + propagate_xfp_nan(a, b, result); + } + result->sign_exp = a->sign_exp; + result->frac = a->frac; + return; + } + + if 
(b_exp == 0) { + if (b_frac == 0) { + /* invalid */ + mau_exc(MAU_ASR_IS, MAU_ASR_IM); + result->sign_exp = DEFAULT_XFP_NAN_SIGN_EXP; + result->frac = DEFAULT_XFP_NAN_FRAC; + return; + } + normalize_xfp_subnormal(b_frac, &b_exp, &b_frac); + } + + if (a_exp == 0) { + if ((t_uint64)(a_frac_0 << 1) == 0) { + result->sign_exp = a->sign_exp; + result->frac = a->frac; + return; + } + normalize_xfp_subnormal(a_frac_0, &a_exp, &a_frac_0); + } + + b_frac |= 0x8000000000000000ull; + result_sign = a_sign; + exp_diff = a_exp - b_exp; + a_frac_1 = 0; + if (exp_diff < 0) { + if (exp_diff < -1) { + result->sign_exp = a->sign_exp; + result->frac = a->frac; + return; + } + shift_right_128(a_frac_0, 0, 1, &a_frac_0, &a_frac_1); + exp_diff = 0; + } + + q = (b_frac <= a_frac_0); + + if (q) { + a_frac_0 -= b_frac; + } + + exp_diff -= 64; + + while (0 < exp_diff) { + q = estimate_div_128_to_64(a_frac_0, a_frac_1, b_frac); + q = (2 < q) ? q - 2 : 0; + mul_64_to_128(b_frac, q, &term_0, &term_1); + sub_128(a_frac_0, a_frac_1, term_0, term_1, &a_frac_0, &a_frac_1); + short_shift_left_128(a_frac_0, a_frac_1, 62, &a_frac_0, &a_frac_1); + exp_diff -= 62; + } + + exp_diff += 64; + + if (0 < exp_diff) { + q = estimate_div_128_to_64(a_frac_0, a_frac_1, b_frac); + q = (2 < q) ? q - 2 : 0; + q >>= 64 - exp_diff; + mul_64_to_128(b_frac, q << (64 - exp_diff), &term_0, &term_1); + sub_128(a_frac_0, a_frac_1, term_0, term_1, &a_frac_0, &a_frac_1); + short_shift_left_128(0, b_frac, 64 - exp_diff, &term_0, &term_1); + while (le_128(term_0, term_1, a_frac_0, a_frac_1)) { + ++q; + sub_128(a_frac_0, a_frac_1, term_0, term_1, &a_frac_0, &a_frac_1); + } + } else { + term_0 = b_frac; + term_1 = 0; + } + + sub_128(term_0, term_1, a_frac_0, a_frac_1, &alt_a_frac_0, &alt_a_frac_1); + + if (lt_128(alt_a_frac_0, alt_a_frac_1, a_frac_0, a_frac_1) || + (eq_128(alt_a_frac_0, alt_a_frac_1, a_frac_0, a_frac_1) && + (q & 1))) { + a_frac_0 = alt_a_frac_0; + a_frac_1 = alt_a_frac_1; + result_sign = result_sign ? 0 : 1; + } + + normalize_round_pack_xfp(result_sign, b_exp + exp_diff, + a_frac_0, a_frac_1, + rounding_mode, result); +} + +/* + * Load an extended precision 80-bit IEE-754 floating point value from + * memory or register, based on the operand's specification. + */ +static void load_src_op(uint8 op, XFP *xfp) +{ + DFP dfp; + SFP sfp; + + switch (op) { + case M_OP_F0: + xfp->sign_exp = mau_state.f0.sign_exp; + xfp->frac = mau_state.f0.frac; + break; + case M_OP_F1: + xfp->sign_exp = mau_state.f1.sign_exp; + xfp->frac = mau_state.f1.frac; + break; + case M_OP_F2: + xfp->sign_exp = mau_state.f2.sign_exp; + xfp->frac = mau_state.f2.frac; + break; + case M_OP_F3: + xfp->sign_exp = mau_state.f3.sign_exp; + xfp->frac = mau_state.f3.frac; + break; + case M_OP_MEM_SINGLE: + sfp = read_w(mau_state.src, ACC_AF); + sfp_to_xfp(sfp, xfp); + break; + case M_OP_MEM_DOUBLE: + dfp = (t_uint64) read_w(mau_state.src + 4, ACC_AF); + dfp |= ((t_uint64) read_w(mau_state.src, ACC_AF)) << 32; + sim_debug(TRACE_DBG, &mau_dev, + "[load_src_op][DOUBLE] Loaded %016llx\n", + dfp); + dfp_to_xfp(dfp, xfp); + sim_debug(TRACE_DBG, &mau_dev, + "[load_src_op][DOUBLE] Expanded To %04x%016llx\n", + xfp->sign_exp, xfp->frac); + break; + case M_OP_MEM_TRIPLE: + xfp->frac = (t_uint64) read_w(mau_state.src + 8, ACC_AF); + xfp->frac |= ((t_uint64) read_w(mau_state.src + 4, ACC_AF)) << 32; + xfp->sign_exp = (uint32) read_w(mau_state.src, ACC_AF); + break; + default: + break; + } +} + +/* + * Load OP1 as a DEC value. 
+ */ +static void load_op1_decimal(DEC *d) +{ + uint32 low, mid, high; + + switch (mau_state.op1) { + case M_OP_MEM_TRIPLE: + low = read_w(mau_state.src + 8, ACC_AF); + mid = read_w(mau_state.src + 4, ACC_AF); + high = read_w(mau_state.src, ACC_AF); + d->l = low; + d->l |= ((t_uint64) mid << 32); + d->h = high; + break; + default: + /* Invalid */ + mau_exc(MAU_ASR_IS, MAU_ASR_IM); + break; + } +} + +static void store_op3_int(uint32 val) +{ + switch(mau_state.op3) { + case M_OP3_F0_SINGLE: + mau_state.f0.sign_exp = 0; + mau_state.f0.frac = (t_uint64)val; + break; + case M_OP3_F1_SINGLE: + mau_state.f1.sign_exp = 0; + mau_state.f1.frac = (t_uint64)val; + break; + case M_OP3_F2_SINGLE: + mau_state.f2.sign_exp = 0; + mau_state.f2.frac = (t_uint64)val; + break; + case M_OP3_F3_SINGLE: + mau_state.f3.sign_exp = 0; + mau_state.f3.frac = (t_uint64)val; + break; + case M_OP3_MEM_SINGLE: + write_w(mau_state.dst, val); + break; + default: + /* Indeterminate output, unsupported */ + break; + } + + mau_state.dr.sign_exp = 0; + mau_state.dr.frac = (t_uint64)val; +} + +static void store_op3_decimal(DEC *d) +{ + + switch(mau_state.op3) { + case M_OP3_MEM_TRIPLE: + write_w(mau_state.dst, d->h); + write_w(mau_state.dst + 4, (uint32)((t_uint64)d->l >> 32)); + write_w(mau_state.dst + 8, (uint32)d->l); + break; + default: + /* Unsupported */ + return; + } + + mau_state.dr.sign_exp = d->h; + mau_state.dr.frac = ((t_uint64)d->l >> 32) | (t_uint64)d->l; +} + +static void store_op3(XFP *xfp) +{ + DFP dfp; + SFP sfp; + t_bool store_dr = FALSE; + + sim_debug(TRACE_DBG, &mau_dev, + "[%08x] [store_op3] op3=%04x%016llx\n", + R[NUM_PC], + xfp->sign_exp, + xfp->frac); + + switch (mau_state.opcode) { + case M_ADD: + case M_SUB: + case M_MUL: + case M_DIV: + store_dr = TRUE; + break; + default: + break; + } + + switch (mau_state.op3) { + case M_OP3_F0_SINGLE: + case M_OP3_F0_DOUBLE: + case M_OP3_F0_TRIPLE: + if (mau_state.ntnan) { + mau_state.f0.sign_exp = GEN_NONTRAPPING_NAN.sign_exp; + mau_state.f0.frac = GEN_NONTRAPPING_NAN.frac; + } else { + mau_state.f0.sign_exp = xfp->sign_exp; + mau_state.f0.frac = xfp->frac; + } + if (set_nz()) { + if (XFP_SIGN(xfp)) { + mau_state.asr |= MAU_ASR_N; + } + if (XFP_EXP(xfp) == 0 && XFP_FRAC(xfp) == 0) { + mau_state.asr |= MAU_ASR_Z; + } + } + break; + case M_OP3_F1_SINGLE: + case M_OP3_F1_DOUBLE: + case M_OP3_F1_TRIPLE: + if (mau_state.ntnan) { + mau_state.f1.sign_exp = GEN_NONTRAPPING_NAN.sign_exp; + mau_state.f1.frac = GEN_NONTRAPPING_NAN.frac; + } else { + mau_state.f1.sign_exp = xfp->sign_exp; + mau_state.f1.frac = xfp->frac; + } + if (set_nz()) { + if (XFP_SIGN(xfp)) { + mau_state.asr |= MAU_ASR_N; + } + if (XFP_EXP(xfp) == 0 && XFP_FRAC(xfp) == 0) { + mau_state.asr |= MAU_ASR_Z; + } + } + break; + case M_OP3_F2_SINGLE: + case M_OP3_F2_DOUBLE: + case M_OP3_F2_TRIPLE: + if (mau_state.ntnan) { + mau_state.f2.sign_exp = GEN_NONTRAPPING_NAN.sign_exp; + mau_state.f2.frac = GEN_NONTRAPPING_NAN.frac; + } else { + mau_state.f2.sign_exp = xfp->sign_exp; + mau_state.f2.frac = xfp->frac; + } + if (set_nz()) { + if (XFP_SIGN(xfp)) { + mau_state.asr |= MAU_ASR_N; + } + if (XFP_EXP(xfp) == 0 && XFP_FRAC(xfp) == 0) { + mau_state.asr |= MAU_ASR_Z; + } + } + break; + case M_OP3_F3_SINGLE: + case M_OP3_F3_DOUBLE: + case M_OP3_F3_TRIPLE: + if (mau_state.ntnan) { + mau_state.f3.sign_exp = GEN_NONTRAPPING_NAN.sign_exp; + mau_state.f3.frac = GEN_NONTRAPPING_NAN.frac; + } else { + mau_state.f3.sign_exp = xfp->sign_exp; + mau_state.f3.frac = xfp->frac; + } + if (set_nz()) { + if (XFP_SIGN(xfp)) { + 
mau_state.asr |= MAU_ASR_N; + } + if (XFP_EXP(xfp) == 0 && XFP_FRAC(xfp) == 0) { + mau_state.asr |= MAU_ASR_Z; + } + } + break; + case M_OP3_MEM_SINGLE: + if (mau_state.ntnan) { + sfp = xfp_to_sfp(&GEN_NONTRAPPING_NAN, MAU_RM); + } else { + sfp = xfp_to_sfp(xfp, MAU_RM); + } + if (set_nz()) { + if (SFP_SIGN(sfp)) { + mau_state.asr |= MAU_ASR_N; + } + if (SFP_EXP(sfp) == 0 && SFP_FRAC(sfp) == 0) { + mau_state.asr |= MAU_ASR_Z; + } + } + write_w(mau_state.dst, (uint32)sfp); + break; + case M_OP3_MEM_DOUBLE: + if (mau_state.ntnan) { + dfp = xfp_to_dfp(&GEN_NONTRAPPING_NAN, MAU_RM); + } else { + dfp = xfp_to_dfp(xfp, MAU_RM); + } + if (store_dr) { + mau_state.dr.sign_exp = ((uint16)(DFP_SIGN(dfp)) << 15) | (uint16)(DFP_EXP(dfp)); + mau_state.dr.frac = (t_uint64)(DFP_FRAC(dfp)); + if (DFP_EXP(dfp)) { + /* If the number is normalized, add the implicit + normalized bit 52 */ + mau_state.dr.frac |= ((t_uint64)1 << 52); + } + } + if (set_nz()) { + if (DFP_SIGN(dfp)) { + mau_state.asr |= MAU_ASR_N; + } + if (DFP_EXP(dfp) == 0 && DFP_FRAC(dfp) == 0) { + mau_state.asr |= MAU_ASR_Z; + } + } + write_w(mau_state.dst, (uint32)(dfp >> 32)); + write_w(mau_state.dst + 4, (uint32)(dfp)); + break; + case M_OP3_MEM_TRIPLE: + if (mau_state.ntnan) { + write_w(mau_state.dst, (uint32)(GEN_NONTRAPPING_NAN.sign_exp)); + write_w(mau_state.dst + 4, (uint32)(GEN_NONTRAPPING_NAN.frac >> 32)); + write_w(mau_state.dst + 8, (uint32)(GEN_NONTRAPPING_NAN.frac)); + } else { + write_w(mau_state.dst, (uint32)(xfp->sign_exp)); + write_w(mau_state.dst + 4, (uint32)(xfp->frac >> 32)); + write_w(mau_state.dst + 8, (uint32)(xfp->frac)); + } + if (set_nz()) { + if (XFP_SIGN(xfp)) { + mau_state.asr |= MAU_ASR_N; + } + if (XFP_EXP(xfp) == 0 && XFP_FRAC(xfp) == 0) { + mau_state.asr |= MAU_ASR_Z; + } + } + break; + default: + sim_debug(TRACE_DBG, &mau_dev, + "[store_op3] WARNING: Unhandled destination: %02x\n", mau_state.op3); + break; + } +} + +/************************************************************************* + * + * MAU instruction impelementations + * + *************************************************************************/ + +static void mau_rdasr() +{ + switch (mau_state.op3) { + /* Handled */ + case M_OP3_MEM_SINGLE: + write_w(mau_state.dst, mau_state.asr); + break; + case M_OP3_MEM_DOUBLE: + write_w(mau_state.dst, mau_state.asr); + write_w(mau_state.dst + 4, mau_state.asr); + break; + case M_OP3_MEM_TRIPLE: + write_w(mau_state.dst, mau_state.asr); + write_w(mau_state.dst + 4, mau_state.asr); + write_w(mau_state.dst + 8, mau_state.asr); + break; + /* Unhandled */ + default: + sim_debug(TRACE_DBG, &mau_dev, + "[%08x] [mau_rdasr] WARNING: Unhandled source: %02x\n", + R[NUM_PC], mau_state.op3); + break; + } +} + +static void mau_wrasr() +{ + switch (mau_state.op1) { + /* Handled */ + case M_OP_MEM_SINGLE: + mau_state.asr = read_w(mau_state.src, ACC_AF); + sim_debug(TRACE_DBG, &mau_dev, + "[%08x] [WRASR] Writing ASR with: %08x\n", + R[NUM_PC], mau_state.asr); + break; + default: + sim_debug(TRACE_DBG, &mau_dev, + "[%08x] [mau_wrasr] WARNING: Unhandled source: %02x\n", + R[NUM_PC], + mau_state.op3); + break; + } +} + +/* + * OP3 = OP1 + */ +static void mau_move() +{ + XFP xfp = {0}; + + load_src_op(mau_state.op1, &xfp); + store_op3(&xfp); +} + +static void mau_cmp() +{ + XFP a, b; + + load_src_op(mau_state.op1, &a); + load_src_op(mau_state.op2, &b); + xfp_cmp(&a, &b); +} + +static void mau_cmps() +{ + XFP a, b; + + load_src_op(mau_state.op1, &a); + load_src_op(mau_state.op2, &b); + xfp_cmps(&a, &b); +} + +static void 
mau_cmpe() +{ + XFP a, b; + + load_src_op(mau_state.op1, &a); + load_src_op(mau_state.op2, &b); + xfp_cmpe(&a, &b); +} + +static void mau_cmpes() +{ + XFP a, b; + + load_src_op(mau_state.op1, &a); + load_src_op(mau_state.op2, &b); + xfp_cmpes(&a, &b); +} + +static void mau_ldr() +{ + XFP xfp; + + load_src_op(mau_state.op1, &xfp); + sim_debug(TRACE_DBG, &mau_dev, + "[%08x] [LDR] Loading DR with %04x%016llx\n", + R[NUM_PC], xfp.sign_exp, xfp.frac); + mau_state.dr.sign_exp = xfp.sign_exp; + mau_state.dr.frac = xfp.frac; +} + +static void mau_erof() +{ + DFP dfp; + SFP sfp; + + switch (mau_state.op3) { + case M_OP3_F0_SINGLE: + case M_OP3_F0_DOUBLE: + case M_OP3_F0_TRIPLE: + mau_state.f0.sign_exp = mau_state.dr.sign_exp; + mau_state.f0.frac = mau_state.dr.frac; + return; + case M_OP3_F1_SINGLE: + case M_OP3_F1_DOUBLE: + case M_OP3_F1_TRIPLE: + mau_state.f1.sign_exp = mau_state.dr.sign_exp; + mau_state.f1.frac = mau_state.dr.frac; + return; + case M_OP3_F2_SINGLE: + case M_OP3_F2_DOUBLE: + case M_OP3_F2_TRIPLE: + mau_state.f2.sign_exp = mau_state.dr.sign_exp; + mau_state.f2.frac = mau_state.dr.frac; + return; + case M_OP3_F3_SINGLE: + case M_OP3_F3_DOUBLE: + case M_OP3_F3_TRIPLE: + mau_state.f3.sign_exp = mau_state.dr.sign_exp; + mau_state.f3.frac = mau_state.dr.frac; + return; + case M_OP3_MEM_SINGLE: + sfp = xfp_to_sfp(&(mau_state.dr), MAU_RM); + write_w(mau_state.dst, (uint32)sfp); + return; + case M_OP3_MEM_DOUBLE: + dfp = xfp_to_dfp(&(mau_state.dr), MAU_RM); + write_w(mau_state.dst + 4, (uint32)(dfp >> 32)); + write_w(mau_state.dst, (uint32)(dfp)); + return; + case M_OP3_MEM_TRIPLE: + write_w(mau_state.dst, (uint32)(mau_state.dr.sign_exp)); + write_w(mau_state.dst + 4, (uint32)(mau_state.dr.frac >> 32)); + write_w(mau_state.dst + 8, (uint32)(mau_state.dr.frac)); + return; + default: + sim_debug(TRACE_DBG, &mau_dev, + "[mau_erof] WARNING: Unhandled destination: %02x\n", mau_state.op3); + return; + } +} + + +static void mau_rtoi() +{ + XFP a, result; + + load_src_op(mau_state.op1, &a); + mau_round_xfp_to_int(&a, &result, MAU_RM); + store_op3(&result); +} + +static void mau_ftoi() +{ + XFP a; + uint32 result; + + load_src_op(mau_state.op1, &a); + result = xfp_to_int(&a, MAU_RM); + store_op3_int(result); +} + +static void mau_dtof() +{ + DEC d; + XFP result; + + load_op1_decimal(&d); + mau_decimal_to_xfp(&d, &result, MAU_RM); + store_op3(&result); +} + +static void mau_ftod() +{ + XFP a; + DEC d; + + load_src_op(mau_state.op1, &a); + xfp_to_decimal(&a, &d, MAU_RM); + store_op3_decimal(&d); +} + +static void mau_add() +{ + XFP a, b, result; + + load_src_op(mau_state.op1, &a); + load_src_op(mau_state.op2, &b); + xfp_add(&a, &b, &result, MAU_RM); + store_op3(&result); +} + +/* + * OP3 = OP2 - OP1 + */ +static void mau_sub() +{ + XFP a, b, result; + + load_src_op(mau_state.op1, &a); + load_src_op(mau_state.op2, &b); + xfp_sub(&b, &a, &result, MAU_RM); + store_op3(&result); +} + +/* + * OP3 = OP1 * OP2 + */ +static void mau_mul() +{ + XFP a, b, result; + + load_src_op(mau_state.op1, &a); + load_src_op(mau_state.op2, &b); + xfp_mul(&b, &a, &result, MAU_RM); + store_op3(&result); +} + +/* + * OP3 = OP1 / OP2 + */ +static void mau_div() +{ + XFP a, b, result; + + load_src_op(mau_state.op1, &a); + load_src_op(mau_state.op2, &b); + sim_debug(TRACE_DBG, &mau_dev, + "[%08x] [DIV OP2/OP1] OP2=0x%04x%016llx OP1=0x%04x%016llx\n", + R[NUM_PC], + b.sign_exp, b.frac, + a.sign_exp, a.frac); + xfp_div(&b, &a, &result, MAU_RM); + store_op3(&result); +} + +static void mau_neg() +{ + XFP a, result; + + 
load_src_op(mau_state.op1, &a); + result.sign_exp = a.sign_exp; + result.frac = a.frac; + result.sign_exp ^= 0x8000; + store_op3(&result); +} + +static void mau_abs() +{ + XFP a, result; + + load_src_op(mau_state.op1, &a); + result.sign_exp = a.sign_exp; + result.frac = a.frac; + result.sign_exp &= 0x7fff; + store_op3(&result); +} + +/* + * OP3 = sqrt(OP1) + */ +static void mau_sqrt() +{ + XFP a, result; + + load_src_op(mau_state.op1, &a); + xfp_sqrt(&a, &result, MAU_RM); + store_op3(&result); +} + +/* + * OP3 = float(OP1) + * + * If the source operand is more than one word wide, only the last + * word is converted. + */ +static void mau_itof() +{ + XFP xfp; + int32 val = 0; + + mau_state.asr &= ~(MAU_ASR_N|MAU_ASR_Z); + + switch(mau_state.op1) { + case M_OP_F0: + case M_OP_F1: + case M_OP_F2: + case M_OP_F3: + mau_exc(MAU_ASR_IS, MAU_ASR_IM); + return; + case M_OP_MEM_SINGLE: + val = read_w(mau_state.src, ACC_AF); + break; + case M_OP_MEM_DOUBLE: + val = read_w(mau_state.src + 4, ACC_AF); + break; + case M_OP_MEM_TRIPLE: + val = read_w(mau_state.src + 8, ACC_AF); + break; + default: + break; + } + /* Convert */ + mau_int_to_xfp(val, &xfp); + + store_op3(&xfp); +} + +/* + * OP3 = REMAINDER(b/a) + */ +static void mau_remainder() +{ + XFP a, b, result; + + load_src_op(mau_state.op1, &a); + load_src_op(mau_state.op2, &b); + xfp_remainder(&b, &a, &result, MAU_RM); + store_op3(&result); +} + +/* + * Decode the command word into its corresponding parts. Both src and + * dst are optional depending on the WE32100 operand, and may be set + * to any value if not used. + */ +static SIM_INLINE void mau_decode(uint32 cmd, uint32 src, uint32 dst) +{ + mau_state.cmd = cmd; + mau_state.src = src; + mau_state.dst = dst; + mau_state.opcode = (uint8) ((cmd & 0x7c00) >> 10); + mau_state.op1 = (uint8) ((cmd & 0x0380) >> 7); + mau_state.op2 = (uint8) ((cmd & 0x0070) >> 4); + mau_state.op3 = (uint8) (cmd & 0x000f); + sim_debug(DECODE_DBG, &mau_dev, + "opcode=%s (%02x) op1=%s op2=%s op3=%s\n", + mau_op_names[mau_state.opcode], + mau_state.opcode, + src_op_names[mau_state.op1 & 0x7], + src_op_names[mau_state.op2 & 0x7], + dst_op_names[mau_state.op3 & 0xf]); +} + +/* + * Handle a command. 
+ */
+static void mau_execute()
+{
+    clear_asr();
+
+    switch(mau_state.opcode) {
+    case M_NOP:
+        /* Do nothing */
+        break;
+    case M_ADD:
+        mau_add();
+        break;
+    case M_SUB:
+        mau_sub();
+        break;
+    case M_MUL:
+        mau_mul();
+        break;
+    case M_DIV:
+        mau_div();
+        break;
+    case M_RDASR:
+        mau_rdasr();
+        break;
+    case M_WRASR:
+        mau_wrasr();
+        break;
+    case M_MOVE:
+        mau_move();
+        break;
+    case M_LDR:
+        mau_ldr();
+        break;
+    case M_ITOF:
+        mau_itof();
+        break;
+    case M_EROF:
+        mau_erof();
+        break;
+    case M_RTOI:
+        mau_rtoi();
+        break;
+    case M_FTOI:
+        mau_ftoi();
+        break;
+    case M_CMP:
+        mau_cmp();
+        break;
+    case M_CMPS:
+        mau_cmps();
+        break;
+    case M_CMPE:
+        mau_cmpe();
+        break;
+    case M_CMPES:
+        mau_cmpes();
+        break;
+    case M_REM:
+        mau_remainder();
+        break;
+    case M_NEG:
+        mau_neg();
+        break;
+    case M_ABS:
+        mau_abs();
+        break;
+    case M_SQRT:
+        mau_sqrt();
+        break;
+    case M_FTOD:
+        mau_ftod();
+        break;
+    case M_DTOF:
+        mau_dtof();
+        break;
+    default:
+        sim_debug(TRACE_DBG, &mau_dev,
+                  "[execute] unhandled opcode %s [0x%02x]\n",
+                  mau_op_names[mau_state.opcode],
+                  mau_state.opcode);
+        break;
+    }
+
+    /* If an error has occurred, abort */
+    abort_on_fault();
+
+    /* Copy the N, Z, V and C (from PS) flags over to the CPU's PSW */
+    R[NUM_PSW] &= ~(MAU_ASR_N|MAU_ASR_Z|MAU_ASR_IO|MAU_ASR_PS);
+    R[NUM_PSW] |= (mau_state.asr & (MAU_ASR_N|MAU_ASR_Z|MAU_ASR_IO|MAU_ASR_PS));
+
+    /* Set the RA and CSC flags in the ASR */
+    mau_state.asr |= MAU_ASR_RA;
+    if (mau_state.opcode != M_RDASR && mau_state.opcode != M_LDR) {
+        mau_state.asr |= MAU_ASR_CSC;
+    }
+}
+
+/*
+ * Receive a broadcast from the CPU, and potentially handle it.
+ */
+t_stat mau_broadcast(uint32 cmd, uint32 src, uint32 dst)
+{
+    uint8 id = (uint8) ((cmd & 0xff000000) >> 24);
+
+    /* If this message isn't for us, return SCPE_NXM. If the MAU is
+     * not attached, return SCPE_NOATT. Otherwise, decode and act on
+     * the command. */
+    if (id != MAU_ID) {
+        sim_debug(DECODE_DBG, &mau_dev,
+                  "[broadcast] Message for coprocessor id %d is not for MAU (%d)\n",
+                  id, MAU_ID);
+        return SCPE_NXM;
+    } else if (mau_dev.flags & DEV_DIS) {
+        sim_debug(DECODE_DBG, &mau_dev,
+                  "[broadcast] Message for MAU, but MAU is not attached.\n");
+        return SCPE_NOATT;
+    } else {
+        mau_decode(cmd, src, dst);
+        mau_execute();
+        return SCPE_OK;
+    }
+}
+
+CONST char *mau_description(DEVICE *dptr)
+{
+    return "WE32106";
+}
diff --git a/3B2/3b2_mau.h b/3B2/3b2_mau.h
new file mode 100644
index 00000000..53825fe1
--- /dev/null
+++ b/3B2/3b2_mau.h
@@ -0,0 +1,375 @@
+/* 3b2_mau.h: AT&T 3B2 Model 400 Math Acceleration Unit (WE32106 MAU)
+   Header
+
+   Copyright (c) 2019, Seth J. Morabito
+
+   Permission is hereby granted, free of charge, to any person
+   obtaining a copy of this software and associated documentation
+   files (the "Software"), to deal in the Software without
+   restriction, including without limitation the rights to use, copy,
+   modify, merge, publish, distribute, sublicense, and/or sell copies
+   of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.
+   IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+
+   Except as contained in this notice, the name of the author shall
+   not be used in advertising or otherwise to promote the sale, use or
+   other dealings in this Software without prior written authorization
+   from the author.
+
+   ---------------------------------------------------------------------
+
+   This file is part of a simulation of the WE32106 Math Acceleration
+   Unit. The WE32106 MAU is an IEEE-754 compatible floating point
+   hardware math accelerator that was available as an optional
+   component on the AT&T 3B2/310 and 3B2/400, and a standard component
+   on the 3B2/500, 3B2/600, and 3B2/1000.
+
+   Portions of this code are derived from the SoftFloat 2c library by
+   John R. Hauser. Functions derived from SoftFloat 2c are clearly
+   marked in the comments.
+
+   Legal Notice
+   ============
+
+   SoftFloat was written by John R. Hauser. Release 2c of SoftFloat
+   was made possible in part by the International Computer Science
+   Institute, located at Suite 600, 1947 Center Street, Berkeley,
+   California 94704. Funding was partially provided by the National
+   Science Foundation under grant MIP-9311980. The original version
+   of this code was written as part of a project to build a
+   fixed-point vector processor in collaboration with the University
+   of California at Berkeley, overseen by Profs. Nelson Morgan and
+   John Wawrzynek.
+
+   THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable
+   effort has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS
+   THAT WILL AT TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS
+   SOFTWARE IS RESTRICTED TO PERSONS AND ORGANIZATIONS WHO CAN AND
+   WILL TOLERATE ALL LOSSES, COSTS, OR OTHER PROBLEMS THEY INCUR DUE
+   TO THE SOFTWARE WITHOUT RECOMPENSE FROM JOHN HAUSER OR THE
+   INTERNATIONAL COMPUTER SCIENCE INSTITUTE, AND WHO FURTHERMORE
+   EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER
+   SCIENCE INSTITUTE (possibly via similar legal notice) AGAINST ALL
+   LOSSES, COSTS, OR OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND
+   CLIENTS DUE TO THE SOFTWARE, OR INCURRED BY ANYONE DUE TO A
+   DERIVATIVE WORK THEY CREATE USING ANY PART OF THE SOFTWARE.
+
+   The following are expressly permitted, even for commercial
+   purposes:
+
+   (1) distribution of SoftFloat in whole or in part, as long as this
+   and other legal notices remain and are prominent, and provided also
+   that, for a partial distribution, prominent notice is given that it
+   is a subset of the original; and
+
+   (2) inclusion or use of SoftFloat in whole or in part in a
+   derivative work, provided that the use restrictions above are met
+   and the minimal documentation requirements stated in the source
+   code are satisfied.
+   ---------------------------------------------------------------------
+
+   Data Types
+   ==========
+
+   The WE32106 MAU stores values using IEEE-754 1985 types, plus a
+   non-standard Decimal type.
+
+   - Decimal Type - 18 BCD digits long. Each digit is 4 bits wide.
+     Sign is encoded in byte 0.
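+
+     For example (illustrative, using the positive-number sign code
+     0xA that xfp_to_decimal in 3b2_mau.c generates), the value +123
+     would be stored as h = 0x00000000, l = 0x000000000000123A:
+     D00 = 3, D01 = 2, D02 = 1, and sign = 0xA in the low nibble,
+     following the layout below.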
+ + 3322 2222 2222 1111 1111 1100 0000 0000 + 1098 7654 3210 9876 5432 1098 7654 3210 + +-------------------+----+----+----+----+ + | unused | D18| D17| D16| D15| High Word + +----+----+----+----+----+----+----+----+ + | D14| D13| D12| D11| D10| D09| D08| D07| Middle Word + +----+----+----+----+----+----+----+----+ + | D06| D05| D04| D03| D02| D01| D00|sign| Low Word + +----+----+----+----+----+----+----+----+ + + Sign: 0: Positive Infinity 10: Positive Number + 1: Negative Infinity 11: Negative Number + 2: Positive NaN 12: Positive Number + 3: Negative NaN 13: Negative Number + 4-9: Trapping NaN 14-15: Positive Number + + - Extended Precision (80-bit) - exponent biased by 16383 + + 3 322222222221111 1 111110000000000 + 1 098765432109876 5 432109876543210 + +-----------------+-+---------------+ + | unused |S| Exponent | High Word + +-+---------------+-+---------------+ + |J| Fraction (high word) | Middle Word + +-+---------------------------------+ + | Fraction (low word) | Low Word + +-----------------------------------+ + + + - Double Precision (64-bit) - exponent biased by 1023 + + 3 3222222222 211111111110000000000 + 1 0987654321 098765432109876543210 + +-+----------+---------------------+ + |S| Exponent | Fraction (high) | High Word + +-+----------+---------------------+ + | Fraction (low) | Low Word + +----------------------------------+ + + + - Single Precision (32-bit) - exponent biased by 127 + + 3 32222222 22211111111110000000000 + 1 09876543 21098765432109876543210 + +-+--------+-----------------------+ + |S| Exp | Fraction | + +-+--------+-----------------------+ + +*/ + +#ifndef _3B2_MAU_H +#define _3B2_MAU_H + +#include "3b2_defs.h" + +#define SRC_LEN_INVALID 0 +#define SRC_LEN_SINGLE 1 +#define SRC_LEN_DOUBLE 2 +#define SRC_LEN_TRIPLE 3 + +#define MAU_ASR_RC_SHIFT 22 + +#define MAU_ASR_PR 0x20u /* Partial Remainder */ +#define MAU_ASR_QS 0x40u /* Divide By Zero Sticky */ +#define MAU_ASR_US 0x80u /* Underflow Sticky */ +#define MAU_ASR_OS 0x100u /* Overflow Sticky */ +#define MAU_ASR_IS 0x200u /* Invalid Operation Sticky */ +#define MAU_ASR_PM 0x400u /* Inexact Mask */ +#define MAU_ASR_QM 0x800u /* Divide by Zero Mask */ +#define MAU_ASR_UM 0x1000u /* Underflow Mask */ +#define MAU_ASR_OM 0x2000u /* Overflow Mask */ +#define MAU_ASR_IM 0x4000u /* Invalid Operation Mask */ + +#define MAU_ASR_UO 0x10000u /* Unordered */ +#define MAU_ASR_CSC 0x20000u /* Context Switch Control */ +#define MAU_ASR_PS 0x40000u /* Inexact Sticky */ +#define MAU_ASR_IO 0x80000u /* Integer Overflow */ +#define MAU_ASR_Z 0x100000u /* Zero Flag */ +#define MAU_ASR_N 0x200000u /* Negative Flag */ +#define MAU_ASR_RC 0x400000u /* Round Control */ + +#define MAU_ASR_NTNC 0x1000000u /* Nontrapping NaN Control */ +#define MAU_ASR_ECP 0x2000000u /* Exception Condition */ + +#define MAU_ASR_RA 0x80000000u /* Result Available */ + +#define MAU_RC_RN 0 /* Round toward Nearest */ +#define MAU_RC_RP 1 /* Round toward Plus Infin. */ +#define MAU_RC_RM 2 /* Round toward Neg. Infin. 
*/ +#define MAU_RC_RZ 3 /* Round toward Zero */ + +#define SFP_SIGN(V) (((V) >> 31) & 1) +#define SFP_EXP(V) (((V) >> 23) & 0xff) +#define SFP_FRAC(V) ((V) & 0x7fffff) + +#define DFP_SIGN(V) (((V) >> 63) & 1) +#define DFP_EXP(V) (((V) >> 52) & 0x7ff) +#define DFP_FRAC(V) ((V) & 0xfffffffffffffull) + +#define XFP_SIGN(V) (((V)->sign_exp >> 15) & 1) +#define XFP_EXP(V) ((V)->sign_exp & 0x7fff) +#define XFP_FRAC(V) ((V)->frac) + +#define XFP_IS_NORMAL(V) ((V)->frac & 0x8000000000000000ull) + +#define DEFAULT_XFP_NAN_SIGN_EXP 0xffff +#define DEFAULT_XFP_NAN_FRAC 0xc000000000000000ull + +#define SFP_IS_TRAPPING_NAN(V) (((((V) >> 22) & 0x1ff) == 0x1fe) && \ + ((V) & 0x3fffff)) +#define DFP_IS_TRAPPING_NAN(V) (((((V) >> 51) & 0xfff) == 0xffe) && \ + ((V) & 0x7ffffffffffffull)) +#define XFP_IS_NAN(V) ((((V)->sign_exp & 0x7fff) == 0x7fff) && \ + (t_uint64)((V)->frac << 1)) +#define XFP_IS_TRAPPING_NAN(V) ((((V)->sign_exp) & 0x7fff) && \ + ((((V)->frac) & ~(0x4000000000000000ull)) << 1) && \ + (((V)->frac) == ((V)->frac & ~(0x4000000000000000ull)))) +#define PACK_DFP(SIGN,EXP,FRAC) ((((t_uint64)(SIGN))<<63) + \ + (((t_uint64)(EXP))<<52) + \ + ((t_uint64)(FRAC))) +#define PACK_SFP(SIGN,EXP,FRAC) (((uint32)(SIGN)<<31) + \ + ((uint32)(EXP)<<23) + \ + ((uint32)(FRAC))) +#define PACK_XFP(SIGN,EXP,FRAC,V) do { \ + (V)->frac = (FRAC); \ + (V)->sign_exp = ((uint16)(SIGN) << 15) + (EXP); \ + } while (0) + +#define PACK_XFP_S(SIGN,EXP,FRAC,S,V) do { \ + (V)->frac = (FRAC); \ + (V)->sign_exp = ((uint16)(SIGN) << 15) + (EXP); \ + (V)->s = (S) != 0; \ + } while (0) + +#define MAU_RM ((RM)((mau_state.asr >> 22) & 3)) + +typedef enum { + M_ADD = 0x02, + M_SUB = 0x03, + M_DIV = 0x04, + M_REM = 0x05, + M_MUL = 0x06, + M_MOVE = 0x07, + M_RDASR = 0x08, + M_WRASR = 0x09, + M_CMP = 0x0a, + M_CMPE = 0x0b, + M_ABS = 0x0c, + M_SQRT = 0x0d, + M_RTOI = 0x0e, + M_FTOI = 0x0f, + M_ITOF = 0x10, + M_DTOF = 0x11, + M_FTOD = 0x12, + M_NOP = 0x13, + M_EROF = 0x14, + M_NEG = 0x17, + M_LDR = 0x18, + M_CMPS = 0x1a, + M_CMPES = 0x1b +} mau_opcodes; + +typedef enum { + M_OP3_F0_SINGLE, + M_OP3_F1_SINGLE, + M_OP3_F2_SINGLE, + M_OP3_F3_SINGLE, + M_OP3_F0_DOUBLE, + M_OP3_F1_DOUBLE, + M_OP3_F2_DOUBLE, + M_OP3_F3_DOUBLE, + M_OP3_F0_TRIPLE, + M_OP3_F1_TRIPLE, + M_OP3_F2_TRIPLE, + M_OP3_F3_TRIPLE, + M_OP3_MEM_SINGLE, + M_OP3_MEM_DOUBLE, + M_OP3_MEM_TRIPLE, + M_OP3_NONE +} op3_spec; + +/* Specifier bytes for Operands 1 and 2 */ +typedef enum { + M_OP_F0, + M_OP_F1, + M_OP_F2, + M_OP_F3, + M_OP_MEM_SINGLE, + M_OP_MEM_DOUBLE, + M_OP_MEM_TRIPLE, + M_OP_NONE +} op_spec; + +/* + * Not-a-Number Type + */ +typedef struct { + t_bool sign; + t_uint64 high; + t_uint64 low; +} NAN_T; + +/* + * Extended Precision (80 bits). + * + * Note that an undocumented feature of the WE32106 requires the use + * of uint32 rather than uint16 for the sign and exponent components + * of the struct. Although bits 80-95 are "unused", several + * diagnostics actually expect these bits to be moved and preserved on + * word transfers. They are ignored and discarded by math routines, + * however. + * + * The 's' field holds the Sticky bit used by rounding. + */ +typedef struct { + uint32 sign_exp; /* Sign and Exponent */ + t_uint64 frac; /* Fraction/Significand/Mantissa */ + t_bool s; /* Sticky bit */ +} XFP; + +typedef struct { + uint32 h; + t_uint64 l; +} DEC; + +/* + * Supported rounding modes. 
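+ *
+ * These correspond one-to-one to the ASR round-control codes above
+ * (MAU_RC_RN, MAU_RC_RP, MAU_RC_RM, MAU_RC_RZ), so the MAU_RM macro
+ * can cast the two-bit RC field extracted from the ASR directly to
+ * an RM value.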
+ */ +typedef enum { + ROUND_NEAREST, + ROUND_PLUS_INF, + ROUND_MINUS_INF, + ROUND_ZERO +} RM; + +/* + * Double Precision (64 bits) + */ +typedef t_uint64 DFP; + +/* + * Single Precision (32 bits) + */ +typedef uint32 SFP; + +/* + * MAU state + */ + +typedef struct { + uint32 cmd; + /* Exception */ + uint32 exception; + /* Status register */ + uint32 asr; + t_bool trapping_nan; + /* Generate a Non-Trapping NaN */ + t_bool ntnan; + /* Source (from broadcast) */ + uint32 src; + /* Destination (from broadcast) */ + uint32 dst; + uint8 opcode; + uint8 op1; + uint8 op2; + uint8 op3; + /* Data Register */ + XFP dr; + /* Operand Registers */ + XFP f0; + XFP f1; + XFP f2; + XFP f3; +} MAU_STATE; + +extern DEVICE mau_dev; + +t_stat mau_reset(DEVICE *dptr); +t_stat mau_attach(UNIT *uptr, CONST char *cptr); +t_stat mau_detach(UNIT *uptr); +t_stat mau_broadcast(uint32 cmd, uint32 src, uint32 dst); +CONST char *mau_description(DEVICE *dptr); + +#endif diff --git a/3B2/3b2_mmu.c b/3B2/3b2_mmu.c index d0a048e8..9a5dddbd 100644 --- a/3B2/3b2_mmu.c +++ b/3B2/3b2_mmu.c @@ -52,11 +52,32 @@ REG mmu_reg[] = { }; DEVICE mmu_dev = { - "MMU", &mmu_unit, mmu_reg, NULL, - 1, 16, 8, 4, 16, 32, - NULL, NULL, &mmu_init, - NULL, NULL, NULL, NULL, - DEV_DEBUG, 0, sys_deb_tab + "MMU", /* name */ + &mmu_unit, /* units */ + mmu_reg, /* registers */ + NULL, /* modifiers */ + 1, /* #units */ + 16, /* address radix */ + 8, /* address width */ + 4, /* address incr */ + 16, /* data radix */ + 32, /* data width */ + NULL, /* examine routine */ + NULL, /* deposit routine */ + &mmu_init, /* reset routine */ + NULL, /* boot routine */ + NULL, /* attach routine */ + NULL, /* detach routine */ + NULL, /* context */ + DEV_DEBUG, /* flags */ + 0, /* debug control flags */ + sys_deb_tab, /* debug flag names */ + NULL, /* memory size change */ + NULL, /* logical name */ + NULL, /* help routine */ + NULL, /* attach help routine */ + NULL, /* help context */ + &mmu_description /* device description */ }; /* @@ -1083,3 +1104,8 @@ void write_w(uint32 va, uint32 val) { pwrite_w(mmu_xlate_addr(va, ACC_W), val); } + +CONST char *mmu_description(DEVICE *dptr) +{ + return "WE32101"; +} diff --git a/3B2/3b2_mmu.h b/3B2/3b2_mmu.h index f7f57e83..ef377b51 100644 --- a/3B2/3b2_mmu.h +++ b/3B2/3b2_mmu.h @@ -322,6 +322,7 @@ extern DEVICE mmu_dev; t_stat mmu_init(DEVICE *dptr); uint32 mmu_read(uint32 pa, size_t size); void mmu_write(uint32 pa, uint32 val, size_t size); +CONST char *mmu_description(DEVICE *dptr); /* Physical memory read/write */ uint8 pread_b(uint32 pa); diff --git a/3B2/3b2_sys.c b/3B2/3b2_sys.c index 5ec44e0d..79eeb0dc 100644 --- a/3B2/3b2_sys.c +++ b/3B2/3b2_sys.c @@ -38,6 +38,7 @@ #include "3b2_ctc.h" #include "3b2_ports.h" #include "3b2_ni.h" +#include "3b2_mau.h" #include "3b2_sysdev.h" char sim_name[] = "AT&T 3B2 Model 400"; @@ -53,6 +54,7 @@ extern instr *cpu_instr; DEVICE *sim_devices[] = { &cpu_dev, &mmu_dev, + &mau_dev, &timer_dev, &tod_dev, &nvram_dev, @@ -86,6 +88,7 @@ const char *sim_stop_messages[] = { void full_reset() { cpu_reset(&cpu_dev); + mau_reset(&mau_dev); tti_reset(&tti_dev); contty_reset(&contty_dev); iu_timer_reset(&iu_timer_dev); diff --git a/Visual Studio Projects/3B2.vcproj b/Visual Studio Projects/3B2.vcproj index 689aa8ee..472742c6 100644 --- a/Visual Studio Projects/3B2.vcproj +++ b/Visual Studio Projects/3B2.vcproj @@ -220,6 +220,10 @@ RelativePath="..\3B2\3b2_iu.c" > + + @@ -508,6 +512,10 @@ RelativePath="..\3B2\3b2_iu.h" > + + diff --git a/descrip.mms b/descrip.mms index 2e5d9525..f901379f 100644 --- 
a/descrip.mms +++ b/descrip.mms @@ -328,8 +328,8 @@ ATT3B2_LIB = $(LIB_DIR)ATT3B2-$(ARCH).OLB ATT3B2_SOURCE = $(ATT3B2_DIR)3B2_CPU.C,$(ATT3B2_DIR)3B2_DMAC.C,\ $(ATT3B2_DIR)3B2_ID.C,$(ATT3B2_DIR)3B2_IF.C,\ $(ATT3B2_DIR)3B2_IO.C,$(ATT3B2_DIR)3B2_IU.C,\ - $(ATT3B2_DIR)3B2_MMU.C,$(ATT3B2_DIR)3B2_SYS.C,\ - $(ATT3B2_DIR)3B2_SYSDEV.C + $(ATT3B2_DIR)3B2_MAU.C,$(ATT3B2_DIR)3B2_MMU.C,\ + $(ATT3B2_DIR)3B2_SYS.C,$(ATT3B2_DIR)3B2_SYSDEV.C ATT3B2_OPTIONS = /INCL=($(SIMH_DIR),$(ATT3B2_DIR))/DEF=($(CC_DEFS)) # MITS Altair Simulator Definitions. diff --git a/makefile b/makefile index c96a5d73..91f46c2b 100644 --- a/makefile +++ b/makefile @@ -1920,7 +1920,8 @@ ATT3B2 = ${ATT3B2D}/3b2_cpu.c ${ATT3B2D}/3b2_mmu.c \ ${ATT3B2D}/3b2_id.c ${ATT3B2D}/3b2_dmac.c \ ${ATT3B2D}/3b2_sys.c ${ATT3B2D}/3b2_io.c \ ${ATT3B2D}/3b2_ports.c ${ATT3B2D}/3b2_ctc.c \ - ${ATT3B2D}/3b2_ni.c ${ATT3B2D}/3b2_sysdev.c + ${ATT3B2D}/3b2_ni.c ${ATT3B2D}/3b2_mau.c \ + ${ATT3B2D}/3b2_sysdev.c ATT3B2_OPT = -DUSE_INT64 -DUSE_ADDR64 -I ${ATT3B2D} ${NETWORK_OPT} # # Build everything (not the unsupported/incomplete or experimental simulators)