FPU update as latest MAME

2026-02-14 19:56:10 +00:00 · 2021-04-13 09:56:39 +02:00
parent 073c27df8e
commit 6c9276f882
9 changed files with 1968 additions and 233 deletions
--- a/2
+++ b/2
@@ -22,7 +22,7 @@ MAINFILES        = emulator.c \
 	platforms/amiga/net/pi-net.c \
 	platforms/shared/rtc.c

-MUSASHIFILES     = m68kcpu.c m68kdasm.c softfloat/softfloat.c
+MUSASHIFILES     = m68kcpu.c m68kdasm.c softfloat/softfloat.c softfloat/fsincos.c softfloat/fyl2x.c
 MUSASHIGENCFILES = m68kops.c
 MUSASHIGENHFILES = m68kops.h
 MUSASHIGENERATOR = m68kmake
--- a/m68k_in.c
+++ b/m68k_in.c
@@ -284,6 +284,7 @@ M68KMAKE_OPCODE_HANDLER_HEADER
 extern void m68040_fpu_op0(void);
 extern void m68040_fpu_op1(void);
 extern void m68881_mmu_ops();
+extern void m68881_ftrap();

 /* ======================================================================== */
 /* ========================= INSTRUCTION HANDLERS ========================= */
@@ -561,6 +562,7 @@ cpdbcc    32  .     .     1111...001001...  ..........  . . U U .   .   .   4
 cpgen     32  .     .     1111...000......  ..........  . . U U .   .   .   4   4   .  unemulated
 cpscc     32  .     .     1111...001......  ..........  . . U U .   .   .   4   4   .  unemulated
 cptrapcc  32  .     .     1111...001111...  ..........  . . U U .   .   .   4   4   .  unemulated
+ftrapcc   32  .     .     1111001001111...  ..........  . . U U .   .   .   4   4   .
 dbt       16  .     .     0101000011001...  ..........  U U U U U  12  12   6   6   6
 dbf       16  .     .     0101000111001...  ..........  U U U U U  12  12   6   6   6
 dbcc      16  .     .     0101....11001...  ..........  U U U U U  12  12   6   6   6
@@ -915,7 +917,8 @@ M68KMAKE_OP(1111, 0, ., .)

 M68KMAKE_OP(040fpu0, 32, ., .)
 {
-	if(CPU_TYPE_IS_030_PLUS(CPU_TYPE))
+//	printf("FPU 040fpu0 HAS_FPU=%d\n",!!HAS_FPU);
+	if(HAS_FPU)
 	{
 		m68040_fpu_op0();
 		return;
@@ -926,7 +929,8 @@ M68KMAKE_OP(040fpu0, 32, ., .)

 M68KMAKE_OP(040fpu1, 32, ., .)
 {
-	if(CPU_TYPE_IS_030_PLUS(CPU_TYPE))
+//	printf("FPU 040fpu1 HAS_FPU=%d\n",!!HAS_FPU);
+	if(HAS_FPU)
 	{
 		m68040_fpu_op1();
 		return;
@@ -4370,6 +4374,15 @@ M68KMAKE_OP(cptrapcc, 32, ., .)
 	m68ki_exception_1111();
 }

+M68KMAKE_OP(ftrapcc,32, ., .)
+{
+	if(HAS_FPU)
+	{
+		m68881_ftrap();
+	} else {
+		m68ki_exception_1111();
+	}
+}

 M68KMAKE_OP(dbt, 16, ., .)
 {
--- a/m68kcpu.c
+++ b/m68kcpu.c
@@ -794,7 +794,8 @@ void m68k_set_cpu_type(unsigned int cpu_type)
 			CYC_MOVEM_L      = 3;
 			CYC_SHIFT        = 1;
 			CYC_RESET        = 132;
-			HAS_PMMU	 = 0;
+			HAS_PMMU         = 0;
+			HAS_FPU          = 0;
 			return;
 		case M68K_CPU_TYPE_SCC68070:
 			m68k_set_cpu_type(M68K_CPU_TYPE_68010);
@@ -816,7 +817,8 @@ void m68k_set_cpu_type(unsigned int cpu_type)
 			CYC_MOVEM_L      = 3;
 			CYC_SHIFT        = 1;
 			CYC_RESET        = 130;
-			HAS_PMMU	 = 0;
+			HAS_PMMU         = 0;
+			HAS_FPU          = 0;
 			return;
 		case M68K_CPU_TYPE_68EC020:
 			CPU_TYPE         = CPU_TYPE_EC020;
@@ -833,7 +835,8 @@ void m68k_set_cpu_type(unsigned int cpu_type)
 			CYC_MOVEM_L      = 2;
 			CYC_SHIFT        = 0;
 			CYC_RESET        = 518;
-			HAS_PMMU	 = 0;
+			HAS_PMMU         = 0;
+			HAS_FPU          = 0;
 			return;
 		case M68K_CPU_TYPE_68020:
 			CPU_TYPE         = CPU_TYPE_020;
@@ -850,7 +853,8 @@ void m68k_set_cpu_type(unsigned int cpu_type)
 			CYC_MOVEM_L      = 2;
 			CYC_SHIFT        = 0;
 			CYC_RESET        = 518;
-			HAS_PMMU	 = 0;
+			HAS_PMMU         = 0;
+			HAS_FPU          = 0;
 			return;
 		case M68K_CPU_TYPE_68030:
 			CPU_TYPE         = CPU_TYPE_030;
@@ -867,7 +871,8 @@ void m68k_set_cpu_type(unsigned int cpu_type)
 			CYC_MOVEM_L      = 2;
 			CYC_SHIFT        = 0;
 			CYC_RESET        = 518;
-			HAS_PMMU	       = 1;
+			HAS_PMMU         = 1;
+			HAS_FPU          = 1;
 			return;
 		case M68K_CPU_TYPE_68EC030:
 			CPU_TYPE         = CPU_TYPE_EC030;
@@ -884,7 +889,8 @@ void m68k_set_cpu_type(unsigned int cpu_type)
 			CYC_MOVEM_L      = 2;
 			CYC_SHIFT        = 0;
 			CYC_RESET        = 518;
-			HAS_PMMU	       = 0;		/* EC030 lacks the PMMU and is effectively a die-shrink 68020 */
+			HAS_PMMU         = 0;		/* EC030 lacks the PMMU and is effectively a die-shrink 68020 */
+			HAS_FPU          = 1;
 			return;
 		case M68K_CPU_TYPE_68040:		// TODO: these values are not correct
 			CPU_TYPE         = CPU_TYPE_040;
@@ -901,7 +907,8 @@ void m68k_set_cpu_type(unsigned int cpu_type)
 			CYC_MOVEM_L      = 2;
 			CYC_SHIFT        = 0;
 			CYC_RESET        = 518;
-			HAS_PMMU	 = 1;
+			HAS_PMMU         = 1;
+			HAS_FPU          = 1;
 			return;
 		case M68K_CPU_TYPE_68EC040: // Just a 68040 without pmmu apparently...
 			CPU_TYPE         = CPU_TYPE_EC040;
@@ -918,10 +925,12 @@ void m68k_set_cpu_type(unsigned int cpu_type)
 			CYC_MOVEM_L      = 2;
 			CYC_SHIFT        = 0;
 			CYC_RESET        = 518;
-			HAS_PMMU	 = 0;
+			HAS_PMMU         = 0;
+			HAS_FPU          = 0;
 			return;
 		case M68K_CPU_TYPE_68LC040:
 			CPU_TYPE         = CPU_TYPE_LC040;
+			CPU_ADDRESS_MASK = 0xffffffff;
 			m68ki_cpu.sr_mask          = 0xf71f; /* T1 T0 S  M  -- I2 I1 I0 -- -- -- X  N  Z  V  C  */
 			m68ki_cpu.cyc_instruction  = m68ki_cycles[4];
 			m68ki_cpu.cyc_exception    = m68ki_exception_cycle_table[4];
@@ -934,7 +943,8 @@ void m68k_set_cpu_type(unsigned int cpu_type)
 			m68ki_cpu.cyc_movem_l      = 2;
 			m68ki_cpu.cyc_shift        = 0;
 			m68ki_cpu.cyc_reset        = 518;
-			HAS_PMMU	       = 1;
+			HAS_PMMU         = 1;
+			HAS_FPU          = 0;
 			return;
 	}
 }
--- a/m68kcpu.h
+++ b/m68kcpu.h
@@ -381,9 +381,10 @@ typedef uint32 uint64;
 #define CYC_MOVEM_L      m68ki_cpu.cyc_movem_l
 #define CYC_SHIFT        m68ki_cpu.cyc_shift
 #define CYC_RESET        m68ki_cpu.cyc_reset
-#define HAS_PMMU	 m68ki_cpu.has_pmmu
-#define PMMU_ENABLED	 m68ki_cpu.pmmu_enabled
-#define RESET_CYCLES	 m68ki_cpu.reset_cycles
+#define HAS_PMMU         m68ki_cpu.has_pmmu
+#define HAS_FPU          m68ki_cpu.has_fpu
+#define PMMU_ENABLED     m68ki_cpu.pmmu_enabled
+#define RESET_CYCLES     m68ki_cpu.reset_cycles


 #define CALLBACK_INT_ACK     m68ki_cpu.int_ack_callback
@@ -960,7 +961,8 @@ typedef struct
 	uint sr_mask;      /* Implemented status register bits */
 	uint instr_mode;   /* Stores whether we are in instruction mode or group 0/1 exception mode */
 	uint run_mode;     /* Stores whether we are processing a reset, bus error, address error, or something else */
-	int    has_pmmu;     /* Indicates if a PMMU available (yes on 030, 040, no on EC030) */
+	int    has_pmmu;   /* Indicates if a PMMU available (yes on 030, 040, no on EC030) */
+	int    has_fpu;    /* Indicates if a FPU available */
 	int    pmmu_enabled; /* Indicates if the PMMU is enabled */
 	int    fpu_just_reset; /* Indicates the FPU was just reset */
 	uint reset_cycles;
--- a/m68kfpu.c
+++ b/m68kfpu.c
--- a/softfloat/fpu_constant.h
+++ b/softfloat/fpu_constant.h
@@ -0,0 +1,80 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+#ifndef _FPU_CONSTANTS_H_
+#define _FPU_CONSTANTS_H_
+
+// Pentium CPU uses only 68-bit precision M_PI approximation
+#define BETTER_THAN_PENTIUM
+
+/*============================================================================
+ * Written for Bochs (x86 achitecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+//////////////////////////////
+// PI, PI/2, PI/4 constants
+//////////////////////////////
+
+#define FLOATX80_PI_EXP  (0x4000)
+
+// 128-bit PI fraction
+#ifdef BETTER_THAN_PENTIUM
+#define FLOAT_PI_HI (0xc90fdaa22168c234U)
+#define FLOAT_PI_LO (0xc4c6628b80dc1cd1U)
+#else
+#define FLOAT_PI_HI (0xc90fdaa22168c234U)
+#define FLOAT_PI_LO (0xC000000000000000U)
+#endif
+
+#define FLOATX80_PI2_EXP  (0x3FFF)
+#define FLOATX80_PI4_EXP  (0x3FFE)
+
+//////////////////////////////
+// 3PI/4 constant
+//////////////////////////////
+
+#define FLOATX80_3PI4_EXP (0x4000)
+
+// 128-bit 3PI/4 fraction
+#ifdef BETTER_THAN_PENTIUM
+#define FLOAT_3PI4_HI (0x96cbe3f9990e91a7U)
+#define FLOAT_3PI4_LO (0x9394c9e8a0a5159cU)
+#else
+#define FLOAT_3PI4_HI (0x96cbe3f9990e91a7U)
+#define FLOAT_3PI4_LO (0x9000000000000000U)
+#endif
+
+//////////////////////////////
+// 1/LN2 constant
+//////////////////////////////
+
+#define FLOAT_LN2INV_EXP  (0x3FFF)
+
+// 128-bit 1/LN2 fraction
+#ifdef BETTER_THAN_PENTIUM
+#define FLOAT_LN2INV_HI (0xb8aa3b295c17f0bbU)
+#define FLOAT_LN2INV_LO (0xbe87fed0691d3e89U)
+#else
+#define FLOAT_LN2INV_HI (0xb8aa3b295c17f0bbU)
+#define FLOAT_LN2INV_LO (0xC000000000000000U)
+#endif
+
+#endif
--- a/softfloat/fsincos.c
+++ b/softfloat/fsincos.c
@@ -0,0 +1,647 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 achitecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#define FLOAT128
+
+#define USE_estimateDiv128To64
+
+#include "m68kcpu.h" // which includes softfloat.h after defining the basic types
+#include "assert.h"
+
+//#include "softfloat-specialize"
+#include "fpu_constant.h"
+
+#define floatx80_one packFloatx80(0, 0x3fff, 0x8000000000000000U)
+#define floatx80_default_nan packFloatx80(0, 0xffff, 0xffffffffffffffffU)
+
+#define packFloat2x128m(zHi, zLo) {(zHi), (zLo)}
+#define PACK_FLOAT_128(hi,lo) packFloat2x128m(LIT64(hi),LIT64(lo))
+
+#define EXP_BIAS 0x3FFF
+
+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the extended double-precision floating-point
+| value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline bits64 extractFloatx80Frac( floatx80 a )
+{
+	return a.low;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the extended double-precision floating-point
+| value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline int32 extractFloatx80Exp( floatx80 a )
+{
+	return a.high & 0x7FFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the extended double-precision floating-point value
+| `a'.
+*----------------------------------------------------------------------------*/
+
+static inline flag extractFloatx80Sign( floatx80 a )
+{
+	return a.high>>15;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes extended double-precision floating-point  NaN  `a' and returns the
+| appropriate NaN result. If `a' is a signaling NaN, the invalid exception
+| is raised.
+*----------------------------------------------------------------------------*/
+
+static inline floatx80 propagateFloatx80NaNOneArg(floatx80 a)
+{
+	if (floatx80_is_signaling_nan(a))
+		float_raise(float_flag_invalid);
+
+	a.low |= 0xC000000000000000U;
+
+	return a;
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal extended double-precision floating-point value
+| represented by the denormalized significand `aSig'.  The normalized exponent
+| and significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr)
+{
+	int shiftCount = countLeadingZeros64(aSig);
+	*zSigPtr = aSig<<shiftCount;
+	*zExpPtr = 1 - shiftCount;
+}
+
+/* reduce trigonometric function argument using 128-bit precision
+   M_PI approximation */
+static uint64_t argument_reduction_kernel(uint64_t aSig0, int Exp, uint64_t *zSig0, uint64_t *zSig1)
+{
+	uint64_t term0, term1, term2;
+	uint64_t aSig1 = 0;
+
+	shortShift128Left(aSig1, aSig0, Exp, &aSig1, &aSig0);
+	uint64_t q = estimateDiv128To64(aSig1, aSig0, FLOAT_PI_HI);
+	mul128By64To192(FLOAT_PI_HI, FLOAT_PI_LO, q, &term0, &term1, &term2);
+	sub128(aSig1, aSig0, term0, term1, zSig1, zSig0);
+	while ((int64_t)(*zSig1) < 0) {
+		--q;
+		add192(*zSig1, *zSig0, term2, 0, FLOAT_PI_HI, FLOAT_PI_LO, zSig1, zSig0, &term2);
+	}
+	*zSig1 = term2;
+	return q;
+}
+
+static int reduce_trig_arg(int expDiff, int zSign, uint64_t aSig0, uint64_t aSig1)
+{
+	uint64_t term0, term1, q = 0;
+
+	if (expDiff < 0) {
+		shift128Right(aSig0, 0, 1, &aSig0, &aSig1);
+		expDiff = 0;
+	}
+	if (expDiff > 0) {
+		q = argument_reduction_kernel(aSig0, expDiff, &aSig0, &aSig1);
+	}
+	else {
+		if (FLOAT_PI_HI <= aSig0) {
+			aSig0 -= FLOAT_PI_HI;
+			q = 1;
+		}
+	}
+
+	shift128Right(FLOAT_PI_HI, FLOAT_PI_LO, 1, &term0, &term1);
+	if (! lt128(aSig0, aSig1, term0, term1))
+	{
+		int lt = lt128(term0, term1, aSig0, aSig1);
+		int eq = eq128(aSig0, aSig1, term0, term1);
+
+		if ((eq && (q & 1)) || lt) {
+			zSign = !zSign;
+			++q;
+		}
+		if (lt) sub128(FLOAT_PI_HI, FLOAT_PI_LO, aSig0, aSig1, &aSig0, &aSig1);
+	}
+
+	return (int)(q & 3);
+}
+
+#define SIN_ARR_SIZE 11
+#define COS_ARR_SIZE 11
+
+static float128 sin_arr[SIN_ARR_SIZE] =
+{
+	PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  1 */
+	PACK_FLOAT_128(0xbffc555555555555, 0x5555555555555555), /*  3 */
+	PACK_FLOAT_128(0x3ff8111111111111, 0x1111111111111111), /*  5 */
+	PACK_FLOAT_128(0xbff2a01a01a01a01, 0xa01a01a01a01a01a), /*  7 */
+	PACK_FLOAT_128(0x3fec71de3a556c73, 0x38faac1c88e50017), /*  9 */
+	PACK_FLOAT_128(0xbfe5ae64567f544e, 0x38fe747e4b837dc7), /* 11 */
+	PACK_FLOAT_128(0x3fde6124613a86d0, 0x97ca38331d23af68), /* 13 */
+	PACK_FLOAT_128(0xbfd6ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 15 */
+	PACK_FLOAT_128(0x3fce952c77030ad4, 0xa6b2605197771b00), /* 17 */
+	PACK_FLOAT_128(0xbfc62f49b4681415, 0x724ca1ec3b7b9675), /* 19 */
+	PACK_FLOAT_128(0x3fbd71b8ef6dcf57, 0x18bef146fcee6e45)  /* 21 */
+};
+
+static float128 cos_arr[COS_ARR_SIZE] =
+{
+	PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  0 */
+	PACK_FLOAT_128(0xbffe000000000000, 0x0000000000000000), /*  2 */
+	PACK_FLOAT_128(0x3ffa555555555555, 0x5555555555555555), /*  4 */
+	PACK_FLOAT_128(0xbff56c16c16c16c1, 0x6c16c16c16c16c17), /*  6 */
+	PACK_FLOAT_128(0x3fefa01a01a01a01, 0xa01a01a01a01a01a), /*  8 */
+	PACK_FLOAT_128(0xbfe927e4fb7789f5, 0xc72ef016d3ea6679), /* 10 */
+	PACK_FLOAT_128(0x3fe21eed8eff8d89, 0x7b544da987acfe85), /* 12 */
+	PACK_FLOAT_128(0xbfda93974a8c07c9, 0xd20badf145dfa3e5), /* 14 */
+	PACK_FLOAT_128(0x3fd2ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 16 */
+	PACK_FLOAT_128(0xbfca6827863b97d9, 0x77bb004886a2c2ab), /* 18 */
+	PACK_FLOAT_128(0x3fc1e542ba402022, 0x507a9cad2bf8f0bb)  /* 20 */
+};
+
+extern float128 OddPoly (float128 x, float128 *arr, unsigned n);
+
+/* 0 <= x <= pi/4 */
+static inline float128 poly_sin(float128 x)
+{
+	//                 3     5     7     9     11     13     15
+	//                x     x     x     x     x      x      x
+	// sin (x) ~ x - --- + --- - --- + --- - ---- + ---- - ---- =
+	//                3!    5!    7!    9!    11!    13!    15!
+	//
+	//                 2     4     6     8     10     12     14
+	//                x     x     x     x     x      x      x
+	//   = x * [ 1 - --- + --- - --- + --- - ---- + ---- - ---- ] =
+	//                3!    5!    7!    9!    11!    13!    15!
+	//
+	//           3                          3
+	//          --       4k                --        4k+2
+	//   p(x) = >  C  * x   > 0     q(x) = >  C   * x     < 0
+	//          --  2k                     --  2k+1
+	//          k=0                        k=0
+	//
+	//                          2
+	//   sin(x) ~ x * [ p(x) + x * q(x) ]
+	//
+
+	return OddPoly(x, sin_arr, SIN_ARR_SIZE);
+}
+
+extern float128 EvenPoly(float128 x, float128 *arr, unsigned n);
+
+/* 0 <= x <= pi/4 */
+static inline float128 poly_cos(float128 x)
+{
+	//                 2     4     6     8     10     12     14
+	//                x     x     x     x     x      x      x
+	// cos (x) ~ 1 - --- + --- - --- + --- - ---- + ---- - ----
+	//                2!    4!    6!    8!    10!    12!    14!
+	//
+	//           3                          3
+	//          --       4k                --        4k+2
+	//   p(x) = >  C  * x   > 0     q(x) = >  C   * x     < 0
+	//          --  2k                     --  2k+1
+	//          k=0                        k=0
+	//
+	//                      2
+	//   cos(x) ~ [ p(x) + x * q(x) ]
+	//
+
+	return EvenPoly(x, cos_arr, COS_ARR_SIZE);
+}
+
+static inline void sincos_invalid(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
+{
+	if (sin_a) *sin_a = a;
+	if (cos_a) *cos_a = a;
+}
+
+static inline void sincos_tiny_argument(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
+{
+	if (sin_a) *sin_a = a;
+	if (cos_a) *cos_a = floatx80_one;
+}
+
+static floatx80 sincos_approximation(int neg, float128 r, uint64_t quotient)
+{
+	if (quotient & 0x1) {
+		r = poly_cos(r);
+		neg = 0;
+	} else  {
+		r = poly_sin(r);
+	}
+
+	floatx80 result = float128_to_floatx80(r);
+	if (quotient & 0x2)
+		neg = ! neg;
+
+	if (neg)
+		result = floatx80_chs(result);
+
+	return result;
+}
+
+// =================================================
+// SFFSINCOS               Compute sin(x) and cos(x)
+// =================================================
+
+//
+// Uses the following identities:
+// ----------------------------------------------------------
+//
+//  sin(-x) = -sin(x)
+//  cos(-x) =  cos(x)
+//
+//  sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y)
+//  cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y)
+//
+//  sin(x+ pi/2)  =  cos(x)
+//  sin(x+ pi)    = -sin(x)
+//  sin(x+3pi/2)  = -cos(x)
+//  sin(x+2pi)    =  sin(x)
+//
+
+int sf_fsincos(floatx80 a, floatx80 *sin_a, floatx80 *cos_a)
+{
+	uint64_t aSig0, aSig1 = 0;
+	int32_t aExp, zExp, expDiff;
+	int aSign, zSign;
+	int q = 0;
+
+	aSig0 = extractFloatx80Frac(a);
+	aExp = extractFloatx80Exp(a);
+	aSign = extractFloatx80Sign(a);
+
+	/* invalid argument */
+	if (aExp == 0x7FFF) {
+		if ((uint64_t) (aSig0<<1)) {
+			sincos_invalid(sin_a, cos_a, propagateFloatx80NaNOneArg(a));
+			return 0;
+		}
+
+		float_raise(float_flag_invalid);
+		sincos_invalid(sin_a, cos_a, floatx80_default_nan);
+		return 0;
+	}
+
+	if (aExp == 0) {
+		if (aSig0 == 0) {
+			sincos_tiny_argument(sin_a, cos_a, a);
+			return 0;
+		}
+
+//        float_raise(float_flag_denormal);
+
+		/* handle pseudo denormals */
+		if (! (aSig0 & 0x8000000000000000U))
+		{
+			float_raise(float_flag_inexact);
+			if (sin_a)
+				float_raise(float_flag_underflow);
+			sincos_tiny_argument(sin_a, cos_a, a);
+			return 0;
+		}
+
+		normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
+	}
+
+	zSign = aSign;
+	zExp = EXP_BIAS;
+	expDiff = aExp - zExp;
+
+	/* argument is out-of-range */
+	if (expDiff >= 63)
+		return -1;
+
+	float_raise(float_flag_inexact);
+
+	if (expDiff < -1) {    // doesn't require reduction
+		if (expDiff <= -68) {
+			a = packFloatx80(aSign, aExp, aSig0);
+			sincos_tiny_argument(sin_a, cos_a, a);
+			return 0;
+		}
+		zExp = aExp;
+	}
+	else {
+		q = reduce_trig_arg(expDiff, zSign, aSig0, aSig1);
+	}
+
+	/* **************************** */
+	/* argument reduction completed */
+	/* **************************** */
+
+	/* using float128 for approximation */
+	float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1);
+
+	if (aSign) q = -q;
+	if (sin_a) *sin_a = sincos_approximation(zSign, r,   q);
+	if (cos_a) *cos_a = sincos_approximation(zSign, r, q+1);
+
+	return 0;
+}
+
+int floatx80_fsin(floatx80 *a)
+{
+	return sf_fsincos(*a, a, 0);
+}
+
+int floatx80_fcos(floatx80 *a)
+{
+	return sf_fsincos(*a, 0, a);
+}
+
+// =================================================
+// FPTAN                 Compute tan(x)
+// =================================================
+
+//
+// Uses the following identities:
+//
+// 1. ----------------------------------------------------------
+//
+//  sin(-x) = -sin(x)
+//  cos(-x) =  cos(x)
+//
+//  sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y)
+//  cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y)
+//
+//  sin(x+ pi/2)  =  cos(x)
+//  sin(x+ pi)    = -sin(x)
+//  sin(x+3pi/2)  = -cos(x)
+//  sin(x+2pi)    =  sin(x)
+//
+// 2. ----------------------------------------------------------
+//
+//           sin(x)
+//  tan(x) = ------
+//           cos(x)
+//
+
+int floatx80_ftan(floatx80 *a)
+{
+	uint64_t aSig0, aSig1 = 0;
+	int32_t aExp, zExp, expDiff;
+	int aSign, zSign;
+	int q = 0;
+
+	aSig0 = extractFloatx80Frac(*a);
+	aExp = extractFloatx80Exp(*a);
+	aSign = extractFloatx80Sign(*a);
+
+	/* invalid argument */
+	if (aExp == 0x7FFF) {
+		if ((uint64_t) (aSig0<<1))
+		{
+			*a = propagateFloatx80NaNOneArg(*a);
+			return 0;
+		}
+
+		float_raise(float_flag_invalid);
+		*a = floatx80_default_nan;
+		return 0;
+	}
+
+	if (aExp == 0) {
+		if (aSig0 == 0) return 0;
+//        float_raise(float_flag_denormal);
+		/* handle pseudo denormals */
+		if (! (aSig0 & 0x8000000000000000U))
+		{
+			float_raise(float_flag_inexact | float_flag_underflow);
+			return 0;
+		}
+		normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
+	}
+
+	zSign = aSign;
+	zExp = EXP_BIAS;
+	expDiff = aExp - zExp;
+
+	/* argument is out-of-range */
+	if (expDiff >= 63)
+		return -1;
+
+	float_raise(float_flag_inexact);
+
+	if (expDiff < -1) {    // doesn't require reduction
+		if (expDiff <= -68) {
+			*a = packFloatx80(aSign, aExp, aSig0);
+			return 0;
+		}
+		zExp = aExp;
+	}
+	else {
+		q = reduce_trig_arg(expDiff, zSign, aSig0, aSig1);
+	}
+
+	/* **************************** */
+	/* argument reduction completed */
+	/* **************************** */
+
+	/* using float128 for approximation */
+	float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1);
+
+	float128 sin_r = poly_sin(r);
+	float128 cos_r = poly_cos(r);
+
+	if (q & 0x1) {
+		r = float128_div(cos_r, sin_r);
+		zSign = ! zSign;
+	} else {
+		r = float128_div(sin_r, cos_r);
+	}
+
+	*a = float128_to_floatx80(r);
+	if (zSign)
+		*a = floatx80_chs(*a);
+
+	return 0;
+}
+
+//                            2         3         4               n
+// f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
+//         0    1         2         3         4               n
+//
+//          --       2k                --        2k+1
+//   p(x) = >  C  * x           q(x) = >  C   * x
+//          --  2k                     --  2k+1
+//
+//   f(x) ~ [ p(x) + x * q(x) ]
+//
+
+float128 EvalPoly(float128 x, float128 *arr, unsigned n)
+{
+	float128 x2 = float128_mul(x, x);
+	unsigned i;
+
+	assert(n > 1);
+
+	float128 r1 = arr[--n];
+	i = n;
+	while(i >= 2) {
+		r1 = float128_mul(r1, x2);
+		i -= 2;
+		r1 = float128_add(r1, arr[i]);
+	}
+	if (i) r1 = float128_mul(r1, x);
+
+	float128 r2 = arr[--n];
+	i = n;
+	while(i >= 2) {
+		r2 = float128_mul(r2, x2);
+		i -= 2;
+		r2 = float128_add(r2, arr[i]);
+	}
+	if (i) r2 = float128_mul(r2, x);
+
+	return float128_add(r1, r2);
+}
+
+//                  2         4         6         8               2n
+// f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
+//         0    1         2         3         4               n
+//
+//          --       4k                --        4k+2
+//   p(x) = >  C  * x           q(x) = >  C   * x
+//          --  2k                     --  2k+1
+//
+//                    2
+//   f(x) ~ [ p(x) + x * q(x) ]
+//
+
+float128 EvenPoly(float128 x, float128 *arr, unsigned n)
+{
+		return EvalPoly(float128_mul(x, x), arr, n);
+}
+
+//                        3         5         7         9               2n+1
+// f(x) ~ (C * x) + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
+//          0         1         2         3         4               n
+//                        2         4         6         8               2n
+//      = x * [ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
+//               0    1         2         3         4               n
+//
+//          --       4k                --        4k+2
+//   p(x) = >  C  * x           q(x) = >  C   * x
+//          --  2k                     --  2k+1
+//
+//                        2
+//   f(x) ~ x * [ p(x) + x * q(x) ]
+//
+
+float128 OddPoly(float128 x, float128 *arr, unsigned n)
+{
+		return float128_mul(x, EvenPoly(x, arr, n));
+}
+
+/*----------------------------------------------------------------------------
+| Scales extended double-precision floating-point value in operand `a' by
+| value `b'. The function truncates the value in the second operand 'b' to
+| an integral value and adds that value to the exponent of the operand 'a'.
+| The operation performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+extern floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b );
+
+floatx80 floatx80_scale(floatx80 a, floatx80 b)
+{
+	sbits32 aExp, bExp;
+	bits64 aSig, bSig;
+
+	// handle unsupported extended double-precision floating encodings
+/*    if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
+    {
+        float_raise(float_flag_invalid);
+        return floatx80_default_nan;
+    }*/
+
+	aSig = extractFloatx80Frac(a);
+	aExp = extractFloatx80Exp(a);
+	int aSign = extractFloatx80Sign(a);
+	bSig = extractFloatx80Frac(b);
+	bExp = extractFloatx80Exp(b);
+	int bSign = extractFloatx80Sign(b);
+
+	if (aExp == 0x7FFF) {
+		if ((bits64) (aSig<<1) || ((bExp == 0x7FFF) && (bits64) (bSig<<1)))
+		{
+			return propagateFloatx80NaN(a, b);
+		}
+		if ((bExp == 0x7FFF) && bSign) {
+			float_raise(float_flag_invalid);
+			return floatx80_default_nan;
+		}
+		if (bSig && (bExp == 0)) float_raise(float_flag_denormal);
+		return a;
+	}
+	if (bExp == 0x7FFF) {
+		if ((bits64) (bSig<<1)) return propagateFloatx80NaN(a, b);
+		if ((aExp | aSig) == 0) {
+			if (! bSign) {
+				float_raise(float_flag_invalid);
+				return floatx80_default_nan;
+			}
+			return a;
+		}
+		if (aSig && (aExp == 0)) float_raise(float_flag_denormal);
+		if (bSign) return packFloatx80(aSign, 0, 0);
+		return packFloatx80(aSign, 0x7FFF, 0x8000000000000000U);
+	}
+	if (aExp == 0) {
+		if (aSig == 0) return a;
+		float_raise(float_flag_denormal);
+		normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+	}
+	if (bExp == 0) {
+		if (bSig == 0) return a;
+		float_raise(float_flag_denormal);
+		normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+	}
+
+	if (bExp > 0x400E) {
+		/* generate appropriate overflow/underflow */
+		return roundAndPackFloatx80(80, aSign,
+							bSign ? -0x3FFF : 0x7FFF, aSig, 0);
+	}
+	if (bExp < 0x3FFF) return a;
+
+	int shiftCount = 0x403E - bExp;
+	bSig >>= shiftCount;
+	sbits32 scale = bSig;
+	if (bSign) scale = -scale; /* -32768..32767 */
+	return
+		roundAndPackFloatx80(80, aSign, aExp+scale, aSig, 0);
+}
--- a/softfloat/fyl2x.c
+++ b/softfloat/fyl2x.c
@@ -0,0 +1,487 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
+floating point emulation.
+float_raise(float_flag_invalid)
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 achitecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * Adapted for lib/softfloat in MESS by Hans Ostermeyer (03/2012)
+ * ==========================================================================*/
+
+#define FLOAT128
+
+#define USE_estimateDiv128To64
+
+#include "m68kcpu.h" // which includes softfloat.h after defining the basic types
+
+//#include "softfloat-specialize"
+#include "fpu_constant.h"
+
+#define floatx80_log10_2 packFloatx80(0, 0x3ffd, 0x9a209a84fbcff798U)
+#define floatx80_ln_2 packFloatx80(0, 0x3ffe, 0xb17217f7d1cf79acU)
+#define floatx80_one packFloatx80(0, 0x3fff, 0x8000000000000000U)
+#define floatx80_default_nan packFloatx80(0, 0xffff, 0xffffffffffffffffU)
+
+#define packFloat_128(zHi, zLo) {(zHi), (zLo)}
+#define PACK_FLOAT_128(hi,lo) packFloat_128(LIT64(hi),LIT64(lo))
+
+#define EXP_BIAS 0x3FFF
+
+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the extended double-precision floating-point
+| value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline bits64 extractFloatx80Frac( floatx80 a )
+{
+	return a.low;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the extended double-precision floating-point
+| value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline int32 extractFloatx80Exp( floatx80 a )
+{
+	return a.high & 0x7FFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the extended double-precision floating-point value
+| `a'.
+*----------------------------------------------------------------------------*/
+
+static inline flag extractFloatx80Sign( floatx80 a )
+{
+	return a.high>>15;
+
+}
+
+#if 0
+/*----------------------------------------------------------------------------
+| Takes extended double-precision floating-point  NaN  `a' and returns the
+| appropriate NaN result. If `a' is a signaling NaN, the invalid exception
+| is raised.
+*----------------------------------------------------------------------------*/
+
+static inline floatx80 propagateFloatx80NaNOneArg(floatx80 a)
+{
+	if (floatx80_is_signaling_nan(a))
+		float_raise(float_flag_invalid);
+
+	a.low |= 0xC000000000000000U;
+
+	return a;
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal extended double-precision floating-point value
+| represented by the denormalized significand `aSig'.  The normalized exponent
+| and significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+static inline void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr)
+{
+	int shiftCount = countLeadingZeros64(aSig);
+	*zSigPtr = aSig<<shiftCount;
+	*zExpPtr = 1 - shiftCount;
+}
+
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is a
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+static inline int floatx80_is_nan(floatx80 a)
+{
+	return ((a.high & 0x7FFF) == 0x7FFF) && (int64_t) (a.low<<1);
+}
+
+/*----------------------------------------------------------------------------
+| Takes two extended double-precision floating-point values `a' and `b', one
+| of which is a NaN, and returns the appropriate NaN result.  If either `a' or
+| `b' is a signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b)
+{
+	int aIsNaN = floatx80_is_nan(a);
+	int aIsSignalingNaN = floatx80_is_signaling_nan(a);
+	int bIsNaN = floatx80_is_nan(b);
+	int bIsSignalingNaN = floatx80_is_signaling_nan(b);
+	a.low |= 0xC000000000000000U;
+	b.low |= 0xC000000000000000U;
+	if (aIsSignalingNaN | bIsSignalingNaN) float_raise(float_flag_invalid);
+	if (aIsSignalingNaN) {
+		if (bIsSignalingNaN) goto returnLargerSignificand;
+		return bIsNaN ? b : a;
+	}
+	else if (aIsNaN) {
+		if (bIsSignalingNaN | ! bIsNaN) return a;
+	returnLargerSignificand:
+		if (a.low < b.low) return b;
+		if (b.low < a.low) return a;
+		return (a.high < b.high) ? a : b;
+	}
+	else {
+		return b;
+	}
+}
+
+static const float128 float128_one =
+	packFloat_128(0x3fff000000000000U, 0x0000000000000000U);
+static const float128 float128_two =
+	packFloat_128(0x4000000000000000U, 0x0000000000000000U);
+
+static const float128 float128_ln2inv2 =
+	packFloat_128(0x400071547652b82fU, 0xe1777d0ffda0d23aU);
+
+#define SQRT2_HALF_SIG  0xb504f333f9de6484U
+
+extern float128 OddPoly(float128 x, float128 *arr, unsigned n);
+
+#define L2_ARR_SIZE 9
+
+static float128 ln_arr[L2_ARR_SIZE] =
+{
+	PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  1 */
+	PACK_FLOAT_128(0x3ffd555555555555, 0x5555555555555555), /*  3 */
+	PACK_FLOAT_128(0x3ffc999999999999, 0x999999999999999a), /*  5 */
+	PACK_FLOAT_128(0x3ffc249249249249, 0x2492492492492492), /*  7 */
+	PACK_FLOAT_128(0x3ffbc71c71c71c71, 0xc71c71c71c71c71c), /*  9 */
+	PACK_FLOAT_128(0x3ffb745d1745d174, 0x5d1745d1745d1746), /* 11 */
+	PACK_FLOAT_128(0x3ffb3b13b13b13b1, 0x3b13b13b13b13b14), /* 13 */
+	PACK_FLOAT_128(0x3ffb111111111111, 0x1111111111111111), /* 15 */
+	PACK_FLOAT_128(0x3ffae1e1e1e1e1e1, 0xe1e1e1e1e1e1e1e2)  /* 17 */
+};
+
+static float128 poly_ln(float128 x1)
+{
+/*
+    //
+    //                     3     5     7     9     11     13     15
+    //        1+u         u     u     u     u     u      u      u
+    // 1/2 ln ---  ~ u + --- + --- + --- + --- + ---- + ---- + ---- =
+    //        1-u         3     5     7     9     11     13     15
+    //
+    //                     2     4     6     8     10     12     14
+    //                    u     u     u     u     u      u      u
+    //       = u * [ 1 + --- + --- + --- + --- + ---- + ---- + ---- ] =
+    //                    3     5     7     9     11     13     15
+    //
+    //           3                          3
+    //          --       4k                --        4k+2
+    //   p(u) = >  C  * u           q(u) = >  C   * u
+    //          --  2k                     --  2k+1
+    //          k=0                        k=0
+    //
+    //          1+u                 2
+    //   1/2 ln --- ~ u * [ p(u) + u * q(u) ]
+    //          1-u
+    //
+*/
+	return OddPoly(x1, ln_arr, L2_ARR_SIZE);
+}
+
+/* required sqrt(2)/2 < x < sqrt(2) */
+static float128 poly_l2(float128 x)
+{
+	/* using float128 for approximation */
+	float128 x_p1 = float128_add(x, float128_one);
+	float128 x_m1 = float128_sub(x, float128_one);
+	x = float128_div(x_m1, x_p1);
+	x = poly_ln(x);
+	x = float128_mul(x, float128_ln2inv2);
+	return x;
+}
+
+static float128 poly_l2p1(float128 x)
+{
+	/* using float128 for approximation */
+	float128 x_p2 = float128_add(x, float128_two);
+	x = float128_div(x, x_p2);
+	x = poly_ln(x);
+	x = float128_mul(x, float128_ln2inv2);
+	return x;
+}
+
+// =================================================
+// FYL2X                   Compute y * log (x)
+//                                        2
+// =================================================
+
+//
+// Uses the following identities:
+//
+// 1. ----------------------------------------------------------
+//              ln(x)
+//   log (x) = -------,  ln (x*y) = ln(x) + ln(y)
+//      2       ln(2)
+//
+// 2. ----------------------------------------------------------
+//                1+u             x-1
+//   ln (x) = ln -----, when u = -----
+//                1-u             x+1
+//
+// 3. ----------------------------------------------------------
+//                        3     5     7           2n+1
+//       1+u             u     u     u           u
+//   ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ]
+//       1-u             3     5     7           2n+1
+//
+
+static floatx80 fyl2x(floatx80 a, floatx80 b)
+{
+	uint64_t aSig = extractFloatx80Frac(a);
+	int32_t aExp = extractFloatx80Exp(a);
+	int aSign = extractFloatx80Sign(a);
+	uint64_t bSig = extractFloatx80Frac(b);
+	int32_t bExp = extractFloatx80Exp(b);
+	int bSign = extractFloatx80Sign(b);
+
+	int zSign = bSign ^ 1;
+
+	if (aExp == 0x7FFF) {
+		if ((uint64_t) (aSig<<1)
+				|| ((bExp == 0x7FFF) && (uint64_t) (bSig<<1)))
+		{
+			return propagateFloatx80NaN(a, b);
+		}
+		if (aSign)
+		{
+invalid:
+			float_raise(float_flag_invalid);
+			return floatx80_default_nan;
+		}
+		else {
+			if (bExp == 0) {
+				if (bSig == 0) goto invalid;
+				float_raise(float_flag_denormal);
+			}
+			return packFloatx80(bSign, 0x7FFF, 0x8000000000000000U);
+		}
+	}
+	if (bExp == 0x7FFF)
+	{
+		if ((uint64_t) (bSig<<1)) return propagateFloatx80NaN(a, b);
+		if (aSign && (uint64_t)(aExp | aSig)) goto invalid;
+		if (aSig && (aExp == 0))
+			float_raise(float_flag_denormal);
+		if (aExp < 0x3FFF) {
+			return packFloatx80(zSign, 0x7FFF, 0x8000000000000000U);
+		}
+		if (aExp == 0x3FFF && ((uint64_t) (aSig<<1) == 0)) goto invalid;
+		return packFloatx80(bSign, 0x7FFF, 0x8000000000000000U);
+	}
+	if (aExp == 0) {
+		if (aSig == 0) {
+			if ((bExp | bSig) == 0) goto invalid;
+			float_raise(float_flag_divbyzero);
+			return packFloatx80(zSign, 0x7FFF, 0x8000000000000000U);
+		}
+		if (aSign) goto invalid;
+		float_raise(float_flag_denormal);
+		normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+	}
+	if (aSign) goto invalid;
+	if (bExp == 0) {
+		if (bSig == 0) {
+			if (aExp < 0x3FFF) return packFloatx80(zSign, 0, 0);
+			return packFloatx80(bSign, 0, 0);
+		}
+		float_raise(float_flag_denormal);
+		normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+	}
+	if (aExp == 0x3FFF && ((uint64_t) (aSig<<1) == 0))
+		return packFloatx80(bSign, 0, 0);
+
+	float_raise(float_flag_inexact);
+
+	int ExpDiff = aExp - 0x3FFF;
+	aExp = 0;
+	if (aSig >= SQRT2_HALF_SIG) {
+		ExpDiff++;
+		aExp--;
+	}
+
+	/* ******************************** */
+	/* using float128 for approximation */
+	/* ******************************** */
+
+	uint64_t zSig0, zSig1;
+	shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1);
+	float128 x = packFloat128(0, aExp+0x3FFF, zSig0, zSig1);
+	x = poly_l2(x);
+	x = float128_add(x, int64_to_float128((int64_t) ExpDiff));
+	return floatx80_mul(b, float128_to_floatx80(x));
+}
+
+// =================================================
+// FYL2XP1                 Compute y * log (x + 1)
+//                                        2
+// =================================================
+
+//
+// Uses the following identities:
+//
+// 1. ----------------------------------------------------------
+//              ln(x)
+//   log (x) = -------
+//      2       ln(2)
+//
+// 2. ----------------------------------------------------------
+//                  1+u              x
+//   ln (x+1) = ln -----, when u = -----
+//                  1-u             x+2
+//
+// 3. ----------------------------------------------------------
+//                        3     5     7           2n+1
+//       1+u             u     u     u           u
+//   ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ]
+//       1-u             3     5     7           2n+1
+//
+
+floatx80 fyl2xp1(floatx80 a, floatx80 b)
+{
+	int32_t aExp, bExp;
+	uint64_t aSig, bSig, zSig0, zSig1, zSig2;
+	int aSign, bSign;
+
+	aSig = extractFloatx80Frac(a);
+	aExp = extractFloatx80Exp(a);
+	aSign = extractFloatx80Sign(a);
+	bSig = extractFloatx80Frac(b);
+	bExp = extractFloatx80Exp(b);
+	bSign = extractFloatx80Sign(b);
+	int zSign = aSign ^ bSign;
+
+	if (aExp == 0x7FFF) {
+		if ((uint64_t) (aSig<<1)
+				|| ((bExp == 0x7FFF) && (uint64_t) (bSig<<1)))
+		{
+			return propagateFloatx80NaN(a, b);
+		}
+		if (aSign)
+		{
+invalid:
+			float_raise(float_flag_invalid);
+			return floatx80_default_nan;
+		}
+			else {
+			if (bExp == 0) {
+				if (bSig == 0) goto invalid;
+				float_raise(float_flag_denormal);
+			}
+			return packFloatx80(bSign, 0x7FFF, 0x8000000000000000U);
+		}
+	}
+	if (bExp == 0x7FFF)
+	{
+		if ((uint64_t) (bSig<<1))
+			return propagateFloatx80NaN(a, b);
+
+		if (aExp == 0) {
+			if (aSig == 0) goto invalid;
+			float_raise(float_flag_denormal);
+		}
+
+		return packFloatx80(zSign, 0x7FFF, 0x8000000000000000U);
+	}
+	if (aExp == 0) {
+		if (aSig == 0) {
+			if (bSig && (bExp == 0)) float_raise(float_flag_denormal);
+			return packFloatx80(zSign, 0, 0);
+		}
+		float_raise(float_flag_denormal);
+		normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+	}
+	if (bExp == 0) {
+		if (bSig == 0) return packFloatx80(zSign, 0, 0);
+		float_raise(float_flag_denormal);
+		normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+	}
+
+	float_raise(float_flag_inexact);
+
+	if (aSign && aExp >= 0x3FFF)
+		return a;
+
+	if (aExp >= 0x3FFC) // big argument
+	{
+		return fyl2x(floatx80_add(a, floatx80_one), b);
+	}
+
+	// handle tiny argument
+	if (aExp < EXP_BIAS-70)
+	{
+		// first order approximation, return (a*b)/ln(2)
+		int32_t zExp = aExp + FLOAT_LN2INV_EXP - 0x3FFE;
+
+	mul128By64To192(FLOAT_LN2INV_HI, FLOAT_LN2INV_LO, aSig, &zSig0, &zSig1, &zSig2);
+		if (0 < (int64_t) zSig0) {
+			shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
+			--zExp;
+		}
+
+		zExp = zExp + bExp - 0x3FFE;
+	mul128By64To192(zSig0, zSig1, bSig, &zSig0, &zSig1, &zSig2);
+		if (0 < (int64_t) zSig0) {
+			shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
+			--zExp;
+		}
+
+		return
+			roundAndPackFloatx80(80, aSign ^ bSign, zExp, zSig0, zSig1);
+	}
+
+	/* ******************************** */
+	/* using float128 for approximation */
+	/* ******************************** */
+
+	shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1);
+	float128 x = packFloat128(aSign, aExp, zSig0, zSig1);
+	x = poly_l2p1(x);
+	return floatx80_mul(b, float128_to_floatx80(x));
+}
+
+floatx80 floatx80_flognp1(floatx80 a)
+{
+	return fyl2xp1(a, floatx80_ln_2);
+}
+
+floatx80 floatx80_flogn(floatx80 a)
+{
+	return fyl2x(a, floatx80_ln_2);
+}
+
+floatx80 floatx80_flog2(floatx80 a)
+{
+	return fyl2x(a, floatx80_one);
+}
+
+floatx80 floatx80_flog10(floatx80 a)
+{
+	return fyl2x(a, floatx80_log10_2);
+}
--- a/softfloat/softfloat.h
+++ b/softfloat/softfloat.h
@@ -240,9 +240,9 @@ flag floatx80_le_quiet( floatx80, floatx80 );
 flag floatx80_lt_quiet( floatx80, floatx80 );
 flag floatx80_is_signaling_nan( floatx80 );

-/* int floatx80_fsin(floatx80 &a);
-int floatx80_fcos(floatx80 &a);
-int floatx80_ftan(floatx80 &a); */
+int floatx80_fsin(floatx80 *a);
+int floatx80_fcos(floatx80 *a);
+int floatx80_ftan(floatx80 *a);

 floatx80 floatx80_flognp1(floatx80 a);
 floatx80 floatx80_flogn(floatx80 a);