mirror of
https://github.com/captain-amygdala/pistorm.git
synced 2026-02-14 19:56:10 +00:00
FPU update as latest MAME
This commit is contained in:
2
Makefile
2
Makefile
@@ -22,7 +22,7 @@ MAINFILES = emulator.c \
|
||||
platforms/amiga/net/pi-net.c \
|
||||
platforms/shared/rtc.c
|
||||
|
||||
MUSASHIFILES = m68kcpu.c m68kdasm.c softfloat/softfloat.c
|
||||
MUSASHIFILES = m68kcpu.c m68kdasm.c softfloat/softfloat.c softfloat/fsincos.c softfloat/fyl2x.c
|
||||
MUSASHIGENCFILES = m68kops.c
|
||||
MUSASHIGENHFILES = m68kops.h
|
||||
MUSASHIGENERATOR = m68kmake
|
||||
|
||||
17
m68k_in.c
17
m68k_in.c
@@ -284,6 +284,7 @@ M68KMAKE_OPCODE_HANDLER_HEADER
|
||||
extern void m68040_fpu_op0(void);
|
||||
extern void m68040_fpu_op1(void);
|
||||
extern void m68881_mmu_ops();
|
||||
extern void m68881_ftrap();
|
||||
|
||||
/* ======================================================================== */
|
||||
/* ========================= INSTRUCTION HANDLERS ========================= */
|
||||
@@ -561,6 +562,7 @@ cpdbcc 32 . . 1111...001001... .......... . . U U . . . 4
|
||||
cpgen 32 . . 1111...000...... .......... . . U U . . . 4 4 . unemulated
|
||||
cpscc 32 . . 1111...001...... .......... . . U U . . . 4 4 . unemulated
|
||||
cptrapcc 32 . . 1111...001111... .......... . . U U . . . 4 4 . unemulated
|
||||
ftrapcc 32 . . 1111001001111... .......... . . U U . . . 4 4 .
|
||||
dbt 16 . . 0101000011001... .......... U U U U U 12 12 6 6 6
|
||||
dbf 16 . . 0101000111001... .......... U U U U U 12 12 6 6 6
|
||||
dbcc 16 . . 0101....11001... .......... U U U U U 12 12 6 6 6
|
||||
@@ -915,7 +917,8 @@ M68KMAKE_OP(1111, 0, ., .)
|
||||
|
||||
M68KMAKE_OP(040fpu0, 32, ., .)
|
||||
{
|
||||
if(CPU_TYPE_IS_030_PLUS(CPU_TYPE))
|
||||
// printf("FPU 040fpu0 HAS_FPU=%d\n",!!HAS_FPU);
|
||||
if(HAS_FPU)
|
||||
{
|
||||
m68040_fpu_op0();
|
||||
return;
|
||||
@@ -926,7 +929,8 @@ M68KMAKE_OP(040fpu0, 32, ., .)
|
||||
|
||||
M68KMAKE_OP(040fpu1, 32, ., .)
|
||||
{
|
||||
if(CPU_TYPE_IS_030_PLUS(CPU_TYPE))
|
||||
// printf("FPU 040fpu1 HAS_FPU=%d\n",!!HAS_FPU);
|
||||
if(HAS_FPU)
|
||||
{
|
||||
m68040_fpu_op1();
|
||||
return;
|
||||
@@ -4370,6 +4374,15 @@ M68KMAKE_OP(cptrapcc, 32, ., .)
|
||||
m68ki_exception_1111();
|
||||
}
|
||||
|
||||
M68KMAKE_OP(ftrapcc,32, ., .)
|
||||
{
|
||||
if(HAS_FPU)
|
||||
{
|
||||
m68881_ftrap();
|
||||
} else {
|
||||
m68ki_exception_1111();
|
||||
}
|
||||
}
|
||||
|
||||
M68KMAKE_OP(dbt, 16, ., .)
|
||||
{
|
||||
|
||||
28
m68kcpu.c
28
m68kcpu.c
@@ -794,7 +794,8 @@ void m68k_set_cpu_type(unsigned int cpu_type)
|
||||
CYC_MOVEM_L = 3;
|
||||
CYC_SHIFT = 1;
|
||||
CYC_RESET = 132;
|
||||
HAS_PMMU = 0;
|
||||
HAS_PMMU = 0;
|
||||
HAS_FPU = 0;
|
||||
return;
|
||||
case M68K_CPU_TYPE_SCC68070:
|
||||
m68k_set_cpu_type(M68K_CPU_TYPE_68010);
|
||||
@@ -816,7 +817,8 @@ void m68k_set_cpu_type(unsigned int cpu_type)
|
||||
CYC_MOVEM_L = 3;
|
||||
CYC_SHIFT = 1;
|
||||
CYC_RESET = 130;
|
||||
HAS_PMMU = 0;
|
||||
HAS_PMMU = 0;
|
||||
HAS_FPU = 0;
|
||||
return;
|
||||
case M68K_CPU_TYPE_68EC020:
|
||||
CPU_TYPE = CPU_TYPE_EC020;
|
||||
@@ -833,7 +835,8 @@ void m68k_set_cpu_type(unsigned int cpu_type)
|
||||
CYC_MOVEM_L = 2;
|
||||
CYC_SHIFT = 0;
|
||||
CYC_RESET = 518;
|
||||
HAS_PMMU = 0;
|
||||
HAS_PMMU = 0;
|
||||
HAS_FPU = 0;
|
||||
return;
|
||||
case M68K_CPU_TYPE_68020:
|
||||
CPU_TYPE = CPU_TYPE_020;
|
||||
@@ -850,7 +853,8 @@ void m68k_set_cpu_type(unsigned int cpu_type)
|
||||
CYC_MOVEM_L = 2;
|
||||
CYC_SHIFT = 0;
|
||||
CYC_RESET = 518;
|
||||
HAS_PMMU = 0;
|
||||
HAS_PMMU = 0;
|
||||
HAS_FPU = 0;
|
||||
return;
|
||||
case M68K_CPU_TYPE_68030:
|
||||
CPU_TYPE = CPU_TYPE_030;
|
||||
@@ -867,7 +871,8 @@ void m68k_set_cpu_type(unsigned int cpu_type)
|
||||
CYC_MOVEM_L = 2;
|
||||
CYC_SHIFT = 0;
|
||||
CYC_RESET = 518;
|
||||
HAS_PMMU = 1;
|
||||
HAS_PMMU = 1;
|
||||
HAS_FPU = 1;
|
||||
return;
|
||||
case M68K_CPU_TYPE_68EC030:
|
||||
CPU_TYPE = CPU_TYPE_EC030;
|
||||
@@ -884,7 +889,8 @@ void m68k_set_cpu_type(unsigned int cpu_type)
|
||||
CYC_MOVEM_L = 2;
|
||||
CYC_SHIFT = 0;
|
||||
CYC_RESET = 518;
|
||||
HAS_PMMU = 0; /* EC030 lacks the PMMU and is effectively a die-shrink 68020 */
|
||||
HAS_PMMU = 0; /* EC030 lacks the PMMU and is effectively a die-shrink 68020 */
|
||||
HAS_FPU = 1;
|
||||
return;
|
||||
case M68K_CPU_TYPE_68040: // TODO: these values are not correct
|
||||
CPU_TYPE = CPU_TYPE_040;
|
||||
@@ -901,7 +907,8 @@ void m68k_set_cpu_type(unsigned int cpu_type)
|
||||
CYC_MOVEM_L = 2;
|
||||
CYC_SHIFT = 0;
|
||||
CYC_RESET = 518;
|
||||
HAS_PMMU = 1;
|
||||
HAS_PMMU = 1;
|
||||
HAS_FPU = 1;
|
||||
return;
|
||||
case M68K_CPU_TYPE_68EC040: // Just a 68040 without pmmu apparently...
|
||||
CPU_TYPE = CPU_TYPE_EC040;
|
||||
@@ -918,10 +925,12 @@ void m68k_set_cpu_type(unsigned int cpu_type)
|
||||
CYC_MOVEM_L = 2;
|
||||
CYC_SHIFT = 0;
|
||||
CYC_RESET = 518;
|
||||
HAS_PMMU = 0;
|
||||
HAS_PMMU = 0;
|
||||
HAS_FPU = 0;
|
||||
return;
|
||||
case M68K_CPU_TYPE_68LC040:
|
||||
CPU_TYPE = CPU_TYPE_LC040;
|
||||
CPU_ADDRESS_MASK = 0xffffffff;
|
||||
m68ki_cpu.sr_mask = 0xf71f; /* T1 T0 S M -- I2 I1 I0 -- -- -- X N Z V C */
|
||||
m68ki_cpu.cyc_instruction = m68ki_cycles[4];
|
||||
m68ki_cpu.cyc_exception = m68ki_exception_cycle_table[4];
|
||||
@@ -934,7 +943,8 @@ void m68k_set_cpu_type(unsigned int cpu_type)
|
||||
m68ki_cpu.cyc_movem_l = 2;
|
||||
m68ki_cpu.cyc_shift = 0;
|
||||
m68ki_cpu.cyc_reset = 518;
|
||||
HAS_PMMU = 1;
|
||||
HAS_PMMU = 1;
|
||||
HAS_FPU = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
10
m68kcpu.h
10
m68kcpu.h
@@ -381,9 +381,10 @@ typedef uint32 uint64;
|
||||
#define CYC_MOVEM_L m68ki_cpu.cyc_movem_l
|
||||
#define CYC_SHIFT m68ki_cpu.cyc_shift
|
||||
#define CYC_RESET m68ki_cpu.cyc_reset
|
||||
#define HAS_PMMU m68ki_cpu.has_pmmu
|
||||
#define PMMU_ENABLED m68ki_cpu.pmmu_enabled
|
||||
#define RESET_CYCLES m68ki_cpu.reset_cycles
|
||||
#define HAS_PMMU m68ki_cpu.has_pmmu
|
||||
#define HAS_FPU m68ki_cpu.has_fpu
|
||||
#define PMMU_ENABLED m68ki_cpu.pmmu_enabled
|
||||
#define RESET_CYCLES m68ki_cpu.reset_cycles
|
||||
|
||||
|
||||
#define CALLBACK_INT_ACK m68ki_cpu.int_ack_callback
|
||||
@@ -960,7 +961,8 @@ typedef struct
|
||||
uint sr_mask; /* Implemented status register bits */
|
||||
uint instr_mode; /* Stores whether we are in instruction mode or group 0/1 exception mode */
|
||||
uint run_mode; /* Stores whether we are processing a reset, bus error, address error, or something else */
|
||||
int has_pmmu; /* Indicates if a PMMU available (yes on 030, 040, no on EC030) */
|
||||
int has_pmmu; /* Indicates if a PMMU available (yes on 030, 040, no on EC030) */
|
||||
int has_fpu; /* Indicates if a FPU available */
|
||||
int pmmu_enabled; /* Indicates if the PMMU is enabled */
|
||||
int fpu_just_reset; /* Indicates the FPU was just reset */
|
||||
uint reset_cycles;
|
||||
|
||||
80
softfloat/fpu_constant.h
Normal file
80
softfloat/fpu_constant.h
Normal file
@@ -0,0 +1,80 @@
|
||||
/*============================================================================
|
||||
This source file is an extension to the SoftFloat IEC/IEEE Floating-point
|
||||
Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
|
||||
floating point emulation.
|
||||
|
||||
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
|
||||
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
|
||||
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
|
||||
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
|
||||
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
|
||||
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
|
||||
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
|
||||
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
|
||||
|
||||
Derivative works are acceptable, even for commercial purposes, so long as
|
||||
(1) the source code for the derivative work includes prominent notice that
|
||||
the work is derivative, and (2) the source code includes prominent notice with
|
||||
these four paragraphs for those parts of this code that are retained.
|
||||
=============================================================================*/
|
||||
|
||||
#ifndef _FPU_CONSTANTS_H_
|
||||
#define _FPU_CONSTANTS_H_
|
||||
|
||||
// Pentium CPU uses only 68-bit precision M_PI approximation
|
||||
#define BETTER_THAN_PENTIUM
|
||||
|
||||
/*============================================================================
|
||||
* Written for Bochs (x86 achitecture simulator) by
|
||||
* Stanislav Shwartsman [sshwarts at sourceforge net]
|
||||
* ==========================================================================*/
|
||||
|
||||
//////////////////////////////
|
||||
// PI, PI/2, PI/4 constants
|
||||
//////////////////////////////
|
||||
|
||||
#define FLOATX80_PI_EXP (0x4000)
|
||||
|
||||
// 128-bit PI fraction
|
||||
#ifdef BETTER_THAN_PENTIUM
|
||||
#define FLOAT_PI_HI (0xc90fdaa22168c234U)
|
||||
#define FLOAT_PI_LO (0xc4c6628b80dc1cd1U)
|
||||
#else
|
||||
#define FLOAT_PI_HI (0xc90fdaa22168c234U)
|
||||
#define FLOAT_PI_LO (0xC000000000000000U)
|
||||
#endif
|
||||
|
||||
#define FLOATX80_PI2_EXP (0x3FFF)
|
||||
#define FLOATX80_PI4_EXP (0x3FFE)
|
||||
|
||||
//////////////////////////////
|
||||
// 3PI/4 constant
|
||||
//////////////////////////////
|
||||
|
||||
#define FLOATX80_3PI4_EXP (0x4000)
|
||||
|
||||
// 128-bit 3PI/4 fraction
|
||||
#ifdef BETTER_THAN_PENTIUM
|
||||
#define FLOAT_3PI4_HI (0x96cbe3f9990e91a7U)
|
||||
#define FLOAT_3PI4_LO (0x9394c9e8a0a5159cU)
|
||||
#else
|
||||
#define FLOAT_3PI4_HI (0x96cbe3f9990e91a7U)
|
||||
#define FLOAT_3PI4_LO (0x9000000000000000U)
|
||||
#endif
|
||||
|
||||
//////////////////////////////
|
||||
// 1/LN2 constant
|
||||
//////////////////////////////
|
||||
|
||||
#define FLOAT_LN2INV_EXP (0x3FFF)
|
||||
|
||||
// 128-bit 1/LN2 fraction
|
||||
#ifdef BETTER_THAN_PENTIUM
|
||||
#define FLOAT_LN2INV_HI (0xb8aa3b295c17f0bbU)
|
||||
#define FLOAT_LN2INV_LO (0xbe87fed0691d3e89U)
|
||||
#else
|
||||
#define FLOAT_LN2INV_HI (0xb8aa3b295c17f0bbU)
|
||||
#define FLOAT_LN2INV_LO (0xC000000000000000U)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
647
softfloat/fsincos.c
Normal file
647
softfloat/fsincos.c
Normal file
@@ -0,0 +1,647 @@
|
||||
/*============================================================================
|
||||
This source file is an extension to the SoftFloat IEC/IEEE Floating-point
|
||||
Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
|
||||
floating point emulation.
|
||||
|
||||
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
|
||||
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
|
||||
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
|
||||
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
|
||||
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
|
||||
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
|
||||
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
|
||||
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
|
||||
|
||||
Derivative works are acceptable, even for commercial purposes, so long as
|
||||
(1) the source code for the derivative work includes prominent notice that
|
||||
the work is derivative, and (2) the source code includes prominent notice with
|
||||
these four paragraphs for those parts of this code that are retained.
|
||||
=============================================================================*/
|
||||
|
||||
/*============================================================================
|
||||
* Written for Bochs (x86 achitecture simulator) by
|
||||
* Stanislav Shwartsman [sshwarts at sourceforge net]
|
||||
* ==========================================================================*/
|
||||
|
||||
#define FLOAT128
|
||||
|
||||
#define USE_estimateDiv128To64
|
||||
|
||||
#include "m68kcpu.h" // which includes softfloat.h after defining the basic types
|
||||
#include "assert.h"
|
||||
|
||||
//#include "softfloat-specialize"
|
||||
#include "fpu_constant.h"
|
||||
|
||||
#define floatx80_one packFloatx80(0, 0x3fff, 0x8000000000000000U)
|
||||
#define floatx80_default_nan packFloatx80(0, 0xffff, 0xffffffffffffffffU)
|
||||
|
||||
#define packFloat2x128m(zHi, zLo) {(zHi), (zLo)}
|
||||
#define PACK_FLOAT_128(hi,lo) packFloat2x128m(LIT64(hi),LIT64(lo))
|
||||
|
||||
#define EXP_BIAS 0x3FFF
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the fraction bits of the extended double-precision floating-point
|
||||
| value `a'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
static inline bits64 extractFloatx80Frac( floatx80 a )
|
||||
{
|
||||
return a.low;
|
||||
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the exponent bits of the extended double-precision floating-point
|
||||
| value `a'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
static inline int32 extractFloatx80Exp( floatx80 a )
|
||||
{
|
||||
return a.high & 0x7FFF;
|
||||
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the sign bit of the extended double-precision floating-point value
|
||||
| `a'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
static inline flag extractFloatx80Sign( floatx80 a )
|
||||
{
|
||||
return a.high>>15;
|
||||
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Takes extended double-precision floating-point NaN `a' and returns the
|
||||
| appropriate NaN result. If `a' is a signaling NaN, the invalid exception
|
||||
| is raised.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
static inline floatx80 propagateFloatx80NaNOneArg(floatx80 a)
|
||||
{
|
||||
if (floatx80_is_signaling_nan(a))
|
||||
float_raise(float_flag_invalid);
|
||||
|
||||
a.low |= 0xC000000000000000U;
|
||||
|
||||
return a;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Normalizes the subnormal extended double-precision floating-point value
|
||||
| represented by the denormalized significand `aSig'. The normalized exponent
|
||||
| and significand are stored at the locations pointed to by `zExpPtr' and
|
||||
| `zSigPtr', respectively.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr)
|
||||
{
|
||||
int shiftCount = countLeadingZeros64(aSig);
|
||||
*zSigPtr = aSig<<shiftCount;
|
||||
*zExpPtr = 1 - shiftCount;
|
||||
}
|
||||
|
||||
/* reduce trigonometric function argument using 128-bit precision
|
||||
M_PI approximation */
|
||||
static uint64_t argument_reduction_kernel(uint64_t aSig0, int Exp, uint64_t *zSig0, uint64_t *zSig1)
|
||||
{
|
||||
uint64_t term0, term1, term2;
|
||||
uint64_t aSig1 = 0;
|
||||
|
||||
shortShift128Left(aSig1, aSig0, Exp, &aSig1, &aSig0);
|
||||
uint64_t q = estimateDiv128To64(aSig1, aSig0, FLOAT_PI_HI);
|
||||
mul128By64To192(FLOAT_PI_HI, FLOAT_PI_LO, q, &term0, &term1, &term2);
|
||||
sub128(aSig1, aSig0, term0, term1, zSig1, zSig0);
|
||||
while ((int64_t)(*zSig1) < 0) {
|
||||
--q;
|
||||
add192(*zSig1, *zSig0, term2, 0, FLOAT_PI_HI, FLOAT_PI_LO, zSig1, zSig0, &term2);
|
||||
}
|
||||
*zSig1 = term2;
|
||||
return q;
|
||||
}
|
||||
|
||||
static int reduce_trig_arg(int expDiff, int zSign, uint64_t aSig0, uint64_t aSig1)
|
||||
{
|
||||
uint64_t term0, term1, q = 0;
|
||||
|
||||
if (expDiff < 0) {
|
||||
shift128Right(aSig0, 0, 1, &aSig0, &aSig1);
|
||||
expDiff = 0;
|
||||
}
|
||||
if (expDiff > 0) {
|
||||
q = argument_reduction_kernel(aSig0, expDiff, &aSig0, &aSig1);
|
||||
}
|
||||
else {
|
||||
if (FLOAT_PI_HI <= aSig0) {
|
||||
aSig0 -= FLOAT_PI_HI;
|
||||
q = 1;
|
||||
}
|
||||
}
|
||||
|
||||
shift128Right(FLOAT_PI_HI, FLOAT_PI_LO, 1, &term0, &term1);
|
||||
if (! lt128(aSig0, aSig1, term0, term1))
|
||||
{
|
||||
int lt = lt128(term0, term1, aSig0, aSig1);
|
||||
int eq = eq128(aSig0, aSig1, term0, term1);
|
||||
|
||||
if ((eq && (q & 1)) || lt) {
|
||||
zSign = !zSign;
|
||||
++q;
|
||||
}
|
||||
if (lt) sub128(FLOAT_PI_HI, FLOAT_PI_LO, aSig0, aSig1, &aSig0, &aSig1);
|
||||
}
|
||||
|
||||
return (int)(q & 3);
|
||||
}
|
||||
|
||||
#define SIN_ARR_SIZE 11
|
||||
#define COS_ARR_SIZE 11
|
||||
|
||||
static float128 sin_arr[SIN_ARR_SIZE] =
|
||||
{
|
||||
PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 1 */
|
||||
PACK_FLOAT_128(0xbffc555555555555, 0x5555555555555555), /* 3 */
|
||||
PACK_FLOAT_128(0x3ff8111111111111, 0x1111111111111111), /* 5 */
|
||||
PACK_FLOAT_128(0xbff2a01a01a01a01, 0xa01a01a01a01a01a), /* 7 */
|
||||
PACK_FLOAT_128(0x3fec71de3a556c73, 0x38faac1c88e50017), /* 9 */
|
||||
PACK_FLOAT_128(0xbfe5ae64567f544e, 0x38fe747e4b837dc7), /* 11 */
|
||||
PACK_FLOAT_128(0x3fde6124613a86d0, 0x97ca38331d23af68), /* 13 */
|
||||
PACK_FLOAT_128(0xbfd6ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 15 */
|
||||
PACK_FLOAT_128(0x3fce952c77030ad4, 0xa6b2605197771b00), /* 17 */
|
||||
PACK_FLOAT_128(0xbfc62f49b4681415, 0x724ca1ec3b7b9675), /* 19 */
|
||||
PACK_FLOAT_128(0x3fbd71b8ef6dcf57, 0x18bef146fcee6e45) /* 21 */
|
||||
};
|
||||
|
||||
static float128 cos_arr[COS_ARR_SIZE] =
|
||||
{
|
||||
PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 0 */
|
||||
PACK_FLOAT_128(0xbffe000000000000, 0x0000000000000000), /* 2 */
|
||||
PACK_FLOAT_128(0x3ffa555555555555, 0x5555555555555555), /* 4 */
|
||||
PACK_FLOAT_128(0xbff56c16c16c16c1, 0x6c16c16c16c16c17), /* 6 */
|
||||
PACK_FLOAT_128(0x3fefa01a01a01a01, 0xa01a01a01a01a01a), /* 8 */
|
||||
PACK_FLOAT_128(0xbfe927e4fb7789f5, 0xc72ef016d3ea6679), /* 10 */
|
||||
PACK_FLOAT_128(0x3fe21eed8eff8d89, 0x7b544da987acfe85), /* 12 */
|
||||
PACK_FLOAT_128(0xbfda93974a8c07c9, 0xd20badf145dfa3e5), /* 14 */
|
||||
PACK_FLOAT_128(0x3fd2ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 16 */
|
||||
PACK_FLOAT_128(0xbfca6827863b97d9, 0x77bb004886a2c2ab), /* 18 */
|
||||
PACK_FLOAT_128(0x3fc1e542ba402022, 0x507a9cad2bf8f0bb) /* 20 */
|
||||
};
|
||||
|
||||
extern float128 OddPoly (float128 x, float128 *arr, unsigned n);
|
||||
|
||||
/* 0 <= x <= pi/4 */
|
||||
static inline float128 poly_sin(float128 x)
|
||||
{
|
||||
// 3 5 7 9 11 13 15
|
||||
// x x x x x x x
|
||||
// sin (x) ~ x - --- + --- - --- + --- - ---- + ---- - ---- =
|
||||
// 3! 5! 7! 9! 11! 13! 15!
|
||||
//
|
||||
// 2 4 6 8 10 12 14
|
||||
// x x x x x x x
|
||||
// = x * [ 1 - --- + --- - --- + --- - ---- + ---- - ---- ] =
|
||||
// 3! 5! 7! 9! 11! 13! 15!
|
||||
//
|
||||
// 3 3
|
||||
// -- 4k -- 4k+2
|
||||
// p(x) = > C * x > 0 q(x) = > C * x < 0
|
||||
// -- 2k -- 2k+1
|
||||
// k=0 k=0
|
||||
//
|
||||
// 2
|
||||
// sin(x) ~ x * [ p(x) + x * q(x) ]
|
||||
//
|
||||
|
||||
return OddPoly(x, sin_arr, SIN_ARR_SIZE);
|
||||
}
|
||||
|
||||
extern float128 EvenPoly(float128 x, float128 *arr, unsigned n);
|
||||
|
||||
/* 0 <= x <= pi/4 */
|
||||
static inline float128 poly_cos(float128 x)
|
||||
{
|
||||
// 2 4 6 8 10 12 14
|
||||
// x x x x x x x
|
||||
// cos (x) ~ 1 - --- + --- - --- + --- - ---- + ---- - ----
|
||||
// 2! 4! 6! 8! 10! 12! 14!
|
||||
//
|
||||
// 3 3
|
||||
// -- 4k -- 4k+2
|
||||
// p(x) = > C * x > 0 q(x) = > C * x < 0
|
||||
// -- 2k -- 2k+1
|
||||
// k=0 k=0
|
||||
//
|
||||
// 2
|
||||
// cos(x) ~ [ p(x) + x * q(x) ]
|
||||
//
|
||||
|
||||
return EvenPoly(x, cos_arr, COS_ARR_SIZE);
|
||||
}
|
||||
|
||||
static inline void sincos_invalid(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
|
||||
{
|
||||
if (sin_a) *sin_a = a;
|
||||
if (cos_a) *cos_a = a;
|
||||
}
|
||||
|
||||
static inline void sincos_tiny_argument(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
|
||||
{
|
||||
if (sin_a) *sin_a = a;
|
||||
if (cos_a) *cos_a = floatx80_one;
|
||||
}
|
||||
|
||||
static floatx80 sincos_approximation(int neg, float128 r, uint64_t quotient)
|
||||
{
|
||||
if (quotient & 0x1) {
|
||||
r = poly_cos(r);
|
||||
neg = 0;
|
||||
} else {
|
||||
r = poly_sin(r);
|
||||
}
|
||||
|
||||
floatx80 result = float128_to_floatx80(r);
|
||||
if (quotient & 0x2)
|
||||
neg = ! neg;
|
||||
|
||||
if (neg)
|
||||
result = floatx80_chs(result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// =================================================
|
||||
// SFFSINCOS Compute sin(x) and cos(x)
|
||||
// =================================================
|
||||
|
||||
//
|
||||
// Uses the following identities:
|
||||
// ----------------------------------------------------------
|
||||
//
|
||||
// sin(-x) = -sin(x)
|
||||
// cos(-x) = cos(x)
|
||||
//
|
||||
// sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y)
|
||||
// cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y)
|
||||
//
|
||||
// sin(x+ pi/2) = cos(x)
|
||||
// sin(x+ pi) = -sin(x)
|
||||
// sin(x+3pi/2) = -cos(x)
|
||||
// sin(x+2pi) = sin(x)
|
||||
//
|
||||
|
||||
int sf_fsincos(floatx80 a, floatx80 *sin_a, floatx80 *cos_a)
|
||||
{
|
||||
uint64_t aSig0, aSig1 = 0;
|
||||
int32_t aExp, zExp, expDiff;
|
||||
int aSign, zSign;
|
||||
int q = 0;
|
||||
|
||||
aSig0 = extractFloatx80Frac(a);
|
||||
aExp = extractFloatx80Exp(a);
|
||||
aSign = extractFloatx80Sign(a);
|
||||
|
||||
/* invalid argument */
|
||||
if (aExp == 0x7FFF) {
|
||||
if ((uint64_t) (aSig0<<1)) {
|
||||
sincos_invalid(sin_a, cos_a, propagateFloatx80NaNOneArg(a));
|
||||
return 0;
|
||||
}
|
||||
|
||||
float_raise(float_flag_invalid);
|
||||
sincos_invalid(sin_a, cos_a, floatx80_default_nan);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (aExp == 0) {
|
||||
if (aSig0 == 0) {
|
||||
sincos_tiny_argument(sin_a, cos_a, a);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// float_raise(float_flag_denormal);
|
||||
|
||||
/* handle pseudo denormals */
|
||||
if (! (aSig0 & 0x8000000000000000U))
|
||||
{
|
||||
float_raise(float_flag_inexact);
|
||||
if (sin_a)
|
||||
float_raise(float_flag_underflow);
|
||||
sincos_tiny_argument(sin_a, cos_a, a);
|
||||
return 0;
|
||||
}
|
||||
|
||||
normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
|
||||
}
|
||||
|
||||
zSign = aSign;
|
||||
zExp = EXP_BIAS;
|
||||
expDiff = aExp - zExp;
|
||||
|
||||
/* argument is out-of-range */
|
||||
if (expDiff >= 63)
|
||||
return -1;
|
||||
|
||||
float_raise(float_flag_inexact);
|
||||
|
||||
if (expDiff < -1) { // doesn't require reduction
|
||||
if (expDiff <= -68) {
|
||||
a = packFloatx80(aSign, aExp, aSig0);
|
||||
sincos_tiny_argument(sin_a, cos_a, a);
|
||||
return 0;
|
||||
}
|
||||
zExp = aExp;
|
||||
}
|
||||
else {
|
||||
q = reduce_trig_arg(expDiff, zSign, aSig0, aSig1);
|
||||
}
|
||||
|
||||
/* **************************** */
|
||||
/* argument reduction completed */
|
||||
/* **************************** */
|
||||
|
||||
/* using float128 for approximation */
|
||||
float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1);
|
||||
|
||||
if (aSign) q = -q;
|
||||
if (sin_a) *sin_a = sincos_approximation(zSign, r, q);
|
||||
if (cos_a) *cos_a = sincos_approximation(zSign, r, q+1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int floatx80_fsin(floatx80 *a)
|
||||
{
|
||||
return sf_fsincos(*a, a, 0);
|
||||
}
|
||||
|
||||
int floatx80_fcos(floatx80 *a)
|
||||
{
|
||||
return sf_fsincos(*a, 0, a);
|
||||
}
|
||||
|
||||
// =================================================
|
||||
// FPTAN Compute tan(x)
|
||||
// =================================================
|
||||
|
||||
//
|
||||
// Uses the following identities:
|
||||
//
|
||||
// 1. ----------------------------------------------------------
|
||||
//
|
||||
// sin(-x) = -sin(x)
|
||||
// cos(-x) = cos(x)
|
||||
//
|
||||
// sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y)
|
||||
// cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y)
|
||||
//
|
||||
// sin(x+ pi/2) = cos(x)
|
||||
// sin(x+ pi) = -sin(x)
|
||||
// sin(x+3pi/2) = -cos(x)
|
||||
// sin(x+2pi) = sin(x)
|
||||
//
|
||||
// 2. ----------------------------------------------------------
|
||||
//
|
||||
// sin(x)
|
||||
// tan(x) = ------
|
||||
// cos(x)
|
||||
//
|
||||
|
||||
int floatx80_ftan(floatx80 *a)
|
||||
{
|
||||
uint64_t aSig0, aSig1 = 0;
|
||||
int32_t aExp, zExp, expDiff;
|
||||
int aSign, zSign;
|
||||
int q = 0;
|
||||
|
||||
aSig0 = extractFloatx80Frac(*a);
|
||||
aExp = extractFloatx80Exp(*a);
|
||||
aSign = extractFloatx80Sign(*a);
|
||||
|
||||
/* invalid argument */
|
||||
if (aExp == 0x7FFF) {
|
||||
if ((uint64_t) (aSig0<<1))
|
||||
{
|
||||
*a = propagateFloatx80NaNOneArg(*a);
|
||||
return 0;
|
||||
}
|
||||
|
||||
float_raise(float_flag_invalid);
|
||||
*a = floatx80_default_nan;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (aExp == 0) {
|
||||
if (aSig0 == 0) return 0;
|
||||
// float_raise(float_flag_denormal);
|
||||
/* handle pseudo denormals */
|
||||
if (! (aSig0 & 0x8000000000000000U))
|
||||
{
|
||||
float_raise(float_flag_inexact | float_flag_underflow);
|
||||
return 0;
|
||||
}
|
||||
normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
|
||||
}
|
||||
|
||||
zSign = aSign;
|
||||
zExp = EXP_BIAS;
|
||||
expDiff = aExp - zExp;
|
||||
|
||||
/* argument is out-of-range */
|
||||
if (expDiff >= 63)
|
||||
return -1;
|
||||
|
||||
float_raise(float_flag_inexact);
|
||||
|
||||
if (expDiff < -1) { // doesn't require reduction
|
||||
if (expDiff <= -68) {
|
||||
*a = packFloatx80(aSign, aExp, aSig0);
|
||||
return 0;
|
||||
}
|
||||
zExp = aExp;
|
||||
}
|
||||
else {
|
||||
q = reduce_trig_arg(expDiff, zSign, aSig0, aSig1);
|
||||
}
|
||||
|
||||
/* **************************** */
|
||||
/* argument reduction completed */
|
||||
/* **************************** */
|
||||
|
||||
/* using float128 for approximation */
|
||||
float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1);
|
||||
|
||||
float128 sin_r = poly_sin(r);
|
||||
float128 cos_r = poly_cos(r);
|
||||
|
||||
if (q & 0x1) {
|
||||
r = float128_div(cos_r, sin_r);
|
||||
zSign = ! zSign;
|
||||
} else {
|
||||
r = float128_div(sin_r, cos_r);
|
||||
}
|
||||
|
||||
*a = float128_to_floatx80(r);
|
||||
if (zSign)
|
||||
*a = floatx80_chs(*a);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// 2 3 4 n
|
||||
// f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
|
||||
// 0 1 2 3 4 n
|
||||
//
|
||||
// -- 2k -- 2k+1
|
||||
// p(x) = > C * x q(x) = > C * x
|
||||
// -- 2k -- 2k+1
|
||||
//
|
||||
// f(x) ~ [ p(x) + x * q(x) ]
|
||||
//
|
||||
|
||||
float128 EvalPoly(float128 x, float128 *arr, unsigned n)
|
||||
{
|
||||
float128 x2 = float128_mul(x, x);
|
||||
unsigned i;
|
||||
|
||||
assert(n > 1);
|
||||
|
||||
float128 r1 = arr[--n];
|
||||
i = n;
|
||||
while(i >= 2) {
|
||||
r1 = float128_mul(r1, x2);
|
||||
i -= 2;
|
||||
r1 = float128_add(r1, arr[i]);
|
||||
}
|
||||
if (i) r1 = float128_mul(r1, x);
|
||||
|
||||
float128 r2 = arr[--n];
|
||||
i = n;
|
||||
while(i >= 2) {
|
||||
r2 = float128_mul(r2, x2);
|
||||
i -= 2;
|
||||
r2 = float128_add(r2, arr[i]);
|
||||
}
|
||||
if (i) r2 = float128_mul(r2, x);
|
||||
|
||||
return float128_add(r1, r2);
|
||||
}
|
||||
|
||||
// 2 4 6 8 2n
|
||||
// f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
|
||||
// 0 1 2 3 4 n
|
||||
//
|
||||
// -- 4k -- 4k+2
|
||||
// p(x) = > C * x q(x) = > C * x
|
||||
// -- 2k -- 2k+1
|
||||
//
|
||||
// 2
|
||||
// f(x) ~ [ p(x) + x * q(x) ]
|
||||
//
|
||||
|
||||
float128 EvenPoly(float128 x, float128 *arr, unsigned n)
|
||||
{
|
||||
return EvalPoly(float128_mul(x, x), arr, n);
|
||||
}
|
||||
|
||||
// 3 5 7 9 2n+1
|
||||
// f(x) ~ (C * x) + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
|
||||
// 0 1 2 3 4 n
|
||||
// 2 4 6 8 2n
|
||||
// = x * [ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
|
||||
// 0 1 2 3 4 n
|
||||
//
|
||||
// -- 4k -- 4k+2
|
||||
// p(x) = > C * x q(x) = > C * x
|
||||
// -- 2k -- 2k+1
|
||||
//
|
||||
// 2
|
||||
// f(x) ~ x * [ p(x) + x * q(x) ]
|
||||
//
|
||||
|
||||
float128 OddPoly(float128 x, float128 *arr, unsigned n)
|
||||
{
|
||||
return float128_mul(x, EvenPoly(x, arr, n));
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Scales extended double-precision floating-point value in operand `a' by
|
||||
| value `b'. The function truncates the value in the second operand 'b' to
|
||||
| an integral value and adds that value to the exponent of the operand 'a'.
|
||||
| The operation performed according to the IEC/IEEE Standard for Binary
|
||||
| Floating-Point Arithmetic.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
extern floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b );
|
||||
|
||||
floatx80 floatx80_scale(floatx80 a, floatx80 b)
|
||||
{
|
||||
sbits32 aExp, bExp;
|
||||
bits64 aSig, bSig;
|
||||
|
||||
// handle unsupported extended double-precision floating encodings
|
||||
/* if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
|
||||
{
|
||||
float_raise(float_flag_invalid);
|
||||
return floatx80_default_nan;
|
||||
}*/
|
||||
|
||||
aSig = extractFloatx80Frac(a);
|
||||
aExp = extractFloatx80Exp(a);
|
||||
int aSign = extractFloatx80Sign(a);
|
||||
bSig = extractFloatx80Frac(b);
|
||||
bExp = extractFloatx80Exp(b);
|
||||
int bSign = extractFloatx80Sign(b);
|
||||
|
||||
if (aExp == 0x7FFF) {
|
||||
if ((bits64) (aSig<<1) || ((bExp == 0x7FFF) && (bits64) (bSig<<1)))
|
||||
{
|
||||
return propagateFloatx80NaN(a, b);
|
||||
}
|
||||
if ((bExp == 0x7FFF) && bSign) {
|
||||
float_raise(float_flag_invalid);
|
||||
return floatx80_default_nan;
|
||||
}
|
||||
if (bSig && (bExp == 0)) float_raise(float_flag_denormal);
|
||||
return a;
|
||||
}
|
||||
if (bExp == 0x7FFF) {
|
||||
if ((bits64) (bSig<<1)) return propagateFloatx80NaN(a, b);
|
||||
if ((aExp | aSig) == 0) {
|
||||
if (! bSign) {
|
||||
float_raise(float_flag_invalid);
|
||||
return floatx80_default_nan;
|
||||
}
|
||||
return a;
|
||||
}
|
||||
if (aSig && (aExp == 0)) float_raise(float_flag_denormal);
|
||||
if (bSign) return packFloatx80(aSign, 0, 0);
|
||||
return packFloatx80(aSign, 0x7FFF, 0x8000000000000000U);
|
||||
}
|
||||
if (aExp == 0) {
|
||||
if (aSig == 0) return a;
|
||||
float_raise(float_flag_denormal);
|
||||
normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
|
||||
}
|
||||
if (bExp == 0) {
|
||||
if (bSig == 0) return a;
|
||||
float_raise(float_flag_denormal);
|
||||
normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
|
||||
}
|
||||
|
||||
if (bExp > 0x400E) {
|
||||
/* generate appropriate overflow/underflow */
|
||||
return roundAndPackFloatx80(80, aSign,
|
||||
bSign ? -0x3FFF : 0x7FFF, aSig, 0);
|
||||
}
|
||||
if (bExp < 0x3FFF) return a;
|
||||
|
||||
int shiftCount = 0x403E - bExp;
|
||||
bSig >>= shiftCount;
|
||||
sbits32 scale = bSig;
|
||||
if (bSign) scale = -scale; /* -32768..32767 */
|
||||
return
|
||||
roundAndPackFloatx80(80, aSign, aExp+scale, aSig, 0);
|
||||
}
|
||||
487
softfloat/fyl2x.c
Normal file
487
softfloat/fyl2x.c
Normal file
@@ -0,0 +1,487 @@
|
||||
/*============================================================================
|
||||
This source file is an extension to the SoftFloat IEC/IEEE Floating-point
|
||||
Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
|
||||
floating point emulation.
|
||||
float_raise(float_flag_invalid)
|
||||
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
|
||||
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
|
||||
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
|
||||
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
|
||||
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
|
||||
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
|
||||
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
|
||||
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
|
||||
|
||||
Derivative works are acceptable, even for commercial purposes, so long as
|
||||
(1) the source code for the derivative work includes prominent notice that
|
||||
the work is derivative, and (2) the source code includes prominent notice with
|
||||
these four paragraphs for those parts of this code that are retained.
|
||||
=============================================================================*/
|
||||
|
||||
/*============================================================================
|
||||
* Written for Bochs (x86 achitecture simulator) by
|
||||
* Stanislav Shwartsman [sshwarts at sourceforge net]
|
||||
* Adapted for lib/softfloat in MESS by Hans Ostermeyer (03/2012)
|
||||
* ==========================================================================*/
|
||||
|
||||
#define FLOAT128
|
||||
|
||||
#define USE_estimateDiv128To64
|
||||
|
||||
#include "m68kcpu.h" // which includes softfloat.h after defining the basic types
|
||||
|
||||
//#include "softfloat-specialize"
|
||||
#include "fpu_constant.h"
|
||||
|
||||
#define floatx80_log10_2 packFloatx80(0, 0x3ffd, 0x9a209a84fbcff798U)
|
||||
#define floatx80_ln_2 packFloatx80(0, 0x3ffe, 0xb17217f7d1cf79acU)
|
||||
#define floatx80_one packFloatx80(0, 0x3fff, 0x8000000000000000U)
|
||||
#define floatx80_default_nan packFloatx80(0, 0xffff, 0xffffffffffffffffU)
|
||||
|
||||
#define packFloat_128(zHi, zLo) {(zHi), (zLo)}
|
||||
#define PACK_FLOAT_128(hi,lo) packFloat_128(LIT64(hi),LIT64(lo))
|
||||
|
||||
#define EXP_BIAS 0x3FFF
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the fraction bits of the extended double-precision floating-point
|
||||
| value `a'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
static inline bits64 extractFloatx80Frac( floatx80 a )
|
||||
{
|
||||
return a.low;
|
||||
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the exponent bits of the extended double-precision floating-point
|
||||
| value `a'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
static inline int32 extractFloatx80Exp( floatx80 a )
|
||||
{
|
||||
return a.high & 0x7FFF;
|
||||
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the sign bit of the extended double-precision floating-point value
|
||||
| `a'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
static inline flag extractFloatx80Sign( floatx80 a )
|
||||
{
|
||||
return a.high>>15;
|
||||
|
||||
}
|
||||
|
||||
#if 0
|
||||
/*----------------------------------------------------------------------------
|
||||
| Takes extended double-precision floating-point NaN `a' and returns the
|
||||
| appropriate NaN result. If `a' is a signaling NaN, the invalid exception
|
||||
| is raised.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
static inline floatx80 propagateFloatx80NaNOneArg(floatx80 a)
|
||||
{
|
||||
if (floatx80_is_signaling_nan(a))
|
||||
float_raise(float_flag_invalid);
|
||||
|
||||
a.low |= 0xC000000000000000U;
|
||||
|
||||
return a;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Normalizes the subnormal extended double-precision floating-point value
|
||||
| represented by the denormalized significand `aSig'. The normalized exponent
|
||||
| and significand are stored at the locations pointed to by `zExpPtr' and
|
||||
| `zSigPtr', respectively.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
static inline void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr)
|
||||
{
|
||||
int shiftCount = countLeadingZeros64(aSig);
|
||||
*zSigPtr = aSig<<shiftCount;
|
||||
*zExpPtr = 1 - shiftCount;
|
||||
}
|
||||
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns 1 if the extended double-precision floating-point value `a' is a
|
||||
| NaN; otherwise returns 0.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
static inline int floatx80_is_nan(floatx80 a)
|
||||
{
|
||||
return ((a.high & 0x7FFF) == 0x7FFF) && (int64_t) (a.low<<1);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Takes two extended double-precision floating-point values `a' and `b', one
|
||||
| of which is a NaN, and returns the appropriate NaN result. If either `a' or
|
||||
| `b' is a signaling NaN, the invalid exception is raised.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
static floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b)
|
||||
{
|
||||
int aIsNaN = floatx80_is_nan(a);
|
||||
int aIsSignalingNaN = floatx80_is_signaling_nan(a);
|
||||
int bIsNaN = floatx80_is_nan(b);
|
||||
int bIsSignalingNaN = floatx80_is_signaling_nan(b);
|
||||
a.low |= 0xC000000000000000U;
|
||||
b.low |= 0xC000000000000000U;
|
||||
if (aIsSignalingNaN | bIsSignalingNaN) float_raise(float_flag_invalid);
|
||||
if (aIsSignalingNaN) {
|
||||
if (bIsSignalingNaN) goto returnLargerSignificand;
|
||||
return bIsNaN ? b : a;
|
||||
}
|
||||
else if (aIsNaN) {
|
||||
if (bIsSignalingNaN | ! bIsNaN) return a;
|
||||
returnLargerSignificand:
|
||||
if (a.low < b.low) return b;
|
||||
if (b.low < a.low) return a;
|
||||
return (a.high < b.high) ? a : b;
|
||||
}
|
||||
else {
|
||||
return b;
|
||||
}
|
||||
}
|
||||
|
||||
static const float128 float128_one =
|
||||
packFloat_128(0x3fff000000000000U, 0x0000000000000000U);
|
||||
static const float128 float128_two =
|
||||
packFloat_128(0x4000000000000000U, 0x0000000000000000U);
|
||||
|
||||
static const float128 float128_ln2inv2 =
|
||||
packFloat_128(0x400071547652b82fU, 0xe1777d0ffda0d23aU);
|
||||
|
||||
#define SQRT2_HALF_SIG 0xb504f333f9de6484U
|
||||
|
||||
extern float128 OddPoly(float128 x, float128 *arr, unsigned n);
|
||||
|
||||
#define L2_ARR_SIZE 9
|
||||
|
||||
static float128 ln_arr[L2_ARR_SIZE] =
|
||||
{
|
||||
PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 1 */
|
||||
PACK_FLOAT_128(0x3ffd555555555555, 0x5555555555555555), /* 3 */
|
||||
PACK_FLOAT_128(0x3ffc999999999999, 0x999999999999999a), /* 5 */
|
||||
PACK_FLOAT_128(0x3ffc249249249249, 0x2492492492492492), /* 7 */
|
||||
PACK_FLOAT_128(0x3ffbc71c71c71c71, 0xc71c71c71c71c71c), /* 9 */
|
||||
PACK_FLOAT_128(0x3ffb745d1745d174, 0x5d1745d1745d1746), /* 11 */
|
||||
PACK_FLOAT_128(0x3ffb3b13b13b13b1, 0x3b13b13b13b13b14), /* 13 */
|
||||
PACK_FLOAT_128(0x3ffb111111111111, 0x1111111111111111), /* 15 */
|
||||
PACK_FLOAT_128(0x3ffae1e1e1e1e1e1, 0xe1e1e1e1e1e1e1e2) /* 17 */
|
||||
};
|
||||
|
||||
static float128 poly_ln(float128 x1)
|
||||
{
|
||||
/*
|
||||
//
|
||||
// 3 5 7 9 11 13 15
|
||||
// 1+u u u u u u u u
|
||||
// 1/2 ln --- ~ u + --- + --- + --- + --- + ---- + ---- + ---- =
|
||||
// 1-u 3 5 7 9 11 13 15
|
||||
//
|
||||
// 2 4 6 8 10 12 14
|
||||
// u u u u u u u
|
||||
// = u * [ 1 + --- + --- + --- + --- + ---- + ---- + ---- ] =
|
||||
// 3 5 7 9 11 13 15
|
||||
//
|
||||
// 3 3
|
||||
// -- 4k -- 4k+2
|
||||
// p(u) = > C * u q(u) = > C * u
|
||||
// -- 2k -- 2k+1
|
||||
// k=0 k=0
|
||||
//
|
||||
// 1+u 2
|
||||
// 1/2 ln --- ~ u * [ p(u) + u * q(u) ]
|
||||
// 1-u
|
||||
//
|
||||
*/
|
||||
return OddPoly(x1, ln_arr, L2_ARR_SIZE);
|
||||
}
|
||||
|
||||
/* required sqrt(2)/2 < x < sqrt(2) */
|
||||
static float128 poly_l2(float128 x)
|
||||
{
|
||||
/* using float128 for approximation */
|
||||
float128 x_p1 = float128_add(x, float128_one);
|
||||
float128 x_m1 = float128_sub(x, float128_one);
|
||||
x = float128_div(x_m1, x_p1);
|
||||
x = poly_ln(x);
|
||||
x = float128_mul(x, float128_ln2inv2);
|
||||
return x;
|
||||
}
|
||||
|
||||
static float128 poly_l2p1(float128 x)
|
||||
{
|
||||
/* using float128 for approximation */
|
||||
float128 x_p2 = float128_add(x, float128_two);
|
||||
x = float128_div(x, x_p2);
|
||||
x = poly_ln(x);
|
||||
x = float128_mul(x, float128_ln2inv2);
|
||||
return x;
|
||||
}
|
||||
|
||||
// =================================================
|
||||
// FYL2X Compute y * log (x)
|
||||
// 2
|
||||
// =================================================
|
||||
|
||||
//
|
||||
// Uses the following identities:
|
||||
//
|
||||
// 1. ----------------------------------------------------------
|
||||
// ln(x)
|
||||
// log (x) = -------, ln (x*y) = ln(x) + ln(y)
|
||||
// 2 ln(2)
|
||||
//
|
||||
// 2. ----------------------------------------------------------
|
||||
// 1+u x-1
|
||||
// ln (x) = ln -----, when u = -----
|
||||
// 1-u x+1
|
||||
//
|
||||
// 3. ----------------------------------------------------------
|
||||
// 3 5 7 2n+1
|
||||
// 1+u u u u u
|
||||
// ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ]
|
||||
// 1-u 3 5 7 2n+1
|
||||
//
|
||||
|
||||
static floatx80 fyl2x(floatx80 a, floatx80 b)
|
||||
{
|
||||
uint64_t aSig = extractFloatx80Frac(a);
|
||||
int32_t aExp = extractFloatx80Exp(a);
|
||||
int aSign = extractFloatx80Sign(a);
|
||||
uint64_t bSig = extractFloatx80Frac(b);
|
||||
int32_t bExp = extractFloatx80Exp(b);
|
||||
int bSign = extractFloatx80Sign(b);
|
||||
|
||||
int zSign = bSign ^ 1;
|
||||
|
||||
if (aExp == 0x7FFF) {
|
||||
if ((uint64_t) (aSig<<1)
|
||||
|| ((bExp == 0x7FFF) && (uint64_t) (bSig<<1)))
|
||||
{
|
||||
return propagateFloatx80NaN(a, b);
|
||||
}
|
||||
if (aSign)
|
||||
{
|
||||
invalid:
|
||||
float_raise(float_flag_invalid);
|
||||
return floatx80_default_nan;
|
||||
}
|
||||
else {
|
||||
if (bExp == 0) {
|
||||
if (bSig == 0) goto invalid;
|
||||
float_raise(float_flag_denormal);
|
||||
}
|
||||
return packFloatx80(bSign, 0x7FFF, 0x8000000000000000U);
|
||||
}
|
||||
}
|
||||
if (bExp == 0x7FFF)
|
||||
{
|
||||
if ((uint64_t) (bSig<<1)) return propagateFloatx80NaN(a, b);
|
||||
if (aSign && (uint64_t)(aExp | aSig)) goto invalid;
|
||||
if (aSig && (aExp == 0))
|
||||
float_raise(float_flag_denormal);
|
||||
if (aExp < 0x3FFF) {
|
||||
return packFloatx80(zSign, 0x7FFF, 0x8000000000000000U);
|
||||
}
|
||||
if (aExp == 0x3FFF && ((uint64_t) (aSig<<1) == 0)) goto invalid;
|
||||
return packFloatx80(bSign, 0x7FFF, 0x8000000000000000U);
|
||||
}
|
||||
if (aExp == 0) {
|
||||
if (aSig == 0) {
|
||||
if ((bExp | bSig) == 0) goto invalid;
|
||||
float_raise(float_flag_divbyzero);
|
||||
return packFloatx80(zSign, 0x7FFF, 0x8000000000000000U);
|
||||
}
|
||||
if (aSign) goto invalid;
|
||||
float_raise(float_flag_denormal);
|
||||
normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
|
||||
}
|
||||
if (aSign) goto invalid;
|
||||
if (bExp == 0) {
|
||||
if (bSig == 0) {
|
||||
if (aExp < 0x3FFF) return packFloatx80(zSign, 0, 0);
|
||||
return packFloatx80(bSign, 0, 0);
|
||||
}
|
||||
float_raise(float_flag_denormal);
|
||||
normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
|
||||
}
|
||||
if (aExp == 0x3FFF && ((uint64_t) (aSig<<1) == 0))
|
||||
return packFloatx80(bSign, 0, 0);
|
||||
|
||||
float_raise(float_flag_inexact);
|
||||
|
||||
int ExpDiff = aExp - 0x3FFF;
|
||||
aExp = 0;
|
||||
if (aSig >= SQRT2_HALF_SIG) {
|
||||
ExpDiff++;
|
||||
aExp--;
|
||||
}
|
||||
|
||||
/* ******************************** */
|
||||
/* using float128 for approximation */
|
||||
/* ******************************** */
|
||||
|
||||
uint64_t zSig0, zSig1;
|
||||
shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1);
|
||||
float128 x = packFloat128(0, aExp+0x3FFF, zSig0, zSig1);
|
||||
x = poly_l2(x);
|
||||
x = float128_add(x, int64_to_float128((int64_t) ExpDiff));
|
||||
return floatx80_mul(b, float128_to_floatx80(x));
|
||||
}
|
||||
|
||||
// =================================================
|
||||
// FYL2XP1 Compute y * log (x + 1)
|
||||
// 2
|
||||
// =================================================
|
||||
|
||||
//
|
||||
// Uses the following identities:
|
||||
//
|
||||
// 1. ----------------------------------------------------------
|
||||
// ln(x)
|
||||
// log (x) = -------
|
||||
// 2 ln(2)
|
||||
//
|
||||
// 2. ----------------------------------------------------------
|
||||
// 1+u x
|
||||
// ln (x+1) = ln -----, when u = -----
|
||||
// 1-u x+2
|
||||
//
|
||||
// 3. ----------------------------------------------------------
|
||||
// 3 5 7 2n+1
|
||||
// 1+u u u u u
|
||||
// ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ]
|
||||
// 1-u 3 5 7 2n+1
|
||||
//
|
||||
|
||||
floatx80 fyl2xp1(floatx80 a, floatx80 b)
|
||||
{
|
||||
int32_t aExp, bExp;
|
||||
uint64_t aSig, bSig, zSig0, zSig1, zSig2;
|
||||
int aSign, bSign;
|
||||
|
||||
aSig = extractFloatx80Frac(a);
|
||||
aExp = extractFloatx80Exp(a);
|
||||
aSign = extractFloatx80Sign(a);
|
||||
bSig = extractFloatx80Frac(b);
|
||||
bExp = extractFloatx80Exp(b);
|
||||
bSign = extractFloatx80Sign(b);
|
||||
int zSign = aSign ^ bSign;
|
||||
|
||||
if (aExp == 0x7FFF) {
|
||||
if ((uint64_t) (aSig<<1)
|
||||
|| ((bExp == 0x7FFF) && (uint64_t) (bSig<<1)))
|
||||
{
|
||||
return propagateFloatx80NaN(a, b);
|
||||
}
|
||||
if (aSign)
|
||||
{
|
||||
invalid:
|
||||
float_raise(float_flag_invalid);
|
||||
return floatx80_default_nan;
|
||||
}
|
||||
else {
|
||||
if (bExp == 0) {
|
||||
if (bSig == 0) goto invalid;
|
||||
float_raise(float_flag_denormal);
|
||||
}
|
||||
return packFloatx80(bSign, 0x7FFF, 0x8000000000000000U);
|
||||
}
|
||||
}
|
||||
if (bExp == 0x7FFF)
|
||||
{
|
||||
if ((uint64_t) (bSig<<1))
|
||||
return propagateFloatx80NaN(a, b);
|
||||
|
||||
if (aExp == 0) {
|
||||
if (aSig == 0) goto invalid;
|
||||
float_raise(float_flag_denormal);
|
||||
}
|
||||
|
||||
return packFloatx80(zSign, 0x7FFF, 0x8000000000000000U);
|
||||
}
|
||||
if (aExp == 0) {
|
||||
if (aSig == 0) {
|
||||
if (bSig && (bExp == 0)) float_raise(float_flag_denormal);
|
||||
return packFloatx80(zSign, 0, 0);
|
||||
}
|
||||
float_raise(float_flag_denormal);
|
||||
normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
|
||||
}
|
||||
if (bExp == 0) {
|
||||
if (bSig == 0) return packFloatx80(zSign, 0, 0);
|
||||
float_raise(float_flag_denormal);
|
||||
normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
|
||||
}
|
||||
|
||||
float_raise(float_flag_inexact);
|
||||
|
||||
if (aSign && aExp >= 0x3FFF)
|
||||
return a;
|
||||
|
||||
if (aExp >= 0x3FFC) // big argument
|
||||
{
|
||||
return fyl2x(floatx80_add(a, floatx80_one), b);
|
||||
}
|
||||
|
||||
// handle tiny argument
|
||||
if (aExp < EXP_BIAS-70)
|
||||
{
|
||||
// first order approximation, return (a*b)/ln(2)
|
||||
int32_t zExp = aExp + FLOAT_LN2INV_EXP - 0x3FFE;
|
||||
|
||||
mul128By64To192(FLOAT_LN2INV_HI, FLOAT_LN2INV_LO, aSig, &zSig0, &zSig1, &zSig2);
|
||||
if (0 < (int64_t) zSig0) {
|
||||
shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
|
||||
--zExp;
|
||||
}
|
||||
|
||||
zExp = zExp + bExp - 0x3FFE;
|
||||
mul128By64To192(zSig0, zSig1, bSig, &zSig0, &zSig1, &zSig2);
|
||||
if (0 < (int64_t) zSig0) {
|
||||
shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
|
||||
--zExp;
|
||||
}
|
||||
|
||||
return
|
||||
roundAndPackFloatx80(80, aSign ^ bSign, zExp, zSig0, zSig1);
|
||||
}
|
||||
|
||||
/* ******************************** */
|
||||
/* using float128 for approximation */
|
||||
/* ******************************** */
|
||||
|
||||
shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1);
|
||||
float128 x = packFloat128(aSign, aExp, zSig0, zSig1);
|
||||
x = poly_l2p1(x);
|
||||
return floatx80_mul(b, float128_to_floatx80(x));
|
||||
}
|
||||
|
||||
floatx80 floatx80_flognp1(floatx80 a)
|
||||
{
|
||||
return fyl2xp1(a, floatx80_ln_2);
|
||||
}
|
||||
|
||||
floatx80 floatx80_flogn(floatx80 a)
|
||||
{
|
||||
return fyl2x(a, floatx80_ln_2);
|
||||
}
|
||||
|
||||
floatx80 floatx80_flog2(floatx80 a)
|
||||
{
|
||||
return fyl2x(a, floatx80_one);
|
||||
}
|
||||
|
||||
floatx80 floatx80_flog10(floatx80 a)
|
||||
{
|
||||
return fyl2x(a, floatx80_log10_2);
|
||||
}
|
||||
@@ -240,9 +240,9 @@ flag floatx80_le_quiet( floatx80, floatx80 );
|
||||
flag floatx80_lt_quiet( floatx80, floatx80 );
|
||||
flag floatx80_is_signaling_nan( floatx80 );
|
||||
|
||||
/* int floatx80_fsin(floatx80 &a);
|
||||
int floatx80_fcos(floatx80 &a);
|
||||
int floatx80_ftan(floatx80 &a); */
|
||||
int floatx80_fsin(floatx80 *a);
|
||||
int floatx80_fcos(floatx80 *a);
|
||||
int floatx80_ftan(floatx80 *a);
|
||||
|
||||
floatx80 floatx80_flognp1(floatx80 a);
|
||||
floatx80 floatx80_flogn(floatx80 a);
|
||||
|
||||
Reference in New Issue
Block a user