diff --git a/parse.c b/parse.c index 3d97f61..f24a768 100644 --- a/parse.c +++ b/parse.c @@ -318,7 +318,9 @@ int get_mode( return TRUE; } -#if DEBUG +#define DEBUG_FLOAT 0 +#if DEBUG_FLOAT + void printflt(unsigned *flt, int size) { @@ -328,11 +330,15 @@ printflt(unsigned *flt, int size) printf("ufrac: %02x", flt[0] & 0x007F); for (int i = 1; i < size; i++) { - printf(" %04x", flt[i]); + printf(" %04x", flt[i]); } printf("\n"); } + +#define DF(x) printf x +#else +#define DF(x) #endif /* @@ -392,10 +398,13 @@ int parse_float( int sexp; /* Signed exponent */ unsigned uexp; /* Unsigned excess-128 exponent */ unsigned sign = 0; /* Sign mask */ + unsigned round = 0; /* Rounding bit */ i = sscanf(cp, SCANF_FMT "%n", &d, &n); if (i == 0) return 0; /* Wasn't able to convert */ + DF(("LDBL_MANT_DIG: %d\n", LDBL_MANT_DIG)); + DF(("%Lf input: %s", d, cp)); cp += n; if (endp) @@ -409,11 +418,13 @@ int parse_float( } frac = FREXP(d, &sexp); /* Separate into exponent and mantissa */ + DF(("frac: %Lf %La sexp: %d\n", frac, frac, sexp)); if (sexp < -127 || sexp > 127) return 0; /* Exponent out of range. */ uexp = sexp + 128; /* Make excess-128 mode */ uexp &= 0xff; /* express in 8 bits */ + DF(("uexp: %02x\n", uexp)); /* * frexp guarantees its fractional return value is @@ -435,34 +446,41 @@ int parse_float( * However the bit immediately above the MSB is always 1 * because the value is normalized. So it's actually * 8 bits, 24 bits, or 56 bits. - * We multiply the fractional part of our value by + * We effectively multiply the fractional part of our value by * 2**56 to fully expose all of those bits (including * the MSB which is 1). + * However as an intermediate step, we really multiply by + * 2**57, so we get one lsb for possible later rounding + * purposes. After that, we divide by 2 again. */ - - /* The following big literal is 2 to the 56th power: */ - ufrac = (uint64_t) (frac * 72057594037927936.0); /* Align fraction bits */ + /* The following big literal is 2 to the 57th power: */ + ufrac = (uint64_t) (frac * 144115188075855872.0); /* Align fraction bits */ + round = ufrac & 1; + ufrac >>= 1; + DF(("ufrac: %016lx round: %d\n", ufrac, round)); + DF(("56 : %016lx\n", (1UL<<56) - 1)); /* ufrac is now >= 2**55 and < 2**56 */ /* Round from FLT4 to FLT2 or single-word float. * - * +--+--------+-------+ - * |15|14 7|6 0| - * +--+--------+-------+ - * | S|EEEEEEEE|MMMMMMM| - * +--+--------+-------+ + * +--+--------+-------+ +--------+--------+ + * |15|14 7|6 0| |15 | 0| + * +--+--------+-------+ +--------+--------+ + * | S|EEEEEEEE|MMMMMMM| |MMMMMMMM|MMMMMMMM| ...maybe 2 more words... + * +--+--------+-------+ +--------+--------+ * Sign (1 bit) * Exponent (8 bits) * Mantissa (7 bits) */ if (size < 4) { - /* Round to nearest 8- or 24- bit approximation */ /* * Round to nearest 8- or 24- bit approximation. + * * Method: if the msb of the bits that are chopped off is set, add 1 to * the lsb of the remaining bits (one more leftward). + * * Simplification: adding 1 to that msb will carry into the lsb. And if * that msb is unset, then that will not carry. So we can always do that * addition. @@ -470,12 +488,23 @@ int parse_float( */ int bits_chopped_off = 16 * (4 - size); uint64_t msb_chopped_off = 1ULL << (bits_chopped_off - 1); + DF(("msb_chopped_off = %16lx\n", msb_chopped_off)); ufrac += msb_chopped_off; + DF(("ufrac: %016lx after rounding\n", ufrac)); if ((ufrac >> 56) > 0) { /* Overflow? */ ufrac >>= 1; /* Normalize */ uexp++; + DF(("ufrac: %016lx uexp: %02x\n", ufrac, uexp)); } + } else { + /* + * Round to 56-bit approximation. + * + * The method is the same as above, with 'round' as the msb of the + * chopped off bits. + */ + ufrac += round; } flt[0] = (unsigned) (sign | (uexp << 7) | ((ufrac >> 48) & 0x7F)); diff --git a/tests/test-float.lst.ok b/tests/test-float.lst.ok index 620b658..76663bd 100644 --- a/tests/test-float.lst.ok +++ b/tests/test-float.lst.ok @@ -1,42 +1,94 @@ 1 ;;;;; 2 ; - 3 ; Test floating point numbers - 4 - 5 000000 040200 .word ^F 1.0 ; 040200 - 6 000002 140200 .word ^F-1.0 ; 140200 - 7 000004 137600 .word -^F 1.0 ; 137600 - 8 000006 037600 .word -^F-1.0 ; 037600 - 9 - 10 000010 040706 .word ^F6.2 ; 040706 - 11 000012 137071 .word ^C^F6.2 ; 137071 - 12 000014 137071 .word ^C<^F6.2> ; 137071 - 13 - 14 000016 040706 063146 .flt2 6.2 ; 040706 063146 - 15 000022 040706 063146 063146 .flt4 6.2 ; 040706 063146 063146 063146 + 3 ; Test floating point numbers. + 4 ; + 5 ; The values in the comments are the reference values from MACRO V05.05. + 6 ; + 7 000000 040200 .word ^F 1.0 ; 040200 + 8 000002 140200 .word ^F-1.0 ; 140200 + 9 000004 137600 .word -^F 1.0 ; 137600 + 10 000006 037600 .word -^F-1.0 ; 037600 + 11 + 12 000010 040706 .word ^F6.2 ; 040706 + 13 000012 137071 .word ^C^F6.2 ; 137071 + 14 000014 137071 .word ^C<^F6.2> ; 137071 + 15 + 16 000016 040706 063146 .flt2 6.2 ; 040706 063146 + 17 000022 040706 063146 063146 .flt4 6.2 ; 040706 063146 063146 063146 000030 063146 - 16 - 17 000032 040300 000000 .flt2 1.5 ; 040300 000000 - 18 000036 040300 000000 000000 .flt4 1.5 ; 040300 000000 000000 000000 + 18 + 19 000032 040300 000000 .flt2 1.5 ; 040300 000000 + 20 000036 040300 000000 000000 .flt4 1.5 ; 040300 000000 000000 000000 000044 000000 - 19 - 20 000046 056200 .word ^F 72057594037927935 ; 056200 (rounded!) - 21 000050 056200 000000 .flt2 72057594037927935 ; 056200 000000 (rounded!) - 22 000054 056177 177777 177777 .flt4 72057594037927935 ; 056177 177777 177777 177777 (exact!) - 000062 177777 - 23 - 24 000064 056200 .word ^F 72057594037927936 ; 056200 - 25 000066 056200 000000 .flt2 72057594037927936 ; 056200 000000 - 26 000072 056200 000000 000000 .flt4 72057594037927936 ; 056200 000000 000000 000000 - 000100 000000 - 27 - 28 000102 056200 000000 000000 .flt4 72057594037927937 ; 056200 000000 000000 000001 - 000110 000000 - 29 - 30 000112 056400 000000 000000 .flt4 144115188075855873 ; 1 << 57 +1 - 000120 000000 - 31 ; 056400 000000 000000 000000 (rounded!) - 32 + 21 + 22 000046 040511 .word ^F 3.1415926535897932384626433 ; 040511 + 23 000050 040511 007733 .flt2 3.1415926535897932384626433 ; 040511 007733 + 24 000054 040511 007732 121041 .flt4 3.1415926535897932384626433 ; 040511 007732 121041 064302 + 000062 064302 + 25 + 26 ; Test some large numbers at the edge of the exactly representable + 27 ; integers. + 28 ; 1 << 56 just barely fits: it uses 57 bits but the lsb is 0 so + 29 ; when it is cut off, we don't notice. + 30 ; 1 << 56 + 1 R the lsb are 01 and is cut off. + 31 ; 1 << 56 + 2 the lsb are 10 and the missing 0 goes unnoticed. 32 + 33 ; 1 << 56 - 1 consists of 56 1 bits. This value and all smaller ints + 34 ; 1 << 56 - 2 get represented exactly. + 35 + 36 ; Going up, to rounding steps of 4: + 37 ; + 38 ; 1 << 57 + 39 ; 1 << 57 + 4 next higher available representation + 40 ; 1 << 57 + 8 next higher available representation + 41 + 42 ; 1 << 56 - 3 + 43 000064 056177 177777 177777 .flt4 72057594037927933 ; 056177 177777 177777 177775 + 000072 177775 + 44 ; 1 << 56 - 2 + 45 000074 056177 177777 177777 .flt4 72057594037927934 ; 056177 177777 177777 177776 + 000102 177776 + 46 + 47 ; 1 << 56 - 1 + 48 000104 056200 .word ^F 72057594037927935 ; 056200 (rounded!) + 49 000106 056200 000000 .flt2 72057594037927935 ; 056200 000000 (rounded!) + 50 000112 056177 177777 177777 .flt4 72057594037927935 ; 056177 177777 177777 177777 + 000120 177777 + 51 + 52 ; 1 << 56 + 53 000122 056200 .word ^F 72057594037927936 ; 056200 + 54 000124 056200 000000 .flt2 72057594037927936 ; 056200 000000 + 55 000130 056200 000000 000000 .flt4 72057594037927936 ; 056200 000000 000000 000000 + 000136 000000 + 56 + 57 000140 056200 000000 000000 .flt4 72057594037927938 ; 1 << 56 + 2 + 000146 000001 + 58 ; 056200 000000 000000 000001 + 59 + 60 000150 056400 000000 000000 .flt4 144115188075855872 ; 1 << 57 + 000156 000000 + 61 ; 056400 000000 000000 000000 + 62 000160 056400 000000 000000 .flt4 144115188075855873 ; 1 << 57 + 1 + 000166 000000 + 63 ; 056400 000000 000000 000000 (inexact) + 64 000170 056400 000000 000000 .flt4 144115188075855874 ; 1 << 57 + 2 + 000176 000001 + 65 ; 056400 000000 000000 000001 (inexact) + 66 000200 056400 000000 000000 .flt4 144115188075855875 ; 1 << 57 + 3 + 000206 000001 + 67 ; 056400 000000 000000 000001 (inexact) + 68 000210 056400 000000 000000 .flt4 144115188075855876 ; 1 << 57 + 4 + 000216 000001 + 69 ; 056400 000000 000000 000001 (exact!) + 70 000220 056400 000000 000000 .flt4 144115188075855880 ; 1 << 57 + 8 + 000226 000002 + 71 ; 056400 000000 000000 000002 (exact!) + 72 + 73 ; This one triggers rounding up (round == 1) + 74 000230 040725 052507 055061 .flt4 6.66666 ; 040725 052507 055061 122276 + 000236 122276 + 75 + 75 Symbol table @@ -47,4 +99,4 @@ Symbol table Program sections: . ABS. 000000 000 (RW,I,GBL,ABS,OVR,NOSAV) - 000122 001 (RW,I,LCL,REL,CON,NOSAV) + 000240 001 (RW,I,LCL,REL,CON,NOSAV) diff --git a/tests/test-float.mac b/tests/test-float.mac index 3159c9f..1898b18 100644 --- a/tests/test-float.mac +++ b/tests/test-float.mac @@ -1,7 +1,9 @@ ;;;;; ; -; Test floating point numbers - +; Test floating point numbers. +; +; The values in the comments are the reference values from MACRO V05.05. +; .word ^F 1.0 ; 040200 .word ^F-1.0 ; 140200 .word -^F 1.0 ; 137600 @@ -17,16 +19,57 @@ .flt2 1.5 ; 040300 000000 .flt4 1.5 ; 040300 000000 000000 000000 + .word ^F 3.1415926535897932384626433 ; 040511 + .flt2 3.1415926535897932384626433 ; 040511 007733 + .flt4 3.1415926535897932384626433 ; 040511 007732 121041 064302 + + ; Test some large numbers at the edge of the exactly representable + ; integers. + ; 1 << 56 just barely fits: it uses 57 bits but the lsb is 0 so + ; when it is cut off, we don't notice. + ; 1 << 56 + 1 R the lsb are 01 and is cut off. + ; 1 << 56 + 2 the lsb are 10 and the missing 0 goes unnoticed. + + ; 1 << 56 - 1 consists of 56 1 bits. This value and all smaller ints + ; 1 << 56 - 2 get represented exactly. + + ; Going up, to rounding steps of 4: + ; + ; 1 << 57 + ; 1 << 57 + 4 next higher available representation + ; 1 << 57 + 8 next higher available representation + + ; 1 << 56 - 3 + .flt4 72057594037927933 ; 056177 177777 177777 177775 + ; 1 << 56 - 2 + .flt4 72057594037927934 ; 056177 177777 177777 177776 + + ; 1 << 56 - 1 .word ^F 72057594037927935 ; 056200 (rounded!) .flt2 72057594037927935 ; 056200 000000 (rounded!) - .flt4 72057594037927935 ; 056177 177777 177777 177777 (exact!) + .flt4 72057594037927935 ; 056177 177777 177777 177777 + ; 1 << 56 .word ^F 72057594037927936 ; 056200 .flt2 72057594037927936 ; 056200 000000 .flt4 72057594037927936 ; 056200 000000 000000 000000 - .flt4 72057594037927937 ; 056200 000000 000000 000001 + .flt4 72057594037927938 ; 1 << 56 + 2 + ; 056200 000000 000000 000001 - .flt4 144115188075855873 ; 1 << 57 +1 - ; 056400 000000 000000 000000 (rounded!) + .flt4 144115188075855872 ; 1 << 57 + ; 056400 000000 000000 000000 + .flt4 144115188075855873 ; 1 << 57 + 1 + ; 056400 000000 000000 000000 (inexact) + .flt4 144115188075855874 ; 1 << 57 + 2 + ; 056400 000000 000000 000001 (inexact) + .flt4 144115188075855875 ; 1 << 57 + 3 + ; 056400 000000 000000 000001 (inexact) + .flt4 144115188075855876 ; 1 << 57 + 4 + ; 056400 000000 000000 000001 (exact!) + .flt4 144115188075855880 ; 1 << 57 + 8 + ; 056400 000000 000000 000002 (exact!) + + ; This one triggers rounding up (round == 1) + .flt4 6.66666 ; 040725 052507 055061 122276