Add some more tests; rounding of 56-bit mantissa now works.

This commit is contained in:
Olaf Seibert 2021-01-19 18:12:13 +01:00
parent 4f2f813dee
commit 90943eaf49
3 changed files with 177 additions and 53 deletions

53
parse.c
View File

@ -318,7 +318,9 @@ int get_mode(
return TRUE;
}
#if DEBUG
#define DEBUG_FLOAT 0
#if DEBUG_FLOAT
void
printflt(unsigned *flt, int size)
{
@ -328,11 +330,15 @@ printflt(unsigned *flt, int size)
printf("ufrac: %02x", flt[0] & 0x007F);
for (int i = 1; i < size; i++) {
printf(" %04x", flt[i]);
printf(" %04x", flt[i]);
}
printf("\n");
}
#define DF(x) printf x
#else
#define DF(x)
#endif
/*
@ -392,10 +398,13 @@ int parse_float(
int sexp; /* Signed exponent */
unsigned uexp; /* Unsigned excess-128 exponent */
unsigned sign = 0; /* Sign mask */
unsigned round = 0; /* Rounding bit */
i = sscanf(cp, SCANF_FMT "%n", &d, &n);
if (i == 0)
return 0; /* Wasn't able to convert */
DF(("LDBL_MANT_DIG: %d\n", LDBL_MANT_DIG));
DF(("%Lf input: %s", d, cp));
cp += n;
if (endp)
@ -409,11 +418,13 @@ int parse_float(
}
frac = FREXP(d, &sexp); /* Separate into exponent and mantissa */
DF(("frac: %Lf %La sexp: %d\n", frac, frac, sexp));
if (sexp < -127 || sexp > 127)
return 0; /* Exponent out of range. */
uexp = sexp + 128; /* Make excess-128 mode */
uexp &= 0xff; /* express in 8 bits */
DF(("uexp: %02x\n", uexp));
/*
* frexp guarantees its fractional return value is
@ -435,34 +446,41 @@ int parse_float(
* However the bit immediately above the MSB is always 1
* because the value is normalized. So it's actually
* 8 bits, 24 bits, or 56 bits.
* We multiply the fractional part of our value by
* We effectively multiply the fractional part of our value by
* 2**56 to fully expose all of those bits (including
* the MSB which is 1).
* However as an intermediate step, we really multiply by
* 2**57, so we get one lsb for possible later rounding
* purposes. After that, we divide by 2 again.
*/
/* The following big literal is 2 to the 56th power: */
ufrac = (uint64_t) (frac * 72057594037927936.0); /* Align fraction bits */
/* The following big literal is 2 to the 57th power: */
ufrac = (uint64_t) (frac * 144115188075855872.0); /* Align fraction bits */
round = ufrac & 1;
ufrac >>= 1;
DF(("ufrac: %016lx round: %d\n", ufrac, round));
DF(("56 : %016lx\n", (1UL<<56) - 1));
/* ufrac is now >= 2**55 and < 2**56 */
/* Round from FLT4 to FLT2 or single-word float.
*
* +--+--------+-------+
* |15|14 7|6 0|
* +--+--------+-------+
* | S|EEEEEEEE|MMMMMMM|
* +--+--------+-------+
* +--+--------+-------+ +--------+--------+
* |15|14 7|6 0| |15 | 0|
* +--+--------+-------+ +--------+--------+
* | S|EEEEEEEE|MMMMMMM| |MMMMMMMM|MMMMMMMM| ...maybe 2 more words...
* +--+--------+-------+ +--------+--------+
* Sign (1 bit)
* Exponent (8 bits)
* Mantissa (7 bits)
*/
if (size < 4) {
/* Round to nearest 8- or 24- bit approximation */
/*
* Round to nearest 8- or 24- bit approximation.
*
* Method: if the msb of the bits that are chopped off is set, add 1 to
* the lsb of the remaining bits (one more leftward).
*
* Simplification: adding 1 to that msb will carry into the lsb. And if
* that msb is unset, then that will not carry. So we can always do that
* addition.
@ -470,12 +488,23 @@ int parse_float(
*/
int bits_chopped_off = 16 * (4 - size);
uint64_t msb_chopped_off = 1ULL << (bits_chopped_off - 1);
DF(("msb_chopped_off = %16lx\n", msb_chopped_off));
ufrac += msb_chopped_off;
DF(("ufrac: %016lx after rounding\n", ufrac));
if ((ufrac >> 56) > 0) { /* Overflow? */
ufrac >>= 1; /* Normalize */
uexp++;
DF(("ufrac: %016lx uexp: %02x\n", ufrac, uexp));
}
} else {
/*
* Round to 56-bit approximation.
*
* The method is the same as above, with 'round' as the msb of the
* chopped off bits.
*/
ufrac += round;
}
flt[0] = (unsigned) (sign | (uexp << 7) | ((ufrac >> 48) & 0x7F));

View File

@ -1,42 +1,94 @@
1 ;;;;;
2 ;
3 ; Test floating point numbers
4
5 000000 040200 .word ^F 1.0 ; 040200
6 000002 140200 .word ^F-1.0 ; 140200
7 000004 137600 .word -^F 1.0 ; 137600
8 000006 037600 .word -^F-1.0 ; 037600
9
10 000010 040706 .word ^F6.2 ; 040706
11 000012 137071 .word ^C^F6.2 ; 137071
12 000014 137071 .word ^C<^F6.2> ; 137071
13
14 000016 040706 063146 .flt2 6.2 ; 040706 063146
15 000022 040706 063146 063146 .flt4 6.2 ; 040706 063146 063146 063146
3 ; Test floating point numbers.
4 ;
5 ; The values in the comments are the reference values from MACRO V05.05.
6 ;
7 000000 040200 .word ^F 1.0 ; 040200
8 000002 140200 .word ^F-1.0 ; 140200
9 000004 137600 .word -^F 1.0 ; 137600
10 000006 037600 .word -^F-1.0 ; 037600
11
12 000010 040706 .word ^F6.2 ; 040706
13 000012 137071 .word ^C^F6.2 ; 137071
14 000014 137071 .word ^C<^F6.2> ; 137071
15
16 000016 040706 063146 .flt2 6.2 ; 040706 063146
17 000022 040706 063146 063146 .flt4 6.2 ; 040706 063146 063146 063146
000030 063146
16
17 000032 040300 000000 .flt2 1.5 ; 040300 000000
18 000036 040300 000000 000000 .flt4 1.5 ; 040300 000000 000000 000000
18
19 000032 040300 000000 .flt2 1.5 ; 040300 000000
20 000036 040300 000000 000000 .flt4 1.5 ; 040300 000000 000000 000000
000044 000000
19
20 000046 056200 .word ^F 72057594037927935 ; 056200 (rounded!)
21 000050 056200 000000 .flt2 72057594037927935 ; 056200 000000 (rounded!)
22 000054 056177 177777 177777 .flt4 72057594037927935 ; 056177 177777 177777 177777 (exact!)
000062 177777
23
24 000064 056200 .word ^F 72057594037927936 ; 056200
25 000066 056200 000000 .flt2 72057594037927936 ; 056200 000000
26 000072 056200 000000 000000 .flt4 72057594037927936 ; 056200 000000 000000 000000
000100 000000
27
28 000102 056200 000000 000000 .flt4 72057594037927937 ; 056200 000000 000000 000001
000110 000000
29
30 000112 056400 000000 000000 .flt4 144115188075855873 ; 1 << 57 +1
000120 000000
31 ; 056400 000000 000000 000000 (rounded!)
32
21
22 000046 040511 .word ^F 3.1415926535897932384626433 ; 040511
23 000050 040511 007733 .flt2 3.1415926535897932384626433 ; 040511 007733
24 000054 040511 007732 121041 .flt4 3.1415926535897932384626433 ; 040511 007732 121041 064302
000062 064302
25
26 ; Test some large numbers at the edge of the exactly representable
27 ; integers.
28 ; 1 << 56 just barely fits: it uses 57 bits but the lsb is 0 so
29 ; when it is cut off, we don't notice.
30 ; 1 << 56 + 1 R the lsb are 01 and is cut off.
31 ; 1 << 56 + 2 the lsb are 10 and the missing 0 goes unnoticed.
32
33 ; 1 << 56 - 1 consists of 56 1 bits. This value and all smaller ints
34 ; 1 << 56 - 2 get represented exactly.
35
36 ; Going up, to rounding steps of 4:
37 ;
38 ; 1 << 57
39 ; 1 << 57 + 4 next higher available representation
40 ; 1 << 57 + 8 next higher available representation
41
42 ; 1 << 56 - 3
43 000064 056177 177777 177777 .flt4 72057594037927933 ; 056177 177777 177777 177775
000072 177775
44 ; 1 << 56 - 2
45 000074 056177 177777 177777 .flt4 72057594037927934 ; 056177 177777 177777 177776
000102 177776
46
47 ; 1 << 56 - 1
48 000104 056200 .word ^F 72057594037927935 ; 056200 (rounded!)
49 000106 056200 000000 .flt2 72057594037927935 ; 056200 000000 (rounded!)
50 000112 056177 177777 177777 .flt4 72057594037927935 ; 056177 177777 177777 177777
000120 177777
51
52 ; 1 << 56
53 000122 056200 .word ^F 72057594037927936 ; 056200
54 000124 056200 000000 .flt2 72057594037927936 ; 056200 000000
55 000130 056200 000000 000000 .flt4 72057594037927936 ; 056200 000000 000000 000000
000136 000000
56
57 000140 056200 000000 000000 .flt4 72057594037927938 ; 1 << 56 + 2
000146 000001
58 ; 056200 000000 000000 000001
59
60 000150 056400 000000 000000 .flt4 144115188075855872 ; 1 << 57
000156 000000
61 ; 056400 000000 000000 000000
62 000160 056400 000000 000000 .flt4 144115188075855873 ; 1 << 57 + 1
000166 000000
63 ; 056400 000000 000000 000000 (inexact)
64 000170 056400 000000 000000 .flt4 144115188075855874 ; 1 << 57 + 2
000176 000001
65 ; 056400 000000 000000 000001 (inexact)
66 000200 056400 000000 000000 .flt4 144115188075855875 ; 1 << 57 + 3
000206 000001
67 ; 056400 000000 000000 000001 (inexact)
68 000210 056400 000000 000000 .flt4 144115188075855876 ; 1 << 57 + 4
000216 000001
69 ; 056400 000000 000000 000001 (exact!)
70 000220 056400 000000 000000 .flt4 144115188075855880 ; 1 << 57 + 8
000226 000002
71 ; 056400 000000 000000 000002 (exact!)
72
73 ; This one triggers rounding up (round == 1)
74 000230 040725 052507 055061 .flt4 6.66666 ; 040725 052507 055061 122276
000236 122276
75
75
Symbol table
@ -47,4 +99,4 @@ Symbol table
Program sections:
. ABS. 000000 000 (RW,I,GBL,ABS,OVR,NOSAV)
000122 001 (RW,I,LCL,REL,CON,NOSAV)
000240 001 (RW,I,LCL,REL,CON,NOSAV)

View File

@ -1,7 +1,9 @@
;;;;;
;
; Test floating point numbers
; Test floating point numbers.
;
; The values in the comments are the reference values from MACRO V05.05.
;
.word ^F 1.0 ; 040200
.word ^F-1.0 ; 140200
.word -^F 1.0 ; 137600
@ -17,16 +19,57 @@
.flt2 1.5 ; 040300 000000
.flt4 1.5 ; 040300 000000 000000 000000
.word ^F 3.1415926535897932384626433 ; 040511
.flt2 3.1415926535897932384626433 ; 040511 007733
.flt4 3.1415926535897932384626433 ; 040511 007732 121041 064302
; Test some large numbers at the edge of the exactly representable
; integers.
; 1 << 56 just barely fits: it uses 57 bits but the lsb is 0 so
; when it is cut off, we don't notice.
; 1 << 56 + 1 R the lsb are 01 and is cut off.
; 1 << 56 + 2 the lsb are 10 and the missing 0 goes unnoticed.
; 1 << 56 - 1 consists of 56 1 bits. This value and all smaller ints
; 1 << 56 - 2 get represented exactly.
; Going up, to rounding steps of 4:
;
; 1 << 57
; 1 << 57 + 4 next higher available representation
; 1 << 57 + 8 next higher available representation
; 1 << 56 - 3
.flt4 72057594037927933 ; 056177 177777 177777 177775
; 1 << 56 - 2
.flt4 72057594037927934 ; 056177 177777 177777 177776
; 1 << 56 - 1
.word ^F 72057594037927935 ; 056200 (rounded!)
.flt2 72057594037927935 ; 056200 000000 (rounded!)
.flt4 72057594037927935 ; 056177 177777 177777 177777 (exact!)
.flt4 72057594037927935 ; 056177 177777 177777 177777
; 1 << 56
.word ^F 72057594037927936 ; 056200
.flt2 72057594037927936 ; 056200 000000
.flt4 72057594037927936 ; 056200 000000 000000 000000
.flt4 72057594037927937 ; 056200 000000 000000 000001
.flt4 72057594037927938 ; 1 << 56 + 2
; 056200 000000 000000 000001
.flt4 144115188075855873 ; 1 << 57 +1
; 056400 000000 000000 000000 (rounded!)
.flt4 144115188075855872 ; 1 << 57
; 056400 000000 000000 000000
.flt4 144115188075855873 ; 1 << 57 + 1
; 056400 000000 000000 000000 (inexact)
.flt4 144115188075855874 ; 1 << 57 + 2
; 056400 000000 000000 000001 (inexact)
.flt4 144115188075855875 ; 1 << 57 + 3
; 056400 000000 000000 000001 (inexact)
.flt4 144115188075855876 ; 1 << 57 + 4
; 056400 000000 000000 000001 (exact!)
.flt4 144115188075855880 ; 1 << 57 + 8
; 056400 000000 000000 000002 (exact!)
; This one triggers rounding up (round == 1)
.flt4 6.66666 ; 040725 052507 055061 122276