mirror of
https://github.com/open-simh/simtools.git
synced 2026-01-14 07:39:37 +00:00
Add some more tests; rounding of 56-bit mantissa now works.
This commit is contained in:
parent
4f2f813dee
commit
90943eaf49
53
parse.c
53
parse.c
@ -318,7 +318,9 @@ int get_mode(
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
#if DEBUG
|
||||
#define DEBUG_FLOAT 0
|
||||
#if DEBUG_FLOAT
|
||||
|
||||
void
|
||||
printflt(unsigned *flt, int size)
|
||||
{
|
||||
@ -328,11 +330,15 @@ printflt(unsigned *flt, int size)
|
||||
printf("ufrac: %02x", flt[0] & 0x007F);
|
||||
|
||||
for (int i = 1; i < size; i++) {
|
||||
printf(" %04x", flt[i]);
|
||||
printf(" %04x", flt[i]);
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
#define DF(x) printf x
|
||||
#else
|
||||
#define DF(x)
|
||||
#endif
|
||||
|
||||
/*
|
||||
@ -392,10 +398,13 @@ int parse_float(
|
||||
int sexp; /* Signed exponent */
|
||||
unsigned uexp; /* Unsigned excess-128 exponent */
|
||||
unsigned sign = 0; /* Sign mask */
|
||||
unsigned round = 0; /* Rounding bit */
|
||||
|
||||
i = sscanf(cp, SCANF_FMT "%n", &d, &n);
|
||||
if (i == 0)
|
||||
return 0; /* Wasn't able to convert */
|
||||
DF(("LDBL_MANT_DIG: %d\n", LDBL_MANT_DIG));
|
||||
DF(("%Lf input: %s", d, cp));
|
||||
|
||||
cp += n;
|
||||
if (endp)
|
||||
@ -409,11 +418,13 @@ int parse_float(
|
||||
}
|
||||
|
||||
frac = FREXP(d, &sexp); /* Separate into exponent and mantissa */
|
||||
DF(("frac: %Lf %La sexp: %d\n", frac, frac, sexp));
|
||||
if (sexp < -127 || sexp > 127)
|
||||
return 0; /* Exponent out of range. */
|
||||
|
||||
uexp = sexp + 128; /* Make excess-128 mode */
|
||||
uexp &= 0xff; /* express in 8 bits */
|
||||
DF(("uexp: %02x\n", uexp));
|
||||
|
||||
/*
|
||||
* frexp guarantees its fractional return value is
|
||||
@ -435,34 +446,41 @@ int parse_float(
|
||||
* However the bit immediately above the MSB is always 1
|
||||
* because the value is normalized. So it's actually
|
||||
* 8 bits, 24 bits, or 56 bits.
|
||||
* We multiply the fractional part of our value by
|
||||
* We effectively multiply the fractional part of our value by
|
||||
* 2**56 to fully expose all of those bits (including
|
||||
* the MSB which is 1).
|
||||
* However as an intermediate step, we really multiply by
|
||||
* 2**57, so we get one lsb for possible later rounding
|
||||
* purposes. After that, we divide by 2 again.
|
||||
*/
|
||||
|
||||
|
||||
/* The following big literal is 2 to the 56th power: */
|
||||
ufrac = (uint64_t) (frac * 72057594037927936.0); /* Align fraction bits */
|
||||
/* The following big literal is 2 to the 57th power: */
|
||||
ufrac = (uint64_t) (frac * 144115188075855872.0); /* Align fraction bits */
|
||||
round = ufrac & 1;
|
||||
ufrac >>= 1;
|
||||
DF(("ufrac: %016lx round: %d\n", ufrac, round));
|
||||
DF(("56 : %016lx\n", (1UL<<56) - 1));
|
||||
|
||||
/* ufrac is now >= 2**55 and < 2**56 */
|
||||
|
||||
/* Round from FLT4 to FLT2 or single-word float.
|
||||
*
|
||||
* +--+--------+-------+
|
||||
* |15|14 7|6 0|
|
||||
* +--+--------+-------+
|
||||
* | S|EEEEEEEE|MMMMMMM|
|
||||
* +--+--------+-------+
|
||||
* +--+--------+-------+ +--------+--------+
|
||||
* |15|14 7|6 0| |15 | 0|
|
||||
* +--+--------+-------+ +--------+--------+
|
||||
* | S|EEEEEEEE|MMMMMMM| |MMMMMMMM|MMMMMMMM| ...maybe 2 more words...
|
||||
* +--+--------+-------+ +--------+--------+
|
||||
* Sign (1 bit)
|
||||
* Exponent (8 bits)
|
||||
* Mantissa (7 bits)
|
||||
*/
|
||||
if (size < 4) {
|
||||
/* Round to nearest 8- or 24- bit approximation */
|
||||
/*
|
||||
* Round to nearest 8- or 24- bit approximation.
|
||||
*
|
||||
* Method: if the msb of the bits that are chopped off is set, add 1 to
|
||||
* the lsb of the remaining bits (one more leftward).
|
||||
*
|
||||
* Simplification: adding 1 to that msb will carry into the lsb. And if
|
||||
* that msb is unset, then that will not carry. So we can always do that
|
||||
* addition.
|
||||
@ -470,12 +488,23 @@ int parse_float(
|
||||
*/
|
||||
int bits_chopped_off = 16 * (4 - size);
|
||||
uint64_t msb_chopped_off = 1ULL << (bits_chopped_off - 1);
|
||||
DF(("msb_chopped_off = %16lx\n", msb_chopped_off));
|
||||
ufrac += msb_chopped_off;
|
||||
DF(("ufrac: %016lx after rounding\n", ufrac));
|
||||
|
||||
if ((ufrac >> 56) > 0) { /* Overflow? */
|
||||
ufrac >>= 1; /* Normalize */
|
||||
uexp++;
|
||||
DF(("ufrac: %016lx uexp: %02x\n", ufrac, uexp));
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* Round to 56-bit approximation.
|
||||
*
|
||||
* The method is the same as above, with 'round' as the msb of the
|
||||
* chopped off bits.
|
||||
*/
|
||||
ufrac += round;
|
||||
}
|
||||
|
||||
flt[0] = (unsigned) (sign | (uexp << 7) | ((ufrac >> 48) & 0x7F));
|
||||
|
||||
@ -1,42 +1,94 @@
|
||||
1 ;;;;;
|
||||
2 ;
|
||||
3 ; Test floating point numbers
|
||||
4
|
||||
5 000000 040200 .word ^F 1.0 ; 040200
|
||||
6 000002 140200 .word ^F-1.0 ; 140200
|
||||
7 000004 137600 .word -^F 1.0 ; 137600
|
||||
8 000006 037600 .word -^F-1.0 ; 037600
|
||||
9
|
||||
10 000010 040706 .word ^F6.2 ; 040706
|
||||
11 000012 137071 .word ^C^F6.2 ; 137071
|
||||
12 000014 137071 .word ^C<^F6.2> ; 137071
|
||||
13
|
||||
14 000016 040706 063146 .flt2 6.2 ; 040706 063146
|
||||
15 000022 040706 063146 063146 .flt4 6.2 ; 040706 063146 063146 063146
|
||||
3 ; Test floating point numbers.
|
||||
4 ;
|
||||
5 ; The values in the comments are the reference values from MACRO V05.05.
|
||||
6 ;
|
||||
7 000000 040200 .word ^F 1.0 ; 040200
|
||||
8 000002 140200 .word ^F-1.0 ; 140200
|
||||
9 000004 137600 .word -^F 1.0 ; 137600
|
||||
10 000006 037600 .word -^F-1.0 ; 037600
|
||||
11
|
||||
12 000010 040706 .word ^F6.2 ; 040706
|
||||
13 000012 137071 .word ^C^F6.2 ; 137071
|
||||
14 000014 137071 .word ^C<^F6.2> ; 137071
|
||||
15
|
||||
16 000016 040706 063146 .flt2 6.2 ; 040706 063146
|
||||
17 000022 040706 063146 063146 .flt4 6.2 ; 040706 063146 063146 063146
|
||||
000030 063146
|
||||
16
|
||||
17 000032 040300 000000 .flt2 1.5 ; 040300 000000
|
||||
18 000036 040300 000000 000000 .flt4 1.5 ; 040300 000000 000000 000000
|
||||
18
|
||||
19 000032 040300 000000 .flt2 1.5 ; 040300 000000
|
||||
20 000036 040300 000000 000000 .flt4 1.5 ; 040300 000000 000000 000000
|
||||
000044 000000
|
||||
19
|
||||
20 000046 056200 .word ^F 72057594037927935 ; 056200 (rounded!)
|
||||
21 000050 056200 000000 .flt2 72057594037927935 ; 056200 000000 (rounded!)
|
||||
22 000054 056177 177777 177777 .flt4 72057594037927935 ; 056177 177777 177777 177777 (exact!)
|
||||
000062 177777
|
||||
23
|
||||
24 000064 056200 .word ^F 72057594037927936 ; 056200
|
||||
25 000066 056200 000000 .flt2 72057594037927936 ; 056200 000000
|
||||
26 000072 056200 000000 000000 .flt4 72057594037927936 ; 056200 000000 000000 000000
|
||||
000100 000000
|
||||
27
|
||||
28 000102 056200 000000 000000 .flt4 72057594037927937 ; 056200 000000 000000 000001
|
||||
000110 000000
|
||||
29
|
||||
30 000112 056400 000000 000000 .flt4 144115188075855873 ; 1 << 57 +1
|
||||
000120 000000
|
||||
31 ; 056400 000000 000000 000000 (rounded!)
|
||||
32
|
||||
21
|
||||
22 000046 040511 .word ^F 3.1415926535897932384626433 ; 040511
|
||||
23 000050 040511 007733 .flt2 3.1415926535897932384626433 ; 040511 007733
|
||||
24 000054 040511 007732 121041 .flt4 3.1415926535897932384626433 ; 040511 007732 121041 064302
|
||||
000062 064302
|
||||
25
|
||||
26 ; Test some large numbers at the edge of the exactly representable
|
||||
27 ; integers.
|
||||
28 ; 1 << 56 just barely fits: it uses 57 bits but the lsb is 0 so
|
||||
29 ; when it is cut off, we don't notice.
|
||||
30 ; 1 << 56 + 1 R the lsb are 01 and is cut off.
|
||||
31 ; 1 << 56 + 2 the lsb are 10 and the missing 0 goes unnoticed.
|
||||
32
|
||||
33 ; 1 << 56 - 1 consists of 56 1 bits. This value and all smaller ints
|
||||
34 ; 1 << 56 - 2 get represented exactly.
|
||||
35
|
||||
36 ; Going up, to rounding steps of 4:
|
||||
37 ;
|
||||
38 ; 1 << 57
|
||||
39 ; 1 << 57 + 4 next higher available representation
|
||||
40 ; 1 << 57 + 8 next higher available representation
|
||||
41
|
||||
42 ; 1 << 56 - 3
|
||||
43 000064 056177 177777 177777 .flt4 72057594037927933 ; 056177 177777 177777 177775
|
||||
000072 177775
|
||||
44 ; 1 << 56 - 2
|
||||
45 000074 056177 177777 177777 .flt4 72057594037927934 ; 056177 177777 177777 177776
|
||||
000102 177776
|
||||
46
|
||||
47 ; 1 << 56 - 1
|
||||
48 000104 056200 .word ^F 72057594037927935 ; 056200 (rounded!)
|
||||
49 000106 056200 000000 .flt2 72057594037927935 ; 056200 000000 (rounded!)
|
||||
50 000112 056177 177777 177777 .flt4 72057594037927935 ; 056177 177777 177777 177777
|
||||
000120 177777
|
||||
51
|
||||
52 ; 1 << 56
|
||||
53 000122 056200 .word ^F 72057594037927936 ; 056200
|
||||
54 000124 056200 000000 .flt2 72057594037927936 ; 056200 000000
|
||||
55 000130 056200 000000 000000 .flt4 72057594037927936 ; 056200 000000 000000 000000
|
||||
000136 000000
|
||||
56
|
||||
57 000140 056200 000000 000000 .flt4 72057594037927938 ; 1 << 56 + 2
|
||||
000146 000001
|
||||
58 ; 056200 000000 000000 000001
|
||||
59
|
||||
60 000150 056400 000000 000000 .flt4 144115188075855872 ; 1 << 57
|
||||
000156 000000
|
||||
61 ; 056400 000000 000000 000000
|
||||
62 000160 056400 000000 000000 .flt4 144115188075855873 ; 1 << 57 + 1
|
||||
000166 000000
|
||||
63 ; 056400 000000 000000 000000 (inexact)
|
||||
64 000170 056400 000000 000000 .flt4 144115188075855874 ; 1 << 57 + 2
|
||||
000176 000001
|
||||
65 ; 056400 000000 000000 000001 (inexact)
|
||||
66 000200 056400 000000 000000 .flt4 144115188075855875 ; 1 << 57 + 3
|
||||
000206 000001
|
||||
67 ; 056400 000000 000000 000001 (inexact)
|
||||
68 000210 056400 000000 000000 .flt4 144115188075855876 ; 1 << 57 + 4
|
||||
000216 000001
|
||||
69 ; 056400 000000 000000 000001 (exact!)
|
||||
70 000220 056400 000000 000000 .flt4 144115188075855880 ; 1 << 57 + 8
|
||||
000226 000002
|
||||
71 ; 056400 000000 000000 000002 (exact!)
|
||||
72
|
||||
73 ; This one triggers rounding up (round == 1)
|
||||
74 000230 040725 052507 055061 .flt4 6.66666 ; 040725 052507 055061 122276
|
||||
000236 122276
|
||||
75
|
||||
75
|
||||
|
||||
|
||||
Symbol table
|
||||
@ -47,4 +99,4 @@ Symbol table
|
||||
Program sections:
|
||||
|
||||
. ABS. 000000 000 (RW,I,GBL,ABS,OVR,NOSAV)
|
||||
000122 001 (RW,I,LCL,REL,CON,NOSAV)
|
||||
000240 001 (RW,I,LCL,REL,CON,NOSAV)
|
||||
|
||||
@ -1,7 +1,9 @@
|
||||
;;;;;
|
||||
;
|
||||
; Test floating point numbers
|
||||
|
||||
; Test floating point numbers.
|
||||
;
|
||||
; The values in the comments are the reference values from MACRO V05.05.
|
||||
;
|
||||
.word ^F 1.0 ; 040200
|
||||
.word ^F-1.0 ; 140200
|
||||
.word -^F 1.0 ; 137600
|
||||
@ -17,16 +19,57 @@
|
||||
.flt2 1.5 ; 040300 000000
|
||||
.flt4 1.5 ; 040300 000000 000000 000000
|
||||
|
||||
.word ^F 3.1415926535897932384626433 ; 040511
|
||||
.flt2 3.1415926535897932384626433 ; 040511 007733
|
||||
.flt4 3.1415926535897932384626433 ; 040511 007732 121041 064302
|
||||
|
||||
; Test some large numbers at the edge of the exactly representable
|
||||
; integers.
|
||||
; 1 << 56 just barely fits: it uses 57 bits but the lsb is 0 so
|
||||
; when it is cut off, we don't notice.
|
||||
; 1 << 56 + 1 R the lsb are 01 and is cut off.
|
||||
; 1 << 56 + 2 the lsb are 10 and the missing 0 goes unnoticed.
|
||||
|
||||
; 1 << 56 - 1 consists of 56 1 bits. This value and all smaller ints
|
||||
; 1 << 56 - 2 get represented exactly.
|
||||
|
||||
; Going up, to rounding steps of 4:
|
||||
;
|
||||
; 1 << 57
|
||||
; 1 << 57 + 4 next higher available representation
|
||||
; 1 << 57 + 8 next higher available representation
|
||||
|
||||
; 1 << 56 - 3
|
||||
.flt4 72057594037927933 ; 056177 177777 177777 177775
|
||||
; 1 << 56 - 2
|
||||
.flt4 72057594037927934 ; 056177 177777 177777 177776
|
||||
|
||||
; 1 << 56 - 1
|
||||
.word ^F 72057594037927935 ; 056200 (rounded!)
|
||||
.flt2 72057594037927935 ; 056200 000000 (rounded!)
|
||||
.flt4 72057594037927935 ; 056177 177777 177777 177777 (exact!)
|
||||
.flt4 72057594037927935 ; 056177 177777 177777 177777
|
||||
|
||||
; 1 << 56
|
||||
.word ^F 72057594037927936 ; 056200
|
||||
.flt2 72057594037927936 ; 056200 000000
|
||||
.flt4 72057594037927936 ; 056200 000000 000000 000000
|
||||
|
||||
.flt4 72057594037927937 ; 056200 000000 000000 000001
|
||||
.flt4 72057594037927938 ; 1 << 56 + 2
|
||||
; 056200 000000 000000 000001
|
||||
|
||||
.flt4 144115188075855873 ; 1 << 57 +1
|
||||
; 056400 000000 000000 000000 (rounded!)
|
||||
.flt4 144115188075855872 ; 1 << 57
|
||||
; 056400 000000 000000 000000
|
||||
.flt4 144115188075855873 ; 1 << 57 + 1
|
||||
; 056400 000000 000000 000000 (inexact)
|
||||
.flt4 144115188075855874 ; 1 << 57 + 2
|
||||
; 056400 000000 000000 000001 (inexact)
|
||||
.flt4 144115188075855875 ; 1 << 57 + 3
|
||||
; 056400 000000 000000 000001 (inexact)
|
||||
.flt4 144115188075855876 ; 1 << 57 + 4
|
||||
; 056400 000000 000000 000001 (exact!)
|
||||
.flt4 144115188075855880 ; 1 << 57 + 8
|
||||
; 056400 000000 000000 000002 (exact!)
|
||||
|
||||
; This one triggers rounding up (round == 1)
|
||||
.flt4 6.66666 ; 040725 052507 055061 122276
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user