Parse floats using integers (64 bits) only.

The algorithm is close to what the reference version is doing.
This commit is contained in:
Olaf Seibert
2022-06-19 14:56:14 +02:00
parent bb56fc33f1
commit 3109f40bd5
3 changed files with 315 additions and 53 deletions

287
parse.c
View File

@@ -417,7 +417,6 @@ int get_fp_src_mode(
}
#define DEBUG_FLOAT 0
#if DEBUG_FLOAT
void
printflt(unsigned *flt, int size)
@@ -434,9 +433,10 @@ printflt(unsigned *flt, int size)
printf("\n");
}
#define DF(x) printf x
#if DEBUG_FLOAT
#define DF(...) printf(__VA_ARGS__)
#else
#define DF(x)
#define DF(...)
#endif
/*
@@ -470,12 +470,17 @@ printflt(unsigned *flt, int size)
# define FREXP frexp
#endif
#define PARSE_FLOAT_WITH_FLOATS 0
#define PARSE_FLOAT_WITH_INTS 1
/* Parse PDP-11 64-bit floating point format. */
/* Give a pointer to "size" words to receive the result. */
/* Note: there are probably degenerate cases that store incorrect
results. For example, I think rounding up a FLT2 might cause
exponent overflow. Sorry. */
#if PARSE_FLOAT_WITH_FLOATS
/* Note also that the full 56 bits of precision probably aren't always
available on the source platform, given the widespread application
of IEEE floating point formats, so expect some differences. Sorry
@@ -502,8 +507,8 @@ int parse_float(
i = sscanf(cp, SCANF_FMT "%n", &d, &n);
if (i == 0)
return 0; /* Wasn't able to convert */
DF(("LDBL_MANT_DIG: %d\n", LDBL_MANT_DIG));
DF(("%Lf input: %s", d, cp));
DF("LDBL_MANT_DIG: %d\n", LDBL_MANT_DIG);
DF("%Lf input: %s", d, cp);
cp += n;
if (endp)
@@ -517,13 +522,13 @@ int parse_float(
}
frac = FREXP(d, &sexp); /* Separate into exponent and mantissa */
DF(("frac: %Lf %La sexp: %d\n", frac, frac, sexp));
DF("frac: %Lf %La sexp: %d\n", frac, frac, sexp);
if (sexp < -127 || sexp > 127)
return 0; /* Exponent out of range. */
uexp = sexp + 128; /* Make excess-128 mode */
uexp &= 0xff; /* express in 8 bits */
DF(("uexp: %02x\n", uexp));
DF("uexp: %02x\n", uexp);
/*
* frexp guarantees its fractional return value is
@@ -555,8 +560,8 @@ int parse_float(
/* The following big literal is 2 to the 57th power: */
ufrac = (uint64_t) (frac * 144115188075855872.0); /* Align fraction bits */
DF(("ufrac: %016lx\n", ufrac));
DF(("56 : %016lx\n", (1UL<<57) - 2));
DF("ufrac: %016lx\n", ufrac);
DF("56 : %016lx\n", (1UL<<57) - 2);
/*
* ufrac is now >= 2**56 and < 2**57.
@@ -589,14 +594,14 @@ int parse_float(
onehalf = 1ULL << (16 * (4-size));
ufrac += onehalf;
DF(("onehalf=%016lx, ufrac+onehalf: %016lx\n", onehalf, ufrac));
DF("onehalf=%016lx, ufrac+onehalf: %016lx\n", onehalf, ufrac);
/* Did it roll up to a value 2**56? */
if ((ufrac >> 57) > 0) { /* Overflow? */
if (uexp < 0xFF) {
ufrac >>= 1; /* Normalize */
uexp++;
DF(("ufrac: %016lx uexp: %02x (normalized)\n", ufrac, uexp));
DF("ufrac: %016lx uexp: %02x (normalized)\n", ufrac, uexp);
} else {
/*
* If rounding and then normalisation would cause the exponent to
@@ -606,7 +611,7 @@ int parse_float(
* readable to just undo it here.
*/
ufrac -= onehalf;
DF(("don't round: exponent overflow"));
DF("don't round: exponent overflow\n");
}
}
@@ -624,6 +629,264 @@ int parse_float(
return 1;
}
#elif PARSE_FLOAT_WITH_INTS
#define DUMP3 DF("exp: %d. %d %016llx\n", \
float_dec_exponent, float_bin_exponent, float_buf)
/*
* Parse floating point denotations using integer operations only.
* Follows the reference version's implementation fairly closely.
*/
int parse_float(
char *cp,
char **endp,
int size,
unsigned *flt)
{
int float_sign = 0;
int float_dot = 0;
int float_dec_exponent = 0;
int float_bin_exponent = 0;
int ok_chars = 0;
uint64_t float_buf = 0;
float_bin_exponent = 65;
cp = skipwhite(cp);
if (*cp == '+') {
cp++;
} else if (*cp == '-') {
float_sign = 1;
cp++;
}
DF("float_sign: %d\n", float_sign);
for (;;) {
if (isdigit(*cp)) {
/* Can we multiply by 10? */
DF("digit: %c\n", *cp);
ok_chars++;
if (float_buf & 0xF800000000000000ULL) { /* [0] & 0174000, 0xF800 */
/* No, that would overflow */
float_dec_exponent++; /* no, compensate for the snub */
/*
* Explanation of the above comment:
* - after the decimal separator, we should ignore extra digits
* completely. Since float_dot == -1, the exponent will be
* decremented below, and compensate for that here.
* - before the decimal separator, we don't add the digit
* (which would overflow) so we lose some lesser significant
* bits. float_dot == 0 in this case, and we do want the
* exponent increased to compensate for the ignored digit.
* So in both cases the right thing happens.
*/
} else {
/* Multiply by 10 */
float_buf *= 10;
/* Add digit */
float_buf += *cp - '0';
}
float_dec_exponent -= float_dot;
DUMP3;
cp++;
} else if (*cp == '.') {
DF("dot: %c\n", *cp);
ok_chars++;
if (float_dot) {
// error... printf("Error: repeated decimal separator\n");
return 0;
}
float_dot = 1;
cp++;
} else {
DF("Other char: %c\n", *cp);
if (ok_chars == 0) {
return 0; /* No valid number found */
}
break;
}
}
if (toupper(*cp) == 'E') {
cp++;
int exp = strtol(cp, &cp, 10);
float_dec_exponent += exp;
DF("E%d -> dec_exp %d\n", exp, float_dec_exponent);
}
if (endp)
*endp = cp;
/* FLTG3 */
if (float_buf) {
/* Non-zero */
DF("Non-zero: decimal exponent: %d\n", float_dec_exponent);
while (float_dec_exponent > 0) { /* 31$ */
DUMP3;
if (float_buf <= 0x3316000000000000ULL) { /* 031426, 13078, 0x3316, 65390 / 5 */
/* Multiply by 5 and 2 */
DF("Multiply by 5 and 2\n");
float_buf *= 5;
float_bin_exponent++;
DUMP3;
} else {
/* Multiply by 5/4 and 8 32$ */
DF("Multiply by 5/4 and 8\n");
if (float_buf >= 0xCCCC000000000000ULL) {
float_buf >>= 1;
float_bin_exponent++;
}
float_buf += (float_buf >> 2);
float_bin_exponent += 3;
DUMP3;
}
float_dec_exponent--;
}
while (float_dec_exponent < 0) { /* 41$ ish */
DUMP3;
DF("Prepare for division by left-shifting the bits\n");
/* Prepare for division by left-shifting the bits */
while (((float_buf >> 63) & 1) == 0) { /* 41$ */
float_bin_exponent--; /* 40$ */
float_buf <<= 1;
DUMP3;
}
/* Divide by 2 */
float_buf >>= 1;
uint64_t float_save = float_buf;
DUMP3;
DF("float_save: %016llx\n", float_save);
/* Divide by 5:
* multiplying by (2**66 - 4) / 5 = 0xCCCCCCCCCCCCCCCC
* while dropping the 64 least significant bits.
* This is nearly exact but exact enough?
* Somehow there is another doubling in here, so the
* final result = start * 8 / 5.
*/
for (int i = 16 * 2; i > 0; i--) {
if ((i & 1) == 0) { /* 42$ */
float_buf >>= 2;
}
float_buf >>= 1; /* 43$ */
float_buf += float_save;
DF("Loop i=%2d: ", i); DUMP3;
}
/* It's not simply dividing by 5, it also multiplies by 8,
* so we need to adjust the exponent here. */
float_bin_exponent -= 3;
float_dec_exponent++;
DUMP3;
}
/* Normalize the mantissa: shift a single 1 out to the left */
DF("Normalize the mantissa: shift a single 1 out to the left\n");
int carry;
do {
/* FLTG5 */
float_bin_exponent--;
carry = (float_buf >> 63) & 1;
float_buf <<= 1;
DUMP3;
} while (carry == 0);
/* Set excess 128. */
DF("Set excess 128.\n");
float_bin_exponent += 0200;
DUMP3;
if (float_bin_exponent & 0xFF00) {
/* Error N. Underflow. 2$ */
report(NULL, "Error N (underflow)\n");
return 0;
}
/* Shift right 9 positions to make room for sign and exponent 3$ */
DF("Shift right 9 positions to make room for sign and exponent\n");
int round = (float_buf >> 8) & 0x0001;
float_buf >>= 9;
float_buf |= (uint64_t)float_bin_exponent << (64-9);
DUMP3;
/*
* This rounding step seems always needed to make the result the same
* as the implementation with long doubles.
*
* This may be because of the slight imprecision of the divisions by 10?
*
* It is needed to get some exact results for values that are indeed
* exactly representable. Examples:
*
* (2**9-3)/2**9 = 0.994140625 = 0,11111101
* 407e 7fff ffff ffff -> 407e 8000 0000 0000 (correct)
* 1.00 (or 100E-2) divides 100 by 100 and gets
* 407f ffff ffff ffff -> 4080 0000 0000 0000 (correct)
*
* The reference implementation omits this rounding for size != 4:
* it has only one rounding step, which always depends on the size.
*/
float_buf += round;
DF("round: size = 4, round = %d\n", round);
/* Round (there is a truncation option to omit this step) */
if (1) {
uint64_t onehalf;
if (size < 4) {
/* 1 << 31 or 1 << 47 */
onehalf = 1ULL << ((16 * (4-size)) - 1);
DF("round: size = %d, onehalf = %016llx\n", size, onehalf);
float_buf += onehalf;
DUMP3;
/* The rounding bit is the lesser significant bit that's just
* outside the returned result. If we round, we add it to the
* returned value.
*
* If there is a carry-out of the mantissa, it gets added to
* the exponent (increasing it by 1).
*
* If that also overflows, we truely have overflow.
*/
}
DF("After rounding\n");
DUMP3;
}
if (float_buf & 0x8000000000000000ULL) {
// 6$ error T: exponent overflow
report(NULL, "error T: exponent overflow\n");
return 0;
}
/* 7$ */
float_buf |= (uint64_t)float_sign << 63;
DF("Put in float_sign: "); DUMP3;
}
/* Now put together the result from the parts */
flt[0] = (float_buf >> 48) & 0xFFFF;
if (size > 1) {
flt[1] = (float_buf >> 32) & 0xFFFF;
if (size > 2) {
flt[2] = (float_buf >> 16) & 0xFFFF;
flt[3] = (float_buf >> 0) & 0xFFFF;
}
}
return 1;
}
#else
# error How are you going to parse floats?
#endif
/* The recursive-descent expression parser parse_expr. */

View File

@@ -97,19 +97,19 @@
82 ; V05.06
83
84 000240 040177 .word ^F 0.994140625 ; (2**9-3)/2**9 040176 040177
85 000242 040176 100000 000000 .flt4 0.994140625
85 000242 040176 100000 000000 .flt4 0.994140625 ; same-> 040176 100000 0 0
000250 000000
86
87 000252 040200 .word ^F 0.998046875 ; (2**9-1)/2**9 040177 040200
88 000254 040177 100000 000000 .flt4 0.998046875
88 000254 040177 100000 000000 .flt4 0.998046875 ; same-> 040177 100000 0 0
000262 000000
89
90 000264 040201 .word ^F 1.00390625 ; (2**8+1)/2**8 040200 040201
91 000266 040200 100000 000000 .flt4 1.00390625
91 000266 040200 100000 000000 .flt4 1.00390625 ; same-> 040200 100000 0 0
000274 000000
92
93 000276 040202 .word ^F 1.01171875 ; (2**8+3)/2**8 040201 040202
94 000300 040201 100000 000000 .flt4 1.01171875
94 000300 040201 100000 000000 .flt4 1.01171875 ; same-> 040201 100000 0 0
000306 000000
95
96 000310 077777 177777 177777 .flt4 1.701411834604692307e+38 ; 077777 177777 177777 177777
@@ -152,47 +152,46 @@ test-float.mac:125: ***ERROR Invalid addressing mode (1st operand, fsrc: Invalid
127 000444 173027 000002 subf #^D<1+1>,ac0 ; literally
128 000450 173027 000002 subf #^D 1+1 ,ac0 ; literally
129 000454 173027 042572 subf #1e3,ac0 ; as float
test-float.mac:130: ***ERROR Invalid syntax (comma expected)
130 subf #1e 3,ac0 ; TODO: accepted by MACRO11 as 1E3 (but not 1 e3, 1 e 3)
130 000460 173027 042572 subf #1e 3,ac0 ; accepted by MACRO11 as 1E3 (but not 1 e3, 1 e 3)
131 000001 a = 1
132 000003 e3 = 3
133 000460 173027 000001 subf #a,ac0 ; a interpreted as bit pattern
134 000464 173027 000001 subf #<a>,ac0 ; a interpreted as bit pattern
135 000470 173027 000003 subf #e3,ac0 ; e3 is the label
133 000464 173027 000001 subf #a,ac0 ; a interpreted as bit pattern
134 000470 173027 000001 subf #<a>,ac0 ; a interpreted as bit pattern
135 000474 173027 000003 subf #e3,ac0 ; e3 is the label
test-float.mac:136: ***ERROR Invalid addressing mode (1st operand, fsrc: Invalid expression after '#')
136 subf #<1e3>,ac0 ; error N
137
test-float.mac:138: ***ERROR Junk at end of line ('5 ; bad: ')
138 000474 170627 000002 absf #2.5 ; bad: operand is destination
138 000500 170627 000002 absf #2.5 ; bad: operand is destination
test-float.mac:139: ***ERROR Junk at end of line ('5 ; bad: ')
139 000500 170527 000002 tstd #2.5 ; bad: operand is considered FDST by the arch handbook
139 000504 170527 000002 tstd #2.5 ; bad: operand is considered FDST by the arch handbook
test-float.mac:140: ***ERROR Junk at end of line ('5 ; bad: junk')
140 000504 174027 000002 stf ac0,#2.5 ; bad: junk at end of line
141 000510 174027 000002 stf ac0,#2 ; doesn't makes sense but MACRO11 allows it
140 000510 174027 000002 stf ac0,#2.5 ; bad: junk at end of line
141 000514 174027 000002 stf ac0,#2 ; doesn't makes sense but MACRO11 allows it
142
143 ; Test immediate source argument for instructions that have one (src or fsrc)
144
145 000514 172027 040200 addd #1,ac0 ; float
146 000520 172027 040200 addf #1,ac0 ; float
147 000524 173427 040200 cmpd #1,ac0 ; float
148 000530 173427 040200 cmpf #1,ac0 ; float
149 000534 174427 040200 divd #1,ac0 ; float
150 000540 174427 040200 divf #1,ac0 ; float
151 000544 177427 040200 ldcdf #1,ac0 ; float
152 000550 177427 040200 ldcfd #1,ac0 ; float
153 000554 177027 000001 ldcid #1,ac0 ; integer
154 000560 177027 000001 ldcif #1,ac0 ; integer
155 000564 177027 000001 ldcld #1,ac0 ; integer
156 000570 177027 000001 ldclf #1,ac0 ; integer
157 000574 172427 040200 ldd #1,ac0 ; float
158 000600 172427 040200 ldf #1,ac0 ; float
159 000604 176427 000001 ldexp #1,ac0 ; integer
160 000610 171427 040200 modd #1,ac0 ; float
161 000614 171427 040200 modf #1,ac0 ; float
162 000620 171027 040200 muld #1,ac0 ; float
163 000624 171027 040200 mulf #1,ac0 ; float
164 000630 173027 040200 subd #1,ac0 ; float
165 000634 173027 040200 subf #1,ac0 ; float
145 000520 172027 040200 addd #1,ac0 ; float
146 000524 172027 040200 addf #1,ac0 ; float
147 000530 173427 040200 cmpd #1,ac0 ; float
148 000534 173427 040200 cmpf #1,ac0 ; float
149 000540 174427 040200 divd #1,ac0 ; float
150 000544 174427 040200 divf #1,ac0 ; float
151 000550 177427 040200 ldcdf #1,ac0 ; float
152 000554 177427 040200 ldcfd #1,ac0 ; float
153 000560 177027 000001 ldcid #1,ac0 ; integer
154 000564 177027 000001 ldcif #1,ac0 ; integer
155 000570 177027 000001 ldcld #1,ac0 ; integer
156 000574 177027 000001 ldclf #1,ac0 ; integer
157 000600 172427 040200 ldd #1,ac0 ; float
158 000604 172427 040200 ldf #1,ac0 ; float
159 000610 176427 000001 ldexp #1,ac0 ; integer
160 000614 171427 040200 modd #1,ac0 ; float
161 000620 171427 040200 modf #1,ac0 ; float
162 000624 171027 040200 muld #1,ac0 ; float
163 000630 171027 040200 mulf #1,ac0 ; float
164 000634 173027 040200 subd #1,ac0 ; float
165 000640 173027 040200 subf #1,ac0 ; float
166
167 .end
167
@@ -200,11 +199,11 @@ test-float.mac:140: ***ERROR Junk at end of line ('5 ; bad: junk')
Symbol table
. 000640R 001 AC0 =%000000 E3 = 000003
. 000644R 001 AC0 =%000000 E3 = 000003
A = 000001 AC1 =%000001 F2 =%000002
Program sections:
. ABS. 000000 000 (RW,I,GBL,ABS,OVR,NOSAV)
000640 001 (RW,I,LCL,REL,CON,NOSAV)
000644 001 (RW,I,LCL,REL,CON,NOSAV)

View File

@@ -82,16 +82,16 @@
; V05.06
.word ^F 0.994140625 ; (2**9-3)/2**9 040176 040177
.flt4 0.994140625
.flt4 0.994140625 ; same-> 040176 100000 0 0
.word ^F 0.998046875 ; (2**9-1)/2**9 040177 040200
.flt4 0.998046875
.flt4 0.998046875 ; same-> 040177 100000 0 0
.word ^F 1.00390625 ; (2**8+1)/2**8 040200 040201
.flt4 1.00390625
.flt4 1.00390625 ; same-> 040200 100000 0 0
.word ^F 1.01171875 ; (2**8+3)/2**8 040201 040202
.flt4 1.01171875
.flt4 1.01171875 ; same-> 040201 100000 0 0
.flt4 1.701411834604692307e+38 ; 077777 177777 177777 177777
.FLT4 170141183460469230551095682998472802304 ; 2**127-2**70
@@ -127,7 +127,7 @@ f2 = %2
subf #^D<1+1>,ac0 ; literally
subf #^D 1+1 ,ac0 ; literally
subf #1e3,ac0 ; as float
subf #1e 3,ac0 ; TODO: accepted by MACRO11 as 1E3 (but not 1 e3, 1 e 3)
subf #1e 3,ac0 ; accepted by MACRO11 as 1E3 (but not 1 e3, 1 e 3)
a = 1
e3 = 3
subf #a,ac0 ; a interpreted as bit pattern