diff --git a/parse.c b/parse.c index b9f52c6..92ff809 100644 --- a/parse.c +++ b/parse.c @@ -417,7 +417,6 @@ int get_fp_src_mode( } #define DEBUG_FLOAT 0 -#if DEBUG_FLOAT void printflt(unsigned *flt, int size) @@ -434,9 +433,10 @@ printflt(unsigned *flt, int size) printf("\n"); } -#define DF(x) printf x +#if DEBUG_FLOAT +#define DF(...) printf(__VA_ARGS__) #else -#define DF(x) +#define DF(...) #endif /* @@ -470,12 +470,17 @@ printflt(unsigned *flt, int size) # define FREXP frexp #endif +#define PARSE_FLOAT_WITH_FLOATS 0 +#define PARSE_FLOAT_WITH_INTS 1 /* Parse PDP-11 64-bit floating point format. */ /* Give a pointer to "size" words to receive the result. */ /* Note: there are probably degenerate cases that store incorrect results. For example, I think rounding up a FLT2 might cause exponent overflow. Sorry. */ + +#if PARSE_FLOAT_WITH_FLOATS + /* Note also that the full 56 bits of precision probably aren't always available on the source platform, given the widespread application of IEEE floating point formats, so expect some differences. Sorry @@ -502,8 +507,8 @@ int parse_float( i = sscanf(cp, SCANF_FMT "%n", &d, &n); if (i == 0) return 0; /* Wasn't able to convert */ - DF(("LDBL_MANT_DIG: %d\n", LDBL_MANT_DIG)); - DF(("%Lf input: %s", d, cp)); + DF("LDBL_MANT_DIG: %d\n", LDBL_MANT_DIG); + DF("%Lf input: %s", d, cp); cp += n; if (endp) @@ -517,13 +522,13 @@ int parse_float( } frac = FREXP(d, &sexp); /* Separate into exponent and mantissa */ - DF(("frac: %Lf %La sexp: %d\n", frac, frac, sexp)); + DF("frac: %Lf %La sexp: %d\n", frac, frac, sexp); if (sexp < -127 || sexp > 127) return 0; /* Exponent out of range. */ uexp = sexp + 128; /* Make excess-128 mode */ uexp &= 0xff; /* express in 8 bits */ - DF(("uexp: %02x\n", uexp)); + DF("uexp: %02x\n", uexp); /* * frexp guarantees its fractional return value is @@ -555,8 +560,8 @@ int parse_float( /* The following big literal is 2 to the 57th power: */ ufrac = (uint64_t) (frac * 144115188075855872.0); /* Align fraction bits */ - DF(("ufrac: %016lx\n", ufrac)); - DF(("56 : %016lx\n", (1UL<<57) - 2)); + DF("ufrac: %016lx\n", ufrac); + DF("56 : %016lx\n", (1UL<<57) - 2); /* * ufrac is now >= 2**56 and < 2**57. @@ -589,14 +594,14 @@ int parse_float( onehalf = 1ULL << (16 * (4-size)); ufrac += onehalf; - DF(("onehalf=%016lx, ufrac+onehalf: %016lx\n", onehalf, ufrac)); + DF("onehalf=%016lx, ufrac+onehalf: %016lx\n", onehalf, ufrac); /* Did it roll up to a value 2**56? */ if ((ufrac >> 57) > 0) { /* Overflow? */ if (uexp < 0xFF) { ufrac >>= 1; /* Normalize */ uexp++; - DF(("ufrac: %016lx uexp: %02x (normalized)\n", ufrac, uexp)); + DF("ufrac: %016lx uexp: %02x (normalized)\n", ufrac, uexp); } else { /* * If rounding and then normalisation would cause the exponent to @@ -606,7 +611,7 @@ int parse_float( * readable to just undo it here. */ ufrac -= onehalf; - DF(("don't round: exponent overflow")); + DF("don't round: exponent overflow\n"); } } @@ -624,6 +629,264 @@ int parse_float( return 1; } +#elif PARSE_FLOAT_WITH_INTS + +#define DUMP3 DF("exp: %d. %d %016llx\n", \ + float_dec_exponent, float_bin_exponent, float_buf) +/* + * Parse floating point denotations using integer operations only. + * Follows the reference version's implementation fairly closely. + */ +int parse_float( + char *cp, + char **endp, + int size, + unsigned *flt) +{ + int float_sign = 0; + int float_dot = 0; + int float_dec_exponent = 0; + int float_bin_exponent = 0; + int ok_chars = 0; + uint64_t float_buf = 0; + + float_bin_exponent = 65; + + cp = skipwhite(cp); + + if (*cp == '+') { + cp++; + } else if (*cp == '-') { + float_sign = 1; + cp++; + } + DF("float_sign: %d\n", float_sign); + + for (;;) { + if (isdigit(*cp)) { + /* Can we multiply by 10? */ + DF("digit: %c\n", *cp); + ok_chars++; + if (float_buf & 0xF800000000000000ULL) { /* [0] & 0174000, 0xF800 */ + /* No, that would overflow */ + float_dec_exponent++; /* no, compensate for the snub */ + /* + * Explanation of the above comment: + * - after the decimal separator, we should ignore extra digits + * completely. Since float_dot == -1, the exponent will be + * decremented below, and compensate for that here. + * - before the decimal separator, we don't add the digit + * (which would overflow) so we lose some lesser significant + * bits. float_dot == 0 in this case, and we do want the + * exponent increased to compensate for the ignored digit. + * So in both cases the right thing happens. + */ + } else { + /* Multiply by 10 */ + float_buf *= 10; + /* Add digit */ + float_buf += *cp - '0'; + } + float_dec_exponent -= float_dot; + DUMP3; + cp++; + } else if (*cp == '.') { + DF("dot: %c\n", *cp); + ok_chars++; + if (float_dot) { + // error... printf("Error: repeated decimal separator\n"); + return 0; + } + float_dot = 1; + cp++; + } else { + DF("Other char: %c\n", *cp); + if (ok_chars == 0) { + return 0; /* No valid number found */ + } + break; + } + } + + if (toupper(*cp) == 'E') { + cp++; + int exp = strtol(cp, &cp, 10); + + float_dec_exponent += exp; + DF("E%d -> dec_exp %d\n", exp, float_dec_exponent); + } + + if (endp) + *endp = cp; + + /* FLTG3 */ + if (float_buf) { + /* Non-zero */ + DF("Non-zero: decimal exponent: %d\n", float_dec_exponent); + while (float_dec_exponent > 0) { /* 31$ */ + DUMP3; + if (float_buf <= 0x3316000000000000ULL) { /* 031426, 13078, 0x3316, 65390 / 5 */ + /* Multiply by 5 and 2 */ + DF("Multiply by 5 and 2\n"); + float_buf *= 5; + float_bin_exponent++; + DUMP3; + } else { + /* Multiply by 5/4 and 8 32$ */ + DF("Multiply by 5/4 and 8\n"); + if (float_buf >= 0xCCCC000000000000ULL) { + float_buf >>= 1; + float_bin_exponent++; + } + float_buf += (float_buf >> 2); + float_bin_exponent += 3; + DUMP3; + } + float_dec_exponent--; + } + + while (float_dec_exponent < 0) { /* 41$ ish */ + DUMP3; + DF("Prepare for division by left-shifting the bits\n"); + /* Prepare for division by left-shifting the bits */ + while (((float_buf >> 63) & 1) == 0) { /* 41$ */ + float_bin_exponent--; /* 40$ */ + float_buf <<= 1; + DUMP3; + } + + /* Divide by 2 */ + float_buf >>= 1; + + uint64_t float_save = float_buf; + DUMP3; + DF("float_save: %016llx\n", float_save); + + /* Divide by 5: + * multiplying by (2**66 - 4) / 5 = 0xCCCCCCCCCCCCCCCC + * while dropping the 64 least significant bits. + * This is nearly exact but exact enough? + * Somehow there is another doubling in here, so the + * final result = start * 8 / 5. + */ + for (int i = 16 * 2; i > 0; i--) { + if ((i & 1) == 0) { /* 42$ */ + float_buf >>= 2; + } + float_buf >>= 1; /* 43$ */ + float_buf += float_save; + DF("Loop i=%2d: ", i); DUMP3; + } + + /* It's not simply dividing by 5, it also multiplies by 8, + * so we need to adjust the exponent here. */ + float_bin_exponent -= 3; + float_dec_exponent++; + DUMP3; + } + + /* Normalize the mantissa: shift a single 1 out to the left */ + DF("Normalize the mantissa: shift a single 1 out to the left\n"); + int carry; + do { + /* FLTG5 */ + float_bin_exponent--; + carry = (float_buf >> 63) & 1; + float_buf <<= 1; + DUMP3; + } while (carry == 0); + + /* Set excess 128. */ + DF("Set excess 128.\n"); + float_bin_exponent += 0200; + DUMP3; + + if (float_bin_exponent & 0xFF00) { + /* Error N. Underflow. 2$ */ + report(NULL, "Error N (underflow)\n"); + return 0; + } + + /* Shift right 9 positions to make room for sign and exponent 3$ */ + DF("Shift right 9 positions to make room for sign and exponent\n"); + int round = (float_buf >> 8) & 0x0001; + float_buf >>= 9; + float_buf |= (uint64_t)float_bin_exponent << (64-9); + DUMP3; + + /* + * This rounding step seems always needed to make the result the same + * as the implementation with long doubles. + * + * This may be because of the slight imprecision of the divisions by 10? + * + * It is needed to get some exact results for values that are indeed + * exactly representable. Examples: + * + * (2**9-3)/2**9 = 0.994140625 = 0,11111101 + * 407e 7fff ffff ffff -> 407e 8000 0000 0000 (correct) + * 1.00 (or 100E-2) divides 100 by 100 and gets + * 407f ffff ffff ffff -> 4080 0000 0000 0000 (correct) + * + * The reference implementation omits this rounding for size != 4: + * it has only one rounding step, which always depends on the size. + */ + float_buf += round; + DF("round: size = 4, round = %d\n", round); + + /* Round (there is a truncation option to omit this step) */ + if (1) { + uint64_t onehalf; + + if (size < 4) { + /* 1 << 31 or 1 << 47 */ + onehalf = 1ULL << ((16 * (4-size)) - 1); + DF("round: size = %d, onehalf = %016llx\n", size, onehalf); + float_buf += onehalf; + DUMP3; + + /* The rounding bit is the lesser significant bit that's just + * outside the returned result. If we round, we add it to the + * returned value. + * + * If there is a carry-out of the mantissa, it gets added to + * the exponent (increasing it by 1). + * + * If that also overflows, we truely have overflow. + */ + } + + DF("After rounding\n"); + DUMP3; + } + + if (float_buf & 0x8000000000000000ULL) { + // 6$ error T: exponent overflow + report(NULL, "error T: exponent overflow\n"); + return 0; + } + + /* 7$ */ + float_buf |= (uint64_t)float_sign << 63; + DF("Put in float_sign: "); DUMP3; + } + + /* Now put together the result from the parts */ + flt[0] = (float_buf >> 48) & 0xFFFF; + if (size > 1) { + flt[1] = (float_buf >> 32) & 0xFFFF; + if (size > 2) { + flt[2] = (float_buf >> 16) & 0xFFFF; + flt[3] = (float_buf >> 0) & 0xFFFF; + } + } + + return 1; +} +#else +# error How are you going to parse floats? +#endif + /* The recursive-descent expression parser parse_expr. */ diff --git a/tests/test-float.lst.ok b/tests/test-float.lst.ok index 9c640bc..64759b5 100644 --- a/tests/test-float.lst.ok +++ b/tests/test-float.lst.ok @@ -97,19 +97,19 @@ 82 ; V05.06 83 84 000240 040177 .word ^F 0.994140625 ; (2**9-3)/2**9 040176 040177 - 85 000242 040176 100000 000000 .flt4 0.994140625 + 85 000242 040176 100000 000000 .flt4 0.994140625 ; same-> 040176 100000 0 0 000250 000000 86 87 000252 040200 .word ^F 0.998046875 ; (2**9-1)/2**9 040177 040200 - 88 000254 040177 100000 000000 .flt4 0.998046875 + 88 000254 040177 100000 000000 .flt4 0.998046875 ; same-> 040177 100000 0 0 000262 000000 89 90 000264 040201 .word ^F 1.00390625 ; (2**8+1)/2**8 040200 040201 - 91 000266 040200 100000 000000 .flt4 1.00390625 + 91 000266 040200 100000 000000 .flt4 1.00390625 ; same-> 040200 100000 0 0 000274 000000 92 93 000276 040202 .word ^F 1.01171875 ; (2**8+3)/2**8 040201 040202 - 94 000300 040201 100000 000000 .flt4 1.01171875 + 94 000300 040201 100000 000000 .flt4 1.01171875 ; same-> 040201 100000 0 0 000306 000000 95 96 000310 077777 177777 177777 .flt4 1.701411834604692307e+38 ; 077777 177777 177777 177777 @@ -152,47 +152,46 @@ test-float.mac:125: ***ERROR Invalid addressing mode (1st operand, fsrc: Invalid 127 000444 173027 000002 subf #^D<1+1>,ac0 ; literally 128 000450 173027 000002 subf #^D 1+1 ,ac0 ; literally 129 000454 173027 042572 subf #1e3,ac0 ; as float -test-float.mac:130: ***ERROR Invalid syntax (comma expected) - 130 subf #1e 3,ac0 ; TODO: accepted by MACRO11 as 1E3 (but not 1 e3, 1 e 3) + 130 000460 173027 042572 subf #1e 3,ac0 ; accepted by MACRO11 as 1E3 (but not 1 e3, 1 e 3) 131 000001 a = 1 132 000003 e3 = 3 - 133 000460 173027 000001 subf #a,ac0 ; a interpreted as bit pattern - 134 000464 173027 000001 subf #,ac0 ; a interpreted as bit pattern - 135 000470 173027 000003 subf #e3,ac0 ; e3 is the label + 133 000464 173027 000001 subf #a,ac0 ; a interpreted as bit pattern + 134 000470 173027 000001 subf #,ac0 ; a interpreted as bit pattern + 135 000474 173027 000003 subf #e3,ac0 ; e3 is the label test-float.mac:136: ***ERROR Invalid addressing mode (1st operand, fsrc: Invalid expression after '#') 136 subf #<1e3>,ac0 ; error N 137 test-float.mac:138: ***ERROR Junk at end of line ('5 ; bad: ') - 138 000474 170627 000002 absf #2.5 ; bad: operand is destination + 138 000500 170627 000002 absf #2.5 ; bad: operand is destination test-float.mac:139: ***ERROR Junk at end of line ('5 ; bad: ') - 139 000500 170527 000002 tstd #2.5 ; bad: operand is considered FDST by the arch handbook + 139 000504 170527 000002 tstd #2.5 ; bad: operand is considered FDST by the arch handbook test-float.mac:140: ***ERROR Junk at end of line ('5 ; bad: junk') - 140 000504 174027 000002 stf ac0,#2.5 ; bad: junk at end of line - 141 000510 174027 000002 stf ac0,#2 ; doesn't makes sense but MACRO11 allows it + 140 000510 174027 000002 stf ac0,#2.5 ; bad: junk at end of line + 141 000514 174027 000002 stf ac0,#2 ; doesn't makes sense but MACRO11 allows it 142 143 ; Test immediate source argument for instructions that have one (src or fsrc) 144 - 145 000514 172027 040200 addd #1,ac0 ; float - 146 000520 172027 040200 addf #1,ac0 ; float - 147 000524 173427 040200 cmpd #1,ac0 ; float - 148 000530 173427 040200 cmpf #1,ac0 ; float - 149 000534 174427 040200 divd #1,ac0 ; float - 150 000540 174427 040200 divf #1,ac0 ; float - 151 000544 177427 040200 ldcdf #1,ac0 ; float - 152 000550 177427 040200 ldcfd #1,ac0 ; float - 153 000554 177027 000001 ldcid #1,ac0 ; integer - 154 000560 177027 000001 ldcif #1,ac0 ; integer - 155 000564 177027 000001 ldcld #1,ac0 ; integer - 156 000570 177027 000001 ldclf #1,ac0 ; integer - 157 000574 172427 040200 ldd #1,ac0 ; float - 158 000600 172427 040200 ldf #1,ac0 ; float - 159 000604 176427 000001 ldexp #1,ac0 ; integer - 160 000610 171427 040200 modd #1,ac0 ; float - 161 000614 171427 040200 modf #1,ac0 ; float - 162 000620 171027 040200 muld #1,ac0 ; float - 163 000624 171027 040200 mulf #1,ac0 ; float - 164 000630 173027 040200 subd #1,ac0 ; float - 165 000634 173027 040200 subf #1,ac0 ; float + 145 000520 172027 040200 addd #1,ac0 ; float + 146 000524 172027 040200 addf #1,ac0 ; float + 147 000530 173427 040200 cmpd #1,ac0 ; float + 148 000534 173427 040200 cmpf #1,ac0 ; float + 149 000540 174427 040200 divd #1,ac0 ; float + 150 000544 174427 040200 divf #1,ac0 ; float + 151 000550 177427 040200 ldcdf #1,ac0 ; float + 152 000554 177427 040200 ldcfd #1,ac0 ; float + 153 000560 177027 000001 ldcid #1,ac0 ; integer + 154 000564 177027 000001 ldcif #1,ac0 ; integer + 155 000570 177027 000001 ldcld #1,ac0 ; integer + 156 000574 177027 000001 ldclf #1,ac0 ; integer + 157 000600 172427 040200 ldd #1,ac0 ; float + 158 000604 172427 040200 ldf #1,ac0 ; float + 159 000610 176427 000001 ldexp #1,ac0 ; integer + 160 000614 171427 040200 modd #1,ac0 ; float + 161 000620 171427 040200 modf #1,ac0 ; float + 162 000624 171027 040200 muld #1,ac0 ; float + 163 000630 171027 040200 mulf #1,ac0 ; float + 164 000634 173027 040200 subd #1,ac0 ; float + 165 000640 173027 040200 subf #1,ac0 ; float 166 167 .end 167 @@ -200,11 +199,11 @@ test-float.mac:140: ***ERROR Junk at end of line ('5 ; bad: junk') Symbol table -. 000640R 001 AC0 =%000000 E3 = 000003 +. 000644R 001 AC0 =%000000 E3 = 000003 A = 000001 AC1 =%000001 F2 =%000002 Program sections: . ABS. 000000 000 (RW,I,GBL,ABS,OVR,NOSAV) - 000640 001 (RW,I,LCL,REL,CON,NOSAV) + 000644 001 (RW,I,LCL,REL,CON,NOSAV) diff --git a/tests/test-float.mac b/tests/test-float.mac index 98d420a..8b5c8a8 100644 --- a/tests/test-float.mac +++ b/tests/test-float.mac @@ -82,16 +82,16 @@ ; V05.06 .word ^F 0.994140625 ; (2**9-3)/2**9 040176 040177 - .flt4 0.994140625 + .flt4 0.994140625 ; same-> 040176 100000 0 0 .word ^F 0.998046875 ; (2**9-1)/2**9 040177 040200 - .flt4 0.998046875 + .flt4 0.998046875 ; same-> 040177 100000 0 0 .word ^F 1.00390625 ; (2**8+1)/2**8 040200 040201 - .flt4 1.00390625 + .flt4 1.00390625 ; same-> 040200 100000 0 0 .word ^F 1.01171875 ; (2**8+3)/2**8 040201 040202 - .flt4 1.01171875 + .flt4 1.01171875 ; same-> 040201 100000 0 0 .flt4 1.701411834604692307e+38 ; 077777 177777 177777 177777 .FLT4 170141183460469230551095682998472802304 ; 2**127-2**70 @@ -127,7 +127,7 @@ f2 = %2 subf #^D<1+1>,ac0 ; literally subf #^D 1+1 ,ac0 ; literally subf #1e3,ac0 ; as float - subf #1e 3,ac0 ; TODO: accepted by MACRO11 as 1E3 (but not 1 e3, 1 e 3) + subf #1e 3,ac0 ; accepted by MACRO11 as 1E3 (but not 1 e3, 1 e 3) a = 1 e3 = 3 subf #a,ac0 ; a interpreted as bit pattern