From bb56fc33f1598e0ace5edd64af34d79aefadc394 Mon Sep 17 00:00:00 2001
From: Olaf Seibert <rhialto@falu.nl>
Date: Sun, 19 Jun 2022 14:49:41 +0200
Subject: [PATCH] Add floating point parsing playground.

---
 tests/float.c | 968 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 968 insertions(+)
 create mode 100755 tests/float.c

diff --git a/tests/float.c b/tests/float.c
new file mode 100755
index 0000000..7f328f3
--- /dev/null
+++ b/tests/float.c
@@ -0,0 +1,968 @@
+# /*
+gcc float.c -o float -lm
+./float
+exit $?
+*/
+/*
+ * Parsing PDP-11 floating point literals.
+ * Testing ground for different implementations.
+*/
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <math.h>
+#include <float.h>
+#include <ieeefp.h>
+
+#define DEBUG_FLOAT     1
+
+void
+printflt(unsigned *flt, int size)
+{
+    printf("%06o: ",        flt[0]);
+    printf("sign:  %d ",   (flt[0] & 0x8000) >> 15);
+    printf("uexp:  %x ",   (flt[0] & 0x7F80) >>  7);
+    printf("ufrac: %02x",   flt[0] & 0x007F);
+
+    for (int i = 1; i < size; i++) {
+        printf(" %04x", flt[i]);
+    }
+
+    printf("\n");
+}
+
+#if DEBUG_FLOAT
+#define DF(...)   printf(__VA_ARGS__)
+#else
+#define DF(...)
+#endif
+
+/*
+ * We need 56 bits of mantissa.
+ *
+ * Try to detect if it is needed, possible and useful to use
+ * long double instead of double, when parsing floating point numbers.
+ */
+
+#if DBL_MANT_DIG >= 56
+/* plain double seems big enough */
+# define USE_LONG_DOUBLE        0
+/* long double exists and seems big enough */
+#elif LDBL_MANT_DIG >= 56
+# define USE_LONG_DOUBLE        1
+#elif defined(LDBL_MANT_DIG)
+/* long double exists but is probably still too small */
+# define USE_LONG_DOUBLE        1
+#else
+/* long double does not exist and plain double is too small */
+# define USE_LONG_DOUBLE        0
+#endif
+
+#if USE_LONG_DOUBLE
+# define DOUBLE                 long double
+# define SCANF_FMT              "%Lf"
+# define FREXP                  frexpl
+#else
+# define DOUBLE                 double
+# define SCANF_FMT              "%lf"
+# define FREXP                  frexp
+#endif
+
+
+/* Parse PDP-11 64-bit floating point format. */
+/* Give a pointer to "size" words to receive the result. */
+/* Note: there are probably degenerate cases that store incorrect
+   results.  For example, I think rounding up a FLT2 might cause
+   exponent overflow.  Sorry. */
+/* Note also that the full 56 bits of precision probably aren't always
+   available on the source platform, given the widespread application
+   of IEEE floating point formats, so expect some differences.  Sorry
+   again. */
+
+int parse_float(
+    char *cp,
+    int size,
+    unsigned *flt)
+{
+    DOUBLE          d;          /* value */
+    DOUBLE          frac;       /* fractional value */
+    uint64_t        ufrac;      /* fraction converted to 56 bit
+                                   unsigned integer */
+    uint64_t        onehalf;    /* one half of the smallest bit
+                                   (used for rounding) */
+    int             i;          /* Number of fields converted by sscanf */
+    int             n;          /* Number of characters converted by sscanf */
+    int             sexp;       /* Signed exponent */
+    unsigned        uexp;       /* Unsigned excess-128 exponent */
+    unsigned        sign = 0;   /* Sign mask */
+
+    i = sscanf(cp, SCANF_FMT "%n", &d, &n);
+    if (i == 0)
+        return 0;                      /* Wasn't able to convert */
+    DF("LDBL_MANT_DIG: %d\n", LDBL_MANT_DIG);
+    DF("%Lf input: %s\n", d, cp);
+
+    cp += n;
+
+    if (d == 0.0) {
+        for (i = 0; i < size; i++) {
+            flt[i] = 0; /* All-bits-zero equals zero */
+        }
+        return 1;                      /* Good job. */
+    }
+
+    frac = FREXP(d, &sexp);            /* Separate into exponent and mantissa */
+    DF("frac: %Lf %La sexp: %d\n", frac, frac, sexp);
+    if (sexp < -127 || sexp > 127)
+        return 0;                      /* Exponent out of range. */
+
+    uexp = sexp + 128;                  /* Make excess-128 mode */
+    uexp &= 0xff;                       /* express in 8 bits */
+    DF("uexp: %02x\n", uexp);
+
+    /*
+     * frexp guarantees its fractional return value is
+     *   abs(frac) >= 0.5    and  abs(frac) < 1.0
+     * Another way to think of this is that:
+     *   abs(frac) >= 2**-1  and  abs(frac) < 2**0
+     */
+
+    if (frac < 0) {
+        sign = (1 << 15);              /* Negative sign */
+        frac = -frac;                  /* fix the mantissa */
+    }
+
+    /*
+     * For the PDP-11 floating point representation the
+     *  fractional part is 7 bits (for 16-bit floating point
+     *  literals), 23 bits (for 32-bit floating point values),
+     *  or 55 bits (for 64-bit floating point values).
+     * However the bit immediately above the MSB is always 1
+     *  because the value is normalized.  So it's actually
+     *  8 bits, 24 bits, or 56 bits.
+     * We effectively multiply the fractional part of our value by
+     *  2**56 to fully expose all of those bits (including
+     *  the MSB which is 1).
+     * However as an intermediate step, we really multiply by
+     *  2**57, so we get one lsb for possible later rounding
+     *  purposes. After that, we divide by 2 again.
+     */
+
+    /* The following big literal is 2 to the 57th power: */
+    ufrac = (uint64_t) (frac * 144115188075855872.0); /* Align fraction bits */
+    DF("ufrac: %016lx\n", ufrac);
+    DF("56   : %016lx\n", (1UL<<57) - 2);
+
+    /*
+     * ufrac is now >= 2**56 and < 2**57.
+     *  This means it's normalized: bit [56] is 1
+     *  and all higher bits are 0.
+     */
+
+    /* Round from 57-bits to 56, 24, or 8.
+     * We do this by:
+     * + first adding a value equal to one half of the
+     *   least significant bit (the value 'onehalf')
+     * + (possibly) dealing with any carrying that
+     *   causes the value to no longer be normalized
+     *   (with bit [56] = 1 and all higher bits = 0)
+     * + shifting right by 1 bit (which throws away
+     *   the 0 bit).  Note this step could be rolled
+     *   into the next step.
+     * + taking the remaining highest order 8,
+     *   24, or 56 bits.
+     *
+     * +--+--------+-------+ +--------+--------+
+     * |15|14     7|6     0| |15      |       0|
+     * +--+--------+-------+ +--------+--------+
+     * | S|EEEEEEEE|MMMMMMM| |MMMMMMMM|MMMMMMMM| ...maybe 2 more words...
+     * +--+--------+-------+ +--------+--------+
+     *  Sign (1 bit)
+     *     Exponent (8 bits)
+     *              Mantissa (7 bits)
+     */
+
+    onehalf = 1ULL << (16 * (4-size));
+    ufrac += onehalf;
+    DF("onehalf=%016lx, ufrac+onehalf: %016lx\n", onehalf, ufrac);
+
+    /* Did it roll up to a value 2**56? */
+    if ((ufrac >> 57) > 0) {           /* Overflow? */
+        if (uexp < 0xFF) {
+            ufrac >>= 1;               /* Normalize */
+            uexp++;
+            DF("ufrac: %016lx  uexp: %02x (normalized)\n", ufrac, uexp);
+        } else {
+            /*
+             * If rounding and then normalisation would cause the exponent to
+             * overflow, just don't round: the cure is worse than the disease.
+             * We could detect ahead of time but the conditions for all size
+             * values may be a bit complicated, and so rare, that it is more
+             * readable to just undo it here.
+             */
+            ufrac -= onehalf;
+            DF("don't round: exponent overflow\n");
+        }
+    }
+
+    ufrac >>= 1;                       /* Go from 57 bits to 56 */
+
+    flt[0] = (unsigned) (sign | (uexp << 7) | ((ufrac >> 48) & 0x7F));
+    if (size > 1) {
+        flt[1] = (unsigned) ((ufrac >> 32) & 0xffff);
+        if (size > 2) {
+            flt[2] = (unsigned) ((ufrac >> 16) & 0xffff);
+            flt[3] = (unsigned) ((ufrac >> 0) & 0xffff);
+        }
+    }
+
+    return 1;
+}
+
+/*
+ * Several of these functions assume that "unsigned" contains more
+ * than 16 bits. Bit 16 is sometimes used to store a carry.
+ */
+
+#define DUMP DF("exp: %d. %d  %04x %04x %04x %04x\n", \
+                    float_dec_exponent, float_bin_exponent, \
+                    float_buf[0], float_buf[1], float_buf[2], float_buf[3])
+#define DUMP4(b, t) DF("%04x %04x %04x %04x%s", \
+                        b[0], b[1], b[2], b[3], t)
+
+void float_copy(                /* XMIT4 at FLTGSV, more or less */
+    unsigned *to,
+    unsigned *from)
+{
+    to[0] = from[0] & 0xFFFF;
+    to[1] = from[1] & 0xFFFF;
+    to[2] = from[2] & 0xFFFF;
+    to[3] = from[3] & 0xFFFF;
+}
+
+int float_right_shift(          /* FLTGRS */
+    unsigned *buf)
+{
+    int carry1, carry2;
+
+    carry1 = buf[0] & 0x0001; buf[0] >>= 1;
+    carry2 = buf[1] & 0x0001; buf[1] >>= 1; buf[1] |= (carry1 << 15);
+    carry1 = buf[2] & 0x0001; buf[2] >>= 1; buf[2] |= (carry2 << 15);
+    carry2 = buf[3] & 0x0001; buf[3] >>= 1; buf[3] |= (carry1 << 15);
+
+    return carry2;
+}
+
+int float_left_shift(           /* FLTGLS */
+    unsigned *buf)
+{
+    int carry1, carry2;
+
+    carry1 = buf[3] >> 15; buf[3] <<= 1;
+    carry2 = buf[2] >> 15; buf[2] <<= 1; buf[2] |= (carry1 & 0x0001);
+    carry1 = buf[1] >> 15; buf[1] <<= 1; buf[1] |= (carry2 & 0x0001);
+    carry2 = buf[0] >> 15; buf[0] <<= 1; buf[0] |= (carry1 & 0x0001);
+
+    float_copy(buf, buf);       /* Clean up carries */
+
+    return carry2 & 0x01;
+}
+
+void float_add(                 /* FLTGAD */
+    unsigned *to,
+    unsigned *from)
+{
+    //DF("float_add: "); DUMP4(to, ""); DF(" += "); DUMP4(from, "\n");
+    to[3] += from[3];
+    to[2] += from[2] + ((to[3] >> 16) & 0x0001);
+    to[1] += from[1] + ((to[2] >> 16) & 0x0001);
+    to[0] += from[0] + ((to[1] >> 16) & 0x0001);
+    //DF("         = "); DUMP4(to, "\n");
+
+    float_copy(to, to);       /* Clean up carries */
+}
+
+void float_mult_by_5(           /* FLTM50 */
+    unsigned *buf,
+    unsigned *workspace)
+{
+    float_copy(workspace, buf); /* Save 1 x original value */
+    float_left_shift(buf);      /* 2 x */
+    float_left_shift(buf);      /* 4 x */
+    float_add(buf, workspace);  /* 5 x */
+}
+
+void float_mult_by_5_4th(       /* FLTM54 */
+    int *bin_exp,
+    unsigned *buf,
+    unsigned *workspace)
+{
+    if (buf[0] >= 0146314) {
+        float_right_shift(buf);
+        ++*bin_exp;
+    }
+    float_copy(workspace, buf);
+    float_right_shift(buf);     /* half of the original */
+    float_right_shift(buf);     /* a quarter */
+    float_add(buf, workspace);  /* add the original to the quarter */
+}
+
+int parse_float_m2(
+    char *cp,
+    int size,
+    unsigned *flt)
+{
+    int float_sign = 0;
+    int float_dot = 0;
+    int float_dec_exponent = 0;
+    int float_bin_exponent = 0;
+    unsigned float_buf[4] = { 0 };
+    unsigned float_save[4];
+
+    float_bin_exponent = 65;
+
+    if (*cp == '+') {
+        cp++;
+    } else if (*cp == '-') {
+        float_sign = 0100000;
+        cp++;
+    }
+    DF("float_sign: %d\n", float_sign);
+
+    for (;;) {
+        if (isdigit(*cp)) {
+            /* Can we multiply by 10? */
+            DF("digit: %c\n", *cp);
+            if (float_buf[0] & 0174000) {
+                /* No, that would overflow */
+                float_dec_exponent++; /* no, compensate for the snub */
+                /*
+                 * Explanation of the above comment:
+                 * - after the decimal separator, we should ignore extra digits
+                 *   completely. Since float_dot == -1, the exponent will be
+                 *   decremented below, and compensate for that here.
+                 * - before the decimal separator, we don't add the digit
+                 *   (which would overflow) so we lose some lesser significant
+                 *   bits. float_dot == 0 in this case, and we do want the
+                 *   exponent increased to compensate for the ignored digit.
+                 * So in both cases the right thing happens.
+                 */
+            } else {
+                /* Multiply by 10 */
+                float_mult_by_5(float_buf, float_save);
+                float_left_shift(float_buf);
+
+                /* Add digit */
+                float_buf[3] += *cp - '0';
+                /* Ripple carry */
+                float_buf[2] += (float_buf[3] >> 16) & 0x0001;
+                float_buf[1] += (float_buf[2] >> 16) & 0x0001;
+                float_buf[0] += (float_buf[1] >> 16) & 0x0001;
+                float_copy(float_buf, float_buf); /* Clean up carries */
+            }
+            float_dec_exponent += float_dot;
+            DUMP;
+            cp++;
+        } else if (*cp == '.') {
+            DF("dot: %c\n", *cp);
+            if (float_dot < 0) {
+                // error...
+                printf("Error: repeated decimal separator\n");
+                return 0;
+            }
+            float_dot = -1;
+            cp++;
+        } else {
+            DF("Other char: %c\n", *cp);
+            break;
+        }
+    }
+
+    if (toupper(*cp) == 'E') {
+        cp++;
+        int exp = strtol(cp, &cp, 10);
+
+        float_dec_exponent += exp;
+        DF("E%d -> dec_exp %d\n", exp, float_dec_exponent);
+    }
+
+    /* FLTG3 */
+    if (float_buf[0] | float_buf[1] | float_buf[2] | float_buf[3]) {
+        /* Non-zero */
+        DF("Non-zero: decimal exponent: %d\n", float_dec_exponent);
+        while (float_dec_exponent > 0) { /* 31$ */
+            DUMP;
+            if (float_buf[0] <= 031426) {       /* 13078, 0x3316, 65390 / 5 */
+                /* Multiply by 5 and 2 */
+                DF("Multiply by 5 and 2\n");
+                float_mult_by_5(float_buf, float_save);
+                float_bin_exponent++;
+                DUMP;
+            } else {
+                /* Multiply by 5/4 and 8 32$ */
+                DF("Multiply by 5/4 and 8\n");
+                float_mult_by_5_4th(&float_bin_exponent, float_buf, float_save);
+                float_bin_exponent += 3;
+                DUMP;
+            }
+            float_dec_exponent--;
+        }
+
+        while (float_dec_exponent < 0) { /* 41$ ish */
+            DUMP;
+            DF("Prepare for division by left-shifting the bits\n");
+            /* Prepare for division by left-shifting the bits */
+            while ((float_buf[0] & 0x8000) == 0) {     /* 41$ */
+                float_bin_exponent--;                   /* 40$ */
+                float_left_shift(float_buf);
+                DUMP;
+            }
+
+            /* Divide by 2 */
+            float_right_shift(float_buf);
+
+            float_copy(float_save, float_buf);
+            DUMP;
+            DF("float_save: "); DUMP4(float_save, "\n");
+
+            /* Divide by 5:
+             * multiplying by (2**32 - 1) / 5 = 0x333333333
+             * while dropping the 32 least significant bits.
+             * This is nearly exact but exact enough?
+             * 2**32 - 1 is a multiple of 5.
+             */
+            for (int i = 16 * 2; i > 0; i--) {
+                if ((i & 1) == 0) {                    /* 42$ */
+                    float_right_shift(float_buf);
+                    float_right_shift(float_buf);
+                }
+                float_right_shift(float_buf);           /* 43$ */
+                float_add(float_buf, float_save);
+                DF("Loop i=%2d: ", i); DUMP;
+            }
+
+            float_bin_exponent -= 3;
+            float_dec_exponent++;
+            DUMP;
+        }
+
+        /* Normalize the mantissa: shift a single 1 out to the left */
+        DF("Normalize the mantissa: shift a single 1 out to the left\n");
+        int carry;
+        do {
+            /* FLTG5 */
+            float_bin_exponent--;
+            carry = float_left_shift(float_buf);
+            DUMP;
+        } while (carry == 0);
+
+        /* Set excess 128. */
+        DF("Set excess 128.\n");
+        float_bin_exponent += 0200;
+        DUMP;
+
+        if (float_bin_exponent & 0xFF00) {
+            /* Error N. Underflow. 2$ */
+            printf("Error N (underflow)\n");
+            return 0;
+        }
+
+        /* Shift right 9 positions to make room for sign and exponent 3$ */
+        DF("Shift right 9 positions to make room for sign and exponent\n");
+        int round = (float_buf[3] >> 8) & 0x0001;
+        float_buf[3] >>= 9; float_buf[3] |= (float_buf[2] << (16-9)) & 0xff80;
+        float_buf[2] >>= 9; float_buf[2] |= (float_buf[1] << (16-9)) & 0xff80;
+        float_buf[1] >>= 9; float_buf[1] |= (float_buf[0] << (16-9)) & 0xff80;
+        float_buf[0] >>= 9;
+        float_buf[0] |= float_bin_exponent << 7;
+        DUMP;
+        /*
+         * float_bin_exponent is included because of the location of
+         * FLTBEX ;BINARY EXPONENT (MUST PRECEED FLTBUF)
+         */
+
+        /* Round (this is optional, really) */
+        if (1) {
+            if (size < 4) {
+                round = float_buf[size] & 0x8000;
+            }
+            /* The rounding bit is the lesser significant bit that's just
+             * outside the returned result. If we round, we add it to the
+             * returned value.
+             *
+             * If there is a carry-out of the mantissa, it gets added to
+             * the exponent (increasing it by 1).
+             *
+             * If that also overflows, we truely have overflow.
+             */
+
+            if (round) {
+                for (int i = size - 1 ; round && i >= 0; i--) {
+                    float_buf[i]++;     /* 5$ */
+                    round = float_buf[i] & 0x10000;      /* carry */
+                    DF("round, i=%d: ", i); DUMP;
+                }
+            }
+            DF("After rounding\n");
+            DUMP;
+        }
+
+        if (float_buf[0] & 0x8000) {
+            // 6$ error T: exponent overflow
+            printf("Error T\n");
+            return 0;
+        }
+
+        /* 7$ */
+        float_buf[0] |= float_sign;
+        DF("Put in float_sign: "); DUMP;
+    }
+
+    /* Now put together the result from the parts */
+    flt[0] = float_buf[0] & 0xFFFF;
+    if (size > 1) {
+        flt[1] = float_buf[1] & 0xFFFF;
+        if (size > 2) {
+            flt[2] = float_buf[2] & 0xFFFF;
+            flt[3] = float_buf[3] & 0xFFFF;
+        }
+    }
+
+    return 1;
+}
+
+static void
+mult64to128(uint64_t op1, uint64_t op2, uint64_t *hi, uint64_t *lo)
+{
+#if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__ >= 16
+    __uint128_t result = (__uint128_t)op1 * op2;
+    *lo = result;
+    *hi = result >> 64;
+#else
+    uint64_t u1 = (op1 & 0xffffffff);
+    uint64_t v1 = (op2 & 0xffffffff);
+    uint64_t t = (u1 * v1);
+    uint64_t w3 = (t & 0xffffffff);
+    uint64_t k = (t >> 32);
+
+    op1 >>= 32;
+    t = (op1 * v1) + k;
+    k = (t & 0xffffffff);
+    uint64_t w1 = (t >> 32);
+
+    op2 >>= 32;
+    t = (u1 * op2) + k;
+    k = (t >> 32);
+
+    *hi = (op1 * op2) + w1 + k;
+    *lo = (t << 32) + w3;
+#endif
+}
+
+#define DUMP3 DF("exp: %d. %d  %016llx\n", \
+                    float_dec_exponent, float_bin_exponent, float_buf)
+
+int parse_float_m3(
+    char *cp,
+    int size,
+    unsigned *flt)
+{
+    int float_sign = 0;
+    int float_dot = 0;
+    int float_dec_exponent = 0;
+    int float_bin_exponent = 0;
+    uint64_t float_buf = 0;
+
+    float_bin_exponent = 65;
+
+    if (*cp == '+') {
+        cp++;
+    } else if (*cp == '-') {
+        float_sign = 1;
+        cp++;
+    }
+    DF("float_sign: %d\n", float_sign);
+
+    for (;;) {
+        if (isdigit(*cp)) {
+            /* Can we multiply by 10? */
+            DF("digit: %c\n", *cp);
+            if (float_buf & 0xF800000000000000ULL) { /* [0] & 0174000, 0xF800 */
+                /* No, that would overflow */
+                float_dec_exponent++; /* no, compensate for the snub */
+                /*
+                 * Explanation of the above comment:
+                 * - after the decimal separator, we should ignore extra digits
+                 *   completely. Since float_dot == -1, the exponent will be
+                 *   decremented below, and compensate for that here.
+                 * - before the decimal separator, we don't add the digit
+                 *   (which would overflow) so we lose some lesser significant
+                 *   bits. float_dot == 0 in this case, and we do want the
+                 *   exponent increased to compensate for the ignored digit.
+                 * So in both cases the right thing happens.
+                 */
+            } else {
+                /* Multiply by 10 */
+                float_buf *= 10;
+                /* Add digit */
+                float_buf += *cp - '0';
+            }
+            float_dec_exponent -= float_dot;
+            DUMP3;
+            cp++;
+        } else if (*cp == '.') {
+            DF("dot: %c\n", *cp);
+            if (float_dot) {
+                // error...
+                printf("Error: repeated decimal separator\n");
+                return 0;
+            }
+            float_dot = 1;
+            cp++;
+        } else {
+            DF("Other char: %c\n", *cp);
+            break;
+        }
+    }
+
+    if (toupper(*cp) == 'E') {
+        cp++;
+        int exp = strtol(cp, &cp, 10);
+
+        float_dec_exponent += exp;
+        DF("E%d -> dec_exp %d\n", exp, float_dec_exponent);
+    }
+
+    /* FLTG3 */
+    if (float_buf) {
+        /* Non-zero */
+        DF("Non-zero: decimal exponent: %d\n", float_dec_exponent);
+        while (float_dec_exponent > 0) { /* 31$ */
+            DUMP3;
+            if (float_buf <= 0x3316000000000000ULL) { /* 031426, 13078, 0x3316, 65390 / 5 */
+                /* Multiply by 5 and 2 */
+                DF("Multiply by 5 and 2\n");
+                float_buf *= 5;
+                float_bin_exponent++;
+                DUMP3;
+            } else {
+                /* Multiply by 5/4 and 8 32$ */
+                DF("Multiply by 5/4 and 8\n");
+                if (float_buf >= 0xCCCC000000000000ULL) {
+                    float_buf >>= 1;
+                    float_bin_exponent++;
+                }
+                float_buf += (float_buf >> 2);
+                float_bin_exponent += 3;
+                DUMP3;
+            }
+            float_dec_exponent--;
+        }
+
+        while (float_dec_exponent < 0) { /* 41$ ish */
+            DUMP3;
+            DF("Prepare for division by left-shifting the bits\n");
+            /* Prepare for division by left-shifting the bits */
+            while (((float_buf >> 63) & 1) == 0) {     /* 41$ */
+                float_bin_exponent--;                  /* 40$ */
+                float_buf <<= 1;
+                DUMP3;
+            }
+
+            /* Divide by 2 */
+            float_buf >>= 1;
+
+            uint64_t float_save = float_buf;
+            DUMP3;
+            DF("float_save: %016llx\n", float_save);
+
+            /* Divide by 5:
+             * multiplying by (2**66 - 4) / 5 = 0xCCCCCCCCCCCCCCCC
+             * while dropping the 64 least significant bits.
+             * This is nearly exact but exact enough?
+             * Somehow there is another doubling in here, so the
+             * final result = start * 8 / 5.
+             */
+            for (int i = 16 * 2; i > 0; i--) {
+                if ((i & 1) == 0) {             /* 42$ */
+                    float_buf >>= 2;
+                }
+                float_buf >>= 1;                /* 43$ */
+                float_buf += float_save;
+                DF("Loop i=%2d: ", i); DUMP3;
+            }
+#if 1
+            { /* Method 3 */
+                uint64_t hi, lo;
+                mult64to128(float_save, 0x9999999999999999ULL, &hi, &lo);
+                /*
+                 * Really multiply with the 65-bit number
+                 *                     0x19999999999999999
+                 *                     1 1001 1001 1001 ...
+                 * which is 8 *          0011 0011 0011 ... aka 0x333...
+                 * which is (2**64 - 1) / 5.
+                 */
+                uint64_t result = hi + float_save;
+                if (result == float_buf) {
+                    printf("Same               333 loop and *9999: %016llx vs %016llx\n", float_buf, result);
+                    printf("                              was    : %016llx\n", float_save);
+                } else {
+                    printf("Difference between 333 loop and *3333: %016llx vs %016llx\n", float_buf, result);
+                    printf("                                     : %016llx    %016llx\n", hi, lo);
+                    printf("                              was    : %016llx\n", float_save);
+                }
+            }
+#endif
+#if 1   /* Try other methods to calculate the same thing more directly */
+            {
+                uint64_t result = float_save / 5 * 8;
+                /* Try to fill in some of the lesser significant bits */
+                int round = float_save % 5;
+                if (round) {
+                    result += round * 8 / 5;
+                } else {
+                    //result--;
+                }
+# if 0
+                if (result == float_buf) {
+                    printf("Same               333 loop and /5 *8: %016llx vs %016llx\n", float_buf, result);
+                } else {
+                    printf("Difference between 333 loop and /5 *8: %016llx vs %016llx\n", float_buf, result);
+                    printf("                             was     : %016llx    %d %d\n", float_buf, float_save % 20, (float_save % 20) * 2 / 5);
+                }
+# endif
+                float_buf = result;
+                printf("after / 5 * 8: ");
+                DUMP3;
+            }
+#endif
+#if 0
+            {
+                __uint128_t big = (__uint128_t)float_save << 3;
+
+                uint64_t result = big / 5;
+
+                if (float_save % 5 == 0)
+                    result--;
+
+                if (result == float_buf) {
+                    printf("Same               333 loop and *8 /5: %016llx vs %016llx\n", float_buf, result);
+                } else {
+                    printf("Difference between 333 loop and *8 /5: %016llx vs %016llx\n", float_buf, result);
+                }
+
+                /*
+                 * Rounding is slightly different, in particular for start
+                 * values that are multiples of 5 but some other cases too.
+                 */
+            }
+#endif
+
+            /* It's not simply dividing by 5, it also multiplies by 8,
+             * so we need to adjust the exponent here. */
+            float_bin_exponent -= 3;
+            float_dec_exponent++;
+            DUMP3;
+        }
+
+        /* Normalize the mantissa: shift a single 1 out to the left */
+        DF("Normalize the mantissa: shift a single 1 out to the left\n");
+        int carry;
+        do {
+            /* FLTG5 */
+            float_bin_exponent--;
+            carry = (float_buf >> 63) & 1;
+            float_buf <<= 1;
+            DUMP3;
+        } while (carry == 0);
+
+        /* Set excess 128. */
+        DF("Set excess 128.\n");
+        float_bin_exponent += 0200;
+        DUMP3;
+
+        if (float_bin_exponent & 0xFF00) {
+            /* Error N. Underflow. 2$ */
+            printf("Error N (underflow)\n");
+            return 0;
+        }
+
+        /* Shift right 9 positions to make room for sign and exponent 3$ */
+        DF("Shift right 9 positions to make room for sign and exponent\n");
+        int round = (float_buf >> 8) & 0x0001;
+        float_buf >>= 9;
+        float_buf |= (uint64_t)float_bin_exponent << (64-9);
+        DUMP3;
+
+        /*
+         * This rounding step seems always needed to make the result the same
+         * as the implementation with long doubles.
+         * This may be because of the slight imprecision of the divisions by 10?
+         * However there is something to be said for the argument that when you
+         * want a 1 or 2 word result, rounding twice is wrong.
+         * The reference implementation omits this rounding for size != 4.
+         */
+        float_buf += round;
+        DF("round: size = 4, round = %d\n", round);
+
+        /* Round (there is a truncation option to omit this step) */
+        if (1) {
+            if (size < 4) {
+                /* 1 << 31 or 1 << 47 */
+                uint64_t onehalf = 1ULL << ((16 * (4-size)) - 1);
+                DUMP3;
+                DF("round: size = %d, onehalf = %016llx\n", size, onehalf);
+                float_buf += onehalf;
+                DUMP3;
+
+                /* The rounding bit is the lesser significant bit that's just
+                 * outside the returned result. If we round, we add it to the
+                 * returned value.
+                 *
+                 * If there is a carry-out of the mantissa, it gets added to
+                 * the exponent (increasing it by 1).
+                 *
+                 * If that also overflows, we truely have overflow.
+                 */
+            }
+
+            DF("After rounding\n");
+            DUMP3;
+        }
+
+        if (float_buf & 0x8000000000000000ULL) {
+            // 6$ error T: exponent overflow
+            printf("Error T: exponent overflow\n");
+            return 0;
+        }
+
+        /* 7$ */
+        float_buf |= (uint64_t)float_sign << 63;
+        DF("Put in float_sign: "); DUMP3;
+    }
+
+    /* Now put together the result from the parts */
+    flt[0] = (float_buf >> 48) & 0xFFFF;
+    if (size > 1) {
+        flt[1] = (float_buf >> 32) & 0xFFFF;
+        if (size > 2) {
+            flt[2] = (float_buf >> 16) & 0xFFFF;
+            flt[3] = (float_buf >>  0) & 0xFFFF;
+        }
+    }
+
+    return 1;
+}
+
+void
+test_one(char *input, unsigned expected0, unsigned expected1, unsigned expected2, unsigned expected3)
+{
+    unsigned result[4];
+    unsigned result2[4];
+
+    printf("------------------------\n");
+    parse_float_m3(input, 4, result);
+
+    if (result[0] != expected0 ||
+        result[1] != expected1 ||
+        result[2] != expected2 ||
+        result[3] != expected3) {
+        printf("Unexpected result: %04x %04x %04x %04x  from %s\n", result[0], result[1], result[2], result[3], input);
+        printf("  expected       : %04x %04x %04x %04x\n", expected0, expected1, expected2, expected3);
+        printflt(result, 4);
+    }
+
+#if 0
+    parse_float_m2(input, 4, result2);
+    if (result2[0] != expected0 ||
+        result2[1] != expected1 ||
+        result2[2] != expected2 ||
+        result2[3] != expected3) {
+        printf("Unexpected result2: %04x %04x %04x %04x from %s\n", result2[0], result2[1], result2[2], result2[3], input);
+        printf("  expected        : %04x %04x %04x %04x\n", expected0, expected1, expected2, expected3);
+        printflt(result2, 4);
+    }
+#endif
+}
+
+int
+main(int argc, char **argv)
+{
+    /* Should print 64 */
+    DF("LDBL_MANT_DIG: %d\n", LDBL_MANT_DIG);
+
+    test_one("100E-2", 040200, 0000000, 0000000, 0000000);
+#if 1
+    test_one("10000000000E-10", 040200, 0000000, 0000000, 0000000);
+    test_one("1.0", 040200, 0000000, 0000000, 0000000);
+    test_one("0.1", 037314, 0146314, 0146314, 0146315);
+    test_one("1.0E5",  0044303, 0050000, 0000000, 0000000);
+    test_one("1.0E10", 0050425, 0001371, 0000000, 0000000);
+    test_one("1.0E20", 0060655, 0074353, 0142654, 0061000);
+    test_one("1.0E30", 0071111, 0171311, 0146404, 0063517);
+    test_one("1.0E38", 0077626, 0073231, 0050265, 0006611);
+    test_one("1.701411834604692307e+38", 077777, 0177777, 0177777, 0177777);
+#endif
+    test_one("0.994140625", 0040176, 0100000, 000000, 000000);
+    //                         407e     8000    0000    0000
+#if 1
+    test_one("0.998046875", 0040177, 0100000, 000000, 000000);
+    test_one("1.00390625",  0040200, 0100000, 000000, 000000);
+    test_one("1.01171875",  0040201, 0100000, 000000, 000000);
+    test_one("0.999999910593032836914062", 0040177, 0177776, 0100000, 0000000);
+    test_one("0.999999970197677612304687", 0040177, 0177777, 0100000, 0000000);
+    test_one("1.00000005960464477539062", 0040200, 0000000, 0100000, 0000000);
+    test_one("1.000000178813934326171875", 0040200, 0000001, 0100000, 0000000);
+    test_one("1.0000000000000000138777878078144567552953958511353", 0040200, 0000000, 0000000, 0000000);
+    test_one("1.0000000000000000416333634234433702658861875534058", 0040200, 0000000, 0000000, 0000001);
+    test_one("0.99999999999999997918331828827831486705690622329712", 0040177, 0177777, 0177777, 0177776);
+    test_one("0.99999999999999999306110609609277162235230207443237", 0040177, 0177777, 0177777, 0177777);
+    //test_one("", 0, 0, 0, 0);
+    test_one("6.66666", 040725, 052507, 055061, 0122276);
+    test_one("170141183460469230846243588177825628160", 0100000, 0000000, 0000000, 0000000);    // T error
+    test_one("170141183460469230564930741053754966016", 0077777, 0177777, 0177777, 0177777);
+    //test_one("3.1415926535897932384626433", 0, 0, 0, 0);
+    //test_one("", 0, 0, 0, 0);
+    //
+    /* First a number that should just fit in a 56 bit mantissa. */
+    /* 1 << 56 */
+    test_one("72057594037927936", 056200, 000000, 000000, 000000);
+    /* A bit more requires more precision, so they are rounded. */
+    test_one("72057594037927937", 056200, 000000, 000000, 000001);
+    test_one("72057594037927938", 056200, 000000, 000000, 000001);
+
+    /* Lower numbers should all be represented exactly */
+    test_one("72057594037927935", 056177, 0177777, 0177777, 0177777);
+    test_one("72057594037927934", 056177, 0177777, 0177777, 0177776);
+    test_one("72057594037927933", 056177, 0177777, 0177777, 0177775);
+
+    /* 1 << 57 should also be exactly representable */
+    test_one("144115188075855872", 056400, 000000, 000000, 000000);
+
+    /* 1 less lacks one significant bit so will be rounded up */
+    test_one("144115188075855871", 056400, 000000, 000000, 000000);
+
+    /* Same for 1 more, rounded down */
+    test_one("144115188075855873", 056400, 000000, 000000, 000000);
+
+    /* but 2 more should show up in the lsb */
+    /* This one seems most clearly problematic */
+    test_one("144115188075855874", 056400, 000000, 000000, 000001);
+
+    // Some numbers around some of the magic numbers in the parser
+    test_one("3681129745421959167", 0057514, 0054000, 0, 0);
+    test_one("3681129745421959168", 0057514, 0054000, 0, 0); // 0x3316000000000000
+    test_one("3681129745421959169", 0057514, 0054000, 0, 0);
+    test_one("3681129745421959170", 0057514, 0054000, 0, 0);
+
+    test_one("14757170078986272767", 0060114, 0146000, 0000000, 0000000);
+    test_one("14757170078986272768", 0060114, 0146000, 0000000, 0000000); // 0xCCCC000000000000
+    test_one("14757170078986272769", 0060114, 0146000, 0000000, 0000000);
+    test_one("14757170078986272780", 0060114, 0146000, 0000000, 0000000);
+
+#endif
+
+    return 0;
+}