Add some more tests; rounding of 56-bit mantissa now works.

2026-03-01 17:37:04 +00:00 · 2021-01-19 18:12:13 +01:00
parent 4f2f813dee
commit 90943eaf49
3 changed files with 177 additions and 53 deletions
--- a/parse.c
+++ b/parse.c
@@ -318,7 +318,9 @@ int get_mode(
    return TRUE;
 }

-#if DEBUG
+#define DEBUG_FLOAT     0
+#if DEBUG_FLOAT
+
 void
 printflt(unsigned *flt, int size)
 {
@@ -328,11 +330,15 @@ printflt(unsigned *flt, int size)
    printf("ufrac: %02x",   flt[0] & 0x007F);

    for (int i = 1; i < size; i++) {
-	printf(" %04x", flt[i]);
+        printf(" %04x", flt[i]);
    }

    printf("\n");
 }
+
+#define DF(x)   printf x
+#else
+#define DF(x)
 #endif

 /*
@@ -392,10 +398,13 @@ int parse_float(
    int             sexp;       /* Signed exponent */
    unsigned        uexp;       /* Unsigned excess-128 exponent */
    unsigned        sign = 0;   /* Sign mask */
+    unsigned        round = 0;  /* Rounding bit */

    i = sscanf(cp, SCANF_FMT "%n", &d, &n);
    if (i == 0)
        return 0;                      /* Wasn't able to convert */
+    DF(("LDBL_MANT_DIG: %d\n", LDBL_MANT_DIG));
+    DF(("%Lf input: %s", d, cp));

    cp += n;
    if (endp)
@@ -409,11 +418,13 @@ int parse_float(
    }

    frac = FREXP(d, &sexp);            /* Separate into exponent and mantissa */
+    DF(("frac: %Lf %La sexp: %d\n", frac, frac, sexp));
    if (sexp < -127 || sexp > 127)
        return 0;                      /* Exponent out of range. */

    uexp = sexp + 128;                  /* Make excess-128 mode */
    uexp &= 0xff;                       /* express in 8 bits */
+    DF(("uexp: %02x\n", uexp));

    /*
     * frexp guarantees its fractional return value is
@@ -435,34 +446,41 @@ int parse_float(
     * However the bit immediately above the MSB is always 1
     *  because the value is normalized.  So it's actually
     *  8 bits, 24 bits, or 56 bits.
-     * We multiply the fractional part of our value by
+     * We effectively multiply the fractional part of our value by
     *  2**56 to fully expose all of those bits (including
     *  the MSB which is 1).
+     * However as an intermediate step, we really multiply by
+     *  2**57, so we get one lsb for possible later rounding
+     *  purposes. After that, we divide by 2 again.
     */

-
-    /* The following big literal is 2 to the 56th power: */
-    ufrac = (uint64_t) (frac * 72057594037927936.0);     /* Align fraction bits */
+    /* The following big literal is 2 to the 57th power: */
+    ufrac = (uint64_t) (frac * 144115188075855872.0); /* Align fraction bits */
+    round = ufrac & 1;
+    ufrac >>= 1;
+    DF(("ufrac: %016lx  round: %d\n", ufrac, round));
+    DF(("56   : %016lx\n", (1UL<<56) - 1));

    /* ufrac is now >= 2**55 and < 2**56 */

    /* Round from FLT4 to FLT2 or single-word float.
     *
-     * +--+--------+-------+
-     * |15|14     7|6     0|
-     * +--+--------+-------+
-     * | S|EEEEEEEE|MMMMMMM|
-     * +--+--------+-------+
+     * +--+--------+-------+ +--------+--------+
+     * |15|14     7|6     0| |15      |       0|
+     * +--+--------+-------+ +--------+--------+
+     * | S|EEEEEEEE|MMMMMMM| |MMMMMMMM|MMMMMMMM| ...maybe 2 more words...
+     * +--+--------+-------+ +--------+--------+
     *  Sign (1 bit)
     *     Exponent (8 bits)
     *              Mantissa (7 bits)
     */
    if (size < 4) {
-        /* Round to nearest 8- or 24- bit approximation */
        /*
         * Round to nearest 8- or 24- bit approximation.
+         *
         * Method: if the msb of the bits that are chopped off is set, add 1 to
         * the lsb of the remaining bits (one more leftward).
+         *
         * Simplification: adding 1 to that msb will carry into the lsb. And if
         * that msb is unset, then that will not carry. So we can always do that
         * addition.
@@ -470,12 +488,23 @@ int parse_float(
         */
        int bits_chopped_off = 16 * (4 - size);
        uint64_t msb_chopped_off = 1ULL << (bits_chopped_off - 1);
+        DF(("msb_chopped_off = %16lx\n", msb_chopped_off));
        ufrac += msb_chopped_off;
+        DF(("ufrac: %016lx after rounding\n", ufrac));

        if ((ufrac >> 56) > 0) {       /* Overflow? */
            ufrac >>= 1;               /* Normalize */
            uexp++;
+            DF(("ufrac: %016lx  uexp: %02x\n", ufrac, uexp));
        }
+    } else {
+        /*
+         * Round to 56-bit approximation.
+         *
+         * The method is the same as above, with 'round' as the msb of the
+         * chopped off bits.
+         */
+        ufrac += round;
    }

    flt[0] = (unsigned) (sign | (uexp << 7) | ((ufrac >> 48) & 0x7F));
--- a/tests/test-float.lst.ok
+++ b/tests/test-float.lst.ok
@@ -1,42 +1,94 @@
       1                                ;;;;;
       2                                ;
-       3                                ; Test floating point numbers
-       4                                
-       5 000000 040200                          .word   ^F 1.0          ; 040200
-       6 000002 140200                          .word   ^F-1.0          ; 140200
-       7 000004 137600                          .word   -^F 1.0         ; 137600
-       8 000006 037600                          .word   -^F-1.0         ; 037600
-       9                                
-      10 000010 040706                          .word   ^F6.2           ; 040706
-      11 000012 137071                          .word   ^C^F6.2         ; 137071
-      12 000014 137071                          .word   ^C<^F6.2>       ; 137071
-      13                                
-      14 000016 040706  063146                  .flt2   6.2             ; 040706 063146
-      15 000022 040706  063146  063146          .flt4   6.2             ; 040706 063146 063146 063146
+       3                                ; Test floating point numbers.
+       4                                ;
+       5                                ; The values in the comments are the reference values from MACRO V05.05.
+       6                                ;
+       7 000000 040200                          .word   ^F 1.0          ; 040200
+       8 000002 140200                          .word   ^F-1.0          ; 140200
+       9 000004 137600                          .word   -^F 1.0         ; 137600
+      10 000006 037600                          .word   -^F-1.0         ; 037600
+      11                                
+      12 000010 040706                          .word   ^F6.2           ; 040706
+      13 000012 137071                          .word   ^C^F6.2         ; 137071
+      14 000014 137071                          .word   ^C<^F6.2>       ; 137071
+      15                                
+      16 000016 040706  063146                  .flt2   6.2             ; 040706 063146
+      17 000022 040706  063146  063146          .flt4   6.2             ; 040706 063146 063146 063146
         000030 063146                  
-      16                                
-      17 000032 040300  000000                  .flt2   1.5             ; 040300 000000
-      18 000036 040300  000000  000000          .flt4   1.5             ; 040300 000000 000000 000000
+      18                                
+      19 000032 040300  000000                  .flt2   1.5             ; 040300 000000
+      20 000036 040300  000000  000000          .flt4   1.5             ; 040300 000000 000000 000000
         000044 000000                  
-      19                                
-      20 000046 056200                          .word   ^F 72057594037927935  ; 056200 (rounded!)
-      21 000050 056200  000000                  .flt2   72057594037927935     ; 056200 000000 (rounded!)
-      22 000054 056177  177777  177777          .flt4   72057594037927935     ; 056177 177777 177777 177777 (exact!)
-         000062 177777                  
-      23                                
-      24 000064 056200                          .word   ^F 72057594037927936  ; 056200
-      25 000066 056200  000000                  .flt2   72057594037927936     ; 056200 000000
-      26 000072 056200  000000  000000          .flt4   72057594037927936     ; 056200 000000 000000 000000
-         000100 000000                  
-      27                                
-      28 000102 056200  000000  000000          .flt4   72057594037927937     ; 056200 000000 000000 000001
-         000110 000000                  
-      29                                
-      30 000112 056400  000000  000000          .flt4   144115188075855873    ; 1 << 57 +1
-         000120 000000                  
-      31                                                                      ; 056400 000000 000000 000000 (rounded!)
-      32                                
+      21                                
+      22 000046 040511                          .word   ^F 3.1415926535897932384626433  ; 040511
+      23 000050 040511  007733                  .flt2   3.1415926535897932384626433     ; 040511 007733
+      24 000054 040511  007732  121041          .flt4   3.1415926535897932384626433     ; 040511 007732 121041 064302
+         000062 064302                  
+      25                                
+      26                                        ; Test some large numbers at the edge of the exactly representable
+      27                                        ; integers.
+      28                                        ; 1 << 56       just barely fits: it uses 57 bits but the lsb is 0 so
+      29                                        ;               when it is cut off, we don't notice.
+      30                                        ; 1 << 56 + 1 R the lsb are 01 and is cut off.
+      31                                        ; 1 << 56 + 2   the lsb are 10 and the missing 0 goes unnoticed.
      32                                
+      33                                        ; 1 << 56 - 1   consists of 56 1 bits. This value and all smaller ints
+      34                                        ; 1 << 56 - 2   get represented exactly.
+      35                                
+      36                                        ; Going up, to rounding steps of 4:
+      37                                        ;
+      38                                        ; 1 << 57
+      39                                        ; 1 << 57 + 4   next higher available representation
+      40                                        ; 1 << 57 + 8   next higher available representation
+      41                                
+      42                                                                      ; 1 << 56 - 3
+      43 000064 056177  177777  177777          .flt4   72057594037927933     ; 056177 177777 177777 177775
+         000072 177775                  
+      44                                                                      ; 1 << 56 - 2
+      45 000074 056177  177777  177777          .flt4   72057594037927934     ; 056177 177777 177777 177776
+         000102 177776                  
+      46                                
+      47                                                                      ; 1 << 56 - 1
+      48 000104 056200                          .word   ^F 72057594037927935  ; 056200 (rounded!)
+      49 000106 056200  000000                  .flt2   72057594037927935     ; 056200 000000 (rounded!)
+      50 000112 056177  177777  177777          .flt4   72057594037927935     ; 056177 177777 177777 177777
+         000120 177777                  
+      51                                
+      52                                                                      ; 1 << 56
+      53 000122 056200                          .word   ^F 72057594037927936  ; 056200
+      54 000124 056200  000000                  .flt2   72057594037927936     ; 056200 000000
+      55 000130 056200  000000  000000          .flt4   72057594037927936     ; 056200 000000 000000 000000
+         000136 000000                  
+      56                                
+      57 000140 056200  000000  000000          .flt4   72057594037927938     ; 1 << 56 + 2
+         000146 000001                  
+      58                                                                      ; 056200 000000 000000 000001
+      59                                
+      60 000150 056400  000000  000000          .flt4   144115188075855872    ; 1 << 57
+         000156 000000                  
+      61                                                                      ; 056400 000000 000000 000000
+      62 000160 056400  000000  000000          .flt4   144115188075855873    ; 1 << 57 + 1
+         000166 000000                  
+      63                                                                      ; 056400 000000 000000 000000 (inexact)
+      64 000170 056400  000000  000000          .flt4   144115188075855874    ; 1 << 57 + 2
+         000176 000001                  
+      65                                                                      ; 056400 000000 000000 000001 (inexact)
+      66 000200 056400  000000  000000          .flt4   144115188075855875    ; 1 << 57 + 3
+         000206 000001                  
+      67                                                                      ; 056400 000000 000000 000001 (inexact)
+      68 000210 056400  000000  000000          .flt4   144115188075855876    ; 1 << 57 + 4
+         000216 000001                  
+      69                                                                      ; 056400 000000 000000 000001 (exact!)
+      70 000220 056400  000000  000000          .flt4   144115188075855880    ; 1 << 57 + 8
+         000226 000002                  
+      71                                                                      ; 056400 000000 000000 000002 (exact!)
+      72                                
+      73                                        ; This one triggers rounding up (round == 1)
+      74 000230 040725  052507  055061          .flt4   6.66666         ; 040725 052507 055061 122276
+         000236 122276                  
+      75                                
+      75                                


 Symbol table
@@ -47,4 +99,4 @@ Symbol table
 Program sections:

 . ABS.  000000    000   (RW,I,GBL,ABS,OVR,NOSAV)
-        000122    001   (RW,I,LCL,REL,CON,NOSAV)
+        000240    001   (RW,I,LCL,REL,CON,NOSAV)
--- a/tests/test-float.mac
+++ b/tests/test-float.mac
@@ -1,7 +1,9 @@
 ;;;;;
 ;
-; Test floating point numbers
-
+; Test floating point numbers.
+;
+; The values in the comments are the reference values from MACRO V05.05.
+;
        .word   ^F 1.0          ; 040200
        .word   ^F-1.0          ; 140200
        .word   -^F 1.0         ; 137600
@@ -17,16 +19,57 @@
        .flt2   1.5             ; 040300 000000
        .flt4   1.5             ; 040300 000000 000000 000000

+        .word   ^F 3.1415926535897932384626433  ; 040511
+        .flt2   3.1415926535897932384626433     ; 040511 007733
+        .flt4   3.1415926535897932384626433     ; 040511 007732 121041 064302
+
+        ; Test some large numbers at the edge of the exactly representable
+        ; integers.
+        ; 1 << 56       just barely fits: it uses 57 bits but the lsb is 0 so
+        ;               when it is cut off, we don't notice.
+        ; 1 << 56 + 1 R the lsb are 01 and is cut off.
+        ; 1 << 56 + 2   the lsb are 10 and the missing 0 goes unnoticed.
+
+        ; 1 << 56 - 1   consists of 56 1 bits. This value and all smaller ints
+        ; 1 << 56 - 2   get represented exactly.
+
+        ; Going up, to rounding steps of 4:
+        ;
+        ; 1 << 57
+        ; 1 << 57 + 4   next higher available representation
+        ; 1 << 57 + 8   next higher available representation
+
+                                      ; 1 << 56 - 3
+        .flt4   72057594037927933     ; 056177 177777 177777 177775
+                                      ; 1 << 56 - 2
+        .flt4   72057594037927934     ; 056177 177777 177777 177776
+
+                                      ; 1 << 56 - 1
        .word   ^F 72057594037927935  ; 056200 (rounded!)
        .flt2   72057594037927935     ; 056200 000000 (rounded!)
-        .flt4   72057594037927935     ; 056177 177777 177777 177777 (exact!)
+        .flt4   72057594037927935     ; 056177 177777 177777 177777

+                                      ; 1 << 56
        .word   ^F 72057594037927936  ; 056200
        .flt2   72057594037927936     ; 056200 000000
        .flt4   72057594037927936     ; 056200 000000 000000 000000

-        .flt4   72057594037927937     ; 056200 000000 000000 000001
+        .flt4   72057594037927938     ; 1 << 56 + 2
+                                      ; 056200 000000 000000 000001

-        .flt4   144115188075855873    ; 1 << 57 +1
-                                      ; 056400 000000 000000 000000 (rounded!)
+        .flt4   144115188075855872    ; 1 << 57
+                                      ; 056400 000000 000000 000000
+        .flt4   144115188075855873    ; 1 << 57 + 1
+                                      ; 056400 000000 000000 000000 (inexact)
+        .flt4   144115188075855874    ; 1 << 57 + 2
+                                      ; 056400 000000 000000 000001 (inexact)
+        .flt4   144115188075855875    ; 1 << 57 + 3
+                                      ; 056400 000000 000000 000001 (inexact)
+        .flt4   144115188075855876    ; 1 << 57 + 4
+                                      ; 056400 000000 000000 000001 (exact!)
+        .flt4   144115188075855880    ; 1 << 57 + 8
+                                      ; 056400 000000 000000 000002 (exact!)
+
+        ; This one triggers rounding up (round == 1)
+        .flt4   6.66666         ; 040725 052507 055061 122276