first commit

2026-04-25 11:41:39 +00:00 · 2020-10-31 09:26:56 +01:00
commit e2f86a9c1a
21 changed files with 29695 additions and 0 deletions
--- a/40
+++ b/40
@@ -0,0 +1,40 @@
+# Base name of the final executable; $(EXE) is appended to form $(TARGET).
+EXENAME          = emulator
+
+# Source groups: the emulator front end, the CPU core sources, and the
+# opcode source/header that the m68kmake generator writes when it is run.
+MAINFILES        = emulator.c
+MUSASHIFILES     = m68kcpu.c softfloat/softfloat.c 
+MUSASHIGENCFILES = m68kops.c
+MUSASHIGENHFILES = m68kops.h
+MUSASHIGENERATOR = m68kmake
+
+# Executable suffix and the path prefix used to invoke a freshly built tool.
+# The commented-out pair is presumably the Windows variant - confirm.
+# EXE = .exe
+# EXEPATH = .\\
+EXE =
+EXEPATH = ./
+
+# Every C source that goes into the executable, and the matching objects.
+.CFILES   = $(MAINFILES) $(MUSASHIFILES) $(MUSASHIGENCFILES)
+.OFILES   = $(.CFILES:%.c=%.o)
+
+# Toolchain settings. No explicit .c -> .o rule is defined in this file, so
+# CFLAGS is consumed by make's implicit rule; LFLAGS goes on the link line.
+CC        = gcc
+WARNINGS  = -Wall -Wextra -pedantic
+CFLAGS    = $(WARNINGS) -march=armv7 -O3
+LFLAGS    = $(WARNINGS) 
+
+TARGET = $(EXENAME)$(EXE)
+
+# Everything 'clean' removes: generated sources, objects and both binaries.
+DELETEFILES = $(MUSASHIGENCFILES) $(MUSASHIGENHFILES) $(.OFILES) $(TARGET) $(MUSASHIGENERATOR)$(EXE)
+
+
+# 'all' and 'clean' name actions, not files. Declaring them phony keeps a
+# stray file called "all" or "clean" from making the rule look up to date.
+.PHONY: all clean
+
+# Default goal: build the emulator executable.
+all: $(TARGET)
+
+# Remove generated sources, objects and both executables.
+clean:
+	rm -f $(DELETEFILES)
+
+
+# Link the executable. The generated header is listed first so the generator
+# has run before any object is built; depending on Makefile forces a relink
+# whenever the build rules change.
+$(TARGET): $(MUSASHIGENHFILES) $(.OFILES) Makefile
+	$(CC) -o $@ $(.OFILES) -O3 -pthread $(LFLAGS) -lm
+
+# The generated opcode source and header are produced by running the
+# generator executable.
+$(MUSASHIGENCFILES) $(MUSASHIGENHFILES): $(MUSASHIGENERATOR)$(EXE)
+	$(EXEPATH)$(MUSASHIGENERATOR)$(EXE)
+
+# Build the generator itself from its single source file (note: compiled
+# without CFLAGS).
+$(MUSASHIGENERATOR)$(EXE):  $(MUSASHIGENERATOR).c
+	$(CC) -o  $(MUSASHIGENERATOR)$(EXE)  $(MUSASHIGENERATOR).c
--- a/README.md
+++ b/README.md
@@ -0,0 +1 @@
+# pistorm
--- a/emulator.c
+++ b/emulator.c
@@ -0,0 +1,674 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sched.h>
+#include "m68k.h"
+#include "main.h"
+#include<pthread.h>
+
+
+/* Physical base and size of the SoC peripheral window mapped by setup_io().
+ * The commented-out base is the alternative for the boards named in its
+ * trailing comment. */
+//#define BCM2708_PERI_BASE        0x20000000  //pi0-1
+#define BCM2708_PERI_BASE       0x3F000000     //pi3
+#define BCM2708_PERI_SIZE       0x01000000
+/* Absolute addresses of the GPIO and clock blocks ... */
+#define GPIO_BASE               (BCM2708_PERI_BASE + 0x200000) /* GPIO controller */
+#define GPCLK_BASE              (BCM2708_PERI_BASE + 0x101000)
+/* ... and the same blocks as byte offsets inside the mapped window. */
+#define GPIO_ADDR                0x200000 /* GPIO controller */
+#define GPCLK_ADDR               0x101000
+/* Value OR-ed into every clock register write, and the byte offsets of the
+ * GPCLK0 control and divider registers (divided by 4 when used to index the
+ * word pointer gpclk). */
+#define CLK_PASSWD      0x5a000000
+#define CLK_GP0_CTL     0x070
+#define CLK_GP0_DIV     0x074
+
+/* GPIO numbers of the three select lines driven by the mode macros below. */
+#define SA0 5
+#define SA1 3
+#define SA2 2
+
+/* Bus-mode selectors: each one drives SA0/SA1/SA2 to a fixed pattern.
+ * Each expands to three complete statements (including the final ';'), so
+ * they are written without a trailing semicolon at the call site and must
+ * not be used as the sole body of an unbraced if/else. */
+#define STATUSREGADDR    GPIO_CLR = 1<<SA0;GPIO_CLR = 1<<SA1;GPIO_SET = 1<<SA2;
+#define W16              GPIO_CLR = 1<<SA0;GPIO_CLR = 1<<SA1;GPIO_CLR = 1<<SA2;
+#define R16              GPIO_SET = 1<<SA0;GPIO_CLR = 1<<SA1;GPIO_CLR = 1<<SA2;
+#define W8               GPIO_CLR = 1<<SA0;GPIO_SET = 1<<SA1;GPIO_CLR = 1<<SA2;
+#define R8               GPIO_SET = 1<<SA0;GPIO_SET = 1<<SA1;GPIO_CLR = 1<<SA2;
+
+/* Both sizes are 4 KiB. */
+#define PAGE_SIZE (4*1024)
+#define BLOCK_SIZE (4*1024)
+
+/* Accumulate a pin into one of two bit masks: 'set' when ishigh is true,
+ * 'reset' otherwise. Both variables must be in scope where the macro is
+ * expanded; nothing in this file declares them. */
+#define GPIOSET(no, ishigh)           \
+do {                                  \
+        if (ishigh)                   \
+                set |= (1 << (no));   \
+        else                          \
+                reset |= (1 << (no)); \
+} while (0)
+
+
+/* Highest valid index into g_ram (the array holds MAX_RAM+1 bytes). */
+#define MAX_RAM 0xFFFFFF
+
+
+/*
+ * Big-endian read/write helpers: BASE is indexed as an array of bytes and
+ * ADDR is the index of the most significant byte.
+ *
+ * The WRITE_* forms are wrapped in do { } while (0) so each behaves as a
+ * single statement (safe as the body of an unbraced if/else).  READ_LONG
+ * widens to unsigned before shifting so a top byte >= 0x80 is not shifted
+ * into the sign bit of an int.
+ */
+#define READ_BYTE(BASE, ADDR) ((BASE)[(ADDR)])
+#define READ_WORD(BASE, ADDR) (((BASE)[(ADDR)]<<8) |			\
+							  (BASE)[(ADDR)+1])
+#define READ_LONG(BASE, ADDR) ((((unsigned int)(BASE)[(ADDR)])<<24) |	\
+							  (((unsigned int)(BASE)[(ADDR)+1])<<16) |	\
+							  (((unsigned int)(BASE)[(ADDR)+2])<<8) |	\
+							  ((unsigned int)(BASE)[(ADDR)+3]))
+
+#define WRITE_BYTE(BASE, ADDR, VAL) do {					\
+		(BASE)[(ADDR)] = (VAL)&0xff;					\
+	} while (0)
+#define WRITE_WORD(BASE, ADDR, VAL) do {					\
+		(BASE)[(ADDR)] = ((VAL)>>8) & 0xff;				\
+		(BASE)[(ADDR)+1] = (VAL)&0xff;					\
+	} while (0)
+#define WRITE_LONG(BASE, ADDR, VAL) do {					\
+		(BASE)[(ADDR)] = ((VAL)>>24) & 0xff;				\
+		(BASE)[(ADDR)+1] = ((VAL)>>16)&0xff;				\
+		(BASE)[(ADDR)+2] = ((VAL)>>8)&0xff;				\
+		(BASE)[(ADDR)+3] = (VAL)&0xff;					\
+	} while (0)
+
+
+
+/* File descriptor for /dev/mem and the mapping created from it in
+ * setup_io().  mem_fd_gpclk and gpclk_map are declared here but are not
+ * assigned anywhere in this file. */
+int  mem_fd;
+int  mem_fd_gpclk;
+void *gpio_map;
+void *gpclk_map;
+
+// I/O access
+/* Word pointers to the GPIO and clock register blocks inside the mapping. */
+volatile unsigned int *gpio;
+volatile unsigned int *gpclk;
+/* Snapshots of the first three GPIO function-select words, captured in
+ * main(): gpfsel0..2 with GPIO 8-23 configured as inputs, gpfsel0_o..2_o
+ * with GPIO 8-23 configured as outputs.  The bus routines write them back
+ * to switch the direction of those pins. */
+volatile unsigned int gpfsel0;
+volatile unsigned int gpfsel1;
+volatile unsigned int gpfsel2;
+volatile unsigned int gpfsel0_o;
+volatile unsigned int gpfsel1_o;
+volatile unsigned int gpfsel2_o;
+
+// GPIO setup macros. Always use INP_GPIO(x) before using OUT_GPIO(x) or SET_GPIO_ALT(x,y):
+// OUT_GPIO and SET_GPIO_ALT only OR bits into the 3-bit function field, so the
+// field has to be cleared by INP_GPIO first.
+#define INP_GPIO(g) *(gpio+((g)/10)) &= ~(7<<(((g)%10)*3))
+#define OUT_GPIO(g) *(gpio+((g)/10)) |=  (1<<(((g)%10)*3))
+#define SET_GPIO_ALT(g,a) *(gpio+(((g)/10))) |= (((a)<=3?(a)+4:(a)==4?3:2)<<(((g)%10)*3))
+
+#define GPIO_SET *(gpio+7)  // sets   bits which are 1 ignores bits which are 0
+#define GPIO_CLR *(gpio+10) // clears bits which are 1 ignores bits which are 0
+
+// The argument is parenthesized so that an expression argument such as
+// GET_GPIO(a | b) shifts by the whole expression rather than by part of it.
+#define GET_GPIO(g) (*(gpio+13)&(1<<(g))) // 0 if LOW, (1<<g) if HIGH
+
+#define GPIO_PULL *(gpio+37) // Pull up/pull down
+#define GPIO_PULLCLK0 *(gpio+38) // Pull up/pull down clock
+
+
+/* Maps the peripheral registers and initialises the gpio/gpclk pointers. */
+void setup_io();
+
+/* Bus accessors implemented further down in this file.  The 8- and 16-bit
+ * variants drive the GPIO protocol directly; the 32-bit variants are built
+ * from two 16-bit accesses. */
+uint32_t read8(uint32_t address);
+void write8(uint32_t address, uint32_t data);
+
+uint32_t read16(uint32_t address);
+void write16(uint32_t address, uint32_t data);
+
+void write32(uint32_t address, uint32_t data);
+uint32_t read32(uint32_t address);
+
+/* Access to the status register selected by STATUSREGADDR. */
+uint16_t read_reg(void);
+void write_reg(unsigned int value);
+
+/* Interrupt polling state: srdata is the last raw status word read,
+ * srdata2 the level derived from it ((srdata >> 13) & 0xff), and
+ * srdata2_old the previous raw status word used for change detection. */
+volatile uint16_t srdata;
+volatile uint32_t srdata2;
+volatile uint32_t srdata2_old;
+
+
+/* Backing store for the addresses the m68k_*_memory_* callbacks serve
+ * locally instead of from the bus. */
+unsigned char g_ram[MAX_RAM+1];                 /* RAM */
+/* Non-zero after a changed status word was forwarded while GPIO1 was low;
+ * cleared once the status is re-read after GPIO1 goes high again. */
+unsigned char toggle;
+
+
+/*
+ * Interrupt-level polling loop in pthread start-routine form.
+ *
+ * While GPIO1 reads low, the status register is read and, when it differs
+ * from the previously seen value, the level in bits 13 and up is passed to
+ * m68k_set_irq().  Once GPIO1 reads high again after such an update, the
+ * status is read and forwarded one more time.  Sleeps 1 us per iteration.
+ *
+ * args is unused.  The loop never terminates, so the function does not
+ * return.
+ */
+void* iplThread(void *args){
+    (void)args; /* required by the pthread signature, not used */
+
+    printf("thread!\n"); /* was "/n": a literal slash-n instead of a newline */
+
+    while (42) {
+        if (GET_GPIO(1) == 0) {
+            srdata = read_reg();
+            if (srdata != srdata2_old) {
+                srdata2 = ((srdata >> 13) & 0xff);
+                srdata2_old = srdata;
+                m68k_set_irq(srdata2);
+                toggle = 1;
+            }
+        } else {
+            if (toggle != 0) {
+                /* GPIO1 went high after an update: refresh once. */
+                srdata = read_reg();
+                srdata2 = ((srdata >> 13) & 0xff);
+                srdata2_old = srdata;
+                m68k_set_irq(srdata2);
+                toggle = 0;
+            }
+        }
+
+        usleep(1);
+    }
+}
+
+
+/*
+ * Program entry point: bring up the GPIO interface, reset the external
+ * state machine and the emulated CPU, then run the CPU forever, polling the
+ * interrupt level between execution slices.
+ *
+ * The main loop has no exit, so the function does not return in normal
+ * operation.
+ */
+int main() {
+    int g;
+    const struct sched_param priority = {99};
+
+    /* Real-time scheduling and memory locking are best-effort: a failure is
+     * reported and execution continues, but success is only announced when
+     * the lock actually succeeded. */
+    if (sched_setscheduler(0, SCHED_RR, &priority) != 0)
+        perror("sched_setscheduler");
+    if (mlockall(MCL_CURRENT) != 0) // lock in memory to keep us from paging out
+        perror("mlockall");
+    else
+        printf("YES locked in memory\n");
+
+    setup_io();
+
+    //Enable 200MHz CLK output on GPIO4, adjust divider and pll source depending on pi model
+    printf("Enable GPCLK0 on GPIO4\n");
+
+    /* NOTE(review): bit meanings below (bit 5 = kill, bit 7 = busy,
+     * bit 4 = enable, low bits = clock source) follow the BCM2835 clock
+     * manager layout - confirm against the datasheet. */
+    *(gpclk + (CLK_GP0_CTL/4)) = CLK_PASSWD | (1 << 5);
+    usleep(10);
+    while ((*(gpclk + (CLK_GP0_CTL/4))) & (1 << 7))
+        ; /* wait until bit 7 clears */
+    usleep(100);
+    *(gpclk + (CLK_GP0_DIV/4)) = CLK_PASSWD | (6 << 12); //divider , 6=200MHz on pi3
+    usleep(10);
+    *(gpclk + (CLK_GP0_CTL/4)) = CLK_PASSWD | 5 | (1 << 4); //pll? 6=plld, 5=pllc
+    usleep(10);
+    while (((*(gpclk + (CLK_GP0_CTL/4))) & (1 << 7)) == 0)
+        ; /* wait until bit 7 is set */
+    usleep(100);
+
+    /* SET_GPIO_ALT only ORs bits in, so clear the function field first. */
+    INP_GPIO(4);
+    SET_GPIO_ALT(4,0); //gpclk0
+
+    //set SA to output
+    INP_GPIO(2);
+    OUT_GPIO(2);
+    INP_GPIO(3);
+    OUT_GPIO(3);
+    INP_GPIO(5);
+    OUT_GPIO(5);
+
+    //set gpio0 (aux0) and gpio1 (aux1) to input
+    INP_GPIO(0);
+    INP_GPIO(1);
+
+    // Set GPIO pins 6,7 and 8-23 to output
+    for (g = 6; g <= 23; g++) {
+        INP_GPIO(g);
+        OUT_GPIO(g);
+    }
+    /* Snapshot of the function-select words with GPIO 8-23 as outputs. */
+    printf ("Precalculate GPIO8-23 aus Output\n");
+    gpfsel0_o = *(gpio); //store gpio ddr
+    printf ("gpfsel0: %#x\n", gpfsel0_o);
+    gpfsel1_o = *(gpio+1); //store gpio ddr
+    printf ("gpfsel1: %#x\n", gpfsel1_o);
+    gpfsel2_o = *(gpio+2); //store gpio ddr
+    printf ("gpfsel2: %#x\n", gpfsel2_o);
+
+    // Set GPIO pins 8-23 to input
+    for (g = 8; g <= 23; g++) {
+        INP_GPIO(g);
+    }
+    /* Snapshot of the function-select words with GPIO 8-23 as inputs. */
+    printf ("Precalculate GPIO8-23 as Input\n");
+    gpfsel0 = *(gpio); //store gpio ddr
+    printf ("gpfsel0: %#x\n", gpfsel0);
+    gpfsel1 = *(gpio+1); //store gpio ddr
+    printf ("gpfsel1: %#x\n", gpfsel1);
+    gpfsel2 = *(gpio+2); //store gpio ddr
+    printf ("gpfsel2: %#x\n", gpfsel2);
+
+    /* Initial levels of the select lines (GPIO 2, 3, 5) and of GPIO 6/7. */
+    GPIO_CLR = 1<<2;
+    GPIO_CLR = 1<<3;
+    GPIO_SET = 1<<5;
+
+    GPIO_SET = 1<<6;
+    GPIO_SET = 1<<7;
+
+    //reset cpld statemachine first
+    write_reg(0x01);
+    usleep(100);
+    write_reg(0x00);
+    usleep(100);
+
+    write8(0xbfe201,0x0001); //AMIGA OVL
+    write8(0xbfe001,0x0001); //AMIGA OVL high (ROM@0x0)
+
+    usleep(1000);
+
+    m68k_init();
+    m68k_set_cpu_type(M68K_CPU_TYPE_68040);
+    m68k_pulse_reset();
+    srdata2_old = read_reg();
+    toggle = 0;
+
+    /* Interrupt polling is done inline in the loop below; no separate
+     * polling thread is started. */
+    m68k_pulse_reset();
+    while (42) {
+        m68k_execute(600);
+
+        if (GET_GPIO(1) == 0) {
+            /* GPIO1 low: forward the level whenever the status changed. */
+            srdata = read_reg();
+            if (srdata != srdata2_old) {
+                srdata2 = ((srdata >> 13) & 0xff);
+                srdata2_old = srdata;
+                m68k_set_irq(srdata2);
+                toggle = 1;
+            }
+        } else {
+            /* GPIO1 high again after an update: refresh the level once. */
+            if (toggle != 0) {
+                srdata = read_reg();
+                srdata2 = ((srdata >> 13) & 0xff);
+                srdata2_old = srdata;
+                m68k_set_irq(srdata2);
+                toggle = 0;
+            }
+        }
+    }
+
+    return 0;
+}
+
+
+
+/* Reset hook: does nothing except stall the caller for 10000 us. */
+void cpu_pulse_reset(void)
+{
+    usleep(10000);
+}
+
+
+
+
+/*
+ * Interrupt-acknowledge hook: logs the acknowledge on stdout and hands the
+ * level it was given straight back as the result.
+ */
+int cpu_irq_ack(int level)
+{
+    const int ack = level;
+
+    fputs("cpu irq ack\n", stdout);
+    return ack;
+}
+
+
+
+/*
+ * Byte read callback for the CPU core.
+ *
+ * Addresses above 0x07FFFFFF are served from g_ram at offset
+ * (address - 0x07FFFFFF); everything else is read from the bus via read8().
+ * Offsets past the end of g_ram read as 0 instead of indexing outside the
+ * array.
+ *
+ * NOTE(review): with this base, address 0x08000000 maps to g_ram[1] and
+ * g_ram[0] is never used.  The base is kept unchanged so the mapping stays
+ * consistent across all m68k_*_memory_* callbacks - confirm whether
+ * 0x08000000 was intended.
+ */
+unsigned int  m68k_read_memory_8(unsigned int address){
+    if (address > 0x07FFFFFF) {
+        const unsigned int offset = address - 0x07FFFFFF;
+
+        if (offset > MAX_RAM)
+            return 0; /* beyond the end of g_ram */
+        return g_ram[offset];
+    }
+
+    return read8((uint32_t)address);
+}
+
+/*
+ * 16-bit read callback for the CPU core.
+ *
+ * Addresses above 0x07FFFFFF are served from g_ram at offset
+ * (address - 0x07FFFFFF) as a big-endian word; everything else is read from
+ * the bus via read16().  The word is assembled byte by byte, which avoids
+ * the misaligned uint16_t access through a cast pointer and does not depend
+ * on host byte order.  A word that would extend past the end of g_ram reads
+ * as 0.
+ */
+unsigned int  m68k_read_memory_16(unsigned int address){
+    if (address > 0x07FFFFFF) {
+        const unsigned int offset = address - 0x07FFFFFF;
+
+        if (offset >= MAX_RAM)
+            return 0; /* offset+1 would be outside g_ram */
+        return ((unsigned int)g_ram[offset] << 8) |
+                (unsigned int)g_ram[offset + 1];
+    }
+
+    return (unsigned int)read16((uint32_t)address);
+}
+
+/*
+ * 32-bit read callback for the CPU core.
+ *
+ * Addresses above 0x07FFFFFF are served from g_ram at offset
+ * (address - 0x07FFFFFF) as a big-endian long word, assembled byte by byte
+ * (no misaligned or type-punned load).  A long word that would extend past
+ * the end of g_ram reads as 0.  Everything else is read from the bus as two
+ * 16-bit accesses, high word first.
+ */
+unsigned int  m68k_read_memory_32(unsigned int address){
+    if (address > 0x07FFFFFF) {
+        const unsigned int offset = address - 0x07FFFFFF;
+
+        if (offset > MAX_RAM - 3)
+            return 0; /* offset+3 would be outside g_ram */
+        return ((unsigned int)g_ram[offset]     << 24) |
+               ((unsigned int)g_ram[offset + 1] << 16) |
+               ((unsigned int)g_ram[offset + 2] <<  8) |
+                (unsigned int)g_ram[offset + 3];
+    }
+
+    uint16_t a = read16(address);
+    uint16_t b = read16(address+2);
+    /* Widen before shifting: a promoted uint16_t >= 0x8000 shifted left by
+     * 16 would overflow a signed int. */
+    return ((uint32_t)a << 16) | b;
+}
+
+/*
+ * Byte write callback for the CPU core.
+ *
+ * Addresses above 0x07FFFFFF are stored into g_ram at offset
+ * (address - 0x07FFFFFF); everything else is written to the bus via
+ * write8().  Only the low 8 bits of value are stored.  Writes past the end
+ * of g_ram are dropped instead of writing outside the array.
+ */
+void m68k_write_memory_8(unsigned int address, unsigned int value){
+    if (address > 0x07FFFFFF) {
+        const unsigned int offset = address - 0x07FFFFFF;
+
+        if (offset <= MAX_RAM)
+            g_ram[offset] = (unsigned char)(value & 0xff);
+        return;
+    }
+
+    write8((uint32_t)address, value);
+}
+
+/*
+ * 16-bit write callback for the CPU core.
+ *
+ * Addresses above 0x07FFFFFF are stored into g_ram at offset
+ * (address - 0x07FFFFFF) as a big-endian word; everything else is written
+ * to the bus via write16().  Only the low 16 bits of value are stored: the
+ * bytes are extracted individually, so bits above 15 can no longer leak
+ * into the stored word, and no misaligned or type-punned store is done.
+ * A word that would extend past the end of g_ram is dropped.
+ */
+void m68k_write_memory_16(unsigned int address, unsigned int value){
+    if (address > 0x07FFFFFF) {
+        const unsigned int offset = address - 0x07FFFFFF;
+
+        if (offset < MAX_RAM) {
+            g_ram[offset]     = (unsigned char)((value >> 8) & 0xff);
+            g_ram[offset + 1] = (unsigned char)(value & 0xff);
+        }
+        return;
+    }
+
+    write16((uint32_t)address, value);
+}
+
+/*
+ * 32-bit write callback for the CPU core.
+ *
+ * Addresses above 0x07FFFFFF are stored into g_ram at offset
+ * (address - 0x07FFFFFF) as a big-endian long word, written byte by byte
+ * (no misaligned or type-punned store).  A long word that would extend past
+ * the end of g_ram is dropped.  Everything else is written to the bus as
+ * two 16-bit accesses: high word at address, then low word at address+2.
+ */
+void m68k_write_memory_32(unsigned int address, unsigned int value){
+    if (address > 0x07FFFFFF) {
+        const unsigned int offset = address - 0x07FFFFFF;
+
+        if (offset <= MAX_RAM - 3) {
+            g_ram[offset]     = (unsigned char)((value >> 24) & 0xff);
+            g_ram[offset + 1] = (unsigned char)((value >> 16) & 0xff);
+            g_ram[offset + 2] = (unsigned char)((value >>  8) & 0xff);
+            g_ram[offset + 3] = (unsigned char)(value & 0xff);
+        }
+        return;
+    }
+
+    write16(address, value >> 16);
+    write16(address + 2, value);
+}
+
+/*
+ * Store a 32-bit value on the bus as two 16-bit writes: the low word goes
+ * to address+2 first, then the high word to address.
+ */
+void write32(uint32_t address, uint32_t data){
+    const uint32_t low_word  = data;
+    const uint32_t high_word = data >> 16;
+
+    write16(address + 2, low_word);
+    write16(address, high_word);
+}
+
+/*
+ * Read a 32-bit value from the bus as two 16-bit reads: the low word from
+ * address+2 first, then the high word from address (the same layout
+ * write32() produces).
+ *
+ * The previous combination, (a>>16)|b, shifted the 16-bit low word out
+ * entirely and returned only the high word, unshifted.
+ */
+uint32_t read32(uint32_t address){
+        uint16_t a = read16(address+2);   /* low word  */
+        uint16_t b = read16(address);     /* high word */
+        return ((uint32_t)b << 16) | a;
+}
+
+
+/*
+ * Write the low 16 bits of data to the 24-bit bus address over the GPIO
+ * interface.
+ *
+ * Sequence: select W16 mode on the SA lines, switch GPIO 8-23 to outputs,
+ * then present three 16-bit chunks on GPIO 8-23 (address low half, address
+ * bits 16 and up, data), pulsing GPIO7 low then high after each chunk.
+ * Finally GPIO 8-23 are switched back to inputs and the function spins
+ * while GPIO0 reads high.  The statement order is part of the protocol.
+ */
+void write16(uint32_t address, uint32_t data)
+{
+        asm volatile ("dmb" ::: "memory");
+        W16
+//        asm volatile ("nop" ::);
+//        asm volatile ("nop" ::);
+//        asm volatile ("nop" ::);
+        //write phase
+        /* Restore the function-select snapshot with GPIO 8-23 as outputs. */
+        *(gpio) = gpfsel0_o;
+        *(gpio + 1) = gpfsel1_o;
+        *(gpio + 2) = gpfsel2_o;
+
+        /* gpio+7 / gpio+10 are the registers behind GPIO_SET / GPIO_CLR:
+         * set the 1 bits and clear the 0 bits of the chunk on GPIO 8-23. */
+        *(gpio + 7) = (address & 0x0000ffff) << 8;
+        *(gpio + 10) = (~address & 0x0000ffff) << 8;
+        GPIO_CLR = 1 << 7;
+//        GPIO_CLR = 1 << 7; //delay
+        GPIO_SET = 1 << 7;
+
+        /* Upper part of the address. */
+        *(gpio + 7) = (address >> 16) << 8;
+        *(gpio + 10) = (~address >> 16) << 8;
+        GPIO_CLR = 1 << 7;
+//        GPIO_CLR = 1 << 7; //delay
+        GPIO_SET = 1 << 7;
+
+        //write phase
+        /* The 16 data bits. */
+        *(gpio + 7) = (data & 0x0000ffff) << 8;
+        *(gpio + 10) = (~data & 0x0000ffff) << 8;
+        GPIO_CLR = 1 << 7;
+//        GPIO_CLR = 1 << 7; //delay
+        GPIO_SET = 1 << 7;
+
+        /* Back to the snapshot with GPIO 8-23 as inputs, then wait for
+         * GPIO0 to read low. */
+        *(gpio) = gpfsel0;
+        *(gpio + 1) = gpfsel1;
+        *(gpio + 2) = gpfsel2;
+        while ((GET_GPIO(0)));
+
+        asm volatile ("dmb" ::: "memory");
+}
+
+
+/*
+ * Write one byte to the 24-bit bus address over the GPIO interface.
+ *
+ * Only the low 8 bits of data are used.  For an even address the byte is
+ * duplicated into both halves of the 16-bit bus word; for an odd address it
+ * stays in the low half.  data is masked first: without the mask, bits
+ * above 7 were added into the upper byte of the word for even addresses.
+ *
+ * The bus sequence is the same as in write16() except that W8 mode is
+ * selected: three 16-bit chunks on GPIO 8-23, each followed by a low/high
+ * pulse on GPIO7, then GPIO 8-23 back to inputs and a wait while GPIO0
+ * reads high.
+ */
+void write8(uint32_t address, uint32_t data)
+{
+        data &= 0xff;                      /* only the low byte is meaningful */
+        if ((address & 1) == 0)
+            data = data + (data << 8);     //EVEN, A0=0,UDS
+                                           //ODD , A0=1,LDS: low half only
+
+        asm volatile ("dmb" ::: "memory");
+        W8
+        /* GPIO 8-23 as outputs. */
+        *(gpio) = gpfsel0_o;
+        *(gpio + 1) = gpfsel1_o;
+        *(gpio + 2) = gpfsel2_o;
+
+        /* Address, low 16 bits. */
+        *(gpio + 7) = (address & 0x0000ffff) << 8;
+        *(gpio + 10) = (~address & 0x0000ffff) << 8;
+        GPIO_CLR = 1 << 7;
+        GPIO_SET = 1 << 7;
+
+        /* Address, upper part. */
+        *(gpio + 7) = (address >> 16) << 8;
+        *(gpio + 10) = (~address >> 16) << 8;
+        GPIO_CLR = 1 << 7;
+        GPIO_SET = 1 << 7;
+
+        /* Data word. */
+        *(gpio + 7) = (data & 0x0000ffff) << 8;
+        *(gpio + 10) = (~data & 0x0000ffff) << 8;
+        GPIO_CLR = 1 << 7;
+        GPIO_SET = 1 << 7;
+
+        /* GPIO 8-23 back to inputs, then wait for GPIO0 to read low. */
+        *(gpio) = gpfsel0;
+        *(gpio + 1) = gpfsel1;
+        *(gpio + 2) = gpfsel2;
+        while ((GET_GPIO(0)));
+
+        asm volatile ("dmb" ::: "memory");
+}
+
+
+/*
+ * Read a 16-bit word from the bus address over the GPIO interface.
+ *
+ * Sequence: select R16 mode on the SA lines, drive the address onto
+ * GPIO 8-23 in two 16-bit chunks (each followed by a low/high pulse on
+ * GPIO7), switch GPIO 8-23 to inputs, pull GPIO6 low, wait until GPIO0
+ * reads high, sample the pin-level register and raise GPIO6 again.
+ * Returns bits 8-23 of the sampled register, i.e. a value in 0..0xffff.
+ * The statement order is part of the protocol.
+ */
+uint32_t read16(uint32_t address)
+{
+        volatile int val;
+//      while ((GET_GPIO(0)));
+        asm volatile ("dmb" ::: "memory");
+        R16
+//        asm volatile ("nop" ::);
+//        asm volatile ("nop" ::);
+//        asm volatile ("nop" ::);
+        //write phase
+        /* GPIO 8-23 as outputs while the address is sent. */
+        *(gpio) = gpfsel0_o;
+        *(gpio + 1) = gpfsel1_o;
+        *(gpio + 2) = gpfsel2_o;
+
+        /* Address, low 16 bits (gpio+7 sets bits, gpio+10 clears bits). */
+        val = address;// & 0x0000FFFF;
+        *(gpio + 7) = (val & 0xffff) << 8;
+        *(gpio + 10) = (~val & 0xffff) << 8;
+
+        GPIO_CLR = 1 << 7;
+//        GPIO_CLR = 1 << 7; //delay
+        GPIO_SET = 1 << 7;
+
+        /* Address, upper part. */
+        val = address >> 16;
+        *(gpio + 7) = (val & 0xffff) << 8;
+        *(gpio + 10) = (~val & 0xffff) << 8;
+        GPIO_CLR = 1 << 7;
+//        GPIO_CLR = 1 << 7; //delay
+        GPIO_SET = 1 << 7;
+
+        //read phase
+
+        /* GPIO 8-23 back to inputs so the data can be sampled. */
+        *(gpio) = gpfsel0;
+        *(gpio + 1) = gpfsel1;
+        *(gpio + 2) = gpfsel2;
+
+        GPIO_CLR = 1 << 6;
+        while (!(GET_GPIO(0)));
+        GPIO_CLR = 1 << 6;
+        /* gpio+13 is the register GET_GPIO() reads pin levels from. */
+        val = *(gpio + 13);
+        GPIO_SET = 1 << 6;
+        asm volatile ("dmb" ::: "memory");
+        return (val >>8)&0xffff;
+}
+
+
+/*
+ * Read one byte from the bus address over the GPIO interface.
+ *
+ * Same sequence as read16() but with R8 mode selected.  From the sampled
+ * 16-bit bus word the upper byte is returned for an even address and the
+ * lower byte for an odd address, so the result is in 0..0xff.
+ * The statement order is part of the protocol.
+ */
+uint32_t read8(uint32_t address)
+{
+        int val;
+//      while ((GET_GPIO(0)));
+        asm volatile ("dmb" ::: "memory");
+        R8
+//        asm volatile ("nop" ::);
+//        asm volatile ("nop" ::);
+//        asm volatile ("nop" ::);
+        //write phase
+        /* GPIO 8-23 as outputs while the address is sent. */
+        *(gpio) = gpfsel0_o;
+        *(gpio + 1) = gpfsel1_o;
+        *(gpio + 2) = gpfsel2_o;
+
+        /* Address, low 16 bits (gpio+7 sets bits, gpio+10 clears bits). */
+        val = address;// & 0x0000FFFF;
+        *(gpio + 7) = (val & 0xffff) << 8;
+        *(gpio + 10) = (~val & 0xffff) << 8;
+
+        GPIO_CLR = 1 << 7;
+//        GPIO_CLR = 1 << 7; //delay
+        GPIO_SET = 1 << 7;
+
+        /* Address, upper part. */
+        val = address >> 16;
+        *(gpio + 7) = (val & 0xffff) << 8;
+        *(gpio + 10) = (~val & 0xffff) << 8;
+        GPIO_CLR = 1 << 7;
+//        GPIO_CLR = 1 << 7; //delay
+        GPIO_SET = 1 << 7;
+
+        //read phase
+
+        /* GPIO 8-23 back to inputs so the data can be sampled. */
+        *(gpio) = gpfsel0;
+        *(gpio + 1) = gpfsel1;
+        *(gpio + 2) = gpfsel2;
+
+        GPIO_CLR = 1 << 6;
+        while (!(GET_GPIO(0)));
+        GPIO_CLR = 1 << 6;
+        val = *(gpio + 13);
+        GPIO_SET = 1 << 6;
+        asm volatile ("dmb" ::: "memory");
+//        return (val >>8)&0xffff;
+
+        /* Keep the 16 bus bits, then pick the byte lane by address parity. */
+	val = (val >>8)&0xffff;
+        if ((address & 1) == 0)
+            val = (val >> 8) & 0xff ; //EVEN, A0=0,UDS
+        else
+            val = val & 0xff ; //ODD , A0=1,LDS
+	return val;
+}
+
+
+
+/******************************************************/
+
+/*
+ * Write the low 16 bits of value to the status register.
+ *
+ * Selects the status register on the SA lines, switches GPIO 8-23 to
+ * outputs, presents the value on GPIO 8-23, pulses GPIO7 low then high and
+ * finally switches GPIO 8-23 back to inputs.  The repeated GPIO7 writes act
+ * as short delays (see the existing "delay" remark).
+ */
+void write_reg(unsigned int value)
+{
+        asm volatile ("dmb" ::: "memory");
+        STATUSREGADDR
+        asm volatile ("nop" ::);
+        asm volatile ("nop" ::);
+        asm volatile ("nop" ::);
+        //Write Status register
+        /* Same SA pattern as STATUSREGADDR above, driven a second time. */
+        GPIO_CLR = 1 << SA0;
+        GPIO_CLR = 1 << SA1;
+        GPIO_SET = 1 << SA2;
+
+        /* GPIO 8-23 as outputs, then set/clear them to match value. */
+        *(gpio) = gpfsel0_o;
+        *(gpio + 1) = gpfsel1_o;
+        *(gpio + 2) = gpfsel2_o;
+        *(gpio + 7) = (value & 0xffff) << 8;
+        *(gpio + 10) = (~value & 0xffff) << 8;
+        GPIO_CLR = 1 << 7;
+        GPIO_CLR = 1 << 7; //delay
+        GPIO_SET = 1 << 7;
+	GPIO_SET = 1 << 7;
+        //Bus HIGH-Z
+        *(gpio) = gpfsel0;
+        *(gpio + 1) = gpfsel1;
+        *(gpio + 2) = gpfsel2;
+        asm volatile ("dmb" ::: "memory");
+}
+
+
+/*
+ * Read the 16-bit status register.
+ *
+ * Selects the status register on the SA lines, makes sure GPIO 8-23 are
+ * inputs, pulls GPIO6 low (repeated writes serve as a delay), samples the
+ * pin-level register and raises GPIO6 again.  Returns bits 8-23 of the
+ * sample truncated to 16 bits.
+ */
+uint16_t read_reg(void)
+{
+        uint32_t val;
+
+        asm volatile ("dmb" ::: "memory");
+        STATUSREGADDR
+        asm volatile ("nop" ::);
+        asm volatile ("nop" ::);
+        asm volatile ("nop" ::);
+        //Bus HIGH-Z
+        *(gpio) = gpfsel0;
+        *(gpio + 1) = gpfsel1;
+        *(gpio + 2) = gpfsel2;
+
+        GPIO_CLR = 1 << 6;
+        GPIO_CLR = 1 << 6;      //delay
+	GPIO_CLR = 1 << 6;
+	GPIO_CLR = 1 << 6;
+        /* gpio+13 is the register GET_GPIO() reads pin levels from. */
+        val = *(gpio + 13);
+        GPIO_SET = 1 << 6;
+        asm volatile ("dmb" ::: "memory");
+
+        return (uint16_t)(val >> 8);
+}
+
+
+//
+// Map the peripheral register window through /dev/mem and derive the gpio
+// and gpclk register pointers from it.
+//
+// Exits the process with status -1 if /dev/mem cannot be opened or the
+// mapping fails.  The mmap failure is now reported with its errno text
+// (previously the MAP_FAILED pointer itself was printed as an int), and the
+// check happens before close() so errno cannot be overwritten first.
+//
+void setup_io()
+{
+   /* open /dev/mem */
+   if ((mem_fd = open("/dev/mem", O_RDWR|O_SYNC) ) < 0) {
+      printf("can't open /dev/mem \n");
+      exit(-1);
+   }
+
+   /* mmap the whole peripheral window */
+   gpio_map = mmap(
+      NULL,                  //Any address in our space will do
+      BCM2708_PERI_SIZE,     //Map length
+      PROT_READ|PROT_WRITE,  //Enable reading & writing to mapped memory
+      MAP_SHARED,            //Shared with other processes
+      mem_fd,                //File to map
+      BCM2708_PERI_BASE      //Offset of the peripheral window
+   );
+
+   if (gpio_map == MAP_FAILED) {
+      perror("gpio mmap error");
+      close(mem_fd);
+      exit(-1);
+   }
+
+   close(mem_fd); //No need to keep mem_fd open after mmap
+
+   /* Both pointers index 32-bit words, hence the byte offsets divided by 4. */
+   gpio = ((volatile unsigned *)gpio_map) + GPIO_ADDR/4;
+   gpclk = ((volatile unsigned *)gpio_map) + GPCLK_ADDR/4;
+} // setup_io
+
--- a/m68k.h
+++ b/m68k.h
@@ -0,0 +1,411 @@
+/* ======================================================================== */
+/* ========================= LICENSING & COPYRIGHT ======================== */
+/* ======================================================================== */
+/*
+ *                                  MUSASHI
+ *                                Version 3.32
+ *
+ * A portable Motorola M680x0 processor emulation engine.
+ * Copyright Karl Stenerud.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef M68K__HEADER
+#define M68K__HEADER
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef ARRAY_LENGTH
+#define ARRAY_LENGTH(x)         (sizeof(x) / sizeof(x[0]))
+#endif
+
+#ifndef FALSE
+#define FALSE 0
+#define TRUE 1
+#endif
+
+/* ======================================================================== */
+/* ============================= CONFIGURATION ============================ */
+/* ======================================================================== */
+
+/* Import the configuration for this build */
+#ifdef MUSASHI_CNF
+#include MUSASHI_CNF
+#else
+#include "m68kconf.h"
+#endif
+
+/* ======================================================================== */
+/* ============================ GENERAL DEFINES =========================== */
+
+/* ======================================================================== */
+
+/* There are 7 levels of interrupt to the 68K.
+ * A transition from < 7 to 7 will cause a non-maskable interrupt (NMI).
+ */
+#define M68K_IRQ_NONE 0
+#define M68K_IRQ_1    1
+#define M68K_IRQ_2    2
+#define M68K_IRQ_3    3
+#define M68K_IRQ_4    4
+#define M68K_IRQ_5    5
+#define M68K_IRQ_6    6
+#define M68K_IRQ_7    7
+
+
+/* Special interrupt acknowledge values.
+ * Use these as special returns from the interrupt acknowledge callback
+ * (specified later in this header).
+ */
+
+/* Causes an interrupt autovector (0x18 + interrupt level) to be taken.
+ * This happens in a real 68K if VPA or AVEC is asserted during an interrupt
+ * acknowledge cycle instead of DTACK.
+ */
+#define M68K_INT_ACK_AUTOVECTOR    0xffffffff
+
+/* Causes the spurious interrupt vector (0x18) to be taken
+ * This happens in a real 68K if BERR is asserted during the interrupt
+ * acknowledge cycle (i.e. no devices responded to the acknowledge).
+ */
+#define M68K_INT_ACK_SPURIOUS      0xfffffffe
+
+
+/* CPU types for use in m68k_set_cpu_type() */
+/* Anonymous enum: the enumerators take consecutive values starting at 0,
+ * so M68K_CPU_TYPE_INVALID is 0 and M68K_CPU_TYPE_68000 is 1.
+ */
+enum
+{
+	M68K_CPU_TYPE_INVALID,
+	M68K_CPU_TYPE_68000,
+	M68K_CPU_TYPE_68010,
+	M68K_CPU_TYPE_68EC020,
+	M68K_CPU_TYPE_68020,
+	M68K_CPU_TYPE_68EC030,
+	M68K_CPU_TYPE_68030,
+	M68K_CPU_TYPE_68EC040,
+	M68K_CPU_TYPE_68LC040,
+	M68K_CPU_TYPE_68040,
+	M68K_CPU_TYPE_SCC68070
+};
+
+/* Registers used by m68k_get_reg() and m68k_set_reg() */
+/* Register selectors.  m68k_get_reg() takes one to read a register from a
+ * context, m68k_set_reg() takes one to write a register of the running
+ * context.  Values are consecutive starting at 0 (M68K_REG_D0).
+ */
+typedef enum
+{
+	/* Real registers */
+	M68K_REG_D0,		/* Data registers */
+	M68K_REG_D1,
+	M68K_REG_D2,
+	M68K_REG_D3,
+	M68K_REG_D4,
+	M68K_REG_D5,
+	M68K_REG_D6,
+	M68K_REG_D7,
+	M68K_REG_A0,		/* Address registers */
+	M68K_REG_A1,
+	M68K_REG_A2,
+	M68K_REG_A3,
+	M68K_REG_A4,
+	M68K_REG_A5,
+	M68K_REG_A6,
+	M68K_REG_A7,
+	M68K_REG_PC,		/* Program Counter */
+	M68K_REG_SR,		/* Status Register */
+	M68K_REG_SP,		/* The current Stack Pointer (located in A7) */
+	M68K_REG_USP,		/* User Stack Pointer */
+	M68K_REG_ISP,		/* Interrupt Stack Pointer */
+	M68K_REG_MSP,		/* Master Stack Pointer */
+	M68K_REG_SFC,		/* Source Function Code */
+	M68K_REG_DFC,		/* Destination Function Code */
+	M68K_REG_VBR,		/* Vector Base Register */
+	M68K_REG_CACR,		/* Cache Control Register */
+	M68K_REG_CAAR,		/* Cache Address Register */
+
+	/* Assumed registers */
+	/* These are cheat registers which emulate the 1-longword prefetch
+	 * present in the 68000 and 68010.
+	 */
+	M68K_REG_PREF_ADDR,	/* Last prefetch address */
+	M68K_REG_PREF_DATA,	/* Last prefetch data */
+
+	/* Convenience registers */
+	M68K_REG_PPC,		/* Previous value in the program counter */
+	M68K_REG_IR,		/* Instruction register */
+	M68K_REG_CPU_TYPE	/* Type of CPU being run */
+} m68k_register_t;
+
+/* ======================================================================== */
+/* ====================== FUNCTIONS CALLED BY THE CPU ===================== */
+/* ======================================================================== */
+
+/* You will have to implement these functions */
+
+/* read/write functions called by the CPU to access memory.
+ * while values used are 32 bits, only the appropriate number
+ * of bits are relevant (i.e. in write_memory_8, only the lower 8 bits
+ * of value should be written to memory).
+ *
+ * NOTE: I have separated the immediate and PC-relative memory fetches
+ *       from the other memory fetches because some systems require
+ *       differentiation between PROGRAM and DATA fetches (usually
+ *       for security setups such as encryption).
+ *       This separation can either be achieved by setting
+ *       M68K_SEPARATE_READS in m68kconf.h and defining
+ *       the read functions, or by setting M68K_EMULATE_FC and
+ *       making a function code callback function.
+ *       Using the callback offers better emulation coverage
+ *       because you can also monitor whether the CPU is in SYSTEM or
+ *       USER mode, but it is also slower.
+ */
+
+/* Read from anywhere */
+unsigned int  m68k_read_memory_8(unsigned int address);
+unsigned int  m68k_read_memory_16(unsigned int address);
+unsigned int  m68k_read_memory_32(unsigned int address);
+
+/* Read data immediately following the PC */
+unsigned int  m68k_read_immediate_16(unsigned int address);
+unsigned int  m68k_read_immediate_32(unsigned int address);
+
+/* Read data relative to the PC */
+unsigned int  m68k_read_pcrelative_8(unsigned int address);
+unsigned int  m68k_read_pcrelative_16(unsigned int address);
+unsigned int  m68k_read_pcrelative_32(unsigned int address);
+
+/* Memory access for the disassembler */
+unsigned int m68k_read_disassembler_8  (unsigned int address);
+unsigned int m68k_read_disassembler_16 (unsigned int address);
+unsigned int m68k_read_disassembler_32 (unsigned int address);
+
+/* Write to anywhere */
+void m68k_write_memory_8(unsigned int address, unsigned int value);
+void m68k_write_memory_16(unsigned int address, unsigned int value);
+void m68k_write_memory_32(unsigned int address, unsigned int value);
+
+/* Special call to simulate undocumented 68k behavior when move.l with a
+ * predecrement destination mode is executed.
+ * To simulate real 68k behavior, first write the high word to
+ * [address+2], and then write the low word to [address].
+ *
+ * Enable this functionality with M68K_SIMULATE_PD_WRITES in m68kconf.h.
+ */
+void m68k_write_memory_32_pd(unsigned int address, unsigned int value);
+
+
+
+/* ======================================================================== */
+/* ============================== CALLBACKS =============================== */
+/* ======================================================================== */
+
+/* These functions allow you to set callbacks to the host when specific events
+ * occur.  Note that you must enable the corresponding value in m68kconf.h
+ * in order for these to do anything useful.
+ * Note: I have defined default callbacks which are used if you have enabled
+ * the corresponding #define in m68kconf.h but either haven't assigned a
+ * callback or have assigned a callback of NULL.
+ */
+
+/* Set the callback for an interrupt acknowledge.
+ * You must enable M68K_EMULATE_INT_ACK in m68kconf.h.
+ * The CPU will call the callback with the interrupt level being acknowledged.
+ * The host program must return either a vector from 0x02-0xff, or one of the
+ * special interrupt acknowledge values specified earlier in this header.
+ * If this is not implemented, the CPU will always assume an autovectored
+ * interrupt, and will automatically clear the interrupt request when it
+ * services the interrupt.
+ * Default behavior: return M68K_INT_ACK_AUTOVECTOR.
+ */
+void m68k_set_int_ack_callback(int  (*callback)(int int_level));
+
+
+/* Set the callback for a breakpoint acknowledge (68010+).
+ * You must enable M68K_EMULATE_BKPT_ACK in m68kconf.h.
+ * The CPU will call the callback with whatever was in the data field of the
+ * BKPT instruction for 68020+, or 0 for 68010.
+ * Default behavior: do nothing.
+ */
+void m68k_set_bkpt_ack_callback(void (*callback)(unsigned int data));
+
+
+/* Set the callback for the RESET instruction.
+ * You must enable M68K_EMULATE_RESET in m68kconf.h.
+ * The CPU calls this callback every time it encounters a RESET instruction.
+ * Default behavior: do nothing.
+ */
+void m68k_set_reset_instr_callback(void  (*callback)(void));
+
+
+/* Set the callback for informing of a large PC change.
+ * You must enable M68K_MONITOR_PC in m68kconf.h.
+ * The CPU calls this callback with the new PC value every time the PC changes
+ * by a large value (currently set for changes by longwords).
+ * Default behavior: do nothing.
+ */
+void m68k_set_pc_changed_callback(void  (*callback)(unsigned int new_pc));
+
+/* Set the callback for the TAS instruction.
+ * You must enable M68K_TAS_HAS_CALLBACK in m68kconf.h.
+ * The CPU calls this callback every time it encounters a TAS instruction.
+ * Default behavior: return 1, allow writeback.
+ */
+void m68k_set_tas_instr_callback(int  (*callback)(void));
+
+/* Set the callback for illegal instructions.
+ * You must enable M68K_ILLG_HAS_CALLBACK in m68kconf.h.
+ * The CPU calls this callback every time it encounters an illegal instruction
+ * which must return 1 if it handles the instruction normally or 0 if it's really an illegal instruction.
+ * Default behavior: return 0, exception will occur.
+ */
+void m68k_set_illg_instr_callback(int  (*callback)(int));
+
+/* Set the callback for CPU function code changes.
+ * You must enable M68K_EMULATE_FC in m68kconf.h.
+ * The CPU calls this callback with the function code before every memory
+ * access to set the CPU's function code according to what kind of memory
+ * access it is (supervisor/user, program/data and such).
+ * Default behavior: do nothing.
+ */
+void m68k_set_fc_callback(void  (*callback)(unsigned int new_fc));
+
+
+/* Set a callback for the instruction cycle of the CPU.
+ * You must enable M68K_INSTRUCTION_HOOK in m68kconf.h.
+ * The CPU calls this callback just before fetching the opcode in the
+ * instruction cycle.
+ * Default behavior: do nothing.
+ */
+void m68k_set_instr_hook_callback(void  (*callback)(unsigned int pc));
+
+
+
+/* ======================================================================== */
+/* ====================== FUNCTIONS TO ACCESS THE CPU ===================== */
+/* ======================================================================== */
+
+/* Use this function to set the CPU type you want to emulate.
+ * Currently supported types are: M68K_CPU_TYPE_68000, M68K_CPU_TYPE_68010,
+ * M68K_CPU_TYPE_68EC020, and M68K_CPU_TYPE_68020.
+ */
+void m68k_set_cpu_type(unsigned int cpu_type);
+
+/* Do whatever initialisations the core requires.  Should be called
+ * at least once at init time.
+ */
+void m68k_init(void);
+
+/* Pulse the RESET pin on the CPU.
+ * You *MUST* reset the CPU at least once to initialize the emulation
+ * Note: If you didn't call m68k_set_cpu_type() before resetting
+ *       the CPU for the first time, the CPU will be set to
+ *       M68K_CPU_TYPE_68000.
+ */
+void m68k_pulse_reset(void);
+
+/* execute num_cycles worth of instructions.  returns number of cycles used */
+int m68k_execute(int num_cycles);
+
+/* These functions let you read/write/modify the number of cycles left to run
+ * while m68k_execute() is running.
+ * These are useful if the 68k accesses a memory-mapped port on another device
+ * that requires immediate processing by another CPU.
+ */
+int m68k_cycles_run(void);              /* Number of cycles run so far */
+int m68k_cycles_remaining(void);        /* Number of cycles left */
+void m68k_modify_timeslice(int cycles); /* Modify cycles left */
+void m68k_end_timeslice(void);          /* End timeslice now */
+
+/* Set the IPL0-IPL2 pins on the CPU (IRQ).
+ * A transition from < 7 to 7 will cause a non-maskable interrupt (NMI).
+ * Setting IRQ to 0 will clear an interrupt request.
+ */
+void m68k_set_irq(unsigned int int_level);
+
+/* Set the virtual irq lines, where the highest level
+ * active line is automatically selected.  If you use this function,
+ * do not use m68k_set_irq.
+ */
+void m68k_set_virq(unsigned int level, unsigned int active);
+unsigned int m68k_get_virq(unsigned int level);
+
+/* Halt the CPU as if you pulsed the HALT pin. */
+void m68k_pulse_halt(void);
+
+
+/* Trigger a bus error exception */
+void m68k_pulse_bus_error(void);
+
+
+/* Context switching to allow multiple CPUs */
+
+/* Get the size of the cpu context in bytes */
+unsigned int m68k_context_size(void);
+
+/* Get a cpu context */
+unsigned int m68k_get_context(void* dst);
+
+/* set the current cpu context */
+void m68k_set_context(void* dst);
+
+/* Register the CPU state information */
+void m68k_state_register(const char *type, int index);
+
+
+/* Peek at the internals of a CPU context.  This can either be a context
+ * retrieved using m68k_get_context() or the currently running context.
+ * If context is NULL, the currently running CPU context will be used.
+ */
+unsigned int m68k_get_reg(void* context, m68k_register_t reg);
+
+/* Poke values into the internals of the currently running CPU context */
+void m68k_set_reg(m68k_register_t reg, unsigned int value);
+
+/* Check if an instruction is valid for the specified CPU type */
+unsigned int m68k_is_valid_instruction(unsigned int instruction, unsigned int cpu_type);
+
+/* Disassemble 1 instruction using the specified CPU type at pc.  Stores
+ * disassembly in str_buff and returns the size of the instruction in bytes.
+ */
+unsigned int m68k_disassemble(char* str_buff, unsigned int pc, unsigned int cpu_type);
+
+/* Same as above but accepts raw opcode data directly rather than fetching
+ * via the read/write interfaces.
+ */
+unsigned int m68k_disassemble_raw(char* str_buff, unsigned int pc, const unsigned char* opdata, const unsigned char* argdata, unsigned int cpu_type);
+
+
+/* ======================================================================== */
+/* ============================== MAME STUFF ============================== */
+/* ======================================================================== */
+
+#if M68K_COMPILE_FOR_MAME == OPT_ON
+#include "m68kmame.h"
+#endif /* M68K_COMPILE_FOR_MAME */
+
+
+/* ======================================================================== */
+/* ============================== END OF FILE ============================= */
+/* ======================================================================== */
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* M68K__HEADER */
--- a/m68k_in.c
+++ b/m68k_in.c
--- a/m68kconf.h
+++ b/m68kconf.h
@@ -0,0 +1,221 @@
+/* ======================================================================== */
+/* ========================= LICENSING & COPYRIGHT ======================== */
+/* ======================================================================== */
+/*
+ *                                  MUSASHI
+ *                                Version 3.32
+ *
+ * A portable Motorola M680x0 processor emulation engine.
+ * Copyright Karl Stenerud.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+
+
+#ifndef M68KCONF__HEADER
+#define M68KCONF__HEADER
+
+
+/* Configuration switches.
+ * Use OPT_SPECIFY_HANDLER for configuration options that allow callbacks.
+ * OPT_SPECIFY_HANDLER causes the core to link directly to the function
+ * or macro you specify, rather than using callback functions whose pointer
+ * must be passed in using m68k_set_xxx_callback().
+ */
+#define OPT_OFF             0
+#define OPT_ON              1
+#define OPT_SPECIFY_HANDLER 2
+
+
+/* ======================================================================== */
+/* ============================== MAME STUFF ============================== */
+/* ======================================================================== */
+
+/* If you're compiling this for MAME, only change M68K_COMPILE_FOR_MAME
+ * to OPT_ON and use m68kmame.h to configure the 68k core.
+ */
+#ifndef M68K_COMPILE_FOR_MAME
+#define M68K_COMPILE_FOR_MAME      OPT_OFF
+#endif /* M68K_COMPILE_FOR_MAME */
+
+
+#if M68K_COMPILE_FOR_MAME == OPT_OFF
+
+
+/* ======================================================================== */
+/* ============================= CONFIGURATION ============================ */
+/* ======================================================================== */
+
+/* Turn ON if you want to use the following M68K variants */
+#define M68K_EMULATE_010            OPT_ON
+#define M68K_EMULATE_EC020          OPT_ON
+#define M68K_EMULATE_020            OPT_ON
+#define M68K_EMULATE_040            OPT_ON
+
+
+/* If ON, the CPU will call m68k_read_immediate_xx() for immediate addressing
+ * and m68k_read_pcrelative_xx() for PC-relative addressing.
+ * If off, all read requests from the CPU will be redirected to m68k_read_xx()
+ */
+#define M68K_SEPARATE_READS         OPT_OFF
+
+/* If ON, the CPU will call m68k_write_32_pd() when it executes move.l with a
+ * predecrement destination EA mode instead of m68k_write_32().
+ * To simulate real 68k behavior, m68k_write_32_pd() must first write the high
+ * word to [address+2], and then write the low word to [address].
+ */
+#define M68K_SIMULATE_PD_WRITES     OPT_OFF
+
+/* If ON, CPU will call the interrupt acknowledge callback when it services an
+ * interrupt.
+ * If off, all interrupts will be autovectored and all interrupt requests will
+ * auto-clear when the interrupt is serviced.
+ */
+#define M68K_EMULATE_INT_ACK        OPT_OFF
+#define M68K_INT_ACK_CALLBACK(A)    cpu_irq_ack(A)
+
+
+/* If ON, CPU will call the breakpoint acknowledge callback when it encounters
+ * a breakpoint instruction and it is running a 68010+.
+ */
+#define M68K_EMULATE_BKPT_ACK       OPT_OFF
+#define M68K_BKPT_ACK_CALLBACK()    your_bkpt_ack_handler_function()
+
+
+/* If ON, the CPU will monitor the trace flags and take trace exceptions
+ */
+#define M68K_EMULATE_TRACE          OPT_OFF
+
+
+/* If ON, CPU will call the output reset callback when it encounters a reset
+ * instruction.
+ */
+#define M68K_EMULATE_RESET          OPT_SPECIFY_HANDLER
+#define M68K_RESET_CALLBACK()       cpu_pulse_reset()
+
+/* If ON, CPU will call the callback when it encounters a cmpi.l #v, dn
+ * instruction.
+ */
+#define M68K_CMPILD_HAS_CALLBACK     OPT_OFF
+#define M68K_CMPILD_CALLBACK(v,r)    your_cmpild_handler_function(v,r)
+
+
+/* If ON, CPU will call the callback when it encounters a rte
+ * instruction.
+ */
+#define M68K_RTE_HAS_CALLBACK       OPT_OFF
+#define M68K_RTE_CALLBACK()         your_rte_handler_function()
+
+/* If ON, CPU will call the callback when it encounters a tas
+ * instruction.
+ */
+#define M68K_TAS_HAS_CALLBACK       OPT_OFF
+#define M68K_TAS_CALLBACK()         your_tas_handler_function()
+
+/* If ON, CPU will call the callback when it encounters an illegal instruction
+ * passing the opcode as argument. If the callback returns 1, then it's considered
+ * as a normal instruction, and the illegal exception is canceled. If it returns 0,
+ * the exception occurs normally.
+ * The callback looks like int callback(int opcode)
+ * You should put OPT_SPECIFY_HANDLER here if you want to use it, otherwise it will
+ * use a dummy default handler and you'll have to call m68k_set_illg_instr_callback explicitly
+ */
+#define M68K_ILLG_HAS_CALLBACK	    OPT_OFF
+#define M68K_ILLG_CALLBACK(opcode)  op_illg(opcode)
+
+/* If ON, CPU will call the set fc callback on every memory access to
+ * differentiate between user/supervisor, program/data access like a real
+ * 68000 would.  This should be enabled and the callback should be set if you
+ * want to properly emulate the m68010 or higher. (moves uses function codes
+ * to read/write data from different address spaces)
+ */
+#define M68K_EMULATE_FC             OPT_OFF
+#define M68K_SET_FC_CALLBACK(A)     cpu_set_fc(A)
+
+/* If ON, CPU will call the pc changed callback when it changes the PC by a
+ * large value.  This allows host programs to be nicer when it comes to
+ * fetching immediate data and instructions on a banked memory system.
+ */
+#define M68K_MONITOR_PC             OPT_OFF
+#define M68K_SET_PC_CALLBACK(A)     your_pc_changed_handler_function(A)
+
+
+/* If ON, CPU will call the instruction hook callback before every
+ * instruction.
+ */
+#define M68K_INSTRUCTION_HOOK       OPT_OFF
+#define M68K_INSTRUCTION_CALLBACK(pc) cpu_instr_callback(pc)
+
+
+/* If ON, the CPU will emulate the 4-byte prefetch queue of a real 68000 */
+#define M68K_EMULATE_PREFETCH       OPT_ON
+
+
+/* If ON, the CPU will generate address error exceptions if it tries to
+ * access a word or longword at an odd address.
+ * NOTE: This is only emulated properly for 68000 mode.
+ */
+#define M68K_EMULATE_ADDRESS_ERROR  OPT_OFF
+
+
+/* Turn ON to enable logging of illegal instruction calls.
+ * M68K_LOG_FILEHANDLE must be #defined to a stdio file stream.
+ * Turn on M68K_LOG_1010_1111 to log all 1010 and 1111 calls.
+ */
+#define M68K_LOG_ENABLE             OPT_OFF
+#define M68K_LOG_1010_1111          OPT_OFF
+#define M68K_LOG_FILEHANDLE         some_file_handle
+
+
+/* ----------------------------- COMPATIBILITY ---------------------------- */
+
+/* The following options set optimizations that violate the current ANSI
+ * standard, but will be compliant under the forthcoming C9X standard.
+ */
+
+
+/* If ON, the emulation core will use 64-bit integers to speed up some
+ * operations.
+*/
+#define M68K_USE_64_BIT  OPT_ON
+
+
+#include "main.h"
+
+
+//#define m68k_read_memory_8(A)  read16(A)
+//#define m68k_read_memory_16(A) read16(A)
+//#define m68k_read_memory_32(A) read16(A)
+
+//#define m68k_read_disassembler_16(A) cpu_read_word_dasm(A)
+//#define m68k_read_disassembler_32(A) cpu_read_long_dasm(A)
+
+//#define m68k_write_memory_8(A, V)  write16(A, V)
+//#define m68k_write_memory_16(A, V) write16(A, V)
+//#define m68k_write_memory_32(A, V) write16(A, V)
+
+
+#endif /* M68K_COMPILE_FOR_MAME */
+
+/* ======================================================================== */
+/* ============================== END OF FILE ============================= */
+/* ======================================================================== */
+
+#endif /* M68KCONF__HEADER */
--- a/m68kcpu.c
+++ b/m68kcpu.c
--- a/m68kcpu.h
+++ b/m68kcpu.h
--- a/m68kdasm.c
+++ b/m68kdasm.c
--- a/m68kfpu.c
+++ b/m68kfpu.c
--- a/m68kmake.c
+++ b/m68kmake.c
--- a/m68kmmu.h
+++ b/m68kmmu.h
@@ -0,0 +1,321 @@
+/*
+    m68kmmu.h - PMMU implementation for 68851/68030/68040
+
+    By R. Belmont
+
+    Copyright Nicola Salmoria and the MAME Team.
+    Visit http://mamedev.org for licensing and usage restrictions.
+*/
+
+/*
+	pmmu_translate_addr: perform 68851/68030-style PMMU address translation
+
+	Walks up to three levels of translation tables (A, B, C).  The widths of
+	the initial shift and of the A/B/C index fields are taken from the
+	translation control register (m68ki_cpu.mmu_tc).  The root pointer is
+	the SRP when mmu_tc bit 25 (0x02000000) is set and the supervisor bit
+	(0x2000) of the status register is set, otherwise the CRP.
+
+	addr_in: logical address to translate.
+	returns: the translated address produced by the first termination
+	         descriptor encountered.
+
+	Invalid or unsupported descriptor types are reported through
+	fatalerror().  addr_out starts out equal to addr_in, so should
+	fatalerror() return, addr_in may come back untranslated.
+*/
+uint pmmu_translate_addr(uint addr_in)
+{
+	uint32 addr_out, tbl_entry = 0, tbl_entry2, tamode = 0, tbmode = 0, tcmode = 0;
+	uint root_aptr, root_limit, tofs, is, abits, bbits, cbits;
+	uint resolved, tptr, shift;
+
+	resolved = 0;
+	addr_out = addr_in;
+
+	// if SRP is enabled and we're in supervisor mode, use it
+	if ((m68ki_cpu.mmu_tc & 0x02000000) && (m68ki_get_sr() & 0x2000))
+	{
+		root_aptr = m68ki_cpu.mmu_srp_aptr;
+		root_limit = m68ki_cpu.mmu_srp_limit;
+	}
+	else	// else use the CRP
+	{
+		root_aptr = m68ki_cpu.mmu_crp_aptr;
+		root_limit = m68ki_cpu.mmu_crp_limit;
+	}
+
+	// get initial shift (# of top bits to ignore)
+	is = (m68ki_cpu.mmu_tc>>16) & 0xf;
+	abits = (m68ki_cpu.mmu_tc>>12)&0xf;
+	bbits = (m68ki_cpu.mmu_tc>>8)&0xf;
+	cbits = (m68ki_cpu.mmu_tc>>4)&0xf;
+
+//	fprintf(stderr,"PMMU: tcr %08x limit %08x aptr %08x is %x abits %d bbits %d cbits %d\n", m68ki_cpu.mmu_tc, root_limit, root_aptr, is, abits, bbits, cbits);
+
+	// get table A offset
+	// NOTE: a zero-width index field would make the right shift count 32,
+	// which is undefined in C for a 32-bit operand (assuming uint is 32 bits
+	// wide), so a zero-width field is given offset 0 explicitly.
+	tofs = abits ? ((addr_in<<is)>>(32-abits)) : 0;
+
+	// find out what format table A is
+	switch (root_limit & 3)
+	{
+		case 0:	// invalid, should cause MMU exception
+		case 1:	// page descriptor, should cause direct mapping
+			fatalerror("680x0 PMMU: Unhandled root mode\n");
+			break;
+
+		case 2:	// valid 4 byte descriptors
+			tofs *= 4;
+//			fprintf(stderr,"PMMU: reading table A entry at %08x\n", tofs + (root_aptr & 0xfffffffc));
+			tbl_entry = m68k_read_memory_32( tofs + (root_aptr & 0xfffffffc));
+			tamode = tbl_entry & 3;
+//			fprintf(stderr,"PMMU: addr %08x entry %08x mode %x tofs %x\n", addr_in, tbl_entry, tamode, tofs);
+			break;
+
+		case 3: // valid 8 byte descriptors
+			// first long word carries the descriptor type, second the pointer
+			tofs *= 8;
+//			fprintf(stderr,"PMMU: reading table A entries at %08x\n", tofs + (root_aptr & 0xfffffffc));
+			tbl_entry2 = m68k_read_memory_32( tofs + (root_aptr & 0xfffffffc));
+			tbl_entry = m68k_read_memory_32( tofs + (root_aptr & 0xfffffffc)+4);
+			tamode = tbl_entry2 & 3;
+//			fprintf(stderr,"PMMU: addr %08x entry %08x entry2 %08x mode %x tofs %x\n", addr_in, tbl_entry, tbl_entry2, tamode, tofs);
+			break;
+	}
+
+	// get table B offset and pointer (zero-width field -> offset 0, see above)
+	tofs = bbits ? ((addr_in<<(is+abits))>>(32-bbits)) : 0;
+	tptr = tbl_entry & 0xfffffff0;
+
+	// find out what format table B is, if any
+	switch (tamode)
+	{
+		case 0: // invalid, should cause MMU exception
+			fatalerror("680x0 PMMU: Unhandled Table A mode %d (addr_in %08x)\n", tamode, addr_in);
+			break;
+
+		case 2: // 4-byte table B descriptor
+			tofs *= 4;
+//			fprintf(stderr,"PMMU: reading table B entry at %08x\n", tofs + tptr);
+			tbl_entry = m68k_read_memory_32( tofs + tptr);
+			tbmode = tbl_entry & 3;
+//			fprintf(stderr,"PMMU: addr %08x entry %08x mode %x tofs %x\n", addr_in, tbl_entry, tbmode, tofs);
+			break;
+
+		case 3: // 8-byte table B descriptor
+			tofs *= 8;
+//			fprintf(stderr,"PMMU: reading table B entries at %08x\n", tofs + tptr);
+			tbl_entry2 = m68k_read_memory_32( tofs + tptr);
+			tbl_entry = m68k_read_memory_32( tofs + tptr + 4);
+			tbmode = tbl_entry2 & 3;
+//			fprintf(stderr,"PMMU: addr %08x entry %08x entry2 %08x mode %x tofs %x\n", addr_in, tbl_entry, tbl_entry2, tbmode, tofs);
+			break;
+
+		case 1:	// early termination descriptor
+			tbl_entry &= 0xffffff00;
+
+			// keep the address bits below the consumed fields and add the base
+			shift = is+abits;
+			addr_out = ((addr_in<<shift)>>shift) + tbl_entry;
+			resolved = 1;
+			break;
+	}
+
+	// if table A wasn't early-out, continue to process table B
+	if (!resolved)
+	{
+		// get table C offset and pointer (zero-width field -> offset 0, see above)
+		tofs = cbits ? ((addr_in<<(is+abits+bbits))>>(32-cbits)) : 0;
+		tptr = tbl_entry & 0xfffffff0;
+
+		switch (tbmode)
+		{
+			case 0:	// invalid, should cause MMU exception
+				fatalerror("680x0 PMMU: Unhandled Table B mode %d (addr_in %08x PC %x)\n", tbmode, addr_in, REG_PC);
+				break;
+
+			case 2: // 4-byte table C descriptor
+				tofs *= 4;
+//				fprintf(stderr,"PMMU: reading table C entry at %08x\n", tofs + tptr);
+				tbl_entry = m68k_read_memory_32(tofs + tptr);
+				tcmode = tbl_entry & 3;
+//				fprintf(stderr,"PMMU: addr %08x entry %08x mode %x tofs %x\n", addr_in, tbl_entry, tcmode, tofs);
+				break;
+
+			case 3: // 8-byte table C descriptor
+				tofs *= 8;
+//				fprintf(stderr,"PMMU: reading table C entries at %08x\n", tofs + tptr);
+				tbl_entry2 = m68k_read_memory_32(tofs + tptr);
+				tbl_entry = m68k_read_memory_32(tofs + tptr + 4);
+				tcmode = tbl_entry2 & 3;
+//				fprintf(stderr,"PMMU: addr %08x entry %08x entry2 %08x mode %x tofs %x\n", addr_in, tbl_entry, tbl_entry2, tcmode, tofs);
+				break;
+
+			case 1: // termination descriptor
+				tbl_entry &= 0xffffff00;
+
+				shift = is+abits+bbits;
+				addr_out = ((addr_in<<shift)>>shift) + tbl_entry;
+				resolved = 1;
+				break;
+		}
+	}
+
+	// if table B wasn't a termination descriptor either, finish with table C
+	if (!resolved)
+	{
+		switch (tcmode)
+		{
+			case 0:	// invalid, should cause MMU exception
+			case 2: // 4-byte ??? descriptor
+			case 3: // 8-byte ??? descriptor
+				// report the table C mode here (not the table B mode, which
+				// was already accepted by the switch above)
+				fatalerror("680x0 PMMU: Unhandled Table C mode %d (addr_in %08x PC %x)\n", tcmode, addr_in, REG_PC);
+				break;
+
+			case 1: // termination descriptor
+				tbl_entry &= 0xffffff00;
+
+				shift = is+abits+bbits+cbits;
+				addr_out = ((addr_in<<shift)>>shift) + tbl_entry;
+				resolved = 1;
+				break;
+		}
+	}
+
+
+//	fprintf(stderr,"PMMU: [%08x] => [%08x]\n", addr_in, addr_out);
+
+	return addr_out;
+}
+
+/*
+
+	m68881_mmu_ops: COP 0 MMU opcode handling
+
+	Decodes the PMMU instruction currently held in m68ki_cpu.ir.  Only PMOVE
+	to/from the translation control register, the supervisor and CPU root
+	pointers and the MMU status register is carried out; every other
+	recognised encoding is merely reported on stderr.
+
+*/
+
+void m68881_mmu_ops()
+{
+	uint16 ext;
+	uint32 ea = m68ki_cpu.ir & 0x3f;
+	uint64 root;
+
+	// the 2 "weird" PBcc encodings do not follow the regular layout: weed them out first
+	switch (m68ki_cpu.ir & 0xffc0)
+	{
+		case 0xf0c0:
+		case 0xf080:
+			fprintf(stderr,"680x0: unhandled PBcc\n");
+			return;
+	}
+
+	// the rest are 1111000xxxXXXXXX where xxx is the instruction family;
+	// only family 0 is decoded any further
+	if (((m68ki_cpu.ir>>9) & 0x7) != 0)
+	{
+		fprintf(stderr,"680x0: unknown PMMU instruction group %d\n", (m68ki_cpu.ir>>9) & 0x7);
+		return;
+	}
+
+	// the extension word selects the actual operation
+	ext = OPER_I_16();
+
+	if ((ext & 0xfde0) == 0x2000)	// PLOAD
+	{
+		fprintf(stderr,"680x0: unhandled PLOAD\n");
+		return;
+	}
+	if ((ext & 0xe200) == 0x2000)	// PFLUSH
+	{
+		fprintf(stderr,"680x0: unhandled PFLUSH PC=%x\n", REG_PC);
+		return;
+	}
+	if (ext == 0xa000)	// PFLUSHR
+	{
+		fprintf(stderr,"680x0: unhandled PFLUSHR\n");
+		return;
+	}
+	if (ext == 0x2800)	// PVALID (FORMAT 1)
+	{
+		fprintf(stderr,"680x0: unhandled PVALID1\n");
+		return;
+	}
+	if ((ext & 0xfff8) == 0x2c00)	// PVALID (FORMAT 2)
+	{
+		fprintf(stderr,"680x0: unhandled PVALID2\n");
+		return;
+	}
+	if ((ext & 0xe000) == 0x8000)	// PTEST
+	{
+		fprintf(stderr,"680x0: unhandled PTEST\n");
+		return;
+	}
+
+	// what remains is treated as PMOVE; bit 9 of the extension word picks the direction
+	switch ((ext>>13) & 0x7)
+	{
+		case 0:	// MC68030/040 form with FD bit
+		case 2:	// MC68881 form, FD never set
+			if (!(ext & 0x200))
+			{
+				// effective address -> MMU register
+				switch ((ext>>10) & 7)
+				{
+					case 0:	// translation control register
+						m68ki_cpu.mmu_tc = READ_EA_32(ea);
+						// top bit of TC switches translation on or off
+						m68ki_cpu.pmmu_enabled = (m68ki_cpu.mmu_tc & 0x80000000) ? 1 : 0;
+						break;
+
+					case 2:	// supervisor root pointer
+						root = READ_EA_64(ea);
+						m68ki_cpu.mmu_srp_limit = (root>>32) & 0xffffffff;
+						m68ki_cpu.mmu_srp_aptr = root & 0xffffffff;
+						break;
+
+					case 3:	// CPU root pointer
+						root = READ_EA_64(ea);
+						m68ki_cpu.mmu_crp_limit = (root>>32) & 0xffffffff;
+						m68ki_cpu.mmu_crp_aptr = root & 0xffffffff;
+						break;
+
+					default:
+						fprintf(stderr,"680x0: PMOVE to unknown MMU register %x, PC %x\n", (ext>>10) & 7, REG_PC);
+						break;
+				}
+			}
+			else
+			{
+				// MMU register -> effective address
+				switch ((ext>>10) & 7)
+				{
+					case 0:	// translation control register
+						WRITE_EA_32(ea, m68ki_cpu.mmu_tc);
+						break;
+
+					case 2:	// supervisor root pointer (limit in the high long word)
+						WRITE_EA_64(ea, (uint64)m68ki_cpu.mmu_srp_limit<<32 | (uint64)m68ki_cpu.mmu_srp_aptr);
+						break;
+
+					case 3:	// CPU root pointer (limit in the high long word)
+						WRITE_EA_64(ea, (uint64)m68ki_cpu.mmu_crp_limit<<32 | (uint64)m68ki_cpu.mmu_crp_aptr);
+						break;
+
+					default:
+						fprintf(stderr,"680x0: PMOVE from unknown MMU register %x, PC %x\n", (ext>>10) & 7, REG_PC);
+						break;
+				}
+			}
+			break;
+
+		case 3:	// MC68030 to/from status reg
+			if (!(ext & 0x200))
+			{
+				m68ki_cpu.mmu_sr = READ_EA_32(ea);
+			}
+			else
+			{
+				WRITE_EA_32(ea, m68ki_cpu.mmu_sr);
+			}
+			break;
+
+		default:
+			fprintf(stderr,"680x0: unknown PMOVE mode %x (modes %04x) (PC %x)\n", (ext>>13) & 0x7, ext, REG_PC);
+			break;
+	}
+}
+
--- a/main.h
+++ b/main.h
@@ -0,0 +1,36 @@
+//
+// BCM283x SMI interface 
+// Derived from Documentation
+// GVL 15-Oct-2014 
+//
+#ifndef MAIN__HEADER
+#define MAIN__HEADER
+
+
+#include <stdint.h>
+
+void setup_io();
+void restore_io();
+int set_pio_timing(int p);
+/*
+void write16(uint32_t address,uint16_t data);
+uint16_t read16(uint32_t address);
+void write8(uint32_t address,uint16_t data);
+uint16_t read8(uint32_t address);
+*/
+
+
+void cpu_pulse_reset(void);
+void m68ki_int_ack(uint8_t int_level);
+int cpu_irq_ack(int level);
+unsigned int  m68k_read_memory_8(unsigned int address);
+unsigned int  m68k_read_memory_16(unsigned int address);
+unsigned int  m68k_read_memory_32(unsigned int address);
+void m68k_write_memory_8(unsigned int address, unsigned int value);
+void m68k_write_memory_16(unsigned int address, unsigned int value);
+void m68k_write_memory_32(unsigned int address, unsigned int value);
+
+
+
+#endif /* MAIN__HEADER */
+
--- a/run.sh
+++ b/run.sh
@@ -0,0 +1 @@
+taskset 0x8 sudo ./emulator
--- a/softfloat/README.txt
+++ b/softfloat/README.txt
@@ -0,0 +1,78 @@
+MAME note: this package is derived from the following original SoftFloat 
+package and has been "re-packaged" to work with MAME's conventions and
+build system.  The source files come from bits64/ and bits64/templates
+in the original distribution as MAME requires a compiler with a 64-bit
+integer type.
+
+
+Package Overview for SoftFloat Release 2b
+
+John R. Hauser
+2002 May 27
+
+
+----------------------------------------------------------------------------
+Overview
+
+SoftFloat is a software implementation of floating-point that conforms to
+the IEC/IEEE Standard for Binary Floating-Point Arithmetic.  SoftFloat is
+distributed in the form of C source code.  Compiling the SoftFloat sources
+generates two things:
+
+-- A SoftFloat object file (typically `softfloat.o') containing the complete
+   set of IEC/IEEE floating-point routines.
+
+-- A `timesoftfloat' program for evaluating the speed of the SoftFloat
+   routines.  (The SoftFloat module is linked into this program.)
+
+The SoftFloat package is documented in four text files:
+
+   SoftFloat.txt          Documentation for using the SoftFloat functions.
+   SoftFloat-source.txt   Documentation for compiling SoftFloat.
+   SoftFloat-history.txt  History of major changes to SoftFloat.
+   timesoftfloat.txt      Documentation for using `timesoftfloat'.
+
+Other files in the package comprise the source code for SoftFloat.
+
+Please be aware that some work is involved in porting this software to other
+targets.  It is not just a matter of getting `make' to complete without
+error messages.  I would have written the code that way if I could, but
+there are fundamental differences between systems that can't be hidden.
+You should not attempt to compile SoftFloat without first reading both
+`SoftFloat.txt' and `SoftFloat-source.txt'.
+
+
+----------------------------------------------------------------------------
+Legal Notice
+
+SoftFloat was written by me, John R. Hauser.  This work was made possible in
+part by the International Computer Science Institute, located at Suite 600,
+1947 Center Street, Berkeley, California 94704.  Funding was partially
+provided by the National Science Foundation under grant MIP-9311980.  The
+original version of this code was written as part of a project to build
+a fixed-point vector processor in collaboration with the University of
+California at Berkeley, overseen by Profs. Nelson Morgan and John Wawrzynek.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
+has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
+PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL
+LOSSES, COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO
+FURTHERMORE EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER
+SCIENCE INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES,
+COSTS, OR OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE
+SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, provided
+that the minimal documentation requirements stated in the source code are
+satisfied.
+
+
+----------------------------------------------------------------------------
+Contact Information
+
+At the time of this writing, the most up-to-date information about
+SoftFloat and the latest release can be found at the Web page `http://
+www.cs.berkeley.edu/~jhauser/arithmetic/SoftFloat.html'.
+
+
--- a/softfloat/mamesf.h
+++ b/softfloat/mamesf.h
@@ -0,0 +1,61 @@
+/*----------------------------------------------------------------------------
+| One of the macros `BIGENDIAN' or `LITTLEENDIAN' must be defined.
+*----------------------------------------------------------------------------*/
+#ifdef LSB_FIRST
+#define LITTLEENDIAN
+#else
+#define BIGENDIAN
+#endif
+
+/*----------------------------------------------------------------------------
+| The macro `BITS64' can be defined to indicate that 64-bit integer types are
+| supported by the compiler.
+*----------------------------------------------------------------------------*/
+#define BITS64
+
+/*----------------------------------------------------------------------------
+| Each of the following `typedef's defines the most convenient type that holds
+| integers of at least as many bits as specified.  For example, `uint8' should
+| be the most convenient type that can hold unsigned integers of as many as
+| 8 bits.  The `flag' type must be able to hold either a 0 or 1.  For most
+| implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed
+| to the same as `int'.
+*----------------------------------------------------------------------------*/
+
+typedef sint8 flag;
+typedef sint8 int8;
+typedef sint16 int16;
+typedef sint32 int32;
+typedef sint64 int64;
+
+/*----------------------------------------------------------------------------
+| Each of the following `typedef's defines a type that holds integers
+| of _exactly_ the number of bits specified.  For instance, for most
+| implementation of C, `bits16' and `sbits16' should be `typedef'ed to
+| `unsigned short int' and `signed short int' (or `short int'), respectively.
+*----------------------------------------------------------------------------*/
+typedef uint8 bits8;
+typedef sint8 sbits8;
+typedef uint16 bits16;
+typedef sint16 sbits16;
+typedef uint32 bits32;
+typedef sint32 sbits32;
+typedef uint64 bits64;
+typedef sint64 sbits64;
+
+/*----------------------------------------------------------------------------
+| The `LIT64' macro takes as its argument a textual integer literal and
+| if necessary ``marks'' the literal as having a 64-bit integer type.
+| For example, the GNU C Compiler (`gcc') requires that 64-bit literals be
+| appended with the letters `LL' standing for `long long', which is `gcc's
+| name for the 64-bit integer type.  Some compilers may allow `LIT64' to be
+| defined as the identity macro:  `#define LIT64( a ) a'.
+*----------------------------------------------------------------------------*/
+#define LIT64( a ) a##ULL
+
+/*----------------------------------------------------------------------------
+| The macro `INLINE' can be used before functions that should be inlined.  If
+| a compiler does not support explicit inlining, this macro should be defined
+| to be `static'.
+*----------------------------------------------------------------------------*/
+// MAME defines INLINE
--- a/softfloat/milieu.h
+++ b/softfloat/milieu.h
@@ -0,0 +1,42 @@
+
+/*============================================================================
+
+This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+/*----------------------------------------------------------------------------
+| Include common integer types and flags.
+*----------------------------------------------------------------------------*/
+#include "mamesf.h"
+
+/*----------------------------------------------------------------------------
+| Symbolic Boolean literals.
+*----------------------------------------------------------------------------*/
+#define FALSE 0
+#define TRUE 1
--- a/softfloat/softfloat-macros
+++ b/softfloat/softfloat-macros
@@ -0,0 +1,732 @@
+
+/*============================================================================
+
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by the number of bits given in `count'.  If any nonzero
+| bits are shifted off, they are ``jammed'' into the least significant bit of
+| the result by setting the least significant bit to 1.  The value of `count'
+| can be arbitrarily large; in particular, if `count' is greater than 32, the
+| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+| The result is stored in the location pointed to by `zPtr'.
+*----------------------------------------------------------------------------*/
+
+static inline void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
+{
+    /* Sticky bit: 1 when any of the bits shifted out of `a' is nonzero. */
+    bits32 sticky;
+
+    if ( count == 0 ) {
+        *zPtr = a;
+        return;
+    }
+    if ( 32 <= count ) {
+        *zPtr = ( a != 0 );
+        return;
+    }
+    sticky = ( ( a<<( ( - count ) & 31 ) ) != 0 );
+    *zPtr = ( a>>count ) | sticky;
+}
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by the number of bits given in `count'.  If any nonzero
+| bits are shifted off, they are ``jammed'' into the least significant bit of
+| the result by setting the least significant bit to 1.  The value of `count'
+| can be arbitrarily large; in particular, if `count' is greater than 64, the
+| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+| The result is stored in the location pointed to by `zPtr'.
+*----------------------------------------------------------------------------*/
+
+static inline void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
+{
+    bits64 kept, lost;
+
+    if ( count == 0 ) {
+        *zPtr = a;
+        return;
+    }
+    if ( 64 <= count ) {
+        *zPtr = ( a != 0 );   /* everything is shifted out: sticky bit only */
+        return;
+    }
+    kept = a>>count;
+    lost = a<<( ( - count ) & 63 );   /* exactly the bits that fall off */
+    *zPtr = kept | ( lost != 0 );
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
+| _plus_ the number of bits given in `count'.  The shifted result is at most
+| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
+| bits shifted off form a second 64-bit result as follows:  The _last_ bit
+| shifted off is the most-significant bit of the extra result, and the other
+| 63 bits of the extra result are all zero if and only if _all_but_the_last_
+| bits shifted off were all zero.  This extra result is stored in the location
+| pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
+|     (This routine makes more sense if `a0' and `a1' are considered to form
+| a fixed-point value with binary point between `a0' and `a1'.  This fixed-
+| point value is shifted right by the number of bits given in `count', and
+| the integer part of the result is returned at the location pointed to by
+| `z0Ptr'.  The fractional part of the result may be slightly corrupted as
+| described above, and is returned at the location pointed to by `z1Ptr'.)
+*----------------------------------------------------------------------------*/
+
+static inline void
+ shift64ExtraRightJamming(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    /* `integerPart' is the shifted value, `extra' the jammed remainder. */
+    bits64 integerPart = 0;
+    bits64 extra;
+    int8 leftShift = ( - count ) & 63;
+
+    if ( count == 0 ) {
+        extra = a1;
+        integerPart = a0;
+    }
+    else if ( count < 64 ) {
+        extra = ( a0<<leftShift ) | ( a1 != 0 );
+        integerPart = a0>>count;
+    }
+    else if ( count == 64 ) {
+        /* `a0' moves wholesale into the extra word. */
+        extra = a0 | ( a1 != 0 );
+    }
+    else {
+        /* Everything is shifted past the extra word: sticky bit only. */
+        extra = ( ( a0 | a1 ) != 0 );
+    }
+    *z1Ptr = extra;
+    *z0Ptr = integerPart;
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
+| number of bits given in `count'.  Any bits shifted off are lost.  The value
+| of `count' can be arbitrarily large; in particular, if `count' is greater
+| than 128, the result will be 0.  The result is broken into two 64-bit pieces
+| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ shift128Right(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 z0, z1;
+    int8 negCount = ( - count ) & 63;
+
+    if ( count == 0 ) {
+        z1 = a1;
+        z0 = a0;
+    }
+    else if ( count < 64 ) {
+        z1 = ( a0<<negCount ) | ( a1>>count );
+        z0 = a0>>count;
+    }
+    else {
+        /* For 64 <= count < 128 the low word is `a0' shifted by count - 64. */
+        z1 = ( count < 128 ) ? ( a0>>( count & 63 ) ) : 0;
+        z0 = 0;
+    }
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
+| number of bits given in `count'.  If any nonzero bits are shifted off, they
+| are ``jammed'' into the least significant bit of the result by setting the
+| least significant bit to 1.  The value of `count' can be arbitrarily large;
+| in particular, if `count' is greater than 128, the result will be either
+| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
+| nonzero.  The result is broken into two 64-bit pieces which are stored at
+| the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ shift128RightJamming(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 high = 0;
+    bits64 low, sticky;
+    int8 leftShift = ( - count ) & 63;
+
+    if ( count == 0 ) {
+        low = a1;
+        high = a0;
+    }
+    else if ( count < 64 ) {
+        /* Bits dropped from `a1' collapse into the sticky bit. */
+        sticky = ( ( a1<<leftShift ) != 0 );
+        low = ( a0<<leftShift ) | ( a1>>count ) | sticky;
+        high = a0>>count;
+    }
+    else if ( count == 64 ) {
+        low = a0 | ( a1 != 0 );
+    }
+    else if ( count < 128 ) {
+        sticky = ( ( ( a0<<leftShift ) | a1 ) != 0 );
+        low = ( a0>>( count & 63 ) ) | sticky;
+    }
+    else {
+        low = ( ( a0 | a1 ) != 0 );
+    }
+    *z1Ptr = low;
+    *z0Ptr = high;
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
+| by 64 _plus_ the number of bits given in `count'.  The shifted result is
+| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
+| stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
+| off form a third 64-bit result as follows:  The _last_ bit shifted off is
+| the most-significant bit of the extra result, and the other 63 bits of the
+| extra result are all zero if and only if _all_but_the_last_ bits shifted off
+| were all zero.  This extra result is stored in the location pointed to by
+| `z2Ptr'.  The value of `count' can be arbitrarily large.
+|     (This routine makes more sense if `a0', `a1', and `a2' are considered
+| to form a fixed-point value with binary point between `a1' and `a2'.  This
+| fixed-point value is shifted right by the number of bits given in `count',
+| and the integer part of the result is returned at the locations pointed to
+| by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
+| corrupted as described above, and is returned at the location pointed to by
+| `z2Ptr'.)
+*----------------------------------------------------------------------------*/
+
+static inline void
+ shift128ExtraRightJamming(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     int16 count,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    /* Outputs: z0:z1 receive the shifted value, z2 the extra word with a sticky low bit. */
+    bits64 z0, z1, z2;
+    int8 negCount = ( - count ) & 63;   /* left-shift amount, ( 64 - count ) mod 64 */
+
+    if ( count == 0 ) {
+        z2 = a2;
+        z1 = a1;
+        z0 = a0;
+    }
+    else {
+        if ( count < 64 ) {
+            z2 = a1<<negCount;   /* bits leaving `a1' form the top of the extra word */
+            z1 = ( a0<<negCount ) | ( a1>>count );
+            z0 = a0>>count;
+        }
+        else {
+            if ( count == 64 ) {
+                z2 = a1;   /* every word moves down by exactly one position */
+                z1 = a0;
+            }
+            else {
+                a2 |= a1;   /* from here on `a1' only feeds the sticky bit below */
+                if ( count < 128 ) {
+                    z2 = a0<<negCount;
+                    z1 = a0>>( count & 63 );
+                }
+                else {
+                    z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
+                    z1 = 0;
+                }
+            }
+            z0 = 0;
+        }
+        z2 |= ( a2 != 0 );   /* jam: nonzero lower-order bits set bit 0 of the extra word */
+    }
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
+| number of bits given in `count'.  Any bits shifted off are lost.  The value
+| of `count' must be less than 64.  The result is broken into two 64-bit
+| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ shortShift128Left(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 high = a0;
+    /* For count 0 the carry term would be all of `a1', so it is skipped. */
+    if ( count != 0 ) high = ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
+    *z1Ptr = a1<<count;
+    *z0Ptr = high;
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
+| by the number of bits given in `count'.  Any bits shifted off are lost.
+| The value of `count' must be less than 64.  The result is broken into three
+| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
+| `z1Ptr', and `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ shortShift192Left(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     int16 count,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 high = a0<<count;
+    bits64 middle = a1<<count;
+    bits64 low = a2<<count;
+
+    if ( 0 < count ) {
+        /* Carry the bits shifted out of each word into the next one up. */
+        int8 carryShift = ( ( - count ) & 63 );
+
+        middle |= a2>>carryShift;
+        high |= a1>>carryShift;
+    }
+
+    *z2Ptr = low;
+    *z1Ptr = middle;
+    *z0Ptr = high;
+}
+
+/*----------------------------------------------------------------------------
+| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
+| value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
+| any carry out is lost.  The result is broken into two 64-bit pieces which
+| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ add128(
+     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 lowSum = a1 + b1;
+    /* The low sum wrapped around exactly when it ends up below `a1'. */
+    bits64 carry = ( lowSum < a1 );
+
+    *z1Ptr = lowSum;
+    *z0Ptr = a0 + b0 + carry;
+}
+
+/*----------------------------------------------------------------------------
+| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
+| 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
+| modulo 2^192, so any carry out is lost.  The result is broken into three
+| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
+| `z1Ptr', and `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ add192(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     bits64 b0,
+     bits64 b1,
+     bits64 b2,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    /* Word-wise sums; a sum that wrapped is smaller than its first operand. */
+    bits64 low = a2 + b2;
+    bits64 middle = a1 + b1;
+    bits64 high = a0 + b0;
+    uint8 carryIntoMiddle = ( low < a2 );
+    uint8 carryIntoHigh = ( middle < a1 );
+
+    /* Propagate the low carry first; it can itself wrap the middle word. */
+    middle += carryIntoMiddle;
+    if ( middle < carryIntoMiddle ) high += 1;
+    high += carryIntoHigh;
+
+    *z2Ptr = low;
+    *z1Ptr = middle;
+    *z0Ptr = high;
+}
+
+/*----------------------------------------------------------------------------
+| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
+| 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
+| 2^128, so any borrow out (carry out) is lost.  The result is broken into two
+| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
+| `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ sub128(
+     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    /* A borrow is taken from the high word when the low word underflows. */
+    bits64 borrow = ( a1 < b1 );
+    *z1Ptr = a1 - b1;
+    *z0Ptr = a0 - b0 - borrow;
+}
+
+/*----------------------------------------------------------------------------
+| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
+| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
+| Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
+| result is broken into three 64-bit pieces which are stored at the locations
+| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ sub192(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     bits64 b0,
+     bits64 b1,
+     bits64 b2,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    /* Word-wise differences; a word borrows when its minuend is smaller. */
+    bits64 low = a2 - b2;
+    bits64 middle = a1 - b1;
+    bits64 high = a0 - b0;
+    uint8 borrowFromMiddle = ( a2 < b2 );
+    uint8 borrowFromHigh = ( a1 < b1 );
+
+    /* Taking the low borrow out of a zero middle word borrows again. */
+    if ( middle < borrowFromMiddle ) high -= 1;
+    middle -= borrowFromMiddle;
+    high -= borrowFromHigh;
+
+    *z2Ptr = low;
+    *z1Ptr = middle;
+    *z0Ptr = high;
+}
+
+/*----------------------------------------------------------------------------
+| Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
+| into two 64-bit pieces which are stored at the locations pointed to by
+| `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    /* Split both operands into 32-bit halves and form the partial products. */
+    bits32 aLo = a;
+    bits32 aHi = a>>32;
+    bits32 bLo = b;
+    bits32 bHi = b>>32;
+    bits64 lowProduct = ( (bits64) aLo ) * bLo;
+    bits64 highProduct = ( (bits64) aHi ) * bHi;
+    bits64 crossB = ( (bits64) aHi ) * bLo;
+    bits64 cross = ( (bits64) aLo ) * bHi + crossB;
+    bits64 crossCarry = ( cross < crossB );
+
+    /* The cross sum is weighted by 2^32: its top half goes to the high word. */
+    highProduct += ( crossCarry<<32 ) + ( cross>>32 );
+    cross <<= 32;
+    lowProduct += cross;
+    highProduct += ( lowProduct < cross );
+
+    *z1Ptr = lowProduct;
+    *z0Ptr = highProduct;
+}
+
+/*----------------------------------------------------------------------------
+| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
+| `b' to obtain a 192-bit product.  The product is broken into three 64-bit
+| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
+| `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ mul128By64To192(
+     bits64 a0,
+     bits64 a1,
+     bits64 b,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 lowHi, lowLo, highHi, highLo;
+
+    mul64To128( a1, b, &lowHi, &lowLo );
+    mul64To128( a0, b, &highHi, &highLo );
+    /* a0 * b is weighted by 2^64, so its low word meets the top of a1 * b. */
+    add128( highHi, highLo, 0, lowHi, &highHi, &lowHi );
+    *z2Ptr = lowLo;
+    *z1Ptr = lowHi;
+    *z0Ptr = highHi;
+}
+
+/*----------------------------------------------------------------------------
+| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
+| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
+| product.  The product is broken into four 64-bit pieces which are stored at
+| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
+*----------------------------------------------------------------------------*/
+
+static inline void
+ mul128To256(
+     bits64 a0,
+     bits64 a1,
+     bits64 b0,
+     bits64 b1,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr,
+     bits64 *z3Ptr
+ )
+{
+    bits64 w0, w1, w2, w3;     /* result words, most significant first */
+    bits64 extraA, extraB;     /* scratch words for the partial products */
+
+    mul64To128( a1, b1, &w2, &w3 );
+    mul64To128( a1, b0, &w1, &extraB );
+    add128( w1, extraB, 0, w2, &w1, &w2 );
+    mul64To128( a0, b0, &w0, &extraA );
+    add128( w0, extraA, 0, w1, &w0, &w1 );
+    mul64To128( a0, b1, &extraA, &extraB );
+    add128( extraA, extraB, 0, w2, &extraA, &w2 );
+    add128( w0, w1, 0, extraA, &w0, &w1 );
+
+    *z3Ptr = w3;
+    *z2Ptr = w2;
+    *z1Ptr = w1;
+    *z0Ptr = w0;
+}
+
+/*----------------------------------------------------------------------------
+| Returns an approximation to the 64-bit integer quotient obtained by dividing
+| `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
+| divisor `b' must be at least 2^63.  If q is the exact quotient truncated
+| toward zero, the approximation returned lies between q and q + 2 inclusive.
+| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
+| unsigned integer is returned.
+*----------------------------------------------------------------------------*/
+
+static inline bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
+{
+    bits64 b0, b1;
+    bits64 rem0, rem1, term0, term1;
+    bits64 z;
+
+    if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );   /* quotient needs more than 64 bits */
+    b0 = b>>32;   /* upper 32 bits of the divisor */
+    /* Upper 32 quotient bits: a0 / b0, capped at 0xFFFFFFFF, placed in the top half of z. */
+    z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
+    mul64To128( b, z, &term0, &term1 );
+    sub128( a0, a1, term0, term1, &rem0, &rem1 );   /* remainder = a0:a1 - b * z */
+    while ( ( (sbits64) rem0 ) < 0 ) {   /* negative remainder: z was too large */
+        z -= LIT64( 0x100000000 );
+        b1 = b<<32;   /* b0:b1 is b * 2^32 as a 128-bit value */
+        add128( rem0, rem1, b0, b1, &rem0, &rem1 );
+    }
+    rem0 = ( rem0<<32 ) | ( rem1>>32 );   /* keep the middle 64 bits of the remainder */
+    z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;   /* lower 32 quotient bits, same scheme */
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns an approximation to the square root of the 32-bit significand given
+| by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
+| `aExp' (the least significant bit) is 1, the integer returned approximates
+| 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
+| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
+| case, the approximation returned lies strictly within +/-2 of the exact
+| value.
+*----------------------------------------------------------------------------*/
+
+static inline bits32 estimateSqrt32( int16 aExp, bits32 a )
+{
+    /* Corrections subtracted from the first estimate; one table per parity of `aExp'. */
+    static const bits16 sqrtOddAdjustments[] = {
+        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
+        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
+    };
+    static const bits16 sqrtEvenAdjustments[] = {
+        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
+        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
+    };
+    int8 index;
+    bits32 z;
+
+    index = ( a>>27 ) & 15;   /* bits 27..30 of `a' pick the table entry */
+    if ( aExp & 1 ) {   /* odd exponent */
+        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
+        z = ( ( a / z )<<14 ) + ( z<<15 );
+        a >>= 1;
+    }
+    else {   /* even exponent */
+        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
+        z = a / z + z;
+        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );   /* clamp: z<<15 would not fit in 32 bits */
+        if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
+    }
+    return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );   /* a * 2^31 / z + z / 2 */
+}
+
+/*----------------------------------------------------------------------------
+| Returns the number of leading 0 bits before the most-significant 1 bit of
+| `a'.  If `a' is zero, 32 is returned.
+*----------------------------------------------------------------------------*/
+
+static int8 countLeadingZeros32( bits32 a )
+{
+    static const int8 countLeadingZerosHigh[] = {
+        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+    };
+    int8 zeros = 0;
+
+    /* Normalize so the first set bit, if any, lands in the top byte. */
+    if ( a < 0x10000 ) {
+        zeros = 16;
+        a <<= 16;
+    }
+    if ( a < 0x1000000 ) {
+        zeros += 8;
+        a <<= 8;
+    }
+
+    /* The table finishes the count for the top byte (8 for a zero byte). */
+    return zeros + countLeadingZerosHigh[ a>>24 ];
+}
+
+/*----------------------------------------------------------------------------
+| Returns the number of leading 0 bits before the most-significant 1 bit of
+| `a'.  If `a' is zero, 64 is returned.
+*----------------------------------------------------------------------------*/
+
+static int8 countLeadingZeros64( bits64 a )
+{
+    bits32 upperHalf = a>>32;
+    int8 zeros;
+
+    if ( upperHalf != 0 ) {
+        /* The first set bit is in the upper word. */
+        zeros = countLeadingZeros32( upperHalf );
+    }
+    else {
+        /* The upper word is clear: count it fully, then the lower word. */
+        zeros = 32 + countLeadingZeros32( a );
+    }
+    return zeros;
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
+| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
+| Otherwise, returns 0.
+*----------------------------------------------------------------------------*/
+
+static inline flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+    /* Both halves must match; the high words are compared first. */
+    if ( a0 != b0 ) return 0;
+    return ( a1 == b1 );
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
+| than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
+| Otherwise, returns 0.
+*----------------------------------------------------------------------------*/
+
+static inline flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+    /* The high words decide unless they tie; then the low words do. */
+    if ( a0 != b0 ) return ( a0 < b0 );
+    return ( a1 <= b1 );
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
+| than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
+| returns 0.
+*----------------------------------------------------------------------------*/
+
+static inline flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+    /* The high words decide unless they tie; then the low words do. */
+    if ( a0 != b0 ) return ( a0 < b0 );
+    return ( a1 < b1 );
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
+| not equal to the 128-bit value formed by concatenating `b0' and `b1'.
+| Otherwise, returns 0.
+*----------------------------------------------------------------------------*/
+
+static inline flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+    /* A mismatch in either half makes the 128-bit values differ. */
+    if ( a0 != b0 ) return 1;
+    return ( a1 != b1 );
+}
+
+/*-----------------------------------------------------------------------------
+| Changes the sign of the extended double-precision floating-point value 'a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static inline floatx80 floatx80_chs(floatx80 reg)
+{
+    reg.high = reg.high ^ 0x8000;   /* toggle the 0x8000 (sign) bit of `high' */
+    return reg;
+}
+
--- a/softfloat/softfloat-specialize
+++ b/softfloat/softfloat-specialize
@@ -0,0 +1,470 @@
+
+/*============================================================================
+
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+/*----------------------------------------------------------------------------
+| Underflow tininess-detection mode; the default is detection after rounding.
+| Keep the `int8' type in sync with the `extern' declaration in `softfloat.h'.
+*----------------------------------------------------------------------------*/
+int8 float_detect_tininess = float_tininess_after_rounding;
+
+/*----------------------------------------------------------------------------
+| Raises the exceptions specified by `flags'.  Floating-point traps can be
+| defined here if desired.  It is currently not possible for such a trap to
+| substitute a result value.  If traps are not implemented, this routine
+| should be simply `float_exception_flags |= flags;'.
+*----------------------------------------------------------------------------*/
+
+void float_raise( int8 flags )
+{
+    /* No trap support here: the new bits are merged into the sticky flags. */
+    float_exception_flags = float_exception_flags | flags;
+
+}
+
+/*----------------------------------------------------------------------------
+| Internal canonical NaN format.
+*----------------------------------------------------------------------------*/
+typedef struct {
+    flag sign;          /* sign of the NaN, taken from the source's top bit */
+    bits64 high, low;   /* NaN fraction bits, left-aligned across high:low */
+} commonNaNT;
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated single-precision NaN: all 32 bits set.
+*----------------------------------------------------------------------------*/
+#define float32_default_nan 0xFFFFFFFF
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is a NaN;
+| otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag float32_is_nan( float32 a )
+{
+    /* Shift the sign bit out; anything above the infinity pattern is a NaN. */
+    bits32 magnitude = (bits32) ( a<<1 );
+    return ( magnitude > 0xFF000000 );
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is a signaling
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag float32_is_signaling_nan( float32 a )
+{
+    /* Exponent all ones with the quiet bit (bit 22) clear, payload non-zero. */
+    if ( ( ( a>>22 ) & 0x1FF ) != 0x1FE ) return 0;
+    return ( ( a & 0x003FFFFF ) != 0 );
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point NaN
+| `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT float32ToCommonNaN( float32 a )
+{
+    commonNaNT canonical;
+    if ( float32_is_signaling_nan( a ) ) {
+        float_raise( float_flag_invalid );   /* signaling input */
+    }
+    canonical.sign = a>>31;
+    canonical.high = ( (bits64) a )<<41;   /* fraction moves to the top bits */
+    canonical.low = 0;
+    return canonical;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the single-
+| precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static float32 commonNaNToFloat32( commonNaNT a )
+{
+    /* Sign bit, then exponent all ones plus quiet bit, then the payload. */
+    bits32 signBit = ( (bits32) a.sign )<<31;
+    return signBit | 0x7FC00000 | ( a.high>>41 );
+}
+
+/*----------------------------------------------------------------------------
+| Takes two single-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
+| signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static float32 propagateFloat32NaN( float32 a, float32 b )
+{
+    const flag aNaN = float32_is_nan( a );
+    const flag aSignaling = float32_is_signaling_nan( a );
+    const flag bNaN = float32_is_nan( b );
+    const flag bSignaling = float32_is_signaling_nan( b );
+
+    /* Quiet both operands (bit 22) so the one returned is never signaling. */
+    a |= 0x00400000;
+    b |= 0x00400000;
+    if ( aSignaling || bSignaling ) {
+        float_raise( float_flag_invalid );
+    }
+    /* `a' is not a NaN, so `b' is the result. */
+    if ( ! aNaN ) return b;
+    /* A signaling `a' yields to a NaN `b'; otherwise `a' is the result. */
+    if ( aSignaling && bNaN ) return b;
+    return a;
+}
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated double-precision NaN: all 64 bits set.
+*----------------------------------------------------------------------------*/
+#define float64_default_nan LIT64( 0xFFFFFFFFFFFFFFFF )
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is a NaN;
+| otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag float64_is_nan( float64 a )
+{
+    /* Drop the sign bit; values above the shifted infinity pattern are NaNs. */
+    bits64 magnitude = (bits64) ( a<<1 );
+    return ( magnitude > LIT64( 0xFFE0000000000000 ) );
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is a signaling
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag float64_is_signaling_nan( float64 a )
+{
+    /* Bits 62..51: exponent all ones followed by a clear quiet bit. */
+    flag quietBitClear = ( ( ( a>>51 ) & 0xFFF ) == 0xFFE );
+    /* The remaining 51 fraction bits must not all be zero. */
+    bits64 payload = a & LIT64( 0x0007FFFFFFFFFFFF );
+    return quietBitClear && ( payload != 0 );
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point NaN
+| `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT float64ToCommonNaN( float64 a )
+{
+    commonNaNT canonical;
+    if ( float64_is_signaling_nan( a ) ) {
+        float_raise( float_flag_invalid );   /* signaling input */
+    }
+    canonical.sign = a>>63;
+    canonical.high = a<<12;   /* sign and exponent bits are shifted out */
+    canonical.low = 0;
+    return canonical;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the double-
+| precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static float64 commonNaNToFloat64( commonNaNT a )
+{
+    /* Exponent all ones with the quiet bit set: the result is a quiet NaN. */
+    const bits64 quietNaNBits = LIT64( 0x7FF8000000000000 );
+    bits64 signBit = ( (bits64) a.sign )<<63;
+    bits64 payload = a.high>>12;
+
+    return signBit | quietNaNBits | payload;
+}
+
+/*----------------------------------------------------------------------------
+| Takes two double-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
+| signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static float64 propagateFloat64NaN( float64 a, float64 b )
+{
+    const flag aNaN = float64_is_nan( a );
+    const flag aSignaling = float64_is_signaling_nan( a );
+    const flag bNaN = float64_is_nan( b );
+    const flag bSignaling = float64_is_signaling_nan( b );
+
+    /* Quiet both operands (bit 51) so the one returned is never signaling. */
+    a |= LIT64( 0x0008000000000000 );
+    b |= LIT64( 0x0008000000000000 );
+    if ( aSignaling || bSignaling ) {
+        float_raise( float_flag_invalid );
+    }
+    /* `a' is not a NaN, so `b' is the result. */
+    if ( ! aNaN ) return b;
+    /* A signaling `a' yields to a NaN `b'; otherwise `a' is the result. */
+    if ( aSignaling && bNaN ) return b;
+    return a;
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN (all 80
+| bits set).  The `high' and `low' values hold the most-significant 16 bits
+| and the least-significant 64 bits, respectively.
+*----------------------------------------------------------------------------*/
+#define floatx80_default_nan_high 0xFFFF
+#define floatx80_default_nan_low  LIT64( 0xFFFFFFFFFFFFFFFF )
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is a
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_is_nan( floatx80 a )
+{
+    /* Maximum exponent and a non-zero significand once bit 63 is dropped. */
+    if ( ( a.high & 0x7FFF ) != 0x7FFF ) return 0;
+    return ( (bits64) ( a.low<<1 ) != 0 );
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is a
+| signaling NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_is_signaling_nan( floatx80 a )
+{
+    /* Significand with the quiet bit (bit 62) forced to zero. */
+    const bits64 withoutQuietBit = a.low & ~ LIT64( 0x4000000000000000 );
+
+    if ( ( a.high & 0x7FFF ) != 0x7FFF ) return 0;   /* exponent not all ones */
+    if ( (bits64) ( withoutQuietBit<<1 ) == 0 ) return 0;   /* empty payload */
+    /* Signaling only when the quiet bit was already clear in the input. */
+    return ( a.low == withoutQuietBit );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point NaN `a' to the canonical NaN format.  If `a' is a signaling NaN, the
+| invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT floatx80ToCommonNaN( floatx80 a )
+{
+    commonNaNT canonical;
+    if ( floatx80_is_signaling_nan( a ) ) {
+        float_raise( float_flag_invalid );   /* signaling input */
+    }
+    canonical.sign = a.high>>15;
+    canonical.high = a.low<<1;   /* bit 63 of the significand is shifted out */
+    canonical.low = 0;
+    return canonical;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the extended
+| double-precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static floatx80 commonNaNToFloatx80( commonNaNT a )
+{
+    floatx80 result;
+    /* Maximum exponent underneath the sign bit. */
+    result.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF;
+    /* Top two significand bits forced on, OR-ed with the payload >> 1. */
+    result.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 );
+    return result;
+}
+
+/*----------------------------------------------------------------------------
+| Takes two extended double-precision floating-point values `a' and `b', one
+| of which is a NaN, and returns the appropriate NaN result.  If either `a' or
+| `b' is a signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b )
+{
+    const flag aNaN = floatx80_is_nan( a );
+    const flag aSignaling = floatx80_is_signaling_nan( a );
+    const flag bNaN = floatx80_is_nan( b );
+    const flag bSignaling = floatx80_is_signaling_nan( b );
+
+    /* Force bits 63 and 62 of both significands on before choosing one. */
+    a.low |= LIT64( 0xC000000000000000 );
+    b.low |= LIT64( 0xC000000000000000 );
+    if ( aSignaling || bSignaling ) {
+        float_raise( float_flag_invalid );
+    }
+    /* `a' is not a NaN, so `b' is the result. */
+    if ( ! aNaN ) return b;
+    /* A signaling `a' yields to a NaN `b'; otherwise `a' is the result. */
+    if ( aSignaling && bNaN ) return b;
+    return a;
+}
+
+#define EXP_BIAS 0x3FFF  /* presumably the floatx80 exponent bias -- TODO confirm */
+
+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the extended double-precision floating-point
+| value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline bits64 extractFloatx80Frac( floatx80 a )
+{
+    /* All 64 bits of `low' are handed back as-is. */
+    const bits64 significand = a.low;
+    return significand;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the extended double-precision floating-point
+| value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline int32 extractFloatx80Exp( floatx80 a )
+{
+    /* Mask off bit 15 (the sign) and keep the low 15 bits of `high'. */
+    const int32 exponentBits = a.high & 0x7FFF;
+    return exponentBits;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the extended double-precision floating-point value
+| `a'.
+*----------------------------------------------------------------------------*/
+
+static inline flag extractFloatx80Sign( floatx80 a )
+{
+    /* Bit 15 of `high' carries the sign. */
+    const flag signBit = a.high>>15;
+    return signBit;
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated quadruple-precision NaN: all 128 bits
+| set.  `high' and `low' hold the most- and least-significant 64 bits.
+*----------------------------------------------------------------------------*/
+#define float128_default_nan_high LIT64( 0xFFFFFFFFFFFFFFFF )
+#define float128_default_nan_low  LIT64( 0xFFFFFFFFFFFFFFFF )
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is a NaN;
+| otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag float128_is_nan( float128 a )
+{
+    /* With the sign shifted out, the exponent field must be all ones... */
+    if ( (bits64) ( a.high<<1 ) < LIT64( 0xFFFE000000000000 ) ) return 0;
+    /* ...and at least one of the 112 fraction bits must be set. */
+    if ( a.low ) return 1;
+    return ( ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) != 0 );
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is a
+| signaling NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag float128_is_signaling_nan( float128 a )
+{
+    /* Bits 62..47 of `high': exponent all ones, then a clear quiet bit. */
+    if ( ( ( a.high>>47 ) & 0xFFFF ) != 0xFFFE ) return 0;
+    /* Some fraction bit below the quiet bit has to be set. */
+    if ( a.low ) return 1;
+    return ( ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) != 0 );
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point NaN
+| `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT float128ToCommonNaN( float128 a )
+{
+    commonNaNT nan;   /* high:low receive the input shifted left by 16 */
+    if ( float128_is_signaling_nan( a ) ) {
+        float_raise( float_flag_invalid );   /* signaling input */
+    }
+    nan.sign = a.high>>63;
+    shortShift128Left( a.high, a.low, 16, &nan.high, &nan.low );
+    return nan;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the quadruple-
+| precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static float128 commonNaNToFloat128( commonNaNT a )
+{
+    const bits64 signBit = ( (bits64) a.sign )<<63;
+    float128 result;
+    shift128Right( a.high, a.low, 16, &result.high, &result.low );
+    /* Overlay the sign, an all-ones exponent and the quiet bit (bit 47). */
+    result.high = result.high | signBit | LIT64( 0x7FFF800000000000 );
+    return result;
+}
+
+/*----------------------------------------------------------------------------
+| Takes two quadruple-precision floating-point values `a' and `b', one of
+| which is a NaN, and returns the appropriate NaN result.  If either `a' or
+| `b' is a signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static float128 propagateFloat128NaN( float128 a, float128 b )
+{
+    const flag aNaN = float128_is_nan( a );
+    const flag aSignaling = float128_is_signaling_nan( a );
+    const flag bNaN = float128_is_nan( b );
+    const flag bSignaling = float128_is_signaling_nan( b );
+
+    /* Quiet both operands (bit 47 of `high') before choosing one. */
+    a.high |= LIT64( 0x0000800000000000 );
+    b.high |= LIT64( 0x0000800000000000 );
+    if ( aSignaling || bSignaling ) {
+        float_raise( float_flag_invalid );
+    }
+    /* `a' is not a NaN, so `b' is the result. */
+    if ( ! aNaN ) return b;
+    /* A signaling `a' yields to a NaN `b'; otherwise `a' is the result. */
+    if ( aSignaling && bNaN ) return b;
+    return a;
+}
+
+#endif
+
--- a/softfloat/softfloat.c
+++ b/softfloat/softfloat.c
--- a/softfloat/softfloat.h
+++ b/softfloat/softfloat.h
@@ -0,0 +1,460 @@
+
+/*============================================================================
+
+This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+/*----------------------------------------------------------------------------
+| The macro `FLOATX80' must be defined to enable the extended double-precision
+| floating-point format `floatx80'.  If this macro is not defined, the
+| `floatx80' type will not be defined, and none of the functions that either
+| input or output the `floatx80' type will be defined.  The same applies to
+| the `FLOAT128' macro and the quadruple-precision format `float128'.
+*----------------------------------------------------------------------------*/
+#define FLOATX80
+#define FLOAT128
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point types.
+*----------------------------------------------------------------------------*/
+typedef bits32 float32;	/* single-precision value held as a raw bits32 */
+typedef bits64 float64;	/* double-precision value held as a raw bits64 */
+#ifdef FLOATX80
+typedef struct {
+	bits16 high;	/* sign in bit 15, exponent in bits 14..0 */
+	bits64 low;	/* 64-bit significand */
+} floatx80;
+#endif
+#ifdef FLOAT128
+typedef struct {
+	bits64 high, low;	/* most- and least-significant 64 bits */
+} float128;
+#endif
+
+/*----------------------------------------------------------------------------
+| Primitive arithmetic functions, including multi-word arithmetic, and
+| division and square root approximations.  (Can be specialized to target if
+| desired.)
+*----------------------------------------------------------------------------*/
+#include "softfloat-macros"
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point underflow tininess-detection mode.
+*----------------------------------------------------------------------------*/
+extern int8 float_detect_tininess;	/* holds one of the values below */
+enum {
+	float_tininess_after_rounding  = 0,	/* the statically initialized default */
+	float_tininess_before_rounding = 1	/* any negative zExp counts as tiny */
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point rounding mode.
+*----------------------------------------------------------------------------*/
+extern int8 float_rounding_mode;	/* holds one of the values below */
+enum {
+	float_round_nearest_even = 0,	/* to nearest, ties go to the even value */
+	float_round_to_zero      = 1,	/* truncate: never increment */
+	float_round_down         = 2,	/* only negative inexact values increment */
+	float_round_up           = 3	/* only positive inexact values increment */
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point exception flags.
+*----------------------------------------------------------------------------*/
+extern int8 float_exception_flags;	/* OR of the float_flag_* bits raised */
+enum {	/* one distinct bit per exception, so values can be OR-ed together */
+	float_flag_invalid = 0x01, float_flag_denormal = 0x02, float_flag_divbyzero = 0x04, float_flag_overflow = 0x08,
+	float_flag_underflow = 0x10, float_flag_inexact = 0x20
+};
+
+/*----------------------------------------------------------------------------
+| Routine to raise any or all of the software IEC/IEEE floating-point
+| exception flags.
+*----------------------------------------------------------------------------*/
+void float_raise( int8 );
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE integer-to-floating-point conversion routines.
+*----------------------------------------------------------------------------*/
+float32 int32_to_float32( int32 );
+float64 int32_to_float64( int32 );
+#ifdef FLOATX80
+floatx80 int32_to_floatx80( int32 );
+#endif
+#ifdef FLOAT128
+float128 int32_to_float128( int32 );
+#endif
+float32 int64_to_float32( int64 );
+float64 int64_to_float64( int64 );
+#ifdef FLOATX80
+floatx80 int64_to_floatx80( int64 );
+#endif
+#ifdef FLOAT128
+float128 int64_to_float128( int64 );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32 float32_to_int32( float32 );
+int32 float32_to_int32_round_to_zero( float32 );
+int64 float32_to_int64( float32 );
+int64 float32_to_int64_round_to_zero( float32 );
+float64 float32_to_float64( float32 );
+#ifdef FLOATX80
+floatx80 float32_to_floatx80( float32 );
+#endif
+#ifdef FLOAT128
+float128 float32_to_float128( float32 );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision operations.
+*----------------------------------------------------------------------------*/
+float32 float32_round_to_int( float32 );
+float32 float32_add( float32, float32 );
+float32 float32_sub( float32, float32 );
+float32 float32_mul( float32, float32 );
+float32 float32_div( float32, float32 );
+float32 float32_rem( float32, float32 );
+float32 float32_sqrt( float32 );
+flag float32_eq( float32, float32 );
+flag float32_le( float32, float32 );
+flag float32_lt( float32, float32 );
+flag float32_eq_signaling( float32, float32 );
+flag float32_le_quiet( float32, float32 );
+flag float32_lt_quiet( float32, float32 );
+flag float32_is_signaling_nan( float32 );
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32 float64_to_int32( float64 );
+int32 float64_to_int32_round_to_zero( float64 );
+int64 float64_to_int64( float64 );
+int64 float64_to_int64_round_to_zero( float64 );
+float32 float64_to_float32( float64 );
+#ifdef FLOATX80
+floatx80 float64_to_floatx80( float64 );
+#endif
+#ifdef FLOAT128
+float128 float64_to_float128( float64 );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision operations.
+*----------------------------------------------------------------------------*/
+float64 float64_round_to_int( float64 );
+float64 float64_add( float64, float64 );
+float64 float64_sub( float64, float64 );
+float64 float64_mul( float64, float64 );
+float64 float64_div( float64, float64 );
+float64 float64_rem( float64, float64 );
+float64 float64_sqrt( float64 );
+flag float64_eq( float64, float64 );
+flag float64_le( float64, float64 );
+flag float64_lt( float64, float64 );
+flag float64_eq_signaling( float64, float64 );
+flag float64_le_quiet( float64, float64 );
+flag float64_lt_quiet( float64, float64 );
+flag float64_is_signaling_nan( float64 );
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32 floatx80_to_int32( floatx80 );
+int32 floatx80_to_int32_round_to_zero( floatx80 );
+int64 floatx80_to_int64( floatx80 );
+int64 floatx80_to_int64_round_to_zero( floatx80 );
+float32 floatx80_to_float32( floatx80 );
+float64 floatx80_to_float64( floatx80 );
+#ifdef FLOAT128
+float128 floatx80_to_float128( floatx80 );
+#endif
+floatx80 floatx80_scale(floatx80 a, floatx80 b);
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
+| extended double-precision floating-point value, returning the result.
+*----------------------------------------------------------------------------*/
+
+static inline floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
+{
+	floatx80 packed;
+
+	/* Sign goes to bit 15; the exponent is added in below it. */
+	packed.high = ( ( (bits16) zSign )<<15 ) + zExp;
+	packed.low = zSig;
+	return packed;
+}
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision rounding precision.  Valid
+| values are 32, 64, and 80.
+*----------------------------------------------------------------------------*/
+extern int8 floatx80_rounding_precision;
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision operations.
+*----------------------------------------------------------------------------*/
+floatx80 floatx80_round_to_int( floatx80 );
+floatx80 floatx80_add( floatx80, floatx80 );
+floatx80 floatx80_sub( floatx80, floatx80 );
+floatx80 floatx80_mul( floatx80, floatx80 );
+floatx80 floatx80_div( floatx80, floatx80 );
+floatx80 floatx80_rem( floatx80, floatx80 );
+floatx80 floatx80_sqrt( floatx80 );
+flag floatx80_eq( floatx80, floatx80 );
+flag floatx80_le( floatx80, floatx80 );
+flag floatx80_lt( floatx80, floatx80 );
+flag floatx80_eq_signaling( floatx80, floatx80 );
+flag floatx80_le_quiet( floatx80, floatx80 );
+flag floatx80_lt_quiet( floatx80, floatx80 );
+flag floatx80_is_signaling_nan( floatx80 );
+
+/* int floatx80_fsin(floatx80 &a);
+int floatx80_fcos(floatx80 &a);
+int floatx80_ftan(floatx80 &a); */
+
+floatx80 floatx80_flognp1(floatx80 a);
+floatx80 floatx80_flogn(floatx80 a);
+floatx80 floatx80_flog2(floatx80 a);
+floatx80 floatx80_flog10(floatx80 a);
+
+// roundAndPackFloatx80 used to be in softfloat-round-pack, is now in softfloat.c
+floatx80 roundAndPackFloatx80(int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1);
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE quadruple-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32 float128_to_int32( float128 );
+int32 float128_to_int32_round_to_zero( float128 );
+int64 float128_to_int64( float128 );
+int64 float128_to_int64_round_to_zero( float128 );
+float32 float128_to_float32( float128 );
+float64 float128_to_float64( float128 );
+#ifdef FLOATX80
+floatx80 float128_to_floatx80( float128 );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE quadruple-precision operations.
+*----------------------------------------------------------------------------*/
+float128 float128_round_to_int( float128 );
+float128 float128_add( float128, float128 );
+float128 float128_sub( float128, float128 );
+float128 float128_mul( float128, float128 );
+float128 float128_div( float128, float128 );
+float128 float128_rem( float128, float128 );
+float128 float128_sqrt( float128 );
+flag float128_eq( float128, float128 );
+flag float128_le( float128, float128 );
+flag float128_lt( float128, float128 );
+flag float128_eq_signaling( float128, float128 );
+flag float128_le_quiet( float128, float128 );
+flag float128_lt_quiet( float128, float128 );
+flag float128_is_signaling_nan( float128 );
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', the exponent `zExp', and the significand formed
+| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
+| floating-point value, returning the result.  After being shifted into the
+| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
+| added together to form the most significant 64 bits of the result.  This
+| means that any integer portion of `zSig0' will be added into the exponent.
+| Since a properly normalized significand will have an integer portion equal
+| to 1, the `zExp' input should be 1 less than the desired result exponent
+| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
+| significand.
+*----------------------------------------------------------------------------*/
+
+static inline float128
+	packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
+{
+	const bits64 signField = ( (bits64) zSign )<<63;
+	const bits64 expField = ( (bits64) zExp )<<48;
+	float128 packed;
+	packed.high = signField + expField + zSig0;	/* zSig0 may carry into exp */
+	packed.low = zSig1;
+	return packed;
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0', `zSig1',
+| and `zSig2', and returns the proper quadruple-precision floating-point value
+| corresponding to the abstract input.  Ordinarily, the abstract value is
+| simply rounded and packed into the quadruple-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal quadruple-
+| precision floating-point number.
+|     The input significand must be normalized or smaller.  If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding.  In the
+| usual case that the input significand is normalized, `zExp' must be 1 less
+| than the ``true'' floating-point exponent.  The handling of underflow and
+| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static inline float128
+	roundAndPackFloat128(
+		flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 )
+{
+	int8 roundingMode;
+	flag roundNearestEven, increment, isTiny;
+	/* Decide whether to add 1 to zSig0:zSig1, handle overflow and negative exponents, then pack. */
+	roundingMode = float_rounding_mode; /* read once; every mode test below uses this copy */
+	roundNearestEven = ( roundingMode == float_round_nearest_even );
+	increment = ( (sbits64) zSig2 < 0 ); /* nearest-even case: increment when the top bit of zSig2 is set */
+	if ( ! roundNearestEven ) {
+		if ( roundingMode == float_round_to_zero ) {
+			increment = 0; /* round-to-zero never increments */
+		}
+		else {
+			if ( zSign ) {
+				increment = ( roundingMode == float_round_down ) && zSig2; /* sign set: only round-down increments, and only if zSig2 is nonzero */
+			}
+			else {
+				increment = ( roundingMode == float_round_up ) && zSig2; /* sign clear: only round-up increments, and only if zSig2 is nonzero */
+			}
+		}
+	}
+	if ( 0x7FFD <= (bits32) zExp ) { /* taken for zExp >= 0x7FFD and, bits32 presumably being unsigned, for negative zExp as well */
+		if (    ( 0x7FFD < zExp ) /* overflow: exponent above 0x7FFD ... */
+				|| (    ( zExp == 0x7FFD ) /* ... or exactly 0x7FFD with the significand below about to be incremented */
+					&& eq128(
+							LIT64( 0x0001FFFFFFFFFFFF ),
+							LIT64( 0xFFFFFFFFFFFFFFFF ),
+							zSig0,
+							zSig1
+						)
+					&& increment
+				)
+			) {
+			float_raise( float_flag_overflow | float_flag_inexact ); /* overflow is always reported together with inexact */
+			if (    ( roundingMode == float_round_to_zero ) /* modes that must not round this sign away from zero */
+					|| ( zSign && ( roundingMode == float_round_up ) )
+					|| ( ! zSign && ( roundingMode == float_round_down ) )
+				) {
+				return
+					packFloat128( /* exponent field 0x7FFE with an all-ones fraction: the maximal finite value */
+						zSign,
+						0x7FFE,
+						LIT64( 0x0000FFFFFFFFFFFF ),
+						LIT64( 0xFFFFFFFFFFFFFFFF )
+					);
+			}
+			return packFloat128( zSign, 0x7FFF, 0, 0 ); /* exponent field 0x7FFF with a zero fraction: infinity */
+		}
+		if ( zExp < 0 ) { /* negative exponent: the result becomes subnormal */
+			isTiny = /* computed from the values as they are before the right shift below */
+					( float_detect_tininess == float_tininess_before_rounding )
+				|| ( zExp < -1 )
+				|| ! increment
+				|| lt128(
+						zSig0,
+						zSig1,
+						LIT64( 0x0001FFFFFFFFFFFF ),
+						LIT64( 0xFFFFFFFFFFFFFFFF )
+					);
+			shift128ExtraRightJamming( /* shift the 192-bit significand right by -zExp; presumably lost bits are kept sticky in zSig2 */
+				zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
+			zExp = 0; /* the shifted significand is packed with exponent field 0 */
+			if ( isTiny && zSig2 ) float_raise( float_flag_underflow ); /* underflow only when tiny and zSig2 is still nonzero after the shift */
+			if ( roundNearestEven ) { /* zSig2 changed, so the increment decision is recomputed */
+				increment = ( (sbits64) zSig2 < 0 );
+			}
+			else {
+				if ( zSign ) {
+					increment = ( roundingMode == float_round_down ) && zSig2; /* round-to-zero matches neither comparison here, so it yields 0 */
+				}
+				else {
+					increment = ( roundingMode == float_round_up ) && zSig2;
+				}
+			}
+		}
+	}
+	if ( zSig2 ) float_exception_flags |= float_flag_inexact; /* sets the flag directly rather than through float_raise */
+	if ( increment ) {
+		add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 ); /* add 1 to the 128-bit value zSig0:zSig1 */
+		zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven ); /* nearest-even with zSig2 holding only its top bit (a tie): clear bit 0 of zSig1 */
+	}
+	else {
+		if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0; /* zero significand and no increment: force the exponent field to 0 */
+	}
+	return packFloat128( zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand formed by the concatenation of `zSig0' and `zSig1', and
+| returns the proper quadruple-precision floating-point value corresponding
+| to the abstract input.  This routine is just like `roundAndPackFloat128'
+| except that the input significand has fewer bits and does not have to be
+| normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
+| point exponent.
+*----------------------------------------------------------------------------*/
+
+static inline float128
+	normalizeRoundAndPackFloat128(
+		flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
+{
+	bits64 roundBits = 0;	/* third significand word handed to roundAndPackFloat128 */
+	int8 shift;
+
+	/* High word empty: move the low word up and account for the 64 skipped bit positions. */
+	if ( ! zSig0 ) {
+		zExp -= 64;
+		zSig0 = zSig1;
+		zSig1 = 0;
+	}
+	/* Signed distance from the position at which zSig0 has 15 leading zero bits. */
+	shift = countLeadingZeros64( zSig0 ) - 15;
+	if ( shift < 0 ) {
+		/* Fewer than 15 leading zeros: shift right and let the helper fill roundBits. */
+		shift128ExtraRightJamming(
+			zSig0, zSig1, 0, - shift, &zSig0, &zSig1, &roundBits );
+	}
+	else {
+		shortShift128Left( zSig0, zSig1, shift, &zSig0, &zSig1 );
+	}
+	return roundAndPackFloat128( zSign, zExp - shift, zSig0, zSig1, roundBits );
+}
+#endif