From 89310bf9c82c732c03aa6c02247b86e876678cf4 Mon Sep 17 00:00:00 2001 From: "warren.toomey" Date: Mon, 5 May 2008 05:01:27 +0000 Subject: [PATCH] Added my half-written disassembler, in case someone else can help finish it. --- tools/disaout/Makefile | 7 + tools/disaout/README | 11 + tools/disaout/aout.c | 264 +++++++++++++++++++++++ tools/disaout/aout.h | 97 +++++++++ tools/disaout/magic.c | 136 ++++++++++++ tools/disaout/main.c | 78 +++++++ tools/disaout/opset.c | 443 +++++++++++++++++++++++++++++++++++++++ tools/disaout/symbols.c | 70 +++++++ tools/disaout/syscalls.c | 202 ++++++++++++++++++ 9 files changed, 1308 insertions(+) create mode 100644 tools/disaout/Makefile create mode 100644 tools/disaout/README create mode 100644 tools/disaout/aout.c create mode 100644 tools/disaout/aout.h create mode 100644 tools/disaout/magic.c create mode 100644 tools/disaout/main.c create mode 100644 tools/disaout/opset.c create mode 100644 tools/disaout/symbols.c create mode 100644 tools/disaout/syscalls.c diff --git a/tools/disaout/Makefile b/tools/disaout/Makefile new file mode 100644 index 0000000..ad31709 --- /dev/null +++ b/tools/disaout/Makefile @@ -0,0 +1,7 @@ +CFLAGS= -g -Wall + +disaout: main.o aout.o magic.o opset.o syscalls.o symbols.o + cc -o disaout main.o aout.o magic.o opset.o syscalls.o symbols.o + +clean: + rm -f disaout *.o diff --git a/tools/disaout/README b/tools/disaout/README new file mode 100644 index 0000000..9802934 --- /dev/null +++ b/tools/disaout/README @@ -0,0 +1,11 @@ +This is a start on a disassembler for PDP-11 a.out Unix binaries. There's +still a lot to do. Right now I am concentrating on V1 and V2 binaries. I +have re-used code from Apout, so later on we can also do binaries from +V5/V6/V7, 2.9 and 2.11 BSD. I'd also like to parse symbol tables, but +that's for later. + +I need to deal with the bss and initialised data sections of a binary. + +Lots to do, and it's very frustrating. 
+ +Warren Toomey, Mon May 5 2008 diff --git a/tools/disaout/aout.c b/tools/disaout/aout.c new file mode 100644 index 0000000..3ba3a79 --- /dev/null +++ b/tools/disaout/aout.c @@ -0,0 +1,264 @@ +#include +#include +#include +#include "aout.h" + + +/* This code borrowed from Apout */ + +extern int special_magic(u_int16_t *cptr); + +int Binary; /* Type of binary this a.out is: the value returned by load_aout_header(), i.e. an IS_* constant or -1 on a read error */ +u_int8_t *ispace, *dspace; /* Instruction and Data spaces; both point at darray unless the binary has separate I&D */ +static u_int8_t darray[PDP_MEM_SIZE], iarray[PDP_MEM_SIZE]; + +/* 2.11BSD allows up to NOVL (15) 8K overlays in the 0430 and 0431 a.out types. + * Each overlay is loaded at the first 8K `click' above the end of the + * main text. The following structures hold the overlays from the current + * a.out, if there are any. Missing overlays have size 0 and pointer NULL. + */ +static struct { + u_int16_t size; + u_int8_t *ovlay; +} ovlist[NOVL] = { + {0, NULL}, {0, NULL}, {0, NULL}, {0, NULL}, {0, NULL}, {0, NULL}, + {0, NULL}, {0, NULL}, {0, NULL}, {0, NULL}, {0, NULL}, {0, NULL}, + {0, NULL}, {0, NULL}, {0, NULL} +}; + +static u_int8_t *ovbase; /* Base address of 2.11BSD overlays */ + +/* Load the a.out header from the given file pointer, and return it. + * Also return an integer describing which version of UNIX the a.out + * belongs to. If errors on reading, return -1. 
+ */ +int load_aout_header(FILE * zin, struct exec * E) +{ + char *cptr; + + /* Read the a_magic value first */ + /* This makes it easier to deal with */ + /* parsing any script interpreter below */ + if (fread(E, sizeof(u_int16_t), 1, zin) != 1) return (-1); + + switch (E->a_magic) { + case ANY_SCRIPT: /* Shell script, return now */ + return (IS_UNKNOWN); + case V1_NORMAL: + case ANY_NORMAL: /* These are recognised below */ + case ANY_ROTEXT: + case ANY_SPLITID: + case BSD_OVERLAY: + case BSD_ROVERLAY: + case A68_MAGIC: + break; + + default: /* Unrecognised binary, mark as such */ + E->a_magic = UNKNOWN_AOUT; return (IS_UNKNOWN); + } + + /* We can deal with this a.out, so */ + /* read in the rest of the header */ + cptr = (char *) &(E->a_text); + if (fread(cptr, sizeof(struct exec) - sizeof(u_int16_t), 1, zin) != 1) + return (-1); + + switch (E->a_magic) { + case A68_MAGIC: if (E->a_data==A68_DATA) return(IS_A68); + else { E->a_magic = UNKNOWN_AOUT; return (IS_UNKNOWN); } + case V1_NORMAL: return (IS_V1); + case BSD_OVERLAY: + case BSD_ROVERLAY: return (IS_211BSD); + case ANY_NORMAL: + case ANY_ROTEXT: + case ANY_SPLITID: /* Check crt0.o 2nd magic for V2/V6/V7/2.11BSD */ + if (E->a_magic2 == V2_M2) return (IS_V2); + if (E->a_magic2 == V6_M2) return (IS_V6); + if (E->a_magic2 == V7_M2) return (IS_V7); + if (E->a_magic2 == BSD_M2) return (IS_211BSD); + + /* Still no idea, use checksum to determine */ + return(special_magic((u_int16_t *) E)); + + default: /* Should never get here */ + E->a_magic = UNKNOWN_AOUT; return (IS_UNKNOWN); + } +} + + +/* Load the named PDP-11 executable file into the disassembler's memory. + * Returns 0 if ok, -1 if error. 
+ */ +int load_a_out(const char *file, struct exec * e) +{ /* @globals errno,stdout,stderr; @ */ +#define V12_MEMBASE 16384 /* Offset for V1/V2 binaries load */ + FILE *zin; + u_int8_t *ibase, *dbase, *bbase; /* Instruction, data, bss bases */ + u_int16_t size; + int i,j; + + if ((zin = fopen(file, "r"))==NULL) /* Open the file */ + return (-1); + + Binary = load_aout_header(zin, e); /* Determine a.out & Unix type */ + + if (e->a_magic == ANY_SCRIPT) { + return (-1); + } + if (e->a_magic == UNKNOWN_AOUT) { + return (-1); + } + + switch (e->a_magic) { + case V1_NORMAL: /* V1 a.out binary looks like */ + e->a_bss = e->a_syms; /* 0405 */ + e->a_syms = e->a_data; /* size of text */ + e->a_data = 0; /* size of symbol table */ + /* reloc bits */ + /* size of data (i.e bss) */ + /* unused and zeroed */ + /* We must rearrange fields */ + /* Move back to start of V1 header */ + if (fseek(zin, 0, SEEK_SET) != 0) { + (void) fclose(zin); return (-1); + } + ispace = dspace = darray; + ibase = &(ispace[V12_MEMBASE]); /* Load & run the binary starting */ + dbase = &(ispace[e->a_text]); /* at address 16384 (040000) */ + bbase = &(ispace[e->a_text + e->a_data]); + e->a_entry = V12_MEMBASE + 12; /* Add 12 to skip over a.out hdr */ + break; + + case A68_MAGIC: /* Algol 68 image */ + if (fseek(zin, 0, SEEK_SET) != 0) { + (void) fclose(zin); return (-1); + } + e->a_text= e->ov_siz[0]+1; + e->a_data= 0; + e->a_bss= 0160000-e->a_text; + e->a_entry= e->a_flag; + ibase = ispace = dspace = darray; + dbase= ibase; + bbase= &(ispace[e->a_text+e->a_data]); + break; + case ANY_NORMAL: + /* Move back to end of V5/6/7 header */ + if (fseek(zin, 16, SEEK_SET) != 0) { + (void) fclose(zin); return (-1); + } + ibase = ispace = dspace = darray; + + if (Binary == IS_V2) { + ibase = &(ispace[V12_MEMBASE]); + e->a_entry = V12_MEMBASE; + dbase = &(ispace[e->a_text + V12_MEMBASE]); + bbase = &(ispace[e->a_text + e->a_data + V12_MEMBASE]); + } else + { + dbase = &(ispace[e->a_text]); + bbase = 
&(ispace[e->a_text + e->a_data]); + } + break; + case ANY_ROTEXT: + /* Move back to end of V5/6/7 header */ + if (fseek(zin, 16, SEEK_SET) != 0) { + (void) fclose(zin); return (-1); + } + /* @fallthrough@ */ + case BSD_OVERLAY: + /* Round up text area to next 8K boundary */ + if (e->a_text % EIGHT_K) { + size = EIGHT_K * (1 + e->a_text / EIGHT_K); + } else size = e->a_text; + /* And the next 8K boundary if overlays! */ + if (e->a_magic == BSD_OVERLAY) { + if (e->max_ovl % EIGHT_K) { + size += EIGHT_K * (1 + e->max_ovl / EIGHT_K); + } else size += e->max_ovl; + } + ibase = ispace = dspace = darray; + dbase = &(ispace[size]); + bbase = &(ispace[size + e->a_data]); + break; + case ANY_SPLITID: + /* Move back to end of V5/6/7 header */ + if (fseek(zin, 16, SEEK_SET) != 0) { + (void) fclose(zin); return (-1); + } + /* @fallthrough@ */ + case BSD_ROVERLAY: + ibase = ispace = iarray; + dbase = dspace = darray; + bbase = &(dspace[e->a_data]); + break; + default: + (void) fprintf(stderr, "Apout - unknown a.out format 0%o\n", e->a_magic); + (void) fclose(zin); return (-1); + } + + + memset(darray, 0, PDP_MEM_SIZE); /* Clear all memory */ + if (ispace != dspace) memset(iarray, 0, PDP_MEM_SIZE); + + /* Now load the text into ibase */ + for (size = e->a_text; size;) { + i = (int) fread(ibase, 1, (size_t) size, zin); + if (i == -1) { (void) fclose(zin); return (i); } + size -= i; + ibase += i; + } + + /* Now deal with any overlays */ + if (Binary == IS_211BSD) + switch (e->a_magic) { + case BSD_OVERLAY: + case BSD_ROVERLAY: + /* Round up text area to next 8K boundary */ + if (e->a_text % EIGHT_K) { + size = EIGHT_K * (1 + e->a_text / EIGHT_K); + } else size = e->a_text; + ovbase = &ispace[size]; + + for (i = 0; i < NOVL; i++) { + if (e->ov_siz[i] == 0) { + ovlist[i].size = 0; + ovlist[i].ovlay = NULL; + continue; + } + /* Create memory for the overlay */ + ovlist[i].size = e->ov_siz[i]; + if (ovlist[i].ovlay) + free(ovlist[i].ovlay); + ovlist[i].ovlay = (u_int8_t *) 
malloc(e->ov_siz[i]); + if (ovlist[i].ovlay == NULL) { + fprintf(stderr, "Apout - can't malloc overlay!\n"); + exit(-1); + } + /* Load the overlay into memory */ + for (size = ovlist[i].size, ibase = ovlist[i].ovlay; size;) { + j = fread(ibase, 1, size, zin); + if (j == -1) { + fclose(zin); return (j); + } + size -= j; + ibase += j; + } + } + } + + /* Now load the data into dbase */ + if (dbase) + for (size = e->a_data; size;) { + i = (int) fread(dbase, 1, (size_t) size, zin); + if (i == -1) { (void) fclose(zin); return (i); } + size -= i; + dbase += i; + } + + /* Now clear the bss */ + if ((bbase != 0) && (e->a_bss != 0)) + memset(bbase, 0, (size_t) e->a_bss); + + (void) fclose(zin); + return (0); +} diff --git a/tools/disaout/aout.h b/tools/disaout/aout.h new file mode 100644 index 0000000..65ce030 --- /dev/null +++ b/tools/disaout/aout.h @@ -0,0 +1,97 @@ +/* + * aout.h - parse and load the contents of a UNIX a.out file, for several + * flavours of PDP-11 UNIX + * + * $Revision: 1.6 $ $Date: 2008/05/01 03:23:21 $ + */ +#include +#define EIGHT_K 8192 +#define PDP_MEM_SIZE 65536 /* Size of inst-space and data-space */ + +/* UNIX magic numbers for the a.out header */ +#define V1_NORMAL 0405 /* normal: 1st Edition, six words long */ +#define ANY_NORMAL 0407 /* normal: V5,V6,V7,2.11BSD */ +#define ANY_ROTEXT 0410 /* read-only text: V5,V6,V7,2.11BSD */ +#define ANY_SPLITID 0411 /* seperated I&D: V5,V6,V7,2.11BSD */ +#define BSD_OVERLAY 0430 /* 2.11BSD overlay, non-separate */ +#define BSD_ROVERLAY 0431 /* 2.11BSD overlay, separate */ +#define ANY_SCRIPT 020443 /* Shell script, i.e #! 
+ */ +#define A68_MAGIC 0 /* Algol68 binaries: value of a_magic */ +#define A68_DATA 0107116 /* Algol68 binaries: value a_data must have when a_magic is A68_MAGIC */ + +#define UNKNOWN_AOUT 034567 /* An unknown a.out header */ + +/* Which version of UNIX this a.out comes from */ +#define IS_UNKNOWN 0 +#define IS_V1 1 +#define IS_V2 2 +#define IS_V3 3 +#define IS_V4 4 +#define IS_V5 5 +#define IS_V6 6 +#define IS_V7 7 +#define IS_A68 68 +#define IS_29BSD 29 +#define IS_211BSD 211 + +/* a.out header for nearly all UNIX flavours */ +struct exec { + u_int16_t a_magic; /* magic number */ + u_int16_t a_text; /* size of text segment */ + u_int16_t a_data; /* size of initialised data */ + u_int16_t a_bss; /* size of uninitialised data (bss) */ + u_int16_t a_syms; /* size of symbol table */ + u_int16_t a_entry; /* entry point */ + u_int16_t a_unused; /* unused */ + u_int16_t a_flag; /* relocation info stripped */ + /* 16 bytes up to here */ + + /* 2.11BSD overlay files have the following */ +#define NOVL 15 + int16_t max_ovl; /* maximum overlay size */ + u_int16_t ov_siz[NOVL]; /* size of the i'th overlay */ + /* Note that if the file isn't a 2.11BSD */ + /* overlay, we have to rewind to undo */ + /* the read of this section */ +}; + +/* + * Because V5, V6, V7 and 2.11BSD share several magic numbers in their a.out + * headers, we must distinguish them so as to set up the correct emulated + * environment. This is done by observing the differences in their crt0.s + * code: they all differ at position 021 + */ +#define a_magic2 ov_siz[0] +#define V2_M2 0177304 /* Doesn't apply to all, tho */ +#define V6_M2 0010600 +#define V7_M2 0016600 +#define BSD_M2 0162706 + + +/* + * Some syscalls pass arguments in registers, some in words following the + * trap instruction. For each Unix version, we keep an array of syscall + * names, and the number of words following the trap + */ +struct syscallinfo { + char *name; /* syscall name; NULL marks a slot with no syscall */ + int numwords; /* number of argument words following the trap */ +}; + +/* + * We try to infer symbols based on branch addresses, jsr addresses etc. 
We + * keep two lists, instruction symbols and data symbols. Each entry of + * either list is one of these structs. + */ +struct symbol { + char *name; /* label text; assigned by patch_symbols() */ + int type; /* one of the SYM_* values below */ + int size; /* # bytes, used by SYM_JSRTEXT and SYM_JSRDATA */ +}; + +/* Symbol types */ +#define SYM_BRANCH 0 /* Target of a branch or sob instruction */ +#define SYM_FUNCTION 1 /* PC-relative target of a jsr */ +#define SYM_DATA 2 /* PC-relative operand of any other instruction */ +#define SYM_JSRTEXT 3 /* Ascii string following jsr r5,xxx */ +#define SYM_JSRDATA 4 /* Binary data following jsr r5,xxx */ diff --git a/tools/disaout/magic.c b/tools/disaout/magic.c new file mode 100644 index 0000000..0636161 --- /dev/null +++ b/tools/disaout/magic.c @@ -0,0 +1,136 @@ +/* This code borrowed from Apout */ + +/* magic.c - determine the environment for certain PDP-11 a.out binaries + * + * Some binaries in V1, V2, V5, V6, V7 and 2.11BSD are not caught with the + * magic numbers in aout.c. If this is the case, we fall into the + * special_magic() function, which calculates a checksum on the + * a.out header. If it matches any of the checksums below, it returns + * the appropriate environment value. Otherwise, it returns IS_UNKNOWN. 
+ * + * $Revision: 1.1 $ + * $Date: 2008/04/30 03:46:29 $ + */ +#include +#include "aout.h" + +struct spec_aout { + u_int32_t cksum; + int environment; +}; + +static struct spec_aout S[]= { + { 0x1042c2, IS_V6 }, /* V6 bin/dc */ + { 0x10f02, IS_V5 }, /* V5 etc/update */ + { 0x11002, IS_V5 }, /* V5 bin/clri */ + { 0x1117c2, IS_V7 }, /* V7 bin/roff */ + { 0x11702, IS_V6 }, /* V6 etc/update */ + { 0x11a82, IS_V5 }, /* V5 bin/sum */ + { 0x1319c2, IS_V5 }, /* V5 usr/fort/fc1 */ + { 0x1332c2, IS_V2 }, /* /lib/c0 dated Jun 30 1973 from s2 tape */ + { 0x13642, IS_V5 }, /* V5 bin/rew */ + { 0x139e02, IS_V5 }, /* V5 bin/dc */ + { 0x13c0, IS_V6 }, /* V6 usr/lib/tmgc */ + { 0x14042, IS_V6 }, /* V6 bin/tty */ + { 0x143c2, IS_V5 }, /* V5 bin/tty */ + { 0x152ac2, IS_V6 }, /* V6 usr/lib/tmg */ + { 0x15f42, IS_V5 }, /* V5 bin/kill */ + { 0x16802, IS_V5 }, /* V5 bin/dsw */ + { 0x16902, IS_V5 }, /* V5 bin/mkdir */ + { 0x1720c2, IS_V6 }, /* V6 bin/cdb */ + { 0x17742, IS_V5 }, /* V5 usr/bin/pfe */ + { 0x17cc2, IS_V5 }, /* V5 usr/bin/mesg */ + { 0x18702, IS_V5 }, /* V5 bin/rmdir */ + { 0x194c2, IS_V6 }, /* V6 bin/chgrp */ + { 0x197c2, IS_V6 }, /* V6 bin/chown */ + { 0x19a42, IS_V5 }, /* V5 bin/chown */ + { 0x19b342, IS_V6 }, /* V6 usr/bin/nroff */ + { 0x19f682, IS_V6 }, /* V6 usr/fort/fc1 */ + { 0x1b102, IS_V5 }, /* V5 bin/strip */ + { 0x1ba02, IS_V6 }, /* V6 bin/strip */ + { 0x1c342, IS_V5 }, /* V5 bin/cat */ + { 0x1c8442, IS_V7 }, /* V7 usr/games/maze */ + { 0x1cc782, IS_V6 }, /* V6 lib/fc0 */ + { 0x1dfc2, IS_V5 }, /* V5 etc/getty */ + { 0x1f9c2, IS_V2 }, /* /bin/nm dated Jun 30 1973 from s2 tape */ + { 0x20202, IS_V5 }, /* V5 usr/games/bj */ + { 0x21e42, IS_V6 }, /* V6 usr/bin/units */ + { 0x23f82, IS_V5 }, /* V5 usr/bin/passwd */ + { 0x260642, IS_V6 }, /* V6 lib/fc1 */ + { 0x262a82, IS_211BSD }, /* 2.11 usr/new/m11 */ + { 0x27e82, IS_V5 }, /* V5 usr/bin/grep */ + { 0x290c2, IS_V7 }, /* V7 usr/games/cubic */ + { 0x299c2, IS_V5 }, /* V5 usr/games/cubic */ + { 0x2f482, IS_V5 }, /* V5 
usr/bin/form */ + { 0x3382, IS_V6 }, /* V6 bin/write */ + { 0x326642, IS_V7 }, /* 2.9 awk */ + { 0x33c42, IS_211BSD }, /* 2.11 usr/games/moo */ + { 0x351382, IS_211BSD }, /* 2.11 usr/games/lib/zork */ + { 0x3702, IS_V5 }, /* V5 usr/games/moo */ + { 0x3b402, IS_V5 }, /* V5 bin/ar */ + { 0x3cc02, IS_V2 }, /* /bin/size from from s2 tape */ + { 0x4382, IS_V5 }, /* V5 bin/write */ + { 0x451f42, IS_V7 }, /* 2.9 /lib/c1 */ + { 0x47042, IS_211BSD }, /* 2.11 usr/games/ttt */ + { 0x4fa02, IS_V5 }, /* V5 bin/ld */ + { 0x51342, IS_211BSD }, /* 2.11 usr/games/bj */ + { 0x53302, IS_V6 }, /* V6 usr/lib/suftab */ + { 0x55882, IS_V7 }, /* 2.9 /bin/as */ + { 0x54702, IS_V5 }, /* V5 usr/games/ttt */ + { 0x55702, IS_V7 }, /* V7 bin/as */ + { 0x5c342, IS_V2 }, /* /bin/cc dated Jun 30 1973 from s2 tape */ + { 0x6f742, IS_V6 }, /* V6 usr/bin/sa */ + { 0x7042, IS_V7 }, /* V7 bin/factor */ + { 0x71702, IS_V7 }, /* V7 lib/as2 */ + { 0x7342, IS_V5 }, /* V5 bin/du */ + { 0x73782, IS_V7}, /* 2.9 /lib/as2 */ + { 0x73e00, IS_V2 }, /* /bin/ld from s2 tape */ + { 0x7a242, IS_V6 }, /* V6 lib/as2 */ + { 0x7b102, IS_V6 }, /* V6 bin/as */ + { 0x7d082, IS_V5 }, /* V5 bin/as */ + { 0x7d6844, IS_V1 }, /* bin/cal from s2 tape */ + { 0x7d942, IS_V5 }, /* V5 lib/as2 */ + { 0x8002, IS_V5 }, /* V5 etc/lpd */ + { 0x85842, IS_V5 }, /* V5 bin/ed */ + { 0x8f00, IS_V6 }, /* V6 usr/lib/tmga */ + { 0x915c2, IS_V6 }, /* V6 bin/bas */ + { 0x94542, IS_V5 }, /* V5 bin/db */ + { 0x98442, IS_V6 }, /* V6 usr/bin/ac */ + { 0x9adc2, IS_V6 }, /* V6 bin/db */ + { 0xa242, IS_V7 }, /* V7 bin/primes */ + { 0xa4602, IS_V2 }, /* /bin/as from s2 tape */ + { 0xa702, IS_V5 }, /* V5 bin/time */ + { 0xad882, IS_V7 }, /* V7 bin/bas */ + { 0xadc42, IS_V2 }, /* /usr/lib/c1 from s2 tape */ + { 0xb5a82, IS_V6 }, /* V6 usr/bin/prof */ + { 0xc1e42, IS_V5 }, /* V5 usr/bin/fed */ + { 0xc3102, IS_V6 }, /* V6 bin/tp */ + { 0xc8bc2, IS_V5 }, /* V5 bin/tp */ + { 0xe1642, IS_V6 }, /* V6 usr/bin/roff */ + { 0xe1f42, IS_V5 }, /* V5 usr/bin/roff */ + { 
0xec582, IS_V5 }, /* V5 bin/bas */ + { 0xfc2, IS_V6 }, /* V6 usr/bin/typo */ + { 0xfc002, IS_V2 }, /* /bin/as dated Jun 30 1973 from s2 tape */ + { 0x38ec0, IS_V5 }, /* V5 bin/ar, Warrens */ + { 0, 0 } +}; + +/* cptr points at the start of the a.out header */ +int special_magic(u_int16_t *cptr) +{ + u_int32_t cksum=0; + int i; + + if (cptr==NULL) return(IS_UNKNOWN); + /* Calculate the checksum */ + for (i=0;i<8; i++) { cksum ^= cptr[i]; cksum = cksum<<1; } + + /* Try and find a match */ + for (i=0; S[i].cksum!=0; i++) if (S[i].cksum==cksum) { + return(S[i].environment); + } + + /* None, return 0 */ + (void)fprintf(stderr, "Unknown magic in header: 0x%x\n",cksum); + return(IS_UNKNOWN); +} diff --git a/tools/disaout/main.c b/tools/disaout/main.c new file mode 100644 index 0000000..2e38718 --- /dev/null +++ b/tools/disaout/main.c @@ -0,0 +1,78 @@ +#include +#include +#include "aout.h" + +extern int load_a_out(const char *file, struct exec *E); +extern int printins(int addr); +extern void patch_symbols(void); +extern u_int8_t *ispace, *dspace; /* Instruction and Data spaces */ +extern int doprint; +int onepass = 0; /* Only do a single pass */ +int printaddrs = 0; /* Print out addresses and words */ + +void dopass(struct exec *e) +{ + int i; + u_int16_t *iptr; + for (i = e->a_entry; i < e->a_entry + e->a_text;) { + iptr = (u_int16_t *) & ispace[i]; + if (doprint && printaddrs) + printf("%06o: %06o\t", i, *iptr); + i += printins(i); + } +} + +void usage() +{ + fprintf(stderr, "Usage: disaout [-1a] file\n"); + exit(1); +} + +int main(int argc, char *argv[]) +{ + struct exec E; + int ch, err; + + /* Get any arguments */ + while ((ch = getopt(argc, argv, "1a")) != -1) { + switch (ch) { + case '1': + onepass = 1; + break; + case 'a': + printaddrs = 1; + break; + case '?': + default: + usage(); + } + } + argc -= optind; + argv += optind; + + + /* Check we have an file to open */ + if (argc != 1) + usage(); + + /* Get the header details for the a.out file */ + err = 
load_a_out(argv[0], &E); + + if (err == -1) { + fprintf(stderr, "%s does not appear to be a PDP-11 a.out file\n", argv[0]); + exit(1); + } + + printf("/ text at 0%o, len 0%o, end 0%o\n", + E.a_entry, E.a_text, E.a_entry + E.a_text); + + if (onepass == 0) { + doprint = 0; + dopass(&E); /* Do pass 1 to infer symbols */ + patch_symbols(); + } + doprint = 1; + dopass(&E); /* Do pass 2 to print it out */ + + exit(0); +} diff --git a/tools/disaout/opset.c b/tools/disaout/opset.c new file mode 100644 index 0000000..8cd146e --- /dev/null +++ b/tools/disaout/opset.c @@ -0,0 +1,443 @@ +/* This code borrowed from 2.11BSD adb */ + +#include +#include +#include +#include "aout.h" + +extern void add_symbol(int addr, int type, int size); +extern struct symbol *get_isym(int addr); +extern struct symbol *get_dsym(int addr); + +extern u_int8_t *ispace, *dspace; /* Instruction and Data spaces */ +extern struct syscallinfo *systab; +extern int numsyscalls; + +int doprint = 0; /* Only print out if 1 */ +int itype; /* Global equal to p->itype */ + +#define NSP 0 +#define ISYM 2 +#define DSYM 7 + +/* instruction printing */ + +#define DOUBLE 0 +#define DOUBLW 1 +#define SINGLE 2 +#define SINGLW 3 +#define REVERS 4 +#define BRANCH 5 +#define NOADDR 6 +#define DFAULT 7 +#define TRAP 8 +#define SYS 9 +#define SOB 10 +#define JMP 11 +#define JSR 12 + +struct optab { + int mask; + int val; + int itype; + char *iname; +} optab[] = { + { 0107777, 0010000, DOUBLE, "mov" }, + { 0107777, 0020000, DOUBLE, "cmp" }, + { 0107777, 0030000, DOUBLE, "bit" }, + { 0107777, 0040000, DOUBLE, "bic" }, + { 0107777, 0050000, DOUBLE, "bis" }, + { 0007777, 0060000, DOUBLW, "add" }, + { 0007777, 0160000, DOUBLW, "sub" }, + { 0100077, 0005000, SINGLE, "clr" }, + { 0100077, 0005100, SINGLE, "com" }, + { 0100077, 0005200, SINGLE, "inc" }, + { 0100077, 0005300, SINGLE, "dec" }, + { 0100077, 0005400, SINGLE, "neg" }, + { 0100077, 0005500, SINGLE, "adc" }, + { 0100077, 0005600, SINGLE, "sbc" }, + { 0100077, 0005700, 
SINGLE, "tst" }, + { 0100077, 0006000, SINGLE, "ror" }, + { 0100077, 0006100, SINGLE, "rol" }, + { 0100077, 0006200, SINGLE, "asr" }, + { 0100077, 0006300, SINGLE, "asl" }, + { 0000077, 0000100, JMP, "jmp" }, + { 0000077, 0000300, SINGLE, "swab" }, + { 0000077, 0170100, SINGLW, "ldfps" }, + { 0000077, 0170200, SINGLW, "stfps" }, + { 0000077, 0170300, SINGLW, "stst" }, + { 0000077, 0170400, SINGLW, "clrf" }, + { 0000077, 0170500, SINGLW, "tstf" }, + { 0000077, 0170600, SINGLW, "absf" }, + { 0000077, 0170700, SINGLW, "negf" }, + { 0000077, 0006700, SINGLW, "sxt" }, + { 0000077, 0006600, SINGLW, "mtpi" }, + { 0000077, 0106600, SINGLW, "mtpd" }, + { 0000077, 0006500, SINGLW, "mfpi" }, + { 0000077, 0106500, SINGLW, "mfpd" }, + { 0000077, 0106700, SINGLW, "mfps" }, + { 0000077, 0106400, SINGLW, "mtps" }, + { 0000777, 0070000, REVERS, "mul" }, + { 0000777, 0071000, REVERS, "div" }, + { 0000777, 0072000, REVERS, "ash" }, + { 0000777, 0073000, REVERS, "ashc" }, + { 0377, 0000400, BRANCH, "br" }, + { 0377, 0001000, BRANCH, "bne" }, + { 0377, 0001400, BRANCH, "beq" }, + { 0377, 0002000, BRANCH, "bge" }, + { 0377, 0002400, BRANCH, "blt" }, + { 0377, 0003000, BRANCH, "bgt" }, + { 0377, 0003400, BRANCH, "ble" }, + { 0377, 0100000, BRANCH, "bpl" }, + { 0377, 0100400, BRANCH, "bmi" }, + { 0377, 0101000, BRANCH, "bhi" }, + { 0377, 0101400, BRANCH, "blos" }, + { 0377, 0102000, BRANCH, "bvc" }, + { 0377, 0102400, BRANCH, "bvs" }, + { 0377, 0103000, BRANCH, "bcc" }, + { 0377, 0103400, BRANCH, "bcs" }, + { 0000000, 0000000, NOADDR, "halt" }, + { 0000000, 0000001, NOADDR, "wait" }, + { 0000000, 0000002, NOADDR, "rti" }, + { 0000000, 0000003, NOADDR, "bpt" }, + { 0000000, 0000004, NOADDR, "iot" }, + { 0000000, 0000005, NOADDR, "reset" }, + { 0000000, 0000006, NOADDR, "rtt" }, + { 0377, 0171000, REVERS, "mulf" }, + { 0377, 0171400, REVERS, "modf" }, + { 0377, 0172000, REVERS, "addf" }, + { 0377, 0172400, REVERS, "movf" }, + { 0377, 0173000, REVERS, "subf" }, + { 0377, 0173400, REVERS, 
"cmpf" }, + { 0377, 0174000, DOUBLW, "movf" }, + { 0377, 0174400, REVERS, "divf" }, + { 0377, 0175000, DOUBLW, "movei" }, + { 0377, 0175400, DOUBLW, "movfi" }, + { 0377, 0176000, DOUBLW, "movfo" }, + { 0377, 0176400, REVERS, "movie" }, + { 0377, 0177000, REVERS, "movif" }, + { 0377, 0177400, REVERS, "movof" }, + { 0000000, 0170000, NOADDR, "cfcc" }, + { 0000000, 0170001, NOADDR, "setf" }, + { 0000000, 0170002, NOADDR, "seti" }, + { 0000000, 0170011, NOADDR, "setd" }, + { 0000000, 0170012, NOADDR, "setl" }, + { 0000000, 0000007, NOADDR, "mfpt" }, + { 0000077, 0007000, JMP, "csm" }, + { 0000077, 0007300, SINGLW, "wrtlck" }, + { 0000077, 0007200, SINGLW, "tstset" }, + { 0000777, 0004000, JSR, "jsr" }, + { 0000777, 0074000, DOUBLE, "xor" }, + { 0000007, 0000200, SINGLE, "rts" }, + { 0000017, 0000240, DFAULT, "cflg" }, + { 0000017, 0000260, DFAULT, "sflg" }, + { 0377, 0104000, TRAP, "emt" }, + { 0377, 0104400, SYS, "sys" }, + { 0000077, 0006400, TRAP, "mark" }, + { 0000777, 0077000, SOB, "sob" }, + { 0000007, 0000230, DFAULT, "spl" }, + { 0177777, 0000000, DFAULT, "" } +}; + + +/* Heuristically determine what follows after a jsr r5,xxx. + * Create a symbol for it, including the estimated size. + * Return the length of the data in bytes. + */ +int guess_jsr_r5(int addr) +{ + int a,len; + int istext=1; + + /* Try an ASCII string first. Give up if we find a + * non-printable and non-NUL character. + */ + for (a=addr,len=0;; a++,len++) { + /* Found end of string which isn't zero length, stop now */ + if (len && (ispace[a]=='\0')) { + len++; break; + } + + /* If char is not ASCII and not tab/newline, it's not a string */ + if (!isprint(ispace[a]) && (ispace[a] != '\t') && (ispace[a] != '\n')) { + istext=0; break; + } + } + + /* If not a string, guess a single word as argument */ + if (!istext) { + add_symbol(addr, SYM_JSRDATA, 2); + return(2); + } else { + add_symbol(addr, SYM_JSRTEXT, len); + return(len); + } +} + + +/* + * Print out an operand. 
Return any increment to skip to reach the next + * instruction. + */ +int paddr(char *str, int addr, int a, int lastr) +{ + char *regname[] = {"r0", "r1", "r2", "r3", "r4", "r5", "sp", "pc"}; + u_int16_t var; + int r; + char *rptr; + + r = a & 07; + a &= 070; + + if (doprint) + printf(str); + if (r == 7 && a & 020) { + int jsrr5_skip=0; /* Amount to skip on a jsr r5,xxx */ + if (a & 010) { + if (doprint) + putchar('*'); + } + if (a & 040) { + + var = (addr + 4) + ispace[addr + 2] + (ispace[addr + 3] << 8); + if (doprint) { + /* See if there is a label for that */ + struct symbol *s; + if (itype==JSR) s= get_isym(var); + else s= get_dsym(var); + if (s == NULL) + printf("0%o", var); + else + printf("%s", s->name); + + /* We've hit a jsr r5,... */ + if ((itype==JSR) && (lastr==5)) { + s= get_isym(addr+4); + if (s==NULL) { + printf("Weird, no SYM_JSR\n"); + } else { + jsrr5_skip= s->size; + if (s->type==SYM_JSRTEXT) { + char *str= (char *)&ispace[addr+4]; + printf("; <%s>; .even", str); + } else { + u_int16_t var2= ispace[addr + 4] + (ispace[addr + 5] << 8); + printf("; 0%o", var2); + } + } + } + } else { + /* Register a function if this is a JSR, else data */ + if (itype==JSR) { + add_symbol(var, SYM_FUNCTION, 0); + if (lastr==5) { + jsrr5_skip= guess_jsr_r5(addr+4); + } + } + else add_symbol(var, SYM_DATA, 0); + } + } else { + var = ispace[addr + 2] + (ispace[addr + 3] << 8); + if (doprint) + printf("$0%o", var); + } + return (2+jsrr5_skip); + } + + rptr = regname[r]; + switch (a) { + /* r */ + case 000: + if (doprint) + printf(rptr); + return (0); + + /* (r) */ + case 010: + if (doprint) + printf("(%s)", rptr); + return (0); + + /* *(r)+ */ + case 030: + if (doprint) + putchar('*'); + + /* (r)+ */ + case 020: + if (doprint) + printf("(%s)+", rptr); + return (0); + + /* *-(r) */ + case 050: + if (doprint) + putchar('*'); + + /* -(r) */ + case 040: + if (doprint) + printf("-(%s)", rptr); + return (0); + + /* *x(r) */ + case 070: + if (doprint) + putchar('*'); + + /* 
x(r) */ + case 060: + var = ispace[addr + 2] + (ispace[addr + 3] << 8); + if (doprint) + printf("0%o(%s)", var, rptr); + return (2); + } + return (0); +} + +/* + * Deal with double operands. Return any increment to skip to reach the next + * instruction. + */ +int doubl(int addr, int a, int b) +{ + int i = 0; + int r= a & 07; /* Get the register from the 1st operand */ + i += paddr(" ", addr, a, 0); + i += paddr(",", addr + i, b, r); + return (i); +} + +void branch(char *str, int addr, int ins) +{ + if (doprint) printf(str); + if (ins & 0200) { + ins |= 0177400; + } + + /* Determine the branch address */ + ins = (addr + (ins << 1) + 2) & 0177777; + + if (!doprint) { + add_symbol(ins, SYM_BRANCH, 0); /* Add a branch symbol for it */ + return; + } + + /* Printing it, see if there is a label for that */ + struct symbol *s = get_isym(ins); + if (s == NULL) { + printf("0%o", ins); + return; + } + + /* Yes there is, print it out */ + printf("%s", s->name); + if (s->type == SYM_BRANCH) { + if (ins > addr) printf("f"); + else printf("b"); + } +} + +/* + * Given an address, disassemble and print out the instruction at that + * address. Return any increment to skip to reach the next instruction. 
+ */ +int printins(int addr) +{ + unsigned int type; + int byte; + int incr = 2; + int syscall; + struct optab *p; + struct symbol *s; + + /* Print out any instruction symbol */ + if (doprint) { + s = get_isym(addr); + if (s) + printf("%s:\n", s->name); + } + /* Get the instruction from the ispace array */ + int ins = ispace[addr] + (ispace[addr + 1] << 8); + + type = DSYM; + for (p = optab;; p++) { + if ((ins & ~p->mask) == p->val) { + break; + } + } + if (doprint) + printf("\t%s", p->iname); + byte = ins & 0100000; + ins &= p->mask; + + itype= p->itype; + switch (p->itype) { + + case JMP: + type = ISYM; + + case SINGLE: + if (byte) { + if (doprint) + putchar('b'); + } + case SINGLW: + incr += paddr(" ", addr, ins, 0); + break; + + case REVERS: + incr += doubl(addr, ins & 077, (ins >> 6) & 07); + break; + + case JSR: + type = ISYM; + + case DOUBLE: + if (byte) { + if (doprint) + putchar('b'); + } + case DOUBLW: + incr += doubl(addr, ins >> 6, ins); + + case NOADDR: + break; + + case SOB: + incr += paddr(" ", addr, (ins >> 6) & 07, 0); + branch(",", addr, -(ins & 077)); + break; + + case BRANCH: + branch(" ", addr, ins); + break; + + case SYS: + if (ins < numsyscalls && systab[ins].name) { + if (doprint) + printf(" %s", systab[ins].name); + /* Print any argument words following the syscall */ + int n; + for (n = 1; n <= systab[ins].numwords; n++) { + int b = 2 * n; + syscall = ispace[addr + b] + (ispace[addr + b + 1] << 8); + if (doprint) + printf("; 0%o", syscall); + } + /* Skip some number of words following the syscall */ + incr += 2 * systab[ins].numwords; + } else if (doprint) + printf(" %d", ins); + break; + + case TRAP: + case DFAULT: + default: + if (doprint) + printf(" 0%o", ins); + } + if (doprint) + printf("\n"); + return (incr); +} diff --git a/tools/disaout/symbols.c b/tools/disaout/symbols.c new file mode 100644 index 0000000..be6cc93 --- /dev/null +++ b/tools/disaout/symbols.c @@ -0,0 +1,70 @@ +/* Tables and functions to keep track of symbols */ + 
+#include
+#include
+#include
+#include "aout.h"
+/* Per-address symbol tables: one struct symbol pointer for each of the
+ * PDP_MEM_SIZE addresses. Both arrays have static storage, so every slot
+ * starts out NULL, meaning no symbol is recorded there. Presumably isym
+ * covers instruction space and dsym covers data space -- confirm against
+ * aout.h.
+ * NOTE(review): the first three #include lines above carry no header name
+ * in this copy of the patch; the names look lost in extraction and need to
+ * be restored from the original source.
+ */
+struct symbol * isym[PDP_MEM_SIZE], * dsym[PDP_MEM_SIZE];
+
+/* Array of structs holding details for each type of symbol.
+ * The array is indexed by the symbol type value passed to add_symbol().
+ *   format  - printf-style template; patch_symbols() feeds it the counter
+ *             to build the symbol name
+ *   counter - next number to hand out for this type; starts at 1 and is
+ *             post-incremented each time a name is generated
+ *   table   - which per-address table (isym or dsym) add_symbol() stores
+ *             symbols of this type in
+ * NOTE(review): the jsrdata entry is filed under isym, not dsym; that may
+ * be intentional -- confirm.
+ */
+struct symtype {
+  char *format;
+  int counter;
+  struct symbol **table;
+} symtypelist[] = {
+  { "%d", 1, isym },
+  { "func%d", 1, isym },
+  { "data%d", 1, dsym },
+  { "jsrtext%d", 1, isym },
+  { "jsrdata%d", 1, isym },
+};
+/* Record a symbol of the given type and size at address addr.
+ * If the table selected by symtypelist[type] already has an entry at addr,
+ * the call does nothing and the existing symbol is kept. Otherwise a new
+ * struct symbol is allocated, its type and size are set, and it is stored
+ * in that table. The name field is not set here; patch_symbols() fills it
+ * in later.
+ *   addr - index into the selected table; not range-checked here
+ *   type - index into symtypelist; not range-checked here
+ *   size - copied into the symbol unchanged
+ * NOTE(review): the malloc() result is used without a NULL check.
+ */
+void add_symbol(int addr, int type, int size)
+{
+  struct symbol *s;
+
+  /* See if we have a symbol already defined at this address */
+  if (symtypelist[type].table[addr] != NULL) return;
+
+  /* No, so create one. The naming code below is compiled out (#if 0);
+   * names are assigned afterwards by patch_symbols() instead.
+   */
+  s= malloc(sizeof(struct symbol));
+#if 0
+  s->name= malloc(12);
+  snprintf(s->name,12,symtypelist[type].format, symtypelist[type].counter++);
+#endif
+  s->type= type;
+  s->size= size;
+  symtypelist[type].table[addr]= s;
+}
+
+/* Walk through both isym and dsym tables, giving them actual
+ * symbol names.
+ * Every non-NULL entry gets a newly allocated 12-byte name buffer, filled
+ * by snprintf() from the format of its type and that type's counter, which
+ * is then incremented. isym is walked first, then dsym, each in ascending
+ * address order, so the numbers within a type follow that order.
+ * NOTE(review): malloc() results are not checked, any previous name
+ * pointer is overwritten without being freed, and 12 bytes cannot hold
+ * jsrtext%d or jsrdata%d once the counter reaches 10000 (snprintf then
+ * truncates the name).
+ */
+void patch_symbols(void)
+{
+  int i,type;
+  struct symbol *s;
+  for (i=0; i< PDP_MEM_SIZE; i++) {
+    if (isym[i]==NULL) continue;
+    s= isym[i]; s->name= malloc(12);
+    type=s->type;
+    snprintf(s->name,12,symtypelist[type].format, symtypelist[type].counter++);
+  }
+  for (i=0; i< PDP_MEM_SIZE; i++) {
+    if (dsym[i]==NULL) continue;
+    s= dsym[i]; s->name= malloc(12);
+    type=s->type;
+    snprintf(s->name,12,symtypelist[type].format, symtypelist[type].counter++);
+  }
+}
+/* Return the symbol recorded in isym at addr, or NULL if there is none.
+ * addr is used as an array index without a range check.
+ */
+struct symbol * get_isym(int addr)
+{
+  return(isym[addr]);
+}
+/* Return the symbol recorded in dsym at addr, or NULL if there is none.
+ * addr is used as an array index without a range check.
+ */
+struct symbol * get_dsym(int addr)
+{
+  return(dsym[addr]);
+}
diff --git a/tools/disaout/syscalls.c b/tools/disaout/syscalls.c
new file mode 100644
index 0000000..8e74078
--- /dev/null
+++ b/tools/disaout/syscalls.c
@@ -0,0 +1,202 @@
+/* List of system calls, per UNIX version */
+#include "aout.h"
+/* Entry layout comes from struct syscallinfo in aout.h: a name, then an integer (presumably an argument count -- confirm). */
+/* Table for what the name suggests are V1 UNIX binaries (confirm); it has 34 entries and is the initial target of systab. */
+struct syscallinfo v1syscalls[]= {
+  { "rele", 0 },
+  { "exit", 0 },
+  { "fork", 0 },
+  { "read", 2 },
+  { "write", 2 },
+  { "open", 3, },
+  { "close", 2 },
+  { "wait", 1 },
+  { "creat", 2 },
+  { "link", 2 },
+  { "unlink", 1 },
+  { "exec", 2 },
+  { "chdir", 1 },
+  { "time", 0 },
+  { "mkdir", 2 },
+  { "chmod", 2 },
+  { "chown", 2 },
+  { "break", 1 },
+  { "stat", 2 },
+  { "seek", 3 },
+  { "tell", 3 },
+  { "mount", 2 },
+  { "umount", 1 },
+  { "setuid", 1 },
+  { "getuid", 1 },
+  { "stime", 0 },
+  { "quit", 1 },
+  { "intr", 1 },
+  { "fstat", 2 },
+  { "cemt", 1 },
+  { "smdate", 1 },
+  { "stty", 2 },
+  { "gtty", 2 },
+  { "ilgins", 1 }
+};
+/* Table named for 2.11BSD (confirm); every integer field is 0 and a NULL name marks a slot that has no system call name. */
+struct syscallinfo bsd211syscalls[]= {
+  { "indir", 0 },
+  { "exit", 0 },
+  { "fork", 0 },
+  { "read", 0 },
+  { "write", 0 },
+  { "open", 0 },
+  { "close", 0 },
+  { "wait4", 0 },
+  { NULL, 0 },
+  { "link", 0 },
+  { "unlink", 0 },
+  { "execv", 0 },
+  { "chdir", 0 },
+  { "fchdir", 0 },
+  { "mknod", 0 },
+  { "chmod", 0 },
+  { "chown", 0 },
+  { "chflags", 0 },
+  { "fchflags", 0 },
+  { "lseek", 0 },
+  { "getpid", 0 },
+  { "mount", 0 },
+  { "umount", 0 },
+  { "__sysctl", 0 },
+  { "getuid", 0 },
+  { "geteuid", 0 },
+  { "ptrace", 0 },
+  { "getppid", 0 },
+  { NULL, 0 },
+  { NULL, 0 },
+  { NULL, 0 },
+  { "sigaction", 0 },
+  { "sigprocmask", 0 },
+  { "access", 0 },
+  { "sigpending", 0 },
+  { "sigaltstack", 0 },
+  { "sync", 0 },
+  { "kill", 0 },
+  { "stat", 0 },
+  { "_getlogin", 0 },
+  { "lstat", 0 },
+  { "dup", 0 },
+  { "pipe", 0 },
+  { "setlogin", 0 },
+  { "profil", 0 },
+  { "setuid", 0 },
+  { "seteuid", 0 },
+  { "getgid", 0 },
+  { "getegid", 0 },
+  { "setgid", 0 },
+  { "setegid", 0 },
+  { "acct", 0 },
+  { "phys", 0 },
+  { "lock", 0 },
+  { "ioctl", 0 },
+  { "reboot", 0 },
+  { NULL, 0 },
+  { "symlink", 0 },
+  { "readlink", 0 },
+  { "execve", 0 },
+  { "umask", 0 },
+  { "chroot", 0 },
+  { "fstat", 0 },
+  { NULL, 0 },
+  { NULL, 0 },
+  { "pselect", 0 },
+  { "vfork", 0 },
+  { NULL, 0 },
+  { NULL, 0 },
+  { "sbrk", 0 },
+  { NULL, 0 },
+  { NULL, 0 },
+  { NULL, 0 },
+  { NULL, 0 },
+  { NULL, 0 },
+  { NULL, 0 },
+  { "vhangup", 0 },
+  { NULL, 0 },
+  { NULL, 0 },
+  { "getgroups", 0 },
+  { "setgroups", 0 },
+  { "getpgrp", 0 },
+  { "setpgrp", 0 },
+  { "setitimer", 0 },
+  { NULL, 0 },
+  { NULL, 0 },
+  { "getitimer", 0 },
+  { NULL, 0 },
+  { NULL, 0 },
+  { "getdtablesize", 0 },
+  { "dup2", 0 },
+  { NULL, 0 },
+  { "fcntl", 0 },
+  { "select", 0 },
+  { NULL, 0 },
+  { "fsync", 0 },
+  { "setpriority", 0 },
+  { "socket", 0 },
+  { "connect", 0 },
+  { "accept", 0 },
+  { "getpriority", 0 },
+  { "send", 0 },
+  { "recv", 0 },
+  { "sigreturn", 0 },
+  { "bind", 0 },
+  { "setsockopt", 0 },
+  { "listen", 0 },
+  { "sigsuspend", 0 },
+  { NULL, 0 },
+  { NULL, 0 },
+  { NULL, 0 },
+  { NULL, 0 },
+  { "old sigstack", 0 },
+  { "recvmsg", 0 },
+  { "sendmsg", 0 },
+  { NULL, 0 },
+  { "gettimeofday", 0 },
+  { "getrusage", 0 },
+  { "getsockopt", 0 },
+  { NULL, 0 },
+  { "readv", 0 },
+  { "writev", 0 },
+  { "settimeofday", 0 },
+  { "fchown", 0 },
+  { "fchmod", 0 },
+  { "recvfrom", 0 },
+  { NULL, 0 },
+  { NULL, 0 },
+  { "rename", 0 },
+  { "truncate", 0 },
+  { "ftruncate", 0 },
+  { "flock", 0 },
+  { NULL, 0 },
+  { "sendto", 0 },
+  { "shutdown", 0 },
+  { "socketpair", 0 },
+  { "mkdir", 0 },
+  { "rmdir", 0 },
+  { "utimes", 0 },
+  { NULL, 0 },
+  { "adjtime", 0 },
+  { "getpeername", 0 },
+  { NULL, 0 },
+  { NULL, 0 },
+  { "getrlimit", 0 },
+  { "setrlimit", 0 },
+  { "killpg", 0 },
+  { NULL, 0 },
+  { "setquota", 0 },
+  { "quota", 0 },
+  { "getsockname", 0 },
+  { NULL, 0 },
+  { "nostk", 0 },
+  { "fetchi", 0 },
+  { "ucall", 0 },
+  { "fperr", 0 },
+};
+/* Currently selected table and its size; 34 is the number of entries in v1syscalls. */
+struct syscallinfo *systab=v1syscalls;	/* Pointer to one of the tables above */
+int numsyscalls= 34;			/* Presumably the entry count of the table systab points to -- keep the two in step */