mirror of
https://github.com/mikpe/pdp10-tools.git
synced 2026-03-10 12:48:31 +00:00
as: add initial, primitive, but working assembler
This commit is contained in:
@@ -1,40 +0,0 @@
|
||||
/*
|
||||
* main.c
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include "pass1.h"
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int ch;
|
||||
const char *outfile = "a.out";
|
||||
const char *infile = NULL;
|
||||
|
||||
for (;;) {
|
||||
ch = getopt(argc, argv, "o:");
|
||||
switch (ch) {
|
||||
case 'o':
|
||||
outfile = optarg;
|
||||
continue;
|
||||
case -1:
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "as: invalid option '%c'\n", ch);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
if (optind + 1 == argc)
|
||||
infile = argv[optind];
|
||||
|
||||
if (pass1(infile) < 0)
|
||||
return 1;
|
||||
|
||||
if (pass2() < 0)
|
||||
return 1;
|
||||
|
||||
if (pass3(outfile) < 0)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
/*
|
||||
* parse.h
|
||||
*/
|
||||
#ifndef PARSE_H
|
||||
#define PARSE_H
|
||||
|
||||
#include "stmt.h"
|
||||
|
||||
int parse_stmt(struct stmt *stmt);
|
||||
|
||||
#endif /* PARSE_H */
|
||||
@@ -1,15 +0,0 @@
|
||||
/*
|
||||
* scan.h
|
||||
*/
|
||||
#ifndef SCAN_H
|
||||
#define SCAN_H
|
||||
|
||||
#include "token.h"
|
||||
|
||||
/*
 * Scanner state visible to the rest of the assembler: the name of the file
 * being scanned and the current line number (presumably maintained by the
 * scanner itself; confirm in scan.c).
 *
 * NOTE(review): these are tentative definitions placed in a header, so every
 * translation unit including it defines the objects; consider `extern' here
 * with a single definition in scan.c.
 */
const char *scan_filename;
unsigned int scan_linenr;

/* Switch the scanner to FILENAME; presumably returns < 0 on failure (confirm). */
int scan_freopen(const char *filename);

/* Return the next token, storing its attribute (if any) in *TOKEN_ATTR. */
enum token scan(union token_attribute *token_attr);
|
||||
|
||||
#endif /* SCAN_H */
|
||||
@@ -1,77 +0,0 @@
|
||||
/*
|
||||
* token.def
|
||||
*
|
||||
* TOKEN(T_<name>, <print name>, <attribute fmt>)
|
||||
*/
|
||||
|
||||
/* directives */
|
||||
TOKEN(T_DOT_ALIGN, ".align", FMT_NONE)
|
||||
TOKEN(T_DOT_ASCII, ".ascii", FMT_NONE)
|
||||
TOKEN(T_DOT_ASCIZ, ".asciz", FMT_NONE)
|
||||
TOKEN(T_DOT_BALIGN, ".balign", FMT_NONE)
|
||||
TOKEN(T_DOT_BSS, ".bss", FMT_NONE)
|
||||
TOKEN(T_DOT_BYTE, ".byte", FMT_NONE)
|
||||
TOKEN(T_DOT_COMM, ".comm", FMT_NONE)
|
||||
TOKEN(T_DOT_DATA, ".data", FMT_NONE)
|
||||
TOKEN(T_DOT_FILE, ".file", FMT_NONE)
|
||||
TOKEN(T_DOT_GLOBL, ".globl", FMT_NONE)
|
||||
TOKEN(T_DOT_HIDDEN, ".hidden", FMT_NONE)
|
||||
TOKEN(T_DOT_IDENT, ".ident", FMT_NONE)
|
||||
TOKEN(T_DOT_INTERNAL, ".internal", FMT_NONE)
|
||||
TOKEN(T_DOT_LOCAL, ".local", FMT_NONE)
|
||||
TOKEN(T_DOT_LONG, ".long", FMT_NONE)
|
||||
TOKEN(T_DOT_ORG, ".org", FMT_NONE)
|
||||
TOKEN(T_DOT_P2ALIGN, ".p2align", FMT_NONE)
|
||||
TOKEN(T_DOT_POPSECTION, ".popsection", FMT_NONE)
|
||||
TOKEN(T_DOT_PREVIOUS, ".previous", FMT_NONE)
|
||||
TOKEN(T_DOT_PROTECTED, ".protected", FMT_NONE)
|
||||
TOKEN(T_DOT_PUSHSECTION, ".pushsection", FMT_NONE)
|
||||
TOKEN(T_DOT_RODATA, ".rodata", FMT_NONE)
|
||||
TOKEN(T_DOT_SECTION, ".section", FMT_NONE)
|
||||
TOKEN(T_DOT_SET, ".set", FMT_NONE)
|
||||
TOKEN(T_DOT_SHORT, ".short", FMT_NONE)
|
||||
TOKEN(T_DOT_SIZE, ".size", FMT_NONE)
|
||||
TOKEN(T_DOT_SUBSECTION, ".subsection", FMT_NONE)
|
||||
TOKEN(T_DOT_SYMVER, ".symver", FMT_NONE)
|
||||
TOKEN(T_DOT_TEXT, ".text", FMT_NONE)
|
||||
TOKEN(T_DOT_TYPE, ".type", FMT_NONE)
|
||||
TOKEN(T_DOT_WEAK, ".weak", FMT_NONE)
|
||||
TOKEN(T_DOT_WEAKREF, ".weakref", FMT_NONE)
|
||||
/* other symbols */
|
||||
TOKEN(T_REGISTER, "<register>", FMT_UINT)
|
||||
TOKEN(T_SYMBOL, "<symbol>", FMT_SYMBOL)
|
||||
TOKEN(T_LOCAL_LABEL, "<local label>", FMT_UINT) /* 1f, 2b */
|
||||
TOKEN(T_AT, "@", FMT_NONE)
|
||||
TOKEN(T_COLON, ":", FMT_NONE)
|
||||
/* literals */
|
||||
TOKEN(T_UINTEGER, "<integer>", FMT_UINT)
|
||||
TOKEN(T_STRING, "<string>", FMT_STRING)
|
||||
/* operators, separators */
|
||||
TOKEN(T_COMMA, ",", FMT_NONE)
|
||||
TOKEN(T_LPAREN, "(", FMT_NONE)
|
||||
TOKEN(T_RPAREN, ")", FMT_NONE)
|
||||
TOKEN(T_TILDE, "~", FMT_NONE)
|
||||
TOKEN(T_MUL, "*", FMT_NONE)
|
||||
TOKEN(T_DIV, "/", FMT_NONE)
|
||||
TOKEN(T_REM, "%", FMT_NONE)
|
||||
TOKEN(T_LSHIFT, "<<", FMT_NONE)
|
||||
TOKEN(T_RSHIFT, ">>", FMT_NONE)
|
||||
TOKEN(T_OR, "|", FMT_NONE)
|
||||
TOKEN(T_AND, "&", FMT_NONE)
|
||||
TOKEN(T_CARET, "^", FMT_NONE)
|
||||
TOKEN(T_BANG, "!", FMT_NONE)
|
||||
TOKEN(T_PLUS, "+", FMT_NONE)
|
||||
TOKEN(T_MINUS, "-", FMT_NONE)
|
||||
TOKEN(T_EQ, "=", FMT_NONE)
|
||||
TOKEN(T_EQEQ, "==", FMT_NONE)
|
||||
TOKEN(T_NEQ, "!=", FMT_NONE)
|
||||
TOKEN(T_LT, "<", FMT_NONE)
|
||||
TOKEN(T_GT, ">", FMT_NONE)
|
||||
TOKEN(T_GE, ">=", FMT_NONE)
|
||||
TOKEN(T_LE, "<=", FMT_NONE)
|
||||
TOKEN(T_ANDAND, "&&", FMT_NONE)
|
||||
TOKEN(T_OROR, "||", FMT_NONE)
|
||||
/* misc */
|
||||
TOKEN(T_NEWLINE, "<newline>", FMT_NONE)
|
||||
TOKEN(T_EOF, "<eof>", FMT_NONE)
|
||||
TOKEN(T_ERROR, "<error>", FMT_NONE)
|
||||
@@ -1,22 +0,0 @@
|
||||
/*
|
||||
* token.h
|
||||
*/
|
||||
#ifndef TOKEN_H
|
||||
#define TOKEN_H
|
||||
|
||||
#include "pdp10-stdint.h"
|
||||
|
||||
/*
 * Token codes.  One enumerator is generated for each TOKEN(T, P, F) entry of
 * token.def, in the order the entries are listed there; only the first macro
 * argument (the T_<name> identifier) is used here.
 */
enum token {
#define TOKEN(T, P, F)  T,
#include "token.def"
#undef TOKEN
};
|
||||
|
||||
union token_attribute {
|
||||
const char *text; /* symbol, string */
|
||||
pdp10_uint36_t uint; /* uinteger */
|
||||
};
|
||||
|
||||
void token_print(FILE *fp, enum token token, const union token_attribute *token_attr);
|
||||
|
||||
#endif /* TOKEN_H */
|
||||
19
as/Makefile
Normal file
19
as/Makefile
Normal file
@@ -0,0 +1,19 @@
|
||||
# Build the PDP10 assembler `as' from its objects and the shared library objects.
CC=gcc
CFLAGS=-O2 -g -Wall
CPPFLAGS=-I../include

ASOBJS=assemble.o input.o main.o output.o parse.o scan.o token.o
LIBOBJS=../lib/pdp10-elf36.o ../lib/pdp10-extint.o ../lib/pdp10-opcodes.o ../lib/pdp10-stdio.o

as:	$(ASOBJS) $(LIBOBJS)
	$(LINK.c) -o $@ $^

# Header dependencies.  assemble.h includes input.h, so every user of
# assemble.h must also be rebuilt when input.h changes.
assemble.o:	assemble.h input.h
input.o:	input.h parse.h scan.h token.def token.h
main.o:	assemble.h input.h output.h
output.o:	assemble.h input.h output.h
parse.o:	input.h scan.h token.def token.h
scan.o:	scan.h token.def token.h
token.o:	token.def token.h

# `clean' names an action, not a file.
.PHONY:	clean
clean:
	rm -f $(ASOBJS) as a.out core.*
|
||||
106
as/assemble.c
Normal file
106
as/assemble.c
Normal file
@@ -0,0 +1,106 @@
|
||||
/*
|
||||
* assemble.c
|
||||
*/
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "assemble.h"
|
||||
#include "input.h"
|
||||
|
||||
static struct aunit_symbol *symbol(const char *progname, struct aunit *aunit, const char *name)
|
||||
{
|
||||
struct aunit_symbol *sym;
|
||||
|
||||
for (sym = aunit->symbols; sym; sym = sym->next)
|
||||
if (strcmp(name, sym->name) == 0)
|
||||
return sym;
|
||||
|
||||
sym = malloc(sizeof *sym);
|
||||
if (!sym) {
|
||||
fprintf(stderr, "%s: failed to allocate %zu bytes for aunit_symbol: %s\n", progname, sizeof *sym, strerror(errno));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
sym->name = name;
|
||||
sym->text_offset = 0;
|
||||
sym->is_global = 0;
|
||||
sym->is_defined = 0;
|
||||
|
||||
sym->next = aunit->symbols;
|
||||
aunit->symbols = sym;
|
||||
|
||||
return sym;
|
||||
}
|
||||
|
||||
int assemble(const char *progname, struct iunit *iunit, struct aunit *aunit)
|
||||
{
|
||||
struct stmt *stmt;
|
||||
struct aunit_symbol *sym;
|
||||
pdp10_uint36_t i, n;
|
||||
|
||||
aunit->text_words = NULL;
|
||||
aunit->text_nr_words = 0;
|
||||
aunit->symbols = NULL;
|
||||
|
||||
n = 0;
|
||||
for (stmt = iunit->text.head; stmt; stmt = stmt->next) {
|
||||
switch (stmt->tag) {
|
||||
case S_DOT_GLOBL:
|
||||
sym = symbol(progname, aunit, stmt->u.symbol.name);
|
||||
if (!sym)
|
||||
return -1;
|
||||
sym->is_global = 1;
|
||||
break;
|
||||
case S_LABEL:
|
||||
(void)symbol(progname, aunit, stmt->u.symbol.name);
|
||||
break;
|
||||
case S_INSN:
|
||||
++n;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
aunit->text_nr_words = n;
|
||||
aunit->text_words = malloc(n * sizeof(pdp10_uint36_t));
|
||||
if (!aunit->text_words) {
|
||||
fprintf(stderr, "%s: failed to allocate %zu bytes for text image: %s\n", progname, n * sizeof(pdp10_uint36_t), strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
i = 0;
|
||||
for (stmt = iunit->text.head; stmt; stmt = stmt->next) {
|
||||
switch (stmt->tag) {
|
||||
case S_LABEL:
|
||||
sym = symbol(progname, aunit, stmt->u.symbol.name);
|
||||
if (!sym)
|
||||
return -1;
|
||||
sym->is_defined = 1;
|
||||
sym->text_offset = i * 4;
|
||||
break;
|
||||
case S_INSN:
|
||||
if (i >= n) {
|
||||
fprintf(stderr, "%s: internal error: text image overflow\n", progname);
|
||||
return -1;
|
||||
}
|
||||
aunit->text_words[i] =
|
||||
((pdp10_uint36_t)(stmt->u.insn.opcode & 0x1FF) << (36 - 9)
|
||||
| ((stmt->u.insn.accumulator & 0xF) << (36 - 13))
|
||||
| ((stmt->u.insn.at & 1) << (36 - 14))
|
||||
| ((stmt->u.insn.indexreg & 0xF) << (36 - 18))
|
||||
| (stmt->u.insn.address & PDP10_UINT18_MAX));
|
||||
++i;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i != n) {
|
||||
fprintf(stderr, "%s: internal error: text image size mismatch\n", progname);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
26
as/assemble.h
Normal file
26
as/assemble.h
Normal file
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
* assemble.h
|
||||
*/
|
||||
#ifndef ASSEMBLE_H
|
||||
#define ASSEMBLE_H
|
||||
|
||||
#include "pdp10-stdint.h"
|
||||
#include "input.h"
|
||||
|
||||
struct aunit_symbol {
|
||||
struct aunit_symbol *next;
|
||||
const char *name;
|
||||
pdp10_uint36_t text_offset;
|
||||
int is_global;
|
||||
int is_defined;
|
||||
};
|
||||
|
||||
struct aunit {
|
||||
pdp10_uint36_t *text_words;
|
||||
pdp10_uint36_t text_nr_words;
|
||||
struct aunit_symbol *symbols;
|
||||
};
|
||||
|
||||
int assemble(const char *progname, struct iunit *iunit, struct aunit *aunit);
|
||||
|
||||
#endif /* ASSEMBLE_H */
|
||||
85
as/input.c
Normal file
85
as/input.c
Normal file
@@ -0,0 +1,85 @@
|
||||
/*
|
||||
* input.c
|
||||
*/
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "input.h"
|
||||
#include "parse.h"
|
||||
#include "scan.h"
|
||||
|
||||
static int interpret(struct scan_state *scan_state, struct iunit *iunit, struct stmt *stmt)
|
||||
{
|
||||
struct stmt *stmt2;
|
||||
|
||||
switch (stmt->tag) {
|
||||
case S_DOT_GLOBL:
|
||||
break;
|
||||
case S_DOT_TEXT:
|
||||
return 0; /* XXX: nothing to do yet */
|
||||
case S_LABEL:
|
||||
break;
|
||||
case S_INSN:
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "%s: %s line %u: parser returned unexpected stmt->tag %u\n",
|
||||
scan_state->progname, scan_state->filename, scan_state->linenr, stmt->tag);
|
||||
return -1;
|
||||
}
|
||||
|
||||
stmt2 = malloc(sizeof *stmt2);
|
||||
if (!stmt2) {
|
||||
fprintf(stderr, "%s: %s line %u: malloc(%zu) failed: %s\n",
|
||||
scan_state->progname, scan_state->filename, scan_state->linenr, sizeof *stmt2, strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
*stmt2 = *stmt;
|
||||
stmt2->next = NULL;
|
||||
|
||||
*iunit->text.tailptr = stmt2;
|
||||
iunit->text.tailptr = &stmt2->next;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int input(const char *progname, char **files, int nrfiles, struct iunit *iunit)
|
||||
{
|
||||
char fake_file[3];
|
||||
char *fake_files[1];
|
||||
struct scan_state scan_state;
|
||||
int i;
|
||||
struct stmt stmt;
|
||||
int status;
|
||||
|
||||
if (nrfiles <= 0) {
|
||||
fake_file[0] = '-';
|
||||
fake_file[1] = '-';
|
||||
fake_file[2] = '\0';
|
||||
fake_files[0] = fake_file;
|
||||
files = fake_files;
|
||||
nrfiles = 1;
|
||||
}
|
||||
|
||||
iunit->text.head = NULL;
|
||||
iunit->text.tailptr = &iunit->text.head;
|
||||
|
||||
scan_init(&scan_state, progname);
|
||||
|
||||
for (i = 0; i < nrfiles; ++i) {
|
||||
if (scan_open(&scan_state, files[i]) < 0)
|
||||
return -1;
|
||||
for (;;) {
|
||||
status = parse_stmt(&scan_state, &stmt);
|
||||
if (status < 0)
|
||||
return -1;
|
||||
if (status == 0)
|
||||
break;
|
||||
if (interpret(&scan_state, iunit, &stmt) < 0)
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
53
as/input.h
Normal file
53
as/input.h
Normal file
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
* input.h
|
||||
*/
|
||||
#ifndef INPUT_H
|
||||
#define INPUT_H
|
||||
|
||||
/*
|
||||
* A directive, label, or instruction is parsed to a statement, which is
|
||||
* either interpreted immediately or appended to the representation of the
|
||||
* current section.
|
||||
*/
|
||||
|
||||
/*
 * Kind of a parsed statement; selects the valid member of the union in
 * struct stmt.
 */
enum stmt_tag {
    /* directives */
    S_DOT_GLOBL,    /* .globl <symbol> */
    S_DOT_TEXT,     /* .text */

    /* non-directives */
    S_LABEL,        /* <symbol>: */
    S_INSN,         /* machine instruction */
};
|
||||
|
||||
struct stmt {
|
||||
struct stmt *next;
|
||||
enum stmt_tag tag;
|
||||
union {
|
||||
struct { /* S_DOT_GLOBL, S_LABEL */
|
||||
const char *name;
|
||||
} symbol;
|
||||
struct { /* S_INSN */
|
||||
unsigned int opcode;
|
||||
unsigned int accumulator;
|
||||
int at;
|
||||
unsigned int address; /* XXX: relocatable expr */
|
||||
unsigned int indexreg;
|
||||
} insn;
|
||||
} u;
|
||||
};
|
||||
|
||||
/*
|
||||
* The input unit object is the top-level container for the representation
|
||||
* of the sections, and all other information collected from the input.
|
||||
*/
|
||||
|
||||
/*
 * The input unit.  Currently it only holds the statement list of the text
 * section.
 */
struct iunit {
    struct {
        struct stmt *head;      /* first statement, or NULL if the list is empty */
        struct stmt **tailptr;  /* address of the link where the next statement is appended */
    } text;
};
|
||||
|
||||
int input(const char *progname, char **files, int nrfiles, struct iunit *iunit);
|
||||
|
||||
#endif /* INPUT_H */
|
||||
52
as/main.c
Normal file
52
as/main.c
Normal file
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* main.c
|
||||
*
|
||||
* as clone for PDP10 with Elf36 object files.
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include "assemble.h"
|
||||
#include "input.h"
|
||||
#include "output.h"
|
||||
|
||||
#define VERSION "pdp10-tools as version 0.1, built " __DATE__ " " __TIME__ "\n"
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
const char *outfile = "a.out";
|
||||
struct iunit iunit;
|
||||
struct aunit aunit;
|
||||
|
||||
for (;;) {
|
||||
int ch;
|
||||
|
||||
ch = getopt(argc, argv, "vo:");
|
||||
switch (ch) {
|
||||
case 'v':
|
||||
printf(VERSION);
|
||||
continue;
|
||||
case 'o':
|
||||
outfile = optarg;
|
||||
continue;
|
||||
case -1:
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "Usage: %s [-v] [-o outfile] [files..]\n", argv[0]);
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (input(argv[0], &argv[optind], argc - optind, &iunit) < 0)
|
||||
return 1;
|
||||
|
||||
if (assemble(argv[0], &iunit, &aunit) < 0)
|
||||
return 1;
|
||||
|
||||
/* XXX: iunit_fini(&iunit) */
|
||||
|
||||
if (output(argv[0], &aunit, outfile) < 0)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
325
as/output.c
Normal file
325
as/output.c
Normal file
@@ -0,0 +1,325 @@
|
||||
/*
|
||||
* output.c
|
||||
*/
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "pdp10-elf36.h"
|
||||
#include "pdp10-stdint.h"
|
||||
#include "pdp10-stdio.h"
|
||||
#include "assemble.h"
|
||||
#include "output.h"
|
||||
|
||||
/*
 * One string of a string table under construction.  Entries are linked in
 * the order in which the strings will appear in the table.
 */
struct strtab_entry {
    struct strtab_entry *next;  /* following entry, or NULL */
    const char *string;         /* the string itself; not owned by the entry */
    unsigned int nrbytes;       /* strlen(string) + 1 */
};
|
||||
|
||||
/*
 * A string table under construction.
 */
struct strtab {
    struct strtab_entry *head;  /* first entry, or NULL while the table is empty */
    unsigned int nrbytes;       /* table size in bytes, including the initial NUL once non-empty */
};
|
||||
|
||||
static void strtab_init(struct strtab *strtab)
|
||||
{
|
||||
strtab->head = NULL;
|
||||
strtab->nrbytes = 0;
|
||||
}
|
||||
|
||||
static pdp10_uint36_t strtab_enter(const char *progname, struct strtab *strtab, const char *name)
|
||||
{
|
||||
struct strtab_entry *prev, *here;
|
||||
pdp10_uint36_t index;
|
||||
|
||||
index = 1;
|
||||
prev = NULL;
|
||||
here = strtab->head;
|
||||
while (here != NULL) {
|
||||
if (strcmp(name, here->string) == 0)
|
||||
return index;
|
||||
index += here->nrbytes;
|
||||
prev = here;
|
||||
here = here->next;
|
||||
}
|
||||
|
||||
here = malloc(sizeof *here);
|
||||
if (!here) {
|
||||
fprintf(stderr, "%s: failed to allocate %zu bytes for a strtab_entry: %s\n",
|
||||
progname, sizeof *here, strerror(errno));
|
||||
return 0;
|
||||
}
|
||||
here->next = NULL;
|
||||
here->string = name;
|
||||
here->nrbytes = strlen(name) + 1;
|
||||
|
||||
if (prev) {
|
||||
prev->next = here;
|
||||
} else {
|
||||
strtab->head = here;
|
||||
index = 1;
|
||||
strtab->nrbytes = 1;
|
||||
}
|
||||
|
||||
strtab->nrbytes += here->nrbytes;
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
/*
 * Write STRTAB to PDP10FP: the initial NUL byte, then every string with its
 * terminating NUL, then NUL padding up to a multiple of 4 bytes.
 *
 * Returns 0 on success, -1 if any write fails.
 *
 * NOTE(review): the initial NUL is written even for a table without
 * entries, although nrbytes is 0 then; output() only writes non-empty
 * tables.
 */
static int strtab_write(PDP10_FILE *pdp10fp, const struct strtab *strtab)
{
    const struct strtab_entry *here;
    unsigned int i;

    if (pdp10_elf36_write_uint9(pdp10fp, '\0') < 0)
        return -1;

    for (here = strtab->head; here; here = here->next)
        for (i = 0; i < here->nrbytes; ++i)
            /*
             * Go through unsigned char: where plain char is signed, a byte
             * >= 0x80 would otherwise be sign-extended and set bits beyond
             * the low 8 in the value being written.
             */
            if (pdp10_elf36_write_uint9(pdp10fp, (unsigned char)here->string[i]) < 0)
                return -1;

    /* Pad to the next multiple of 4 bytes. */
    i = (4 - (strtab->nrbytes & 3)) & 3;
    while (i != 0) {
        if (pdp10_elf36_write_uint9(pdp10fp, '\0') < 0)
            return -1;
        --i;
    }

    return 0;
}
|
||||
|
||||
int output(const char *progname, struct aunit *aunit, const char *outfile)
|
||||
{
|
||||
pdp10_uint36_t shnum, text_shndx, symtab_shndx, strtab_shndx, shstrtab_shndx;
|
||||
pdp10_uint36_t text_shstrndx, symtab_shstrndx, strtab_shstrndx, shstrtab_shstrndx;
|
||||
Elf36_Sym *symtab;
|
||||
pdp10_uint36_t symnum;
|
||||
struct strtab strtab, shstrtab;
|
||||
struct aunit_symbol *asym;
|
||||
pdp10_uint36_t i;
|
||||
Elf36_Shdr *shtab;
|
||||
pdp10_uint36_t offset;
|
||||
Elf36_Ehdr ehdr;
|
||||
PDP10_FILE *pdp10fp;
|
||||
|
||||
shnum = 0;
|
||||
shstrtab_shndx = 0;
|
||||
text_shndx = 0;
|
||||
symtab_shndx = 0;
|
||||
strtab_shndx = 0;
|
||||
symtab = NULL;
|
||||
symnum = 0;
|
||||
strtab_init(&strtab);
|
||||
strtab_init(&shstrtab);
|
||||
shtab = NULL;
|
||||
|
||||
shnum = 1; /* tentative */
|
||||
|
||||
if (aunit->text_nr_words != 0) {
|
||||
text_shstrndx = strtab_enter(progname, &shstrtab, ".text");
|
||||
if (text_shstrndx == 0)
|
||||
return -1;
|
||||
text_shndx = shnum;
|
||||
++shnum;
|
||||
}
|
||||
|
||||
for (asym = aunit->symbols; asym; asym = asym->next)
|
||||
++symnum;
|
||||
if (symnum != 0) {
|
||||
symtab_shstrndx = strtab_enter(progname, &shstrtab, ".symtab");
|
||||
if (symtab_shstrndx == 0)
|
||||
return -1;
|
||||
strtab_shstrndx = strtab_enter(progname, &shstrtab, ".strtab");
|
||||
if (strtab_shstrndx == 0)
|
||||
return -1;
|
||||
symtab_shndx = shnum;
|
||||
strtab_shndx = shnum + 1;
|
||||
shnum += 2;
|
||||
}
|
||||
|
||||
if (shnum == 1) {
|
||||
shstrtab_shndx = 0;
|
||||
shnum = 0;
|
||||
} else {
|
||||
shstrtab_shstrndx = strtab_enter(progname, &shstrtab, ".shstrtab");
|
||||
if (shstrtab_shstrndx == 0)
|
||||
return -1;
|
||||
shstrtab_shndx = shnum;
|
||||
++shnum;
|
||||
}
|
||||
|
||||
if (symnum) {
|
||||
++symnum; /* for initial stub entry */
|
||||
symtab = malloc(symnum * sizeof(Elf36_Sym));
|
||||
if (!symtab) {
|
||||
fprintf(stderr, "%s: failed to allocate %zu bytes for Elf36 symbol table: %s\n",
|
||||
progname, symnum * sizeof(Elf36_Sym), strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
symtab[0].st_name = 0;
|
||||
symtab[0].st_value = 0;
|
||||
symtab[0].st_size = 0;
|
||||
symtab[0].st_info = ELF36_ST_INFO(STB_LOCAL, STT_NOTYPE);
|
||||
symtab[0].st_other = 0;
|
||||
symtab[0].st_shndx = SHN_UNDEF;
|
||||
|
||||
for (i = 1, asym = aunit->symbols; asym; ++i, asym = asym->next) {
|
||||
symtab[i].st_name = strtab_enter(progname, &strtab, asym->name);
|
||||
if (symtab[i].st_name == 0)
|
||||
return -1;
|
||||
symtab[i].st_value = asym->text_offset;
|
||||
symtab[i].st_size = 0;
|
||||
if (asym->is_global)
|
||||
symtab[i].st_info = ELF36_ST_INFO(STB_GLOBAL, STT_NOTYPE);
|
||||
else
|
||||
symtab[i].st_info = ELF36_ST_INFO(STB_LOCAL, STT_NOTYPE);
|
||||
symtab[i].st_other = STV_DEFAULT;
|
||||
symtab[i].st_shndx = text_shndx;
|
||||
}
|
||||
}
|
||||
|
||||
if (shnum) {
|
||||
shtab = malloc(shnum * sizeof(Elf36_Shdr));
|
||||
if (!shtab) {
|
||||
fprintf(stderr, "%s: failed to allocate %zu bytes for Elf36 section header table: %s\n",
|
||||
progname, shnum * sizeof(Elf36_Shdr), strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
shtab[0].sh_name = 0;
|
||||
shtab[0].sh_type = SHT_NULL;
|
||||
shtab[0].sh_flags = 0;
|
||||
shtab[0].sh_addr = 0;
|
||||
shtab[0].sh_offset = 0;
|
||||
shtab[0].sh_size = 0;
|
||||
shtab[0].sh_link = 0;
|
||||
shtab[0].sh_info = 0;
|
||||
shtab[0].sh_addralign = 0;
|
||||
shtab[0].sh_entsize = 0;
|
||||
|
||||
offset = ELF36_EHDR_SIZEOF;
|
||||
|
||||
if (text_shndx) {
|
||||
shtab[text_shndx].sh_name = text_shstrndx;
|
||||
shtab[text_shndx].sh_type = SHT_PROGBITS;
|
||||
shtab[text_shndx].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
|
||||
shtab[text_shndx].sh_addr = 0;
|
||||
shtab[text_shndx].sh_offset = offset;
|
||||
shtab[text_shndx].sh_size = aunit->text_nr_words * 4;
|
||||
shtab[text_shndx].sh_link = 0;
|
||||
shtab[text_shndx].sh_info = 0;
|
||||
shtab[text_shndx].sh_addralign = 4;
|
||||
shtab[text_shndx].sh_entsize = 0;
|
||||
offset += aunit->text_nr_words * 4;
|
||||
}
|
||||
|
||||
if (symtab_shndx) {
|
||||
shtab[symtab_shndx].sh_name = symtab_shstrndx;
|
||||
shtab[symtab_shndx].sh_type = SHT_SYMTAB;
|
||||
shtab[symtab_shndx].sh_flags = 0;
|
||||
shtab[symtab_shndx].sh_addr = 0;
|
||||
shtab[symtab_shndx].sh_offset = offset;
|
||||
shtab[symtab_shndx].sh_size = symnum * ELF36_SYM_SIZEOF;
|
||||
shtab[symtab_shndx].sh_link = strtab_shndx;
|
||||
shtab[symtab_shndx].sh_info = 0 + 1; /* XXX: LAST_LOCAL + 1 */
|
||||
shtab[symtab_shndx].sh_addralign = 4;
|
||||
shtab[symtab_shndx].sh_entsize = ELF36_SYM_SIZEOF;
|
||||
offset += symnum * ELF36_SYM_SIZEOF;
|
||||
}
|
||||
|
||||
if (strtab_shndx) {
|
||||
shtab[strtab_shndx].sh_name = strtab_shstrndx;
|
||||
shtab[strtab_shndx].sh_type = SHT_STRTAB;
|
||||
shtab[strtab_shndx].sh_flags = 0;
|
||||
shtab[strtab_shndx].sh_addr = 0;
|
||||
shtab[strtab_shndx].sh_offset = offset;
|
||||
shtab[strtab_shndx].sh_size = strtab.nrbytes;
|
||||
shtab[strtab_shndx].sh_link = 0;
|
||||
shtab[strtab_shndx].sh_info = 0;
|
||||
shtab[strtab_shndx].sh_addralign = 1;
|
||||
shtab[strtab_shndx].sh_entsize = 0;
|
||||
offset += (strtab.nrbytes + 3) & ~3;
|
||||
}
|
||||
|
||||
if (shstrtab_shndx) {
|
||||
shtab[shstrtab_shndx].sh_name = shstrtab_shstrndx;
|
||||
shtab[shstrtab_shndx].sh_type = SHT_STRTAB;
|
||||
shtab[shstrtab_shndx].sh_flags = 0;
|
||||
shtab[shstrtab_shndx].sh_addr = 0;
|
||||
shtab[shstrtab_shndx].sh_offset = offset;
|
||||
shtab[shstrtab_shndx].sh_size = shstrtab.nrbytes;
|
||||
shtab[shstrtab_shndx].sh_link = 0;
|
||||
shtab[shstrtab_shndx].sh_info = 0;
|
||||
shtab[shstrtab_shndx].sh_addralign = 1;
|
||||
shtab[shstrtab_shndx].sh_entsize = 0;
|
||||
offset += (shstrtab.nrbytes + 3) & ~3;
|
||||
}
|
||||
|
||||
/* offset is now the offset of the section header table, which is last in the file */
|
||||
} else
|
||||
offset = 0;
|
||||
|
||||
ehdr.e_wident[0] = (((pdp10_uint36_t)ELFMAG0 << 28)
|
||||
| (ELFMAG1 << 20)
|
||||
| (ELFMAG2 << 12)
|
||||
| (ELFMAG3 << 4)
|
||||
| (ELFCLASS36 >> 4));
|
||||
ehdr.e_wident[1] = (((pdp10_uint36_t)(ELFCLASS36 & 0x0f) << 32)
|
||||
| (ELFDATA2MSB << 24)
|
||||
| (EV_CURRENT << 16)
|
||||
| (ELFOSABI_NONE << 8)
|
||||
| 0); /* EI_ABIVERSION */
|
||||
ehdr.e_wident[2] = 0;
|
||||
ehdr.e_wident[3] = 0;
|
||||
ehdr.e_type = ET_REL;
|
||||
ehdr.e_machine = EM_PDP10;
|
||||
ehdr.e_version = EV_CURRENT;
|
||||
ehdr.e_entry = 0;
|
||||
ehdr.e_phoff = 0;
|
||||
ehdr.e_shoff = offset;
|
||||
ehdr.e_flags = 0;
|
||||
ehdr.e_ehsize = ELF36_EHDR_SIZEOF;
|
||||
ehdr.e_phentsize = 0;
|
||||
ehdr.e_phnum = 0;
|
||||
ehdr.e_shentsize = ELF36_SHDR_SIZEOF;
|
||||
ehdr.e_shnum = shnum;
|
||||
ehdr.e_shstrndx = shstrtab_shndx;
|
||||
|
||||
pdp10fp = pdp10_fopen(outfile, "wb");
|
||||
if (!pdp10fp) {
|
||||
fprintf(stderr, "%s: failed to open %s: %s\n", progname, outfile, strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (pdp10_elf36_write_ehdr(pdp10fp, &ehdr) < 0)
|
||||
return -1;
|
||||
|
||||
if (text_shndx)
|
||||
for (i = 0; i < aunit->text_nr_words; ++i)
|
||||
if (pdp10_elf36_write_uint36(pdp10fp, aunit->text_words[i]) < 0)
|
||||
return -1;
|
||||
|
||||
if (symtab_shndx)
|
||||
for (i = 0; i < symnum; ++i)
|
||||
if (pdp10_elf36_write_sym(pdp10fp, &symtab[i]) < 0)
|
||||
return -1;
|
||||
|
||||
if (strtab_shndx)
|
||||
if (strtab_write(pdp10fp, &strtab) < 0)
|
||||
return -1;
|
||||
|
||||
if (shstrtab_shndx)
|
||||
if (strtab_write(pdp10fp, &shstrtab) < 0)
|
||||
return -1;
|
||||
|
||||
if (shnum)
|
||||
for (i = 0; i < shnum; ++i)
|
||||
if (pdp10_elf36_write_shdr(pdp10fp, &shtab[i]) < 0)
|
||||
return -1;
|
||||
|
||||
pdp10_fclose(pdp10fp);
|
||||
return 0;
|
||||
}
|
||||
11
as/output.h
Normal file
11
as/output.h
Normal file
@@ -0,0 +1,11 @@
|
||||
/*
|
||||
* output.h
|
||||
*/
|
||||
#ifndef OUTPUT_H
|
||||
#define OUTPUT_H
|
||||
|
||||
#include "assemble.h"
|
||||
|
||||
int output(const char *progname, struct aunit *aunit, const char *outfile);
|
||||
|
||||
#endif /* OUTPUT_H */
|
||||
354
as/parse.c
Normal file
354
as/parse.c
Normal file
@@ -0,0 +1,354 @@
|
||||
/*
|
||||
* parse.c
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "pdp10-opcodes.h"
|
||||
#include "input.h" /* for struct stmt */
|
||||
#include "scan.h"
|
||||
#include "token.h"
|
||||
|
||||
static int error(struct scan_state *scan_state, const char *msg, enum token token, const union token_attribute *token_attr)
|
||||
{
|
||||
fprintf(stderr, "%s: %s line %u: syntax error: %s; current token is ",
|
||||
scan_state->progname, scan_state->filename, scan_state->linenr, msg);
|
||||
token_print(stderr, token, token_attr);
|
||||
fprintf(stderr, "\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int parse_dot_globl(struct scan_state *scan_state, struct stmt *stmt)
|
||||
{
|
||||
enum token token;
|
||||
union token_attribute token_attr;
|
||||
|
||||
token = scan_token(scan_state, &token_attr);
|
||||
if (token == T_SYMBOL) {
|
||||
stmt->u.symbol.name = token_attr.text;
|
||||
token = scan_token(scan_state, &token_attr);
|
||||
if (token == T_NEWLINE) {
|
||||
stmt->tag = S_DOT_GLOBL;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return error(scan_state, "junk after .globl directive", token, &token_attr);
|
||||
}
|
||||
|
||||
static int parse_dot_text(struct scan_state *scan_state, struct stmt *stmt)
|
||||
{
|
||||
enum token token;
|
||||
union token_attribute token_attr;
|
||||
|
||||
token = scan_token(scan_state, &token_attr);
|
||||
if (token == T_NEWLINE) {
|
||||
stmt->tag = S_DOT_TEXT;
|
||||
return 1;
|
||||
}
|
||||
return error(scan_state, "junk after .text directive", token, &token_attr);
|
||||
}
|
||||
|
||||
/*
|
||||
* Recognize:
|
||||
*
|
||||
* <label> ::= <symbol> ":"
|
||||
*
|
||||
* <insn> ::= <symbol> (<accumulator> ",")? <address> <newline>
|
||||
*
|
||||
* <accumulator> ::= <uinteger> [uint <= 0xF]
|
||||
*
|
||||
* <address> ::= "@"? <displacement>? <index>?
|
||||
*
|
||||
* <displacement> ::= <uinteger> [uint <= 2^18 - 1]
|
||||
* <displacement> ::= "(" <uinteger> ")" [uint <= 2^18 - 1]
|
||||
*
|
||||
* <index> ::= "(" <indexreg> ")"
|
||||
* <indexreg> ::= <uinteger> [uint <= 0xF]
|
||||
*
|
||||
* Examples:
|
||||
* foo:
|
||||
* popj 17,
|
||||
* pushj 17,bar
|
||||
* movei 1,@fum(2)
|
||||
*
|
||||
* Ambiguous examples:
|
||||
*
|
||||
* <symbol> (<uinteger>) <newline>
|
||||
*
|
||||
* This is ambiguous since we have no special notation for <register>, and the same kind of
|
||||
* parentheses are used for expression grouping in the displacement as for the index register.
|
||||
*
|
||||
* This might denote an insn with a parenthesized displacement and no index,
|
||||
* or it might denote an insn with an index but no displacement.
|
||||
*
|
||||
* However, the uinteger in an indexreg cannot be > 0xF, and it rarely makes sense to form
|
||||
* an effective address with a displacement <= 0xF and no index.
|
||||
*
|
||||
* Therefore, if the uinteger is <= 0xF this is an index with no displacement,
|
||||
* otherwise it is a displacement without an index.
|
||||
*/
|
||||
|
||||
static int parse_insn_index_after_lparen(struct scan_state *scan_state, struct stmt *stmt)
|
||||
{
|
||||
enum token token;
|
||||
union token_attribute token_attr;
|
||||
|
||||
token = scan_token(scan_state, &token_attr);
|
||||
if (token != T_UINTEGER
|
||||
|| token_attr.uint > 0xF)
|
||||
return error(scan_state, "invalid <indexreg>", token, &token_attr);
|
||||
|
||||
stmt->u.insn.indexreg = token_attr.uint;
|
||||
|
||||
token = scan_token(scan_state, &token_attr);
|
||||
if (token != T_RPAREN)
|
||||
return error(scan_state, "junk after '(' <indexreg>", token, &token_attr);
|
||||
|
||||
token = scan_token(scan_state, &token_attr);
|
||||
if (token != T_NEWLINE)
|
||||
return error(scan_state, "junk after '(' <indexreg> ')'", token, &token_attr);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int parse_insn_address_after_lparen_uinteger_rparen(struct scan_state *scan_state, struct stmt *stmt, union token_attribute *uinteger_attr)
|
||||
{
|
||||
enum token token;
|
||||
union token_attribute token_attr;
|
||||
|
||||
token = scan_token(scan_state, &token_attr);
|
||||
switch (token) {
|
||||
case T_NEWLINE: /* might be <displacement> or <index>, inspect the <uinteger>'s value to disambiguate */
|
||||
if (uinteger_attr->uint > PDP10_UINT18_MAX)
|
||||
return error(scan_state, "invalid <displacement>", T_UINTEGER, uinteger_attr);
|
||||
if (uinteger_attr->uint <= 0xF) /* it's the <index> */
|
||||
stmt->u.insn.indexreg = uinteger_attr->uint;
|
||||
else /* it's the <displacement> */
|
||||
stmt->u.insn.address = uinteger_attr->uint;
|
||||
return 1;
|
||||
case T_LPAREN: /* the <uinteger> is the <displacement>, followed by <index> */
|
||||
if (uinteger_attr->uint > PDP10_UINT18_MAX)
|
||||
return error(scan_state, "invalid <displacement>", T_UINTEGER, uinteger_attr);
|
||||
stmt->u.insn.address = uinteger_attr->uint;
|
||||
return parse_insn_index_after_lparen(scan_state, stmt);
|
||||
default:
|
||||
return error(scan_state, "junk in <address> after '(' <uinteger> ')'", token, &token_attr);
|
||||
}
|
||||
}
|
||||
|
||||
static int parse_insn_address_after_lparen_uinteger(struct scan_state *scan_state, struct stmt *stmt, union token_attribute *uinteger_attr)
|
||||
{
|
||||
enum token token;
|
||||
union token_attribute token_attr;
|
||||
|
||||
token = scan_token(scan_state, &token_attr);
|
||||
switch (token) {
|
||||
case T_RPAREN: /* might be <displacement> or <index> */
|
||||
return parse_insn_address_after_lparen_uinteger_rparen(scan_state, stmt, uinteger_attr);
|
||||
default:
|
||||
return error(scan_state, "junk in <address> after '(' <uinteger>", token, &token_attr);
|
||||
}
|
||||
}
|
||||
|
||||
static int parse_insn_address_after_lparen(struct scan_state *scan_state, struct stmt *stmt)
|
||||
{
|
||||
enum token token;
|
||||
union token_attribute token_attr;
|
||||
|
||||
token = scan_token(scan_state, &token_attr);
|
||||
switch (token) {
|
||||
case T_UINTEGER: /* might be <displacement> or <index> */
|
||||
return parse_insn_address_after_lparen_uinteger(scan_state, stmt, &token_attr);
|
||||
default:
|
||||
return error(scan_state, "junk in <address> after '('", token, &token_attr);
|
||||
}
|
||||
}
|
||||
|
||||
static int parse_insn_after_displacement(struct scan_state *scan_state, struct stmt *stmt)
|
||||
{
|
||||
enum token token;
|
||||
union token_attribute token_attr;
|
||||
|
||||
token = scan_token(scan_state, &token_attr);
|
||||
switch (token) {
|
||||
case T_NEWLINE: /* no <index> */
|
||||
return 1;
|
||||
case T_LPAREN: /* need <index> */
|
||||
return parse_insn_index_after_lparen(scan_state, stmt);
|
||||
default:
|
||||
return error(scan_state, "junk in <address> after <displacement>", token, &token_attr);
|
||||
}
|
||||
}
|
||||
|
||||
static int parse_insn_address_after_at(struct scan_state *scan_state, struct stmt *stmt)
|
||||
{
|
||||
enum token token;
|
||||
union token_attribute token_attr;
|
||||
|
||||
token = scan_token(scan_state, &token_attr);
|
||||
switch (token) {
|
||||
case T_NEWLINE:
|
||||
return 1;
|
||||
case T_LPAREN: /* might be <displacement> or <index> */
|
||||
return parse_insn_address_after_lparen(scan_state, stmt);
|
||||
case T_UINTEGER:
|
||||
if (token_attr.uint > PDP10_UINT18_MAX)
|
||||
return error(scan_state, "invalid <displacement>", token, &token_attr);
|
||||
stmt->u.insn.address = token_attr.uint;
|
||||
return parse_insn_after_displacement(scan_state, stmt);
|
||||
default:
|
||||
return error(scan_state, "invalid <address>", token, &token_attr);
|
||||
}
|
||||
}
|
||||
|
||||
static int parse_insn_address(struct scan_state *scan_state, struct stmt *stmt, const struct pdp10_instruction *insndesc)
|
||||
{
|
||||
enum token token;
|
||||
union token_attribute token_attr;
|
||||
|
||||
token = scan_token(scan_state, &token_attr);
|
||||
if (token == T_NEWLINE)
|
||||
return 1;
|
||||
|
||||
if (insndesc->type & PDP10_E_UNUSED)
|
||||
return error(scan_state, "<address> not allowed in this instruction", token, &token_attr);
|
||||
|
||||
switch (token) {
|
||||
case T_LPAREN: /* might be <displacement> or <index> */
|
||||
return parse_insn_address_after_lparen(scan_state, stmt);
|
||||
case T_UINTEGER:
|
||||
if (token_attr.uint > PDP10_UINT18_MAX)
|
||||
return error(scan_state, "invalid <displacement>", token, &token_attr);
|
||||
stmt->u.insn.address = token_attr.uint;
|
||||
return parse_insn_after_displacement(scan_state, stmt);
|
||||
case T_AT:
|
||||
stmt->u.insn.at = 1;
|
||||
return parse_insn_address_after_at(scan_state, stmt);
|
||||
default:
|
||||
return error(scan_state, "invalid <address>", token, &token_attr);
|
||||
}
|
||||
}
|
||||
|
||||
static int parse_insn_after_symbol_uinteger(
|
||||
struct scan_state *scan_state, struct stmt *stmt, const struct pdp10_instruction *insndesc, union token_attribute *uinteger_attr)
|
||||
{
|
||||
enum token token;
|
||||
union token_attribute token_attr;
|
||||
|
||||
token = scan_token(scan_state, &token_attr);
|
||||
if (token == T_COMMA) { /* the <uinteger> is the <accumulator> */
|
||||
if (uinteger_attr->uint > 0xF)
|
||||
return error(scan_state, "invalid <accumulator>", T_UINTEGER, uinteger_attr);
|
||||
if (insndesc->type & (PDP10_A_OPCODE | PDP10_A_UNUSED))
|
||||
return error(scan_state, "<accumulator> not allowed in this instruction", T_UINTEGER, uinteger_attr);
|
||||
stmt->u.insn.accumulator = uinteger_attr->uint;
|
||||
return parse_insn_address(scan_state, stmt, insndesc);
|
||||
}
|
||||
|
||||
if (insndesc->type & PDP10_E_UNUSED)
|
||||
return error(scan_state, "<address> not allowed in this instruction", token, &token_attr);
|
||||
|
||||
switch (token) {
|
||||
case T_LPAREN: /* the <uinteger> is the <displacement>, followed by <index> */
|
||||
if (uinteger_attr->uint > PDP10_UINT18_MAX)
|
||||
return error(scan_state, "invalid <displacement>", T_UINTEGER, uinteger_attr);
|
||||
stmt->u.insn.address = uinteger_attr->uint;
|
||||
return parse_insn_index_after_lparen(scan_state, stmt);
|
||||
case T_NEWLINE: /* the <uinteger> is the <displacement>, there is no <accumulator> or <index> */
|
||||
if (uinteger_attr->uint > PDP10_UINT18_MAX)
|
||||
return error(scan_state, "invalid <displacement>", T_UINTEGER, uinteger_attr);
|
||||
stmt->u.insn.address = uinteger_attr->uint;
|
||||
return 1;
|
||||
default:
|
||||
return error(scan_state, "junk after <symbol> <uinteger>", token, &token_attr);
|
||||
}
|
||||
}
|
||||
|
||||
static int parse_after_symbol(struct scan_state *scan_state, struct stmt *stmt, union token_attribute *symbol_attr)
|
||||
{
|
||||
enum token token;
|
||||
union token_attribute token_attr;
|
||||
const struct pdp10_instruction *insndesc;
|
||||
|
||||
token = scan_token(scan_state, &token_attr);
|
||||
if (token == T_COLON) {
|
||||
stmt->u.symbol.name = symbol_attr->text;
|
||||
stmt->tag = S_LABEL;
|
||||
return 1;
|
||||
}
|
||||
|
||||
insndesc = pdp10_instruction_from_name(symbol_attr->text);
|
||||
if (!insndesc)
|
||||
return error(scan_state, "invalid instruction name", T_SYMBOL, symbol_attr);
|
||||
|
||||
stmt->tag = S_INSN;
|
||||
stmt->u.insn.at = 0;
|
||||
stmt->u.insn.address = 0;
|
||||
stmt->u.insn.indexreg = 0;
|
||||
|
||||
if (insndesc->type & PDP10_A_OPCODE) {
|
||||
/* XXX: this is too intimate with quirky ->opcode representation */
|
||||
stmt->u.insn.opcode = (insndesc->opcode >> 6) & 0x1FF;
|
||||
stmt->u.insn.accumulator = (insndesc->opcode >> 2) & 0xF;
|
||||
} else {
|
||||
stmt->u.insn.opcode = insndesc->opcode & 0x1FF;
|
||||
stmt->u.insn.accumulator = 0;
|
||||
}
|
||||
|
||||
switch (token) {
|
||||
case T_NEWLINE:
|
||||
return 1;
|
||||
case T_UINTEGER: /* might be <accumulator> or <displacement> */
|
||||
return parse_insn_after_symbol_uinteger(scan_state, stmt, insndesc, &token_attr);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (insndesc->type & PDP10_E_UNUSED)
|
||||
return error(scan_state, "<address> not allowed in this instruction", token, &token_attr);
|
||||
|
||||
switch (token) {
|
||||
case T_AT:
|
||||
stmt->u.insn.at = 1;
|
||||
return parse_insn_address_after_at(scan_state, stmt);
|
||||
case T_LPAREN: /* might be <displacement> or <index> */
|
||||
return parse_insn_address_after_lparen(scan_state, stmt);
|
||||
default:
|
||||
return error(scan_state, "junk after instruction name", token, &token_attr);
|
||||
}
|
||||
}
|
||||
|
||||
int parse_stmt(struct scan_state *scan_state, struct stmt *stmt)
|
||||
{
|
||||
enum token token;
|
||||
union token_attribute token_attr;
|
||||
|
||||
for (;;) {
|
||||
token = scan_token(scan_state, &token_attr);
|
||||
switch (token) {
|
||||
/*
|
||||
* directives
|
||||
*/
|
||||
case T_DOT_GLOBL:
|
||||
return parse_dot_globl(scan_state, stmt);
|
||||
case T_DOT_TEXT:
|
||||
return parse_dot_text(scan_state, stmt);
|
||||
/*
|
||||
* other symbols
|
||||
*/
|
||||
case T_SYMBOL: /* start of label, insn, or symbol assignment */
|
||||
return parse_after_symbol(scan_state, stmt, &token_attr);
|
||||
/*
|
||||
* synthetic symbols
|
||||
*/
|
||||
case T_ERROR:
|
||||
return -1; /* diagnostics already emitted by scan.c */
|
||||
case T_EOF:
|
||||
return 0;
|
||||
case T_NEWLINE:
|
||||
continue;
|
||||
default:
|
||||
return error(scan_state, "expected directive, label, or instruction", token, &token_attr);
|
||||
}
|
||||
}
|
||||
}
|
||||
12
as/parse.h
Normal file
12
as/parse.h
Normal file
@@ -0,0 +1,12 @@
|
||||
/*
|
||||
* parse.h
|
||||
*/
|
||||
#ifndef PARSE_H
|
||||
#define PARSE_H
|
||||
|
||||
#include "input.h" /* for struct stmt */
|
||||
#include "scan.h"
|
||||
|
||||
int parse_stmt(struct scan_state *scan_state, struct stmt *stmt);
|
||||
|
||||
#endif /* PARSE_H */
|
||||
239
as/scan.c
Normal file
239
as/scan.c
Normal file
@@ -0,0 +1,239 @@
|
||||
/*
|
||||
* scan.c
|
||||
*/
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "scan.h"
|
||||
#include "token.h"
|
||||
|
||||
void scan_init(struct scan_state *scan_state, const char *progname)
|
||||
{
|
||||
scan_state->progname = progname;
|
||||
scan_state->filename = "<stdin>";
|
||||
scan_state->linenr = 1;
|
||||
}
|
||||
|
||||
int scan_open(struct scan_state *scan_state, const char *filename)
|
||||
{
|
||||
if (filename[0] == '-' && filename[1] == '-' && filename[2] == '\0') {
|
||||
scan_state->filename = "<stdin>";
|
||||
filename = "/dev/stdin";
|
||||
} else
|
||||
scan_state->filename = filename;
|
||||
|
||||
if (freopen(filename, "r", stdin) == NULL) {
|
||||
fprintf(stderr, "%s: Error opening %s: %s\n", scan_state->progname, filename, strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void scan_ungetc(struct scan_state *scan_state, int ch)
|
||||
{
|
||||
if (ch != EOF && ungetc(ch, stdin) == EOF)
|
||||
fprintf(stderr, "%s: %s line %u: ungetc %d failed: %s\n",
|
||||
scan_state->progname, scan_state->filename, scan_state->linenr, ch, strerror(errno));
|
||||
}
|
||||
|
||||
/*
 * Read the next input character from stdin; returns EOF at end of input
 * or on a read error.
 */
static int scan_getchar(void)
{
    int ch = fgetc(stdin);

    return ch;
}
|
||||
|
||||
static void badchar(struct scan_state *scan_state, int ch, const char *context)
|
||||
{
|
||||
char buf[7];
|
||||
|
||||
if (ch == EOF) {
|
||||
buf[0] = '<';
|
||||
buf[1] = 'E';
|
||||
buf[2] = 'O';
|
||||
buf[3] = 'F';
|
||||
buf[4] = '>';
|
||||
buf[5] = '\0';
|
||||
} else if (' ' <= ch && ch <= '~') {
|
||||
buf[0] = '\'';
|
||||
buf[1] = ch;
|
||||
buf[2] = '\'';
|
||||
buf[3] = '\0';
|
||||
} else {
|
||||
buf[0] = '\'';
|
||||
buf[1] = '\\';
|
||||
buf[2] = '0' + ((ch >> 6) & 3);
|
||||
buf[3] = '0' + ((ch >> 3) & 7);
|
||||
buf[4] = '0' + (ch & 7);
|
||||
buf[5] = '\'';
|
||||
buf[6] = '\0';
|
||||
}
|
||||
|
||||
fprintf(stderr, "%s: %s, line %u: invalid character %s%s\n",
|
||||
scan_state->progname, scan_state->filename, scan_state->linenr, buf, context);
|
||||
}
|
||||
|
||||
/*
 * Map a digit character to its numeric value: '0'..'9' give 0..9 and
 * 'A'..'F' / 'a'..'f' give 10..15.  Any other input, including EOF, gives
 * -1U, which is larger than every base the scanner uses.
 */
static unsigned int get_chval(int ch)
{
    unsigned int value = -1U;

    if (ch >= '0' && ch <= '9')
        value = ch - '0';
    else if (ch >= 'a' && ch <= 'f')
        value = ch - ('a' - 10);
    else if (ch >= 'A' && ch <= 'F')
        value = ch - ('A' - 10);

    return value;
}
|
||||
|
||||
/*
 * Return 1 if CH may appear inside a symbol after its first character
 * (ASCII letters, decimal digits, '_', '$' and '.'), and 0 otherwise.
 */
static int is_symbol_internal_char(int ch)
{
    switch (ch) {
    case '_':
    case '$':
    case '.':
        return 1;
    default:
        break;
    }

    if (ch >= '0' && ch <= '9')
        return 1;
    if (ch >= 'a' && ch <= 'z')
        return 1;
    if (ch >= 'A' && ch <= 'Z')
        return 1;

    return 0;
}
|
||||
|
||||
static enum token do_symbol(struct scan_state *scan_state, union token_attribute *token_attr, int ch)
|
||||
{
|
||||
char charbuf[128]; /* 127 chars + NUL, XXX: make it dynamic */
|
||||
unsigned int len;
|
||||
char *text;
|
||||
|
||||
len = 0;
|
||||
do {
|
||||
if (len >= sizeof charbuf - 1) {
|
||||
fprintf(stderr, "%s: %s line %u: too long symbol\n",
|
||||
scan_state->progname, scan_state->filename, scan_state->linenr);
|
||||
return T_ERROR;
|
||||
}
|
||||
charbuf[len] = ch;
|
||||
++len;
|
||||
ch = scan_getchar();
|
||||
} while (is_symbol_internal_char(ch));
|
||||
charbuf[len] = '\0';
|
||||
scan_ungetc(scan_state, ch);
|
||||
|
||||
if (charbuf[0] == '.') {
|
||||
enum token low, high;
|
||||
|
||||
/* see token.def, reserved symbols occupy tokens [0,T_SYMBOL[ */
|
||||
low = 0;
|
||||
high = T_SYMBOL;
|
||||
|
||||
while (low < high) {
|
||||
enum token middle;
|
||||
int cmp;
|
||||
|
||||
middle = (low + high) / 2;
|
||||
cmp = strcmp(charbuf, token_info[middle].print_name);
|
||||
|
||||
if (cmp < 0)
|
||||
high = middle;
|
||||
else if (cmp > 0)
|
||||
low = middle + 1;
|
||||
else
|
||||
return middle;
|
||||
}
|
||||
}
|
||||
|
||||
text = malloc(len + 1);
|
||||
if (!text) {
|
||||
fprintf(stderr, "%s: %s line %u: malloc(%u) failed: %s\n",
|
||||
scan_state->progname, scan_state->filename, scan_state->linenr, len + 1, strerror(errno));
|
||||
return T_ERROR;
|
||||
}
|
||||
strcpy(text, charbuf);
|
||||
token_attr->text = text;
|
||||
return T_SYMBOL;
|
||||
}
|
||||
|
||||
static enum token do_number(struct scan_state *scan_state, union token_attribute *token_attr, int ch)
|
||||
{
|
||||
unsigned int base, chval;
|
||||
pdp10_uint36_t numval;
|
||||
|
||||
base = (ch == '0') ? 8 : 10;
|
||||
numval = ch - '0';
|
||||
|
||||
/* handle 0x<first hexdig> */
|
||||
ch = scan_getchar();
|
||||
if (base == 8 && (ch == 'x' || ch == 'X')) {
|
||||
base = 16;
|
||||
/* must have at least one hex digit after 0x */
|
||||
ch = scan_getchar();
|
||||
chval = get_chval(ch);
|
||||
if (chval >= 16) {
|
||||
badchar(scan_state, ch, " after 0x in hexadecimal literal");
|
||||
return T_ERROR;
|
||||
}
|
||||
numval = chval;
|
||||
ch = scan_getchar();
|
||||
}
|
||||
|
||||
/* the number is non-empty, consume and accumulate trailing
|
||||
characters as long as they are valid in the base */
|
||||
for (;;) {
|
||||
chval = get_chval(ch);
|
||||
if (chval >= base)
|
||||
break;
|
||||
numval = numval * base + chval; /* XXX: check for overflow */
|
||||
ch = scan_getchar();
|
||||
}
|
||||
|
||||
/* XXX: check for <decimal>{b,f} which is a local label reference */
|
||||
|
||||
/* plain integer literal */
|
||||
scan_ungetc(scan_state, ch);
|
||||
token_attr->uint = numval;
|
||||
return T_UINTEGER;
|
||||
}
|
||||
|
||||
enum token scan_token(struct scan_state *scan_state, union token_attribute *token_attr)
|
||||
{
|
||||
int ch;
|
||||
|
||||
ch = scan_getchar();
|
||||
|
||||
for (;; ch = scan_getchar()) {
|
||||
switch (ch) {
|
||||
case ' ':
|
||||
case '\t':
|
||||
case '\r':
|
||||
case '\f':
|
||||
continue;
|
||||
case '\n':
|
||||
++scan_state->linenr;
|
||||
return T_NEWLINE;
|
||||
case EOF:
|
||||
return T_EOF;
|
||||
case '@':
|
||||
return T_AT;
|
||||
case ':':
|
||||
return T_COLON;
|
||||
case ',':
|
||||
return T_COMMA;
|
||||
case '(':
|
||||
return T_LPAREN;
|
||||
case ')':
|
||||
return T_RPAREN;
|
||||
case '.':
|
||||
/* Dot may start a floating point literal, but tests show that
|
||||
gcc always outputs floating point values as integer literals,
|
||||
so we shouldn't have to support floating point literals at all. */
|
||||
case '$':
|
||||
case '_':
|
||||
return do_symbol(scan_state, token_attr, ch);
|
||||
default:
|
||||
if ('0' <= ch && ch <= '9') /* number or <decimal>{b,f} */
|
||||
return do_number(scan_state, token_attr, ch);
|
||||
if (('A' <= ch && ch <= 'Z') ||
|
||||
('a' <= ch && ch <= 'z'))
|
||||
return do_symbol(scan_state, token_attr, ch);
|
||||
break;
|
||||
}
|
||||
badchar(scan_state, ch, "");
|
||||
return T_ERROR;
|
||||
}
|
||||
}
|
||||
19
as/scan.h
Normal file
19
as/scan.h
Normal file
@@ -0,0 +1,19 @@
|
||||
/*
|
||||
* scan.h
|
||||
*/
|
||||
#ifndef SCAN_H
|
||||
#define SCAN_H
|
||||
|
||||
#include "token.h"
|
||||
|
||||
/* Scanner state that is threaded through every scan_*() and parse_*() call. */
struct scan_state {
    const char *progname;       /* program name used as diagnostics prefix; fixed after scan_init() */
    const char *filename;       /* input name shown in diagnostics; set by scan_init() and scan_open() */
    unsigned int linenr;        /* line number shown in diagnostics; starts at 1 */
};
|
||||
|
||||
void scan_init(struct scan_state *scan_state, const char *progname);
|
||||
int scan_open(struct scan_state *scan_state, const char *filename);
|
||||
enum token scan_token(struct scan_state *scan_state, union token_attribute *token_attr);
|
||||
|
||||
#endif /* SCAN_H */
|
||||
@@ -5,19 +5,7 @@
|
||||
#include "pdp10-inttypes.h"
|
||||
#include "token.h"
|
||||
|
||||
enum {
|
||||
FMT_NONE = 0,
|
||||
FMT_UINT = 1,
|
||||
FMT_SYMBOL = 2,
|
||||
FMT_STRING = 3,
|
||||
};
|
||||
|
||||
struct token_info {
|
||||
char print_name[15];
|
||||
unsigned char attribute_fmt;
|
||||
};
|
||||
|
||||
static const struct token_info token_info[] = {
|
||||
const struct token_info token_info[] = {
|
||||
#define TOKEN(T,P,F) { P, F },
|
||||
#include "token.def"
|
||||
#undef TOKEN
|
||||
@@ -39,13 +27,13 @@ void token_print(FILE *fp, enum token token, const union token_attribute *token_
|
||||
return;
|
||||
|
||||
switch (ti->attribute_fmt) {
|
||||
case FMT_UINT:
|
||||
fprintf(fp, " [%" PDP10_PRIu36 "u]", token_attr->uint);
|
||||
case TAFMT_UINT:
|
||||
fprintf(fp, " [%" PDP10_PRIu36 "]", token_attr->uint);
|
||||
break;
|
||||
case FMT_SYMBOL:
|
||||
case TAFMT_SYMBOL:
|
||||
fprintf(fp, " [%s]", token_attr->text);
|
||||
break;
|
||||
case FMT_STRING:
|
||||
case TAFMT_STRING:
|
||||
fprintf(fp, " [\"%s\"]", token_attr->text);
|
||||
break;
|
||||
default:
|
||||
84
as/token.def
Normal file
84
as/token.def
Normal file
@@ -0,0 +1,84 @@
|
||||
/*
|
||||
* token.def
|
||||
*
|
||||
* TOKEN(T_<name>, <print name>, <attribute fmt>)
|
||||
*/
|
||||
|
||||
/* reserved symbols including directives; MUST come first and MUST be listed in increasing alphanumeric order */
|
||||
TOKEN(T_DOT_GLOBL, ".globl", TAFMT_NONE)
|
||||
TOKEN(T_DOT_TEXT, ".text", TAFMT_NONE)
|
||||
/* non-reserved symbols; T_SYMBOL MUST be the first token after the list of reserved symbols */
|
||||
TOKEN(T_SYMBOL, "<symbol>", TAFMT_SYMBOL)
|
||||
/* literals */
|
||||
TOKEN(T_UINTEGER, "<integer>", TAFMT_UINT)
|
||||
/* special symbols including operators and separators */
|
||||
TOKEN(T_AT, "@", TAFMT_NONE)
|
||||
TOKEN(T_COLON, ":", TAFMT_NONE)
|
||||
TOKEN(T_COMMA, ",", TAFMT_NONE)
|
||||
TOKEN(T_LPAREN, "(", TAFMT_NONE)
|
||||
TOKEN(T_RPAREN, ")", TAFMT_NONE)
|
||||
/* synthetic symbols */
|
||||
TOKEN(T_NEWLINE, "<newline>", TAFMT_NONE)
|
||||
TOKEN(T_EOF, "<eof>", TAFMT_NONE)
|
||||
TOKEN(T_ERROR, "<error>", TAFMT_NONE)
|
||||
|
||||
/* XXX: old tokens not yet resurrected */
|
||||
#if 0
|
||||
TOKEN(T_DOT_ALIGN, ".align", TAFMT_NONE)
|
||||
TOKEN(T_DOT_ASCII, ".ascii", TAFMT_NONE)
|
||||
TOKEN(T_DOT_ASCIZ, ".asciz", TAFMT_NONE)
|
||||
TOKEN(T_DOT_BALIGN, ".balign", TAFMT_NONE)
|
||||
TOKEN(T_DOT_BSS, ".bss", TAFMT_NONE)
|
||||
TOKEN(T_DOT_BYTE, ".byte", TAFMT_NONE)
|
||||
TOKEN(T_DOT_COMM, ".comm", TAFMT_NONE)
|
||||
TOKEN(T_DOT_DATA, ".data", TAFMT_NONE)
|
||||
TOKEN(T_DOT_FILE, ".file", TAFMT_NONE)
|
||||
TOKEN(T_DOT_HIDDEN, ".hidden", TAFMT_NONE)
|
||||
TOKEN(T_DOT_IDENT, ".ident", TAFMT_NONE)
|
||||
TOKEN(T_DOT_INTERNAL, ".internal", TAFMT_NONE)
|
||||
TOKEN(T_DOT_LOCAL, ".local", TAFMT_NONE)
|
||||
TOKEN(T_DOT_LONG, ".long", TAFMT_NONE)
|
||||
TOKEN(T_DOT_ORG, ".org", TAFMT_NONE)
|
||||
TOKEN(T_DOT_P2ALIGN, ".p2align", TAFMT_NONE)
|
||||
TOKEN(T_DOT_POPSECTION, ".popsection", TAFMT_NONE)
|
||||
TOKEN(T_DOT_PREVIOUS, ".previous", TAFMT_NONE)
|
||||
TOKEN(T_DOT_PROTECTED, ".protected", TAFMT_NONE)
|
||||
TOKEN(T_DOT_PUSHSECTION, ".pushsection", TAFMT_NONE)
|
||||
TOKEN(T_DOT_RODATA, ".rodata", TAFMT_NONE)
|
||||
TOKEN(T_DOT_SECTION, ".section", TAFMT_NONE)
|
||||
TOKEN(T_DOT_SET, ".set", TAFMT_NONE)
|
||||
TOKEN(T_DOT_SHORT, ".short", TAFMT_NONE)
|
||||
TOKEN(T_DOT_SIZE, ".size", TAFMT_NONE)
|
||||
TOKEN(T_DOT_SUBSECTION, ".subsection", TAFMT_NONE)
|
||||
TOKEN(T_DOT_SYMVER, ".symver", TAFMT_NONE)
|
||||
TOKEN(T_DOT_TYPE, ".type", TAFMT_NONE)
|
||||
TOKEN(T_DOT_WEAK, ".weak", TAFMT_NONE)
|
||||
TOKEN(T_DOT_WEAKREF, ".weakref", TAFMT_NONE)
|
||||
/* other symbols */
|
||||
TOKEN(T_REGISTER, "<register>", TAFMT_UINT)
|
||||
TOKEN(T_LOCAL_LABEL, "<local label>", TAFMT_UINT) /* 1f, 2b */
|
||||
/* literals */
|
||||
TOKEN(T_STRING, "<string>", TAFMT_STRING)
|
||||
/* operators, separators */
|
||||
TOKEN(T_TILDE, "~", TAFMT_NONE)
|
||||
TOKEN(T_MUL, "*", TAFMT_NONE)
|
||||
TOKEN(T_DIV, "/", TAFMT_NONE)
|
||||
TOKEN(T_REM, "%", TAFMT_NONE)
|
||||
TOKEN(T_LSHIFT, "<<", TAFMT_NONE)
|
||||
TOKEN(T_RSHIFT, ">>", TAFMT_NONE)
|
||||
TOKEN(T_OR, "|", TAFMT_NONE)
|
||||
TOKEN(T_AND, "&", TAFMT_NONE)
|
||||
TOKEN(T_CARET, "^", TAFMT_NONE)
|
||||
TOKEN(T_BANG, "!", TAFMT_NONE)
|
||||
TOKEN(T_PLUS, "+", TAFMT_NONE)
|
||||
TOKEN(T_MINUS, "-", TAFMT_NONE)
|
||||
TOKEN(T_EQ, "=", TAFMT_NONE)
|
||||
TOKEN(T_EQEQ, "==", TAFMT_NONE)
|
||||
TOKEN(T_NEQ, "!=", TAFMT_NONE)
|
||||
TOKEN(T_LT, "<", TAFMT_NONE)
|
||||
TOKEN(T_GT, ">", TAFMT_NONE)
|
||||
TOKEN(T_GE, ">=", TAFMT_NONE)
|
||||
TOKEN(T_LE, "<=", TAFMT_NONE)
|
||||
TOKEN(T_ANDAND, "&&", TAFMT_NONE)
|
||||
TOKEN(T_OROR, "||", TAFMT_NONE)
|
||||
#endif
|
||||
40
as/token.h
Normal file
40
as/token.h
Normal file
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
* token.h
|
||||
*/
|
||||
#ifndef TOKEN_H
|
||||
#define TOKEN_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include "pdp10-stdint.h"
|
||||
|
||||
/* One enumerator per TOKEN() entry in token.def, in the order listed there. */
enum token {
#define TOKEN(T,P,F) T,
#include "token.def"
#undef TOKEN
};
|
||||
|
||||
/* Token attribute formats: the third argument of each TOKEN() entry in
   token.def, stored in struct token_info's attribute_fmt field. */
enum {
    TAFMT_NONE = 0,     /* the token has no attribute */
    TAFMT_UINT = 1,     /* the attribute is an unsigned integer */
    TAFMT_SYMBOL = 2,   /* the attribute is the text of a symbol */
    TAFMT_STRING = 3,   /* the attribute is the text of a string */
};
|
||||
|
||||
/* Static description of one token, taken from its TOKEN() entry in token.def. */
struct token_info {
    char print_name[15];                /* NUL-terminated print name; every name in token.def must fit */
    unsigned char attribute_fmt;        /* one of the TAFMT_* values */
};
|
||||
|
||||
/* token_info[] is indexed by token and is used by token_print() to print tokens;
|
||||
it is also public so the scanner can map directive names to tokens without
|
||||
duplicating the names or the name-to-token mapping */
|
||||
extern const struct token_info token_info[];
|
||||
|
||||
union token_attribute {
|
||||
const char *text; /* symbol, string */
|
||||
pdp10_uint36_t uint; /* uinteger */
|
||||
};
|
||||
|
||||
void token_print(FILE *fp, enum token token, const union token_attribute *token_attr);
|
||||
|
||||
#endif /* TOKEN_H */
|
||||
Reference in New Issue
Block a user