Initial import

This commit is contained in:
Mikael Pettersson 2013-07-03 16:29:42 +00:00
commit 7c189dd488
36 changed files with 5441 additions and 0 deletions

22
as/Makefile Normal file
View File

@ -0,0 +1,22 @@
# Build the assembler's object files.  Header prerequisites are listed
# by hand for each object below.
CC=gcc
CFLAGS=-O2 -g -Wall
CPPFLAGS=-I../include
OBJS= arrlst.o charbuf.o emalloc.o htab.o main.o parse.o pass1.o scan.o section.o strtab.o token.o

# 'all' and 'clean' name actions, not files; declare them phony so a stray
# file with one of those names cannot suppress the rule.
.PHONY: all clean

all: $(OBJS)
arrlst.o: arrlst.h emalloc.h
charbuf.o: charbuf.h emalloc.h
emalloc.o: emalloc.h
htab.o: emalloc.h htab.h
main.o: pass1.h
parse.o: emalloc.h parse.h scan.h token.h token.def stmt.h expr.h strtab.h section.h ../include/pdp10-stdint.h ../include/pdp10-elf36.h
pass1.o: parse.h pass1.h scan.h section.h stmt.h expr.h token.h ../include/pdp10-elf36.h arrlst.h htab.h strtab.h ../include/pdp10-stdint.h
scan.o: charbuf.h scan.h token.h token.def ../include/pdp10-stdint.h
section.o: emalloc.h htab.h section.h arrlst.h stmt.h expr.h strtab.h ../include/pdp10-elf36.h ../include/pdp10-stdint.h
strtab.o: emalloc.h htab.h strtab.h
token.o: ../include/pdp10-inttypes.h token.h token.def
clean:
	rm -f $(OBJS) a.out core core.*

52
as/Notes.txt Normal file
View File

@ -0,0 +1,52 @@
* strip whitespace, leave one space before a keyword on a line, compress any
other whitespace to a single space
* white is blank, tab, /* ... */
* target-specific line comment, e.g. #
* symbol = [letter | _ | . | $] [letter | _ | . | $ | digit]*
* stmt: {label}* { directive | instruction | empty }
* %r0..%r15 with %sp == %r15 ? (no, % is also an operator)
* $0..$15 with $sp == $15? (yes, these are just symbols)
* PDP10 syntax:
mnemonic accumulator,address
movem 1,foo
mnemonic accumulator,
popj 17,
mnemonic address
setzm foo
skipe 0(16)
address prefixed by "@" makes it indirect
setzm @foo
address suffixed by "(ixreg)" makes it indexed
setzm 3(16)
";" is line comment char
foo ;comment
radix is 8 by default; a single-digit number is always decimal
radix can be changed by the RADIX directive
a number can indicate its radix by a ^B, ^O, or ^D prefix (XXX: add ^X for hex)
symbols can use letters, digits, dot, dollar, and percent signs
exp1,,exp2 assembles two 18-bit expressions into a 36-bit value
< expr > parentheses
there is no "semi-colon" like symbol for putting multiple statements
on a single line

28
as/TODO Normal file
View File

@ -0,0 +1,28 @@
section text
* subsection text0
** frag0, frag1, ... each being sth that generates object code
* subsection text1
** frag...
section data
* subsection data0
** frag...
array of sections (ELF allows user-defined ones)
a section is array of subsections, sorted and concatenated during output, ld sees no subsections
a subsection is array of fragments
a fragment describes a directive, an insn, a label defn
pass1 parses .s text, creates frags, and appends them to subsections
pass2 computes labels
pass3 outputs concatenated data in ELF form
helpers
* string table
* symbol table
* section table
copy ELF32 headers and use them throughout, esp. for symbol/section types etc
Look up section group and comdat in sco elf draft html pages.

186
as/arrlst.c Normal file
View File

@ -0,0 +1,186 @@
/*
* arrlst.c
*/
#include <stdlib.h>
#include <string.h>
#include "arrlst.h"
#include "emalloc.h"
/* Number of element-sized slots in one chunk.  A chunk is allocated as
eltsz * ARRLST_CHUNK_NRELEM bytes, and the leading slots of each chunk
are reserved for the pointer to the next chunk. */
enum {
ARRLST_CHUNK_NRELEM = 128,
};
/* An append-only list of fixed-size elements, stored in a singly linked
chain of chunks.  Each chunk starts with a link to the next chunk. */
struct arrlst {
void *head; /* first chunk, NULL until the first append */
void *tail; /* chunk that receives newly appended elements */
unsigned int eltsz; /* size of one element in bytes */
unsigned int tailpos; /* slot index of the next free element in the tail chunk */
/* We only allow a single iterator per arrlst, so we
allocate the iterator in the arrlst itself. */
struct {
void *chunk; /* chunk currently being iterated, NULL after rewind */
unsigned int chunkpos; /* slot index of the next element to return */
unsigned int chunklen; /* slot index one past the last element in chunk */
} iter;
};
/* Every chunk begins with the link to its successor; return the
   address of that link so callers can read or update it. */
static void **arrlst_chunk_nextp(const void *chunk)
{
    void **linkp = (void**)chunk;

    return linkp;
}
/* Return how many eltsz-byte slots are needed to hold one void*,
   i.e. sizeof(void*) / eltsz rounded up.  eltsz must be non-zero. */
static unsigned int eltsz_chunk_header_nrelem(unsigned int eltsz)
{
    unsigned int whole = sizeof(void*) / eltsz;
    unsigned int partial = (sizeof(void*) % eltsz != 0) ? 1 : 0;

    return whole + partial;
}
/* Number of leading slots in each of this list's chunks that are
   taken up by the next-chunk link. */
static unsigned int arrlst_chunk_header_nrelem(const struct arrlst *arrlst)
{
    unsigned int eltsz = arrlst->eltsz;

    return eltsz_chunk_header_nrelem(eltsz);
}
/* Address of slot number eltnr inside chunk, counting in units of the
   list's element size. */
static void *arrlst_chunk_element(const struct arrlst *arrlst, void *chunk, unsigned int eltnr)
{
    char *base = chunk;

    return &base[eltnr * arrlst->eltsz];
}
static void *arrlst_alloc_chunk(const struct arrlst *arrlst)
{
unsigned int nrbytes;
void *chunk;
nrbytes = arrlst->eltsz * ARRLST_CHUNK_NRELEM;
chunk = emalloc(nrbytes);
return chunk;
}
/*
 * Create an empty array list whose elements are eltsz bytes each.
 *
 * Returns NULL when eltsz is zero, does not fit in unsigned int, is so
 * large that one chunk's byte count (eltsz * ARRLST_CHUNK_NRELEM) would
 * overflow unsigned int, or when the next-chunk link would use up a
 * whole chunk.  Otherwise returns a list to be released with
 * arrlst_free().
 */
struct arrlst *arrlst_alloc(size_t eltsz)
{
    struct arrlst *arrlst;

    if (eltsz == 0
        || eltsz > (unsigned int)-1
        || eltsz > (unsigned int)-1 / ARRLST_CHUNK_NRELEM
        || eltsz_chunk_header_nrelem(eltsz) >= ARRLST_CHUNK_NRELEM)
        return NULL;
    arrlst = emalloc(sizeof *arrlst);
    /* these fields will be adjusted in the first call to append():
       tailpos == ARRLST_CHUNK_NRELEM forces a chunk allocation */
    arrlst->head = NULL;
    arrlst->tail = NULL;
    arrlst->eltsz = eltsz;
    arrlst->tailpos = ARRLST_CHUNK_NRELEM;
    /* start with a rewound iterator so arrlst_iter_next() never reads
       indeterminate fields even without a prior arrlst_iter_rewind() */
    arrlst->iter.chunk = NULL;
    arrlst->iter.chunklen = 0;
    arrlst->iter.chunkpos = 0;
    return arrlst;
}
/* Release every chunk of the list and then the list header itself. */
void arrlst_free(struct arrlst *arrlst)
{
    void *cur;

    for (cur = arrlst->head; cur != NULL; ) {
        /* fetch the link before the chunk holding it is freed */
        void *following = *arrlst_chunk_nextp(cur);

        free(cur);
        cur = following;
    }
    free(arrlst);
}
/* Return the number of elements appended so far.  Every chunk that has
   a successor is full; the last chunk is filled up to tailpos.  The
   header slots of each chunk are not counted. */
size_t arrlst_length(const struct arrlst *arrlst)
{
    const void *cur = arrlst->head;
    const void *succ;
    size_t hdr;
    size_t total = 0;

    if (cur == NULL)
        return 0;
    hdr = arrlst_chunk_header_nrelem(arrlst);
    while ((succ = *arrlst_chunk_nextp(cur)) != NULL) {
        total += ARRLST_CHUNK_NRELEM - hdr;
        cur = succ;
    }
    return total + arrlst->tailpos - hdr;
}
/*
 * Reserve room for one more element at the end of the list and return a
 * pointer to its (uninitialised) storage.  A new chunk is linked in when
 * the current tail chunk is full or the list is still empty.  Returns
 * NULL only if a chunk could not be allocated.
 */
void *arrlst_append(struct arrlst *arrlst)
{
    void *tail;
    void *elt;

    tail = arrlst->tail;
    if (arrlst->tailpos >= ARRLST_CHUNK_NRELEM) {
        void *new_tail;

        new_tail = arrlst_alloc_chunk(arrlst);
        if (!new_tail)
            return NULL;
        /* terminate the chain here so traversals always find NULL */
        *arrlst_chunk_nextp(new_tail) = NULL;
        if (tail)
            *arrlst_chunk_nextp(tail) = new_tail;
        else
            arrlst->head = new_tail;
        /* the tail must advance on every new chunk, not only the first;
           otherwise later appends would reuse slots of an older chunk */
        arrlst->tail = new_tail;
        arrlst->tailpos = arrlst_chunk_header_nrelem(arrlst);
        tail = new_tail;
    }
    elt = arrlst_chunk_element(arrlst, tail, arrlst->tailpos);
    ++arrlst->tailpos;
    return elt;
}
/* Reset the list's single built-in iterator. */
void arrlst_iter_rewind(struct arrlst *arrlst)
{
    /* with no current chunk and chunkpos >= chunklen, the next call to
       arrlst_iter_next() starts over from the head chunk */
    arrlst->iter.chunkpos = 0;
    arrlst->iter.chunklen = 0;
    arrlst->iter.chunk = NULL;
}
/* Return a pointer to the next element of the iteration, or NULL when
   there are no more elements. */
void *arrlst_iter_next(struct arrlst *arrlst)
{
    void *cur = arrlst->iter.chunk;

    if (arrlst->iter.chunkpos >= arrlst->iter.chunklen) {
        /* current chunk exhausted (or nothing visited yet): move on */
        cur = (cur == NULL) ? arrlst->head : *arrlst_chunk_nextp(cur);
        if (cur == NULL)
            return NULL;
        arrlst->iter.chunk = cur;
        /* a chunk with a successor is full; the last one ends at tailpos */
        if (*arrlst_chunk_nextp(cur) != NULL)
            arrlst->iter.chunklen = ARRLST_CHUNK_NRELEM;
        else
            arrlst->iter.chunklen = arrlst->tailpos;
        arrlst->iter.chunkpos = arrlst_chunk_header_nrelem(arrlst);
    }
    return arrlst_chunk_element(arrlst, cur, arrlst->iter.chunkpos++);
}

20
as/arrlst.h Normal file
View File

@ -0,0 +1,20 @@
/*
* arrlst.h
*/
#ifndef ARRLST_H
#define ARRLST_H
#include <stdlib.h> /* size_t */
/* Opaque append-only list of fixed-size elements; defined in arrlst.c. */
struct arrlst;
/* Create an empty list of eltsz-byte elements; NULL if eltsz is rejected. */
struct arrlst *arrlst_alloc(size_t eltsz);
/* Release the list and all of its element storage. */
void arrlst_free(struct arrlst *arrlst);
/* Number of elements appended so far. */
size_t arrlst_length(const struct arrlst *arrlst);
/* Reserve one more element at the end and return a pointer to its storage. */
void *arrlst_append(struct arrlst *arrlst);
/* for now there is only one iterator per arrlst */
/* Restart the iteration from the first element. */
void arrlst_iter_rewind(struct arrlst *arrlst);
/* Return the next element, or NULL once the iteration is finished. */
void *arrlst_iter_next(struct arrlst *arrlst);
#endif /* ARRLST_H */

90
as/charbuf.c Normal file
View File

@ -0,0 +1,90 @@
/*
* charbuf.c
*/
#include <stdlib.h>
#include <string.h>
#include "charbuf.h"
#include "emalloc.h"
/* Make charbuf empty: its only chunk is the embedded head chunk, which
   is also the tail, and nothing has been written to it yet. */
void charbuf_init(struct charbuf *charbuf)
{
    charbuf->pos = 0;
    charbuf->tail = &charbuf->head;
    charbuf->head.next = NULL;
}
/* Free the overflow chunks that follow the embedded head chunk.  The
   head chunk is part of struct charbuf and is not freed here. */
void charbuf_fini(struct charbuf *charbuf)
{
    struct charbuf_chunk *cur;

    for (cur = charbuf->head.next; cur != NULL; ) {
        struct charbuf_chunk *following = cur->next;

        free(cur);
        cur = following;
    }
}
/* Append the character ch, linking in a fresh chunk first when the
   tail chunk is already full. */
void charbuf_append(struct charbuf *charbuf, int ch)
{
    struct charbuf_chunk *cur = charbuf->tail;
    unsigned int idx = charbuf->pos;

    if (idx >= sizeof cur->buf) {
        struct charbuf_chunk *fresh = emalloc(sizeof *fresh);

        fresh->next = NULL;
        cur->next = fresh;
        charbuf->tail = fresh;
        cur = fresh;
        idx = 0;
    }
    cur->buf[idx] = ch;
    charbuf->pos = idx + 1;
}
/*
 * Compare the contents of charbuf with the NUL-terminated string.
 * Returns a negative, zero, or positive value like strcmp().
 *
 * The buffered text carries no terminator, so once all buffered
 * characters have matched we must still check that string ends at the
 * same place.  Otherwise a buffer holding ".asci" would compare equal
 * to ".asciz".
 */
int charbuf_strcmp(const struct charbuf *charbuf, const char *string)
{
    const struct charbuf_chunk *chunk;
    int cmp;

    /* every chunk except the last is completely filled */
    for (chunk = &charbuf->head; chunk->next != NULL; chunk = chunk->next) {
        cmp = strncmp(chunk->buf, string, sizeof chunk->buf);
        if (cmp)
            return cmp;
        string += sizeof chunk->buf;
    }
    cmp = strncmp(chunk->buf, string, charbuf->pos);
    if (cmp)
        return cmp;
    /* all buffered characters matched; a longer string sorts after us */
    return strlen(string) > charbuf->pos ? -1 : 0;
}
/* Return the buffered characters as a newly allocated, NUL-terminated
   string.  The storage comes from emalloc(). */
char *charbuf_string(const struct charbuf *charbuf)
{
    const struct charbuf_chunk *cur;
    size_t full_bytes = 0;
    char *result, *out;

    /* all chunks before the last one are completely filled */
    for (cur = &charbuf->head; cur->next != NULL; cur = cur->next)
        full_bytes += sizeof cur->buf;
    result = emalloc(full_bytes + charbuf->pos + 1);

    out = result;
    for (cur = &charbuf->head; cur->next != NULL; cur = cur->next) {
        memcpy(out, cur->buf, sizeof cur->buf);
        out += sizeof cur->buf;
    }
    /* the last chunk holds only pos characters */
    memcpy(out, cur->buf, charbuf->pos);
    out[charbuf->pos] = '\0';
    return result;
}

24
as/charbuf.h Normal file
View File

@ -0,0 +1,24 @@
/*
* charbuf.h
*/
#ifndef CHARBUF_H
#define CHARBUF_H
/* One link in a charbuf's chain of character storage.  buf is sized as
128 bytes minus one pointer. */
struct charbuf_chunk {
char buf[128 - sizeof(struct charbuf_chunk*)];
struct charbuf_chunk *next;
};
/* A growable character buffer.  The first chunk is embedded; further
chunks are allocated as the text grows. */
struct charbuf {
struct charbuf_chunk head;
struct charbuf_chunk *tail; /* INV: tail->next == NULL */
unsigned int pos; /* in tail chunk */
};
/* Make charbuf empty. */
void charbuf_init(struct charbuf *charbuf);
/* Free the chunks allocated beyond the embedded head chunk. */
void charbuf_fini(struct charbuf *charbuf);
/* Append one character. */
void charbuf_append(struct charbuf *charbuf, int ch);
/* Compare the buffered text against string using strncmp() per chunk. */
int charbuf_strcmp(const struct charbuf *charbuf, const char *string);
/* Return the buffered text as a newly allocated NUL-terminated string. */
char *charbuf_string(const struct charbuf *charbuf);
#endif /* CHARBUF_H */

20
as/emalloc.c Normal file
View File

@ -0,0 +1,20 @@
/*
* emalloc.c
*/
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "emalloc.h"
/*
 * Allocate nrbytes bytes with malloc(), or print a diagnostic to stderr
 * and exit(1) if the allocation fails.  Never returns NULL.
 */
void *emalloc(size_t nrbytes)
{
    void *p;

    /* malloc(0) is allowed to return NULL without that being an error;
       request one byte instead so NULL always means failure */
    p = malloc(nrbytes ? nrbytes : 1);
    if (!p) {
        fprintf(stderr, "malloc(%zu) failed: %s\n", nrbytes, strerror(errno));
        exit(1);
    }
    return p;
}

11
as/emalloc.h Normal file
View File

@ -0,0 +1,11 @@
/*
* emalloc.h
*/
#ifndef EMALLOC_H
#define EMALLOC_H
#include <stddef.h> /* size_t */
/* malloc() wrapper: on failure prints a message to stderr and calls
exit(1) instead of returning NULL. */
void *emalloc(size_t nrbytes);
#endif /* EMALLOC_H */

168
as/expr.c Normal file
View File

@ -0,0 +1,168 @@
/*
* expr.c
*/
#include <stdio.h>
#include "pdp10-arith.h"
#include "expr.h"
/*
 * Evaluate expr and store the result, a section plus an offset, in
 * *value.  Returns 0 on success and -1 on error; unless quiet is
 * non-zero an error is also reported on stderr.
 *
 * Addition and subtraction accept section-relative operands in the
 * combinations checked below; every other operator requires both
 * operands to be absolute (SECTION_ABS).  Division and remainder by
 * zero are rejected.
 */
int eval(const struct expr *expr, struct value *value, int quiet)
{
    switch (expr->tag) {
    case E_UINTEGER:
        value->section = SECTION_ABS;
        value->offset = expr->u.e_uinteger.val;
        break;
    case E_SYMBOL:
        /* unimplemented placeholder kept from the original; 'xxx' is not
           declared anywhere in the visible source */
        xxx;
        break;
    case E_UNARY:
        if (eval(expr->u.e_unary.expr, value, quiet) < 0)
            return -1;
        if (value->section != SECTION_ABS) {
            if (!quiet)
                fprintf(stderr, "as: %s(): unary operand is not absolute\n", __FUNCTION__);
            return -1;
        }
        switch (expr->u.e_unary.unop) {
        case E_UMINUS:
            value->offset = pdp10_neg_int36(value->offset);
            break;
        case E_NOT:
            value->offset = pdp10_not_int36(value->offset);
            break;
        }
        break;
    case E_BINARY:
    {
        struct value value2;

        if (eval(expr->u.e_binary.expr1, value, quiet) < 0
            || eval(expr->u.e_binary.expr2, &value2, quiet) < 0)
            return -1;
        /* + and - are the only operators that accept non-absolute operands */
        switch (expr->u.e_binary.binop) {
        case E_ADD:
            if (value->section == SECTION_ABS)
                value->section = value2.section;
            else if (value2.section == SECTION_ABS)
                ;
            else if (value->section == value2.section)
                ;
            else {
                if (!quiet)
                    fprintf(stderr, "as: %s(): adding operands from different sections\n", __FUNCTION__);
                return -1;
            }
            value->offset = pdp10_add_int36(value->offset, value2.offset);
            return 0;
        case E_SUB:
            if (value2.section == SECTION_ABS)
                ;
            else if (value->section == value2.section)
                value->section = SECTION_ABS; /* difference within one section is absolute */
            else {
                if (!quiet)
                    fprintf(stderr, "as: %s(): subtracting operands from different sections\n", __FUNCTION__);
                return -1;
            }
            value->offset = pdp10_sub_int36(value->offset, value2.offset);
            return 0;
        default:
            break;
        }
        if (value->section != SECTION_ABS
            || value2.section != SECTION_ABS) {
            if (!quiet)
                fprintf(stderr, "as: %s(): binary sub-<expr> is not absolute\n", __FUNCTION__);
            return -1;
        }
        switch (expr->u.e_binary.binop) {
        case E_MUL:
            value->offset = pdp10_mul_int36(value->offset, value2.offset);
            break;
        case E_DIV:
            if (!pdp10_nonzero_int36(value2.offset)) {
                if (!quiet)
                    fprintf(stderr, "as: %s(): division by zero\n", __FUNCTION__);
                return -1;
            }
            value->offset = pdp10_div_int36(value->offset, value2.offset);
            break;
        case E_REM:
            if (!pdp10_nonzero_int36(value2.offset)) {
                if (!quiet)
                    fprintf(stderr, "as: %s(): division by zero\n", __FUNCTION__);
                return -1;
            }
            value->offset = pdp10_rem_int36(value->offset, value2.offset);
            break;
        case E_LSHIFT:
            /* XXX: range check */
            value->offset = pdp10_asl_int36(value->offset, value2.offset);
            break;
        case E_RSHIFT:
            /* XXX: range check */
            value->offset = pdp10_asr_int36(value->offset, value2.offset);
            break;
        case E_OR:
            value->offset = pdp10_or_int36(value->offset, value2.offset);
            break;
        case E_AND:
            value->offset = pdp10_and_int36(value->offset, value2.offset);
            break;
        case E_XOR:
            value->offset = pdp10_xor_int36(value->offset, value2.offset);
            break;
        case E_ORNOT:
            value->offset = pdp10_or_int36(value->offset, pdp10_not_int36(value2.offset));
            break;
        /* comparisons yield all-ones for true and zero for false */
        case E_EQ:
            value->offset = pdp10_eq_int36(value->offset, value2.offset) ? PDP10_UINT36_MAX : 0;
            break;
        case E_NE:
            value->offset = pdp10_ne_int36(value->offset, value2.offset) ? PDP10_UINT36_MAX : 0;
            break;
        case E_LT:
            value->offset = pdp10_lt_int36(value->offset, value2.offset) ? PDP10_UINT36_MAX : 0;
            break;
        case E_GT:
            value->offset = pdp10_gt_int36(value->offset, value2.offset) ? PDP10_UINT36_MAX : 0;
            break;
        case E_GE:
            value->offset = pdp10_ge_int36(value->offset, value2.offset) ? PDP10_UINT36_MAX : 0;
            break;
        case E_LE:
            value->offset = pdp10_le_int36(value->offset, value2.offset) ? PDP10_UINT36_MAX : 0;
            break;
        /* logical && and || yield 1 or 0 */
        case E_ANDAND:
            value->offset = (pdp10_nonzero_int36(value->offset) && pdp10_nonzero_int36(value2.offset)) ? 1 : 0;
            break;
        case E_OROR:
            value->offset = (pdp10_nonzero_int36(value->offset) || pdp10_nonzero_int36(value2.offset)) ? 1 : 0;
            break;
        default:
            /* E_ADD / E_SUB cannot occur here, but the compiler may not see that */
            break;
        }
        break;
    }
    }
    return 0;
}
/* Evaluate expr and require the result to be absolute.  On success the
   value is stored in *offset and 0 is returned; otherwise -1 is
   returned, with a message on stderr unless quiet is non-zero. */
int eval_abs(const struct expr *expr, pdp10_uint36_t *offset, int quiet)
{
    struct value result;

    if (eval(expr, &result, quiet) >= 0) {
        if (result.section == SECTION_ABS) {
            *offset = result.offset;
            return 0;
        }
        if (!quiet)
            fprintf(stderr, "as: non-absolute expression\n");
    }
    return -1;
}
/* eval_abs() with error reporting enabled. */
int eval_abs_verbose(const struct expr *expr, pdp10_uint36_t *offset)
{
    const int quiet = 0;

    return eval_abs(expr, offset, quiet);
}
/* eval_abs() with error reporting suppressed. */
int eval_abs_quiet(const struct expr *expr, pdp10_uint36_t *offset)
{
    const int quiet = 1;

    return eval_abs(expr, offset, quiet);
}

76
as/expr.h Normal file
View File

@ -0,0 +1,76 @@
/*
* expr.h
*/
#ifndef EXPR_H
#define EXPR_H
#include "pdp10-elf36.h"
#include "section.h"
#include "strtab.h"
/* Discriminator for the union in struct expr. */
enum expr_tag {
E_UINTEGER,
E_SYMBOL,
E_UNARY,
E_BINARY,
};
/* Unary operators. */
enum expr_unop {
E_UMINUS,
E_NOT,
};
/* Binary operators.  eval() allows non-absolute operands only for
E_ADD and E_SUB. */
enum expr_binop {
E_MUL,
E_DIV,
E_REM,
E_LSHIFT,
E_RSHIFT,
E_OR,
E_AND,
E_XOR,
E_ORNOT,
E_ADD,
E_SUB,
E_EQ,
E_NE,
E_LT,
E_GT,
E_GE,
E_LE,
E_ANDAND,
E_OROR,
};
/* Expression tree node; 'tag' selects the active member of 'u'. */
struct expr {
enum expr_tag tag;
union {
struct {
pdp10_uint36_t val;
} e_uinteger;
struct {
const struct strnode *name;
} e_symbol;
struct {
enum expr_unop unop;
struct expr *expr;
} e_unary;
struct {
enum expr_binop binop;
struct expr *expr1;
struct expr *expr2;
} e_binary;
} u;
};
/* Result of evaluating an expression: an offset relative to a section.
eval() uses SECTION_ABS as the section of absolute values. */
struct value {
struct section *section;
pdp10_uint36_t offset;
};
/* Evaluate expr into *value; returns 0 on success, -1 on error.
A non-zero quiet suppresses the error messages. */
int eval(const struct expr *expr, struct value *value, int quiet);
/* Like eval(), but fails unless the result is absolute; stores the
offset in *offset. */
int eval_abs(const struct expr *expr, pdp10_uint36_t *offset, int quiet);
/* eval_abs() with quiet == 0. */
int eval_abs_verbose(const struct expr *expr, pdp10_uint36_t *offset);
/* eval_abs() with quiet == 1. */
int eval_abs_quiet(const struct expr *expr, pdp10_uint36_t *offset);
#endif /* EXPR_H */

136
as/htab.c Normal file
View File

@ -0,0 +1,136 @@
/*
* htab.c
*/
#include <stdlib.h>
#include <string.h>
#include "emalloc.h"
#include "htab.h"
/* Allocate a zero-filled bucket array with room for size chain heads. */
static struct hnode **htab_alloc_bucket(unsigned int size)
{
    size_t nrbytes = size * sizeof(struct hnode*);
    struct hnode **bucket = emalloc(nrbytes);

    return memset(bucket, 0, nrbytes);
}
/*
 * Initialise htab as an empty table with 2^log2size buckets.  cmpfn is
 * used by htab_lookup() to compare a node against the lookup data; it
 * may be NULL, in which case equal hash values alone count as a match.
 */
void htab_init(struct htab *htab, unsigned int log2size, htab_cmpfn_t cmpfn)
{
    unsigned int size;

    /* shift an unsigned 1: a signed 1 << 31 would be undefined */
    size = 1U << log2size;
    htab->log2size = log2size;
    htab->mask = size - 1;
    htab->used = 0;
    htab->cmpfn = cmpfn;
    htab->bucket = htab_alloc_bucket(size);
}
/* Find a node whose stored hash equals hval and which the table's
   compare function (if any) reports as matching data, i.e. returns 0.
   Returns the node, or NULL when there is none. */
struct hnode *htab_lookup(const struct htab *htab, uintptr_t hval, const void *data)
{
    const htab_cmpfn_t same = htab->cmpfn;
    struct hnode *cand;

    for (cand = htab->bucket[hval & htab->mask]; cand != NULL; cand = cand->hnext) {
        if (cand->hval != hval)
            continue;
        if (same == NULL || (*same)(cand, data) == 0)
            return cand;
    }
    return NULL;
}
/* Double the number of buckets and redistribute all nodes into the new
   bucket array according to their stored hash values. */
static void htab_grow(struct htab *htab)
{
    unsigned int old_size, new_size, new_mask;
    struct hnode **old_bucket, **new_bucket;
    unsigned int i;

    /* shift an unsigned 1: a signed 1 << 31 would be undefined */
    old_size = 1U << htab->log2size;
    htab->log2size += 1;
    new_size = 1U << htab->log2size;
    new_mask = new_size - 1;
    htab->mask = new_mask;
    old_bucket = htab->bucket;
    new_bucket = htab_alloc_bucket(new_size);
    htab->bucket = new_bucket;
    for (i = 0; i < old_size; ++i) {
        struct hnode *hnode = old_bucket[i];

        while (hnode != NULL) {
            /* save the old link before re-linking the node */
            struct hnode *hnext = hnode->hnext;
            unsigned int j = hnode->hval & new_mask;

            hnode->hnext = new_bucket[j];
            new_bucket[j] = hnode;
            hnode = hnext;
        }
    }
    free(old_bucket);
}
/*
 * Insert hnode at the front of the chain selected by hnode->hval, which
 * the caller must have set.  No check for duplicates is made.  The
 * table is grown once more than 80% as many nodes as buckets are stored.
 */
void htab_insert(struct htab *htab, struct hnode *hnode)
{
    unsigned int i;
    unsigned int size;

    i = hnode->hval & htab->mask;
    hnode->hnext = htab->bucket[i];
    htab->bucket[i] = hnode;
    htab->used += 1;
    /* shift an unsigned 1: a signed 1 << 31 would be undefined */
    size = 1U << htab->log2size;
    /* rehash at 80%; the product is computed in a wider type so that
       4 * size cannot wrap for very large tables */
    if (htab->used > (unsigned int)((4ULL * size) / 5))
        htab_grow(htab);
}
#if 0
struct hash_node *am_hash_reset(struct hash_table *hash_table)
{
unsigned int i;
unsigned int size;
struct hash_node * volatile *bucket, *all_nodes, *head, *tail;
all_nodes = NULL;
bucket = hash_table->bucket;
size = 1 << hash_table->log2size;
for (i = 0; i < size; ++i) {
head = bucket[i];
if (head) {
bucket[i] = NULL;
tail = head;
while (tail->next)
tail = tail->next;
tail->next = all_nodes;
all_nodes = head;
}
}
hash_table->used = 0;
return all_nodes;
}
void am_hash_enumerate(const struct hash_table *hash_table,
void (*callback)(struct hash_node *hash_node, void *data),
void *data)
{
unsigned int i;
unsigned int size;
struct hash_node * volatile *bucket, *head;
bucket = hash_table->bucket;
size = 1 << hash_table->log2size;
for (i = 0; i < size; ++i) {
head = bucket[i];
while (head != NULL) {
callback(head, data);
head = head->next;
}
}
}
#endif

36
as/htab.h Normal file
View File

@ -0,0 +1,36 @@
/*
* htab.h
*/
#ifndef HTAB_H
#define HTAB_H
#include <stdint.h> /* uintptr_t */
/* Link fields of a hash table node.  The caller fills in hval before
inserting; hnext is managed by the table. */
struct hnode {
uintptr_t hval;
struct hnode *hnext;
};
/* Compare a stored node against lookup data; htab_lookup() treats a
return value of 0 as a match. */
typedef int (*htab_cmpfn_t)(const struct hnode *hnode, const void *data);
/* Chained hash table with a power-of-two number of buckets. */
struct htab {
unsigned int log2size;
unsigned int mask; /* INV: mask == (1 << log2size) - 1 */
unsigned int used; /* number of nodes inserted */
htab_cmpfn_t cmpfn; /* may be NULL: then equal hval alone is a match */
struct hnode **bucket;
};
/* Initialise an empty table with 2^log2size buckets. */
void htab_init(struct htab *htab, unsigned int log2size, htab_cmpfn_t cmpfn);
/* Return the node matching hval and data, or NULL. */
struct hnode *htab_lookup(const struct htab *htab, uintptr_t hval, const void *data);
/* presumably empties the table and returns its nodes; TODO confirm
against the definition */
struct hnode *htab_reset(struct htab *htab);
/* presumably calls callback(hnode, data) for each stored node; TODO
confirm against the definition */
void htab_enumerate(const struct htab *htab,
void (*callback)(struct hnode *hnode, void *data),
void *data);
/* Insert hnode (with hval already set); may grow the table. */
void htab_insert(struct htab *htab, struct hnode *hnode);
#endif /* HTAB_H */

40
as/main.c Normal file
View File

@ -0,0 +1,40 @@
/*
* main.c
*/
#include <stdio.h>
#include <unistd.h>
#include "pass1.h"
/*
 * Assembler driver: parse the command line and run the three passes.
 *
 * Usage: as [-o outfile] [infile]
 * The output defaults to "a.out".  When no single input operand is
 * given, infile stays NULL and is passed to pass1() as such.
 * Returns 0 on success and 1 on any error.
 */
int main(int argc, char **argv)
{
    int ch;
    const char *outfile = "a.out";
    const char *infile = NULL;

    /* Stop when getopt() reports the end of the options.  A 'break'
       inside the switch would only leave the switch, not the loop. */
    while ((ch = getopt(argc, argv, "o:")) != -1) {
        switch (ch) {
        case 'o':
            outfile = optarg;
            break;
        default:
            fprintf(stderr, "as: invalid option '%c'\n", ch);
            return 1;
        }
    }
    if (optind + 1 == argc)
        infile = argv[optind];
    /* NOTE(review): pass2() and pass3() are not declared in pass1.h as
       visible here; confirm where their prototypes come from */
    if (pass1(infile) < 0)
        return 1;
    if (pass2() < 0)
        return 1;
    if (pass3(outfile) < 0)
        return 1;
    return 0;
}

1155
as/parse.c Normal file

File diff suppressed because it is too large Load Diff

11
as/parse.h Normal file
View File

@ -0,0 +1,11 @@
/*
* parse.h
*/
#ifndef PARSE_H
#define PARSE_H
#include "stmt.h"
/* Parse the next statement into *stmt.  As used by pass1(): a negative
return is an error, 0 ends the input, and a positive value means a
statement was stored in *stmt. */
int parse_stmt(struct stmt *stmt);
#endif /* PARSE_H */

270
as/pass1.c Normal file
View File

@ -0,0 +1,270 @@
/*
* pass1.c
*/
#include <stdio.h>
#include <stdlib.h>
#include "emalloc.h"
#include "parse.h"
#include "pass1.h"
#include "scan.h"
#include "section.h"
/* A position in the output: a section and one of its subsections. */
struct section_and_subsection {
struct section *section;
struct subsection *subsection;
};
/* The current position plus the one that .previous switches back to. */
struct current_and_previous_sections {
struct section_and_subsection cursec;
struct section_and_subsection prevsec;
};
/* One entry of the stack maintained by .pushsection / .popsection. */
struct sections_stack_element {
struct current_and_previous_sections sects;
struct sections_stack_element *next;
};
/* All state carried through pass 1. */
struct pass1_state {
struct current_and_previous_sections sects;
struct sections_stack_element *sects_stack; /* top of stack, NULL if empty */
};
/* .popsection: restore the current/previous sections saved by the most
   recent .pushsection.  Returns 0, or -1 if the stack is empty. */
static int pass1_s_popsection(struct pass1_state *state)
{
    struct sections_stack_element *saved = state->sects_stack;

    if (saved != NULL) {
        state->sects_stack = saved->next;
        state->sects = saved->sects;
        free(saved);
        return 0;
    }
    fprintf(stderr, "as: %s, line %u: .popsection with no previous .pushsection\n", scan_filename, scan_linenr);
    return -1;
}
static int pass1_s_previous(struct pass1_state *state)
{
struct section_and_subsection prevsec;
prevsec = state->sects.prevsec;
if (prevsec.section == NULL) {
fprintf(stderr, "as: %s, line %u: .previous with no previous .section\n", scan_filename, scan_linenr);
return -1;
}
state->sects.prevsec = state->sects.cursec;
state->sects.cursec = prevsec;
return 0;
}
static int pass1_s_section(struct pass1_state *state, struct stmt *stmt, int push)
{
struct section *section;
struct subsection *subsection;
int subsectnr;
section = section_enter(stmt->u.s_section.name);
if (stmt->u.s_section.sh_type != 0) {
if (section->e_shdr.sh_type == 0)
section->e_shdr.sh_type = stmt->u.s_section.sh_type;
else if (section->e_shdr.sh_type != stmt->u.s_section.sh_type) {
fprintf(stderr, "as: %s, line %u: section type mismatch\n", scan_filename, scan_linenr);
return -1;
}
}
section->e_shdr.sh_flags |= stmt->u.s_section.sh_flags;
if (stmt->u.s_section.sh_entsize != NULL) {
pdp10_uint36_t offset;
if (eval_abs_verbose(stmt->u.s_section.sh_entsize, &offset) < 0)
return -1;
if (section->e_shdr.sh_entsize == 0)
section->e_shdr.sh_entsize = offset;
else if (section->e_shdr.sh_entsize != offset) {
fprintf(stderr, "as: %s, line %u: section <entsize> mismatch\n", scan_filename, scan_linenr);
return -1;
}
}
if (stmt->u.s_section.groupname != NULL) {
if (section->groupname == NULL)
section->groupname = stmt->u.s_section.groupname;
else if (stmt->u.s_section.groupname != section->groupname) {
fprintf(stderr, "as: %s, line %u: section <groupname> mismatch\n", scan_filename, scan_linenr);
return -1;
}
}
if (stmt->u.s_section.linkage != NULL) {
if (section->linkage == NULL)
section->linkage = stmt->u.s_section.linkage;
else if (stmt->u.s_section.linkage != section->linkage) {
fprintf(stderr, "as: %s, line %u: section <linkage> mismatch\n", scan_filename, scan_linenr);
return -1;
}
}
if (push && stmt->u.s_section.subsectnr != NULL) {
pdp10_uint36_t offset;
if (eval_abs_verbose(stmt->u.s_section.subsectnr, &offset) < 0)
return -1;
subsectnr = offset;
} else
subsectnr = 0;
subsection = subsection_enter(section, subsectnr);
if (push) {
struct sections_stack_element *top;
top = emalloc(sizeof *top);
top->sects = state->sects;
top->next = state->sects_stack;
state->sects_stack = top;
}
state->sects.prevsec = state->sects.cursec;
state->sects.cursec.section = section;
state->sects.cursec.subsection = subsection;
return 0;
}
static int pass1_s_subsection(struct pass1_state *state, struct stmt *stmt)
{
pdp10_uint36_t offset;
struct subsection *subsection;
if (eval_abs_verbose(stmt->u.s_subsection.expr, &offset) < 0)
return -1;
subsection = subsection_enter(state->sects.cursec.section, (int)(pdp10_int36_t)offset);
state->sects.prevsec = state->sects.cursec;
state->sects.cursec.subsection = subsection;
return 0;
}
/*
 * Act on one parsed statement.  Section-altering directives are
 * executed immediately; every other known statement is copied to the
 * end of the current subsection's statement list.  Returns 0 on success
 * and -1 on error, including an unknown statement tag or a failed
 * append.
 */
static int pass1_interpret(struct pass1_state *state, struct stmt *stmt)
{
    switch (stmt->tag) {
        /* in pass1 we have to deal with section-altering directives */
    case S_POPSECTION:
        return pass1_s_popsection(state);
    case S_PREVIOUS:
        return pass1_s_previous(state);
    case S_PUSHSECTION:
        return pass1_s_section(state, stmt, 1);
    case S_SECTION:
        return pass1_s_section(state, stmt, 0);
    case S_SUBSECTION:
        return pass1_s_subsection(state, stmt);
        /* remaining directives, and the non-directives, enter data
           into sections or manipulate symbols; delay these for pass2 */
        XXX;
        /* XXX: wrong, symbol values, even section-relative, are needed as soon
           as possible, so all statements must be interpreted right away */
    case S_ALIGN:
    case S_ASCII:
    case S_ASCIZ:
    case S_BYTE:
    case S_COMM:
    case S_FILE:
    case S_GLOBL:
    case S_HIDDEN:
    case S_IDENT:
    case S_INTERNAL:
    case S_LOCAL:
    case S_LONG:
    case S_ORG:
    case S_PROTECTED:
    case S_SET:
    case S_SHORT:
    case S_SIZE:
    case S_SYMVER:
    case S_TYPE:
    case S_WEAK:
    case S_WEAKREF:
    case S_LABEL:
    case S_INSN:
    {
        struct stmt *stmt2;

        stmt2 = arrlst_append(state->sects.cursec.subsection->stmts);
        /* arrlst_append() is documented to return NULL on failure;
           do not write through a NULL slot */
        if (stmt2 == NULL) {
            fprintf(stderr, "as: %s, line %u: %s(): failed to append statement\n", scan_filename, scan_linenr, __FUNCTION__);
            return -1;
        }
        *stmt2 = *stmt;
        return 0;
    }
    default:
        fprintf(stderr, "as: %s, line %u: %s(): unknown stmt tag %u\n", scan_filename, scan_linenr, __FUNCTION__, stmt->tag);
        return -1;
    }
}
/* Set up the initial pass-1 state: the current section is ".text",
   subsection 0, there is no previous section, and the section stack is
   empty. */
static void pass1_init_state(struct pass1_state *state)
{
    struct section *text_sec = section_enter(strtab_enter(".text"));

    text_sec->e_shdr.sh_type = SHT_PROGBITS;
    text_sec->e_shdr.sh_flags = SHF_ALLOC | SHF_EXECINSTR;
    text_sec->e_shdr.sh_addralign = 4; /* XXX: PDP10-specific */

    state->sects.cursec.section = text_sec;
    state->sects.cursec.subsection = subsection_enter(text_sec, 0);

    state->sects.prevsec.subsection = NULL;
    state->sects.prevsec.section = NULL;

    state->sects_stack = NULL;
}
/* Free whatever is left on the .pushsection stack. */
static void pass1_fini_state(struct pass1_state *state)
{
    struct sections_stack_element *entry;

    for (entry = state->sects_stack; entry != NULL; ) {
        struct sections_stack_element *below = entry->next;

        free(entry);
        entry = below;
    }
}
/*
 * Run pass 1 over filename (NULL means keep reading the current stdin):
 * parse the input one statement at a time and interpret each.  Returns
 * 0 on success and -1 if the input cannot be opened or any statement
 * fails to parse or to be interpreted.  The pass-1 state is released on
 * every path once it has been initialised.
 */
int pass1(const char *filename)
{
    struct pass1_state state;
    struct stmt stmt;
    int status;
    int result = 0;

    if (scan_freopen(filename) < 0)
        return -1;
    pass1_init_state(&state);
    for (;;) {
        status = parse_stmt(&stmt);
        if (status == 0)
            break;      /* end of input */
        if (status < 0 || pass1_interpret(&state, &stmt) < 0) {
            result = -1;
            break;
        }
    }
    /* also reached on error so a non-empty section stack is not leaked */
    pass1_fini_state(&state);
    return result;
}

9
as/pass1.h Normal file
View File

@ -0,0 +1,9 @@
/*
* pass1.h
*/
#ifndef PASS1_H
#define PASS1_H
/* Run pass 1 over the named input file; a NULL filename leaves stdin as
the input.  Returns 0 on success and -1 on error. */
int pass1(const char *filename);
#endif /* PASS1_H */

636
as/scan.c Normal file
View File

@ -0,0 +1,636 @@
/*
* scan.c
*/
#include <errno.h>
#include <stdio.h> /* host stdio since we're dealing with plain text */
#include <stdlib.h>
#include <string.h>
#include "charbuf.h"
#include "scan.h"
#include "token.h"
/* XXX: we should have a pdp10-limits.h */
/* Largest character value; do_escape() masks escaped values with it. */
#define PDP10_UCHAR_MAX PDP10_UINT9_MAX
/* Name of the input being scanned, for use in diagnostics. */
const char *scan_filename = "<stdin>";

/* Redirect stdin to read from filename and remember the name for
   diagnostics.  A NULL filename leaves stdin untouched.  Returns 0 on
   success, or -1 after printing a message if the file cannot be
   opened. */
int scan_freopen(const char *filename)
{
    if (filename == NULL)
        return 0;
    if (freopen(filename, "r", stdin) != NULL) {
        scan_filename = filename;
        return 0;
    }
    fprintf(stderr, "as: Error opening %s: %s\n", filename, strerror(errno));
    return -1;
}
/* Line number of the input position, printed in diagnostics. */
unsigned int scan_linenr;
/* Push ch back onto the input so the next read returns it again.
   Pushing back EOF is a no-op; a failed push-back is reported with
   perror(). */
static void scan_ungetc(int ch)
{
    if (ch == EOF)
        return;
    if (ungetc(ch, stdin) == EOF)
        perror("ungetc");
}
/* Read the next input character from stdin; returns EOF at end of
   input. */
static int scan_getchar(void)
{
    int ch = fgetc(stdin);

    return ch;
}
/* Report an invalid input character on stderr.  The character is shown
   as <EOF>, as a quoted printable character, or as a quoted three-digit
   octal escape; context says where it was found. */
static void badchar(int ch, const char *context)
{
    char shown[7];

    if (ch == EOF) {
        strcpy(shown, "<EOF>");
    } else if (ch >= ' ' && ch <= '~') {
        shown[0] = '\'';
        shown[1] = ch;
        shown[2] = '\'';
        shown[3] = '\0';
    } else {
        /* quote, backslash, three octal digits, quote, NUL: 7 bytes */
        snprintf(shown, sizeof shown, "'\\%u%u%u'",
                 (unsigned int)((ch >> 6) & 3),
                 (unsigned int)((ch >> 3) & 7),
                 (unsigned int)(ch & 7));
    }
    fprintf(stderr, "as: %s, line %u: invalid character %s %s\n", scan_filename, scan_linenr, shown, context);
}
/* Return 1 if ch is one of '0'..'9', else 0. */
static int is_decimal_digit(char ch)
{
    /* a character below '0' wraps to a large unsigned value */
    return (unsigned int)(ch - '0') < 10u;
}
/* Return 1 if ch is one of '0'..'7', else 0. */
static int is_octal_digit(char ch)
{
    /* a character below '0' wraps to a large unsigned value */
    return (unsigned int)(ch - '0') < 8u;
}
/* Return the numeric value (0..15) of the hexadecimal digit ch, or -1U
   if ch is not a hexadecimal digit. */
static unsigned int get_chval(int ch)
{
    unsigned int delta;

    delta = (unsigned int)ch - '0';
    if (delta <= 9)
        return delta;
    delta = (unsigned int)ch - 'A';
    if (delta <= 5)
        return delta + 10;
    delta = (unsigned int)ch - 'a';
    if (delta <= 5)
        return delta + 10;
    return -1U;
}
/*
 * Read the remainder of a character escape, the introducing backslash
 * having been consumed already, and return the character it denotes.
 * Handles the single-letter escapes n t f r b, the quoting escapes for
 * backslash, single and double quote, hexadecimal (x or X followed by
 * one or more hex digits) and octal (one to three digits).  Numeric
 * values above PDP10_UCHAR_MAX are truncated with a warning.  Anything
 * else is reported via badchar() and the offending character (possibly
 * EOF) is returned.
 */
static int do_escape(void)
{
    int ch = scan_getchar();

    switch (ch) {
    case 'n': return '\n';
    case 't': return '\t';
    case 'f': return '\f';
    case 'r': return '\r';
    case 'b': return '\b';
    case '\\':
    case '\'':
    case '"':
        return ch;
    default:
        break;
    }

    if (ch == 'x' || ch == 'X') {
        unsigned int digit;
        unsigned int acc;

        ch = scan_getchar();
        digit = get_chval(ch);
        if (digit <= 15) {
            /* consume every following hex digit */
            for (acc = 0; digit <= 15; digit = get_chval(ch)) {
                acc = acc * 16 + digit;
                ch = scan_getchar();
            }
            scan_ungetc(ch);
            if (acc > PDP10_UCHAR_MAX)
                fprintf(stderr, "as: %s, line %u: truncating escaped value from %#x to %#x\n", scan_filename, scan_linenr, acc, acc & PDP10_UCHAR_MAX);
            return acc & PDP10_UCHAR_MAX;
        }
        /* no hex digit after the x: fall through to the error report */
    } else if (ch != EOF && is_octal_digit(ch)) {
        unsigned int acc = ch - '0';
        int more;

        /* at most two further octal digits */
        for (more = 2; more > 0; --more) {
            ch = scan_getchar();
            if (!is_octal_digit(ch)) {
                scan_ungetc(ch);
                break;
            }
            acc = acc * 8 + (ch - '0');
        }
        if (acc > PDP10_UCHAR_MAX)
            fprintf(stderr, "as: %s, line %u: truncating escaped value from %#x to %#x\n", scan_filename, scan_linenr, acc, acc & PDP10_UCHAR_MAX);
        return acc & PDP10_UCHAR_MAX;
    }

    badchar(ch, "in \\ character escape sequence");
    return ch;
}
/* Scan the rest of a character literal, the opening quote having been
   consumed.  On success the character's value is stored in
   token_attr->uint and T_UINTEGER is returned; otherwise T_ERROR. */
static enum token do_char(union token_attribute *token_attr)
{
    int ch = scan_getchar();

    if (ch == '\'' || ch == EOF) {
        badchar(ch, "in character literal");
        return T_ERROR;
    }
    if (ch == '\\') {
        ch = do_escape();
        if (ch == EOF)
            return T_ERROR;
    }
    token_attr->uint = ch;

    /* the literal must be closed by a single quote */
    ch = scan_getchar();
    if (ch == '\'')
        return T_UINTEGER;
    badchar(ch, "after character literal");
    return T_ERROR;
}
/* XXX: strings should be sequences of uint9_t not sequences of unsigned char */
/* Scan the rest of a string literal, the opening double quote having
   been consumed, collecting its characters in charbuf.  On success the
   text is stored in token_attr->text and T_STRING is returned.  End of
   input or a newline inside the literal yields T_ERROR. */
static enum token do_string(union token_attribute *token_attr, struct charbuf *charbuf)
{
    for (;;) {
        int ch = scan_getchar();

        if (ch == '"') {
            token_attr->text = charbuf_string(charbuf);
            return T_STRING;
        }
        if (ch == EOF || ch == '\n') {
            badchar(ch, "in string literal");
            if (ch == '\n')
                ++scan_linenr;
            return T_ERROR;
        }
        if (ch == '\\') {
            ch = do_escape();
            if (ch == EOF)
                return T_ERROR;
        }
        charbuf_append(charbuf, ch);
    }
}
/* Table of the recognised <.directive> names and their tokens.
mk_symbol() looks names up here with a binary search, so the entries
must stay sorted by name in strcmp() order. */
static const struct {
enum token token;
const char *name;
} directives[] = {
{ T_DOT_ALIGN, ".align" },
{ T_DOT_ASCII, ".ascii" },
{ T_DOT_ASCIZ, ".asciz" },
{ T_DOT_BALIGN, ".balign" },
{ T_DOT_BSS, ".bss" },
{ T_DOT_BYTE, ".byte" },
{ T_DOT_COMM, ".comm" },
{ T_DOT_DATA, ".data" },
{ T_DOT_FILE, ".file" },
{ T_DOT_GLOBL, ".globl" },
{ T_DOT_HIDDEN, ".hidden" },
{ T_DOT_IDENT, ".ident" },
{ T_DOT_INTERNAL, ".internal" },
{ T_DOT_LOCAL, ".local" },
{ T_DOT_LONG, ".long" },
{ T_DOT_ORG, ".org" },
{ T_DOT_P2ALIGN, ".p2align" },
{ T_DOT_POPSECTION, ".popsection" },
{ T_DOT_PREVIOUS, ".previous" },
{ T_DOT_PROTECTED, ".protected" },
{ T_DOT_PUSHSECTION, ".pushsection" },
{ T_DOT_RODATA, ".rodata" },
{ T_DOT_SECTION, ".section" },
{ T_DOT_SET, ".set" },
{ T_DOT_SHORT, ".short" },
{ T_DOT_SIZE, ".size" },
{ T_DOT_SUBSECTION, ".subsection" },
{ T_DOT_SYMVER, ".symver" },
{ T_DOT_TEXT, ".text" },
{ T_DOT_TYPE, ".type" },
{ T_DOT_WEAK, ".weak" },
{ T_DOT_WEAKREF, ".weakref" },
};
/*
 * Classify the symbol-like text collected in charbuf.
 *  - Text starting with '.' is looked up in the directives table; a
 *    hit returns that directive's token.
 *  - "$0".."$15" and "$sp" return T_REGISTER with the register number
 *    ("$sp" is 15) in token_attr->uint.
 *  - Anything else returns T_SYMBOL with a copy of the text in
 *    token_attr->text.
 */
static enum token mk_symbol(union token_attribute *token_attr, const struct charbuf *charbuf)
{
    const char *text = charbuf->head.buf;

    if (text[0] == '.') {   /* check for <.directive> */
        unsigned int lo = 0;
        unsigned int hi = sizeof directives / sizeof directives[0];

        /* binary search over the sorted table */
        while (lo < hi) {
            unsigned int mid = (lo + hi) / 2;
            int cmp = charbuf_strcmp(charbuf, directives[mid].name);

            if (cmp == 0)
                return directives[mid].token;
            if (cmp < 0)
                hi = mid;
            else
                lo = mid + 1;
        }
    } else if (text[0] == '$' && charbuf->head.next == NULL) {  /* check for $<reg> */
        switch (charbuf->pos) {
        case 2:     /* $<digit> */
            if (is_decimal_digit(text[1])) {
                token_attr->uint = text[1] - '0';
                return T_REGISTER;
            }
            break;
        case 3:     /* $<digit><digit> or $sp */
            if (is_decimal_digit(text[1]) && is_decimal_digit(text[2])) {
                unsigned int regnr = (text[1] - '0') * 10 + (text[2] - '0');

                if (regnr < 16) {
                    token_attr->uint = regnr;
                    return T_REGISTER;
                }
            } else if (text[1] == 's' && text[2] == 'p') {
                token_attr->uint = 15;
                return T_REGISTER;
            }
            break;
        default:
            break;
        }
    }
    token_attr->text = charbuf_string(charbuf);
    return T_SYMBOL;
}
/* Return 1 if ch may appear inside a symbol: a letter, a digit, or one
   of '_', '$', '.'.  Returns 0 otherwise. */
static int is_symbol_internal_char(int ch)
{
    if (ch == '_' || ch == '$' || ch == '.')
        return 1;
    if ('0' <= ch && ch <= '9')
        return 1;
    if ('a' <= ch && ch <= 'z')
        return 1;
    return 'A' <= ch && ch <= 'Z';
}
/*
 * Scan the rest of a symbol whose first character, ch, has already been read.
 * Characters are appended to charbuf while they satisfy
 * is_symbol_internal_char(); the first non-matching character is pushed back.
 * The collected text is then classified by mk_symbol().
 */
static enum token do_symbol(union token_attribute *token_attr, int ch, struct charbuf *charbuf)
{
    for (;;) {
        charbuf_append(charbuf, ch);
        ch = scan_getchar();
        if (!is_symbol_internal_char(ch))
            break;
    }
    scan_ungetc(ch);
    return mk_symbol(token_attr, charbuf);
}
/*
 * Scan an integer literal or a local label reference.  ch is the first
 * character and is known by the caller to be a decimal digit.
 *
 * - A leading '0' selects base 8, any other leading digit base 10.
 * - "0x" / "0X" selects base 16 and must be followed by at least one hex digit.
 *   The prefix is only recognised directly after a leading '0'; previously
 *   any first digit was accepted, so e.g. "1x5" was silently scanned as 0x5.
 * - An octal or decimal number directly followed by 'b' or 'f' is a local
 *   label reference (T_LOCAL_LABEL); the 'b'/'f' is consumed.
 * - Otherwise the result is T_UINTEGER and the terminating character is
 *   pushed back.
 *
 * NOTE(review): get_chval() is assumed to return the digit value of ch, or a
 * value larger than any supported base for non-digits -- confirm.
 * NOTE(review): the accumulated value is not range-checked against 36 bits.
 */
static enum token do_number(union token_attribute *token_attr, int ch)
{
    unsigned int base, chval;
    pdp10_uint36_t numval;
    int leading_zero;

    leading_zero = (ch == '0');
    base = leading_zero ? 8 : 10;
    numval = ch - '0';
    ch = scan_getchar();

    /* handle 0x<first hexdig>; only a leading "0" may introduce the prefix */
    if (leading_zero && (ch == 'x' || ch == 'X')) {
        base = 16;
        /* must have at least one hex digit after 0x */
        ch = scan_getchar();
        chval = get_chval(ch);
        if (chval <= 15)
            numval = chval;
        else {
            badchar(ch, "after 0x in hexadecimal literal");
            return T_ERROR;
        }
        ch = scan_getchar();
    }

    /* the number is non-empty, consume and accumulate trailing
       characters as long as they are valid in the base */
    for (;;) {
        chval = get_chval(ch);
        if (chval >= base)
            break;
        numval = numval * base + chval;
        ch = scan_getchar();
    }

    /* check for <local label>{b,f} */
    if (base <= 10 && (ch == 'b' || ch == 'f')) {
        /* represent the local label + direction in sign-magnitude with
           the sign in the least significant bit; using sign-magnitude
           allows to distinguish 0f from 0b (i.e., +0 from -0); storing
           the sign in the least significant bit makes us independent of
           word size */
        token_attr->uint = (numval << 1) | (ch == 'f' ? 1 : 0);
        return T_LOCAL_LABEL;
    }

    /* plain integer literal */
    scan_ungetc(ch);
    token_attr->uint = numval;
    return T_UINTEGER;
}
/* After '=': "==" yields T_EQEQ, otherwise push back the lookahead and yield T_EQ. */
static enum token do_eq(void)
{
    int next = scan_getchar();

    if (next == '=')
        return T_EQEQ;
    scan_ungetc(next);
    return T_EQ;
}
/* After '&': "&&" yields T_ANDAND, otherwise push back the lookahead and yield T_AND. */
static enum token do_ampersand(void)
{
    int next = scan_getchar();

    if (next == '&')
        return T_ANDAND;
    scan_ungetc(next);
    return T_AND;
}
/* After '|': "||" yields T_OROR, otherwise push back the lookahead and yield T_OR. */
static enum token do_bar(void)
{
    int next = scan_getchar();

    if (next == '|')
        return T_OROR;
    scan_ungetc(next);
    return T_OR;
}
/*
 * After '>': ">>" yields T_RSHIFT, ">=" yields T_GE, otherwise push back the
 * lookahead and yield T_GT.
 */
static enum token do_gt(void)
{
    int next = scan_getchar();

    if (next == '>')
        return T_RSHIFT;
    if (next == '=')
        return T_GE;
    scan_ungetc(next);
    return T_GT;
}
/*
 * After '<': "<<" yields T_LSHIFT, "<=" yields T_LE, "<>" yields T_NEQ (it is
 * the same as "!="), otherwise push back the lookahead and yield T_LT.
 */
static enum token do_lt(void)
{
    int next = scan_getchar();

    if (next == '<')
        return T_LSHIFT;
    if (next == '=')
        return T_LE;
    if (next == '>')	/* <> is the same as != */
        return T_NEQ;
    scan_ungetc(next);
    return T_LT;
}
/*
 * Skip the body of a C-style comment; the opening slash-star has already
 * been consumed.  Newlines inside the comment bump scan_linenr.
 *
 * Returns T_EOF as a fake token once the closing star-slash has been
 * consumed, or T_ERROR (after reporting via badchar()) if end of input is
 * reached first.
 */
static enum token do_c_comment(void)
{
    int after_star = 0;	/* non-zero iff the previous character was '*' */
    int ch;

    for (;;) {
        ch = scan_getchar();
        if (ch == EOF) {
            badchar(ch, "in /**/-style comment");
            return T_ERROR;
        }
        if (after_star && ch == '/')
            return T_EOF;	/* fake token for a C comment */
        if (ch == '\n')
            ++scan_linenr;
        after_star = (ch == '*');
    }
}
/*
 * After '/': a following '*' starts a C-style comment, which is skipped by
 * do_c_comment() and whose result is returned; otherwise push back the
 * lookahead and yield T_DIV.
 */
static enum token do_slash(void)
{
    int next = scan_getchar();

    if (next == '*')
        return do_c_comment();
    scan_ungetc(next);
    return T_DIV;
}
/* After '!': "!=" yields T_NEQ, otherwise push back the lookahead and yield T_BANG. */
static enum token do_bang(void)
{
    int next = scan_getchar();

    if (next == '=')
        return T_NEQ;
    scan_ungetc(next);
    return T_BANG;
}
/*
 * Skip the rest of a line comment, up to and including the terminating
 * newline, and bump scan_linenr for it.
 *
 * Returns 0 on success, or -1 (after reporting via badchar()) if end of
 * input is reached before a newline.
 */
static int do_line_comment(void)
{
    int ch;

    do {
        ch = scan_getchar();
        if (ch == EOF) {
            badchar(ch, "in line comment");
            return -1;
        }
    } while (ch != '\n');
    ++scan_linenr;
    return 0;
}
/*
 * Scan and return the next token from the input.
 *
 * Blanks, tabs, carriage returns, form feeds and C-style comments are skipped.
 * A newline, a '#' line comment, and ';' all yield T_NEWLINE.
 * Multi-character operators are recognised by the do_<op>() helpers.
 * Symbols, numbers, strings and character literals store their attribute
 * in *token_attr; charbuf is scratch space for symbol and string text.
 * An unrecognised character is reported via badchar() and yields T_ERROR.
 */
static enum token do_scan(union token_attribute *token_attr, struct charbuf *charbuf)
{
int ch;
ch = scan_getchar();
/* each 'continue' below restarts this loop with the next input character */
for (;; ch = scan_getchar()) {
switch (ch) {
case ' ':
case '\t':
case '\r':
case '\f':
continue;
case '\n':
++scan_linenr;
return T_NEWLINE;
case '#':
/* line comment: skipped through its newline, which ends the statement */
if (do_line_comment() != 0)
return T_ERROR;
return T_NEWLINE;
case ';':
/* ';' ends the statement but does not advance the line number */
return T_NEWLINE;
case EOF:
return T_EOF;
case '@':
return T_AT;
case ':':
return T_COLON;
case ',':
return T_COMMA;
case '(':
return T_LPAREN;
case ')':
return T_RPAREN;
case '~':
return T_TILDE;
case '*':
return T_MUL;
case '/': /* "/""*", "/" */
switch (do_slash()) {
case T_DIV:
return T_DIV;
case T_EOF: /* fake token for a C comment */
continue;
default: /* error, eof in comment */
return T_ERROR;
}
case '%':
return T_REM;
case '<': /* <<, <=, < */
return do_lt();
case '>': /* >>, >=, > */
return do_gt();
case '|': /* ||, | */
return do_bar();
case '&': /* &&, & */
return do_ampersand();
case '^':
return T_CARET;
case '!': /* !=, ! */
return do_bang();
case '+':
return T_PLUS;
case '-':
return T_MINUS;
case '=': /* ==, = */
return do_eq();
case '"':
return do_string(token_attr, charbuf);
case '\'':
return do_char(token_attr);
case '.':
/* Dot may start a floating point literal, but tests show that
gcc always outputs floating point values as integer literals,
so we shouldn't have to support floating point literals at all. */
case '$':
case '_':
return do_symbol(token_attr, ch, charbuf);
default:
if ('0' <= ch && ch <= '9') /* number or <decimal>{b,f} */
return do_number(token_attr, ch);
if (('A' <= ch && ch <= 'Z') ||
('a' <= ch && ch <= 'z'))
return do_symbol(token_attr, ch, charbuf);
}
/* only reached from the default case above: ch starts no known token */
badchar(ch, "");
return T_ERROR;
}
}
enum token scan(union token_attribute *token_attr)
{
struct charbuf charbuf;
enum token token;
charbuf_init(&charbuf);
token = do_scan(token_attr, &charbuf);
charbuf_fini(&charbuf);
return token;
}

15
as/scan.h Normal file
View File

@ -0,0 +1,15 @@
/*
* scan.h
*/
#ifndef SCAN_H
#define SCAN_H
#include "token.h"
/*
 * NOTE(review): scan_filename and scan_linenr below are declared without
 * 'extern', so every translation unit including this header gets its own
 * tentative definition.  That links only when the toolchain merges common
 * symbols (e.g. gcc -fcommon); consider 'extern' here plus one definition
 * in scan.c -- confirm against scan.c before changing.
 */
/* name of the file being scanned -- presumably set by scan_freopen(); confirm */
const char *scan_filename;
/* switch the scanner's input to 'filename' -- TODO confirm return convention */
int scan_freopen(const char *filename);
/* current line number; the scanner increments it for each newline it consumes */
unsigned int scan_linenr;
/* return the next token; token attributes are stored in *token_attr */
enum token scan(union token_attribute *token_attr);
#endif /* SCAN_H */

59
as/section.c Normal file
View File

@ -0,0 +1,59 @@
/*
* section.c
*/
#include <string.h>
#include "emalloc.h"
#include "htab.h"
#include "section.h"
/*
 * Convert a pointer to the hnode embedded in a struct section back to the
 * section itself.  A NULL hnode maps to a NULL section.
 */
static struct section *section_from_hnode(const struct hnode *hnode)
{
    struct section *section;

    /* hnode is first in section, so no need to mess with offsetof() */
    section = (struct section*)hnode;
    return section;
}
/* table of all sections entered so far; hnode.hval is the name's strnode pointer */
static struct htab section_htab;

/* Initialise the section table.  Must run before the first section_enter(). */
void section_init(void)
{
    struct htab *const table = &section_htab;

    /* no compare function: section_enter() looks sections up by hval alone */
    htab_init(table, 8, NULL);
}
/*
 * Return the section whose name is the interned string 'strnode', creating
 * it on first use.  A new section is zero-filled, gets an empty subsection
 * table, and is inserted into the section table keyed by the strnode pointer.
 */
struct section *section_enter(const struct strnode *strnode)
{
    const uintptr_t key = (uintptr_t)strnode;
    struct section *found;
    struct section *fresh;

    found = section_from_hnode(htab_lookup(&section_htab, key, NULL));
    if (found != NULL)
        return found;

    fresh = emalloc(sizeof *fresh);
    memset(fresh, '\0', sizeof *fresh);
    fresh->hnode.hval = key;
    htab_init(&fresh->subsects, 4, NULL);
    htab_insert(&section_htab, &fresh->hnode);
    return fresh;
}
/*
 * Convert a pointer to the hnode embedded in a struct subsection back to the
 * subsection itself.  A NULL hnode maps to a NULL subsection.
 */
static struct subsection *subsection_from_hnode(const struct hnode *hnode)
{
    struct subsection *subsection;

    /* hnode is first in subsection, so no need to mess with offsetof() */
    subsection = (struct subsection*)hnode;
    return subsection;
}
/*
 * Return subsection number 'subsectnr' of 'section', creating it on first
 * use.  A new subsection gets an empty statement list and is inserted into
 * the section's subsection table keyed by its number.
 */
struct subsection *subsection_enter(struct section *section, int subsectnr)
{
    const uintptr_t key = (uintptr_t)subsectnr;
    struct subsection *found;
    struct subsection *fresh;

    found = subsection_from_hnode(htab_lookup(&section->subsects, key, NULL));
    if (found != NULL)
        return found;

    fresh = emalloc(sizeof *fresh);
    fresh->hnode.hval = key;
    fresh->stmts = arrlst_alloc(sizeof(struct stmt));
    htab_insert(&section->subsects, &fresh->hnode);
    return fresh;
}

35
as/section.h Normal file
View File

@ -0,0 +1,35 @@
/*
* section.h
*/
#ifndef SECTION_H
#define SECTION_H
#include "pdp10-elf36.h"
#include "arrlst.h"
#include "htab.h"
#include "stmt.h"
#include "strtab.h"
/*
 * One numbered subsection of a section.  The hnode must remain the first
 * member: section.c converts hnode pointers back by a plain cast.
 */
struct subsection {
struct hnode hnode; /* hnode.hval == subsect nr */
struct arrlst *stmts; /* list of struct stmt, allocated by subsection_enter() */
};
/*
 * One named section.  The hnode must remain the first member: section.c
 * converts hnode pointers back by a plain cast.  section_enter() zero-fills
 * a new section before setting hnode.hval and initialising subsects.
 */
struct section {
struct hnode hnode; /* hnode.hval == struct strnode* */
Elf36_Shdr e_shdr; /* presumably the ELF section header to emit -- confirm */
struct htab subsects; /* subsections, keyed by subsection number */
const struct strnode *groupname;
const struct strnode *linkage;
unsigned int dot; /* Elf36_Off? */
};
/* Distinguished non-dereferenceable section pointers; presumably they mark
   absolute and undefined symbols -- confirm against users. */
#define SECTION_ABS ((struct section*)0)
#define SECTION_UNDEF ((struct section*)1)
/* initialise the section table; call before section_enter() */
void section_init(void);
/* find or create the section named by the interned string 'strnode' */
struct section *section_enter(const struct strnode *strnode);
/* find or create subsection 'subsectnr' within 'section' */
struct subsection *subsection_enter(struct section *section, int subsectnr);
#endif /* SECTION_H */

122
as/stmt.h Normal file
View File

@ -0,0 +1,122 @@
/*
* stmt.h
*/
#ifndef STMT_H
#define STMT_H
#include "pdp10-elf36.h"
#include "expr.h"
#include "strtab.h"
/* Singly linked list of expressions (see s_expr_list in struct stmt). */
struct expr_list {
struct expr *expr;
struct expr_list *next; /* presumably NULL at the end of the list -- confirm */
};
/* Singly linked list of interned strings (see s_string_list in struct stmt). */
struct string_list {
const struct strnode *string;
struct string_list *next; /* presumably NULL at the end of the list -- confirm */
};
/*
 * Discriminator for struct stmt.  Several directives share one tag, as
 * noted on the individual enumerators.
 */
enum stmt_tag {
/* directives */
S_ALIGN, /* .align, .balign, and .p2align map to this */
S_ASCII,
S_ASCIZ,
S_BYTE,
S_COMM,
S_FILE,
S_GLOBL,
S_HIDDEN,
S_IDENT,
S_INTERNAL,
S_LOCAL,
S_LONG,
S_ORG,
S_POPSECTION, /* no attribute */
S_PREVIOUS, /* no attribute */
S_PROTECTED,
S_PUSHSECTION,
S_SECTION, /* .bss, .data, .rodata, and .text also map to this */
S_SET,
S_SHORT,
S_SIZE,
S_SUBSECTION,
S_SYMVER,
S_TYPE,
S_WEAK,
S_WEAKREF,
/* non-directives */
S_LABEL,
S_INSN,
};
/*
 * One parsed assembler statement: a tag plus a tag-specific attribute in
 * the union 'u'.  Which union member belongs to which tag is decided by the
 * parser, which is not part of this header; the per-member notes below are
 * inferred from the member names -- confirm against parse.c.
 */
struct stmt {
enum stmt_tag tag;
union {
/* presumably S_ALIGN */
struct {
unsigned char flags; /* p2 vs b, none/w/l */
struct expr *balign;
struct expr *fill;
struct expr *maxskip;
} s_align;
/* a list of strings */
struct {
struct string_list *list;
} s_string_list;
/* a list of expressions */
struct {
struct expr_list *list;
} s_expr_list;
/* presumably S_COMM */
struct {
const struct strnode *name;
struct expr *length;
struct expr *balign;
} s_comm;
/* a single string */
struct {
const struct strnode *string;
} s_string;
/* presumably S_ORG */
struct {
struct expr *newlc;
struct expr *fill;
} s_org;
/* presumably S_SECTION and S_PUSHSECTION */
struct {
const struct strnode *name;
struct expr *subsectnr;
Elf36_Word sh_flags;
Elf36_Word sh_type;
struct expr *sh_entsize;
const struct strnode *groupname;
const struct strnode *linkage;
} s_section;
/* presumably shared by S_SET and S_SIZE */
struct {
const struct strnode *name;
struct expr *expr;
} s_setsize;
/* presumably S_SUBSECTION */
struct {
struct expr *expr;
} s_subsection;
/* presumably S_SYMVER */
struct {
const struct strnode *name1;
const struct strnode *name2;
const struct strnode *name3;
unsigned char nrats; /* 1, 2, or 3 */
} s_symver;
/* presumably S_TYPE */
struct {
const struct strnode *name;
unsigned char st_type;
} s_type;
/* presumably S_WEAKREF */
struct {
const struct strnode *alias;
const struct strnode *target;
} s_weakref;
/* presumably S_INSN: mnemonic, accumulator, '@' flag, address expression, index register */
struct {
const struct strnode *name;
unsigned int accumulator;
int at;
struct expr *expr;
unsigned int indexreg;
} s_insn;
} u;
};
#endif /* STMT_H */

65
as/strtab.c Normal file
View File

@ -0,0 +1,65 @@
/*
* strtab.c
*/
#include <string.h>
#include "emalloc.h"
#include "htab.h"
#include "strtab.h"
/*
 * Convert a pointer to the hnode embedded in a struct strnode back to the
 * strnode itself.  A NULL hnode maps to a NULL strnode.
 */
static struct strnode *strnode_from_hnode(const struct hnode *hnode)
{
    struct strnode *strnode;

    /* hnode is first in strnode, so no need to mess with offsetof() */
    strnode = (struct strnode*)hnode;
    return strnode;
}
static int strtab_cmpfn(const struct hnode *hnode, const void *data)
{
const struct strnode *strnode = strnode_from_hnode(hnode);
const char *string = data;
return strcmp(strnode->string, string);
}
/* table of all interned strings; nodes are compared with strtab_cmpfn() */
static struct htab strtab_htab;

/* Initialise the string table.  Must run before the first strtab_enter(). */
void strtab_init(void)
{
    struct htab *const table = &strtab_htab;

    htab_init(table, 64, strtab_cmpfn);
}
/*
 * Hash a NUL-terminated string: starting from 0, each byte (taken as an
 * unsigned char) updates the hash as h = h * 33 + byte, with the usual
 * unsigned wrap-around.  The empty string hashes to 0.
 */
static uintptr_t strtab_hash(const char *string)
{
    const unsigned char *p;
    uintptr_t hash = 0;

    for (p = (const unsigned char*)string; *p != '\0'; ++p)
        hash = hash * 33 + *p;
    return hash;
}
/*
 * Intern 'string': return the unique strnode holding an equal string,
 * allocating and inserting a new node (with its own copy of the text) the
 * first time the string is seen.
 */
const struct strnode *strtab_enter(const char *string)
{
    const uintptr_t hval = strtab_hash(string);
    struct strnode *node;
    size_t nbytes;

    node = strnode_from_hnode(htab_lookup(&strtab_htab, hval, string));
    if (node != NULL)
        return node;

    /* room for the node header plus the text and its terminating NUL */
    nbytes = strlen(string) + 1;
    node = emalloc(offsetof(struct strnode, string) + nbytes);
    node->hnode.hval = hval;
    memcpy(node->string, string, nbytes);
    htab_insert(&strtab_htab, &node->hnode);
    return node;
}

17
as/strtab.h Normal file
View File

@ -0,0 +1,17 @@
/*
* strtab.h
*/
#ifndef STRTAB_H
#define STRTAB_H
#include "htab.h"
/*
 * An interned string.  The hnode must remain the first member: strtab.c
 * converts hnode pointers back by a plain cast.  The text is stored inline
 * in the flexible array member, NUL-terminated.
 */
struct strnode {
struct hnode hnode;
char string[];
};
/* initialise the string table; call before strtab_enter() */
void strtab_init(void);
/* return the unique strnode for 'string', creating it on first use */
const struct strnode *strtab_enter(const char *string);
#endif /* STRTAB_H */

54
as/token.c Normal file
View File

@ -0,0 +1,54 @@
/*
* token.c
*/
#include <stdio.h>
#include "pdp10-inttypes.h"
#include "token.h"
/* How token_print() renders a token's attribute; used as the third TOKEN() argument in token.def. */
enum {
FMT_NONE = 0, /* no attribute is printed */
FMT_UINT = 1, /* print token_attr->uint */
FMT_SYMBOL = 2, /* print token_attr->text as is */
FMT_STRING = 3, /* print token_attr->text inside double quotes */
};
/* Per-token print information, one entry per TOKEN() line in token.def. */
struct token_info {
char print_name[15]; /* may fill all 15 bytes with no NUL; token_print() bounds its output accordingly */
unsigned char attribute_fmt; /* one of the FMT_* values */
};
/* Built from token.def in the same order as enum token (token.h), so it can be indexed by token value. */
static const struct token_info token_info[] = {
#define TOKEN(T,P,F) { P, F },
#include "token.def"
#undef TOKEN
};
/*
 * Print a human-readable form of 'token' to 'fp': its print name, followed
 * by its attribute in square brackets when 'token_attr' is non-NULL and the
 * token kind carries an attribute (integer, symbol text, or quoted string).
 * A token value outside the token_info[] table prints "<invalid token N>".
 */
void token_print(FILE *fp, enum token token, const union token_attribute *token_attr)
{
    const struct token_info *ti;

    if (token >= sizeof token_info / sizeof token_info[0]) {
        /* enum token's underlying type is implementation-defined; make it match %u */
        fprintf(fp, "<invalid token %u>", (unsigned int)token);
        return;
    }

    ti = &token_info[token];
    /* print_name may lack a terminating NUL, so bound the output by its size */
    fprintf(fp, "%.*s", (int) sizeof ti->print_name, ti->print_name);
    if (!token_attr)
        return;

    switch (ti->attribute_fmt) {
    case FMT_UINT:
        /* PDP10_PRIu36 already ends in the 'u' conversion specifier;
           appending another "u" printed a stray literal 'u' after the value */
        fprintf(fp, " [%" PDP10_PRIu36 "]", token_attr->uint);
        break;
    case FMT_SYMBOL:
        fprintf(fp, " [%s]", token_attr->text);
        break;
    case FMT_STRING:
        fprintf(fp, " [\"%s\"]", token_attr->text);
        break;
    default:
        break;
    }
}

77
as/token.def Normal file
View File

@ -0,0 +1,77 @@
/*
 * token.def
 *
 * TOKEN(T_<name>, <print name>, <attribute fmt>)
 *
 * This file is included with different TOKEN() definitions: token.h builds
 * enum token from the first argument, and token.c builds the token_info[]
 * table from the second and third.  Both use the order of the lines below,
 * so an entry's position is its token value.
 *
 * <print name> is stored in a 15-byte array (struct token_info in token.c).
 * <attribute fmt> is one of the FMT_* constants defined in token.c.
 */
/* directives */
TOKEN(T_DOT_ALIGN, ".align", FMT_NONE)
TOKEN(T_DOT_ASCII, ".ascii", FMT_NONE)
TOKEN(T_DOT_ASCIZ, ".asciz", FMT_NONE)
TOKEN(T_DOT_BALIGN, ".balign", FMT_NONE)
TOKEN(T_DOT_BSS, ".bss", FMT_NONE)
TOKEN(T_DOT_BYTE, ".byte", FMT_NONE)
TOKEN(T_DOT_COMM, ".comm", FMT_NONE)
TOKEN(T_DOT_DATA, ".data", FMT_NONE)
TOKEN(T_DOT_FILE, ".file", FMT_NONE)
TOKEN(T_DOT_GLOBL, ".globl", FMT_NONE)
TOKEN(T_DOT_HIDDEN, ".hidden", FMT_NONE)
TOKEN(T_DOT_IDENT, ".ident", FMT_NONE)
TOKEN(T_DOT_INTERNAL, ".internal", FMT_NONE)
TOKEN(T_DOT_LOCAL, ".local", FMT_NONE)
TOKEN(T_DOT_LONG, ".long", FMT_NONE)
TOKEN(T_DOT_ORG, ".org", FMT_NONE)
TOKEN(T_DOT_P2ALIGN, ".p2align", FMT_NONE)
TOKEN(T_DOT_POPSECTION, ".popsection", FMT_NONE)
TOKEN(T_DOT_PREVIOUS, ".previous", FMT_NONE)
TOKEN(T_DOT_PROTECTED, ".protected", FMT_NONE)
TOKEN(T_DOT_PUSHSECTION, ".pushsection", FMT_NONE)
TOKEN(T_DOT_RODATA, ".rodata", FMT_NONE)
TOKEN(T_DOT_SECTION, ".section", FMT_NONE)
TOKEN(T_DOT_SET, ".set", FMT_NONE)
TOKEN(T_DOT_SHORT, ".short", FMT_NONE)
TOKEN(T_DOT_SIZE, ".size", FMT_NONE)
TOKEN(T_DOT_SUBSECTION, ".subsection", FMT_NONE)
TOKEN(T_DOT_SYMVER, ".symver", FMT_NONE)
TOKEN(T_DOT_TEXT, ".text", FMT_NONE)
TOKEN(T_DOT_TYPE, ".type", FMT_NONE)
TOKEN(T_DOT_WEAK, ".weak", FMT_NONE)
TOKEN(T_DOT_WEAKREF, ".weakref", FMT_NONE)
/* other symbols */
TOKEN(T_REGISTER, "<register>", FMT_UINT)
TOKEN(T_SYMBOL, "<symbol>", FMT_SYMBOL)
TOKEN(T_LOCAL_LABEL, "<local label>", FMT_UINT) /* 1f, 2b */
TOKEN(T_AT, "@", FMT_NONE)
TOKEN(T_COLON, ":", FMT_NONE)
/* literals */
TOKEN(T_UINTEGER, "<integer>", FMT_UINT)
TOKEN(T_STRING, "<string>", FMT_STRING)
/* operators, separators */
TOKEN(T_COMMA, ",", FMT_NONE)
TOKEN(T_LPAREN, "(", FMT_NONE)
TOKEN(T_RPAREN, ")", FMT_NONE)
TOKEN(T_TILDE, "~", FMT_NONE)
TOKEN(T_MUL, "*", FMT_NONE)
TOKEN(T_DIV, "/", FMT_NONE)
TOKEN(T_REM, "%", FMT_NONE)
TOKEN(T_LSHIFT, "<<", FMT_NONE)
TOKEN(T_RSHIFT, ">>", FMT_NONE)
TOKEN(T_OR, "|", FMT_NONE)
TOKEN(T_AND, "&", FMT_NONE)
TOKEN(T_CARET, "^", FMT_NONE)
TOKEN(T_BANG, "!", FMT_NONE)
TOKEN(T_PLUS, "+", FMT_NONE)
TOKEN(T_MINUS, "-", FMT_NONE)
TOKEN(T_EQ, "=", FMT_NONE)
TOKEN(T_EQEQ, "==", FMT_NONE)
TOKEN(T_NEQ, "!=", FMT_NONE)
TOKEN(T_LT, "<", FMT_NONE)
TOKEN(T_GT, ">", FMT_NONE)
TOKEN(T_GE, ">=", FMT_NONE)
TOKEN(T_LE, "<=", FMT_NONE)
TOKEN(T_ANDAND, "&&", FMT_NONE)
TOKEN(T_OROR, "||", FMT_NONE)
/* misc */
TOKEN(T_NEWLINE, "<newline>", FMT_NONE)
TOKEN(T_EOF, "<eof>", FMT_NONE)
TOKEN(T_ERROR, "<error>", FMT_NONE)

22
as/token.h Normal file
View File

@ -0,0 +1,22 @@
/*
* token.h
*/
#ifndef TOKEN_H
#define TOKEN_H

#include <stdio.h>	/* FILE, used by token_print(); keeps this header self-contained */
#include "pdp10-stdint.h"

/* Token kinds; the enumerators are generated from token.def, in file order. */
enum token {
#define TOKEN(T,P,F) T,
#include "token.def"
#undef TOKEN
};

/* Attribute attached to tokens that carry a value. */
union token_attribute {
    const char *text;		/* symbol, string */
    pdp10_uint36_t uint;	/* uinteger */
};

/* Print 'token' to 'fp', followed by its attribute if 'token_attr' is non-NULL. */
void token_print(FILE *fp, enum token token, const union token_attribute *token_attr);

#endif /* TOKEN_H */

134
include/pdp10-arith.h Normal file
View File

@ -0,0 +1,134 @@
/*
* pdp10-arith.h
*
* Provide functions for performing arithmetic operations on PDP10 integer types.
* Currently only 36-bit signed operations are supported.
*/
#ifndef PDP10_ARITH_H
#define PDP10_ARITH_H
#include "pdp10-stdint.h"
/* Zero-extend a 36-bit value to the full width of its host representation
 * type by clearing every bit above bit 35.
 * Apply this to operands before unsigned operations, and to results after
 * signed operations.
 */
static inline pdp10_uint36_t pdp10_zext_uint36(pdp10_uint36_t x)
{
    const pdp10_uint36_t low36_mask = PDP10_UINT36_MAX;

    return x & low36_mask;
}
/* Sign-extend a pdp10_int36_t to the full width of its representation type.
 * Use this to prepare operands before signed operations.
 *
 * Based on the following trick for sign-extending an octet x: ((x & 0xff) ^ 0x80) - 0x80,
 * c.f. <http://sourceware.org/ml/binutils/2001-05/msg00093.html>.
 *
 * Bits of x above bit 35 are ignored.  For a negative value the unsigned
 * subtraction wraps around and the result is then converted to the signed
 * return type.
 */
static inline pdp10_int36_t pdp10_sext_int36(pdp10_uint36_t x)
{
/* mask with only the 36-bit sign bit (bit 35) set */
const pdp10_uint36_t PDP10_UINT36_SBIT = ~(PDP10_UINT36_MAX >> 1) & PDP10_UINT36_MAX;
return ((x & PDP10_UINT36_MAX) ^ PDP10_UINT36_SBIT) - PDP10_UINT36_SBIT;
}
/* Negate x as a signed 36-bit integer; the result is reduced to 36 bits. */
static inline pdp10_uint36_t pdp10_neg_int36(pdp10_uint36_t x)
{
    const pdp10_int36_t value = pdp10_sext_int36(x);

    return pdp10_zext_uint36(-value);
}
/* Bitwise complement of x; the result is reduced to 36 bits. */
static inline pdp10_uint36_t pdp10_not_int36(pdp10_uint36_t x)
{
    const pdp10_int36_t value = pdp10_sext_int36(x);

    return pdp10_zext_uint36(~value);
}
/* Signed 36-bit addition x + y; the sum is reduced to 36 bits. */
static inline pdp10_uint36_t pdp10_add_int36(pdp10_uint36_t x, pdp10_uint36_t y)
{
    const pdp10_int36_t lhs = pdp10_sext_int36(x);
    const pdp10_int36_t rhs = pdp10_sext_int36(y);

    return pdp10_zext_uint36(lhs + rhs);
}
/* Signed 36-bit subtraction x - y; the difference is reduced to 36 bits. */
static inline pdp10_uint36_t pdp10_sub_int36(pdp10_uint36_t x, pdp10_uint36_t y)
{
    const pdp10_int36_t lhs = pdp10_sext_int36(x);
    const pdp10_int36_t rhs = pdp10_sext_int36(y);

    return pdp10_zext_uint36(lhs - rhs);
}
/* 36-bit multiplication x * y; the product is reduced to 36 bits.
 *
 * The multiplication is done on the zero-extended operands in unsigned
 * arithmetic.  The low 36 bits of a product depend only on the low 36 bits
 * of the operands, so this yields the same result as multiplying the
 * sign-extended values, but without the signed overflow (undefined
 * behaviour) that a product of two large 36-bit values would cause in the
 * host's signed type.
 */
static inline pdp10_uint36_t pdp10_mul_int36(pdp10_uint36_t x, pdp10_uint36_t y)
{
    return pdp10_zext_uint36(pdp10_zext_uint36(x) * pdp10_zext_uint36(y));
}
/* Signed 36-bit division x / y; the quotient is reduced to 36 bits.
 * The caller must ensure y is non-zero: division by zero is undefined in C.
 */
static inline pdp10_uint36_t pdp10_div_int36(pdp10_uint36_t x, pdp10_uint36_t y)
{
    const pdp10_int36_t dividend = pdp10_sext_int36(x);
    const pdp10_int36_t divisor = pdp10_sext_int36(y);

    return pdp10_zext_uint36(dividend / divisor);
}
/* Signed 36-bit remainder x % y; the result is reduced to 36 bits.
 * The caller must ensure y is non-zero: remainder by zero is undefined in C.
 */
static inline pdp10_uint36_t pdp10_rem_int36(pdp10_uint36_t x, pdp10_uint36_t y)
{
    const pdp10_int36_t dividend = pdp10_sext_int36(x);
    const pdp10_int36_t divisor = pdp10_sext_int36(y);

    return pdp10_zext_uint36(dividend % divisor);
}
/* Logical shift left of x by y bits; the result is reduced to 36 bits.
 *
 * A count of 36 or more shifts every bit out and yields 0.  This is handled
 * explicitly because shifting by at least the width of the host type is
 * undefined behaviour in C.
 */
static inline pdp10_uint36_t pdp10_lsl_int36(pdp10_uint36_t x, pdp10_uint36_t y)
{
    const pdp10_uint36_t count = pdp10_zext_uint36(y);

    if (count >= 36)
        return 0;
    return pdp10_zext_uint36(pdp10_zext_uint36(x) << count);
}
/* Logical shift right of x by y bits, filling with zeros.
 *
 * A count of 36 or more shifts every bit out and yields 0.  This is handled
 * explicitly because shifting by at least the width of the host type is
 * undefined behaviour in C.
 */
static inline pdp10_uint36_t pdp10_lsr_int36(pdp10_uint36_t x, pdp10_uint36_t y)
{
    const pdp10_uint36_t count = pdp10_zext_uint36(y);

    if (count >= 36)
        return 0;
    return pdp10_zext_uint36(pdp10_zext_uint36(x) >> count);
}
/* Arithmetic shift right of x by y bits; the result is reduced to 36 bits.
 *
 * The count is clamped to 35: any larger count leaves only copies of the
 * sign bit, which is exactly what a shift by 35 produces.  Clamping avoids
 * shifting by at least the width of the host type, which is undefined
 * behaviour in C.
 *
 * NOTE: this relies on the host compiler implementing >> on negative signed
 * values as an arithmetic shift (implementation-defined in C).
 */
static inline pdp10_uint36_t pdp10_asr_int36(pdp10_uint36_t x, pdp10_uint36_t y)
{
    pdp10_uint36_t count = pdp10_zext_uint36(y);

    if (count > 35)
        count = 35;
    return pdp10_zext_uint36(pdp10_sext_int36(x) >> count);
}
/* Bitwise OR of x and y, restricted to the low 36 bits. */
static inline pdp10_uint36_t pdp10_or_int36(pdp10_uint36_t x, pdp10_uint36_t y)
{
    /* masking after the operation equals masking both operands first */
    return pdp10_zext_uint36(x | y);
}
/* Bitwise AND of x and y, restricted to the low 36 bits. */
static inline pdp10_uint36_t pdp10_and_int36(pdp10_uint36_t x, pdp10_uint36_t y)
{
    /* masking after the operation equals masking both operands first */
    return pdp10_zext_uint36(x & y);
}
/* Bitwise exclusive OR of x and y, restricted to the low 36 bits. */
static inline pdp10_uint36_t pdp10_xor_int36(pdp10_uint36_t x, pdp10_uint36_t y)
{
    /* masking after the operation equals masking both operands first */
    return pdp10_zext_uint36(x ^ y);
}
/* Return 1 if x and y agree in their low 36 bits, else 0. */
static inline int pdp10_eq_int36(pdp10_uint36_t x, pdp10_uint36_t y)
{
    /* the values are equal iff they differ in none of the low 36 bits */
    return pdp10_zext_uint36(x ^ y) == 0;
}
/* Return 1 if x and y differ in their low 36 bits, else 0. */
static inline int pdp10_ne_int36(pdp10_uint36_t x, pdp10_uint36_t y)
{
    /* the values differ iff some bit among the low 36 differs */
    return pdp10_zext_uint36(x ^ y) != 0;
}
/* Signed 36-bit comparison: return 1 if x < y, else 0. */
static inline int pdp10_lt_int36(pdp10_uint36_t x, pdp10_uint36_t y)
{
    const pdp10_int36_t lhs = pdp10_sext_int36(x);
    const pdp10_int36_t rhs = pdp10_sext_int36(y);

    return lhs < rhs;
}
/* Signed 36-bit comparison: return 1 if x > y, else 0. */
static inline int pdp10_gt_int36(pdp10_uint36_t x, pdp10_uint36_t y)
{
    const pdp10_int36_t lhs = pdp10_sext_int36(x);
    const pdp10_int36_t rhs = pdp10_sext_int36(y);

    return rhs < lhs;
}
/* Signed 36-bit comparison: return 1 if x >= y, else 0. */
static inline int pdp10_ge_int36(pdp10_uint36_t x, pdp10_uint36_t y)
{
    const pdp10_int36_t lhs = pdp10_sext_int36(x);
    const pdp10_int36_t rhs = pdp10_sext_int36(y);

    return !(lhs < rhs);
}
/* Signed 36-bit comparison: return 1 if x <= y, else 0. */
static inline int pdp10_le_int36(pdp10_uint36_t x, pdp10_uint36_t y)
{
    const pdp10_int36_t lhs = pdp10_sext_int36(x);
    const pdp10_int36_t rhs = pdp10_sext_int36(y);

    return !(lhs > rhs);
}
/* Return 1 if any of the low 36 bits of x is set, else 0. */
static inline int pdp10_nonzero_int36(pdp10_uint36_t x)
{
    const pdp10_uint36_t low36 = pdp10_zext_uint36(x);

    return low36 != 0;
}
#endif /* PDP10_ARITH_H */

1235
include/pdp10-elf36.h Normal file

File diff suppressed because it is too large Load Diff

28
include/pdp10-extint.h Normal file
View File

@ -0,0 +1,28 @@
/*
* pdp10-extint.h
*
* Provide types and procedures for converting 18 and 36-bit integers
* to and from arrays of 9-bit bytes (nonets). Use these together with
* pdp10_fread() and pdp10_fwrite() to convert 18 and 36-bit integers
* between host-level and file-level binary representations.
*/
#ifndef PDP10_EXTINT_H
#define PDP10_EXTINT_H
#include "pdp10-stdint.h"
/* External (file-level) form of an 18-bit integer: two nonets, x[0] holding the high 9 bits. */
typedef struct {
pdp10_uint9_t x[2];
} pdp10_ext_uint18_t;
/* store the low 18 bits of 'val' into '*ext' */
void pdp10_uint18_to_ext(pdp10_uint18_t val, pdp10_ext_uint18_t *ext);
/* reassemble an 18-bit value from '*ext' */
pdp10_uint18_t pdp10_uint18_from_ext(const pdp10_ext_uint18_t *ext);
/* External (file-level) form of a 36-bit integer: four nonets, x[0] holding the high 9 bits. */
typedef struct {
pdp10_uint9_t x[4];
} pdp10_ext_uint36_t;
/* store the low 36 bits of 'val' into '*ext' */
void pdp10_uint36_to_ext(pdp10_uint36_t val, pdp10_ext_uint36_t *ext);
/* reassemble a 36-bit value from '*ext' */
pdp10_uint36_t pdp10_uint36_from_ext(const pdp10_ext_uint36_t *ext);
#endif /* PDP10_EXTINT_H */

38
include/pdp10-inttypes.h Normal file
View File

@ -0,0 +1,38 @@
/*
* pdp10-inttypes.h
*
* Provide format conversions for 18 and 36-bit integers.
* For 9-bit integers, pdp10_uint9_t, just use the regular
* int-sized d/o/u/x formats.
*/
#ifndef PDP10_INTTYPES_H
#define PDP10_INTTYPES_H
#include <inttypes.h>
#include "pdp10-stdint.h"
/*
 * 18-bit formats.  Without a native 18-bit type, pdp10-stdint.h represents
 * pdp10_uint18_t as uint32_t, so the 32-bit format macros are used.
 * Like the standard PRI* macros, these expand to the length modifier plus
 * the conversion specifier; use them as "%" PDP10_PRIu18.
 */
#if defined(UINT18_MAX)
#define PDP10_PRId18 PRId18
#define PDP10_PRIo18 PRIo18
#define PDP10_PRIu18 PRIu18
#define PDP10_PRIx18 PRIx18
#else
#define PDP10_PRId18 PRId32
#define PDP10_PRIo18 PRIo32
#define PDP10_PRIu18 PRIu32
#define PDP10_PRIx18 PRIx32
#endif
/*
 * 36-bit formats.  Without a native 36-bit type, pdp10-stdint.h represents
 * pdp10_uint36_t as uint64_t, so the 64-bit format macros are used.
 */
#if defined(UINT36_MAX)
#define PDP10_PRId36 PRId36
#define PDP10_PRIo36 PRIo36
#define PDP10_PRIu36 PRIu36
#define PDP10_PRIx36 PRIx36
#else
#define PDP10_PRId36 PRId64
#define PDP10_PRIo36 PRIo64
#define PDP10_PRIu36 PRIu64
#define PDP10_PRIx36 PRIx64
#endif
#endif /* PDP10_INTTYPES_H */

71
include/pdp10-stdint.h Normal file
View File

@ -0,0 +1,71 @@
/*
* pdp10-stdint.h
*
* Provide stdint.h-like type names and macros for 9, 18, and 36-bit unsigned
* integer types.
*
* Standard uint<N>_t types must not contain any extraneous bits, but that
* cannot be guaranteed for these 9, 18, and 36-bit types when they are embedded
* in larger 16, 32, and 64-bit host types. For arithmetic on these types, use
* the operations provided by pdp10-arith.h.
*
* Do not use these 18 or 36-bit types for file-level binary data structures,
* instead use the pdp10-extint.h and pdp10-stdio.h facilities to explicitly
* convert between file-level and host-level binary data structures.
*/
#ifndef PDP10_STDINT_H
#define PDP10_STDINT_H
#include <stdint.h>
/* 9-bit unsigned type: the host's uint9_t if it has one, else embedded in uint16_t */
#if defined(UINT9_MAX)
typedef uint9_t pdp10_uint9_t;
#define PDP10_UINT9_MAX UINT9_MAX
#define PDP10_UINT9_C(c) UINT9_C(c)
#else /* !UINT9_MAX */
typedef uint16_t pdp10_uint9_t;
/* largest 9-bit value, 0777 */
#define PDP10_UINT9_MAX ((1U << 9) - 1)
#define PDP10_UINT9_C(c) c
#endif /* !UINT9_MAX */
/* 18-bit unsigned type: the host's uint18_t if it has one, else embedded in uint32_t */
#if defined(UINT18_MAX)
typedef uint18_t pdp10_uint18_t;
#define PDP10_UINT18_MAX UINT18_MAX
#define PDP10_UINT18_C(c) UINT18_C(c)
#else /* !UINT18_MAX */
typedef uint32_t pdp10_uint18_t;
/* largest 18-bit value, 0777777 */
#define PDP10_UINT18_MAX ((1UL << 18) - 1)
#define PDP10_UINT18_C(c) c ## U
#endif /* !UINT18_MAX */
/* 36-bit unsigned and signed types: the host's (u)int36_t if it has them,
   else embedded in (u)int64_t */
#if defined(UINT36_MAX)
typedef uint36_t pdp10_uint36_t;
#define PDP10_UINT36_MAX UINT36_MAX
#define PDP10_UINT36_C(c) UINT36_C(c)
typedef int36_t pdp10_int36_t;
#define PDP10_INT36_MAX INT36_MAX
#define PDP10_INT36_C(c) INT36_C(c)
#else /* !UINT36_MAX */
typedef uint64_t pdp10_uint36_t;
/* largest 36-bit unsigned value, 2^36 - 1 */
#define PDP10_UINT36_MAX ((1ULL << 36) - 1)
#define PDP10_UINT36_C(c) c ## ULL
typedef int64_t pdp10_int36_t;
/* largest 36-bit signed value, 2^35 - 1 */
#define PDP10_INT36_MAX ((1LL << (36 - 1)) - 1)
#define PDP10_INT36_C(c) c ## LL
#endif /* !UINT36_MAX */
#endif /* PDP10_STDINT_H */

38
include/pdp10-stdio.h Normal file
View File

@ -0,0 +1,38 @@
/*
* pdp10-stdio.h
*
* Provide stdio.h-like interface for I/O to and from files with 9-bit logical bytes (nonets),
* represented by native files with 8-bit physical bytes (octets).
*/
#ifndef PDP10_STDIO_H
#define PDP10_STDIO_H

/* Everything the declarations below rely on, so that this header can be
   included on its own, in any order. */
#include <stdint.h>	/* uint16_t */
#include <stdio.h>	/* size_t, EOF */
#include <sys/types.h>	/* off_t */

/* Opaque handle for a file of nonets. */
struct pdp10_file;
typedef struct pdp10_file PDP10_FILE;

/* Open 'path' with the fopen()-style 'mode'; returns NULL and sets errno on failure. */
/* append modes are not permitted */
PDP10_FILE *pdp10_fopen(const char *path, const char *mode);

/* Write out any buffered partial octet and flush the underlying file; returns 0 or EOF. */
int pdp10_fflush(PDP10_FILE *pdp10fp);

/* Flush and close the file and release the handle; returns 0 or EOF. */
int pdp10_fclose(PDP10_FILE *pdp10fp);

int pdp10_fgetc(PDP10_FILE *pdp10fp); /* returns a nonet, [0-511], or EOF */

/* Write the low 9 bits of 'nonet_ch'; returns the nonet written, or EOF on error. */
int pdp10_fputc(uint16_t nonet_ch, PDP10_FILE *pdp10fp);

/* 'whence' values for pdp10_fseeko() */
enum {
    PDP10_SEEK_SET = 0,
    PDP10_SEEK_CUR = 1,
    PDP10_SEEK_END = 2,
};

/* Reposition the file; 'offset' counts nonets.  Returns 0, or -1 on error. */
int pdp10_fseeko(PDP10_FILE *pdp10fp, off_t offset, int whence);

/* pdp10_fread() and pdp10_fwrite() deliberately only permit transfers of strings
 * (size == 1), marshalled 9/18/36-bit primitives (nmemb == 1, size == 1, 2, or 4),
 * or empty objects (size == 0 || nmemb == 0). To transfer structures, transfer
 * their primitive fields individually.
 */
size_t pdp10_fread(uint16_t *ptr, size_t size, size_t nmemb, PDP10_FILE *pdp10fp);
size_t pdp10_fwrite(const uint16_t *ptr, size_t size, size_t nmemb, PDP10_FILE *pdp10fp);

#endif /* PDP10_STDIO_H */

44
lib/pdp10-extint.c Normal file
View File

@ -0,0 +1,44 @@
/*
* pdp10-extint.c
*
* Provide types and procedures for converting 18 and 36-bit integers
* to and from arrays of 9-bit bytes (nonets). Use these together with
* pdp10_fread() and pdp10_fwrite() to convert 18 and 36-bit integers
* between host-level and file-level binary representations.
*/
#include "pdp10-extint.h"
/*
* The behaviour of the PDP10's byte pointers implies a big-endian storage model,
* as does the layout of its 72-bit long integers.
*/
/* Split the low 18 bits of 'val' into two nonets, most significant first. */
void pdp10_uint18_to_ext(pdp10_uint18_t val, pdp10_ext_uint18_t *ext)
{
    int i;

    /* fill from the least significant nonet upwards */
    for (i = 1; i >= 0; --i) {
        ext->x[i] = val & 0x1FF;
        val >>= 9;
    }
}
/* Reassemble an 18-bit value from two nonets, most significant first.
   Only the low 9 bits of each element are used. */
pdp10_uint18_t pdp10_uint18_from_ext(const pdp10_ext_uint18_t *ext)
{
    const pdp10_uint18_t high = ext->x[0] & 0x1FF;
    const pdp10_uint18_t low = ext->x[1] & 0x1FF;

    return (high << 9) | low;
}
/* Split the low 36 bits of 'val' into four nonets, most significant first. */
void pdp10_uint36_to_ext(pdp10_uint36_t val, pdp10_ext_uint36_t *ext)
{
    int i;

    /* fill from the least significant nonet upwards */
    for (i = 3; i >= 0; --i) {
        ext->x[i] = val & 0x1FF;
        val >>= 9;
    }
}
/* Reassemble a 36-bit value from four nonets, most significant first.
   Only the low 9 bits of each element are used. */
pdp10_uint36_t pdp10_uint36_from_ext(const pdp10_ext_uint36_t *ext)
{
    pdp10_uint36_t val = 0;
    int i;

    for (i = 0; i < 4; ++i)
        val = (val << 9) | (ext->x[i] & 0x1FF);
    return val;
}

397
lib/pdp10-stdio.c Normal file
View File

@ -0,0 +1,397 @@
/*
* pdp10-stdio.c
*
* Provide stdio.h-like interface for I/O to and from files with 9-bit logical bytes (nonets),
* represented by native files with 8-bit physical bytes (octets).
*
* Theory of operation:
*
* - The state of a pdp10 file is composed of: a FILE* for an underlying octet file,
* the current read/write position in the nonet file, a 16-bit shift register buffering
* partial octets (writes) or partial nonets (reads), a counter indicating the number
* of bits in the shift register (which may be negative after a call to pdp10_fseeko),
* and a boolean flag indicating if there may be unwritten buffered output.
*
* - Write streams: pdp10_fputc adds 9 bits to shiftreg and 9 to shiftreg_nr_bits, then each
* complete group of 8 bits in shiftreg is shifted out and written to the octet file.
* Between pdp10_fputc calls shiftreg contains between 0 and 7 bits, inclusive, during a
* pdp10_fputc it may temporarily contain up to 7+9 == 16 bits.
*
* - Read streams: pdp10_fgetc reads an octet from the octet file and adds 8 bits to shiftreg
* and 8 to shiftreg_nr_bits; this is repeated once more if needed to make shiftreg
* contain at least 9 bits. Then 9 bits are shifted out of shiftreg and returned.
* Between pdp10_fgetc calls shiftreg contains between 0 and 7 bits, inclusive, during an
* fgetc it may contain up to 8+8 == 16 bits.
*
* - An output operation (pdp10_fputc or pdp10_fwrite) may not be directly followed by an
* input operation (pdp10_fgetc or pdp10_fread) without an intervening call to pdp10_fflush
* or pdp10_fseeko, and an input operation may not be directly followed by an output
* operation without an intervening call to pdp10_fseeko, unless the input operation
* encountered end-of-file. (Same restriction as ANSI/ISO C.)
*
* - A pdp10_fseeko repositions the octet file to the closest octet boundary at or before the
* requested nonet boundary, and sets shiftreg_nr_bits to the bit difference, as a number
* between 0 and -7, inclusive. A subsequent pdp10_fgetc or pdp10_fputc detects this
* special state and reinitializes shiftreg as appropriate for that I/O direction.
*/
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "pdp10-stdio.h"
/* State of one open nonet file; see the theory of operation in the file comment. */
struct pdp10_file {
FILE *octet_fp; /* the underlying file of 8-bit bytes */
off_t nonet_pos; /* current read or write nonet offset */
unsigned int shiftreg; /* contains 0 to 16 buffered bits */
int shiftreg_nr_bits; /* number of valid bits in shiftreg; -7..-1 right after an unaligned seek */
int writing; /* non-zero if shiftreg may contain pending output data */
};
/*
 * Open the file 'path' as a nonet file, with fopen()-style 'mode'.
 * Append modes are rejected with EINVAL.
 *
 * Returns a new handle positioned at nonet 0 with an empty shift register,
 * or NULL with errno set if the mode is rejected, memory cannot be
 * allocated, or the underlying fopen() fails.
 */
PDP10_FILE *pdp10_fopen(const char *path, const char *mode)
{
    PDP10_FILE *file;
    FILE *octets;
    int saved_errno;

    /* "a+" won't work, and "a" is not yet implemented */
    if (mode[0] == 'a') {
        errno = EINVAL;
        return NULL;
    }

    file = malloc(sizeof *file);
    if (file == NULL)
        return NULL;

    octets = fopen(path, mode);
    if (octets == NULL) {
        /* free() may change errno; report fopen()'s error */
        saved_errno = errno;
        free(file);
        errno = saved_errno;
        return NULL;
    }

    file->octet_fp = octets;
    file->nonet_pos = 0;
    file->shiftreg = 0;
    file->shiftreg_nr_bits = 0;
    file->writing = 0;
    return file;
}
/*
 * If the last operation was a write that left a partial octet in shiftreg,
 * merge those bits into the octet at the current position of the octet file:
 * the buffered bits become the high bits of that octet and its remaining
 * low bits are kept (or are zero at end of file).  The octet file is left
 * positioned at that same octet, and shiftreg is left unchanged, so further
 * writes continue from the same bit position.
 *
 * Returns 0 on success or if there was nothing to flush, EOF on I/O error.
 */
static int pdp10_flush_buffered_write(PDP10_FILE *pdp10fp)
{
int octet_ch;
if (!pdp10fp->writing)
return 0;
if (pdp10fp->shiftreg_nr_bits <= 0)
return 0;
/* read the next octet which we will partially overwrite */
/* (the zero-offset seek separates the preceding output from this input) */
if (fseeko(pdp10fp->octet_fp, 0, SEEK_CUR) == -1)
return EOF;
octet_ch = fgetc(pdp10fp->octet_fp);
/* rewind by one octet, or by zero octets if we read EOF above */
if (fseeko(pdp10fp->octet_fp, octet_ch == EOF ? 0 : -1, SEEK_CUR) == -1)
return EOF;
if (octet_ch == EOF)
octet_ch = 0;
/* keep the low (8 - nr_bits) bits already in the file ... */
octet_ch &= (1 << (8 - pdp10fp->shiftreg_nr_bits)) - 1;
/* ... and put the buffered bits above them */
octet_ch |= (pdp10fp->shiftreg << (8 - pdp10fp->shiftreg_nr_bits)) & 0xFF;
if (fputc(octet_ch, pdp10fp->octet_fp) == EOF)
return EOF;
/* rewind by one octet to permit further writes; XXX: this is unnecessary
when the flush is called from fclose() or fseeko() */
if (fseeko(pdp10fp->octet_fp, -1, SEEK_CUR) == -1)
return EOF;
return 0;
}
/*
 * Write out any buffered partial octet, then flush the underlying octet
 * file.  Returns EOF if either step fails, else fflush()'s result.
 */
int pdp10_fflush(PDP10_FILE *pdp10fp)
{
    return pdp10_flush_buffered_write(pdp10fp) == EOF
        ? EOF
        : fflush(pdp10fp->octet_fp);
}
/*
 * Write out any buffered partial octet, release the handle, and close the
 * underlying octet file.  The file is closed even if the flush fails.
 * Returns EOF if the flush or the close failed, else 0.
 */
int pdp10_fclose(PDP10_FILE *pdp10fp)
{
    int flush_result = pdp10_flush_buffered_write(pdp10fp);
    FILE *underlying = pdp10fp->octet_fp;

    free(pdp10fp);
    return fclose(underlying) == EOF ? EOF : flush_result;
}
/*
 * Read one octet from the octet file and append it to the low end of
 * shiftreg, raising shiftreg_nr_bits by 8.
 * Returns 0 on success; returns -1 and leaves the state unchanged if no
 * octet could be read.
 */
static int pdp10_fgetc_one_octet(PDP10_FILE *pdp10fp)
{
    int byte = fgetc(pdp10fp->octet_fp);

    if (byte != EOF) {
        /* XXX: big-endian conversion */
        pdp10fp->shiftreg = (pdp10fp->shiftreg << 8) | (byte & 0xFF);
        pdp10fp->shiftreg_nr_bits += 8;
        return 0;
    }
    return -1;	/* incomplete nonets are discarded */
}
/*
 * Read the next nonet from the file.
 * Returns the nonet, a value in [0-511], and advances nonet_pos by one;
 * returns EOF if the octet file ends before a complete nonet is available.
 */
int pdp10_fgetc(PDP10_FILE *pdp10fp)
{
uint16_t nonet_ch;
pdp10fp->writing = 0;
if (pdp10fp->shiftreg_nr_bits < 9) {
/*
* There are three cases to consider here:
*
* 1. 1 <= shiftreg_nr_bits <= 8.
* We have a partially filled nonet in the buffer.
* We'll read one octet.
*
* 2. shiftreg_nr_bits == 0.
* The last read took us to a 72-bit boundary, emptying the buffer.
* We'll read two octets.
*
* 3. -7 <= shiftreg_nr_bits <= -1.
* An fseek placed octet_pos 1 to 7 bits before nonet_pos.
* We'll read two octets, but the first -shiftreg_nr_bits
* bits will be discarded.
*
* Either way we read one or two octets, append them to the buffer,
* and increment shiftreg_nr_bits by the number of bits read.
*
* An EOF during read permits the next operation to be a write, without
* an intervening fflush() or fseeko(). Therefore we must reposition
* octet_pos before nonet_pos if an EOF occurs here.
*/
/* the second read is attempted only if the first one succeeded and still
left fewer than 9 bits in the buffer */
if (pdp10_fgetc_one_octet(pdp10fp) < 0
|| (pdp10fp->shiftreg_nr_bits < 9
&& pdp10_fgetc_one_octet(pdp10fp) < 0)) {
if (pdp10fp->shiftreg_nr_bits > 0) {
/* if this fseeko() fails then presumably subsequent fseeko()s
will also fail; if not, then data may not be read or written
where we expect it to be XXX */
(void)fseeko(pdp10fp->octet_fp, -1, SEEK_CUR);
pdp10fp->shiftreg_nr_bits -= 8;
}
return EOF;
}
}
/* XXX: big-endian conversion */
/* take the 9 oldest buffered bits; the rest stay for the next nonet */
nonet_ch = (pdp10fp->shiftreg >> (pdp10fp->shiftreg_nr_bits - 9)) & 0x1FF;
pdp10fp->shiftreg_nr_bits -= 9;
pdp10fp->nonet_pos += 1;
return nonet_ch;
}
/*
 * Write the 8 oldest buffered bits of shiftreg to the octet file as one
 * octet, lowering shiftreg_nr_bits by 8.
 * Returns 0 on success; returns -1 and leaves shiftreg_nr_bits unchanged if
 * the write fails.
 */
static int pdp10_fputc_one_octet(PDP10_FILE *pdp10fp)
{
    /* number of buffered bits that remain after this octet is written */
    const unsigned char remaining = pdp10fp->shiftreg_nr_bits - 8;
    const unsigned char octet = (pdp10fp->shiftreg >> remaining) & 0xFF;

    if (fputc(octet, pdp10fp->octet_fp) == EOF)
        return -1;
    pdp10fp->shiftreg_nr_bits = remaining;
    return 0;
}
/*
 * Write the low 9 bits of 'nonet_ch' to the file.
 * Returns the nonet written and advances nonet_pos by one, or returns EOF
 * on I/O error.
 */
int pdp10_fputc(uint16_t nonet_ch, PDP10_FILE *pdp10fp)
{
if (pdp10fp->shiftreg_nr_bits < 0) {
int octet_ch;
/*
* -7 <= shiftreg_nr_bits <= -1.
* An fseek placed octet_pos 1 to 7 bits before nonet_pos.
* We will peek at the octet at octet_pos, and preload shiftreg with the
* -shiftreg_nr_bits high bits from the octet.
*/
/* read the next octet, which we will partially overwrite */
#if 0 /* XXX: the pdp10_fseek did that already */
if (fseeko(pdp10fp->octet_fp, 0, SEEK_CUR) == -1)
return EOF;
#endif
octet_ch = fgetc(pdp10fp->octet_fp);
/* rewind by one octet, or by zero octets if we read EOF above */
if (fseeko(pdp10fp->octet_fp, octet_ch == EOF ? 0 : -1, SEEK_CUR) == -1)
return EOF;
if (octet_ch == EOF)
octet_ch = 0;
pdp10fp->shiftreg_nr_bits = -pdp10fp->shiftreg_nr_bits;
pdp10fp->shiftreg = (octet_ch & 0xFF) >> (8 - pdp10fp->shiftreg_nr_bits);
}
pdp10fp->writing = 1;
/* append the nonet below the bits already buffered */
pdp10fp->shiftreg = (pdp10fp->shiftreg << 9) | (nonet_ch & 0x1FF);
pdp10fp->shiftreg_nr_bits += 9;
/* at least one complete octet is buffered now: write it out */
if (pdp10_fputc_one_octet(pdp10fp) < 0)
return EOF;
/* a second complete octet is buffered when exactly 8 bits remain */
if (pdp10fp->shiftreg_nr_bits == 8
&& pdp10_fputc_one_octet(pdp10fp) < 0)
return EOF;
pdp10fp->nonet_pos += 1;
return nonet_ch & 0x1FF;
}
/*
 * Reposition the nonet stream.
 *
 * offset is counted in nonets, relative to the start of the file
 * (PDP10_SEEK_SET), the current nonet position (PDP10_SEEK_CUR), or the
 * end of the file (PDP10_SEEK_END).  Any buffered write is flushed first.
 *
 * Returns 0 on success.  Returns -1 if the flush fails, if an fseeko() or
 * ftello() on the underlying octet stream fails, or (with errno set to
 * EINVAL) if whence is not one of the three values above.
 */
int pdp10_fseeko(PDP10_FILE *pdp10fp, off_t offset, int whence)
{
    off_t nonet_base;
    off_t nonet_target;
    off_t octet_target;

    if (pdp10_flush_buffered_write(pdp10fp) == EOF)
        return -1;

    if (whence == PDP10_SEEK_SET) {
        nonet_base = 0;
    } else if (whence == PDP10_SEEK_CUR) {
        nonet_base = pdp10fp->nonet_pos;
    } else if (whence == PDP10_SEEK_END) {
        off_t octet_end;

        /* find the file size in octets by seeking to the end */
        if (fseeko(pdp10fp->octet_fp, 0, SEEK_END) == -1)
            return -1;
        octet_end = ftello(pdp10fp->octet_fp);
        if (octet_end == -1)
            return -1;
        /*
         * nonet_base = (octet_end * 8) / 9, computed without overflowing
         * the intermediate product.  With octet_end = A * 9 + B, where
         * A = octet_end / 9 and B = octet_end % 9:
         *
         *    ((A * 9 + B) * 8) / 9 == A * 8 + (B * 8) / 9
         */
        nonet_base = (octet_end / 9) * 8 + ((octet_end % 9) * 8) / 9;
    } else {
        errno = EINVAL;
        return -1;
    }

    nonet_target = nonet_base + offset;

    /*
     * octet_target = (nonet_target * 9) / 8, again split to avoid
     * overflowing the intermediate product.  With nonet_target = C * 8 + D,
     * where C = nonet_target / 8 and D = nonet_target % 8:
     *
     *    ((C * 8 + D) * 9) / 8 == C * 9 + (D * 9) / 8
     */
    octet_target = (nonet_target / 8) * 9 + ((nonet_target % 8) * 9) / 8;

    if (fseeko(pdp10fp->octet_fp, octet_target, SEEK_SET) == -1)
        return -1;

    /*
     * The octet position is now 0 to 7 bits before the nonet position.
     * What to do about those "slack" bits depends on whether the next
     * operation is a read or a write, so record their count, negated,
     * in shiftreg_nr_bits and let that operation deal with it.
     */
    pdp10fp->shiftreg_nr_bits = -(nonet_target % 8);
    pdp10fp->shiftreg = 0;
    pdp10fp->writing = 0;
    pdp10fp->nonet_pos = nonet_target;
    return 0;
}
/*
* On an octet-based host, in-core data structures representing nonet-based
* target data will in fact contain oversize octet-based host data. For
* example, 9/18/36-bit target integers are typically stored in 16/32/64-bit
* host integers.
*
* This means that I/O of aggregate structures must be avoided, and instead
* be performed on each primitive data field individually, using explicit
* marshalling code for multi-nonet primitive data types.
*
* To detect mistakes in I/O, fread and fwrite only accept strings (size == 1)
* and single marshalled primitive data values (nmemb == 1, size == 1, 2, or 4).
*/
/*
 * Validate the (size, nmemb) pair given to pdp10_fread / pdp10_fwrite.
 *
 * Returns 0 if the pair is acceptable, 1 otherwise.  Acceptable pairs are
 * size == 1 with any nmemb, or nmemb == 1 with size == 2 or size == 4.
 */
static int pdp10_freadwrite_bad_params(size_t size, size_t nmemb)
{
    /* single-nonet elements: any count is fine */
    if (size == 1)
        return 0;

    /* exactly one element of 2 or 4 nonets */
    if (nmemb == 1 && (size == 2 || size == 4))
        return 0;

    return 1;
}
/*
 * Read up to nmemb elements of size nonets each into ptr, storing one
 * nonet (masked to 9 bits) per uint16_t.
 *
 * Returns the number of complete elements read, which is less than nmemb
 * if pdp10_fgetc() returns EOF first.  If size or nmemb is 0, nmemb is
 * returned and nothing is read.  If the (size, nmemb) pair is rejected by
 * pdp10_freadwrite_bad_params(), errno is set to EINVAL and 0 is returned.
 */
size_t pdp10_fread(uint16_t *ptr, size_t size, size_t nmemb, PDP10_FILE *pdp10fp)
{
    size_t nr_read = 0;
    size_t nr_wanted;

    if (nmemb == 0 || size == 0)
        return nmemb;

    if (pdp10_freadwrite_bad_params(size, nmemb)) {
        errno = EINVAL;
        return 0;
    }

    nr_wanted = size * nmemb;
    while (nr_read < nr_wanted) {
        int ch = pdp10_fgetc(pdp10fp);

        if (ch == EOF)
            break;
        ptr[nr_read++] = ch & 0x1FF;
    }

    /* nonets of a partially read element are not counted */
    return nr_read / size;
}
/*
 * Write nmemb elements of size nonets each from ptr, taking one nonet
 * (the low 9 bits) from each uint16_t.
 *
 * Returns the number of complete elements written, which is less than
 * nmemb if pdp10_fputc() returns EOF first.  If size or nmemb is 0, nmemb
 * is returned and nothing is written.  If the (size, nmemb) pair is
 * rejected by pdp10_freadwrite_bad_params(), errno is set to EINVAL and
 * 0 is returned.
 */
size_t pdp10_fwrite(const uint16_t *ptr, size_t size, size_t nmemb, PDP10_FILE *pdp10fp)
{
    size_t nr_written = 0;
    size_t nr_wanted;

    if (nmemb == 0 || size == 0)
        return nmemb;

    if (pdp10_freadwrite_bad_params(size, nmemb)) {
        errno = EINVAL;
        return 0;
    }

    nr_wanted = size * nmemb;
    while (nr_written < nr_wanted
           && pdp10_fputc(ptr[nr_written] & 0x1FF, pdp10fp) != EOF)
        ++nr_written;

    /* nonets of a partially written element are not counted */
    return nr_written / size;
}