Initial import

2026-04-19 17:13:05 +00:00 · 2013-07-03 16:29:42 +00:00
commit 7c189dd488
36 changed files with 5441 additions and 0 deletions
--- a/as/Makefile
+++ b/as/Makefile
@@ -0,0 +1,22 @@
+# Build settings for the assembler's object files.
+CC=gcc
+CFLAGS=-O2 -g -Wall
+CPPFLAGS=-I../include
+
+# NOTE(review): expr.o is not listed although this import also adds as/expr.c -- confirm.
+OBJS=	arrlst.o charbuf.o emalloc.o htab.o main.o parse.o pass1.o scan.o section.o strtab.o token.o
+
+# "all" and "clean" name actions, not files; keep them working even if a
+# file with one of those names appears in this directory.
+.PHONY:	all clean
+
+all:	$(OBJS)
+
+# Header dependencies of each object file.
+arrlst.o:	arrlst.h emalloc.h
+charbuf.o:	charbuf.h emalloc.h
+emalloc.o:	emalloc.h
+htab.o:		emalloc.h htab.h
+main.o:		pass1.h
+parse.o:	emalloc.h parse.h scan.h token.h token.def stmt.h expr.h strtab.h section.h ../include/pdp10-stdint.h ../include/pdp10-elf36.h
+pass1.o:	parse.h pass1.h scan.h section.h stmt.h expr.h token.h ../include/pdp10-elf36.h arrlst.h htab.h strtab.h ../include/pdp10-stdint.h
+scan.o:		charbuf.h scan.h token.h token.def ../include/pdp10-stdint.h
+section.o:	emalloc.h htab.h section.h arrlst.h stmt.h expr.h strtab.h ../include/pdp10-elf36.h ../include/pdp10-stdint.h
+strtab.o:	emalloc.h htab.h strtab.h
+token.o:	../include/pdp10-inttypes.h token.h token.def
+
+# Remove the objects, a default output file and stray core dumps.
+clean:
+	rm -f $(OBJS) a.out core core.*
--- a/as/Notes.txt
+++ b/as/Notes.txt
@@ -0,0 +1,52 @@
+* strip whitespace, leave one space before a keyword on a line, compress any
+  other whitespace to a single space
+* white is blank, tab, /* ... */
+* target-specific line comment, e.g. #
+* symbol = [letter | _ | . | $] [letter | _ | . | $ | digit]*
+* stmt: {label}* { directive | instruction | empty }
+
+* %r0..%r15 with %sp == %r15 ? (no, % is also an operator)
+* $0..$15 with $sp == $15? (yes, these are just symbols)
+
+
+* PDP10 syntax:
+
+mnemonic accumulator,address
+
+	movem 1,foo
+
+mnemonic accumulator,
+
+	popj 17,
+
+mnemonic address
+
+	setzm foo
+
+	skipe 0(16)
+
+address prefixed by "@" makes it indirect
+
+	setzm @foo
+
+address suffixed by "(ixreg)" makes it indexed
+
+	setzm 3(16)
+
+
+";" is line comment char
+
+	foo ;comment
+
+radix is 8 by default; a single-digit number is always decimal
+radix can be changed by the RADIX directive
+a number can indicate its radix by a ^B, ^O, or ^D prefix (XXX: add ^X for hex)
+
+symbols can use letters, digits, dot, dollar, and percent signs
+
+exp1,,exp2 assembles two 18-bit expressions into a 36-bit value
+
+< expr > parentheses
+
+there is no "semi-colon" like symbol for putting multiple statements
+on a single line
--- a/as/TODO
+++ b/as/TODO
@@ -0,0 +1,28 @@
+section text
+* subsection text0
+** frag0, frag1, ... each being sth that generates object code
+* subsection text1
+** frag...
+section data
+* subsection data0
+** frag...
+
+array of sections (ELF allows user-defined ones)
+a section is array of subsections, sorted and concatenated during output, ld sees no subsections
+a subsection is array of fragments
+a fragment describes a directive, an insn, a label defn
+
+pass1 parses .s text, creates frags, and appends them to subsections
+
+pass2 computes labels
+
+pass3 outputs concatenated data in ELF form
+
+helpers
+* string table
+* symbol table
+* section table
+
+copy ELF32 headers and use them throughout, esp. for symbol/section types etc
+
+Look up section group and comdat in sco elf draft html pages.
--- a/as/arrlst.c
+++ b/as/arrlst.c
@@ -0,0 +1,186 @@
+/*
+ * arrlst.c
+ */
+#include <stdlib.h>
+#include <string.h>
+#include "arrlst.h"
+#include "emalloc.h"
+
+/* Number of element-sized slots in every chunk, including the slots at
+   the start of the chunk that hold the link to the next chunk.  */
+enum {
+    ARRLST_CHUNK_NRELEM = 128,
+};
+
+/*
+ * A list of fixed-size elements stored in a singly linked chain of
+ * chunks.  Each chunk starts with a void* link to the next chunk,
+ * followed by element slots.
+ */
+struct arrlst {
+    void *head;			/* first chunk, or NULL while the list is empty */
+    void *tail;			/* chunk that append() writes into */
+    unsigned int eltsz;		/* size of one element in bytes */
+    unsigned int tailpos;	/* slot index in tail for the next append() */
+
+    /* We only allow a single iterator per arrlst, so we
+       allocate the iterator in the arrlst itself.  */
+    struct {
+	void *chunk;		/* chunk being iterated, NULL after rewind */
+	unsigned int chunkpos;	/* slot index of the next element to return */
+	unsigned int chunklen;	/* slot index at which this chunk is exhausted */
+    } iter;
+};
+
+/* Return the address of a chunk's link to the next chunk; the link is
+   stored at the very start of the chunk.  */
+static void **arrlst_chunk_nextp(const void *chunk)
+{
+    return (void**)chunk;
+}
+
+/* Number of eltsz-byte slots needed to hold one void*, i.e.
+   sizeof(void*) / eltsz rounded up.  eltsz must be non-zero.  */
+static unsigned int eltsz_chunk_header_nrelem(unsigned int eltsz)
+{
+    unsigned int whole_slots = sizeof(void*) / eltsz;
+
+    /* a remainder means the pointer spills into one more slot */
+    if (sizeof(void*) % eltsz != 0)
+	return whole_slots + 1;
+
+    return whole_slots;
+}
+
+/* Number of leading slots in each of this list's chunks that are taken
+   up by the next-chunk link.  */
+static unsigned int arrlst_chunk_header_nrelem(const struct arrlst *arrlst)
+{
+    return eltsz_chunk_header_nrelem(arrlst->eltsz);
+}
+
+/* Address of slot number eltnr (counted from the start of the chunk,
+   header slots included) in the given chunk.  */
+static void *arrlst_chunk_element(const struct arrlst *arrlst, void *chunk, unsigned int eltnr)
+{
+    char *base = chunk;
+
+    return base + eltnr * arrlst->eltsz;
+}
+
+/* Allocate one chunk of ARRLST_CHUNK_NRELEM slots via emalloc().  The
+   chunk contents, including its next-chunk link, are not initialized.  */
+static void *arrlst_alloc_chunk(const struct arrlst *arrlst)
+{
+    return emalloc(arrlst->eltsz * ARRLST_CHUNK_NRELEM);
+}
+
+/*
+ * Create an empty list whose elements are eltsz bytes each.
+ *
+ * Returns NULL if eltsz is zero, does not fit in an unsigned int, or is
+ * so small that the next-chunk link would fill an entire chunk.
+ */
+struct arrlst *arrlst_alloc(size_t eltsz)
+{
+    struct arrlst *arrlst;
+
+    if (eltsz == 0
+	|| eltsz > (unsigned int)-1
+	|| eltsz_chunk_header_nrelem(eltsz) >= ARRLST_CHUNK_NRELEM)
+	return NULL;
+
+    arrlst = emalloc(sizeof *arrlst);
+
+    /* these fields will be adjusted in the first call to append() */
+    arrlst->head = NULL;
+    arrlst->tail = NULL;
+    arrlst->eltsz = eltsz;
+    arrlst->tailpos = ARRLST_CHUNK_NRELEM;
+
+    /* Start with a rewound iterator so that arrlst_iter_next() never
+       reads indeterminate values, even without a prior rewind.  */
+    arrlst->iter.chunk = NULL;
+    arrlst->iter.chunkpos = 0;
+    arrlst->iter.chunklen = 0;
+
+    return arrlst;
+}
+
+/*
+ * Release a list and all of its chunks.  Passing NULL is a no-op, so
+ * the NULL that arrlst_alloc() returns for a bad eltsz can be freed
+ * without a separate check.
+ */
+void arrlst_free(struct arrlst *arrlst)
+{
+    void *chunk, *next;
+
+    if (arrlst == NULL)
+	return;
+
+    chunk = arrlst->head;
+    while (chunk) {
+	/* fetch the link before the chunk holding it is released */
+	next = *arrlst_chunk_nextp(chunk);
+	free(chunk);
+	chunk = next;
+    }
+
+    free(arrlst);
+}
+
+/* Count the elements appended so far by walking the chunk chain: every
+   chunk that has a successor is full, the last one holds tailpos slots
+   (link slots included in both cases and subtracted again).  */
+size_t arrlst_length(const struct arrlst *arrlst)
+{
+    const void *cur, *succ;
+    size_t hdr, total;
+
+    cur = arrlst->head;
+    if (cur == NULL)
+	return 0;
+
+    hdr = arrlst_chunk_header_nrelem(arrlst);
+    total = 0;
+    while ((succ = *arrlst_chunk_nextp(cur)) != NULL) {
+	total += ARRLST_CHUNK_NRELEM - hdr;
+	cur = succ;
+    }
+
+    return total + arrlst->tailpos - hdr;
+}
+
+/*
+ * Reserve the next element slot at the end of the list and return a
+ * pointer to it.  The slot's contents are uninitialized.
+ *
+ * A new chunk is allocated when the tail chunk is full, and on the very
+ * first append (arrlst_alloc() sets tailpos to ARRLST_CHUNK_NRELEM to
+ * force this).  Returns NULL only if the chunk allocator returns NULL.
+ */
+void *arrlst_append(struct arrlst *arrlst)
+{
+    void *tail;
+    void *elt;
+
+    tail = arrlst->tail;
+
+    if (arrlst->tailpos >= ARRLST_CHUNK_NRELEM) {
+	void *new_tail;
+
+	new_tail = arrlst_alloc_chunk(arrlst);
+	if (!new_tail)
+	    return NULL;
+
+	/* The chunk memory is uninitialized; terminate the chain explicitly
+	   because free(), length() and the iterator all stop at a NULL link.  */
+	*arrlst_chunk_nextp(new_tail) = NULL;
+
+	if (tail)
+	    *arrlst_chunk_nextp(tail) = new_tail;
+	else
+	    arrlst->head = new_tail;
+
+	/* Always advance the tail; otherwise the next append would write
+	   into the old, already full chunk again.  */
+	arrlst->tail = new_tail;
+
+	/* the first usable slot comes after the link at the chunk start */
+	arrlst->tailpos = arrlst_chunk_header_nrelem(arrlst);
+
+	tail = new_tail;
+    }
+
+    elt = arrlst_chunk_element(arrlst, tail, arrlst->tailpos);
+    ++arrlst->tailpos;
+
+    return elt;
+}
+
+/* Reset the list's single iterator so that the next call to
+   arrlst_iter_next() starts over from the head chunk.  */
+void arrlst_iter_rewind(struct arrlst *arrlst)
+{
+    /* pos >= len makes next() treat the (absent) chunk as exhausted */
+    arrlst->iter.chunkpos = 0;
+    arrlst->iter.chunklen = 0;
+    arrlst->iter.chunk = NULL;
+}
+
+/* Return a pointer to the next element of the iteration, or NULL when
+   there are no more chunks to visit.  */
+void *arrlst_iter_next(struct arrlst *arrlst)
+{
+    void *cur = arrlst->iter.chunk;
+
+    if (arrlst->iter.chunkpos >= arrlst->iter.chunklen) {
+	/* current chunk used up, or iterator freshly rewound: move on */
+	cur = cur ? *arrlst_chunk_nextp(cur) : arrlst->head;
+	if (!cur)
+	    return NULL;
+	arrlst->iter.chunk = cur;
+
+	/* a chunk with a successor is full; the last one ends at tailpos */
+	arrlst->iter.chunklen = *arrlst_chunk_nextp(cur) ? ARRLST_CHUNK_NRELEM : arrlst->tailpos;
+
+	/* skip the slots occupied by the link */
+	arrlst->iter.chunkpos = arrlst_chunk_header_nrelem(arrlst);
+    }
+
+    return arrlst_chunk_element(arrlst, cur, arrlst->iter.chunkpos++);
+}
--- a/as/arrlst.h
+++ b/as/arrlst.h
@@ -0,0 +1,20 @@
+/*
+ * arrlst.h
+ */
+#ifndef ARRLST_H
+#define ARRLST_H
+
+#include <stdlib.h>	/* size_t */
+
+/* Opaque list of fixed-size elements; the layout is private to arrlst.c.  */
+struct arrlst;
+
+/* Create an empty list of eltsz-byte elements; NULL if eltsz is rejected.  */
+struct arrlst *arrlst_alloc(size_t eltsz);
+/* Release the list and all storage it owns.  */
+void arrlst_free(struct arrlst *arrlst);
+/* Number of elements appended so far.  */
+size_t arrlst_length(const struct arrlst *arrlst);
+/* Add one uninitialized element at the end and return its address.  */
+void *arrlst_append(struct arrlst *arrlst);
+
+/* for now there is only one iterator per arrlst */
+/* Restart the iteration at the first element.  */
+void arrlst_iter_rewind(struct arrlst *arrlst);
+/* Return the next element's address, or NULL at the end.  */
+void *arrlst_iter_next(struct arrlst *arrlst);
+
+#endif /* ARRLST_H */
--- a/as/charbuf.c
+++ b/as/charbuf.c
@@ -0,0 +1,90 @@
+/*
+ * charbuf.c
+ */
+#include <stdlib.h>
+#include <string.h>
+#include "charbuf.h"
+#include "emalloc.h"
+
+/* Make charbuf empty: only the embedded head chunk is in use and
+   nothing has been written to it yet.  */
+void charbuf_init(struct charbuf *charbuf)
+{
+    charbuf->pos = 0;
+    charbuf->tail = &charbuf->head;
+    charbuf->head.next = NULL;
+}
+
+/* Free every heap-allocated chunk.  The head chunk is embedded in the
+   charbuf itself and is therefore not freed.  */
+void charbuf_fini(struct charbuf *charbuf)
+{
+    struct charbuf_chunk *cur, *succ;
+
+    for (cur = charbuf->head.next; cur != NULL; cur = succ) {
+	succ = cur->next;
+	free(cur);
+    }
+}
+
+/* Append one character, linking in a fresh chunk first when the tail
+   chunk is already full.  */
+void charbuf_append(struct charbuf *charbuf, int ch)
+{
+    struct charbuf_chunk *last = charbuf->tail;
+    unsigned int idx = charbuf->pos;
+
+    if (idx >= sizeof last->buf) {
+	struct charbuf_chunk *fresh = emalloc(sizeof *fresh);
+
+	fresh->next = NULL;
+	last->next = fresh;
+	charbuf->tail = fresh;
+	last = fresh;
+	idx = 0;
+    }
+
+    last->buf[idx] = ch;
+    charbuf->pos = idx + 1;
+}
+
+/*
+ * Compare the characters collected in charbuf with the NUL-terminated
+ * string, in the manner of strcmp(): the result is negative, zero or
+ * positive, and only its sign is meaningful.
+ *
+ * The buffer and the string compare equal only if the string ends
+ * exactly where the buffer does; a buffer that is merely a prefix of
+ * the string compares as less.  A NUL stored in the buffer ends the
+ * comparison there, as it would for a C string.
+ */
+int charbuf_strcmp(const struct charbuf *charbuf, const char *string)
+{
+    const struct charbuf_chunk *chunk;
+    size_t len, i;
+
+    for (chunk = &charbuf->head; chunk != NULL; chunk = chunk->next) {
+	/* every chunk but the last is full; the last holds pos characters */
+	len = chunk->next != NULL ? sizeof chunk->buf : charbuf->pos;
+	for (i = 0; i < len; ++i, ++string) {
+	    unsigned char a = (unsigned char)chunk->buf[i];
+	    unsigned char b = (unsigned char)*string;
+
+	    /* also taken when string ends first (b == 0, a != 0) */
+	    if (a != b)
+		return a < b ? -1 : 1;
+	    /* both ended here; never step past string's terminator */
+	    if (a == '\0')
+		return 0;
+	}
+    }
+
+    /* buffer exhausted: equal only if string is exhausted too */
+    return *string == '\0' ? 0 : -1;
+}
+
+/* Return the buffer contents as a newly emalloc()ed, NUL-terminated
+   string.  The charbuf itself is left unchanged.  */
+char *charbuf_string(const struct charbuf *charbuf)
+{
+    const struct charbuf_chunk *cur;
+    size_t full_bytes = 0;
+    char *result, *out;
+
+    /* every chunk with a successor is completely filled */
+    for (cur = &charbuf->head; cur->next != NULL; cur = cur->next)
+	full_bytes += sizeof cur->buf;
+
+    /* + pos characters in the tail chunk, + 1 for the terminator */
+    result = emalloc(full_bytes + charbuf->pos + 1);
+
+    out = result;
+    for (cur = &charbuf->head; cur->next != NULL; cur = cur->next) {
+	memcpy(out, cur->buf, sizeof cur->buf);
+	out += sizeof cur->buf;
+    }
+    memcpy(out, cur->buf, charbuf->pos);
+    out[charbuf->pos] = '\0';
+
+    return result;
+}
--- a/as/charbuf.h
+++ b/as/charbuf.h
@@ -0,0 +1,24 @@
+/*
+ * charbuf.h
+ */
+#ifndef CHARBUF_H
+#define CHARBUF_H
+
+/* One link in the chain of character storage.  The buffer is sized so
+   that buf plus the next pointer add up to 128 bytes.  */
+struct charbuf_chunk {
+    char buf[128 - sizeof(struct charbuf_chunk*)];
+    struct charbuf_chunk *next;
+};
+
+/* A growable character buffer.  The first chunk is embedded, further
+   chunks are allocated on demand by charbuf_append().  */
+struct charbuf {
+    struct charbuf_chunk head;
+    struct charbuf_chunk *tail;	/* INV: tail->next == NULL */
+    unsigned int pos;		/* in tail chunk */
+};
+
+/* Set up an empty buffer.  */
+void charbuf_init(struct charbuf *charbuf);
+/* Free the chunks allocated by charbuf_append().  */
+void charbuf_fini(struct charbuf *charbuf);
+/* Add the character ch at the end of the buffer.  */
+void charbuf_append(struct charbuf *charbuf, int ch);
+/* Compare the buffer contents against string; zero means a match.  */
+int charbuf_strcmp(const struct charbuf *charbuf, const char *string);
+/* Copy the buffer contents into a newly allocated NUL-terminated string.  */
+char *charbuf_string(const struct charbuf *charbuf);
+
+#endif /* CHARBUF_H */
--- a/as/emalloc.c
+++ b/as/emalloc.c
@@ -0,0 +1,20 @@
+/*
+ * emalloc.c
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "emalloc.h"
+
+/*
+ * malloc() wrapper that never returns NULL: on allocation failure it
+ * prints a diagnostic to stderr and terminates the process with exit
+ * status 1.
+ */
+void *emalloc(size_t nrbytes)
+{
+    void *p;
+
+    /* malloc(0) is allowed to return NULL without that being an error;
+       request one byte instead so NULL always means failure.  */
+    p = malloc(nrbytes ? nrbytes : 1);
+    if (!p) {
+	fprintf(stderr, "malloc(%zu) failed: %s\n", nrbytes, strerror(errno));
+	exit(1);
+    }
+    return p;
+}
--- a/as/emalloc.h
+++ b/as/emalloc.h
@@ -0,0 +1,11 @@
+/*
+ * emalloc.h
+ */
+#ifndef EMALLOC_H
+#define EMALLOC_H
+
+#include <stddef.h>	/* size_t */
+
+/* Allocate nrbytes with malloc(); on failure print a message and exit(1)
+   instead of returning.  */
+void *emalloc(size_t nrbytes);
+
+#endif /* EMALLOC_H */
--- a/as/expr.c
+++ b/as/expr.c
@@ -0,0 +1,168 @@
+/*
+ * expr.c
+ */
+#include <stdio.h>
+#include "pdp10-arith.h"
+#include "expr.h"
+
+/*
+ * Evaluate expr into *value as a (section, offset) pair.
+ *
+ * Returns 0 on success and -1 on error.  Unless quiet is non-zero, a
+ * diagnostic is printed to stderr for each error.
+ *
+ * Section rules implemented below:
+ *  - integer literals are absolute (SECTION_ABS);
+ *  - unary operators require an absolute operand;
+ *  - '+' accepts an absolute operand on either side, or two operands
+ *    from the same section;
+ *  - '-' accepts an absolute right operand, or two operands from the
+ *    same section (which yields an absolute result);
+ *  - all other binary operators require both operands to be absolute.
+ */
+int eval(const struct expr *expr, struct value *value, int quiet)
+{
+    switch (expr->tag) {
+    case E_UINTEGER:
+	value->section = SECTION_ABS;
+	value->offset = expr->u.e_uinteger.val;
+	break;
+    case E_SYMBOL:
+	/* Not implemented yet: "xxx" is an undefined identifier, so this
+	   file does not compile until symbol lookup is written.  */
+	xxx;
+	break;
+    case E_UNARY:
+	if (eval(expr->u.e_unary.expr, value, quiet) < 0)
+	    return -1;
+	if (value->section != SECTION_ABS) {
+	    if (!quiet)
+		fprintf(stderr, "as: %s(): unary operand is not absolute\n", __FUNCTION__);
+	    return -1;
+	}
+	switch (expr->u.e_unary.unop) {
+	case E_UMINUS:
+	    value->offset = pdp10_neg_int36(value->offset);
+	    break;
+	case E_NOT:
+	    value->offset = pdp10_not_int36(value->offset);
+	    break;
+	}
+	break;
+    case E_BINARY:
+    {
+	struct value value2;
+
+	if (eval(expr->u.e_binary.expr1, value, quiet) < 0
+	    || eval(expr->u.e_binary.expr2, &value2, quiet) < 0)
+	    return -1;
+
+	/* '+' and '-' are the only operators that accept section-relative
+	   operands; handle them first and return directly.  */
+	switch (expr->u.e_binary.binop) {
+	case E_ADD:
+	    if (value->section == SECTION_ABS)
+		value->section = value2.section;
+	    else if (value2.section == SECTION_ABS)
+		;
+	    else if (value->section == value2.section)
+		;
+	    else {
+		if (!quiet)
+		    fprintf(stderr, "as: %s(): adding operands from different sections\n", __FUNCTION__);
+		return -1;
+	    }
+	    value->offset = pdp10_add_int36(value->offset, value2.offset);
+	    return 0;
+	case E_SUB:
+	    if (value2.section == SECTION_ABS)
+		;
+	    else if (value->section == value2.section)
+		value->section = SECTION_ABS;
+	    else {
+		if (!quiet)
+		    fprintf(stderr, "as: %s(): subtracting operands from different sections\n", __FUNCTION__);
+		return -1;
+	    }
+	    value->offset = pdp10_sub_int36(value->offset, value2.offset);
+	    return 0;
+	default:
+	    break;
+	}
+	if (value->section != SECTION_ABS
+	    || value2.section != SECTION_ABS) {
+	    if (!quiet)
+		fprintf(stderr, "as: %s(): binary sub-<expr> is not absolute\n", __FUNCTION__);
+	    return -1;
+	}
+	switch (expr->u.e_binary.binop) {
+	case E_MUL:
+	    value->offset = pdp10_mul_int36(value->offset, value2.offset);
+	    break;
+	case E_DIV:
+	    /* reject a zero divisor instead of handing it to the helper */
+	    if (!pdp10_nonzero_int36(value2.offset)) {
+		if (!quiet)
+		    fprintf(stderr, "as: %s(): division by zero\n", __FUNCTION__);
+		return -1;
+	    }
+	    value->offset = pdp10_div_int36(value->offset, value2.offset);
+	    break;
+	case E_REM:
+	    /* reject a zero divisor instead of handing it to the helper */
+	    if (!pdp10_nonzero_int36(value2.offset)) {
+		if (!quiet)
+		    fprintf(stderr, "as: %s(): division by zero\n", __FUNCTION__);
+		return -1;
+	    }
+	    value->offset = pdp10_rem_int36(value->offset, value2.offset);
+	    break;
+	case E_LSHIFT:
+	    /* XXX: range check */
+	    value->offset = pdp10_asl_int36(value->offset, value2.offset);
+	    break;
+	case E_RSHIFT:
+	    /* XXX: range check */
+	    value->offset = pdp10_asr_int36(value->offset, value2.offset);
+	    break;
+	case E_OR:
+	    value->offset = pdp10_or_int36(value->offset, value2.offset);
+	    break;
+	case E_AND:
+	    value->offset = pdp10_and_int36(value->offset, value2.offset);
+	    break;
+	case E_XOR:
+	    value->offset = pdp10_xor_int36(value->offset, value2.offset);
+	    break;
+	case E_ORNOT:
+	    value->offset = pdp10_or_int36(value->offset, pdp10_not_int36(value2.offset));
+	    break;
+	/* comparisons yield all ones (PDP10_UINT36_MAX) for true, 0 for false */
+	case E_EQ:
+	    value->offset = pdp10_eq_int36(value->offset, value2.offset) ? PDP10_UINT36_MAX : 0;
+	    break;
+	case E_NE:
+	    value->offset = pdp10_ne_int36(value->offset, value2.offset) ? PDP10_UINT36_MAX : 0;
+	    break;
+	case E_LT:
+	    value->offset = pdp10_lt_int36(value->offset, value2.offset) ? PDP10_UINT36_MAX : 0;
+	    break;
+	case E_GT:
+	    value->offset = pdp10_gt_int36(value->offset, value2.offset) ? PDP10_UINT36_MAX : 0;
+	    break;
+	case E_GE:
+	    value->offset = pdp10_ge_int36(value->offset, value2.offset) ? PDP10_UINT36_MAX : 0;
+	    break;
+	case E_LE:
+	    value->offset = pdp10_le_int36(value->offset, value2.offset) ? PDP10_UINT36_MAX : 0;
+	    break;
+	/* the logical operators yield 1 for true, 0 for false */
+	case E_ANDAND:
+	    value->offset = (pdp10_nonzero_int36(value->offset) && pdp10_nonzero_int36(value2.offset)) ? 1 : 0;
+	    break;
+	case E_OROR:
+	    value->offset = (pdp10_nonzero_int36(value->offset) || pdp10_nonzero_int36(value2.offset)) ? 1 : 0;
+	    break;
+	default:
+	    /* E_ADD / E_SUB cannot occur here, but the compiler may not see that */
+	    break;
+	}
+	break;
+    }
+    }
+    return 0;
+}
+
+/* Evaluate expr and store its offset in *offset, provided the result is
+   absolute.  Returns 0 on success, -1 if evaluation fails or the result
+   is section-relative; the latter is reported unless quiet is set.  */
+int eval_abs(const struct expr *expr, pdp10_uint36_t *offset, int quiet)
+{
+    struct value result;
+
+    if (eval(expr, &result, quiet) < 0)
+	return -1;
+
+    if (result.section == SECTION_ABS) {
+	*offset = result.offset;
+	return 0;
+    }
+
+    if (!quiet)
+	fprintf(stderr, "as: non-absolute expression\n");
+    return -1;
+}
+
+/* eval_abs() with diagnostics enabled (quiet == 0).  */
+int eval_abs_verbose(const struct expr *expr, pdp10_uint36_t *offset)
+{
+    return eval_abs(expr, offset, 0);
+}
+
+/* eval_abs() with diagnostics suppressed (quiet == 1).  */
+int eval_abs_quiet(const struct expr *expr, pdp10_uint36_t *offset)
+{
+    return eval_abs(expr, offset, 1);
+}
--- a/as/expr.h
+++ b/as/expr.h
@@ -0,0 +1,76 @@
+/*
+ * expr.h
+ */
+#ifndef EXPR_H
+#define EXPR_H
+
+#include "pdp10-elf36.h"
+#include "section.h"
+#include "strtab.h"
+
+/* Discriminator for struct expr: selects the active member of its union.  */
+enum expr_tag {
+    E_UINTEGER,
+    E_SYMBOL,
+    E_UNARY,
+    E_BINARY,
+};
+
+/* Operators of E_UNARY expressions.  */
+enum expr_unop {
+    E_UMINUS,
+    E_NOT,
+};
+
+/* Operators of E_BINARY expressions.  */
+enum expr_binop {
+    E_MUL,
+    E_DIV,
+    E_REM,
+    E_LSHIFT,
+    E_RSHIFT,
+    E_OR,
+    E_AND,
+    E_XOR,
+    E_ORNOT,
+    E_ADD,
+    E_SUB,
+    E_EQ,
+    E_NE,
+    E_LT,
+    E_GT,
+    E_GE,
+    E_LE,
+    E_ANDAND,
+    E_OROR,
+};
+
+/* A node of an expression tree.  */
+struct expr {
+    enum expr_tag tag;
+    union {
+	struct {
+	    pdp10_uint36_t val;
+	} e_uinteger;		/* tag == E_UINTEGER */
+	struct {
+	    const struct strnode *name;
+	} e_symbol;		/* tag == E_SYMBOL */
+	struct {
+	    enum expr_unop unop;
+	    struct expr *expr;
+	} e_unary;		/* tag == E_UNARY */
+	struct {
+	    enum expr_binop binop;
+	    struct expr *expr1;
+	    struct expr *expr2;
+	} e_binary;		/* tag == E_BINARY */
+    } u;
+};
+
+/* Result of evaluating an expression: an offset within a section.  */
+struct value {
+    struct section *section;
+    pdp10_uint36_t offset;
+};
+
+/* Evaluate expr into *value; returns 0 on success, -1 on error.
+   A non-zero quiet suppresses the diagnostics.  */
+int eval(const struct expr *expr, struct value *value, int quiet);
+/* Like eval(), but additionally fails unless the result is absolute.  */
+int eval_abs(const struct expr *expr, pdp10_uint36_t *offset, int quiet);
+/* eval_abs() with quiet == 0.  */
+int eval_abs_verbose(const struct expr *expr, pdp10_uint36_t *offset);
+/* eval_abs() with quiet == 1.  */
+int eval_abs_quiet(const struct expr *expr, pdp10_uint36_t *offset);
+
+#endif /* EXPR_H */
--- a/as/htab.c
+++ b/as/htab.c
@@ -0,0 +1,136 @@
+/*
+ * htab.c
+ */
+#include <stdlib.h>
+#include <string.h>
+#include "emalloc.h"
+#include "htab.h"
+
+/* Allocate an array of size bucket heads, all initially empty.  */
+static struct hnode **htab_alloc_bucket(unsigned int size)
+{
+    const size_t bytes = size * sizeof(struct hnode*);
+    struct hnode **slots = emalloc(bytes);
+
+    return memset(slots, 0, bytes);
+}
+
+/* Initialize htab as an empty table with 2^log2size buckets.  cmpfn
+   decides whether a node matches the data given to htab_lookup().  */
+void htab_init(struct htab *htab, unsigned int log2size, htab_cmpfn_t cmpfn)
+{
+    const unsigned int nr_buckets = 1 << log2size;
+
+    htab->cmpfn = cmpfn;
+    htab->used = 0;
+    htab->log2size = log2size;
+    htab->mask = nr_buckets - 1;
+    htab->bucket = htab_alloc_bucket(nr_buckets);
+}
+
+/* Find a node with hash value hval that the table's compare function
+   accepts for data (a return of 0 means "match").  With no compare
+   function, equal hash values are sufficient.  Returns NULL if no node
+   matches.  */
+struct hnode *htab_lookup(const struct htab *htab, uintptr_t hval, const void *data)
+{
+    const htab_cmpfn_t same = htab->cmpfn;
+    struct hnode *cand;
+
+    for (cand = htab->bucket[hval & htab->mask]; cand != NULL; cand = cand->hnext) {
+	if (cand->hval != hval)
+	    continue;
+	if (same == NULL || (*same)(cand, data) == 0)
+	    return cand;
+    }
+
+    return NULL;
+}
+
+/* Double the number of buckets and move every node into the bucket
+   selected by the new, one bit wider mask.  */
+static void htab_grow(struct htab *htab)
+{
+    const unsigned int nr_old = 1 << htab->log2size;
+    struct hnode **const old = htab->bucket;
+    struct hnode **fresh;
+    unsigned int nr_new, mask, slot;
+
+    htab->log2size += 1;
+    nr_new = 1 << htab->log2size;
+    mask = nr_new - 1;
+    fresh = htab_alloc_bucket(nr_new);
+
+    htab->mask = mask;
+    htab->bucket = fresh;
+
+    for (slot = 0; slot < nr_old; ++slot) {
+	struct hnode *node, *succ;
+
+	for (node = old[slot]; node != NULL; node = succ) {
+	    unsigned int dst = node->hval & mask;
+
+	    /* save the old link before pushing node onto its new chain */
+	    succ = node->hnext;
+	    node->hnext = fresh[dst];
+	    fresh[dst] = node;
+	}
+    }
+
+    free(old);
+}
+
+/* Push hnode onto the front of the chain selected by hnode->hval (which
+   the caller must have set), then grow the table once more than 80% as
+   many nodes as buckets are stored.  No duplicate check is made.  */
+void htab_insert(struct htab *htab, struct hnode *hnode)
+{
+    struct hnode **slot = &htab->bucket[hnode->hval & htab->mask];
+    unsigned int capacity = 1 << htab->log2size;
+
+    hnode->hnext = *slot;
+    *slot = hnode;
+    ++htab->used;
+
+    if (htab->used > (4 * capacity) / 5)	/* rehash at 80% */
+	htab_grow(htab);
+}
+
+#if 0
+struct hash_node *am_hash_reset(struct hash_table *hash_table)
+{
+    unsigned int i;
+    unsigned int size;
+    struct hash_node * volatile *bucket, *all_nodes, *head, *tail;
+
+    all_nodes = NULL;
+    bucket = hash_table->bucket;
+    size = 1 << hash_table->log2size;
+    for (i = 0; i < size; ++i) {
+	head = bucket[i];
+	if (head) {
+	    bucket[i] = NULL;
+	    tail = head;
+	    while (tail->next)
+		tail = tail->next;
+	    tail->next = all_nodes;
+	    all_nodes = head;
+	}
+    }
+    hash_table->used = 0;
+    return all_nodes;
+}
+
+void am_hash_enumerate(const struct hash_table *hash_table,
+                       void (*callback)(struct hash_node *hash_node, void *data),
+                       void *data)
+{
+    unsigned int i;
+    unsigned int size;
+    struct hash_node * volatile *bucket, *head;
+
+    bucket = hash_table->bucket;
+    size = 1 << hash_table->log2size;
+    for (i = 0; i < size; ++i) {
+	head = bucket[i];
+	while (head != NULL) {
+            callback(head, data);
+            head = head->next;
+	}
+    }
+}
+#endif
--- a/as/htab.h
+++ b/as/htab.h
@@ -0,0 +1,36 @@
+/*
+ * htab.h
+ */
+#ifndef HTAB_H
+#define HTAB_H
+
+#include <stdint.h>	/* uintptr_t */
+
+/* Link structure that the table stores and chains together.  */
+struct hnode {
+    uintptr_t hval;		/* hash value; selects the bucket */
+    struct hnode *hnext;	/* next node in the same bucket */
+};
+
+/* Compare function used by htab_lookup(): returns 0 if hnode matches data.  */
+typedef int (*htab_cmpfn_t)(const struct hnode *hnode, const void *data);
+
+/* A chained hash table with a power-of-two number of buckets.  */
+struct htab {
+    unsigned int log2size;
+    unsigned int mask;		/* INV: mask == (1 << log2size) - 1 */
+    unsigned int used;		/* number of nodes inserted */
+    htab_cmpfn_t cmpfn;		/* may be NULL: hash equality then suffices */
+    struct hnode **bucket;	/* array of 1 << log2size chain heads */
+};
+
+/* Initialize an empty table with 1 << log2size buckets.  */
+void htab_init(struct htab *htab, unsigned int log2size, htab_cmpfn_t cmpfn);
+
+/* Return the node with hash hval that matches data, or NULL.  */
+struct hnode *htab_lookup(const struct htab *htab, uintptr_t hval, const void *data);
+
+/* NOTE(review): htab.c as imported defines no htab_reset(); only a
+   disabled (#if 0) am_hash_reset() exists there -- confirm.  */
+struct hnode *htab_reset(struct htab *htab);
+
+/* NOTE(review): htab.c as imported defines no htab_enumerate(); only a
+   disabled (#if 0) am_hash_enumerate() exists there -- confirm.  */
+void htab_enumerate(const struct htab *htab,
+		    void (*callback)(struct hnode *hnode, void *data),
+		    void *data);
+
+/* Add hnode, whose hval must already be set, to the table.  */
+void htab_insert(struct htab *htab, struct hnode *hnode);
+
+#endif /* HTAB_H */
--- a/as/main.c
+++ b/as/main.c
@@ -0,0 +1,40 @@
+/*
+ * main.c
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include "pass1.h"
+
+/*
+ * Assembler driver.
+ *
+ * Usage: as [-o outfile] [infile]
+ *
+ * The output file defaults to "a.out".  Without an input file operand,
+ * pass1() is given NULL and the input is read from stdin.  Runs the
+ * three passes in order; returns 1 as soon as one fails, else 0.
+ *
+ * NOTE(review): pass2() and pass3() are not declared in pass1.h as
+ * imported here -- confirm where their prototypes are meant to live.
+ */
+int main(int argc, char **argv)
+{
+    int ch;
+    const char *outfile = "a.out";
+    const char *infile = NULL;
+
+    /* getopt() returns -1 once all options are consumed.  Test that in
+       the loop condition: a "break" inside the switch would only leave
+       the switch and the loop would never end.  */
+    while ((ch = getopt(argc, argv, "o:")) != -1) {
+	switch (ch) {
+	case 'o':
+	    outfile = optarg;
+	    break;
+	default:
+	    /* getopt() returned '?'; the offending option is in optopt */
+	    fprintf(stderr, "as: invalid option '%c'\n", optopt);
+	    return 1;
+	}
+    }
+
+    /* at most one input file is accepted */
+    if (optind + 1 < argc) {
+	fprintf(stderr, "as: too many input files\n");
+	return 1;
+    }
+    if (optind + 1 == argc)
+	infile = argv[optind];
+
+    if (pass1(infile) < 0)
+	return 1;
+
+    if (pass2() < 0)
+	return 1;
+
+    if (pass3(outfile) < 0)
+	return 1;
+
+    return 0;
+}
--- a/as/parse.c
+++ b/as/parse.c
--- a/as/parse.h
+++ b/as/parse.h
@@ -0,0 +1,11 @@
+/*
+ * parse.h
+ */
+#ifndef PARSE_H
+#define PARSE_H
+
+#include "stmt.h"
+
+/* Parse the next statement into *stmt.  As used by pass1(): a negative
+   return is an error, 0 ends the input, and a positive value means a
+   statement was stored in *stmt.  */
+int parse_stmt(struct stmt *stmt);
+
+#endif /* PARSE_H */
--- a/as/pass1.c
+++ b/as/pass1.c
@@ -0,0 +1,270 @@
+/*
+ * pass1.c
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include "emalloc.h"
+#include "parse.h"
+#include "pass1.h"
+#include "scan.h"
+#include "section.h"
+
+/* A position in the output: a section and one of its subsections.  */
+struct section_and_subsection {
+    struct section *section;
+    struct subsection *subsection;
+};
+
+/* The position statements currently go to, and the one that was
+   current before the last section change (used by .previous).  */
+struct current_and_previous_sections {
+    struct section_and_subsection cursec;
+    struct section_and_subsection prevsec;
+};
+
+/* One saved current/previous pair, pushed by .pushsection and restored
+   by .popsection.  */
+struct sections_stack_element {
+    struct current_and_previous_sections sects;
+    struct sections_stack_element *next;
+};
+
+/* All state carried from statement to statement during pass 1.  */
+struct pass1_state {
+    struct current_and_previous_sections sects;
+    struct sections_stack_element *sects_stack;	/* top of stack, or NULL */
+};
+
+/* .popsection: restore the current/previous sections saved by the
+   matching .pushsection.  Returns -1 if the stack is empty.  */
+static int pass1_s_popsection(struct pass1_state *state)
+{
+    struct sections_stack_element *saved = state->sects_stack;
+
+    if (saved != NULL) {
+	state->sects_stack = saved->next;
+	state->sects = saved->sects;
+	free(saved);
+	return 0;
+    }
+
+    fprintf(stderr, "as: %s, line %u: .popsection with no previous .pushsection\n", scan_filename, scan_linenr);
+    return -1;
+}
+
+/* .previous: exchange the current and the previous section.  Returns
+   -1 if there is no previous section yet.  */
+static int pass1_s_previous(struct pass1_state *state)
+{
+    struct section_and_subsection current;
+
+    if (state->sects.prevsec.section == NULL) {
+	fprintf(stderr, "as: %s, line %u: .previous with no previous .section\n", scan_filename, scan_linenr);
+	return -1;
+    }
+
+    current = state->sects.cursec;
+    state->sects.cursec = state->sects.prevsec;
+    state->sects.prevsec = current;
+
+    return 0;
+}
+
+/*
+ * Handle .section (push == 0) and .pushsection (push != 0).
+ *
+ * Enters the named section, merges the attributes given in the
+ * statement into it, and makes it the current section.  An attribute
+ * that the section already has must agree with the statement, otherwise
+ * an error is printed and -1 is returned.  With push set, the old
+ * current/previous pair is saved on the section stack first.
+ */
+static int pass1_s_section(struct pass1_state *state, struct stmt *stmt, int push)
+{
+    struct section *section;
+    struct subsection *subsection;
+    int subsectnr;
+
+    section = section_enter(stmt->u.s_section.name);
+
+    /* a type of 0 means "not specified" on either side */
+    if (stmt->u.s_section.sh_type != 0) {
+	if (section->e_shdr.sh_type == 0)
+	    section->e_shdr.sh_type = stmt->u.s_section.sh_type;
+	else if (section->e_shdr.sh_type != stmt->u.s_section.sh_type) {
+	    fprintf(stderr, "as: %s, line %u: section type mismatch\n", scan_filename, scan_linenr);
+	    return -1;
+	}
+    }
+
+    /* flags accumulate; they are never checked for conflicts */
+    section->e_shdr.sh_flags |= stmt->u.s_section.sh_flags;
+
+    if (stmt->u.s_section.sh_entsize != NULL) {
+	pdp10_uint36_t offset;
+
+	if (eval_abs_verbose(stmt->u.s_section.sh_entsize, &offset) < 0)
+	    return -1;
+	if (section->e_shdr.sh_entsize == 0)
+	    section->e_shdr.sh_entsize = offset;
+	else if (section->e_shdr.sh_entsize != offset) {
+	    fprintf(stderr, "as: %s, line %u: section <entsize> mismatch\n", scan_filename, scan_linenr);
+	    return -1;
+	}
+    }
+
+    /* group name and linkage are compared by pointer, not by content */
+    if (stmt->u.s_section.groupname != NULL) {
+	if (section->groupname == NULL)
+	    section->groupname = stmt->u.s_section.groupname;
+	else if (stmt->u.s_section.groupname != section->groupname) {
+	    fprintf(stderr, "as: %s, line %u: section <groupname> mismatch\n", scan_filename, scan_linenr);
+	    return -1;
+	}
+    }
+
+    if (stmt->u.s_section.linkage != NULL) {
+	if (section->linkage == NULL)
+	    section->linkage = stmt->u.s_section.linkage;
+	else if (stmt->u.s_section.linkage != section->linkage) {
+	    fprintf(stderr, "as: %s, line %u: section <linkage> mismatch\n", scan_filename, scan_linenr);
+	    return -1;
+	}
+    }
+
+    /* a subsection number is only honoured for .pushsection; plain
+       .section always selects subsection 0 */
+    if (push && stmt->u.s_section.subsectnr != NULL) {
+	pdp10_uint36_t offset;
+
+	if (eval_abs_verbose(stmt->u.s_section.subsectnr, &offset) < 0)
+	    return -1;
+	subsectnr = offset;
+    } else
+	subsectnr = 0;
+
+    subsection = subsection_enter(section, subsectnr);
+
+    /* save the old state only now that nothing can fail any more */
+    if (push) {
+	struct sections_stack_element *top;
+
+	top = emalloc(sizeof *top);
+	top->sects = state->sects;
+	top->next = state->sects_stack;
+	state->sects_stack = top;
+    }
+
+    state->sects.prevsec = state->sects.cursec;
+    state->sects.cursec.section = section;
+    state->sects.cursec.subsection = subsection;
+
+    return 0;
+}
+
+/* .subsection: switch to the given subsection of the current section.
+   The old position becomes the previous one.  Returns -1 if the
+   subsection number is not an absolute expression.  */
+static int pass1_s_subsection(struct pass1_state *state, struct stmt *stmt)
+{
+    pdp10_uint36_t nr;
+
+    if (eval_abs_verbose(stmt->u.s_subsection.expr, &nr) < 0)
+	return -1;
+
+    state->sects.prevsec = state->sects.cursec;
+    state->sects.cursec.subsection =
+	subsection_enter(state->sects.cursec.section, (int)(pdp10_int36_t)nr);
+
+    return 0;
+}
+
+/*
+ * Act on one parsed statement.  Section-changing directives update
+ * state immediately; every other known statement is copied to the end
+ * of the current subsection's statement list.  Returns 0 on success,
+ * -1 on error (including an unknown statement tag).
+ */
+static int pass1_interpret(struct pass1_state *state, struct stmt *stmt)
+{
+    switch (stmt->tag) {
+	/* in pass1 we have to deal with section-altering directives */
+    case S_POPSECTION:
+	return pass1_s_popsection(state);
+    case S_PREVIOUS:
+	return pass1_s_previous(state);
+    case S_PUSHSECTION:
+	return pass1_s_section(state, stmt, 1);
+    case S_SECTION:
+	return pass1_s_section(state, stmt, 0);
+    case S_SUBSECTION:
+	return pass1_s_subsection(state, stmt);
+
+	/* remaining directives, and the non-directives, enter data
+	   into sections or manipulate symbols; delay these for pass2 */
+	/* NOTE(review): "XXX" below is an undefined identifier, so this
+	   file does not compile until the design issue described next
+	   is resolved.  */
+	XXX;
+	/* XXX: wrong, symbol values, even section-relative, are needed as soon
+	   as possible, so all statements must be interpreted right away */
+    case S_ALIGN:
+    case S_ASCII:
+    case S_ASCIZ:
+    case S_BYTE:
+    case S_COMM:
+    case S_FILE:
+    case S_GLOBL:
+    case S_HIDDEN:
+    case S_IDENT:
+    case S_INTERNAL:
+    case S_LOCAL:
+    case S_LONG:
+    case S_ORG:
+    case S_PROTECTED:
+    case S_SET:
+    case S_SHORT:
+    case S_SIZE:
+    case S_SYMVER:
+    case S_TYPE:
+    case S_WEAK:
+    case S_WEAKREF:
+    case S_LABEL:
+    case S_INSN:
+    {
+	struct stmt *stmt2;
+
+	/* append a copy: the caller reuses *stmt for the next statement */
+	stmt2 = arrlst_append(state->sects.cursec.subsection->stmts);
+	/* XXX: error check */
+	*stmt2 = *stmt;
+	return 0;
+    }
+    default:
+	fprintf(stderr, "as: %s, line %u: %s(): unknown stmt tag %u\n", scan_filename, scan_linenr, __FUNCTION__, stmt->tag);
+	return -1;
+    }
+}
+
+/* Set up the initial pass 1 state: assembly starts in subsection 0 of
+   an allocated, executable ".text" section, with no previous section
+   and an empty section stack.  */
+static void pass1_init_state(struct pass1_state *state)
+{
+    struct section *dot_text = section_enter(strtab_enter(".text"));
+
+    dot_text->e_shdr.sh_type = SHT_PROGBITS;
+    dot_text->e_shdr.sh_flags = SHF_ALLOC | SHF_EXECINSTR;
+    dot_text->e_shdr.sh_addralign = 4;	/* XXX: PDP10-specific */
+
+    state->sects_stack = NULL;
+
+    state->sects.prevsec.section = NULL;
+    state->sects.prevsec.subsection = NULL;
+
+    state->sects.cursec.section = dot_text;
+    state->sects.cursec.subsection = subsection_enter(dot_text, 0);
+}
+
+/* Free whatever is left on the section stack, i.e. the entries of
+   .pushsection directives that were never popped.  */
+static void pass1_fini_state(struct pass1_state *state)
+{
+    struct sections_stack_element *cur, *succ;
+
+    for (cur = state->sects_stack; cur != NULL; cur = succ) {
+	succ = cur->next;
+	free(cur);
+    }
+}
+
+/*
+ * Pass 1: read the input and process it statement by statement.
+ *
+ * filename is the file to assemble; NULL leaves stdin as the input.
+ * Returns 0 when the whole input was processed, -1 if the file could
+ * not be opened or a statement failed to parse or to be interpreted.
+ */
+int pass1(const char *filename)
+{
+    struct pass1_state state;
+    struct stmt stmt;
+    int status;
+
+    if (scan_freopen(filename) < 0)
+	return -1;
+
+    pass1_init_state(&state);
+
+    for (;;) {
+	/* < 0: parse error, 0: end of input, > 0: got a statement */
+	status = parse_stmt(&stmt);
+	if (status <= 0)
+	    break;
+	status = pass1_interpret(&state, &stmt);
+	if (status < 0)
+	    break;
+    }
+
+    /* release the section stack on the error paths as well */
+    pass1_fini_state(&state);
+
+    return status < 0 ? -1 : 0;
+}
--- a/as/pass1.h
+++ b/as/pass1.h
@@ -0,0 +1,9 @@
+/*
+ * pass1.h
+ */
+#ifndef PASS1_H
+#define PASS1_H
+
+/* Run pass 1 over filename, or over stdin if filename is NULL.
+   Returns 0 on success and -1 on error.  */
+int pass1(const char *filename);
+
+#endif /* PASS1_H */
--- a/as/scan.c
+++ b/as/scan.c
@@ -0,0 +1,636 @@
+/*
+ * scan.c
+ */
+#include <errno.h>
+#include <stdio.h>	/* host stdio since we're dealing with plain text */
+#include <stdlib.h>
+#include <string.h>
+#include "charbuf.h"
+#include "scan.h"
+#include "token.h"
+
+/* XXX: we should have a pdp10-limits.h */
+#define PDP10_UCHAR_MAX PDP10_UINT9_MAX
+
+/* name of the input being scanned, printed in diagnostics */
+const char *scan_filename = "<stdin>";
+
+/*
+ * Make stdin read from FILENAME and remember the name for diagnostics.
+ * A NULL FILENAME leaves stdin and the remembered name untouched.
+ * Returns 0 on success; prints a message and returns -1 if the file
+ * cannot be opened.
+ */
+int scan_freopen(const char *filename)
+{
+    if (filename == NULL)
+	return 0;
+
+    if (freopen(filename, "r", stdin) == NULL) {
+	fprintf(stderr, "as: Error opening %s: %s\n", filename, strerror(errno));
+	return -1;
+    }
+
+    scan_filename = filename;
+    return 0;
+}
+
+/*
+ * Line counter printed in diagnostics; the scanner increments it each time
+ * it consumes a newline.
+ * NOTE(review): nothing in this file gives it an explicit initial value, so
+ * it starts at 0 -- confirm that this is the intended numbering.
+ */
+unsigned int scan_linenr;
+
+/*
+ * Push CH back onto stdin so the next read returns it again.  Pushing back
+ * EOF does nothing; a failing ungetc() is reported with perror().
+ */
+static void scan_ungetc(int ch)
+{
+    if (ch == EOF)
+	return;
+
+    if (ungetc(ch, stdin) == EOF)
+	perror("ungetc");
+}
+
+/* Read the next input character from stdin; returns EOF at end of input. */
+static int scan_getchar(void)
+{
+    return fgetc(stdin);
+}
+
+/*
+ * Report the invalid input character CH on stderr, prefixed with the
+ * current file name and line number.  EOF is shown as <EOF>, a printable
+ * character in single quotes, and anything else as a quoted three-digit
+ * octal escape.  CONTEXT is appended to say where the character was found.
+ */
+static void badchar(int ch, const char *context)
+{
+    char shown[7];	/* longest form: quote, backslash, 3 digits, quote, NUL */
+
+    if (ch == EOF)
+	strcpy(shown, "<EOF>");
+    else if (ch < ' ' || ch > '~')
+	snprintf(shown, sizeof shown, "'\\%o%o%o'",
+		 (unsigned int)((ch >> 6) & 3),
+		 (unsigned int)((ch >> 3) & 7),
+		 (unsigned int)(ch & 7));
+    else
+	snprintf(shown, sizeof shown, "'%c'", ch);
+
+    fprintf(stderr, "as: %s, line %u: invalid character %s %s\n", scan_filename, scan_linenr, shown, context);
+}
+
+/* Return 1 if CH is one of '0'..'9', otherwise 0. */
+static int is_decimal_digit(char ch)
+{
+    if (ch < '0')
+	return 0;
+    return ch <= '9';
+}
+
+/* Return 1 if CH is one of '0'..'7', otherwise 0. */
+static int is_octal_digit(char ch)
+{
+    if (ch < '0')
+	return 0;
+    return ch <= '7';
+}
+
+/*
+ * Return the numeric value of CH read as a hexadecimal digit (0..15, both
+ * letter cases accepted), or -1U if CH is not such a digit.  Since -1U is
+ * larger than any base, callers can test "value >= base" directly.
+ */
+static unsigned int get_chval(int ch)
+{
+    unsigned int value = -1U;
+
+    if (ch >= 'a' && ch <= 'f')
+	value = 10 + (ch - 'a');
+    else if (ch >= 'A' && ch <= 'F')
+	value = 10 + (ch - 'A');
+    else if (ch >= '0' && ch <= '9')
+	value = ch - '0';
+
+    return value;
+}
+
+/*
+ * Scan the rest of a character escape sequence; the backslash has already
+ * been consumed.  Returns the value of the escaped character.
+ *
+ * Recognized are the single-letter escapes n, t, f, r, b, the quoted
+ * characters backslash, single quote and double quote, a hexadecimal
+ * escape (x or X followed by one or more hex digits), and an octal escape
+ * of one to three octal digits.  Numeric values are reduced to
+ * PDP10_UCHAR_MAX bits, with a warning if anything was cut off.
+ *
+ * Anything else, including end of input, is reported via badchar() and the
+ * offending character is returned; callers therefore see EOF only when the
+ * input ended inside the escape.
+ */
+static int do_escape(void)
+{
+    int ch;
+
+    ch = scan_getchar();
+    switch (ch) {
+    case 'n':
+	return '\n';
+    case 't':
+	return '\t';
+    case 'f':
+	return '\f';
+    case 'r':
+	return '\r';
+    case 'b':
+	return '\b';
+    case '\\':
+	return ch;
+    case '\'':
+	return ch;
+    case '"':
+	return ch;
+    case 'x':
+    case 'X':
+    {
+	unsigned int chval;
+
+	ch = scan_getchar();
+	chval = get_chval(ch);
+	if (chval <= 15) {
+	    unsigned int val = 0;
+	    /* accumulate hex digits until the first non-digit */
+	    do {
+		val = val * 16 + chval;
+		ch = scan_getchar();
+		chval = get_chval(ch);
+	    } while (chval <= 15);
+	    scan_ungetc(ch);
+	    if (val > PDP10_UCHAR_MAX)
+		fprintf(stderr, "as: %s, line %u: truncating escaped value from %#x to %#x\n", scan_filename, scan_linenr, val, val & PDP10_UCHAR_MAX);
+	    return val & PDP10_UCHAR_MAX;
+	}
+	/* no hex digit after the x: the character just read is reported
+	   below and is not pushed back */
+	break;
+    }
+    case EOF:
+	break;
+    default:
+	if (is_octal_digit(ch)) {
+	    /* at most three octal digits in total */
+	    unsigned int val = ch - '0';
+	    ch = scan_getchar();
+	    if (is_octal_digit(ch)) {
+		val = val * 8 + (ch - '0');
+		ch = scan_getchar();
+		if (is_octal_digit(ch))
+		    val = val * 8 + (ch - '0');
+		else
+		    scan_ungetc(ch);
+	    } else
+		scan_ungetc(ch);
+	    if (val > PDP10_UCHAR_MAX)
+		fprintf(stderr, "as: %s, line %u: truncating escaped value from %#x to %#x\n", scan_filename, scan_linenr, val, val & PDP10_UCHAR_MAX);
+	    return val & PDP10_UCHAR_MAX;
+	}
+	break;
+    }
+    badchar(ch, "in \\ character escape sequence");
+    return ch;
+}
+
+/*
+ * Scan a character literal; the opening single quote has already been
+ * consumed.  Stores the character value in TOKEN_ATTR->uint and returns
+ * T_UINTEGER, or returns T_ERROR if the literal is empty, is cut short by
+ * end of input, or is not closed by a single quote.
+ */
+static enum token do_char(union token_attribute *token_attr)
+{
+    int value, closer;
+
+    value = scan_getchar();
+    if (value == '\'' || value == EOF) {
+	badchar(value, "in character literal");
+	return T_ERROR;
+    }
+    if (value == '\\') {
+	value = do_escape();
+	if (value == EOF)
+	    return T_ERROR;
+    }
+    token_attr->uint = value;
+
+    closer = scan_getchar();
+    if (closer == '\'')
+	return T_UINTEGER;
+
+    badchar(closer, "after character literal");
+    return T_ERROR;
+}
+
+/* XXX: strings should be sequences of uint9_t not sequences of unsigned char */
+
+/*
+ * Scan a string literal; the opening double quote has already been
+ * consumed.  Characters are collected in CHARBUF with escape sequences
+ * decoded.  On the closing quote the text is stored in TOKEN_ATTR->text
+ * and T_STRING is returned.  A newline or end of input inside the string
+ * is reported and yields T_ERROR, as does end of input inside an escape.
+ */
+static enum token do_string(union token_attribute *token_attr, struct charbuf *charbuf)
+{
+    for (;;) {
+	int ch = scan_getchar();
+
+	if (ch == '"') {
+	    token_attr->text = charbuf_string(charbuf);
+	    return T_STRING;
+	}
+	if (ch == EOF || ch == '\n') {
+	    badchar(ch, "in string literal");
+	    if (ch == '\n')
+		++scan_linenr;
+	    return T_ERROR;
+	}
+	if (ch == '\\') {
+	    ch = do_escape();
+	    if (ch == EOF)
+		return T_ERROR;
+	}
+	charbuf_append(charbuf, ch);
+    }
+}
+
+/*
+ * The known directives and their tokens.  mk_symbol() looks names up in
+ * this table by binary search, so the entries must stay sorted by name.
+ */
+static const struct {
+    enum token token;
+    const char *name;
+} directives[] = {
+    { T_DOT_ALIGN, ".align" },
+    { T_DOT_ASCII, ".ascii" },
+    { T_DOT_ASCIZ, ".asciz" },
+    { T_DOT_BALIGN, ".balign" },
+    { T_DOT_BSS, ".bss" },
+    { T_DOT_BYTE, ".byte" },
+    { T_DOT_COMM, ".comm" },
+    { T_DOT_DATA, ".data" },
+    { T_DOT_FILE, ".file" },
+    { T_DOT_GLOBL, ".globl" },
+    { T_DOT_HIDDEN, ".hidden" },
+    { T_DOT_IDENT, ".ident" },
+    { T_DOT_INTERNAL, ".internal" },
+    { T_DOT_LOCAL, ".local" },
+    { T_DOT_LONG, ".long" },
+    { T_DOT_ORG, ".org" },
+    { T_DOT_P2ALIGN, ".p2align" },
+    { T_DOT_POPSECTION, ".popsection" },
+    { T_DOT_PREVIOUS, ".previous" },
+    { T_DOT_PROTECTED, ".protected" },
+    { T_DOT_PUSHSECTION, ".pushsection" },
+    { T_DOT_RODATA, ".rodata" },
+    { T_DOT_SECTION, ".section" },
+    { T_DOT_SET, ".set" },
+    { T_DOT_SHORT, ".short" },
+    { T_DOT_SIZE, ".size" },
+    { T_DOT_SUBSECTION, ".subsection" },
+    { T_DOT_SYMVER, ".symver" },
+    { T_DOT_TEXT, ".text" },
+    { T_DOT_TYPE, ".type" },
+    { T_DOT_WEAK, ".weak" },
+    { T_DOT_WEAKREF, ".weakref" },
+};
+
+/*
+ * Classify the symbol text collected in CHARBUF.
+ *
+ * A name starting with '.' is looked up in directives[]; on a match the
+ * directive's token is returned.  A '$' followed by one decimal digit, by
+ * two decimal digits with a value below 16, or by "sp" is a register:
+ * T_REGISTER is returned with the register number in TOKEN_ATTR->uint
+ * ("$sp" is register 15).  Everything else, including unknown dot-names
+ * and other '$' names, is a T_SYMBOL with its text in TOKEN_ATTR->text.
+ */
+static enum token mk_symbol(union token_attribute *token_attr, const struct charbuf *charbuf)
+{
+    if (charbuf->head.buf[0] == '.') {			/* check for <.directive> */
+	unsigned int low, high;
+
+	/* binary search over the half-open range [low, high) */
+	low = 0;
+	high = sizeof directives / sizeof directives[0];
+
+	while (low < high) {
+	    unsigned int middle;
+	    int cmp;
+
+	    middle = (low + high) / 2;
+	    cmp = charbuf_strcmp(charbuf, directives[middle].name);
+
+	    if (cmp < 0)
+		high = middle;
+	    else if (cmp > 0)
+		low = middle + 1;
+	    else
+		return directives[middle].token;
+	}
+    } else if (charbuf->head.buf[0] == '$'
+	       && charbuf->head.next == NULL) {		/* check for $<reg> */
+	/* NOTE(review): head.next == NULL presumably means the whole text
+	   sits in the first buffer chunk, and pos is its length -- confirm
+	   against charbuf.h */
+	if (charbuf->pos == 2
+	    && is_decimal_digit(charbuf->head.buf[1])) {
+	    token_attr->uint = charbuf->head.buf[1] - '0';
+	    return T_REGISTER;
+	} else if (charbuf->pos == 3) {
+	    if (is_decimal_digit(charbuf->head.buf[1])
+		&& is_decimal_digit(charbuf->head.buf[2])) {
+		unsigned int val;
+
+		val = (charbuf->head.buf[1] - '0') * 10 + (charbuf->head.buf[2] - '0');
+		if (val < 16) {
+		    token_attr->uint = val;
+		    return T_REGISTER;
+		}
+	    } else if (charbuf->head.buf[1] == 's'
+		       && charbuf->head.buf[2] == 'p') {
+		token_attr->uint = 15;
+		return T_REGISTER;
+	    }
+	}
+    }
+
+    /* not a directive or register: an ordinary symbol */
+    token_attr->text = charbuf_string(charbuf);
+    return T_SYMBOL;
+}
+
+/*
+ * Return 1 if CH may appear inside a symbol (a letter, a decimal digit,
+ * '_', '$' or '.'), otherwise 0.
+ */
+static int is_symbol_internal_char(int ch)
+{
+    if (ch == '_' || ch == '$' || ch == '.')
+	return 1;
+    if (ch >= '0' && ch <= '9')
+	return 1;
+    if (ch >= 'a' && ch <= 'z')
+	return 1;
+    return ch >= 'A' && ch <= 'Z';
+}
+
+/*
+ * Scan a symbol whose first character CH has already been consumed:
+ * collect CH and all following symbol characters in CHARBUF, push back
+ * the first character that does not belong, and classify the result.
+ */
+static enum token do_symbol(union token_attribute *token_attr, int ch, struct charbuf *charbuf)
+{
+    int next = ch;
+
+    for (;;) {
+	charbuf_append(charbuf, next);
+	next = scan_getchar();
+	if (!is_symbol_internal_char(next))
+	    break;
+    }
+
+    scan_ungetc(next);
+    return mk_symbol(token_attr, charbuf);
+}
+
+/*
+ * Scan an unsigned integer literal or a local label reference; the first
+ * digit CH has already been consumed.
+ *
+ * A leading '0' selects octal and "0x" or "0X" selects hexadecimal (at
+ * least one hex digit must follow); any other first digit selects decimal.
+ * The hex prefix is only recognized directly after a leading '0'.  An
+ * octal or decimal number immediately followed by 'b' or 'f' is a local
+ * label reference.
+ *
+ * On success the value is stored in TOKEN_ATTR->uint and T_UINTEGER or
+ * T_LOCAL_LABEL is returned.  T_ERROR is returned, after a diagnostic, if
+ * "0x" is not followed by a hex digit.  A number that does not fit in 36
+ * bits is reduced modulo 2^36 and a warning is printed.
+ */
+static enum token do_number(union token_attribute *token_attr, int ch)
+{
+    unsigned int base, chval;
+    pdp10_uint36_t numval;
+    int overflow = 0;
+
+    base = (ch == '0') ? 8 : 10;
+    numval = ch - '0';
+
+    ch = scan_getchar();
+    /* handle 0x<first hexdig>; base == 8 here means the first digit was '0' */
+    if (base == 8 && (ch == 'x' || ch == 'X')) {
+	base = 16;
+	/* must have at least one hex digit after 0x */
+	ch = scan_getchar();
+	chval = get_chval(ch);
+	if (chval > 15) {
+	    badchar(ch, "after 0x in hexadecimal literal");
+	    return T_ERROR;
+	}
+	numval = chval;
+	ch = scan_getchar();
+    }
+    /* the number is non-empty, consume and accumulate trailing
+       characters as long as they are valid in the base */
+    for (;;) {
+	chval = get_chval(ch);
+	if (chval >= base)
+	    break;
+	/* true iff numval * base + chval would exceed 36 bits */
+	if (numval > (PDP10_UINT36_MAX - chval) / base)
+	    overflow = 1;
+	numval = (numval * base + chval) & PDP10_UINT36_MAX;
+	ch = scan_getchar();
+    }
+    if (overflow)
+	fprintf(stderr, "as: %s, line %u: integer literal does not fit in 36 bits, truncated\n", scan_filename, scan_linenr);
+    /* check for <local label>{b,f} */
+    if (base <= 10 && (ch == 'b' || ch == 'f')) {
+	/* represent the local label + direction in sign-magnitude with
+	   the sign in the least significant bit; using sign-magnitude
+	   allows to distinguish 0f from 0b (i.e., +0 from -0); storing
+	   the sign in the least significant bit makes us independent of
+	   word size */
+	token_attr->uint = (numval << 1) | (ch == 'f' ? 1 : 0);
+	return T_LOCAL_LABEL;
+    }
+    /* plain integer literal */
+    scan_ungetc(ch);
+    token_attr->uint = numval;
+    return T_UINTEGER;
+}
+
+/* Finish a token that starts with '=': either "==" or a plain "=". */
+static enum token do_eq(void)
+{
+    int next = scan_getchar();
+
+    if (next == '=')
+	return T_EQEQ;
+
+    scan_ungetc(next);
+    return T_EQ;
+}
+
+/* Finish a token that starts with '&': either "&&" or a plain "&". */
+static enum token do_ampersand(void)
+{
+    int next = scan_getchar();
+
+    if (next == '&')
+	return T_ANDAND;
+
+    scan_ungetc(next);
+    return T_AND;
+}
+
+/* Finish a token that starts with '|': either "||" or a plain "|". */
+static enum token do_bar(void)
+{
+    int next = scan_getchar();
+
+    if (next == '|')
+	return T_OROR;
+
+    scan_ungetc(next);
+    return T_OR;
+}
+
+/* Finish a token that starts with '>': ">>", ">=" or a plain ">". */
+static enum token do_gt(void)
+{
+    int next = scan_getchar();
+
+    if (next == '>')
+	return T_RSHIFT;
+    if (next == '=')
+	return T_GE;
+
+    scan_ungetc(next);
+    return T_GT;
+}
+
+/*
+ * Finish a token that starts with '<': "<<", "<=", "<>" (which means the
+ * same as "!=") or a plain "<".
+ */
+static enum token do_lt(void)
+{
+    int next = scan_getchar();
+
+    if (next == '<')
+	return T_LSHIFT;
+    if (next == '=')
+	return T_LE;
+    if (next == '>')
+	return T_NEQ;
+
+    scan_ungetc(next);
+    return T_LT;
+}
+
+/*
+ * Skip the rest of a C-style comment whose opening slash and star have
+ * already been consumed, counting the newlines inside it.  Returns T_EOF
+ * as a stand-in token once the closing star-slash has been read, or
+ * T_ERROR (after a diagnostic) if the input ends inside the comment.
+ */
+static enum token do_c_comment(void)
+{
+    int after_star = 0;	/* nonzero while the previous character was '*' */
+    int ch;
+
+    for (;;) {
+	ch = scan_getchar();
+	if (ch == EOF) {
+	    badchar(ch, "in /**/-style comment");
+	    return T_ERROR;
+	}
+	if (after_star && ch == '/')
+	    return T_EOF;	/* fake token for a C comment */
+	if (ch == '\n')
+	    ++scan_linenr;
+	after_star = (ch == '*');
+    }
+}
+
+/*
+ * Finish a token that starts with '/': if a '*' follows, a C-style comment
+ * is skipped and do_c_comment()'s result is returned; otherwise this is
+ * the division operator.
+ */
+static enum token do_slash(void)
+{
+    int next = scan_getchar();
+
+    if (next == '*')
+	return do_c_comment();
+
+    scan_ungetc(next);
+    return T_DIV;
+}
+
+/* Finish a token that starts with '!': either "!=" or a plain "!". */
+static enum token do_bang(void)
+{
+    int next = scan_getchar();
+
+    if (next == '=')
+	return T_NEQ;
+
+    scan_ungetc(next);
+    return T_BANG;
+}
+
+/*
+ * Skip a line comment up to and including its terminating newline and
+ * count that newline.  Returns 0 on success, or -1 after a diagnostic if
+ * the input ends before a newline is seen.
+ */
+static int do_line_comment(void)
+{
+    int ch;
+
+    do {
+	ch = scan_getchar();
+	if (ch == EOF) {
+	    badchar(ch, "in line comment");
+	    return -1;
+	}
+    } while (ch != '\n');
+
+    ++scan_linenr;
+    return 0;
+}
+
+/*
+ * Scan and return the next token from stdin.  Blanks, tabs, carriage
+ * returns, form feeds and C-style comments are skipped.  Tokens that carry
+ * a value store it in TOKEN_ATTR; CHARBUF is scratch space for collecting
+ * the text of symbols and strings.  Returns T_EOF at end of input and
+ * T_ERROR after a diagnostic for malformed input.
+ */
+static enum token do_scan(union token_attribute *token_attr, struct charbuf *charbuf)
+{
+    int ch;
+
+    ch = scan_getchar();
+
+    /* "continue" below skips the current character and reads the next one */
+    for (;; ch = scan_getchar()) {
+	switch (ch) {
+	case ' ':
+	case '\t':
+	case '\r':
+	case '\f':
+	    continue;
+	case '\n':
+	    ++scan_linenr;
+	    return T_NEWLINE;
+	case '#':
+	    /* a line comment ends the line; its newline is consumed and
+	       counted by do_line_comment() */
+	    if (do_line_comment() != 0)
+		return T_ERROR;
+	    return T_NEWLINE;
+	case ';':
+	    /* yields a newline token without consuming the rest of the line */
+	    return T_NEWLINE;
+	case EOF:
+	    return T_EOF;
+	case '@':
+	    return T_AT;
+	case ':':
+	    return T_COLON;
+	case ',':
+	    return T_COMMA;
+	case '(':
+	    return T_LPAREN;
+	case ')':
+	    return T_RPAREN;
+	case '~':
+	    return T_TILDE;
+	case '*':
+	    return T_MUL;
+	case '/':	/* "/""*", "/" */
+	    switch (do_slash()) {
+	    case T_DIV:
+		return T_DIV;
+	    case T_EOF:	/* fake token for a C comment */
+		continue;
+	    default:	/* error, eof in comment */
+		return T_ERROR;
+	    }
+	case '%':
+	    return T_REM;
+	case '<':	/* <<, <=, < */
+	    return do_lt();
+	case '>':	/* >>, >=, > */
+	    return do_gt();
+	case '|':	/* ||, | */
+	    return do_bar();
+	case '&':	/* &&, & */
+	    return do_ampersand();
+	case '^':
+	    return T_CARET;
+	case '!':	/* !=, ! */
+	    return do_bang();
+	case '+':
+	    return T_PLUS;
+	case '-':
+	    return T_MINUS;
+	case '=':	/* ==, = */
+	    return do_eq();
+	case '"':
+	    return do_string(token_attr, charbuf);
+	case '\'':
+	    return do_char(token_attr);
+	case '.':
+	    /* Dot may start a floating point literal, but tests show that
+	       gcc always outputs floating point values as integer literals,
+	       so we shouldn't have to support floating point literals at all.  */
+	case '$':
+	case '_':
+	    return do_symbol(token_attr, ch, charbuf);
+	default:
+	    if ('0' <= ch && ch <= '9')	/* number or <decimal>{b,f} */
+		return do_number(token_attr, ch);
+	    if (('A' <= ch && ch <= 'Z') ||
+		('a' <= ch && ch <= 'z'))
+		return do_symbol(token_attr, ch, charbuf);
+	}
+	/* only reached from the default case: no token starts with CH */
+	badchar(ch, "");
+	return T_ERROR;
+    }
+}
+
+/*
+ * Scan the next token from the input.  The token's value, if it has one,
+ * is stored in TOKEN_ATTR.  A scratch character buffer is set up for the
+ * duration of the call and torn down again before returning.
+ */
+enum token scan(union token_attribute *token_attr)
+{
+    struct charbuf text;
+    enum token result;
+
+    charbuf_init(&text);
+    result = do_scan(token_attr, &text);
+    charbuf_fini(&text);
+    return result;
+}
--- a/as/scan.h
+++ b/as/scan.h
@@ -0,0 +1,15 @@
+/*
+ * scan.h
+ */
+#ifndef SCAN_H
+#define SCAN_H
+
+#include "token.h"
+
+/*
+ * Both variables are defined in scan.c; they are declared extern here so
+ * that including this header does not create a further definition in
+ * every translation unit.
+ */
+
+/* name of the input being scanned, for diagnostics */
+extern const char *scan_filename;
+
+/*
+ * Make stdin read from FILENAME (NULL keeps the current stdin).
+ * Returns 0 on success, -1 if the file cannot be opened.
+ */
+int scan_freopen(const char *filename);
+
+/* line counter for diagnostics, advanced by the scanner at each newline */
+extern unsigned int scan_linenr;
+
+/* Scan the next token; a token's value, if any, is stored in TOKEN_ATTR. */
+enum token scan(union token_attribute *token_attr);
+
+#endif /* SCAN_H */
--- a/as/section.c
+++ b/as/section.c
@@ -0,0 +1,59 @@
+/*
+ * section.c
+ */
+#include <string.h>
+#include "emalloc.h"
+#include "htab.h"
+#include "section.h"
+
+/*
+ * Convert a pointer to the hnode embedded in a section into a pointer to
+ * the section itself.  A NULL HNODE converts to a NULL section.
+ */
+static struct section *section_from_hnode(const struct hnode *hnode)
+{
+    /* hnode is first in section, so no need to mess with offsetof() */
+    return (struct section*)hnode;
+}
+
+/* all sections entered so far, keyed by the strnode pointer of their name */
+static struct htab section_htab;
+
+/* Initialize the section table. */
+void section_init(void)
+{
+    htab_init(&section_htab, 8, NULL);
+}
+
+/*
+ * Return the section whose name is STRNODE.  If no such section has been
+ * entered yet, a new zero-filled one with an empty subsection table is
+ * created and recorded first.
+ */
+struct section *section_enter(const struct strnode *strnode)
+{
+    const uintptr_t key = (uintptr_t)strnode;
+    struct section *found;
+    struct section *fresh;
+
+    found = section_from_hnode(htab_lookup(&section_htab, key, NULL));
+    if (found != NULL)
+	return found;
+
+    fresh = emalloc(sizeof *fresh);
+    memset(fresh, '\0', sizeof *fresh);
+    fresh->hnode.hval = key;
+    htab_init(&fresh->subsects, 4, NULL);
+    htab_insert(&section_htab, &fresh->hnode);
+    return fresh;
+}
+
+/*
+ * Convert a pointer to the hnode embedded in a subsection into a pointer
+ * to the subsection itself.  A NULL HNODE converts to a NULL subsection.
+ */
+static struct subsection *subsection_from_hnode(const struct hnode *hnode)
+{
+    /* hnode is first in subsection, so no need to mess with offsetof() */
+    return (struct subsection*)hnode;
+}
+
+/*
+ * Return subsection number SUBSECTNR of SECTION.  If it does not exist
+ * yet, a new one with an empty statement list is created and recorded in
+ * the section's subsection table first.
+ */
+struct subsection *subsection_enter(struct section *section, int subsectnr)
+{
+    struct subsection *subsection;
+
+    subsection = subsection_from_hnode(htab_lookup(&section->subsects, (uintptr_t)subsectnr, NULL));
+
+    if (subsection == NULL) {
+	subsection = emalloc(sizeof *subsection);
+	/* start from all-zero contents, as section_enter() does, so that
+	   no field is handed to htab_insert() uninitialized */
+	memset(subsection, '\0', sizeof *subsection);
+	subsection->hnode.hval = (uintptr_t)subsectnr;
+	subsection->stmts = arrlst_alloc(sizeof(struct stmt));
+	htab_insert(&section->subsects, &subsection->hnode);
+    }
+
+    return subsection;
+}
--- a/as/section.h
+++ b/as/section.h
@@ -0,0 +1,35 @@
+/*
+ * section.h
+ */
+#ifndef SECTION_H
+#define SECTION_H
+
+#include "pdp10-elf36.h"
+
+#include "arrlst.h"
+#include "htab.h"
+#include "stmt.h"
+#include "strtab.h"
+
+/*
+ * One numbered subsection of a section.  The hnode must stay the first
+ * member: section.c converts hnode pointers to subsection pointers by a
+ * plain cast.
+ */
+struct subsection {
+    struct hnode hnode;	/* hnode.hval == subsect nr */
+    struct arrlst *stmts;	/* list of struct stmt elements */
+};
+
+/*
+ * One section, identified by the strnode of its name.  The hnode must stay
+ * the first member: section.c converts hnode pointers to section pointers
+ * by a plain cast.
+ */
+struct section {
+    struct hnode hnode;	/* hnode.hval == struct strnode* */
+    Elf36_Shdr e_shdr;
+    struct htab subsects;	/* struct subsection entries, keyed by subsect nr */
+    const struct strnode *groupname;
+    const struct strnode *linkage;
+    unsigned int dot;	/* Elf36_Off? */
+};
+
+/*
+ * Reserved pointer values that do not refer to a real struct section.
+ * NOTE(review): judging by the names they stand for "absolute" and
+ * "undefined" -- confirm against the users of these macros.
+ */
+#define SECTION_ABS	((struct section*)0)
+#define SECTION_UNDEF	((struct section*)1)
+
+/* Initialize the section table. */
+void section_init(void);
+/* Return the section named by STRNODE, creating it on first use. */
+struct section *section_enter(const struct strnode *strnode);
+/* Return subsection SUBSECTNR of SECTION, creating it on first use. */
+struct subsection *subsection_enter(struct section *section, int subsectnr);
+
+#endif /* SECTION_H */
--- a/as/stmt.h
+++ b/as/stmt.h
@@ -0,0 +1,122 @@
+/*
+ * stmt.h
+ */
+#ifndef STMT_H
+#define STMT_H
+
+#include "pdp10-elf36.h"
+#include "expr.h"
+#include "strtab.h"
+
+/* Node of a singly linked list of expressions. */
+struct expr_list {
+    struct expr *expr;
+    struct expr_list *next;
+};
+
+/* Node of a singly linked list of strings. */
+struct string_list {
+    const struct strnode *string;
+    struct string_list *next;
+};
+
+/* The kinds of statement; stored in the tag field of struct stmt. */
+enum stmt_tag {
+    /* directives */
+    S_ALIGN,		/* .align, .balign, and .p2align map to this */
+    S_ASCII,
+    S_ASCIZ,
+    S_BYTE,
+    S_COMM,
+    S_FILE,
+    S_GLOBL,
+    S_HIDDEN,
+    S_IDENT,
+    S_INTERNAL,
+    S_LOCAL,
+    S_LONG,
+    S_ORG,
+    S_POPSECTION,	/* no attribute */
+    S_PREVIOUS,		/* no attribute */
+    S_PROTECTED,
+    S_PUSHSECTION,
+    S_SECTION,		/* .bss, .data, .rodata, and .text also map to this */
+    S_SET,
+    S_SHORT,
+    S_SIZE,
+    S_SUBSECTION,
+    S_SYMVER,
+    S_TYPE,
+    S_WEAK,
+    S_WEAKREF,
+    /* non-directives */
+    S_LABEL,
+    S_INSN,
+};
+
+/*
+ * One statement: TAG says what kind it is and U carries its operands.
+ * Statements are copied by plain struct assignment, so the pointers in U
+ * are shared between copies.
+ * NOTE(review): which member of U belongs to which tag is decided by the
+ * code that builds and consumes statements, not by this header -- check
+ * there before relying on a particular pairing.
+ */
+struct stmt {
+    enum stmt_tag tag;
+    union {
+	struct {
+	    unsigned char flags;	/* p2 vs b, none/w/l */
+	    struct expr *balign;
+	    struct expr *fill;
+	    struct expr *maxskip;
+	} s_align;
+	struct {
+	    struct string_list *list;
+	} s_string_list;
+	struct {
+	    struct expr_list *list;
+	} s_expr_list;
+	struct {
+	    const struct strnode *name;
+	    struct expr *length;
+	    struct expr *balign;
+	} s_comm;
+	struct {
+	    const struct strnode *string;
+	} s_string;
+	struct {
+	    struct expr *newlc;
+	    struct expr *fill;
+	} s_org;
+	struct {
+	    const struct strnode *name;
+	    struct expr *subsectnr;
+	    Elf36_Word sh_flags;
+	    Elf36_Word sh_type;
+	    struct expr *sh_entsize;
+	    const struct strnode *groupname;
+	    const struct strnode *linkage;
+	} s_section;
+	struct {
+	    const struct strnode *name;
+	    struct expr *expr;
+	} s_setsize;
+	struct {
+	    struct expr *expr;
+	} s_subsection;
+	struct {
+	    const struct strnode *name1;
+	    const struct strnode *name2;
+	    const struct strnode *name3;
+	    unsigned char nrats;	/* 1, 2, or 3 */
+	} s_symver;
+	struct {
+	    const struct strnode *name;
+	    unsigned char st_type;
+	} s_type;
+	struct {
+	    const struct strnode *alias;
+	    const struct strnode *target;
+	} s_weakref;
+	struct {
+	    const struct strnode *name;
+	    unsigned int accumulator;
+	    int at;
+	    struct expr *expr;
+	    unsigned int indexreg;
+	} s_insn;
+
+    } u;
+};
+
+#endif /* STMT_H */
--- a/as/strtab.c
+++ b/as/strtab.c
@@ -0,0 +1,65 @@
+/*
+ * strtab.c
+ */
+#include <string.h>
+#include "emalloc.h"
+#include "htab.h"
+#include "strtab.h"
+
+/*
+ * Convert a pointer to the hnode embedded in a strnode into a pointer to
+ * the strnode itself.  A NULL HNODE converts to a NULL strnode.
+ */
+static struct strnode *strnode_from_hnode(const struct hnode *hnode)
+{
+    /* hnode is first in strnode, so no need to mess with offsetof() */
+    return (struct strnode*)hnode;
+}
+
+/*
+ * Comparison callback for the string table: compares the string stored in
+ * HNODE's strnode with the C string DATA and returns strcmp()'s result,
+ * so 0 means equal.
+ */
+static int strtab_cmpfn(const struct hnode *hnode, const void *data)
+{
+    return strcmp(strnode_from_hnode(hnode)->string, (const char *)data);
+}
+
+/* all strings entered so far, looked up by hash value and contents */
+static struct htab strtab_htab;
+
+/* Initialize the string table. */
+void strtab_init(void)
+{
+    htab_init(&strtab_htab, 64, strtab_cmpfn);
+}
+
+/*
+ * Hash the NUL-terminated STRING: starting from 0, each byte (taken as
+ * unsigned char) updates the hash to hash * 33 + byte, wrapping around at
+ * the width of uintptr_t.
+ */
+static uintptr_t strtab_hash(const char *string)
+{
+    const unsigned char *p;
+    uintptr_t hash = 0;
+
+    for (p = (const unsigned char *)string; *p != '\0'; ++p)
+	hash = hash * 33 + *p;
+
+    return hash;
+}
+
+/*
+ * Return the string table node for STRING.  If the table has no entry
+ * with the same contents yet, a new node holding a copy of STRING is
+ * created and inserted first, so equal strings share one node.
+ */
+const struct strnode *strtab_enter(const char *string)
+{
+    const uintptr_t hval = strtab_hash(string);
+    struct strnode *node = strnode_from_hnode(htab_lookup(&strtab_htab, hval, string));
+
+    if (node != NULL)
+	return node;
+
+    /* room for the node header plus the string and its terminator */
+    node = emalloc(offsetof(struct strnode, string) + strlen(string) + 1);
+    node->hnode.hval = hval;
+    strcpy(node->string, string);
+    htab_insert(&strtab_htab, &node->hnode);
+    return node;
+}
--- a/as/strtab.h
+++ b/as/strtab.h
@@ -0,0 +1,17 @@
+/*
+ * strtab.h
+ */
+#ifndef STRTAB_H
+#define STRTAB_H
+
+#include "htab.h"
+
+/*
+ * A string table entry.  The hnode must stay the first member: strtab.c
+ * converts hnode pointers to strnode pointers by a plain cast.
+ */
+struct strnode {
+    struct hnode hnode;
+    char string[];	/* the NUL-terminated string itself */
+};
+
+/* Initialize the string table. */
+void strtab_init(void);
+/* Return the unique node for STRING, creating it on first use. */
+const struct strnode *strtab_enter(const char *string);
+
+#endif /* STRTAB_H */
--- a/as/token.c
+++ b/as/token.c
@@ -0,0 +1,54 @@
+/*
+ * token.c
+ */
+#include <stdio.h>
+#include "pdp10-inttypes.h"
+#include "token.h"
+
+/* How token_print() shows a token's attribute. */
+enum {
+    FMT_NONE = 0,	/* nothing is printed */
+    FMT_UINT = 1,	/* the uint member, in decimal */
+    FMT_SYMBOL = 2,	/* the text member */
+    FMT_STRING = 3,	/* the text member, in double quotes */
+};
+
+/* Printing information for one token. */
+struct token_info {
+    char print_name[15];
+    unsigned char attribute_fmt;	/* one of the FMT_* values */
+};
+
+/*
+ * One entry per TOKEN() line in token.def, in the same order as the
+ * enumerators of enum token, so the table can be indexed by token.
+ */
+static const struct token_info token_info[] = {
+#define TOKEN(T,P,F) { P, F },
+#include "token.def"
+#undef TOKEN
+};
+
+/*
+ * Print TOKEN on FP using its print name from token.def.  If TOKEN_ATTR
+ * is non-NULL and the token carries an attribute, the attribute follows in
+ * square brackets.  A TOKEN outside the table is printed as
+ * "<invalid token N>".
+ */
+void token_print(FILE *fp, enum token token, const union token_attribute *token_attr)
+{
+    const struct token_info *ti;
+
+    if (token >= sizeof token_info / sizeof token_info[0]) {
+	fprintf(fp, "<invalid token %u>", token);
+	return;
+    }
+
+    ti = &token_info[token];
+    /* print_name is a fixed-size array, so bound the output by its size */
+    fprintf(fp, "%.*s", (int) sizeof ti->print_name, ti->print_name);
+
+    if (!token_attr)
+	return;
+
+    switch (ti->attribute_fmt) {
+    case FMT_UINT:
+	/* PDP10_PRIu36 already supplies the 'u' conversion specifier */
+	fprintf(fp, " [%" PDP10_PRIu36 "]", token_attr->uint);
+	break;
+    case FMT_SYMBOL:
+	fprintf(fp, " [%s]", token_attr->text);
+	break;
+    case FMT_STRING:
+	fprintf(fp, " [\"%s\"]", token_attr->text);
+	break;
+    default:
+	break;
+    }
+}
--- a/as/token.def
+++ b/as/token.def
@@ -0,0 +1,77 @@
+/*
+ * token.def
+ *
+ * TOKEN(T_<name>, <print name>, <attribute fmt>)
+ *
+ * This file is included with different definitions of TOKEN(): token.h
+ * builds enum token from the first argument and token.c builds the
+ * token_info[] table from the other two.  The order of the entries fixes
+ * the token values, and a print name has to fit in the 15-byte print_name
+ * array of struct token_info.
+ */
+
+/* directives */
+TOKEN(T_DOT_ALIGN, ".align", FMT_NONE)
+TOKEN(T_DOT_ASCII, ".ascii", FMT_NONE)
+TOKEN(T_DOT_ASCIZ, ".asciz", FMT_NONE)
+TOKEN(T_DOT_BALIGN, ".balign", FMT_NONE)
+TOKEN(T_DOT_BSS, ".bss", FMT_NONE)
+TOKEN(T_DOT_BYTE, ".byte", FMT_NONE)
+TOKEN(T_DOT_COMM, ".comm", FMT_NONE)
+TOKEN(T_DOT_DATA, ".data", FMT_NONE)
+TOKEN(T_DOT_FILE, ".file", FMT_NONE)
+TOKEN(T_DOT_GLOBL, ".globl", FMT_NONE)
+TOKEN(T_DOT_HIDDEN, ".hidden", FMT_NONE)
+TOKEN(T_DOT_IDENT, ".ident", FMT_NONE)
+TOKEN(T_DOT_INTERNAL, ".internal", FMT_NONE)
+TOKEN(T_DOT_LOCAL, ".local", FMT_NONE)
+TOKEN(T_DOT_LONG, ".long", FMT_NONE)
+TOKEN(T_DOT_ORG, ".org", FMT_NONE)
+TOKEN(T_DOT_P2ALIGN, ".p2align", FMT_NONE)
+TOKEN(T_DOT_POPSECTION, ".popsection", FMT_NONE)
+TOKEN(T_DOT_PREVIOUS, ".previous", FMT_NONE)
+TOKEN(T_DOT_PROTECTED, ".protected", FMT_NONE)
+TOKEN(T_DOT_PUSHSECTION, ".pushsection", FMT_NONE)
+TOKEN(T_DOT_RODATA, ".rodata", FMT_NONE)
+TOKEN(T_DOT_SECTION, ".section", FMT_NONE)
+TOKEN(T_DOT_SET, ".set", FMT_NONE)
+TOKEN(T_DOT_SHORT, ".short", FMT_NONE)
+TOKEN(T_DOT_SIZE, ".size", FMT_NONE)
+TOKEN(T_DOT_SUBSECTION, ".subsection", FMT_NONE)
+TOKEN(T_DOT_SYMVER, ".symver", FMT_NONE)
+TOKEN(T_DOT_TEXT, ".text", FMT_NONE)
+TOKEN(T_DOT_TYPE, ".type", FMT_NONE)
+TOKEN(T_DOT_WEAK, ".weak", FMT_NONE)
+TOKEN(T_DOT_WEAKREF, ".weakref", FMT_NONE)
+/* other symbols */
+TOKEN(T_REGISTER, "<register>", FMT_UINT)
+TOKEN(T_SYMBOL, "<symbol>", FMT_SYMBOL)
+TOKEN(T_LOCAL_LABEL, "<local label>", FMT_UINT)	/* 1f, 2b */
+TOKEN(T_AT, "@", FMT_NONE)
+TOKEN(T_COLON, ":", FMT_NONE)
+/* literals */
+TOKEN(T_UINTEGER, "<integer>", FMT_UINT)
+TOKEN(T_STRING, "<string>", FMT_STRING)
+/* operators, separators */
+TOKEN(T_COMMA, ",", FMT_NONE)
+TOKEN(T_LPAREN, "(", FMT_NONE)
+TOKEN(T_RPAREN, ")", FMT_NONE)
+TOKEN(T_TILDE, "~", FMT_NONE)
+TOKEN(T_MUL, "*", FMT_NONE)
+TOKEN(T_DIV, "/", FMT_NONE)
+TOKEN(T_REM, "%", FMT_NONE)
+TOKEN(T_LSHIFT, "<<", FMT_NONE)
+TOKEN(T_RSHIFT, ">>", FMT_NONE)
+TOKEN(T_OR, "|", FMT_NONE)
+TOKEN(T_AND, "&", FMT_NONE)
+TOKEN(T_CARET, "^", FMT_NONE)
+TOKEN(T_BANG, "!", FMT_NONE)
+TOKEN(T_PLUS, "+", FMT_NONE)
+TOKEN(T_MINUS, "-", FMT_NONE)
+TOKEN(T_EQ, "=", FMT_NONE)
+TOKEN(T_EQEQ, "==", FMT_NONE)
+TOKEN(T_NEQ, "!=", FMT_NONE)
+TOKEN(T_LT, "<", FMT_NONE)
+TOKEN(T_GT, ">", FMT_NONE)
+TOKEN(T_GE, ">=", FMT_NONE)
+TOKEN(T_LE, "<=", FMT_NONE)
+TOKEN(T_ANDAND, "&&", FMT_NONE)
+TOKEN(T_OROR, "||", FMT_NONE)
+/* misc */
+TOKEN(T_NEWLINE, "<newline>", FMT_NONE)
+TOKEN(T_EOF, "<eof>", FMT_NONE)
+TOKEN(T_ERROR, "<error>", FMT_NONE)
--- a/as/token.h
+++ b/as/token.h
@@ -0,0 +1,22 @@
+/*
+ * token.h
+ */
+#ifndef TOKEN_H
+#define TOKEN_H
+
+#include <stdio.h>	/* FILE, used by token_print() */
+#include "pdp10-stdint.h"
+
+/* One enumerator per TOKEN() entry in token.def, in the same order. */
+enum token {
+#define TOKEN(T,P,F)	T,
+#include "token.def"
+#undef TOKEN
+};
+
+/* The value carried by a token; which member is valid depends on the token. */
+union token_attribute {
+    const char *text;		/* symbol, string */
+    pdp10_uint36_t uint;	/* uinteger */
+};
+
+/*
+ * Print TOKEN on FP, followed by its attribute if TOKEN_ATTR is non-NULL
+ * and the token has one.
+ */
+void token_print(FILE *fp, enum token token, const union token_attribute *token_attr);
+
+#endif /* TOKEN_H */
--- a/include/pdp10-arith.h
+++ b/include/pdp10-arith.h
@@ -0,0 +1,134 @@
+/*
+ * pdp10-arith.h
+ *
+ * Provide functions for performing arithmetic operations on PDP10 integer types.
+ * Currently only 36-bit signed operations are supported.
+ */
+#ifndef PDP10_ARITH_H
+#define PDP10_ARITH_H
+
+#include "pdp10-stdint.h"
+
+/* Zero-extend a pdp10_{u,}int36_t to the full width of its representation type.
+ * Use this to prepare operands before unsigned operations, or to correct results
+ * after signed operations.
+ */
+static inline pdp10_uint36_t pdp10_zext_uint36(pdp10_uint36_t x)
+{
+    return x & PDP10_UINT36_MAX;
+}
+
+/* Sign-extend a pdp10_int36_t to the full width of its representation type.
+ * Use this to prepare operands before signed operations.
+ *
+ * Based on the following trick for sign-extending an octet x: ((x & 0xff) ^ 0x80) - 0x80,
+ * c.f. <http://sourceware.org/ml/binutils/2001-05/msg00093.html>.
+ *
+ * The subtraction is carried out in the unsigned type; for a negative result
+ * the conversion to the signed return type is implementation-defined in C.
+ */
+static inline pdp10_int36_t pdp10_sext_int36(pdp10_uint36_t x)
+{
+    /* the sign bit: the most significant of the 36 bits */
+    const pdp10_uint36_t PDP10_UINT36_SBIT = ~(PDP10_UINT36_MAX >> 1) & PDP10_UINT36_MAX;
+
+    return ((x & PDP10_UINT36_MAX) ^ PDP10_UINT36_SBIT) - PDP10_UINT36_SBIT;
+}
+
+/*
+ * Negation, bitwise complement, addition and subtraction of 36-bit signed
+ * values.  Operands are sign-extended first and the result is reduced to
+ * 36 bits again, so results wrap around modulo 2^36.
+ */
+static inline pdp10_uint36_t pdp10_neg_int36(pdp10_uint36_t x)
+{
+    return pdp10_zext_uint36(-pdp10_sext_int36(x));
+}
+
+static inline pdp10_uint36_t pdp10_not_int36(pdp10_uint36_t x)
+{
+    return pdp10_zext_uint36(~pdp10_sext_int36(x));
+}
+
+static inline pdp10_uint36_t pdp10_add_int36(pdp10_uint36_t x, pdp10_uint36_t y)
+{
+    return pdp10_zext_uint36(pdp10_sext_int36(x) + pdp10_sext_int36(y));
+}
+
+static inline pdp10_uint36_t pdp10_sub_int36(pdp10_uint36_t x, pdp10_uint36_t y)
+{
+    return pdp10_zext_uint36(pdp10_sext_int36(x) - pdp10_sext_int36(y));
+}
+
+/*
+ * Multiply two 36-bit signed values; returns the low 36 bits of the
+ * product.
+ *
+ * The multiplication is done on the zero-extended operands in the unsigned
+ * type: the low 36 bits of a product depend only on the low 36 bits of the
+ * operands, and unsigned arithmetic wraps around instead of overflowing,
+ * which a signed multiplication of two 36-bit values could do.
+ */
+static inline pdp10_uint36_t pdp10_mul_int36(pdp10_uint36_t x, pdp10_uint36_t y)
+{
+    return pdp10_zext_uint36(pdp10_zext_uint36(x) * pdp10_zext_uint36(y));
+}
+
+/*
+ * Signed division and remainder of 36-bit values, with C's semantics for
+ * the "/" and "%" operators; the result is reduced to 36 bits.
+ * Y must not be zero: there is no check here, and dividing by zero is
+ * undefined in C.
+ */
+static inline pdp10_uint36_t pdp10_div_int36(pdp10_uint36_t x, pdp10_uint36_t y)
+{
+    return pdp10_zext_uint36(pdp10_sext_int36(x) / pdp10_sext_int36(y));
+}
+
+static inline pdp10_uint36_t pdp10_rem_int36(pdp10_uint36_t x, pdp10_uint36_t y)
+{
+    return pdp10_zext_uint36(pdp10_sext_int36(x) % pdp10_sext_int36(y));
+}
+
+/*
+ * Logical shift left of the 36-bit value X by Y bits; bits shifted out of
+ * the 36-bit word are lost.  A shift count of 36 or more yields 0; it is
+ * handled separately because a C shift by at least the width of the type
+ * is undefined.
+ */
+static inline pdp10_uint36_t pdp10_lsl_int36(pdp10_uint36_t x, pdp10_uint36_t y)
+{
+    const pdp10_uint36_t count = pdp10_zext_uint36(y);
+
+    if (count >= 36)
+	return 0;
+    return pdp10_zext_uint36(pdp10_zext_uint36(x) << count);
+}
+
+/*
+ * Logical shift right of the 36-bit value X by Y bits, filling with
+ * zeros.  A shift count of 36 or more yields 0; it is handled separately
+ * because a C shift by at least the width of the type is undefined.
+ */
+static inline pdp10_uint36_t pdp10_lsr_int36(pdp10_uint36_t x, pdp10_uint36_t y)
+{
+    const pdp10_uint36_t count = pdp10_zext_uint36(y);
+
+    if (count >= 36)
+	return 0;
+    return pdp10_zext_uint36(pdp10_zext_uint36(x) >> count);
+}
+
+/*
+ * Arithmetic shift right of the 36-bit signed value X by Y bits, filling
+ * with copies of the sign bit.  Shift counts above 35 behave like 35, so
+ * the result is then 0 for a non-negative X and all ones for a negative X.
+ *
+ * The shift itself is done on unsigned values: a negative X is
+ * complemented, shifted, and complemented back, which avoids relying on
+ * how the compiler shifts negative numbers.
+ */
+static inline pdp10_uint36_t pdp10_asr_int36(pdp10_uint36_t x, pdp10_uint36_t y)
+{
+    const pdp10_int36_t value = pdp10_sext_int36(x);
+    pdp10_uint36_t count = pdp10_zext_uint36(y);
+
+    if (count > 35)
+	count = 35;
+
+    if (value < 0)
+	return pdp10_zext_uint36(~(~(pdp10_uint36_t)value >> count));
+    return pdp10_zext_uint36((pdp10_uint36_t)value >> count);
+}
+
+/*
+ * Bitwise or, and, and exclusive or of 36-bit values.  Both operands are
+ * zero-extended first, so the result has no bits set above bit 35.
+ */
+static inline pdp10_uint36_t pdp10_or_int36(pdp10_uint36_t x, pdp10_uint36_t y)
+{
+    return (pdp10_zext_uint36(x) | pdp10_zext_uint36(y));
+}
+
+static inline pdp10_uint36_t pdp10_and_int36(pdp10_uint36_t x, pdp10_uint36_t y)
+{
+    return (pdp10_zext_uint36(x) & pdp10_zext_uint36(y));
+}
+
+static inline pdp10_uint36_t pdp10_xor_int36(pdp10_uint36_t x, pdp10_uint36_t y)
+{
+    return (pdp10_zext_uint36(x) ^ pdp10_zext_uint36(y));
+}
+
+/*
+ * Comparisons of 36-bit values; each returns 1 if the relation holds and
+ * 0 otherwise.  Equality and inequality compare the zero-extended
+ * operands, so bits above bit 35 are ignored; the ordering comparisons
+ * treat the operands as signed.
+ */
+static inline int pdp10_eq_int36(pdp10_uint36_t x, pdp10_uint36_t y)
+{
+    return pdp10_zext_uint36(x) == pdp10_zext_uint36(y);
+}
+
+static inline int pdp10_ne_int36(pdp10_uint36_t x, pdp10_uint36_t y)
+{
+    return pdp10_zext_uint36(x) != pdp10_zext_uint36(y);
+}
+
+static inline int pdp10_lt_int36(pdp10_uint36_t x, pdp10_uint36_t y)
+{
+    return pdp10_sext_int36(x) < pdp10_sext_int36(y);
+}
+
+static inline int pdp10_gt_int36(pdp10_uint36_t x, pdp10_uint36_t y)
+{
+    return pdp10_sext_int36(x) > pdp10_sext_int36(y);
+}
+
+static inline int pdp10_ge_int36(pdp10_uint36_t x, pdp10_uint36_t y)
+{
+    return pdp10_sext_int36(x) >= pdp10_sext_int36(y);
+}
+
+static inline int pdp10_le_int36(pdp10_uint36_t x, pdp10_uint36_t y)
+{
+    return pdp10_sext_int36(x) <= pdp10_sext_int36(y);
+}
+
+/* Return 1 if any of the low 36 bits of X is set, otherwise 0. */
+static inline int pdp10_nonzero_int36(pdp10_uint36_t x)
+{
+    return pdp10_zext_uint36(x) != 0;
+}
+
+#endif /* PDP10_ARITH_H */
--- a/include/pdp10-elf36.h
+++ b/include/pdp10-elf36.h
--- a/include/pdp10-extint.h
+++ b/include/pdp10-extint.h
@@ -0,0 +1,28 @@
+/*
+ * pdp10-extint.h
+ *
+ * Provide types and procedures for converting 18 and 36-bit integers
+ * to and from arrays of 9-bit bytes (nonets).  Use these together with
+ * pdp10_fread() and pdp10_fwrite() to convert 18 and 36-bit integers
+ * between host-level and file-level binary representations.
+ */
+#ifndef PDP10_EXTINT_H
+#define PDP10_EXTINT_H
+
+#include "pdp10-stdint.h"
+
+/* An 18-bit integer as an array of two 9-bit bytes (nonets). */
+typedef struct {
+    pdp10_uint9_t x[2];
+} pdp10_ext_uint18_t;
+
+/* Convert the 18-bit VAL to its external form in *EXT, and back. */
+void pdp10_uint18_to_ext(pdp10_uint18_t val, pdp10_ext_uint18_t *ext);
+pdp10_uint18_t pdp10_uint18_from_ext(const pdp10_ext_uint18_t *ext);
+
+/* A 36-bit integer as an array of four 9-bit bytes (nonets). */
+typedef struct {
+    pdp10_uint9_t x[4];
+} pdp10_ext_uint36_t;
+
+/* Convert the 36-bit VAL to its external form in *EXT, and back. */
+void pdp10_uint36_to_ext(pdp10_uint36_t val, pdp10_ext_uint36_t *ext);
+pdp10_uint36_t pdp10_uint36_from_ext(const pdp10_ext_uint36_t *ext);
+
+#endif /* PDP10_EXTINT_H */
--- a/include/pdp10-inttypes.h
+++ b/include/pdp10-inttypes.h
@@ -0,0 +1,38 @@
+/*
+ * pdp10-inttypes.h
+ *
+ * Provide format conversions for 18 and 36-bit integers.
+ * For 9-bit integers, pdp10_uint9_t, just use the regular
+ * int-sized d/o/u/x formats.
+ */
+#ifndef PDP10_INTTYPES_H
+#define PDP10_INTTYPES_H
+
+#include <inttypes.h>
+#include "pdp10-stdint.h"
+
+/*
+ * printf conversion macros for 18-bit integers.  Without a native 18-bit
+ * type the 32-bit macros are used, matching the uint32_t that
+ * pdp10-stdint.h falls back to for pdp10_uint18_t.
+ */
+#if defined(UINT18_MAX)
+#define PDP10_PRId18	PRId18
+#define PDP10_PRIo18	PRIo18
+#define PDP10_PRIu18	PRIu18
+#define PDP10_PRIx18	PRIx18
+#else
+#define PDP10_PRId18	PRId32
+#define PDP10_PRIo18	PRIo32
+#define PDP10_PRIu18	PRIu32
+#define PDP10_PRIx18	PRIx32
+#endif
+
+/*
+ * printf conversion macros for 36-bit integers.  Without a native 36-bit
+ * type the 64-bit macros are used, matching the uint64_t and int64_t that
+ * pdp10-stdint.h falls back to for the 36-bit types.
+ */
+#if defined(UINT36_MAX)
+#define PDP10_PRId36	PRId36
+#define PDP10_PRIo36	PRIo36
+#define PDP10_PRIu36	PRIu36
+#define PDP10_PRIx36	PRIx36
+#else
+#define PDP10_PRId36	PRId64
+#define PDP10_PRIo36	PRIo64
+#define PDP10_PRIu36	PRIu64
+#define PDP10_PRIx36	PRIx64
+#endif
+
+#endif /* PDP10_INTTYPES_H */
--- a/include/pdp10-stdint.h
+++ b/include/pdp10-stdint.h
@@ -0,0 +1,71 @@
+/*
+ * pdp10-stdint.h
+ *
+ * Provide stdint.h-like type names and macros for 9, 18, and 36-bit unsigned
+ * integer types.
+ *
+ * Standard uint<N>_t types must not contain any extraneous bits, but that
+ * cannot be guaranteed for these 9, 18, and 36-bit types when they are embedded
+ * in larger 16, 32, and 64-bit host types.  For arithmetic on these types, use
+ * the operations provided by pdp10-arith.h.
+ *
+ * Do not use these 18 or 36-bit types for file-level binary data structures,
+ * instead use the pdp10-extint.h and pdp10-stdio.h facilities to explicitly
+ * convert between file-level and host-level binary data structures.
+ */
+#ifndef PDP10_STDINT_H
+#define PDP10_STDINT_H
+
+#include <stdint.h>
+
+/* 9-bit unsigned type: the native one if the host has it, else 16 bits wide */
+#if	defined(UINT9_MAX)
+
+typedef uint9_t			pdp10_uint9_t;
+#define PDP10_UINT9_MAX		UINT9_MAX
+#define PDP10_UINT9_C(c)	UINT9_C(c)
+
+#else	/* !UINT9_MAX */
+
+typedef uint16_t		pdp10_uint9_t;
+#define PDP10_UINT9_MAX		((1U << 9) - 1)
+#define PDP10_UINT9_C(c)	c
+
+#endif	/* !UINT9_MAX */
+
+/* 18-bit unsigned type: the native one if the host has it, else 32 bits wide */
+#if	defined(UINT18_MAX)
+
+typedef uint18_t		pdp10_uint18_t;
+#define PDP10_UINT18_MAX	UINT18_MAX
+#define PDP10_UINT18_C(c)	UINT18_C(c)
+
+#else	/* !UINT18_MAX */
+
+typedef uint32_t		pdp10_uint18_t;
+#define PDP10_UINT18_MAX	((1UL << 18) - 1)
+#define PDP10_UINT18_C(c)	c ## U
+
+#endif	/* !UINT18_MAX */
+
+/* 36-bit unsigned and signed types: the native ones if the host has them,
+   else 64 bits wide */
+#if	defined(UINT36_MAX)
+
+typedef uint36_t		pdp10_uint36_t;
+#define PDP10_UINT36_MAX	UINT36_MAX
+#define PDP10_UINT36_C(c)	UINT36_C(c)
+
+typedef int36_t			pdp10_int36_t;
+#define PDP10_INT36_MAX		INT36_MAX
+#define PDP10_INT36_C(c)	INT36_C(c)
+
+#else	/* !UINT36_MAX */
+
+typedef uint64_t		pdp10_uint36_t;
+#define PDP10_UINT36_MAX	((1ULL << 36) - 1)
+#define PDP10_UINT36_C(c)	c ## ULL
+
+typedef int64_t			pdp10_int36_t;
+#define PDP10_INT36_MAX		((1LL << (36 - 1)) - 1)
+#define PDP10_INT36_C(c)	c ## LL
+
+#endif	/* !UINT36_MAX */
+
+#endif /* PDP10_STDINT_H */
--- a/include/pdp10-stdio.h
+++ b/include/pdp10-stdio.h
@@ -0,0 +1,38 @@
+/*
+ * pdp10-stdio.h
+ *
+ * Provide stdio.h-like interface for I/O to and from files with 9-bit logical bytes (nonets),
+ * represented by native files with 8-bit physical bytes (octets).
+ */
+#ifndef PDP10_STDIO_H
+#define PDP10_STDIO_H
+
+#include <stdint.h>
+#include <sys/types.h>	/* off_t, size_t */
+struct pdp10_file;	/* opaque to callers; defined in lib/pdp10-stdio.c */
+typedef struct pdp10_file PDP10_FILE;
+
+/* append modes are not permitted: they fail with errno set to EINVAL */
+PDP10_FILE *pdp10_fopen(const char *path, const char *mode);	/* returns NULL on failure */
+
+int pdp10_fflush(PDP10_FILE *pdp10fp);	/* returns 0, or EOF on error */
+int pdp10_fclose(PDP10_FILE *pdp10fp);	/* returns 0, or EOF on error; pdp10fp is freed either way */
+int pdp10_fgetc(PDP10_FILE *pdp10fp);	/* returns a nonet, [0-511], or EOF */
+int pdp10_fputc(uint16_t nonet_ch, PDP10_FILE *pdp10fp);	/* returns the nonet written, or EOF */
+
+enum {
+    PDP10_SEEK_SET = 0,
+    PDP10_SEEK_CUR = 1,
+    PDP10_SEEK_END = 2,
+};
+int pdp10_fseeko(PDP10_FILE *pdp10fp, off_t offset, int whence);	/* offset counts nonets; returns 0, or -1 on error */
+
+/* pdp10_fread() and pdp10_fwrite() deliberately only permit transfers of strings
+ * (size == 1), marshalled 9/18/36-bit primitives (nmemb == 1, size == 1, 2, or 4),
+ * or empty objects (size == 0 || nmemb == 0).  To transfer structures, transfer
+ * their primitive fields individually.
+ */
+size_t pdp10_fread(uint16_t *ptr, size_t size, size_t nmemb, PDP10_FILE *pdp10fp);	/* one nonet per ptr[] element */
+size_t pdp10_fwrite(const uint16_t *ptr, size_t size, size_t nmemb, PDP10_FILE *pdp10fp);	/* low 9 bits of each element */
+
+#endif /* PDP10_STDIO_H */
--- a/lib/pdp10-extint.c
+++ b/lib/pdp10-extint.c
@@ -0,0 +1,44 @@
+/*
+ * pdp10-extint.c
+ *
+ * Provide types and procedures for converting 18 and 36-bit integers
+ * to and from arrays of 9-bit bytes (nonets).  Use these together with
+ * pdp10_fread() and pdp10_fwrite() to convert 18 and 36-bit integers
+ * between host-level and file-level binary representations.
+ */
+#include "pdp10-extint.h"
+
+/*
+ * The behaviour of the PDP10's byte pointers implies a big-endian storage model,
+ * as does the layout of its 72-bit long integers.
+ */
+
+void pdp10_uint18_to_ext(pdp10_uint18_t val, pdp10_ext_uint18_t *ext)
+{
+    ext->x[1] = val & 0x1FF;		/* low nonet is stored last */
+    ext->x[0] = (val >> 9) & 0x1FF;	/* high nonet is stored first (big-endian) */
+}
+
+pdp10_uint18_t pdp10_uint18_from_ext(const pdp10_ext_uint18_t *ext)
+{
+    pdp10_uint18_t hi = ext->x[0] & 0x1FF;	/* most significant nonet */
+    pdp10_uint18_t lo = ext->x[1] & 0x1FF;	/* least significant nonet */
+    return (hi << 9) | lo;
+}
+
+void pdp10_uint36_to_ext(pdp10_uint36_t val, pdp10_ext_uint36_t *ext)
+{
+    int i;
+
+    for (i = 3; i >= 0; --i, val >>= 9)	/* peel nonets off the low end, filling x[] back to front */
+	ext->x[i] = val & 0x1FF;
+}
+
+pdp10_uint36_t pdp10_uint36_from_ext(const pdp10_ext_uint36_t *ext)
+{
+    pdp10_uint36_t val = 0;	/* accumulates nonets, most significant first */
+    int i;
+    for (i = 0; i < 4; ++i)	/* x[0] holds the highest-order nonet */
+	val = (val << 9) | (ext->x[i] & 0x1FF);
+    return val;
+}
--- a/lib/pdp10-stdio.c
+++ b/lib/pdp10-stdio.c
@@ -0,0 +1,397 @@
+/*
+ * pdp10-stdio.c
+ *
+ * Provide stdio.h-like interface for I/O to and from files with 9-bit logical bytes (nonets),
+ * represented by native files with 8-bit physical bytes (octets).
+ *
+ * Theory of operation:
+ *
+ * - The state of a pdp10 file is composed of: a FILE* for an underlying octet file,
+ *   the current read/write position in the nonet file, a 16-bit shift register buffering
+ *   partial octets (writes) or partial nonets (reads), a counter indicating the number
+ *   of bits in the shift register (which may be negative after a call to pdp10_fseeko),
+ *   and a boolean flag indicating if there may be unwritten buffered output.
+ *
+ * - Write streams: pdp10_fputc adds 9 bits to shiftreg and 9 to shiftreg_nr_bits, then each
+ *   complete group of 8 bits in shiftreg is shifted out and written to the octet file.
+ *   Between pdp10_fputc calls shiftreg contains between 0 and 7 bits, inclusive, during a
+ *   pdp10_fputc it may temporarily contain up to 7+9 == 16 bits.
+ *
+ * - Read streams: pdp10_fgetc reads an octet from the octet file and adds 8 bits to shiftreg
+ *   and 8 to shiftreg_nr_bits; this is repeated once more if needed to make shiftreg
+ *   contain at least 9 bits.  Then 9 bits are shifted out of shiftreg and returned.
+ *   Between pdp10_fgetc calls shiftreg contains between 0 and 7 bits, inclusive, during an
+ *   fgetc it may contain up to 8+8 == 16 bits.
+ *
+ * - An output operation (pdp10_fputc or pdp10_fwrite) may not be directly followed by an
+ *   input operation (pdp10_fgetc or pdp10_fread) without an intervening call to pdp10_fflush
+ *   or pdp10_fseeko, and an input operation may not be directly followed by an output
+ *   operation without an intervening call to pdp10_fseeko, unless the input operation
+ *   encountered end-of-file.  (Same restriction as ANSI/ISO C.)
+ *
+ * - A pdp10_fseeko repositions the octet file to the closest octet boundary at or before the
+ *   requested nonet boundary, and sets shiftreg_nr_bits to the bit difference, as a number
+ *   between 0 and -7, inclusive.  A subsequent pdp10_fgetc or pdp10_fputc detects this
+ *   special state and reinitializes shiftreg as appropriate for that I/O direction.
+ */
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "pdp10-stdio.h"
+
+struct pdp10_file {
+    FILE *octet_fp;		/* underlying host stream of 8-bit octets */
+    off_t nonet_pos;		/* current read or write nonet offset */
+    unsigned int shiftreg;	/* contains 0 to 16 buffered bits */
+    int shiftreg_nr_bits;	/* number of bits in shiftreg; 0 to -7 right after pdp10_fseeko() */
+    int writing;		/* non-zero if shiftreg may contain pending output data */
+};
+
+PDP10_FILE *pdp10_fopen(const char *path, const char *mode)
+{
+    PDP10_FILE *fp;
+    int saved_errno;
+
+    /* no append support: "a" is not implemented yet and "a+" cannot work */
+    if (*mode == 'a') {
+	errno = EINVAL;
+	return NULL;
+    }
+
+    fp = malloc(sizeof *fp);
+    if (fp == NULL)
+	return NULL;
+
+    fp->octet_fp = fopen(path, mode);
+    if (fp->octet_fp != NULL) {
+	fp->writing = 0;		/* fresh stream: nonet 0, empty shift register */
+	fp->shiftreg_nr_bits = 0;
+	fp->shiftreg = 0;
+	fp->nonet_pos = 0;
+	return fp;
+    }
+
+    saved_errno = errno;	/* keep fopen()'s errno across free() */
+    free(fp);
+    errno = saved_errno;
+    return NULL;
+}
+
+static int pdp10_flush_buffered_write(PDP10_FILE *pdp10fp)	/* returns 0, or EOF on I/O error */
+{
+    int octet_ch;
+
+    if (!pdp10fp->writing)	/* last operation was not a write: nothing pending */
+	return 0;
+
+    if (pdp10fp->shiftreg_nr_bits <= 0)	/* no partial octet is buffered */
+	return 0;
+
+    /* read the next octet which we will partially overwrite */
+    if (fseeko(pdp10fp->octet_fp, 0, SEEK_CUR) == -1)	/* zero-distance seek before switching from output to input */
+	return EOF;
+    octet_ch = fgetc(pdp10fp->octet_fp);
+
+    /* rewind by one octet, or by zero octets if we read EOF above */
+    if (fseeko(pdp10fp->octet_fp, octet_ch == EOF ? 0 : -1, SEEK_CUR) == -1)
+	return EOF;
+
+    if (octet_ch == EOF)	/* no octet could be read: merge with zero bits instead */
+	octet_ch = 0;
+
+    octet_ch &= (1 << (8 - pdp10fp->shiftreg_nr_bits)) - 1;	/* keep the existing low bits of the octet */
+    octet_ch |= (pdp10fp->shiftreg << (8 - pdp10fp->shiftreg_nr_bits)) & 0xFF;	/* pending bits fill the high end */
+
+    if (fputc(octet_ch, pdp10fp->octet_fp) == EOF)
+	return EOF;
+
+    /* rewind by one octet to permit further writes; XXX: this is unnecessary
+       when the flush is called from fclose() or fseeko() */
+    if (fseeko(pdp10fp->octet_fp, -1, SEEK_CUR) == -1)
+	return EOF;
+
+    return 0;	/* shiftreg and shiftreg_nr_bits are left untouched */
+}
+
+int pdp10_fflush(PDP10_FILE *pdp10fp)
+{
+    /* push out any partial octet first, then let stdio flush the octet stream */
+    return pdp10_flush_buffered_write(pdp10fp) == EOF
+	? EOF : fflush(pdp10fp->octet_fp);
+}
+
+int pdp10_fclose(PDP10_FILE *pdp10fp)
+{
+    FILE *host_fp = pdp10fp->octet_fp;
+    int flush_rc, close_rc;
+
+    /* the wrapper is freed and the host stream closed even if the flush fails */
+    flush_rc = pdp10_flush_buffered_write(pdp10fp);
+    free(pdp10fp);
+    close_rc = fclose(host_fp);
+
+    return close_rc == EOF ? EOF : flush_rc;
+}
+
+static int pdp10_fgetc_one_octet(PDP10_FILE *pdp10fp)	/* returns 0, or -1 if no octet could be read */
+{
+    int octet_ch;
+
+    octet_ch = fgetc(pdp10fp->octet_fp);
+    if (octet_ch == EOF)
+	return -1;	/* incomplete nonets are discarded */
+
+    /* XXX: big-endian conversion */
+    pdp10fp->shiftreg = (pdp10fp->shiftreg << 8) | (octet_ch & 0xFF);	/* new octet enters at the low end */
+    pdp10fp->shiftreg_nr_bits += 8;
+
+    return 0;
+}
+
+int pdp10_fgetc(PDP10_FILE *pdp10fp)	/* returns the next nonet [0-511], or EOF */
+{
+    uint16_t nonet_ch;
+
+    pdp10fp->writing = 0;	/* shiftreg now holds input bits, not pending output */
+
+    if (pdp10fp->shiftreg_nr_bits < 9) {
+	/*
+	 * There are three cases to consider here:
+	 *
+	 * 1. 1 <= shiftreg_nr_bits <= 8.
+	 *    We have a partially filled nonet in the buffer.
+	 *    We'll read one octet.
+	 *
+	 * 2. shiftreg_nr_bits == 0.
+	 *    The last read took us to a 72-bit boundary, emptying the buffer.
+	 *    We'll read two octets.
+	 *
+	 * 3. -7 <= shiftreg_nr_bits <= -1.
+	 *    A pdp10_fseeko placed octet_pos 1 to 7 bits before nonet_pos.
+	 *    We'll read two octets, but the first -shiftreg_nr_bits
+	 *    bits will be discarded.
+	 *
+	 * Either way we read one or two octets, append them to the buffer,
+	 * and increment shiftreg_nr_bits by the number of bits read.
+	 *
+	 * An EOF during read permits the next operation to be a write, without
+	 * an intervening fflush() or fseeko().  Therefore we must reposition
+	 * octet_pos before nonet_pos if an EOF occurs here.
+	 */
+	if (pdp10_fgetc_one_octet(pdp10fp) < 0
+	    || (pdp10fp->shiftreg_nr_bits < 9
+		&& pdp10_fgetc_one_octet(pdp10fp) < 0)) {
+	    if (pdp10fp->shiftreg_nr_bits > 0) {	/* bits of an already-consumed octet are buffered */
+		/* if this fseeko() fails then presumably subsequent fseeko()s
+		   will also fail; if not, then data may not be read or written
+		   where we expect it to be XXX */
+		(void)fseeko(pdp10fp->octet_fp, -1, SEEK_CUR);
+		pdp10fp->shiftreg_nr_bits -= 8;	/* back in the "0 to 7 bits before nonet_pos" state */
+	    }
+	    return EOF;
+	}
+    }
+    /* XXX: big-endian conversion */
+    nonet_ch = (pdp10fp->shiftreg >> (pdp10fp->shiftreg_nr_bits - 9)) & 0x1FF;	/* topmost 9 of the counted bits */
+    pdp10fp->shiftreg_nr_bits -= 9;
+    pdp10fp->nonet_pos += 1;
+    return nonet_ch;
+}
+
+static int pdp10_fputc_one_octet(PDP10_FILE *pdp10fp)	/* returns 0, or -1 on write error */
+{
+    unsigned char rest_bits;
+    unsigned char octet_ch;
+
+    rest_bits = pdp10fp->shiftreg_nr_bits - 8;	/* bits that stay buffered after this octet leaves */
+    octet_ch = (pdp10fp->shiftreg >> rest_bits) & 0xFF;	/* topmost 8 of the counted bits */
+
+    if (fputc((char)octet_ch, pdp10fp->octet_fp) == EOF)
+	return -1;
+
+    pdp10fp->shiftreg_nr_bits = rest_bits;	/* only updated once the write succeeded */
+
+    return 0;
+}
+
+int pdp10_fputc(uint16_t nonet_ch, PDP10_FILE *pdp10fp)	/* returns the nonet written, or EOF on error */
+{
+    if (pdp10fp->shiftreg_nr_bits < 0) {
+	int octet_ch;
+
+	/*
+	 * -7 <= shiftreg_nr_bits <= -1.
+	 * A pdp10_fseeko placed octet_pos 1 to 7 bits before nonet_pos.
+	 * We will peek at the octet at octet_pos, and preload shiftreg with the
+	 * -shiftreg_nr_bits high bits from the octet.
+	 */
+
+	/* read the next octet, which we will partially overwrite */
+#if 0	/* XXX: the pdp10_fseeko did that already */
+	if (fseeko(pdp10fp->octet_fp, 0, SEEK_CUR) == -1)
+	    return EOF;
+#endif
+	octet_ch = fgetc(pdp10fp->octet_fp);
+
+	/* rewind by one octet, or by zero octets if we read EOF above */
+	if (fseeko(pdp10fp->octet_fp, octet_ch == EOF ? 0 : -1, SEEK_CUR) == -1)
+	    return EOF;
+
+	if (octet_ch == EOF)	/* no octet could be read: preload zero bits */
+	    octet_ch = 0;
+
+	pdp10fp->shiftreg_nr_bits = -pdp10fp->shiftreg_nr_bits;	/* now the count of preloaded bits */
+	pdp10fp->shiftreg = (octet_ch & 0xFF) >> (8 - pdp10fp->shiftreg_nr_bits);	/* keep only the octet's high bits */
+    }
+
+    pdp10fp->writing = 1;
+    pdp10fp->shiftreg = (pdp10fp->shiftreg << 9) | (nonet_ch & 0x1FF);	/* append the nonet at the low end */
+    pdp10fp->shiftreg_nr_bits += 9;
+    if (pdp10_fputc_one_octet(pdp10fp) < 0)
+	return EOF;
+    if (pdp10fp->shiftreg_nr_bits == 8	/* a second whole octet is buffered */
+	&& pdp10_fputc_one_octet(pdp10fp) < 0)
+	return EOF;
+    pdp10fp->nonet_pos += 1;
+    return nonet_ch & 0x1FF;
+}
+
+int pdp10_fseeko(PDP10_FILE *pdp10fp, off_t offset, int whence)	/* returns 0, or -1 on error */
+{
+    off_t octet_pos, nonet_pos;
+
+    if (pdp10_flush_buffered_write(pdp10fp) == EOF)	/* a pending partial octet must reach the file first */
+	return -1;
+
+    switch (whence) {	/* nonet_pos becomes the base that offset is added to */
+    case PDP10_SEEK_SET:
+	nonet_pos = 0;
+	break;
+    case PDP10_SEEK_CUR:
+	nonet_pos = pdp10fp->nonet_pos;
+	break;
+    case PDP10_SEEK_END:
+	if (fseeko(pdp10fp->octet_fp, 0, SEEK_END) == -1)
+	    return -1;
+	octet_pos = ftello(pdp10fp->octet_fp);	/* size of the octet file */
+	if (octet_pos == -1)
+	    return -1;
+
+	/*
+	 * Compute 'nonet_pos = (octet_pos * 8) / 9;' without
+	 * overflowing the intermediate term.
+	 *
+	 * Let octet_pos = A * 9 + B, where A = octet_pos / 9 and B = octet_pos % 9.
+	 *
+	 * (octet_pos * 8) / 9
+	 * == ((A * 9 + B) * 8) / 9
+	 * == (A * 9 * 8 + B * 8) / 9
+	 * == A * 8 + (B * 8) / 9
+	 * == (octet_pos / 9) * 8 + ((octet_pos % 9) * 8) / 9
+	 */
+	nonet_pos = (octet_pos / 9) * 8 + ((octet_pos % 9) * 8) / 9;
+	break;
+    default:
+	errno = EINVAL;
+	return -1;
+    }
+
+    nonet_pos += offset;	/* requested absolute position, in nonets */
+
+    /*
+     * Compute 'octet_pos = (nonet_pos * 9) / 8;' without
+     * overflowing the intermediate term.
+     *
+     * Let nonet_pos = C * 8 + D, where C = nonet_pos / 8 and D = nonet_pos % 8.
+     *
+     * (nonet_pos * 9) / 8
+     * == ((C * 8 + D) * 9) / 8
+     * == (C * 8 * 9 + D * 9) / 8
+     * == C * 9 + (D * 9) / 8
+     * == (nonet_pos / 8) * 9 + ((nonet_pos % 8) * 9) / 8
+     */
+    octet_pos = (nonet_pos / 8) * 9 + ((nonet_pos % 8) * 9) / 8;
+
+    if (fseeko(pdp10fp->octet_fp, octet_pos, SEEK_SET) == -1)
+	return -1;
+
+    pdp10fp->nonet_pos = nonet_pos;
+
+    /*
+     * Now octet_pos will be from 0 to 7 bits before nonet_pos.
+     * Depending on whether the next I/O is a read or a write,
+     * different actions need to be taken.  Set shiftreg_nr_bits
+     * to the negation of the number of "slack" bits to signal
+     * this case.
+     */
+    pdp10fp->shiftreg = 0;
+    pdp10fp->shiftreg_nr_bits = -(nonet_pos % 8);
+    pdp10fp->writing = 0;	/* nothing is pending until the next pdp10_fputc() */
+
+    return 0;
+}
+
+/*
+ * On an octet-based host, in-core data structures representing nonet-based
+ * target data will in fact contain oversize octet-based host data.  For
+ * example, 9/18/36-bit target integers are typically stored in 16/32/64-bit
+ * host integers.
+ *
+ * This means that I/O of aggregate structures must be avoided, and instead
+ * be performed on each primitive data field individually, using explicit
+ * marshalling code for multi-nonet primitive data types.
+ *
+ * To detect mistakes in I/O, fread and fwrite only accept strings (size == 1)
+ * and single marshalled primitive data values (nmemb == 1, size == 1, 2, or 4).
+ */
+static int pdp10_freadwrite_bad_params(size_t size, size_t nmemb)
+{
+    return size != 1 && (nmemb != 1 || (size != 2 && size != 4));	/* non-zero means reject */
+}
+
+size_t pdp10_fread(uint16_t *ptr, size_t size, size_t nmemb, PDP10_FILE *pdp10fp)
+{
+    size_t total, done = 0;
+
+    /* empty transfers do no I/O and report nmemb */
+    if (size == 0 || nmemb == 0)
+	return nmemb;
+
+    if (pdp10_freadwrite_bad_params(size, nmemb)) {
+	errno = EINVAL;
+	return 0;
+    }
+
+    /* each nonet lands in its own ptr[] element; stop early at EOF */
+    total = size * nmemb;
+    while (done < total) {
+	int ch = pdp10_fgetc(pdp10fp);
+	if (ch == EOF)
+	    break;
+	ptr[done++] = ch & 0x1FF;
+    }
+
+    return done / size;	/* number of complete members read */
+}
+
+size_t pdp10_fwrite(const uint16_t *ptr, size_t size, size_t nmemb, PDP10_FILE *pdp10fp)
+{
+    size_t total, done = 0;
+
+    /* empty transfers do no I/O and report nmemb */
+    if (size == 0 || nmemb == 0)
+	return nmemb;
+
+    if (pdp10_freadwrite_bad_params(size, nmemb)) {
+	errno = EINVAL;
+	return 0;
+    }
+
+    /* emit the low 9 bits of each element; stop at the first failed write */
+    total = size * nmemb;
+    while (done < total && pdp10_fputc(ptr[done] & 0x1FF, pdp10fp) != EOF)
+	++done;
+
+    return done / size;	/* number of complete members written */
+}