diff --git a/as/0LD/htab.c b/as/0LD/htab.c deleted file mode 100644 index e7e1b9e..0000000 --- a/as/0LD/htab.c +++ /dev/null @@ -1,136 +0,0 @@ -/* - * htab.c - */ -#include -#include -#include "emalloc.h" -#include "htab.h" - -static struct hnode **htab_alloc_bucket(unsigned int size) -{ - size_t nrbytes; - struct hnode **bucket; - - nrbytes = size * sizeof(struct hnode*); - bucket = emalloc(nrbytes); - memset(bucket, 0, nrbytes); - return bucket; -} - -void htab_init(struct htab *htab, unsigned int log2size, htab_cmpfn_t cmpfn) -{ - unsigned int size; - - size = 1 << log2size; - htab->log2size = log2size; - htab->mask = size - 1; - htab->used = 0; - htab->cmpfn = cmpfn; - htab->bucket = htab_alloc_bucket(size); -} - -struct hnode *htab_lookup(const struct htab *htab, uintptr_t hval, const void *data) -{ - htab_cmpfn_t cmpfn; - unsigned int i; - struct hnode *hnode; - - cmpfn = htab->cmpfn; - i = hval & htab->mask; - - hnode = htab->bucket[i]; - while (hnode != NULL) { - if (hnode->hval == hval - && (cmpfn == NULL || (*cmpfn)(hnode, data) == 0)) - break; - hnode = hnode->hnext; - } - - return hnode; -} - -static void htab_grow(struct htab *htab) -{ - unsigned int old_size, new_size, new_mask; - struct hnode **old_bucket, **new_bucket; - unsigned int i; - - old_size = 1 << htab->log2size; - htab->log2size += 1; - new_size = 1 << htab->log2size; - new_mask = new_size - 1; - htab->mask = new_mask; - old_bucket = htab->bucket; - new_bucket = htab_alloc_bucket(new_size); - htab->bucket = new_bucket; - for (i = 0; i < old_size; ++i) { - struct hnode *hnode = old_bucket[i]; - while (hnode != NULL) { - struct hnode *hnext = hnode->hnext; - unsigned int j = hnode->hval & new_mask; - hnode->hnext = new_bucket[j]; - new_bucket[j] = hnode; - hnode = hnext; - } - } - free(old_bucket); -} - -void htab_insert(struct htab *htab, struct hnode *hnode) -{ - unsigned int i; - unsigned int size; - - i = hnode->hval & htab->mask; - hnode->hnext = htab->bucket[i]; - htab->bucket[i] = 
hnode; - htab->used += 1; - size = 1 << htab->log2size; - if (htab->used > (4 * size) / 5) /* rehash at 80% */ - htab_grow(htab); -} - -#if 0 -struct hash_node *am_hash_reset(struct hash_table *hash_table) -{ - unsigned int i; - unsigned int size; - struct hash_node * volatile *bucket, *all_nodes, *head, *tail; - - all_nodes = NULL; - bucket = hash_table->bucket; - size = 1 << hash_table->log2size; - for (i = 0; i < size; ++i) { - head = bucket[i]; - if (head) { - bucket[i] = NULL; - tail = head; - while (tail->next) - tail = tail->next; - tail->next = all_nodes; - all_nodes = head; - } - } - hash_table->used = 0; - return all_nodes; -} - -void am_hash_enumerate(const struct hash_table *hash_table, - void (*callback)(struct hash_node *hash_node, void *data), - void *data) -{ - unsigned int i; - unsigned int size; - struct hash_node * volatile *bucket, *head; - - bucket = hash_table->bucket; - size = 1 << hash_table->log2size; - for (i = 0; i < size; ++i) { - head = bucket[i]; - while (head != NULL) { - callback(head, data); - head = head->next; - } - } -} -#endif diff --git a/as/0LD/htab.h b/as/0LD/htab.h deleted file mode 100644 index d749291..0000000 --- a/as/0LD/htab.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * htab.h - */ -#ifndef HTAB_H -#define HTAB_H - -#include /* uintptr_t */ - -struct hnode { - uintptr_t hval; - struct hnode *hnext; -}; - -typedef int (*htab_cmpfn_t)(const struct hnode *hnode, const void *data); - -struct htab { - unsigned int log2size; - unsigned int mask; /* INV: mask == (1 << log2size) - 1 */ - unsigned int used; - htab_cmpfn_t cmpfn; - struct hnode **bucket; -}; - -void htab_init(struct htab *htab, unsigned int log2size, htab_cmpfn_t cmpfn); - -struct hnode *htab_lookup(const struct htab *htab, uintptr_t hval, const void *data); - -struct hnode *htab_reset(struct htab *htab); - -void htab_enumerate(const struct htab *htab, - void (*callback)(struct hnode *hnode, void *data), - void *data); - -void htab_insert(struct htab *htab, struct 
hnode *hnode); - -#endif /* HTAB_H */ diff --git a/as/Makefile b/as/Makefile index 1afe2cc..5feaa0e 100644 --- a/as/Makefile +++ b/as/Makefile @@ -2,18 +2,21 @@ CC=gcc CFLAGS=-O2 -g -Wall CPPFLAGS=-I../include -ASOBJS=assemble.o input.o main.o output.o parse.o scan.o token.o +ASOBJS=assemble.o hashtab.o input.o main.o output.o parse.o scan.o token.o tunit.o LIBOBJS=../lib/pdp10-elf36.o ../lib/pdp10-extint.o ../lib/pdp10-opcodes.o ../lib/pdp10-stdio.o as: $(ASOBJS) $(LIBOBJS) $(LINK.c) -o $@ $^ -input.o: input.h parse.h scan.h token.def token.h -main.o: assemble.h input.h output.h -output.o: assemble.h output.h -parse.o: input.h scan.h token.def token.h -scan.o: scan.h token.def token.h -token.o: token.def token.h +assemble.o: assemble.h tunit.h hashtab.h +hashtab.o: hashtab.h +input.o: hashtab.h input.h parse.h scan.h tunit.h token.h token.def +main.o: assemble.h input.h output.h tunit.h hashtab.h +output.o: output.h tunit.h hashtab.h +parse.o: input.h scan.h token.h tunit.h token.def hashtab.h +scan.o: scan.h token.h token.def +token.o: token.h token.def +tunit.o: hashtab.h tunit.h clean: rm -f $(ASOBJS) as a.out core.* diff --git a/as/assemble.c b/as/assemble.c index 1c07e42..c0caf0c 100644 --- a/as/assemble.c +++ b/as/assemble.c @@ -6,101 +6,77 @@ #include #include #include "assemble.h" -#include "input.h" +#include "hashtab.h" +#include "tunit.h" -static struct aunit_symbol *symbol(const char *progname, struct aunit *aunit, const char *name) -{ - struct aunit_symbol *sym; - - for (sym = aunit->symbols; sym; sym = sym->next) - if (strcmp(name, sym->name) == 0) - return sym; - - sym = malloc(sizeof *sym); - if (!sym) { - fprintf(stderr, "%s: failed to allocate %zu bytes for aunit_symbol: %s\n", progname, sizeof *sym, strerror(errno)); - return NULL; - } - - sym->name = name; - sym->text_offset = 0; - sym->is_global = 0; - sym->is_defined = 0; - - sym->next = aunit->symbols; - aunit->symbols = sym; - - return sym; -} - -int assemble(const char *progname, struct 
iunit *iunit, struct aunit *aunit) +static int assemble_section(struct hashnode *hashnode, void *data) { + struct section *section = (struct section*)hashnode; /*XXX*/ + struct tunit *tunit = data; + unsigned long dot; struct stmt *stmt; - struct aunit_symbol *sym; - pdp10_uint36_t i, n; - aunit->text_words = NULL; - aunit->text_nr_words = 0; - aunit->symbols = NULL; + /* if it's not .text-like then we have nothing to do (for now) */ + if (section->sh_type != SHT_PROGBITS + || section->sh_flags != (SHF_ALLOC | SHF_EXECINSTR)) + return 0; - n = 0; - for (stmt = iunit->text.head; stmt; stmt = stmt->next) { - switch (stmt->tag) { - case S_DOT_GLOBL: - sym = symbol(progname, aunit, stmt->u.symbol.name); - if (!sym) - return -1; - sym->is_global = 1; - break; - case S_LABEL: - (void)symbol(progname, aunit, stmt->u.symbol.name); - break; - case S_INSN: - ++n; - break; - default: - break; - } - } + section->dot = (section->dot + 3) & ~(unsigned long)3; - aunit->text_nr_words = n; - aunit->text_words = malloc(n * sizeof(pdp10_uint36_t)); - if (!aunit->text_words) { - fprintf(stderr, "%s: failed to allocate %zu bytes for text image: %s\n", progname, n * sizeof(pdp10_uint36_t), strerror(errno)); + section->image_words = malloc((section->dot / 4) * sizeof(pdp10_uint36_t)); + if (!section->image_words) { + fprintf(stderr, "%s: %s: failed to allocate %zu bytes for text image: %s\n", + tunit->progname, __FUNCTION__, (section->dot / 4) * sizeof(pdp10_uint36_t), strerror(errno)); return -1; } - i = 0; - for (stmt = iunit->text.head; stmt; stmt = stmt->next) { + dot = 0; + for (stmt = section->head; stmt; stmt = stmt->next) { switch (stmt->tag) { case S_LABEL: - sym = symbol(progname, aunit, stmt->u.symbol.name); - if (!sym) + { + struct symbol *symbol; + + symbol = tunit_symbol_lookup(tunit, stmt->u.symbol.name); + if (!symbol) return -1; - sym->is_defined = 1; - sym->text_offset = i * 4; + + if (symbol->section != section + || !symbol->defined + || symbol->st_value != dot) + 
return -1; + break; + } case S_INSN: - if (i >= n) { - fprintf(stderr, "%s: internal error: text image overflow\n", progname); + { + if (dot >= section->dot) { + fprintf(stderr, "%s: %s: internal error: text image overflow\n", tunit->progname, __FUNCTION__); return -1; } - aunit->text_words[i] = + section->image_words[dot / 4] = ((pdp10_uint36_t)(stmt->u.insn.opcode & 0x1FF) << (36 - 9) | ((stmt->u.insn.accumulator & 0xF) << (36 - 13)) | ((stmt->u.insn.at & 1) << (36 - 14)) | ((stmt->u.insn.indexreg & 0xF) << (36 - 18)) | (stmt->u.insn.address & PDP10_UINT18_MAX)); - ++i; + dot += 4; break; + } default: break; } } - if (i != n) { - fprintf(stderr, "%s: internal error: text image size mismatch\n", progname); + + if (dot != section->dot) { + fprintf(stderr, "%s: %s: internal error: text image size mismatch\n", tunit->progname, __FUNCTION__); return -1; } return 0; } + +int assemble(struct tunit *tunit) +{ + return hashtab_enumerate(&tunit->sections, assemble_section, tunit); +} diff --git a/as/assemble.h b/as/assemble.h index 0641caf..6779908 100644 --- a/as/assemble.h +++ b/as/assemble.h @@ -4,23 +4,8 @@ #ifndef ASSEMBLE_H #define ASSEMBLE_H -#include "pdp10-stdint.h" -#include "input.h" +#include "tunit.h" -struct aunit_symbol { - struct aunit_symbol *next; - const char *name; - pdp10_uint36_t text_offset; - int is_global; - int is_defined; -}; - -struct aunit { - pdp10_uint36_t *text_words; - pdp10_uint36_t text_nr_words; - struct aunit_symbol *symbols; -}; - -int assemble(const char *progname, struct iunit *iunit, struct aunit *aunit); +int assemble(struct tunit *tunit); #endif /* ASSEMBLE_H */ diff --git a/as/hashtab.c b/as/hashtab.c new file mode 100644 index 0000000..f2f63b2 --- /dev/null +++ b/as/hashtab.c @@ -0,0 +1,151 @@ +/* + * hashtab.c + */ +#include +#include +#include +#include +#include "hashtab.h" + +static struct hashnode **hashtab_alloc_bucket(unsigned int nrelem) +{ + size_t nrbytes; + struct hashnode **bucket; + unsigned int i; + + nrbytes = 
nrelem * sizeof(struct hashnode*); + bucket = malloc(nrbytes); + if (!bucket) { + fprintf(stderr, "%s: malloc(%zu) failed: %s\n", __FUNCTION__, nrbytes, strerror(errno)); + return NULL; + } + for (i = 0; i < nrelem; ++i) + bucket[i] = NULL; + return bucket; +} + +int hashtab_init(struct hashtab *hashtab, unsigned int log2size, hashtab_eq_func_t eq_func) +{ + unsigned int size; + + size = 1 << log2size; + hashtab->log2size = log2size; + hashtab->mask = size - 1; + hashtab->used = 0; + hashtab->eq_func = eq_func; + hashtab->bucket = hashtab_alloc_bucket(size); + return hashtab->bucket ? 0 : -1; +} + +struct hashnode *hashtab_lookup(const struct hashtab *hashtab, uintptr_t hashval, const void *data) +{ + unsigned int i; + struct hashnode *hashnode; + + i = hashval & hashtab->mask; + + hashnode = hashtab->bucket[i]; + while (hashnode != NULL) { + if (hashnode->hashval == hashval + && (hashtab->eq_func == NULL || (*hashtab->eq_func)(hashnode, data) != 0)) + break; + hashnode = hashnode->next; + } + + return hashnode; +} + +static int hashtab_grow(struct hashtab *hashtab) +{ + unsigned int old_size, new_size, new_mask, i; + struct hashnode **old_bucket, **new_bucket; + + old_size = 1 << hashtab->log2size; + new_size = old_size << 1; + new_bucket = hashtab_alloc_bucket(new_size); + if (!new_bucket) + return -1; + + old_bucket = hashtab->bucket; + hashtab->log2size += 1; + new_mask = new_size - 1; + hashtab->mask = new_mask; + hashtab->bucket = new_bucket; + + for (i = 0; i < old_size; ++i) { + struct hashnode *hashnode = old_bucket[i]; + while (hashnode != NULL) { + struct hashnode *next = hashnode->next; + unsigned int j = hashnode->hashval & new_mask; + hashnode->next = new_bucket[j]; + new_bucket[j] = hashnode; + hashnode = next; + } + } + + free(old_bucket); + return 0; +} + +int hashtab_insert(struct hashtab *hashtab, struct hashnode *hashnode) +{ + unsigned int i, size; + + i = hashnode->hashval & hashtab->mask; + hashnode->next = hashtab->bucket[i]; + 
hashtab->bucket[i] = hashnode; + hashtab->used += 1; + size = 1 << hashtab->log2size; + if (hashtab->used > (4 * size) / 5) /* rehash at 80% */ + return hashtab_grow(hashtab); + return 0; +} + +struct hashnode *hashtab_reset(struct hashtab *hashtab) +{ + unsigned int i, size; + struct hashnode **bucket, *all_nodes, *head, *tail; + + all_nodes = NULL; + bucket = hashtab->bucket; + size = 1 << hashtab->log2size; + + for (i = 0; i < size; ++i) { + head = bucket[i]; + if (head) { + bucket[i] = NULL; + tail = head; + while (tail->next) + tail = tail->next; + tail->next = all_nodes; + all_nodes = head; + } + } + + hashtab->used = 0; + return all_nodes; +} + +int hashtab_enumerate(const struct hashtab *hashtab, + int (*func)(struct hashnode *hashnode, void *data), + void *data) +{ + unsigned int i, size; + struct hashnode **bucket, *head; + int status; + + bucket = hashtab->bucket; + size = 1 << hashtab->log2size; + + for (i = 0; i < size; ++i) { + head = bucket[i]; + while (head != NULL) { + status = (*func)(head, data); + if (status != 0) + return status; + head = head->next; + } + } + + return 0; +} diff --git a/as/hashtab.h b/as/hashtab.h new file mode 100644 index 0000000..1e711e7 --- /dev/null +++ b/as/hashtab.h @@ -0,0 +1,36 @@ +/* + * hashtab.h + */ +#ifndef HASHTAB_H +#define HASHTAB_H + +#include /* uintptr_t */ + +struct hashnode { + uintptr_t hashval; + struct hashnode *next; +}; + +typedef int (*hashtab_eq_func_t)(const struct hashnode *hashnode, const void *data); + +struct hashtab { + unsigned int log2size; + unsigned int mask; /* INV: mask == (1 << log2size) - 1 */ + unsigned int used; + hashtab_eq_func_t eq_func; + struct hashnode **bucket; +}; + +int hashtab_init(struct hashtab *hashtab, unsigned int log2size, hashtab_eq_func_t eq_func); + +struct hashnode *hashtab_lookup(const struct hashtab *hashtab, uintptr_t hashval, const void *data); + +int hashtab_insert(struct hashtab *hashtab, struct hashnode *hashnode); + +struct hashnode *hashtab_reset(struct 
hashtab *hashtab); + +int hashtab_enumerate(const struct hashtab *hashtab, + int (*func)(struct hashnode *hashnode, void *data), + void *data); + +#endif /* HASHTAB_H */ diff --git a/as/input.c b/as/input.c index 494b1a8..9964c11 100644 --- a/as/input.c +++ b/as/input.c @@ -5,28 +5,16 @@ #include #include #include +#include "hashtab.h" #include "input.h" #include "parse.h" #include "scan.h" +#include "tunit.h" -static int interpret(struct scan_state *scan_state, struct iunit *iunit, struct stmt *stmt) +static int do_append(struct scan_state *scan_state, struct tunit *tunit, struct stmt *stmt, unsigned long dot_incr) { struct stmt *stmt2; - - switch (stmt->tag) { - case S_DOT_GLOBL: - break; - case S_DOT_TEXT: - return 0; /* XXX: nothing to do yet */ - case S_LABEL: - break; - case S_INSN: - break; - default: - fprintf(stderr, "%s: %s line %u: parser returned unexpected stmt->tag %u\n", - scan_state->progname, scan_state->filename, scan_state->linenr, stmt->tag); - return -1; - } + struct section *section; stmt2 = malloc(sizeof *stmt2); if (!stmt2) { @@ -38,13 +26,127 @@ static int interpret(struct scan_state *scan_state, struct iunit *iunit, struct *stmt2 = *stmt; stmt2->next = NULL; - *iunit->text.tailptr = stmt2; - iunit->text.tailptr = &stmt2->next; + section = tunit->cursect; + + *section->tailptr = stmt2; + section->tailptr = &stmt2->next; + section->dot += dot_incr; return 0; } -int input(const char *progname, char **files, int nrfiles, struct iunit *iunit) +static int do_dot_file(struct scan_state *scan_state, struct tunit *tunit, struct stmt *stmt) +{ + struct symbol *symbol; + + symbol = tunit_symbol_enter(tunit, stmt->u.string.text); + if (!symbol) + return -1; + + symbol->section = NULL; + symbol->defined = 1; + symbol->st_value = 0; + symbol->st_size = 0; + symbol->st_info = ELF_ST_INFO(STB_LOCAL, STT_FILE); + + return 0; +} + +static int do_dot_globl(struct scan_state *scan_state, struct tunit *tunit, struct stmt *stmt) +{ + struct symbol *symbol; + + 
symbol = tunit_symbol_enter(tunit, stmt->u.symbol.name); + if (!symbol) + return -1; + + if (ELF_ST_BIND(symbol->st_info) != STB_LOCAL) { + fprintf(stderr, "%s: %s line %u: symbol %s already has non-zero binding type %u\n", + scan_state->progname, scan_state->filename, scan_state->linenr, stmt->u.symbol.name, ELF_ST_BIND(symbol->st_info)); + return -1; + } + + symbol->st_info = ELF_ST_INFO(STB_GLOBAL, ELF_ST_TYPE(symbol->st_info)); + + return 0; +} + +static int do_dot_text(struct scan_state *scan_state, struct tunit *tunit, struct stmt *stmt) +{ + struct section *section; + + section = tunit_section_enter(tunit, ".text"); + if (!section) + return -1; + + if (section->sh_type == SHT_NULL) { + section->sh_type = SHT_PROGBITS; + section->sh_flags = SHF_ALLOC | SHF_EXECINSTR; + section->sh_addralign = 4; /* XXX: PDP10-specific */ + } + + tunit->cursect = section; + + return 0; +} + +static int do_label(struct scan_state *scan_state, struct tunit *tunit, struct stmt *stmt) +{ + struct symbol *symbol; + struct section *section; + + symbol = tunit_symbol_enter(tunit, stmt->u.symbol.name); + if (!symbol) + return -1; + + if (symbol->section + || symbol->defined + || symbol->st_value) { + fprintf(stderr, "%s: %s line %u: symbol %s already defined\n", + scan_state->progname, scan_state->filename, scan_state->linenr, stmt->u.symbol.name); + return -1; + } + + section = tunit->cursect; + + symbol->section = section; + symbol->defined = 1; + symbol->st_value = section->dot; + + return do_append(scan_state, tunit, stmt, 0); +} + +static int do_insn(struct scan_state *scan_state, struct tunit *tunit, struct stmt *stmt) +{ + if (tunit->cursect->dot & 3) { /* XXX: PDP10-specific */ + fprintf(stderr, "%s: %s line %u: misaligned instruction\n", + scan_state->progname, scan_state->filename, scan_state->linenr); + return -1; + } + return do_append(scan_state, tunit, stmt, 4); /* XXX: PDP10-specific sizeof */ +} + +static int interpret(struct scan_state *scan_state, struct tunit 
*tunit, struct stmt *stmt) +{ + switch (stmt->tag) { + case S_DOT_FILE: + return do_dot_file(scan_state, tunit, stmt); + case S_DOT_GLOBL: + return do_dot_globl(scan_state, tunit, stmt); + case S_DOT_TEXT: + return do_dot_text(scan_state, tunit, stmt); + case S_LABEL: + return do_label(scan_state, tunit, stmt); + case S_INSN: + return do_insn(scan_state, tunit, stmt); + default: + fprintf(stderr, "%s: %s line %u: parser returned unexpected stmt->tag %u\n", + scan_state->progname, scan_state->filename, scan_state->linenr, stmt->tag); + return -1; + } +} + +int input(char **files, int nrfiles, struct tunit *tunit) { char fake_file[3]; char *fake_files[1]; @@ -62,10 +164,7 @@ int input(const char *progname, char **files, int nrfiles, struct iunit *iunit) nrfiles = 1; } - iunit->text.head = NULL; - iunit->text.tailptr = &iunit->text.head; - - scan_init(&scan_state, progname); + scan_init(&scan_state, tunit->progname); for (i = 0; i < nrfiles; ++i) { if (scan_open(&scan_state, files[i]) < 0) @@ -76,7 +175,7 @@ int input(const char *progname, char **files, int nrfiles, struct iunit *iunit) return -1; if (status == 0) break; - if (interpret(&scan_state, iunit, &stmt) < 0) + if (interpret(&scan_state, tunit, &stmt) < 0) return -1; } } diff --git a/as/input.h b/as/input.h index 687dd90..0e6e1ce 100644 --- a/as/input.h +++ b/as/input.h @@ -4,50 +4,8 @@ #ifndef INPUT_H #define INPUT_H -/* - * A directives, label, or instruction is parsed to a statement, which is - * either interpreted immediately or appended to the representation of the - * current section. 
- */ +#include "tunit.h" -enum stmt_tag { - /* directives */ - S_DOT_GLOBL, - S_DOT_TEXT, - /* non-directives */ - S_LABEL, - S_INSN, -}; - -struct stmt { - struct stmt *next; - enum stmt_tag tag; - union { - struct { /* S_DOT_GLOBL, S_LABEL */ - const char *name; - } symbol; - struct { /* S_INSN */ - unsigned int opcode; - unsigned int accumulator; - int at; - unsigned int address; /* XXX: relocatable expr */ - unsigned int indexreg; - } insn; - } u; -}; - -/* - * The input unit object is the top-level container for the representation - * of the sections, and all other information collected from the input. - */ - -struct iunit { - struct { - struct stmt *head; - struct stmt **tailptr; - } text; -}; - -int input(const char *progname, char **files, int nrfiles, struct iunit *iunit); +int input(char **files, int nrfiles, struct tunit *tunit); #endif /* INPUT_H */ diff --git a/as/main.c b/as/main.c index 143f8a4..631ea41 100644 --- a/as/main.c +++ b/as/main.c @@ -8,20 +8,23 @@ #include "assemble.h" #include "input.h" #include "output.h" +#include "tunit.h" -#define VERSION "pdp10-tools as version 0.1, built " __DATE__ " " __TIME__ "\n" +#define VERSION "pdp10-tools as version 0.2, built " __DATE__ " " __TIME__ "\n" int main(int argc, char **argv) { const char *outfile = "a.out"; - struct iunit iunit; - struct aunit aunit; + struct tunit tunit; for (;;) { int ch; - ch = getopt(argc, argv, "vo:"); + ch = getopt(argc, argv, "vo:VQ:"); switch (ch) { + case 'Q': /* SVR4 compat, ignored */ + continue; + case 'V': /* SVR4 compat, alias for -v */ case 'v': printf(VERSION); continue; @@ -37,16 +40,19 @@ int main(int argc, char **argv) break; } - if (input(argv[0], &argv[optind], argc - optind, &iunit) < 0) + if (tunit_init(&tunit, argv[0]) < 0) return 1; - if (assemble(argv[0], &iunit, &aunit) < 0) + if (input(&argv[optind], argc - optind, &tunit) < 0) return 1; - /* XXX: iunit_fini(&iunit) */ - - if (output(argv[0], &aunit, outfile) < 0) + if (assemble(&tunit) < 0) return 1; 
+ if (output(&tunit, outfile) < 0) + return 1; + + tunit_fini(&tunit); + return 0; } diff --git a/as/output.c b/as/output.c index bf51adb..12adb62 100644 --- a/as/output.c +++ b/as/output.c @@ -9,6 +9,7 @@ #include "pdp10-stdint.h" #include "pdp10-stdio.h" #include "assemble.h" +#include "hashtab.h" #include "output.h" struct strtab_entry { @@ -28,7 +29,7 @@ static void strtab_init(struct strtab *strtab) strtab->nrbytes = 0; } -static pdp10_uint36_t strtab_enter(const char *progname, struct strtab *strtab, const char *name) +static pdp10_uint36_t strtab_enter(struct tunit *tunit, struct strtab *strtab, const char *name) { struct strtab_entry *prev, *here; pdp10_uint36_t index; @@ -47,7 +48,7 @@ static pdp10_uint36_t strtab_enter(const char *progname, struct strtab *strtab, here = malloc(sizeof *here); if (!here) { fprintf(stderr, "%s: failed to allocate %zu bytes for a strtab_entry: %s\n", - progname, sizeof *here, strerror(errno)); + tunit->progname, sizeof *here, strerror(errno)); return 0; } here->next = NULL; @@ -67,6 +68,14 @@ static pdp10_uint36_t strtab_enter(const char *progname, struct strtab *strtab, return index; } +static int output_padding(PDP10_FILE *pdp10fp, unsigned int nrbytes) +{ + for (; nrbytes; --nrbytes) + if (pdp10_elf36_write_uint9(pdp10fp, '\0') < 0) + return -1; + return 0; +} + static int strtab_write(PDP10_FILE *pdp10fp, const struct strtab *strtab) { struct strtab_entry *here; @@ -80,82 +89,208 @@ static int strtab_write(PDP10_FILE *pdp10fp, const struct strtab *strtab) if (pdp10_elf36_write_uint9(pdp10fp, here->string[i]) < 0) return -1; - i = (4 - (strtab->nrbytes & 3)) & 3; - while (i != 0) { - if (pdp10_elf36_write_uint9(pdp10fp, '\0') < 0) - return -1; - --i; - } + return 0; +} + +struct context { + struct tunit *tunit; + Elf36_Word shnum; + Elf36_Word offset; + struct strtab shstrtab; + Elf36_Word symnum; + struct strtab symstrtab; + PDP10_FILE *pdp10fp; +}; + +static int append_section(struct context *context, struct section 
*section) +{ + if (section->dot == 0 || section->image_words == NULL) + return 0; + + section->st_shndx = context->shnum; + ++context->shnum; + + section->sh_offset = (context->offset + (section->sh_addralign - 1)) & ~(section->sh_addralign - 1); + context->offset = section->sh_offset + section->dot; + + section->sh_name = strtab_enter(context->tunit, &context->shstrtab, section->name); + if (section->sh_name == 0) + return -1; return 0; } -int output(const char *progname, struct aunit *aunit, const char *outfile) +static int process_section(struct hashnode *hashnode, void *data) { - pdp10_uint36_t shnum, text_shndx, symtab_shndx, strtab_shndx, shstrtab_shndx; - pdp10_uint36_t text_shstrndx, symtab_shstrndx, strtab_shstrndx, shstrtab_shstrndx; + struct section *section = (struct section*)hashnode; + struct context *context = data; + + return append_section(context, section); +} + +static int output_section_prologue(struct context *context, struct section *section) +{ + if (section->st_shndx != context->shnum) + abort(); + ++context->shnum; + + if (section->sh_offset < context->offset) + abort(); + + if (output_padding(context->pdp10fp, section->sh_offset - context->offset) < 0) + return -1; + + return 0; +} + +static void output_section_epilogue(struct context *context, struct section *section) +{ + context->offset = section->sh_offset + section->dot; +} + +static int output_section(struct hashnode *hashnode, void *data) +{ + struct section *section = (struct section*)hashnode; + struct context *context = data; + unsigned int i; + + if (section->dot == 0 || section->image_words == NULL) + return 0; + + if (output_section_prologue(context, section) < 0) + return -1; + + /* XXX: ->image_words[] should be uint9_t[] not uint36_t[] */ + for (i = 0; i < section->dot; i += 4) + if (pdp10_elf36_write_uint36(context->pdp10fp, section->image_words[i / 4]) < 0) + return -1; + + output_section_epilogue(context, section); + + return 0; +} + +static int 
output_section_header(struct context *context, const struct section *section) +{ + Elf36_Shdr shdr; + + shdr.sh_name = section->sh_name; + shdr.sh_type = section->sh_type; + shdr.sh_flags = section->sh_flags; + shdr.sh_addr = 0; + shdr.sh_offset = section->sh_offset; + shdr.sh_size = section->dot; + shdr.sh_link = section->sh_link; + shdr.sh_info = 0; /* XXX: for symtab, LAST_LOCAL + 1 */ + shdr.sh_addralign = section->sh_addralign; + shdr.sh_entsize = section->sh_entsize; + + return pdp10_elf36_write_shdr(context->pdp10fp, &shdr); +} + +static int output_shdr(struct hashnode *hashnode, void *data) +{ + struct section *section = (struct section*)hashnode; + struct context *context = data; + + + if (section->dot == 0 || section->image_words == NULL) + return 0; + + return output_section_header(context, section); +} + +static int output_strtab(struct context *context, struct section *section, struct strtab *strtab) +{ + if (output_section_prologue(context, section) < 0) + return -1; + if (strtab_write(context->pdp10fp, strtab) < 0) + return -1; + output_section_epilogue(context, section); + return 0; +} + +static int process_symbol(struct hashnode *hashnode, void *data) +{ + struct symbol *symbol = (struct symbol*)hashnode; + struct context *context = data; + + ++context->symnum; + + symbol->st_name = strtab_enter(context->tunit, &context->symstrtab, symbol->name); + if (symbol->st_name == 0) + return -1; + + return 0; +} + +struct finalize_symbol_context { Elf36_Sym *symtab; - pdp10_uint36_t symnum; - struct strtab strtab, shstrtab; - struct aunit_symbol *asym; - pdp10_uint36_t i; - Elf36_Shdr *shtab; - pdp10_uint36_t offset; + unsigned int i; +}; + +static int finalize_symbol(struct hashnode *hashnode, void *data) +{ + struct symbol *symbol = (struct symbol*)hashnode; + struct finalize_symbol_context *fsctx = data; + + fsctx->symtab[fsctx->i].st_name = symbol->st_name; + fsctx->symtab[fsctx->i].st_value = symbol->st_value; + fsctx->symtab[fsctx->i].st_size = 
symbol->st_size; + fsctx->symtab[fsctx->i].st_info = symbol->st_info; + fsctx->symtab[fsctx->i].st_other = STV_DEFAULT; + fsctx->symtab[fsctx->i].st_shndx = symbol->section->st_shndx; + + ++fsctx->i; + + return 0; +} + +int output(struct tunit *tunit, const char *outfile) +{ + struct context context; + struct section section_symtab; + struct section section_strtab; + struct section section_shstrtab; Elf36_Ehdr ehdr; - PDP10_FILE *pdp10fp; - shnum = 0; - shstrtab_shndx = 0; - text_shndx = 0; - symtab_shndx = 0; - strtab_shndx = 0; - symtab = NULL; - symnum = 0; - strtab_init(&strtab); - strtab_init(&shstrtab); - shtab = NULL; + context.tunit = tunit; + context.shnum = 1; + context.offset = ELF36_EHDR_SIZEOF; + strtab_init(&context.shstrtab); + context.symnum = 0; + strtab_init(&context.symstrtab); + context.pdp10fp = NULL; - shnum = 1; /* tentative */ + if (hashtab_enumerate(&tunit->sections, process_section, &context) < 0) + return -1; - if (aunit->text_nr_words != 0) { - text_shstrndx = strtab_enter(progname, &shstrtab, ".text"); - if (text_shstrndx == 0) + if (hashtab_enumerate(&tunit->symbols, process_symbol, &context) < 0) + return -1; + + section_init(§ion_symtab, ".symtab"); + section_init(§ion_strtab, ".strtab"); + section_init(§ion_shstrtab, ".shstrtab"); + + /* if we have symbols, synthesize .strtab and .symtab */ + if (context.symnum) { + Elf36_Sym *symtab; + struct finalize_symbol_context fsctx; + + section_strtab.sh_type = SHT_STRTAB; + section_strtab.sh_addralign = 1; + section_strtab.dot = context.symstrtab.nrbytes; /* XXX: fixme */ + section_strtab.image_words = (pdp10_uint36_t*)4; /* XXX: fixme */ + + if (append_section(&context, §ion_strtab) < 0) return -1; - text_shndx = shnum; - ++shnum; - } - for (asym = aunit->symbols; asym; asym = asym->next) - ++symnum; - if (symnum != 0) { - symtab_shstrndx = strtab_enter(progname, &shstrtab, ".symtab"); - if (symtab_shstrndx == 0) - return -1; - strtab_shstrndx = strtab_enter(progname, &shstrtab, 
".strtab"); - if (strtab_shstrndx == 0) - return -1; - symtab_shndx = shnum; - strtab_shndx = shnum + 1; - shnum += 2; - } + ++context.symnum; /* for initial stub entry */ - if (shnum == 1) { - shstrtab_shndx = 0; - shnum = 0; - } else { - shstrtab_shstrndx = strtab_enter(progname, &shstrtab, ".shstrtab"); - if (shstrtab_shstrndx == 0) - return -1; - shstrtab_shndx = shnum; - ++shnum; - } - - if (symnum) { - ++symnum; /* for initial stub entry */ - symtab = malloc(symnum * sizeof(Elf36_Sym)); + symtab = malloc(context.symnum * sizeof(Elf36_Sym)); if (!symtab) { fprintf(stderr, "%s: failed to allocate %zu bytes for Elf36 symbol table: %s\n", - progname, symnum * sizeof(Elf36_Sym), strerror(errno)); + tunit->progname, context.symnum * sizeof(Elf36_Sym), strerror(errno)); return -1; } @@ -166,101 +301,50 @@ int output(const char *progname, struct aunit *aunit, const char *outfile) symtab[0].st_other = 0; symtab[0].st_shndx = SHN_UNDEF; - for (i = 1, asym = aunit->symbols; asym; ++i, asym = asym->next) { - symtab[i].st_name = strtab_enter(progname, &strtab, asym->name); - if (symtab[i].st_name == 0) - return -1; - symtab[i].st_value = asym->text_offset; - symtab[i].st_size = 0; - if (asym->is_global) - symtab[i].st_info = ELF36_ST_INFO(STB_GLOBAL, STT_NOTYPE); - else - symtab[i].st_info = ELF36_ST_INFO(STB_LOCAL, STT_NOTYPE); - symtab[i].st_other = STV_DEFAULT; - symtab[i].st_shndx = text_shndx; - } + fsctx.symtab = symtab; + fsctx.i = 1; + + if (hashtab_enumerate(&tunit->symbols, finalize_symbol, &fsctx) < 0) + return -1; + + section_symtab.sh_type = SHT_SYMTAB; + section_symtab.sh_entsize = ELF36_SYM_SIZEOF; + section_symtab.sh_link = section_strtab.st_shndx; + section_symtab.sh_addralign = 4; /* XXX: PDP10-specific */ + section_symtab.dot = context.symnum * ELF36_SYM_SIZEOF; /* XXX: fixme */ + section_symtab.image_words = (pdp10_uint36_t*)symtab; /* XXX: fixme */ + + if (append_section(&context, §ion_symtab) < 0) + return -1; } - if (shnum) { - shtab = malloc(shnum 
* sizeof(Elf36_Shdr)); - if (!shtab) { - fprintf(stderr, "%s: failed to allocate %zu bytes for Elf36 section header table: %s\n", - progname, shnum * sizeof(Elf36_Shdr), strerror(errno)); + /* if we have sections, synthesize .shstrtab */ + if (context.shnum > 1) { + section_shstrtab.sh_type = SHT_STRTAB; + section_shstrtab.sh_addralign = 1; + + /* append_section() open-coded and rearranged to work for this special case */ + + section_shstrtab.sh_name = strtab_enter(tunit, &context.shstrtab, ".shstrtab"); + if (section_shstrtab.sh_name == 0) return -1; - } - shtab[0].sh_name = 0; - shtab[0].sh_type = SHT_NULL; - shtab[0].sh_flags = 0; - shtab[0].sh_addr = 0; - shtab[0].sh_offset = 0; - shtab[0].sh_size = 0; - shtab[0].sh_link = 0; - shtab[0].sh_info = 0; - shtab[0].sh_addralign = 0; - shtab[0].sh_entsize = 0; + section_shstrtab.dot = context.shstrtab.nrbytes; /* XXX: fixme */ + section_shstrtab.image_words = (pdp10_uint36_t*)4; /* XXX: fixme */ - offset = ELF36_EHDR_SIZEOF; + section_shstrtab.st_shndx = context.shnum; + ++context.shnum; - if (text_shndx) { - shtab[text_shndx].sh_name = text_shstrndx; - shtab[text_shndx].sh_type = SHT_PROGBITS; - shtab[text_shndx].sh_flags = SHF_ALLOC | SHF_EXECINSTR; - shtab[text_shndx].sh_addr = 0; - shtab[text_shndx].sh_offset = offset; - shtab[text_shndx].sh_size = aunit->text_nr_words * 4; - shtab[text_shndx].sh_link = 0; - shtab[text_shndx].sh_info = 0; - shtab[text_shndx].sh_addralign = 4; - shtab[text_shndx].sh_entsize = 0; - offset += aunit->text_nr_words * 4; - } + section_shstrtab.sh_offset = context.offset; + context.offset = section_shstrtab.sh_offset + section_shstrtab.dot; - if (symtab_shndx) { - shtab[symtab_shndx].sh_name = symtab_shstrndx; - shtab[symtab_shndx].sh_type = SHT_SYMTAB; - shtab[symtab_shndx].sh_flags = 0; - shtab[symtab_shndx].sh_addr = 0; - shtab[symtab_shndx].sh_offset = offset; - shtab[symtab_shndx].sh_size = symnum * ELF36_SYM_SIZEOF; - shtab[symtab_shndx].sh_link = strtab_shndx; - 
shtab[symtab_shndx].sh_info = 0 + 1; /* XXX: LAST_LOCAL + 1 */ - shtab[symtab_shndx].sh_addralign = 4; - shtab[symtab_shndx].sh_entsize = ELF36_SYM_SIZEOF; - offset += symnum * ELF36_SYM_SIZEOF; - } + context.offset = (context.offset + (4 - 1)) & ~(Elf36_Word)(4 - 1); - if (strtab_shndx) { - shtab[strtab_shndx].sh_name = strtab_shstrndx; - shtab[strtab_shndx].sh_type = SHT_STRTAB; - shtab[strtab_shndx].sh_flags = 0; - shtab[strtab_shndx].sh_addr = 0; - shtab[strtab_shndx].sh_offset = offset; - shtab[strtab_shndx].sh_size = strtab.nrbytes; - shtab[strtab_shndx].sh_link = 0; - shtab[strtab_shndx].sh_info = 0; - shtab[strtab_shndx].sh_addralign = 1; - shtab[strtab_shndx].sh_entsize = 0; - offset += (strtab.nrbytes + 3) & ~3; - } - - if (shstrtab_shndx) { - shtab[shstrtab_shndx].sh_name = shstrtab_shstrndx; - shtab[shstrtab_shndx].sh_type = SHT_STRTAB; - shtab[shstrtab_shndx].sh_flags = 0; - shtab[shstrtab_shndx].sh_addr = 0; - shtab[shstrtab_shndx].sh_offset = offset; - shtab[shstrtab_shndx].sh_size = shstrtab.nrbytes; - shtab[shstrtab_shndx].sh_link = 0; - shtab[shstrtab_shndx].sh_info = 0; - shtab[shstrtab_shndx].sh_addralign = 1; - shtab[shstrtab_shndx].sh_entsize = 0; - offset += (shstrtab.nrbytes + 3) & ~3; - } - - /* offset is now the offset of the section header table, which is last in the file */ - } else - offset = 0; + /* context.offset is now the offset of the section header table, which is last in the file */ + } else { + context.shnum = 0; + context.offset = 0; + } ehdr.e_wident[0] = (((pdp10_uint36_t)ELFMAG0 << 28) | (ELFMAG1 << 20) @@ -279,47 +363,81 @@ int output(const char *progname, struct aunit *aunit, const char *outfile) ehdr.e_version = EV_CURRENT; ehdr.e_entry = 0; ehdr.e_phoff = 0; - ehdr.e_shoff = offset; + ehdr.e_shoff = context.offset; ehdr.e_flags = 0; ehdr.e_ehsize = ELF36_EHDR_SIZEOF; ehdr.e_phentsize = 0; ehdr.e_phnum = 0; ehdr.e_shentsize = ELF36_SHDR_SIZEOF; - ehdr.e_shnum = shnum; - ehdr.e_shstrndx = shstrtab_shndx; + ehdr.e_shnum = 
context.shnum; + ehdr.e_shstrndx = section_shstrtab.st_shndx; - pdp10fp = pdp10_fopen(outfile, "wb"); - if (!pdp10fp) { - fprintf(stderr, "%s: failed to open %s: %s\n", progname, outfile, strerror(errno)); + context.pdp10fp = pdp10_fopen(outfile, "wb"); + if (!context.pdp10fp) { + fprintf(stderr, "%s: failed to open %s: %s\n", tunit->progname, outfile, strerror(errno)); return -1; } - if (pdp10_elf36_write_ehdr(pdp10fp, &ehdr) < 0) + if (pdp10_elf36_write_ehdr(context.pdp10fp, &ehdr) < 0) return -1; - if (text_shndx) - for (i = 0; i < aunit->text_nr_words; ++i) - if (pdp10_elf36_write_uint36(pdp10fp, aunit->text_words[i]) < 0) - return -1; + context.shnum = 1; + context.offset = ELF36_EHDR_SIZEOF; - if (symtab_shndx) - for (i = 0; i < symnum; ++i) - if (pdp10_elf36_write_sym(pdp10fp, &symtab[i]) < 0) - return -1; + if (hashtab_enumerate(&tunit->sections, output_section, &context) < 0) + return -1; - if (strtab_shndx) - if (strtab_write(pdp10fp, &strtab) < 0) + if (context.symnum) { + unsigned int i; + + if (output_strtab(&context, &section_strtab, &context.symstrtab) < 0) return -1; - if (shstrtab_shndx) - if (strtab_write(pdp10fp, &shstrtab) < 0) + if (output_section_prologue(&context, &section_symtab) < 0) return -1; - if (shnum) - for (i = 0; i < shnum; ++i) - if (pdp10_elf36_write_shdr(pdp10fp, &shtab[i]) < 0) + for (i = 0; i < context.symnum; ++i) + if (pdp10_elf36_write_sym(context.pdp10fp, + &((Elf36_Sym*)section_symtab.image_words)[i]) < 0) return -1; - pdp10_fclose(pdp10fp); + output_section_epilogue(&context, &section_symtab); + } + + if (context.shnum > 1) { + struct section section0; + + if (output_strtab(&context, &section_shstrtab, &context.shstrtab) < 0) + return -1; + + if (ehdr.e_shoff < context.offset) + abort(); + if (output_padding(context.pdp10fp, ehdr.e_shoff - context.offset) < 0) + return -1; + section0.name = ""; + section0.sh_name = 0; + section0.sh_type = SHT_NULL; + section0.sh_flags = 0; + section0.sh_offset = 0; + section0.dot = 0; + section0.sh_link 
= 0; + section0.sh_addralign = 0; + section0.sh_entsize = 0; + if (output_section_header(&context, &section0) < 0) + return -1; + if (hashtab_enumerate(&tunit->sections, output_shdr, &context) < 0) + return -1; + if (context.symnum) { + if (output_section_header(&context, &section_strtab) < 0) + return -1; + if (output_section_header(&context, &section_symtab) < 0) + return -1; + } + if (output_section_header(&context, &section_shstrtab) < 0) + return -1; + } + + pdp10_fclose(context.pdp10fp); + return 0; } diff --git a/as/output.h b/as/output.h index a4f3a3d..e59e4fe 100644 --- a/as/output.h +++ b/as/output.h @@ -4,8 +4,8 @@ #ifndef OUTPUT_H #define OUTPUT_H -#include "assemble.h" +#include "tunit.h" -int output(const char *progname, struct aunit *aunit, const char *outfile); +int output(struct tunit *tunit, const char *outfile); #endif /* OUTPUT_H */ diff --git a/as/parse.c b/as/parse.c index b6dbe25..3dd9ebb 100644 --- a/as/parse.c +++ b/as/parse.c @@ -6,6 +6,7 @@ #include #include "pdp10-opcodes.h" #include "input.h" /* for struct stmt */ +#include "parse.h" #include "scan.h" #include "token.h" @@ -18,6 +19,23 @@ static int error(struct scan_state *scan_state, const char *msg, enum token toke return -1; } +static int parse_dot_file(struct scan_state *scan_state, struct stmt *stmt) +{ + enum token token; + union token_attribute token_attr; + + token = scan_token(scan_state, &token_attr); + if (token == T_STRING) { + stmt->u.string.text = token_attr.text; + token = scan_token(scan_state, &token_attr); + if (token == T_NEWLINE) { + stmt->tag = S_DOT_FILE; + return 1; + } + } + return error(scan_state, "junk after .file directive", token, &token_attr); +} + static int parse_dot_globl(struct scan_state *scan_state, struct stmt *stmt) { enum token token; @@ -329,6 +347,8 @@ int parse_stmt(struct scan_state *scan_state, struct stmt *stmt) /* * directives */ + case T_DOT_FILE: + return parse_dot_file(scan_state, stmt); case T_DOT_GLOBL: return parse_dot_globl(scan_state, stmt); case 
T_DOT_TEXT: diff --git a/as/scan.c b/as/scan.c index a7d918e..25deed2 100644 --- a/as/scan.c +++ b/as/scan.c @@ -2,6 +2,7 @@ * scan.c */ #include +#include <limits.h> /* XXX: for UCHAR_MAX, deleteme */ #include #include #include @@ -69,7 +70,7 @@ static void badchar(struct scan_state *scan_state, int ch, const char *context) buf[6] = '\0'; } - fprintf(stderr, "%s: %s, line %u: invalid character %s%s\n", + fprintf(stderr, "%s: %s line %u: invalid character %s%s\n", scan_state->progname, scan_state->filename, scan_state->linenr, buf, context); } @@ -84,6 +85,107 @@ static unsigned int get_chval(int ch) return -1U; } +static int is_octal_digit(int ch) +{ + return ch >= '0' && ch <= '7'; +} + +static int do_escape(struct scan_state *scan_state) +{ + int ch; + + ch = scan_getchar(); + switch (ch) { + case 'n': + return '\n'; + case 't': + return '\t'; + case 'f': + return '\f'; + case 'r': + return '\r'; + case 'b': + return '\b'; + case '\\': + case '\'': + case '"': + return ch; + default: + if (is_octal_digit(ch)) { + unsigned int val = ch - '0'; + ch = scan_getchar(); + if (is_octal_digit(ch)) { + val = val * 8 + (ch - '0'); + ch = scan_getchar(); + if (is_octal_digit(ch)) + val = val * 8 + (ch - '0'); + else + scan_ungetc(scan_state, ch); + } else + scan_ungetc(scan_state, ch); + /* XXX: this should be PDP10_UINT9_MAX, but our string elements are still char not pdp10_uint9_t for now */ + if (val > UCHAR_MAX) { + fprintf(stderr, "%s: %s line %u: out of range character escape value %#x\n", + scan_state->progname, scan_state->filename, scan_state->linenr, val); + return EOF; + } + return val & UCHAR_MAX; + } + break; + } + badchar(scan_state, ch, "in \\ character escape"); + if (ch == '\n') + ++scan_state->linenr; + return EOF; +} + +/* XXX: string literals should be sequences of pdp10_uint9_t, not sequences of char */ + +static enum token do_string(struct scan_state *scan_state, union token_attribute *token_attr) +{ + char charbuf[4096]; /* 4095 char + NUL, XXX: make it dynamic 
*/ + unsigned int len; + char *text; + int ch; + + len = 0; + for (;;) { + ch = scan_getchar(); + switch (ch) { + case '"': + text = malloc(len + 1); + if (!text) { + fprintf(stderr, "%s: %s line %u: malloc(%u) failed: %s\n", + scan_state->progname, scan_state->filename, scan_state->linenr, len + 1, strerror(errno)); + return T_ERROR; + } + strcpy(text, charbuf); + token_attr->text = text; + return T_STRING; + case '\\': + ch = do_escape(scan_state); + if (ch == EOF) + return T_ERROR; + break; + case EOF: + case '\n': + badchar(scan_state, ch, "in string literal"); + if (ch == '\n') + ++scan_state->linenr; + return T_ERROR; + default: + break; + } + if (len >= sizeof charbuf - 1) { + fprintf(stderr, "%s: %s line %u: too long string literal\n", + scan_state->progname, scan_state->filename, scan_state->linenr); + return T_ERROR; + } + charbuf[len] = ch; + ++len; + } +} + static int is_symbol_internal_char(int ch) { return @@ -218,6 +320,8 @@ enum token scan_token(struct scan_state *scan_state, union token_attribute *toke return T_LPAREN; case ')': return T_RPAREN; + case '"': + return do_string(scan_state, token_attr); case '.': /* Dot may start a floating point literal, but tests show that gcc always outputs floating point values as integer literals, diff --git a/as/token.def b/as/token.def index 4327a67..20f292b 100644 --- a/as/token.def +++ b/as/token.def @@ -5,12 +5,14 @@ */ /* reserved symbols including directives; MUST come first and MUST be listed in increasing alphanumeric order */ +TOKEN(T_DOT_FILE, ".file", TAFMT_NONE) TOKEN(T_DOT_GLOBL, ".globl", TAFMT_NONE) TOKEN(T_DOT_TEXT, ".text", TAFMT_NONE) /* non-reserved symbols; T_SYMBOL MUST be the first token after the list of reserved symbols */ TOKEN(T_SYMBOL, "", TAFMT_SYMBOL) /* literals */ TOKEN(T_UINTEGER, "", TAFMT_UINT) +TOKEN(T_STRING, "", TAFMT_STRING) /* special symbols including operators and separators */ TOKEN(T_AT, "@", TAFMT_NONE) TOKEN(T_COLON, ":", TAFMT_NONE) @@ -32,7 +34,6 @@ TOKEN(T_DOT_BSS, 
".bss", TAFMT_NONE) TOKEN(T_DOT_BYTE, ".byte", TAFMT_NONE) TOKEN(T_DOT_COMM, ".comm", TAFMT_NONE) TOKEN(T_DOT_DATA, ".data", TAFMT_NONE) -TOKEN(T_DOT_FILE, ".file", TAFMT_NONE) TOKEN(T_DOT_HIDDEN, ".hidden", TAFMT_NONE) TOKEN(T_DOT_IDENT, ".ident", TAFMT_NONE) TOKEN(T_DOT_INTERNAL, ".internal", TAFMT_NONE) @@ -58,7 +59,6 @@ TOKEN(T_DOT_WEAKREF, ".weakref", TAFMT_NONE) TOKEN(T_REGISTER, "", TAFMT_UINT) TOKEN(T_LOCAL_LABEL, "", TAFMT_UINT) /* 1f, 2b */ /* literals */ -TOKEN(T_STRING, "", TAFMT_STRING) /* operators, separators */ TOKEN(T_TILDE, "~", TAFMT_NONE) TOKEN(T_MUL, "*", TAFMT_NONE)