mirror of
https://github.com/mikpe/pdp10-tools.git
synced 2026-01-11 23:53:19 +00:00
453 lines
14 KiB
C
453 lines
14 KiB
C
/*
|
|
* parse.c
|
|
* Copyright (C) 2013-2015 Mikael Pettersson
|
|
*
|
|
* This file is part of pdp10-tools.
|
|
*
|
|
* pdp10-tools is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* pdp10-tools is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with pdp10-tools. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include "pdp10-opcodes.h"
|
|
#include "input.h" /* for struct stmt */
|
|
#include "parse.h"
|
|
#include "scan.h"
|
|
#include "token.h"
|
|
|
|
static int error(struct scan_state *scan_state, const char *msg, enum token token, const union token_attribute *token_attr)
|
|
{
|
|
fprintf(stderr, "%s: %s line %u: syntax error: %s; current token is ",
|
|
scan_state->progname, scan_state->filename, scan_state->linenr, msg);
|
|
token_print(stderr, token, token_attr);
|
|
fprintf(stderr, "\n");
|
|
return -1;
|
|
}
|
|
|
|
static int parse_dot_file(struct scan_state *scan_state, struct stmt *stmt)
|
|
{
|
|
enum token token;
|
|
union token_attribute token_attr;
|
|
|
|
token = scan_token(scan_state, &token_attr);
|
|
if (token == T_STRING) {
|
|
stmt->u.string.text = token_attr.text;
|
|
token = scan_token(scan_state, &token_attr);
|
|
if (token == T_NEWLINE) {
|
|
stmt->tag = S_DOT_FILE;
|
|
return 1;
|
|
}
|
|
}
|
|
return error(scan_state, "junk after .file directive", token, &token_attr);
|
|
}
|
|
|
|
static int parse_dot_globl(struct scan_state *scan_state, struct stmt *stmt)
|
|
{
|
|
enum token token;
|
|
union token_attribute token_attr;
|
|
|
|
token = scan_token(scan_state, &token_attr);
|
|
if (token == T_SYMBOL) {
|
|
stmt->u.symbol.name = token_attr.text;
|
|
token = scan_token(scan_state, &token_attr);
|
|
if (token == T_NEWLINE) {
|
|
stmt->tag = S_DOT_GLOBL;
|
|
return 1;
|
|
}
|
|
}
|
|
return error(scan_state, "junk after .globl directive", token, &token_attr);
|
|
}
|
|
|
|
/* for now, accepts: .size <sym>,.-<sym> */
|
|
static int parse_dot_size(struct scan_state *scan_state, struct stmt *stmt)
|
|
{
|
|
enum token token;
|
|
union token_attribute token_attr;
|
|
|
|
token = scan_token(scan_state, &token_attr);
|
|
if (token == T_SYMBOL) {
|
|
stmt->u.symbol.name = token_attr.text;
|
|
token = scan_token(scan_state, &token_attr);
|
|
if (token == T_COMMA) {
|
|
token = scan_token(scan_state, &token_attr);
|
|
if (token == T_DOT) {
|
|
token = scan_token(scan_state, &token_attr);
|
|
if (token == T_MINUS) {
|
|
token = scan_token(scan_state, &token_attr);
|
|
if (token == T_SYMBOL
|
|
&& strcmp(token_attr.text, stmt->u.symbol.name) == 0) {
|
|
token = scan_token(scan_state, &token_attr);
|
|
if (token == T_NEWLINE) {
|
|
stmt->tag = S_DOT_SIZE;
|
|
return 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return error(scan_state, "junk after .size directive", token, &token_attr);
|
|
}
|
|
|
|
static int parse_dot_text(struct scan_state *scan_state, struct stmt *stmt)
|
|
{
|
|
enum token token;
|
|
union token_attribute token_attr;
|
|
|
|
token = scan_token(scan_state, &token_attr);
|
|
if (token == T_NEWLINE) {
|
|
stmt->tag = S_DOT_TEXT;
|
|
return 1;
|
|
}
|
|
return error(scan_state, "junk after .text directive", token, &token_attr);
|
|
}
|
|
|
|
static int parse_dot_type(struct scan_state *scan_state, struct stmt *stmt)
|
|
{
|
|
enum token token;
|
|
union token_attribute token_attr;
|
|
|
|
token = scan_token(scan_state, &token_attr);
|
|
if (token == T_SYMBOL) {
|
|
stmt->u.symbol.name = token_attr.text;
|
|
token = scan_token(scan_state, &token_attr);
|
|
if (token == T_COMMA) {
|
|
token = scan_token(scan_state, &token_attr);
|
|
if (token == T_AT) {
|
|
token = scan_token(scan_state, &token_attr);
|
|
if (token == T_SYMBOL
|
|
&& strcmp(token_attr.text, "function") == 0) {
|
|
token = scan_token(scan_state, &token_attr);
|
|
if (token == T_NEWLINE) {
|
|
stmt->tag = S_DOT_TYPE_FUNCTION;
|
|
return 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return error(scan_state, "junk after .type directive", token, &token_attr);
|
|
}
|
|
|
|
/*
|
|
* Recognize:
|
|
*
|
|
* <label> ::= <symbol> ":"
|
|
*
|
|
* <insn> ::= <symbol> (<accumulator> ",")? <address> <newline>
|
|
*
|
|
* <accumulator> ::= <uinteger> [uint <= 0xF]
|
|
*
|
|
* <address> ::= "@"? <displacement>? <index>?
|
|
*
|
|
* <displacement> ::= <uinteger> [uint <= 1^18 - 1]
|
|
* <displacement> ::= "(" <uinteger> ")" [uint <= 1^18 - 1]
|
|
*
|
|
* <index> ::= "(" <indexreg> ")"
|
|
* <indexreg> ::= <uinteger> [uint <= 0xF]
|
|
*
|
|
* Examples:
|
|
* foo:
|
|
* popj 17,
|
|
* pushj 17,bar
|
|
* movei 1,@fum(2)
|
|
*
|
|
* Ambiguous examples:
|
|
*
|
|
* <symbol> (<uinteger>) <newline>
|
|
*
|
|
* This is ambigouous since we have no special notation for <register>, and the same kind of
|
|
* parentheses are used for expression grouping in the displacement as for the index register.
|
|
*
|
|
* This might denote an insn with a parenthesized displacement and no index,
|
|
* or it might denote an insn with an index but no displacement.
|
|
*
|
|
* However, the uinteger in an indexreg cannot be > 0xF, and it rarely makes sense to form
|
|
* an effective address with a displacement <= 0xF and no index.
|
|
*
|
|
* Therefore, if the uinteger is <= 0xF this is an index with no displacement,
|
|
* otherwise it is a displacement without an index.
|
|
*/
|
|
|
|
static int parse_insn_index_after_lparen(struct scan_state *scan_state, struct stmt *stmt)
|
|
{
|
|
enum token token;
|
|
union token_attribute token_attr;
|
|
|
|
token = scan_token(scan_state, &token_attr);
|
|
if (token != T_UINTEGER
|
|
|| token_attr.uint > 0xF)
|
|
return error(scan_state, "invalid <indexreg>", token, &token_attr);
|
|
|
|
stmt->u.insn.indexreg = token_attr.uint;
|
|
|
|
token = scan_token(scan_state, &token_attr);
|
|
if (token != T_RPAREN)
|
|
return error(scan_state, "junk after '(' <indexreg>", token, &token_attr);
|
|
|
|
token = scan_token(scan_state, &token_attr);
|
|
if (token != T_NEWLINE)
|
|
return error(scan_state, "junk after '(' <indexreg> ')'", token, &token_attr);
|
|
|
|
return 1;
|
|
}
|
|
|
|
static int parse_insn_address_after_lparen_uinteger_rparen(struct scan_state *scan_state, struct stmt *stmt, union token_attribute *uinteger_attr)
|
|
{
|
|
enum token token;
|
|
union token_attribute token_attr;
|
|
|
|
token = scan_token(scan_state, &token_attr);
|
|
switch (token) {
|
|
case T_NEWLINE: /* might be <displacement> or <index>, inspect the <uinteger>'s value to disambiguate */
|
|
if (uinteger_attr->uint > PDP10_UINT18_MAX)
|
|
return error(scan_state, "invalid <displacement>", T_UINTEGER, uinteger_attr);
|
|
if (uinteger_attr->uint <= 0xF) /* it's the <index> */
|
|
stmt->u.insn.indexreg = uinteger_attr->uint;
|
|
else /* it's the <displacement> */
|
|
stmt->u.insn.address = uinteger_attr->uint;
|
|
return 1;
|
|
case T_LPAREN: /* the <uinteger> is the <displacement>, followed by <index> */
|
|
if (uinteger_attr->uint > PDP10_UINT18_MAX)
|
|
return error(scan_state, "invalid <displacement>", T_UINTEGER, uinteger_attr);
|
|
stmt->u.insn.address = uinteger_attr->uint;
|
|
return parse_insn_index_after_lparen(scan_state, stmt);
|
|
default:
|
|
return error(scan_state, "junk in <address> after '(' <uinteger> ')'", token, &token_attr);
|
|
}
|
|
}
|
|
|
|
static int parse_insn_address_after_lparen_uinteger(struct scan_state *scan_state, struct stmt *stmt, union token_attribute *uinteger_attr)
|
|
{
|
|
enum token token;
|
|
union token_attribute token_attr;
|
|
|
|
token = scan_token(scan_state, &token_attr);
|
|
switch (token) {
|
|
case T_RPAREN: /* might be <displacement> or <index> */
|
|
return parse_insn_address_after_lparen_uinteger_rparen(scan_state, stmt, uinteger_attr);
|
|
default:
|
|
return error(scan_state, "junk in <address> after '(' <uinteger>", token, &token_attr);
|
|
}
|
|
}
|
|
|
|
static int parse_insn_address_after_lparen(struct scan_state *scan_state, struct stmt *stmt)
|
|
{
|
|
enum token token;
|
|
union token_attribute token_attr;
|
|
|
|
token = scan_token(scan_state, &token_attr);
|
|
switch (token) {
|
|
case T_UINTEGER: /* might be <displacement> or <index> */
|
|
return parse_insn_address_after_lparen_uinteger(scan_state, stmt, &token_attr);
|
|
default:
|
|
return error(scan_state, "junk in <address> after '('", token, &token_attr);
|
|
}
|
|
}
|
|
|
|
static int parse_insn_after_displacement(struct scan_state *scan_state, struct stmt *stmt)
|
|
{
|
|
enum token token;
|
|
union token_attribute token_attr;
|
|
|
|
token = scan_token(scan_state, &token_attr);
|
|
switch (token) {
|
|
case T_NEWLINE: /* no <index> */
|
|
return 1;
|
|
case T_LPAREN: /* need <index> */
|
|
return parse_insn_index_after_lparen(scan_state, stmt);
|
|
default:
|
|
return error(scan_state, "junk in <address> after <displacement>", token, &token_attr);
|
|
}
|
|
}
|
|
|
|
static int parse_insn_address_after_at(struct scan_state *scan_state, struct stmt *stmt)
|
|
{
|
|
enum token token;
|
|
union token_attribute token_attr;
|
|
|
|
token = scan_token(scan_state, &token_attr);
|
|
switch (token) {
|
|
case T_NEWLINE:
|
|
return 1;
|
|
case T_LPAREN: /* might be <displacement> or <index> */
|
|
return parse_insn_address_after_lparen(scan_state, stmt);
|
|
case T_UINTEGER:
|
|
if (token_attr.uint > PDP10_UINT18_MAX)
|
|
return error(scan_state, "invalid <displacement>", token, &token_attr);
|
|
stmt->u.insn.address = token_attr.uint;
|
|
return parse_insn_after_displacement(scan_state, stmt);
|
|
default:
|
|
return error(scan_state, "invalid <address>", token, &token_attr);
|
|
}
|
|
}
|
|
|
|
static int parse_insn_address(struct scan_state *scan_state, struct stmt *stmt, const struct pdp10_instruction *insndesc)
|
|
{
|
|
enum token token;
|
|
union token_attribute token_attr;
|
|
|
|
token = scan_token(scan_state, &token_attr);
|
|
if (token == T_NEWLINE)
|
|
return 1;
|
|
|
|
if (insndesc->type & PDP10_E_UNUSED)
|
|
return error(scan_state, "<address> not allowed in this instruction", token, &token_attr);
|
|
|
|
switch (token) {
|
|
case T_LPAREN: /* might be <displacement> or <index> */
|
|
return parse_insn_address_after_lparen(scan_state, stmt);
|
|
case T_UINTEGER:
|
|
if (token_attr.uint > PDP10_UINT18_MAX)
|
|
return error(scan_state, "invalid <displacement>", token, &token_attr);
|
|
stmt->u.insn.address = token_attr.uint;
|
|
return parse_insn_after_displacement(scan_state, stmt);
|
|
case T_AT:
|
|
stmt->u.insn.at = 1;
|
|
return parse_insn_address_after_at(scan_state, stmt);
|
|
default:
|
|
return error(scan_state, "invalid <address>", token, &token_attr);
|
|
}
|
|
}
|
|
|
|
static int parse_insn_after_symbol_uinteger(
|
|
struct scan_state *scan_state, struct stmt *stmt, const struct pdp10_instruction *insndesc, union token_attribute *uinteger_attr)
|
|
{
|
|
enum token token;
|
|
union token_attribute token_attr;
|
|
|
|
token = scan_token(scan_state, &token_attr);
|
|
if (token == T_COMMA) { /* the <uinteger> is the <accumulator> */
|
|
if (uinteger_attr->uint > 0xF)
|
|
return error(scan_state, "invalid <accumulator>", T_UINTEGER, uinteger_attr);
|
|
if (insndesc->type & (PDP10_A_OPCODE | PDP10_A_UNUSED))
|
|
return error(scan_state, "<accumulator> not allowed in this instruction", T_UINTEGER, uinteger_attr);
|
|
stmt->u.insn.accumulator = uinteger_attr->uint;
|
|
return parse_insn_address(scan_state, stmt, insndesc);
|
|
}
|
|
|
|
if (insndesc->type & PDP10_E_UNUSED)
|
|
return error(scan_state, "<address> not allowed in this instruction", token, &token_attr);
|
|
|
|
switch (token) {
|
|
case T_LPAREN: /* the <uinteger> is the <displacement>, followed by <index> */
|
|
if (uinteger_attr->uint > PDP10_UINT18_MAX)
|
|
return error(scan_state, "invalid <displacement>", T_UINTEGER, uinteger_attr);
|
|
stmt->u.insn.address = uinteger_attr->uint;
|
|
return parse_insn_index_after_lparen(scan_state, stmt);
|
|
case T_NEWLINE: /* the <uinteger> is the <displacement>, there is no <accumulator> or <index> */
|
|
if (uinteger_attr->uint > PDP10_UINT18_MAX)
|
|
return error(scan_state, "invalid <displacement>", T_UINTEGER, uinteger_attr);
|
|
stmt->u.insn.address = uinteger_attr->uint;
|
|
return 1;
|
|
default:
|
|
return error(scan_state, "junk after <symbol> <uinteger>", token, &token_attr);
|
|
}
|
|
}
|
|
|
|
static int parse_after_symbol(struct scan_state *scan_state, struct stmt *stmt, union token_attribute *symbol_attr)
|
|
{
|
|
enum token token;
|
|
union token_attribute token_attr;
|
|
const struct pdp10_instruction *insndesc;
|
|
|
|
token = scan_token(scan_state, &token_attr);
|
|
if (token == T_COLON) {
|
|
stmt->u.symbol.name = symbol_attr->text;
|
|
stmt->tag = S_LABEL;
|
|
return 1;
|
|
}
|
|
|
|
insndesc = pdp10_instruction_from_name(symbol_attr->text);
|
|
if (!insndesc)
|
|
return error(scan_state, "invalid instruction name", T_SYMBOL, symbol_attr);
|
|
|
|
stmt->tag = S_INSN;
|
|
stmt->u.insn.at = 0;
|
|
stmt->u.insn.address = 0;
|
|
stmt->u.insn.indexreg = 0;
|
|
|
|
if (insndesc->type & PDP10_A_OPCODE) {
|
|
/* XXX: this is too intimate with quirky ->opcode representation */
|
|
stmt->u.insn.opcode = (insndesc->opcode >> 6) & 0x1FF;
|
|
stmt->u.insn.accumulator = (insndesc->opcode >> 2) & 0xF;
|
|
} else {
|
|
stmt->u.insn.opcode = insndesc->opcode & 0x1FF;
|
|
stmt->u.insn.accumulator = 0;
|
|
}
|
|
|
|
switch (token) {
|
|
case T_NEWLINE:
|
|
return 1;
|
|
case T_UINTEGER: /* might be <accumulator> or <displacement> */
|
|
return parse_insn_after_symbol_uinteger(scan_state, stmt, insndesc, &token_attr);
|
|
default:
|
|
break;
|
|
}
|
|
|
|
if (insndesc->type & PDP10_E_UNUSED)
|
|
return error(scan_state, "<address> not allowed in this instruction", token, &token_attr);
|
|
|
|
switch (token) {
|
|
case T_AT:
|
|
stmt->u.insn.at = 1;
|
|
return parse_insn_address_after_at(scan_state, stmt);
|
|
case T_LPAREN: /* might be <displacement> or <index> */
|
|
return parse_insn_address_after_lparen(scan_state, stmt);
|
|
default:
|
|
return error(scan_state, "junk after instruction name", token, &token_attr);
|
|
}
|
|
}
|
|
|
|
int parse_stmt(struct scan_state *scan_state, struct stmt *stmt)
|
|
{
|
|
enum token token;
|
|
union token_attribute token_attr;
|
|
|
|
for (;;) {
|
|
token = scan_token(scan_state, &token_attr);
|
|
switch (token) {
|
|
/*
|
|
* directives
|
|
*/
|
|
case T_DOT_FILE:
|
|
return parse_dot_file(scan_state, stmt);
|
|
case T_DOT_GLOBL:
|
|
return parse_dot_globl(scan_state, stmt);
|
|
case T_DOT_SIZE:
|
|
return parse_dot_size(scan_state, stmt);
|
|
case T_DOT_TEXT:
|
|
return parse_dot_text(scan_state, stmt);
|
|
case T_DOT_TYPE:
|
|
return parse_dot_type(scan_state, stmt);
|
|
/*
|
|
* other symbols
|
|
*/
|
|
case T_SYMBOL: /* start of label, insn, or symbol assignment */
|
|
return parse_after_symbol(scan_state, stmt, &token_attr);
|
|
/*
|
|
* synthetic symbols
|
|
*/
|
|
case T_ERROR:
|
|
return -1; /* diagnostics already emitted by scan.c */
|
|
case T_EOF:
|
|
return 0;
|
|
case T_NEWLINE:
|
|
continue;
|
|
default:
|
|
return error(scan_state, "expected directive, label, or instruction", token, &token_attr);
|
|
}
|
|
}
|
|
}
|