mirror of
https://github.com/rzzzwilson/pymlac.git
synced 2025-06-10 09:32:41 +00:00
548 lines
16 KiB
Python
Executable File
548 lines
16 KiB
Python
Executable File
#!/usr/bin/python
|
|
# -*- coding: utf-8 -*-
|
|
|
|
"""
|
|
An assembler for the Imlac simulator.
|
|
|
|
Usage: pyasm [ -h ] [ -l <listfile> ] [ -o <outputfile> ] <asmfile>
|
|
|
|
Where <asmfile> is the file to assemble,
|
|
<outputfile> is the output PTP file
|
|
<listfile> is the optional listing file
|
|
|
|
If <outputfile> is not specified the output filename is the input
|
|
<asmfile> with any extension removed and a .ptp extension added.
|
|
"""
|
|
|
|
"""
|
|
The basic structure of the assembler:
|
|
0. Read all file lines into memory
|
|
1. Create ORG blocks
|
|
2. Create CODE blocks from ORG blocks (assemble() function)
|
|
3. Check for undefined things in the symbol table
|
|
4. Allocate addresses to literal CODE blocks
|
|
5. Fix relative addresses in literal blocks
|
|
6. Backpatch all code blocks
|
|
7. Emit PTP data
|
|
"""
|
|
|
|
import sys
|
|
import os
|
|
import copy
|
|
import string
|
|
import getopt
|
|
|
|
######
# Globals
######

# name of the assembler source file being processed
AsmFile = None

# listing output: the filename and the open file object
ListFile = None
ListFileHandle = None

# PTP output: the filename and the open file object
OutputFile = None
OutputFileHandle = None

# optional start address of the assembled program
StartAddr = None

# symbol tables, both keyed by symbol name:
#   SymTable      {<name>: <value>, ... }
#   SymTableLine  {<name>: <line#>, ... }
SymTable = {}
SymTableLine = {}

# pending backpatches
# [[symname, coderef, offset], [symname, coderef, offset], ... ]
BackpatchList = []

# the source line being processed and its line number
CurrentLine = None
CurrentLineNumber = None

# name of the most recently reported undefined label
Undefined = None

######
# Mostly constant stuff
######

# extension used when the PTP output filename is defaulted
PTPExtension = '.ptp'

# extension used when the listing filename is defaulted
ListFileExtension = '.lst'
|
|
|
|
|
|
def usage(msg=None):
    """Show the module usage text, preceded by 'msg' in a banner if given."""

    banner = '*' * 60
    if msg:
        for text in (banner, msg, banner):
            print(text)
    print(__doc__)
|
|
|
|
def error(msg):
    """Report 'msg' on stdout, then terminate with exit status 10."""

    print(msg)
    raise SystemExit(10)
|
|
|
|
def write_list(code, addr, lnum, line):
    """Generate one line of listing file.

    code  is the word of generated code (None if the line generated none)
    addr  is the address of the generated code (None to leave it blank)
    lnum  file line number (None to leave it blank)
    line  complete text of the line of assembler (None or '' for nothing)

    The formatted line is written to the global ListFileHandle, which is
    flushed after every line.
    """

    # Compare against None rather than testing truthiness: a code word of 0
    # and address 0 are legitimate values that must appear in the listing.
    code_str = '%06o' % code if code is not None else ''
    addr_str = '%04o' % addr if addr is not None else ''
    lnum_str = '%04d:' % lnum if lnum is not None else ''
    line_str = '%s' % line if line else ''

    ListFileHandle.write('%6s %4s %s %s\n'
                         % (code_str, addr_str, lnum_str, line_str))
    ListFileHandle.flush()
|
|
|
|
def eval_expression(expr, dot):
    """Evaluate an expression.

    expr  string holding expression
    dot   current code address (the "." symbol value)

    Valid expressions are:
        <expr> ::= <string> | <numeric_expression>
        <string> ::= ('|") <characters> ('|")
        <numeric_expression> ::= (<numeric_const>|<name_const>|".") [ (+|-) <numeric_expression> ]
        <name_const> ::= <name_characters>

    Returns None if 'expr' is None or empty, the text between the delimiters
    if 'expr' is a quoted string, otherwise the value of the expression
    evaluated against a copy of SymTable with "." bound to 'dot'.

    A NameError from an undefined name is re-raised after the global
    'Undefined' has been set to the offending name.  A badly formed string
    or a TypeError during evaluation is reported through error().
    """

    global Undefined

    # if no expression, do nothing
    if not expr:
        return None

    # if expression is a string, return the characters between the delimiters
    if expr[0] in "'\"":
        delim = expr[0]
        # a lone delimiter is both first and last character, so check length
        if len(expr) < 2 or expr[-1] != delim:
            error("Badly formed string value: '%s'" % expr)
        return expr[1:-1]

    # replace any "." with the name "DOT" and fold everything to uppercase
    expr = expr.replace('.', 'dot').upper()

    # evaluate against a private copy so eval() cannot add to SymTable
    env = dict(SymTable)
    env['DOT'] = dot        # add in the "." address

    # NOTE(review): eval() runs arbitrary Python taken from the assembler
    # source text - only assemble files from a trusted origin.
    try:
        return eval(expr, env)
    except TypeError:
        error('ORG pseudo-opcode expression contains unsatisfied references\n'
              '%d: %s' % (CurrentLineNumber, CurrentLine))
    except NameError as e:
        # pull the bare name out of "name 'XYZ' is not defined"
        msg = str(e)
        Undefined = msg
        if 'is not defined' in msg:
            Undefined = msg[len("name '"):-len("' is not defined")]
        raise
|
|
|
|
def assemble_line(lnum, opcode, addr):
    """Assemble one line of code.

    lnum    source file line number
    opcode  opcode, uppercase
    addr    address expression, uppercase

    Returns the 16bit word value for the line.

    NOTE: this is still a placeholder - the arguments are ignored and every
    line assembles to the same fixed word.
    """

    placeholder_word = 1025
    return placeholder_word
|
|
|
|
def define_label(label, address, lnum):
    """Record 'label' in the symbol tables.

    label    name of the label being defined
    address  value to associate with the label
    lnum     source line number of the definition

    A label that is already in SymTable is reported through error(),
    quoting the line of the earlier definition.
    """

    already_defined = label in SymTable
    if already_defined:
        error("Label '%s' define twice, lines %d and %d."
              % (label, SymTableLine[label], lnum))

    SymTableLine[label] = lnum
    SymTable[label] = address
|
|
|
|
def gen_ascii(string, address, lnum, line):
    """Generate data words for ASCII string.

    string   the string to generate data for
    address  'dot' value for first word generated
    lnum     line number of code
    line     actual line of code

    Each pair of characters becomes one word, first character in the high
    byte.  Every word is passed to write_list(); only the first listing line
    carries the line number and source text.
    """

    # an odd length string gets a zero byte of padding
    if len(string) % 2:
        string += '\0'

    # walk the string a pair of characters at a time
    for ndx in range(0, len(string), 2):
        high = ord(string[ndx])
        low = ord(string[ndx + 1])
        write_list((high << 8) + low, address, lnum, line)
        address += 1
        lnum = None
        line = None
|
|
|
|
def _eval_operand(addr, dot, lnum, line):
    """Evaluate operand 'addr' at address 'dot', error() on an undefined name."""

    try:
        return eval_expression(addr, dot)
    except NameError:
        error("%d: %s\nName '%s' is undefined" % (lnum, line, Undefined))


def _require_address(address, lnum, line):
    """Stop with an error if code is generated before any address is set."""

    if address is None:
        error('%d: %s\nCode generated before any ORG pseudo-opcode'
              % (lnum, line))


def assemble_oblock(oblock):
    """Assemble one org block to a code block.

    An org block:  [(lnum, line, label, opcode, addr), ...]
    A code block:  [address, [(word, addr), (word, addr), ...]]

    'address' is the value of the ORG expression (None if the block has no
    ORG) and each (word, addr) pair holds the assembled word plus the
    still unevaluated address expression of one instruction.

    Every line is also sent to the listing through write_list().  Assembly
    of the block stops at an END pseudo-opcode.  Problems are reported
    through error().
    """

    global CurrentLineNumber, CurrentLine

    cblock = [None, []]
    address = None      # '.' value

    for (lnum, line, label, opcode, addr) in oblock:
        CurrentLineNumber = lnum
        CurrentLine = line

        if not opcode:
            # no code: define any label at the current address and list it
            if label:
                define_label(label, address, lnum)
            write_list(None, None, lnum, line)
            continue

        if opcode == 'ORG':
            # ORG just sets the code address and starts the code block
            if label:
                error('%d: %s\nORG opcode may not have a label'
                      % (lnum, line))
            address = _eval_operand(addr, address, lnum, line)
            cblock = [address, []]
            write_list(None, None, lnum, line)
            continue

        if opcode == 'END':
            # END pseudo-op, terminate assembly
            # NOTE(review): the start address is only listed here, it is not
            # stored in the StartAddr global - confirm whether it should be.
            start_addr = None
            if addr:
                start_addr = _eval_operand(addr, address, lnum, line)
            write_list(None, start_addr, lnum, line)
            return cblock

        if opcode == 'EQU':
            if label is None:
                error('%d: %s\nEQU opcode missing a label' % (lnum, line))
            value = _eval_operand(addr, None, lnum, line)
            define_label(label, value, lnum)
            write_list(None, None, lnum, line)
            continue

        # everything below occupies memory, so '.' must be known
        _require_address(address, lnum, line)

        if opcode == 'DATA':
            if label:
                define_label(label, address, lnum)
            value = _eval_operand(addr, address, lnum, line)
            if isinstance(value, str):
                gen_ascii(value, address, lnum, line)
                # gen_ascii() packs two characters per word, padding odd
                # lengths, so step '.' over every word it listed
                address += (len(value) + 1) // 2
            else:
                write_list(value, address, lnum, line)
                address += 1
            # NOTE(review): DATA words are listed but not appended to the
            # code block - confirm how they should reach the output.
            continue

        # an ordinary instruction
        code = assemble_line(lnum, opcode, addr)
        cblock[1].append((code, addr))
        write_list(code, address, lnum, line)
        if label:
            define_label(label, address, lnum)
        address += 1

    return cblock
|
|
|
|
def assemble_org_blocks(blocks):
    """Assemble each org block in turn, returning the list of code blocks."""

    return [assemble_oblock(org_block) for org_block in blocks]
|
|
|
|
def next_symbol(line):
    """Return next symbol and line remainder.

    line  text to split at the first run of whitespace

    Returns a two element list [symbol, remainder].  The remainder is ''
    if nothing follows the symbol, and both elements are '' if 'line' is
    empty or holds only whitespace.
    """

    fields = line.split(None, 1)

    # always hand back exactly two fields so callers can unpack the result
    fields.extend([''] * (2 - len(fields)))

    return fields
|
|
|
|
def split_fields(line):
    """Split one ASM line into fields: label, opcode, address.

    Returns a tuple: (label, opcode, address).
    A label or opcode that is present is returned in uppercase.

    A missing field is returned as None, so an empty line gives
    (None, None, None).

    The address field is kept in one piece even if it contains spaces,
    for example:
        ALPHA + 100
    A quoted string address is returned wrapped in double quotes.
    """

    if not line:
        return (None, None, None)

    label = opcode = address = None

    # a label, if any, starts in the first column
    if line[0] in ' \t;':
        rest = line.strip()
    else:
        (label, rest) = next_symbol(line)
        label = label.upper()

    # the opcode is the next symbol, unless a comment starts here
    if rest and rest[0] != ';':
        (opcode, rest) = next_symbol(rest)
        opcode = opcode.upper()

    # whatever follows, up to any comment, is the address
    if rest and rest[0] != ';':
        if rest[0] in "'\"":
            # a string: find the matching closing delimiter
            quote = rest[0]
            body = rest[1:]
            close = body.find(quote)
            if close == -1:
                error('Unbalanced string delimiter:\n'
                      '%d: %s' % (CurrentLineNumber, CurrentLine))
            address = '"' + body[:close].strip() + '"'
            rest = body[close+1:].strip()
        else:
            # not a string: drop any comment and keep the rest whole
            address = rest.partition(';')[0].strip()
            rest = None

    # only a comment may follow the address
    if rest and rest[0] != ';':
        error('Badly formed instruction:\n'
              '%d: %s' % (CurrentLineNumber, CurrentLine))

    return (label, opcode, address)
|
|
|
|
def split_orgs(asm_lines):
    """Split ASM lines into ORG blocks (one or more).

    asm_lines  list of source lines, in file order

    Returns a list of blocks, each a list of tuples
        (lnum, line, label, opcode, addr)
    where 'lnum' is the 1-based line number.  A new block is started at
    every ORG pseudo-opcode, so any lines before the first ORG form a
    leading block of their own.

    An ORG without an address is reported through error().  This function
    does not itself reject a file that contains no ORG at all.
    """

    global CurrentLineNumber, CurrentLine

    result = []
    block = []

    for (lnum, line) in enumerate(asm_lines, 1):
        # set these first: split_fields() quotes them in its error messages
        CurrentLineNumber = lnum
        CurrentLine = line

        (label, opcode, addr) = split_fields(line)

        if opcode and opcode.upper() == 'ORG':
            if block:
                # save previous ORG block
                result.append(block)
                block = []
            if addr is None:
                error('Line %d: %s\nORG pseudo-opcode without an address?'
                      % (lnum, line))

        block.append((lnum, line, label, opcode, addr))

    if block:
        # save the final ORG block
        result.append(block)

    return result
|
|
|
|
def check_org_overlap(code_blocks):
    """Check that we don't have overlapping ORG blocks.

    code_blocks  list of code blocks: [[address, [code, ...]], ...]

    Each block covers the addresses 'address' to 'address + len(code) - 1'.
    A block sharing any address with an earlier block is reported through
    error().  Blocks without code, or without an address, occupy no known
    addresses and are ignored.
    """

    # remember org block span: [(start, end), (start', end'), ...]
    block_span = []

    for (block_start, code) in code_blocks:
        if block_start is None or not code:
            continue

        block_end = block_start + len(code) - 1

        for (start, end) in block_span:
            # two closed ranges intersect iff each one starts before the
            # other ends - this also catches a new block that completely
            # encloses an earlier one
            if block_start <= end and start <= block_end:
                error('ORG block at address %06o overwrites previous block at [%06o,%06o]'
                      % (block_start, start, end))

        block_span.append((block_start, block_end))
|
|
|
|
def allocate_literals(code_blocks):
    """Allocate space for literal blocks.

    Not implemented yet: 'code_blocks' is left untouched.
    """

    return None
|
|
|
|
def backpatch(code_blocks):
    """Generate final code values, evaluate address fields.

    code_blocks  list of code blocks: [[address, [(code, addr), ...]], ...]

    For each (code, addr) pair the address expression 'addr' is evaluated
    with "." set to the address of that word and a non-zero result is ORed
    into 'code'.  An undefined name is reported through error().

    Returns a simplified code block list: [[address, [code, code, ...]], ...].
    Blocks with neither an address nor code are left out.
    """

    result = []

    for (address, codes) in code_blocks:
        if address is None and not codes:
            # empty code block, ignore
            continue

        # got a code_block that has code
        dot = address
        new_code = []
        for (code, addr) in codes:
            try:
                addr_value = eval_expression(addr, dot)
            except NameError:
                error("Name '%s' is undefined" % Undefined)
            if addr_value:
                code |= addr_value
            new_code.append(code)
            dot += 1

        result.append([address, new_code])

    # hand back every patched block, not just the last one processed
    return result
|
|
|
|
def assemble_file():
    """Assemble the file named by AsmFile, printing each intermediate stage.

    The source is split into ORG blocks, assembled to code blocks, passed
    through literal allocation and finally backpatched.
    """

    # pull the whole ASM file into memory with trailing whitespace removed
    with open(AsmFile, 'rb') as fd:
        asm_lines = [raw.rstrip() for raw in fd.readlines()]
    print('asm_lines=\n%s' % '\n'.join(asm_lines))

    org_blocks = split_orgs(asm_lines)
    print('org_blocks=%s' % str(org_blocks))

    code_blocks = assemble_org_blocks(org_blocks)
    print('code_blocks=%s' % str(code_blocks))
    print('SymTable=%s' % str(SymTable))
    print('SymTableLine=%s' % str(SymTableLine))

    # overlapping ORG blocks are deliberately allowed, so the
    # check_org_overlap() pass is not run here

    allocate_literals(code_blocks)

    patched_code_blocks = backpatch(code_blocks)
    print('patched_code_blocks=%s' % str(patched_code_blocks))
|
|
|
|
def main():
    """Parse the command line, open the output files and run the assembler."""

    global AsmFile, ListFile, OutputFile
    global ListFileHandle, OutputFileHandle

    # parse the command line (the program name stays in the argument list)
    try:
        (opts, args) = getopt.gnu_getopt(sys.argv, "hl:o:",
                                         ["help", "list=", "output="])
    except getopt.GetoptError:
        usage()
        sys.exit(10)

    ListFile = None
    OutputFile = None

    for (opt, arg) in opts:
        if opt in ('-h', '--help'):
            usage()
            sys.exit(0)
        if opt in ('-l', '--list'):
            ListFile = arg
        elif opt in ('-o', '--output'):
            OutputFile = arg

    # expect exactly the program name plus one ASM filename
    if len(args) != 2:
        usage()
        sys.exit(10)

    # the ASM file must be readable before anything is created
    AsmFile = args[1]
    try:
        probe = open(AsmFile, 'rb')
    except IOError:
        print("Sorry, can't find file '%s'" % AsmFile)
        sys.exit(10)
    else:
        probe.close()

    # default output names are the ASM name with its extension swapped
    if OutputFile is None:
        OutputFile = os.path.splitext(AsmFile)[0] + PTPExtension
    OutputFileHandle = open(OutputFile, 'wb')

    if ListFile is None:
        ListFile = os.path.splitext(AsmFile)[0] + ListFileExtension
    ListFileHandle = open(ListFile, 'wb')

    print('ListFile=%s, OutputFile=%s, AsmFile=%s'
          % (str(ListFile), str(OutputFile), str(AsmFile)))

    assemble_file()


if __name__ == '__main__':
    main()
|