1
0
mirror of https://github.com/rzzzwilson/pymlac.git synced 2025-06-10 09:32:41 +00:00
Files
rzzzwilson.pymlac/pyasm/pyasm
2016-01-26 13:58:30 +07:00

548 lines
16 KiB
Python
Executable File

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
An assembler for the Imlac simulator.
Usage: pyasm [ -h ] [ -l <listfile> ] [ -o <outputfile> ] <asmfile>
Where <asmfile> is the file to assemble,
<outputfile> is the output PTP file
<listfile> is the optional listing file
If <outputfile> is not specified the output filename is the input
<asmfile> with any extension removed and a .ptp extension added.
"""
"""
The basic structure of the assembler:
0. Read all file lines into memory
1. Create ORG blocks
2. Create CODE blocks from ORG blocks (assemble() function)
3. Check for undefined things in the symbol table
4. Allocate addresses to literal CODE blocks
5. Fix relative addresses in literal blocks
6. Backpatch all code blocks
7. Emit PTP data
"""
import sys
import os
import copy
import string
import getopt
######
# Globals
######
# name of the assembler source file to read (set in main())
AsmFile = None
# name of the output listing file
ListFile = None
ListFileHandle = None # open listing file, written to by write_list()
# name of the output PTP file
OutputFile = None
OutputFileHandle = None # open output file
# the program start address (optional)
StartAddr = None
# the symbol table(s)
# SymTable maps a symbol name to its value: {<name>: <value>, ... }
SymTable = {}
# SymTableLine maps a symbol name to the number of the line defining it:
# {<name>: <line#>, ... }
SymTableLine = {}
# the backpatch list
# [[symname, coderef, offset], [symname, coderef, offset], ... ]
BackpatchList = []
# text and number of the source line being processed, used when
# formatting error messages
CurrentLine = None
CurrentLineNumber = None
# name of the last undefined symbol found by eval_expression()
Undefined = None
######
# Mostly constant stuff
######
# the output PTP filename extension
PTPExtension = '.ptp'
# the output listing filename extension
ListFileExtension = '.lst'
def usage(msg=None):
    """Show the module usage text, preceded by an optional message.

    msg  optional message; when given (and not empty) it is printed
         between two rows of 60 asterisks before the usage text
    """

    if msg:
        banner = '*' * 60
        for text in (banner, msg, banner):
            print(text)
    print(__doc__)
def error(msg):
    """Report a fatal problem: print 'msg' and exit with status 10."""

    print(msg)
    raise SystemExit(10)
def write_list(code, addr, lnum, line):
    """Generate one line of listing file.

    code  the word of generated code, or None
    addr  the address of the generated code, or None
    lnum  file line number, or None
    line  complete text of the line of assembler, or None

    A field that is None is left blank in the listing.  The line is
    written to the global ListFileHandle.
    """

    # Test against None rather than truthiness: a code word or an address
    # of zero is a legitimate value and must still appear in the listing.
    code_str = '%06o' % code if code is not None else ''
    addr_str = '%04o' % addr if addr is not None else ''
    lnum_str = '%04d:' % lnum if lnum is not None else ''
    line_str = '%s' % line if line is not None else ''

    ListFileHandle.write('%6s %4s %s %s\n'
                         % (code_str, addr_str, lnum_str, line_str))
    # push the line out to the file straight away
    ListFileHandle.flush()
def eval_expression(expr, dot):
    """Evaluate an expression.

    expr  string holding expression, or None
    dot   current code address (the "." symbol value), may be None

    Valid expressions are:
        <expr> ::= <string> | <numeric_expression>
        <string> ::= ('|") <characters> ('|")
        <numeric_expression> ::= (<numeric_const>|<name_const>|".") [ (+|-) <numeric_expression> ]
        <name_const> ::= <name_characters>

    Returns None if there is no expression, the text between the quotes
    for a string expression, otherwise the value of the numeric expression
    evaluated with the symbol table as its namespace.

    Raises NameError if the expression uses an undefined name; the global
    'Undefined' is set to that name first.  Calls error(), which stops the
    program, for a badly formed string or if evaluation raises TypeError.
    """

    global Undefined

    # if no expression, do nothing
    if not expr:
        return None

    # if expression is a string, return the text between the delimiters
    if expr[0] in "'\"":
        # a lone quote character is its own first and last character,
        # so the length has to be checked as well
        if len(expr) < 2 or expr[-1] != expr[0]:
            error("Badly formed string value: '%s'" % expr)
        return expr[1:-1]

    # replace any "." with the name DOT, which is added to the namespace
    expr = expr.replace('.', 'dot').upper()

    # a shallow copy is enough: eval() only adds names to the namespace
    env = dict(SymTable)
    env['DOT'] = dot        # add in the "." address

    # NOTE(review): eval() executes arbitrary Python taken from the source
    # file being assembled; only assemble files you trust.
    try:
        return eval(expr, env)
    except TypeError:
        error('ORG pseudo-opcode expression contains unsatisfied references\n'
              '%d: %s' % (CurrentLineNumber, CurrentLine))
    except NameError as e:
        # str(e) replaces the deprecated e.message attribute
        text = str(e)
        Undefined = text
        if 'is not defined' in text:
            # message looks like: name 'FOO' is not defined
            Undefined = text[len("name '"):text.index("' is not defined")]
        raise
def assemble_line(lnum, opcode, addr):
    """Produce the 16bit word value for one line of code.

    lnum    source file line number
    opcode  opcode, uppercase
    addr    address expression, uppercase

    This is currently a placeholder: the arguments are not examined and
    the same fixed word is returned for every line.
    """

    placeholder_word = 1025
    return placeholder_word
def define_label(label, address, lnum):
    """Put 'label' into the symbol tables.

    label    name of the label
    address  value to give the label
    lnum     number of the source line that defines the label

    Calls error(), which stops the program, if the label is already
    in the symbol table.
    """

    if label in SymTable:
        error("Label '%s' defined twice, lines %d and %d."
              % (label, SymTableLine[label], lnum))

    SymTable[label] = address
    SymTableLine[label] = lnum
def gen_ascii(string, address, lnum, line):
    """List the data words for an ASCII string, two characters per word.

    string   the string to generate data for
    address  'dot' value for first word generated
    lnum     line number of code
    line     actual line of code

    The first character of each pair is placed in the high byte.  A string
    of odd length is padded with a zero byte.  Only the first word listed
    carries the line number and source text.
    """

    # pad to a whole number of words
    padded = string + '\0' if len(string) % 2 else string

    dot = address
    for index in range(0, len(padded), 2):
        (high, low) = padded[index:index + 2]
        word = (ord(high) << 8) + ord(low)
        write_list(word, dot, lnum, line)
        dot += 1
        # continuation words are listed without line number or source text
        lnum = line = None
def assemble_oblock(oblock):
    """Assemble one org block to a code block.

    An org block:  [(lnum, line, label, opcode, addr), ...]
    A code block:  [address, [(word, addr), ...]]

    where 'address' is the value of the block's ORG expression (None if no
    ORG was seen) and each 'addr' is the unevaluated address expression of
    the instruction that produced 'word'.

    Every line is written to the listing file as it is processed.  A label
    on a code or data line is given the address of that line, before the
    line's own words advance the address.  Assembly of the block stops at
    an END pseudo-opcode.

    NOTE(review): DATA words are listed and counted in the address but are
    not added to the code block - confirm whether that is intended.

    Calls error(), which stops the program, for a label on ORG, an EQU
    without a label, an undefined name or code placed before any ORG.
    """

    global CurrentLineNumber, CurrentLine

    cblock = [None, []]
    address = None      # '.' value, unknown until an ORG is seen

    for (lnum, line, label, opcode, addr) in oblock:
        CurrentLineNumber = lnum
        CurrentLine = line

        if not opcode:
            # nothing to assemble: define any label and just list the line
            if label:
                define_label(label, address, lnum)
            write_list(None, None, lnum, line)
            continue

        if opcode == 'ORG':
            # ORG just sets the code address and starts the code block
            if label:
                error('%d: %s\nORG opcode may not have a label'
                      % (lnum, line))
            address = _eval_address(addr, address, lnum, line)
            cblock = [address, []]
            write_list(None, None, lnum, line)
            continue

        if opcode == 'END':
            # END pseudo-op, terminate assembly
            start_addr = None
            if addr:
                start_addr = _eval_address(addr, address, lnum, line)
            write_list(None, start_addr, lnum, line)
            return cblock

        if opcode == 'EQU':
            if label is None:
                error('%d: %s\nEQU opcode missing a label'
                      % (lnum, line))
            # EQU values are evaluated without a '.' value
            value = _eval_address(addr, None, lnum, line)
            define_label(label, value, lnum)
            write_list(None, None, lnum, line)
            continue

        # everything from here on occupies memory, so needs an address
        if address is None:
            error('%d: %s\nCode found before any ORG pseudo-opcode'
                  % (lnum, line))

        # the label names the first word of this line, so define it
        # before the address is advanced
        if label:
            define_label(label, address, lnum)

        if opcode == 'DATA':
            value = _eval_address(addr, address, lnum, line)
            if isinstance(value, str):
                gen_ascii(value, address, lnum, line)
                # two characters per word, odd lengths are padded
                address += (len(value) + 1) // 2
            else:
                write_list(value, address, lnum, line)
                address += 1
        else:
            code = assemble_line(lnum, opcode, addr)
            cblock[1].append((code, addr))
            write_list(code, address, lnum, line)
            address += 1

    return cblock


def _eval_address(expr, dot, lnum, line):
    """Evaluate 'expr', stopping with an error if it uses an undefined name."""

    try:
        return eval_expression(expr, dot)
    except NameError:
        error("%d: %s\nName '%s' is undefined"
              % (lnum, line, Undefined))
def assemble_org_blocks(blocks):
    """Turn a list of org blocks into the matching list of code blocks.

    blocks  list of org blocks

    Returns a list holding the result of assemble_oblock() for each org
    block, in the same order.
    """

    return [assemble_oblock(org_block) for org_block in blocks]
def next_symbol(line):
    """Return next symbol and line remainder.

    line  text to take the leading whitespace-delimited symbol from

    Returns a two-element list [symbol, remainder].  The remainder has
    leading whitespace removed and is '' if nothing follows the symbol.
    Both elements are '' if the line is empty or all whitespace.
    """

    # the str method replaces the deprecated string.split() function
    fields = line.split(None, 1)

    # pad so that callers can always unpack exactly two values
    while len(fields) < 2:
        fields.append('')

    return fields
def split_fields(line):
    """Split one ASM line into fields: label, opcode, address.

    Returns a tuple: (label, opcode, address).
    If label and opcode are not None, uppercase the result string.
    If a field is missing, return None for it.  If the line is empty, return
    (None, None, None).

    A quoted string address is returned enclosed in double quotes with its
    contents exactly as written.  Any other address is the stripped text up
    to a ';' comment, so we don't split something like
        ALPHA + 100

    Calls error(), which stops the program, for an unterminated string or
    for anything other than a comment following a string address.
    """

    if not line:
        return (None, None, None)

    # check for the label, which must start in the first column
    label = None
    if line[0] not in ' \t;':
        (label, remainder) = next_symbol(line)
        label = label.upper()
    else:
        remainder = line.strip()

    # get opcode
    opcode = None
    if remainder and remainder[0] != ';':
        (opcode, remainder) = next_symbol(remainder)
        opcode = opcode.upper()

    # get address
    address = None
    if remainder and remainder[0] != ';':
        if remainder[0] in "'\"":
            # a string: find the closing delimiter
            delim = remainder[0]
            end = remainder.find(delim, 1)
            if end == -1:
                error('Unbalanced string delimiter:\n'
                      '%d: %s' % (CurrentLineNumber, CurrentLine))
            # whitespace inside the quotes is part of the data, so the
            # contents must not be stripped
            address = '"' + remainder[1:end] + '"'
            remainder = remainder[end + 1:].strip()
        else:
            # everything up to any comment is the address expression
            address = remainder.split(';', 1)[0].strip()
            remainder = None

    # check that remainder is empty or only a comment
    if remainder and remainder[0] != ';':
        error('Badly formed instruction:\n'
              '%d: %s' % (CurrentLineNumber, CurrentLine))

    return (label, opcode, address)
def split_orgs(asm_lines):
    """Split ASM lines into ORG blocks (one or more).

    asm_lines  list of source lines

    Returns a list of org blocks.  Each org block is a list of tuples
        (lnum, line, label, opcode, addr)
    where 'lnum' counts source lines from 1.  A new block is started at
    every ORG pseudo-opcode; any lines before the first ORG form a block
    of their own.

    Calls error(), which stops the program, if an ORG has no address.
    """

    global CurrentLineNumber, CurrentLine

    result = []
    block = []

    for (lnum, line) in enumerate(asm_lines, 1):
        # record the line before parsing it so that errors raised inside
        # split_fields() report this line and not the previous one
        CurrentLineNumber = lnum
        CurrentLine = line

        (label, opcode, addr) = split_fields(line)

        # split_fields() returns the opcode in uppercase
        if opcode == 'ORG':
            if block:
                # save previous ORG block
                result.append(block)
                block = []
            if addr is None:
                error('Line %d: %s\nORG pseudo-opcode without an address?'
                      % (lnum, line))

        block.append((lnum, line, label, opcode, addr))

    if block:
        # save the final ORG block
        result.append(block)

    return result
def check_org_overlap(code_blocks):
    """Check that we don't have overlapping ORG blocks.

    code_blocks  list of code blocks: [[address, [word, ...]], ...]

    Calls error(), which stops the program, if the addresses occupied by a
    block intersect those of any earlier block.  Blocks without code or
    without an address occupy no memory and are ignored.
    """

    # remember org block span: [(start, end), (start', end'), ...]
    block_span = []

    for (block_start, code) in code_blocks:
        if block_start is None or not code:
            # nothing placed in memory, so nothing can be overwritten
            continue

        block_end = block_start + len(code) - 1

        for (start, end) in block_span:
            # two inclusive ranges intersect unless one ends before the
            # other starts; this also catches a block that completely
            # encloses an earlier one
            if block_start <= end and start <= block_end:
                error('ORG block at address %06o overwrites previous block at [%06o,%06o]'
                      % (block_start, start, end))

        block_span.append((block_start, block_end))
def allocate_literals(code_blocks):
    """Allocate space for literal blocks.

    Not implemented yet: 'code_blocks' is not touched and None is returned.
    """

    return None
def backpatch(code_blocks):
    """Generate final code values, evaluate address fields.

    code_blocks  list of code blocks: [[address, [(code, addr), ...]], ...]
                 where 'addr' is an unevaluated address expression or None

    Returns a simplified code block list: [[address, [code, code, ...]], ...].
    Empty code blocks are left out; the result is [] if nothing remains.

    Calls error(), which stops the program, if an address expression
    refers to an undefined name.
    """

    result = []

    for (address, codes) in code_blocks:
        if address is None and not codes:
            # empty code block, ignore
            continue

        # got a code_block that has code
        dot = address
        new_code = []
        for (code, addr) in codes:
            try:
                addr_value = eval_expression(addr, dot)
            except NameError:
                error("Name '%s' is undefined" % Undefined)
            if addr_value:
                # merge the evaluated address into the instruction word
                code |= addr_value
            new_code.append(code)
            dot += 1

        result.append([address, new_code])

    # return every patched block, not just the last one processed
    return result
def assemble_file():
    """Assemble the file named by the global AsmFile.

    The source is read, split into org blocks, assembled to code blocks
    and backpatched.  The result of each stage, and both symbol tables,
    are printed as they become available.  This function itself writes
    nothing to the PTP output file.
    """

    # read all of ASM file into memory, strip \n, etc
    with open(AsmFile, 'rb') as fd:
        asm_lines = [raw.rstrip() for raw in fd.readlines()]
    print('asm_lines=\n%s' % '\n'.join(asm_lines))

    org_blocks = split_orgs(asm_lines)
    print('org_blocks=%s' % str(org_blocks))

    code_blocks = assemble_org_blocks(org_blocks)
    print('code_blocks=%s' % str(code_blocks))
    print('SymTable=%s' % str(SymTable))
    print('SymTableLine=%s' % str(SymTableLine))

    # check_org_overlap() is deliberately not called here: the programmer
    # is allowed to overlap ORG blocks

    allocate_literals(code_blocks)

    patched_code_blocks = backpatch(code_blocks)
    print('patched_code_blocks=%s' % str(patched_code_blocks))
def main():
    """The assembler: parse the command line, open the files and assemble.

    Sets the globals AsmFile, ListFile, OutputFile, ListFileHandle and
    OutputFileHandle.  Exits with status 0 after printing help, and with
    status 10 on a usage error or if the source file cannot be opened.
    """

    global AsmFile, ListFile, OutputFile
    global ListFileHandle, OutputFileHandle

    # handle the options; the program name is not part of the arguments
    try:
        (opts, args) = getopt.gnu_getopt(sys.argv[1:], "hl:o:",
                                         ["help", "list=", "output="])
    except getopt.GetoptError as e:
        # tell the user which option was wrong
        usage(str(e))
        sys.exit(10)

    ListFile = None
    OutputFile = None
    for (opt, arg) in opts:
        if opt in ('-h', '--help'):
            usage()
            sys.exit(0)
        elif opt in ('-l', '--list'):
            ListFile = arg
        elif opt in ('-o', '--output'):
            OutputFile = arg

    # exactly one source file must be named
    if len(args) != 1:
        usage()
        sys.exit(10)

    # get ASM filename and make sure it exists
    AsmFile = args[0]
    try:
        f = open(AsmFile, 'rb')
    except IOError:
        print("Sorry, can't find file '%s'" % AsmFile)
        sys.exit(10)
    f.close()

    # default output and listing names come from the source filename
    (path, ext) = os.path.splitext(AsmFile)
    if OutputFile is None:
        OutputFile = path + PTPExtension
    if ListFile is None:
        ListFile = path + ListFileExtension

    # close both files however assembly ends, including error() exits
    OutputFileHandle = open(OutputFile, 'wb')
    try:
        ListFileHandle = open(ListFile, 'wb')
        try:
            print('ListFile=%s, OutputFile=%s, AsmFile=%s'
                  % (str(ListFile), str(OutputFile), str(AsmFile)))
            assemble_file()
        finally:
            ListFileHandle.close()
    finally:
        OutputFileHandle.close()
# run the assembler only when this file is executed as a script
if __name__ == '__main__':
    main()