#!/usr/bin/python
# -*- coding: utf-8 -*-

"""
An assembler for the Imlac simulator.

Usage: pyasm [ -h ] [ -l <listfile> ] [ -o <outputfile> ] <asmfile>

Where <asmfile>     is the file to assemble,
      <outputfile>  is the output PTP file
      <listfile>    is the optional listing file

If <outputfile> is not specified the output filename is the input
<asmfile> with any extension removed and a .ptp extension added.
If <listfile> is not specified the listing filename is the input
<asmfile> with any extension removed and a .lst extension added.
"""

# The basic structure of the assembler:
#     0. Read all file lines into memory
#     1. Create ORG blocks
#     2. Create CODE blocks from ORG blocks (assemble() function)
#     3. Check for undefined things in the symbol table
#     4. Allocate addresses to literal CODE blocks
#     5. Fix relative addresses in literal blocks
#     6. Backpatch all code blocks
#     7. Emit PTP data

import sys
import os
import copy
import re
import string
import getopt


######
# Globals
######

# the input assembler filename
AsmFile = None

# the output listing file
ListFile = None
ListFileHandle = None       # open listing file

# the output PTP file
OutputFile = None
OutputFileHandle = None     # open output file

# the program start address (optional)
StartAddr = None

# the symbol table(s)
# {<name>: <value>, ... }
SymTable = {}
# {<name>: <line number of definition>, ... }
SymTableLine = {}

# the backpatch list
# [[symname, coderef, offset], [symname, coderef, offset], ... ]
BackpatchList = []

# current line number and the line
CurrentLine = None
CurrentLineNumber = None

# Any undefined label
Undefined = None

######
# Mostly constant stuff
######

# the output PTP filename extension
PTPExtension = '.ptp'

# the output listing filename extension
ListFileExtension = '.lst'

# string types accepted as DATA values (Python 2 has 'basestring')
try:
    _STRING_TYPES = (basestring,)
except NameError:
    _STRING_TYPES = (str,)

# Python 2 reads a leading-zero number such as 0100 as octal, Python 3
# rejects it; this pattern finds such literals so they can be rewritten
# in the '0o100' form that both versions accept.
_LEGACY_OCTAL = re.compile(r'\b0+([0-7]+)\b')

# extracts the symbol name from a NameError message
_UNDEFINED_NAME = re.compile(r"name '(.+)' is not defined")


def usage(msg=None):
    """Print usage and optional error message."""

    if msg:
        print('*'*60)
        print(msg)
        print('*'*60)
    print(__doc__)


def error(msg):
    """Print error message and stop (exit status 10)."""

    print(msg)
    sys.exit(10)


def write_list(code, addr, lnum, line):
    """Generate one line of listing file.

    code  is the word of generated code (None if no code)
    addr  is the address of the generated code (None if no address)
    lnum  file line number (None to leave the column blank)
    line  complete text of the line of assembler (None for blank)

    Does nothing if no listing file is open.
    """

    if ListFileHandle is None:
        return

    # test against None, a word or address of 0 must still be listed
    code_str = '%06o' % code if code is not None else ''
    addr_str = '%04o' % addr if addr is not None else ''
    lnum_str = '%04d:' % lnum if lnum else ''
    line_str = '%s' % line if line else ''

    ListFileHandle.write('%6s %4s %s %s\n'
                         % (code_str, addr_str, lnum_str, line_str))
    ListFileHandle.flush()


def eval_expression(expr, dot):
    """Evaluate an expression.

    expr  string holding expression (or None)
    dot   current code address (the "." symbol value)

    Returns None if there is no expression, the string body (without
    delimiters) if the expression is a quoted string, else the value
    obtained by evaluating the expression.

    A string is delimited by ' or ".  Anything else is uppercased, has
    every "." replaced by the current address and is then evaluated as
    a Python expression with the symbol table as its namespace, so
    forms such as
        ALPHA + 100
        . - 2
    are accepted.  Numbers with a leading zero are taken as octal.

    Raises NameError if a name is undefined; the name is left in the
    global 'Undefined'.  Other evaluation problems stop the assembler.
    """

    global Undefined

    # if no expression, do nothing
    if not expr:
        return None

    # if expression is a string, return the string body
    if expr[0] in "'\"":
        delim = expr[0]
        if expr[-1] != delim:
            error("Badly formed string value: '%s'" % expr)
        return expr[1:-1]

    # replace any "." value with "dot" defined in the symbol table
    expr = expr.replace('.', 'dot')
    expr = expr.upper()
    expr = _LEGACY_OCTAL.sub(r'0o\1', expr)

    # work on a copy, eval() adds '__builtins__' to the namespace
    env = dict(SymTable)
    env['DOT'] = dot        # add in the "." address

    # evaluate the expression
    # NOTE(review): eval() runs arbitrary Python taken from the ASM
    # source; do not assemble files from untrusted origins.
    try:
        result = eval(expr, env)
    except TypeError:
        error('ORG pseudo-opcode expression contains unsatisfied references\n'
              '%s: %s' % (CurrentLineNumber, CurrentLine))
    except SyntaxError:
        error('Badly formed expression:\n'
              '%s: %s' % (CurrentLineNumber, CurrentLine))
    except NameError as e:
        msg = str(e)
        match = _UNDEFINED_NAME.match(msg)
        Undefined = match.group(1) if match else msg
        raise

    return result


def assemble_line(lnum, opcode, addr):
    """Assemble one line of code.

    lnum    source file line number
    opcode  opcode, uppercase
    addr    address expression, uppercase

    Returns the 16bit word value, with possibly the backpatch list updated.

    Currently a placeholder: every opcode assembles to the value 1025.
    """

    return 1025


def define_label(label, address, lnum):
    """Put 'label' into the symbol tables.

    label    the label name
    address  the value to give the label
    lnum     line number of the definition

    Stops the assembler if the label is already defined.
    """

    if label in SymTable:
        prev_lnum = SymTableLine[label]
        error("Label '%s' defined twice, lines %d and %d."
              % (label, prev_lnum, lnum))

    SymTable[label] = address
    SymTableLine[label] = lnum


def gen_ascii(string, address, lnum, line):
    """Generate data words for ASCII string.

    string   the string to generate data for
    address  'dot' value for first word generated
    lnum     line number of code
    line     actual line of code

    Two characters are packed per word, first character in the high
    byte.  An odd length string is padded with a zero byte.  Each word
    is written to the listing; only the first carries the source line.

    Returns the list of generated words.
    """

    # ensure even number of bytes in string
    if len(string) % 2:
        string += '\0'      # append zero byte as padding

    # get 2 bytes at a time and generate words
    words = []
    for ndx in range(0, len(string), 2):
        value = (ord(string[ndx]) << 8) + ord(string[ndx+1])
        write_list(value, address, lnum, line)
        words.append(value)
        address += 1
        lnum = None
        line = None

    return words


def _eval_or_die(expr, dot, lnum, line):
    """Evaluate 'expr', stopping the assembler if a name is undefined."""

    try:
        return eval_expression(expr, dot)
    except NameError:
        error("%d: %s\nName '%s' is undefined" % (lnum, line, Undefined))


def assemble_oblock(oblock):
    """Assemble one org block to a code block.

    An org block: [(lnum, line, label, opcode, addr), ...]
    A code block: [address, [(word, addr_expr), ...]]

    Each code block entry pairs a generated word with the address
    expression still to be OR-ed into it by backpatch(); DATA words are
    complete and carry None.  Assembly of the block stops at an END
    pseudo-opcode, whose optional address is saved in 'StartAddr'.
    """

    global CurrentLineNumber, CurrentLine, StartAddr

    cblock = [None, []]

    # if we get an empty block, do nothing
    if not oblock:
        return cblock

    # assemble the org block
    address = None      # '.' value
    for (lnum, line, label, opcode, addr) in oblock:
        CurrentLineNumber = lnum
        CurrentLine = line

        # if no code, just list it
        if label is None and opcode is None:
            write_list(None, None, lnum, line)
            continue

        if not opcode:
            # we have a label and no opcode, define the label and list it
            if label:
                define_label(label, address, lnum)
                write_list(None, None, lnum, line)
            continue

        if opcode == 'ORG':
            # ORG just sets the code address and starts the code block
            if label:
                error('%d: %s\nORG opcode may not have a label'
                      % (lnum, line))
            address = _eval_or_die(addr, address, lnum, line)
            cblock = [address, []]
            write_list(None, None, lnum, line)
            continue

        if opcode == 'END':
            # END pseudo-op, terminate assembly
            start_addr = None
            if addr:
                start_addr = _eval_or_die(addr, address, lnum, line)
            if start_addr is not None:
                StartAddr = start_addr
            write_list(None, start_addr, lnum, line)
            return cblock

        if opcode == 'EQU':
            if label is None:
                error('%d: %s\nEQU opcode missing a label' % (lnum, line))
            value = _eval_or_die(addr, None, lnum, line)
            define_label(label, value, lnum)
            write_list(None, None, lnum, line)
            continue

        # everything from here on generates code, so needs an address
        if address is None:
            error('%d: %s\nCode found before any ORG pseudo-opcode'
                  % (lnum, line))

        if label:
            define_label(label, address, lnum)

        if opcode == 'DATA':
            value = _eval_or_die(addr, address, lnum, line)
            if value is None:
                error('%d: %s\nDATA opcode missing a value' % (lnum, line))
            if isinstance(value, _STRING_TYPES):
                words = gen_ascii(value, address, lnum, line)
            else:
                write_list(value, address, lnum, line)
                words = [value]
            # data words are final, nothing to backpatch
            cblock[1].extend((word, None) for word in words)
            address += len(words)
            continue

        code = assemble_line(lnum, opcode, addr)
        cblock[1].append((code, addr))
        write_list(code, address, lnum, line)
        address += 1

    return cblock


def assemble_org_blocks(blocks):
    """Assemble org blocks producing a code blocks list."""

    return [assemble_oblock(oblock) for oblock in blocks]


def next_symbol(line):
    """Return next symbol and line remainder.

    Returns a two element list [symbol, remainder]; a missing part is
    returned as the empty string.
    """

    fields = line.split(None, 1)
    while len(fields) < 2:
        fields.append('')
    return fields


def split_fields(line):
    """Split one ASM line into fields: label, opcode, address.

    Returns a tuple: (label, opcode, address).
    If label and opcode are not None, uppercase the result string.

    If a field is missing, return None for it.  If the line is empty,
    return (None, None, None).

    We take pains not to split the address field if it's something like
        ALPHA + 100
    A quoted string address is returned complete with its delimiters
    and with its contents untouched.
    """

    if not line:
        return (None, None, None)

    # check for the label
    label = None
    if line[0] not in ' \t;':
        (label, remainder) = next_symbol(line)
        label = label.upper()
    else:
        remainder = line.strip()

    # get opcode
    opcode = None
    if remainder and remainder[0] != ';':
        (opcode, remainder) = next_symbol(remainder)
        opcode = opcode.upper()

    # get address
    address = None
    if remainder and remainder[0] != ';':
        if remainder[0] in "'\"":
            # a string, runs to the matching delimiter
            delim = remainder[0]
            remainder = remainder[1:]
            ndx = remainder.find(delim)
            if ndx == -1:
                error('Unbalanced string delimiter:\n'
                      '%s: %s' % (CurrentLineNumber, CurrentLine))
            # keep delimiter and inner whitespace, they are string data
            address = delim + remainder[:ndx] + delim
            remainder = remainder[ndx+1:].strip()
        else:
            # strip off any comment
            ndx = remainder.find(';')
            if ndx != -1:
                remainder = remainder[:ndx]
            address = remainder.strip()
            remainder = None

    # check that remainder is empty or only a comment
    if remainder and remainder[0] != ';':
        error('Badly formed instruction:\n'
              '%s: %s' % (CurrentLineNumber, CurrentLine))

    return (label, opcode, address)


def split_orgs(asm_lines):
    """Split ASM lines into ORG blocks (one or more).

    asm_lines  list of source lines

    Returns a list of org blocks, each a list of tuples
        (lnum, line, label, opcode, addr)
    with 'lnum' counting from 1.  A new block starts at every ORG
    pseudo-opcode; lines before the first ORG form a block of their own.
    Stops the assembler if an ORG has no address.
    """

    global CurrentLineNumber, CurrentLine

    result = []
    block = []

    for (lnum, line) in enumerate(asm_lines, 1):
        # set these first, split_fields() reports errors with them
        CurrentLineNumber = lnum
        CurrentLine = line

        (label, opcode, addr) = split_fields(line)

        if opcode and opcode.lower() == 'org':
            if block:
                # save previous ORG block
                result.append(block)
                block = []
            if addr is None:
                error('Line %d: %s\nORG pseudo-opcode without an address?'
                      % (lnum, line))

        block.append((lnum, line, label, opcode, addr))

    if block:
        # save final ORG block
        result.append(block)

    return result


def check_org_overlap(code_blocks):
    """Check that we don't have overlapping ORG blocks.

    code_blocks  list of code blocks: [[address, [word, ...]], ...]

    Blocks without an address or without code occupy no memory and
    are ignored.  Stops the assembler on the first overlap found.
    """

    # remember org block span: [(start, end), (start', end'), ...]
    block_span = []

    for (block_start, code) in code_blocks:
        if block_start is None or not code:
            continue

        block_end = block_start + len(code) - 1

        for (start, end) in block_span:
            # two closed intervals overlap iff each starts before the
            # other ends; this also catches one containing the other
            if block_start <= end and start <= block_end:
                error('ORG block at address %06o overwrites previous '
                      'block at [%06o,%06o]' % (block_start, start, end))

        block_span.append((block_start, block_end))


def allocate_literals(code_blocks):
    """Allocate space for literal blocks.  Not yet implemented."""

    pass


def backpatch(code_blocks):
    """Generate final code values, evaluate address fields.

    code_blocks  list of code blocks: [[address, [(word, addr), ...]], ...]

    Each address expression is evaluated with "." set to the address of
    its word and a non-zero result is OR-ed into the word.

    Returns a simplified code block list: [[address, [code, code, ...]], ...].
    Blocks with neither address nor code are dropped.
    """

    result = []

    for (address, codes) in code_blocks:
        if address is None and not codes:
            # empty code block, ignore
            continue

        dot = address
        new_code = []
        for (code, addr) in codes:
            try:
                addr_value = eval_expression(addr, dot)
            except NameError:
                error("Name '%s' is undefined" % Undefined)
            if addr_value:
                code |= addr_value
            new_code.append(code)
            dot += 1
        result.append([address, new_code])

    return result


def assemble_file():
    """Assemble the file and produce listing, output files."""

    # read all of ASM file into memory, strip \n, etc
    with open(AsmFile, 'r') as fd:
        asm_lines = [line.rstrip() for line in fd]

    print('asm_lines=\n%s' % '\n'.join(asm_lines))

    org_blocks = split_orgs(asm_lines)
    print('org_blocks=%s' % str(org_blocks))

    code_blocks = assemble_org_blocks(org_blocks)
    print('code_blocks=%s' % str(code_blocks))
    print('SymTable=%s' % str(SymTable))
    print('SymTableLine=%s' % str(SymTableLine))

    # allow programmer to overlap ORG blocks
    # check_org_overlap(code_blocks)

    allocate_literals(code_blocks)
    patched_code_blocks = backpatch(code_blocks)
    print('patched_code_blocks=%s' % str(patched_code_blocks))


def main():
    """The assembler."""

    global AsmFile, ListFile, OutputFile
    global ListFileHandle, OutputFileHandle

    # handle the options
    try:
        (opts, args) = getopt.gnu_getopt(sys.argv[1:], "hl:o:",
                                         ["help", "list=", "output="])
    except getopt.GetoptError as e:
        usage(str(e))
        sys.exit(10)

    ListFile = None
    OutputFile = None

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            sys.exit(0)
        elif opt in ('-l', '--list'):
            ListFile = arg
        elif opt in ('-o', '--output'):
            OutputFile = arg

    if len(args) != 1:
        usage()
        sys.exit(10)

    # get ASM filename and make sure it exists
    AsmFile = args[0]
    try:
        with open(AsmFile, 'r'):
            pass
    except IOError:
        print("Sorry, can't find file '%s'" % AsmFile)
        sys.exit(10)

    (path, _) = os.path.splitext(AsmFile)
    if OutputFile is None:
        OutputFile = path + PTPExtension
    if ListFile is None:
        ListFile = path + ListFileExtension

    # PTP output is binary, the listing is text
    OutputFileHandle = open(OutputFile, 'wb')
    try:
        ListFileHandle = open(ListFile, 'w')
        try:
            print('ListFile=%s, OutputFile=%s, AsmFile=%s'
                  % (str(ListFile), str(OutputFile), str(AsmFile)))
            assemble_file()
        finally:
            # also runs when error() exits, so the listing is complete
            ListFileHandle.close()
            ListFileHandle = None
    finally:
        OutputFileHandle.close()
        OutputFileHandle = None


if __name__ == '__main__':
    main()