1
0
mirror of https://github.com/rzzzwilson/pymlac.git synced 2025-06-10 09:32:41 +00:00
2016-03-01 11:15:05 +07:00

1355 lines
48 KiB
Python
Executable File

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
An assembler for the Imlac simulator.
Usage: pyasm [ -h ] [ -l <listfile> ] [ -o <outputfile> ] [ -b <loader> ] <asmfile>
Where <asmfile> is the file to assemble,
<outputfile> is the output PTP file
<listfile> is the optional listing file
If <outputfile> is not specified the output filename is the input
<asmfile> with any extension removed and a .ptp extension added.
The -b option specifies the blockloader code to use. <loader> values are either
'c8lds' or 'lc16sd'. The default is 'c8lds'.
"""
"""
This is a classic two pass assembler. The first pass reads all lines and keeps
track of the current location in memory (dot). Labels and EQU values are
remembered in the symbol table.
Pass 2 does pretty much the same thing as pass 1, except that now all labels
must be defined and code is generated.
"""
import copy
import getopt
import os
import re
import string
import sys
import traceback
######
# Globals
######

# the input assembler filename
AsmFile = None

# the output listing file
ListFile = None
ListFileHandle = None           # open listing file

# the output PTP file
OutputFile = None
OutputFileHandle = None         # open output file

# the blockloader code to emit
BlockLoader = None

# the current address during assembly
Dot = None

# the program start address (optional)
StartAddress = None

# the symbol table(s)
# {<name>: <value>, ... }
SymTable = {}
# {<name>: <line#>, ... }
SymTableLine = {}

# current line number and the line
CurrentLine = None
CurrentLineNumber = None

# Any undefined label
Undefined = None

# buffer for blocked code
BlockMaxSize = 255              # maximum size of buffer (words)
# The buffer holds whole words (which may exceed 255), so it must be a list:
# a bytearray rejects any value above 255.  start_block() also resets the
# buffer to a list, so this keeps the initial state consistent with it.
BlockBuffer = []                # actual buffered words
BlockBufferBase = None          # base address of the block

# DEIM state
ShortVectorMode = False         # True if within a DEIM/INC instruction
BeamState = 0                   # 1 if beam is ON, 0 if OFF

######
# Mostly constant stuff
######

# mask for 16bit values
WordMask = 0xFFFF

# high bit in 16-bit word
HighBit = 0x8000

# the output PTP filename extension
PTPExtension = '.ptp'

# the output listing filename extension
ListFileExtension = '.lst'

# number of bytes in the 'zero' leader
ZeroLeaderSize = 16
######
# dict mapping opcode to generated word, address opts, address mask & indirect allowed
######
# helper function to generate N-bit mask, right justified
def mask(n):
    """Return an integer with the low 'n' bits set (right justified).

    n  number of one bits wanted

    A value of 'n' that is zero or negative gives 0.
    """

    # one term per bit position: 1 + 2 + 4 + ... for 'n' positions
    return sum(1 << bit for bit in range(n))
# address states, AYES = address required,
#                 ANO  = address NOT required
#                 AOPT = address optional
(AYES, ANO, AOPT) = range(3)

# Each entry maps an opcode mnemonic to a tuple of:
#     (base word, address state, address field mask, indirect allowed)
# Octal constants use the '0o' prefix, which both Python 2.6+ and Python 3
# accept (a bare leading zero is a syntax error in Python 3).
# NOTE(review): 'PUN' and 'PPC' share the encoding 001271 - confirm against
# the Imlac documentation that this is intended.
OpcodeData = {
    'LAW':  (0o004000, AYES, mask(11), False),
    'LWC':  (0o104000, AYES, mask(11), False),
    'JMP':  (0o010000, AYES, mask(11), True),
    'DAC':  (0o020000, AYES, mask(11), True),
    'XAM':  (0o024000, AYES, mask(11), True),
    'ISZ':  (0o030000, AYES, mask(11), True),
    'JMS':  (0o034000, AYES, mask(11), True),
    'AND':  (0o044000, AYES, mask(11), True),
    'IOR':  (0o050000, AYES, mask(11), True),
    'XOR':  (0o054000, AYES, mask(11), True),
    'LAC':  (0o060000, AYES, mask(11), True),
    'ADD':  (0o064000, AYES, mask(11), True),
    'SUB':  (0o070000, AYES, mask(11), True),
    'SAM':  (0o074000, AYES, mask(11), True),
    'HLT':  (0o000000, AOPT, mask(11), False),
    'NOP':  (0o100000, ANO, 0, False),
    'CLA':  (0o100001, ANO, 0, False),
    'CMA':  (0o100002, ANO, 0, False),
    'STA':  (0o100003, ANO, 0, False),
    'IAC':  (0o100004, ANO, 0, False),
    'COA':  (0o100005, ANO, 0, False),
    'CIA':  (0o100006, ANO, 0, False),
    'CLL':  (0o100010, ANO, 0, False),
    'CML':  (0o100020, ANO, 0, False),
    'STL':  (0o100030, ANO, 0, False),
    'ODA':  (0o100040, ANO, 0, False),
    'LDA':  (0o100041, ANO, 0, False),
    'CAL':  (0o100011, ANO, 0, False),
    'RAL':  (0o003000, AYES, mask(2), False),
    'RAR':  (0o003020, AYES, mask(2), False),
    'SAL':  (0o003040, AYES, mask(2), False),
    'SAR':  (0o003060, AYES, mask(2), False),
    'DON':  (0o003100, ANO, 0, False),
    'ASZ':  (0o002001, ANO, 0, False),
    'ASN':  (0o102001, ANO, 0, False),
    'ASP':  (0o002002, ANO, 0, False),
    'ASM':  (0o102002, ANO, 0, False),
    'LSZ':  (0o002004, ANO, 0, False),
    'LSN':  (0o102004, ANO, 0, False),
    'DSF':  (0o002010, ANO, 0, False),
    'DSN':  (0o102010, ANO, 0, False),
    'KSF':  (0o002020, ANO, 0, False),
    'KSN':  (0o102020, ANO, 0, False),
    'RSF':  (0o002040, ANO, 0, False),
    'RSN':  (0o102040, ANO, 0, False),
    'TSF':  (0o002100, ANO, 0, False),
    'TSN':  (0o102100, ANO, 0, False),
    'SSF':  (0o002200, ANO, 0, False),
    'SSN':  (0o102200, ANO, 0, False),
    'HSF':  (0o002400, ANO, 0, False),
    'HSN':  (0o102400, ANO, 0, False),
    'DLA':  (0o001003, ANO, 0, False),
    'CTB':  (0o001011, ANO, 0, False),
    'DOF':  (0o001012, ANO, 0, False),
    'KRB':  (0o001021, ANO, 0, False),
    'KCF':  (0o001022, ANO, 0, False),
    'KRC':  (0o001023, ANO, 0, False),
    'RRB':  (0o001031, ANO, 0, False),
    'RCF':  (0o001032, ANO, 0, False),
    'RRC':  (0o001033, ANO, 0, False),
    'TPR':  (0o001041, ANO, 0, False),
    'TCF':  (0o001042, ANO, 0, False),
    'TPC':  (0o001043, ANO, 0, False),
    'HRB':  (0o001051, ANO, 0, False),
    'HOF':  (0o001052, ANO, 0, False),
    'HON':  (0o001061, ANO, 0, False),
    'STB':  (0o001062, ANO, 0, False),
    'SCF':  (0o001071, ANO, 0, False),
    'IOS':  (0o001072, ANO, 0, False),
    'IOT':  (0o001000, AYES, mask(9), False),
    'IOF':  (0o001161, ANO, 0, False),
    'ION':  (0o001162, ANO, 0, False),
    'PUN':  (0o001271, ANO, 0, False),
    'PSF':  (0o001274, ANO, 0, False),
    'PPC':  (0o001271, ANO, 0, False),
    'DLXA': (0o010000, AYES, mask(12), False),
    'DLYA': (0o020000, AYES, mask(12), False),
    # 'DEIM': (0o030000, AYES, mask(12), False),    # handled as pseudo-op
    'DJMS': (0o050000, AYES, mask(12), False),
    'DJMP': (0o060000, AYES, mask(12), False),
    'DOPR': (0o004000, AYES, mask(4), False),
    'DHLT': (0o000000, ANO, 0, False),
    'DSTS': (0o004004, AYES, mask(2), False),
    'DSTB': (0o004010, AYES, mask(3), False),
    'DRJM': (0o004040, ANO, 0, False),
    'DIXM': (0o005000, ANO, 0, False),
    'DIYM': (0o004400, ANO, 0, False),
    'DDXM': (0o004200, ANO, 0, False),
    'DDYM': (0o004100, ANO, 0, False),
    'DHVC': (0o006000, ANO, 0, False),
    'DDSP': (0o004020, ANO, 0, False),
    'DNOP': (0o004000, ANO, 0, False),
}
######
# The papertape/teletype loader code (c8lds form)
######
# Words of the 'c8lds' block loader, in load order (addresses 003700-003777).
# Octal constants use the '0o' prefix so the table parses under both
# Python 2.6+ and Python 3.
BlockLoader_C8LDS = [
    #  code     address  assembler source code
    # -------   -------  ------------------------------------------------------------------------
    #                    ; Imlac Papertape Program Block Loader
    #                    ;
    #                    ; This loader is loaded by the bootstrap program at x7700, where x=0 for
    #                    ; a 4K machine, and x=1 for an 8K machine, etc.
    #                    ;
    #                    ; The load format consists of one or more data blocks.
    #                    ; Each block has the form:
    #                    ;
    #                    ;          word count      (byte)
    #                    ;          load address
    #                    ;          data word 1
    #                    ;          data word 2
    #                    ;          ...
    #                    ;          data word n
    #                    ;          checksum
    #                    ;
    #                    ; All values are 16bit words, except the word count, which is an 8bit byte.
    #                    ; Words are always received high-order byte first.
    #                    ;
    #                    ; After the word count there is the load address, followed by <word count>
    #                    ; data words, which are loaded starting at "load address".
    #                    ;
    #                    ; The sum of all the data words in the block must be the same as the checksum
    #                    ; word which follows the data words.  The checksum is calculated with 16bit
    #                    ; integers, incrementing the sum whenever the 16bit value overflows.
    #                    ;
    #                    ; The end of the load is signalled by a block with a
    #                    ; starting address 0177777.
    #                    ;
    #                    ; Disassembled from the 40tp_simpleDisplay.ptp image file.
    #                    ;
    #                            org     003700  ;
    #                    cksum   equ     .-1     ;checksum stored here (at 003677)
    0o001032,   # 003700         rcf             ;
    0o013740,   # 003701         jmp     patch   ;go decide TTY or PTR, clear AC
    0o023677,   # 003702 ndpatch dac     cksum   ;zero checksum, AC is zero (from patch)
    0o037760,   # 003703         jms     rdbyte  ;
    0o102001,   # 003704         asn             ;wait here for non-zero byte
    0o013703,   # 003705         jmp     .-2     ;
    0o100006,   # 003706         cia             ;
    0o023777,   # 003707         dac     wrdcnt  ;store negative word count
    0o037750,   # 003710         jms     rdword  ;read load address
    0o023776,   # 003711         dac     ldaddr  ;
    0o077730,   # 003712         sam     neg1    ;
    0o013715,   # 003713         jmp     rdblock ;
    0o000000,   # 003714         hlt             ;if load address is -1, finished
    0o037750,   # 003715 rdblock jms     rdword  ;now read block to load address
    0o123776,   # 003716         dac     *ldaddr ;
    0o037731,   # 003717         jms     dosum   ;
    0o033776,   # 003720         isz     ldaddr  ;
    0o033777,   # 003721         isz     wrdcnt  ;
    0o013715,   # 003722         jmp     rdblock ;
    0o037750,   # 003723         jms     rdword  ;get expected checksum
    0o073677,   # 003724         sub     cksum   ;compare with calculated
    0o102001,   # 003725         asn             ;
    0o013746,   # 003726         jmp     newblk  ;if same, get next block
    0o000000,   # 003727         hlt             ;if not same, ERROR
    0o177777,   # 003730 neg1    data    0177777 ;this load address means STOP
    #                    ;------------------------
    #                    ;Compute checksum.  Word to sum in AC.
    #                    ;------------------------
    0o017720,   # 003731 dosum   bss     1       ;
    0o100010,   # 003732         cll             ; clear link
    0o067677,   # 003733         add     cksum   ; add AC to checksum
    0o002004,   # 003734         lsz             ; if overflow:
    0o100004,   # 003735         iac             ;     increment new checksum
    0o023677,   # 003736         dac     cksum   ; save new checksum
    0o113731,   # 003737         jmp     *dosum  ;
    #                    ;------------------------
    #                    ;Decide what input device we are using, PTR or TTY.
    #                    ;------------------------
    0o001061,   # 003740 patch   hon             ;
    0o063774,   # 003741         lac     ttyset  ;
    0o023761,   # 003742         dac     devpat  ;
    0o005032,   # 003743         law     1032    ;
    0o177775,   # 003744         sam     *adr044 ;
    0o023761,   # 003745         dac     devpat  ;
    0o100011,   # 003746 newblk  cal             ;
    0o013702,   # 003747         jmp     ndpatch ;
    #                    ;------------------------
    #                    ;Read WORD from input device.
    #                    ;------------------------
    0o017711,   # 003750 rdword  bss     1       ;
    0o100011,   # 003751         cal             ;
    0o037760,   # 003752         jms     rdbyte  ;
    0o003003,   # 003753         ral     3       ;
    0o003003,   # 003754         ral     3       ;
    0o003002,   # 003755         ral     2       ;
    0o037760,   # 003756         jms     rdbyte  ;
    0o113750,   # 003757         jmp     *rdword ;
    #                    ;------------------------
    #                    ;Read BYTE from input device. Read from PTR or TTY.
    #                    ;------------------------
    0o017757,   # 003760 rdbyte  bss     1       ;
    0o001032,   # 003761 devpat  rcf             ;could be patched to 'jmp rdtty'
    0o102400,   # 003762         hsn             ;
    0o013762,   # 003763         jmp     .-1     ;
    0o002400,   # 003764         hsf             ;
    0o013764,   # 003765         jmp     .-1     ;
    0o001051,   # 003766         hrb             ;read PTR byte
    0o113760,   # 003767         jmp     *rdbyte ;
    0o002040,   # 003770 rdtty   rsf             ;
    0o013770,   # 003771         jmp     .-1     ;
    0o001033,   # 003772         rrc             ;read TTY byte, clear flag
    0o113760,   # 003773         jmp     *rdbyte ;
    #                    ;------------------------
    0o013770,   # 003774 ttyset  jmp     rdtty   ;
    0o000044,   # 003775 adr044  data    044     ;
    0o000000,   # 003776 ldaddr  data    0       ;
    0o000000,   # 003777 wrdcnt  data    0       ;
    #                    ;------------------------
    #                            end             ;
]
# Words of the 'lc16sd' block loader, in load order (addresses 03700-04000).
# Octal constants use the '0o' prefix so the table parses under both
# Python 2.6+ and Python 3.
BlockLoader_LC16SD = [
    #           03700           ORG     03700   ;
    #
    0o001032,   # 03700  ldaddr RCF             ;
    0o037701,   # 03701  numwrd JMS     .       ; get address of 'chksum' into 'numwrd'
    #
    0o063701,   # 03702  chksum LAC     numwrd  ; are we are running in high memory (017700+)
    0o077775,   # 03703         SAM     himem   ;
    0o013710,   # 03704         JMP     rdblk   ; if not, just load tape
    #
    0o104012,   # 03705         LWC     012     ; else turn on the display
    0o001003,   # 03706         DLA             ;
    0o003100,   # 03707         DON             ;
    #
    0o100011,   # 03710  rdblk  CAL             ; initialize block checksum
    0o023702,   # 03711         DAC     chksum  ;
    0o037746,   # 03712         JMS     rdword  ; get load address
    0o023700,   # 03713         DAC     ldaddr  ;
    0o002002,   # 03714         ASP             ; if high bit set
    0o013740,   # 03715         JMP     ldend   ;     then end of tape load
    0o037746,   # 03716         JMS     rdword  ; else get number of words in block
    0o023701,   # 03717         DAC     numwrd  ;
    0o037746,   # 03720         JMS     rdword  ; read checksum word, add to checksum
    0o037746,   # 03721  blklp  JMS     rdword  ; get data word
    0o123700,   # 03722         DAC     *ldaddr ; store into memory
    0o063700,   # 03723         LAC     ldaddr  ; get load address
    0o003063,   # 03724         SAR     3       ; echo load address in display (if running)
    0o047765,   # 03725         AND     low10   ;
    0o053764,   # 03726         IOR     dlya0   ;
    0o023766,   # 03727         DAC     disp    ;
    0o163700,   # 03730         LAC     *ldaddr ; get last data word
    0o033700,   # 03731         ISZ     ldaddr  ; move 'load address' pointer
    0o033701,   # 03732         ISZ     numwrd  ; check end of block
    0o013721,   # 03733         JMP     blklp   ; jump if not ended
    0o067702,   # 03734         ADD     chksum  ; block end, check checksum
    0o002001,   # 03735         ASZ             ; if checksum invalid,
    0o013736,   # 03736         JMP     .       ;     busy wait here
    0o013710,   # 03737         JMP     rdblk   ; else go get next block
    #                    ; end of load, AC is load address, high bit set
    0o001012,   # 03740  ldend  DOF             ; turn off the display
    0o100004,   # 03741         IAC             ;
    0o102001,   # 03742         ASN             ; if address is 0177777
    0o000000,   # 03743         HLT             ;     then just halt
    0o037746,   # 03744         JMS     rdword  ; else get AC contents
    0o113700,   # 03745         JMP     *ldaddr ;     and jump to start address
    #                    ; read a word from tape, leave in AC
    0o000000,   # 03746  rdword DATA    0       ;
    0o067702,   # 03747         ADD     chksum  ;
    0o023702,   # 03750         DAC     chksum  ;
    0o100011,   # 03751         CAL             ;
    0o002040,   # 03752         RSF             ;
    0o013752,   # 03753         JMP     .-1     ;
    0o001033,   # 03754         RRC             ;
    0o003003,   # 03755         RAL     3       ;
    0o003003,   # 03756         RAL     3       ;
    0o003002,   # 03757         RAL     2       ;
    0o002040,   # 03760         RSF             ;
    0o013760,   # 03761         JMP     .-1     ;
    0o001033,   # 03762         RRC             ;
    0o113746,   # 03763         JMP     *rdword ;
    #
    0o020000,   # 03764  dlya0  DLYA    0       ;
    0o001777,   # 03765  low10  DATA    001777  ;
    #
    #                    ; display routine, used if running in extended memory
    0o020000,   # 03766  disp   DLYA    00000   ;
    0o010000,   # 03767         DLXA    00000   ;
    0o004005,   # 03770         DSTS    1       ;
    0o046000,   # 03771         DLVH    02000   ;
    0o021777,   # 03772         DLYA    01777   ;
    0o000000,   # 03773         DHLT            ;
    #
    0o067766,   # 03774         DATA    0067766 ;
    0o017702,   # 03775  himem  DATA    0017702 ;
    0o010000,   # 03776         DATA    0010000 ;
    0o177777,   # 03777         DATA    0177777 ;
    0o000000,   # 04000         DATA    0000000 ;
    #
    #                           END             ;
]
def usage(msg=None):
    """Show the module usage text, preceded by a framed message if given.

    msg  optional message displayed between two rows of asterisks
    """

    if msg:
        frame = '*' * 60
        for text in (frame, msg, frame):
            print(text)

    print(__doc__)
def error(msg):
    """Print a syntax error and abort.

    msg  the error message text

    The report (a rule, the current line number and text, the message and
    another rule) goes to stdout and, when a listing file is open, to the
    listing file as well.  The process then exits with status 10.
    """

    lnum = CurrentLineNumber if CurrentLineNumber else 1
    line = CurrentLine if CurrentLine else ''

    rule = '-' * 80
    report = (rule, "%04d: %s" % (lnum, line), msg, rule)

    for text in report:
        print(text)

    # the listing file may not be open yet; don't let that mask the real error
    if ListFileHandle is not None:
        ListFileHandle.write('\n'.join(report) + '\n')
        ListFileHandle.flush()

    sys.exit(10)
def write_byte(byte):
    """Write one byte into the output code file.

    byte  the value to write

    Write only the low 8 bits of 'byte'.
    """

    # bytes(bytearray(...)) yields a 1-character 'str' on Python 2 and a
    # 1-byte 'bytes' on Python 3, so this works with a file opened in binary
    # mode on either (chr() would produce text, which Python 3 rejects).
    OutputFileHandle.write(bytes(bytearray((byte & 0xFF,))))
def write_word(word):
    """Write a 16-bit word to the output file, high-order byte first.

    word  the value to write
    """

    # write_byte() keeps only the low 8 bits of whatever it is given
    for shift in (8, 0):
        write_byte(word >> shift)
def write_start(address=None, ac=None):
    """Write the start block.

    address  the desired start address, if specified
    ac       initial contents of the AC, if specified (0 if not)

    With no address the word 0xffff is written, otherwise the address with
    the high bit added.  The AC word follows unless the address is exactly 0.
    """

    ac_word = 0 if ac is None else ac
    start_word = 0xffff if address is None else address + HighBit

    write_word(start_word)

    # note that an unspecified address (None) also gets the AC word
    if address != 0:
        write_word(ac_word & WordMask)
def write_leader(size=ZeroLeaderSize):
    """Write a papertape leader of zero bytes.

    size  number of zero bytes to write
    """

    for zero in [0] * size:
        write_byte(zero)
def write_block_loader():
    """Write every word of the selected block loader to the output file."""

    loader_words = iter(BlockLoader)
    for loader_word in loader_words:
        write_word(loader_word)
def start_block(addr):
    """Begin a new, empty code block.

    addr  base (load) address of the new block
    """

    global BlockBuffer, BlockBufferBase

    (BlockBufferBase, BlockBuffer) = (addr, [])
def emit_word(word):
    """Append a word to the code block buffer.

    word  the word to buffer

    If the buffer already holds the maximum number of words it is written
    out first and a fresh block is started at the current address.
    """

    buffer_has_room = len(BlockBuffer) < BlockMaxSize
    if not buffer_has_room:
        write_block()
        start_block(Dot)

    # start_block() rebinds the global, so this always targets the live buffer
    BlockBuffer.append(word)
def write_eot():
    """Write End-Of-Tape data for whatever format tape.

    For the 'c8lds' loader this is a non-zero count byte followed by the
    load address 0177777.  For any other loader a start block is written,
    carrying the start address (and an AC of 0177777) when one was given.
    """

    # '0o' octal literals are accepted by both Python 2.6+ and Python 3
    if BlockLoader is BlockLoader_C8LDS:
        write_byte(0o377)
        write_word(0o177777)
    elif StartAddress is not None:
        write_start(StartAddress, ac=0o177777)
    else:
        write_start()
def write_block():
    """Write the buffered block in the format of the selected loader."""

    using_c8lds = BlockLoader is BlockLoader_C8LDS
    writer = write_block_c8lds if using_c8lds else write_block_lc16sd
    writer()
def write_block_c8lds():
    """Write the current code block and reset the buffer.

    Write the code block in 'c8lds' form:
        data count       (8 bits)
        load address     (16 bits)
        first data word  (16 bits)
        ...
        last word        (16 bits)
        checksum         (16 bits)

    Note that a data block may be preceded by a zero leader.

    The checksum follows the 'dosum' routine of the c8lds loader: each data
    word is added to a 16-bit sum and the sum is incremented whenever the
    addition overflows 16 bits (end-around carry).  A plain sum truncated to
    16 bits disagrees with the loader as soon as a block overflows.

    Does nothing if the buffer is empty.
    """

    if not BlockBuffer:
        # block buffer is empty, do nothing
        return

    # emit the word count (byte) and load address (word)
    write_byte(len(BlockBuffer))
    write_word(BlockBufferBase)

    # write out data words, accumulating the checksum as the loader does
    checksum = 0
    for word in BlockBuffer:
        write_word(word)
        # only the low 16 bits of a word reach the tape
        checksum += word & WordMask
        if checksum > WordMask:
            # overflow: drop the carry and add it back in at the bottom
            checksum = (checksum & WordMask) + 1

    # finally, write the checksum
    write_word(checksum)

    # reset the code buffer
    start_block(None)
def write_block_lc16sd():
    """Write the current code block in 'lc16sd' form and reset the buffer.

    The words written are, in order: the load address, the negated word
    count, the negated sum of (load address + negated count + data words),
    then the data words themselves.  Does nothing if the buffer is empty.
    """

    global BlockBuffer, BlockBufferBase

    word_count = len(BlockBuffer)
    if not word_count:
        # nothing buffered, nothing to write
        return

    # load address, then the two's complement of the word count
    negated_count = -word_count & WordMask
    write_word(BlockBufferBase)
    write_word(negated_count)

    # the checksum word is the two's complement of everything summed
    total = BlockBufferBase + negated_count + sum(BlockBuffer)
    write_word(-total & WordMask)

    # the data words come last
    for data_word in BlockBuffer:
        write_word(data_word)

    # reset the code buffer
    start_block(None)
def write_list(code, addr, lnum, line):
    """Write one line to the listing file.

    code  the word of generated code (None if no code)
    addr  the address of the generated code (None if no address)
    lnum  file line number (nothing shown if false)
    line  complete text of the line of assembler (nothing shown if false)

    The code is shown as 7 octal digits and the address as 5; a missing
    value leaves its column blank.  The listing file is flushed each time.
    """

    columns = (
        ('' if code is None else '%07o' % code).rjust(7),
        ('' if addr is None else '%05o' % addr).rjust(5),
        '%04d:' % lnum if lnum else '',
        '%s' % line if line else '',
    )

    ListFileHandle.write(' '.join(columns) + '\n')
    ListFileHandle.flush()
def eval_expr(expr):
    """Evaluate an expression string.

    expr  string holding expression (or None)

    Returns the value of the expression, or None if 'expr' is None.

    A "." in the expression stands for the current address.  Names are
    looked up in the symbol table.  Numbers with a leading zero are octal.
    If the expression cannot be evaluated error() is called, which reports
    the problem and exits.  The global 'Undefined' is set to the undefined
    name (or to the exception text for other failures).

    NOTE: the expression is handed to eval(), which will execute arbitrary
    Python.  Only assemble source files that you trust.
    """

    global Undefined

    # if no expression, do nothing
    if expr is None:
        return None

    # replace any "." value with "DOT", defined below as the current address
    py_expr = expr.replace('.', 'DOT')

    # Rewrite leading-zero octal numbers (0177) as 0o177.  Python 2.6+ reads
    # both forms identically but Python 3 only accepts the 0o form.
    py_expr = re.sub(r'\b0+([0-7]+)\b', r'0o\1', py_expr)

    # eval() adds '__builtins__' to its globals dict, so work on a copy;
    # the values are plain numbers so a shallow copy is enough
    globs = dict(SymTable)
    globs['DOT'] = Dot      # add in the "." address

    # evaluate the expression
    try:
        return eval(py_expr, globs)
    except NameError as e:
        # message is "name 'XYZZY' is not defined", possibly with a suffix
        message = str(e)
        pieces = message.split("'")
        Undefined = pieces[1] if len(pieces) >= 3 else message
        error("Expression '%s' has '%s' undefined" % (expr, Undefined))
    except (TypeError, SyntaxError, ValueError, ZeroDivisionError) as e:
        Undefined = str(e)
        error("Expression '%s' has an error" % expr)

    return None
def num_gen_words(opcode, addr):
    """Return how many words the given opcode generates.

    opcode  the opcode (may be None or empty)
    addr    the address field (not examined)

    Any opcode is assumed to generate exactly one word.
    TODO has to change when macros are implemented
    """

    return 1 if opcode else 0
def _pass_1_ascii_word_count(opcode, addr):
    """Validate an ASCII/ASCIIZ operand and return the words it occupies.

    opcode  either 'ASCII' or 'ASCIIZ'
    addr    the operand, expected to be a quote-delimited string

    Characters are packed two per word.  ASCIIZ allows for one extra zero
    byte after the text.  Calls error() (which exits) on a bad operand.
    """

    if not addr:
        error("%s pseudo-op must have a data field" % opcode)
    if addr[0] not in "'\"":
        error("%s pseudo-op data field must be a delimited string" % opcode)
    if addr[-1] != addr[0]:
        error("%s pseudo-op has a badly delimited string" % opcode)

    num_bytes = len(addr[1:-1])
    if opcode == 'ASCIIZ':
        num_bytes += 1

    # round up to whole words; '//' keeps the result an integer on Python 3
    return (num_bytes + 1) // 2


def pass_1(lines):
    """Do pass 1 of the assembly.

    lines  lines of text with terminal EOL removed

    Just read text and fill in the symbol table, tracking the current
    address (Dot) so every label gets its value.  Stops at the END
    pseudo-op, remembering the optional start address.

    Returns True on success, False if there was an error (although error()
    normally exits the program first).
    """

    global Dot, StartAddress
    global CurrentLineNumber, CurrentLine
    global SymTable, SymTableLine

    # initialize things
    Dot = None
    SymTable = {}
    SymTableLine = {}

    # for each line in the file (line numbers are 1-based)
    for (lnum, line) in enumerate(lines, 1):
        CurrentLineNumber = lnum
        CurrentLine = line

        # get line fields
        (label, opcode, indirect, addr) = split_fields(line)

        if not opcode:
            # label but no code generated, just set label in symtab
            if label:
                define_label(label, Dot, lnum)
            continue

        if opcode == 'ORG':
            value = eval_expr(addr) if addr else None
            if value is None:
                error("ORG pseudo-op has a bad address")
                return False
            Dot = value
            if label:
                error("ORG pseudo-op must not have a label")
            continue

        if opcode == 'EQU':
            # no code, but we define a label
            if not label:
                error("EQU pseudo-op must have a label")
                return False
            value = eval_expr(addr) if addr else None
            if value is None:
                error("EQU pseudo-op has a bad value")
                return False
            define_label(label, value, lnum)
            continue

        if opcode == 'END':
            # get the (optional) start address
            if addr:
                StartAddress = eval_expr(addr)
                if StartAddress is None:
                    error("END pseudo-op has a bad address")
                    return False
            return True

        # everything else occupies memory: work out how many words
        if opcode == 'BSS':
            # no code, but Dot moves
            words = eval_expr(addr) if addr else None
            if words is None:
                error("BSS pseudo-op has a bad value")
                return False
        elif opcode == 'DATA':
            # a single data word
            if not addr or eval_expr(addr) is None:
                error("DATA pseudo-op has a bad value")
                return False
            words = 1
        elif opcode in ('ASCII', 'ASCIIZ'):
            # ASCII string, pack two bytes/word
            words = _pass_1_ascii_word_count(opcode, addr)
        elif opcode == 'INC':
            # start of short vector mode
            if not addr:
                error("INC pseudo-op must have a data field")
            words = 1
        else:
            # actual machine instruction!
            words = num_gen_words(opcode, addr)

        # without an ORG there is no address to assign or advance
        if Dot is None:
            error("'%s' found before any ORG pseudo-op" % opcode)

        if label:
            define_label(label, Dot, lnum)
        Dot += words

    return True
def _pass_2_octal(value):
    """Return 'value' as 6 octal digits, or 'None' if there is no value."""

    return '%06o' % value if value is not None else 'None'


def _pass_2_check_label(label, what):
    """Check that 'label' still has the address it was given in pass 1.

    label  the label on the current line
    what   short description of the line (used in error messages)

    Calls error() (which exits) if the label is unknown or if its pass 1
    value differs from the current address (Dot).
    """

    if label not in SymTable:
        error("%s label '%s' wasn't defined in first pass!?" % (what, label))

    pass_1_dot = SymTable[label]
    if pass_1_dot != Dot:
        error("%s label '%s' has value %s, was %s in first pass"
              % (what, label, _pass_2_octal(Dot), _pass_2_octal(pass_1_dot)))


def _pass_2_ascii_words(opcode, addr):
    """Return the list of words generated by an ASCII/ASCIIZ pseudo-op.

    opcode  either 'ASCII' or 'ASCIIZ'
    addr    the operand, expected to be a quote-delimited string

    Characters are packed two per word, high byte first.  A trailing odd
    character is padded with a zero byte.  ASCIIZ guarantees at least one
    zero byte after the text, adding a zero word if the text length is even.
    Calls error() (which exits) on a bad operand.
    """

    if not addr:
        error("%s pseudo-op must have a data field" % opcode)
    if addr[0] not in "'\"":
        error("%s pseudo-op data field must be a delimited string" % opcode)
    if addr[-1] != addr[0]:
        error("%s pseudo-op has a badly delimited string" % opcode)

    text = addr[1:-1]
    words = [(ord(text[i]) << 8) + ord(text[i + 1])
             for i in range(0, len(text) - 1, 2)]
    if len(text) % 2:
        words.append(ord(text[-1]) << 8)
    elif opcode == 'ASCIIZ':
        words.append(0)

    return words


def pass_2(lines):
    """Perform the second pass of the assembly.

    lines  lines of text with terminal EOL removed

    Very similar to pass_1(), but we:
        . don't define labels, that was done in pass_1()
        . check that any pass_2 labels haven't changed value
        . generate code, the listing and the output tape

    Returns False after some errors (although error() normally exits the
    program first), otherwise returns nothing.
    """

    global Dot, StartAddress, CurrentLineNumber, CurrentLine

    # punch the zero leader and ptr/tty loader
    write_leader()
    write_block_loader()

    Dot = None
    end_lnum = None     # line number of the END pseudo-op, if seen

    # for each line in the file (line numbers are 1-based)
    for (lnum, line) in enumerate(lines, 1):
        CurrentLineNumber = lnum
        CurrentLine = line

        # get line fields
        (label, opcode, indirect, addr) = split_fields(line)

        if not opcode:
            if label:
                # label but no code generated, just check label unchanged
                _pass_2_check_label(label, 'Line')
                write_list(None, Dot, lnum, line)
            else:
                write_list(None, None, lnum, line)
            continue

        # we have an opcode, so code might be generated
        if opcode == 'ORG':
            if label:
                error("ORG pseudo-op may not have a label")
                return False
            value = eval_expr(addr) if addr else None
            if value is None:
                error("ORG pseudo-op has a bad address")
                return False
            if BlockBufferBase is not None:
                write_block()       # write any code accumulated so far
            Dot = value
            start_block(Dot)
            write_list(None, Dot, lnum, line)
        elif opcode == 'EQU':
            # no code, but we must have a label
            if not label:
                error("EQU pseudo-op must have a label")
                return False
            value = eval_expr(addr) if addr else None
            if value is None:
                error("EQU pseudo-op has a bad value")
                return False
            # check EQU value unchanged
            if label not in SymTable:
                error("EQU label '%s' wasn't defined in first pass!?"
                      % label)
            old_value = SymTable[label]
            if value != old_value:
                error("EQU value in '%s' has changed,\n"
                      "was %s, is now %s"
                      % (label, _pass_2_octal(old_value),
                         _pass_2_octal(value)))
            write_list(None, value, lnum, line)
        elif opcode == 'BSS':
            # no code, but Dot moves
            value = eval_expr(addr) if addr else None
            if value is None:
                error("BSS pseudo-op has bad value")
                return False
            if label:
                _pass_2_check_label(label, 'BSS')
            write_block()           # write any code accumulated so far
            Dot += value
            start_block(Dot)
            write_list(None, None, lnum, line)
        elif opcode == 'DATA':
            # a single data word
            value = eval_expr(addr) if addr else None
            if value is None:
                error("DATA pseudo-op has a bad value")
                return False
            if label:
                _pass_2_check_label(label, 'DATA')
            emit_word(value)
            write_list(value, Dot, lnum, line)
            Dot += 1
        elif opcode in ('ASCII', 'ASCIIZ'):
            # 'addr' must exist and be a quote-delimited string
            words = _pass_2_ascii_words(opcode, addr)
            if label:
                _pass_2_check_label(label, opcode)
            # only the first listing line shows the source text
            (list_lnum, list_line) = (lnum, line)
            for word_value in words:
                emit_word(word_value)
                write_list(word_value, Dot, list_lnum, list_line)
                (list_lnum, list_line) = ('', '')
                Dot += 1
        elif opcode == 'INC':
            if not addr:
                error("INC pseudo-op must have a data field")
            if label:
                _pass_2_check_label(label, 'INC')
            inc_fields = addr.split(',')
            if len(inc_fields) != 2:
                error("INC pseudo-op must have a two-part field")
            (first_code, second_code) = inc_fields
            high_byte = geninc(first_code)
            low_byte = geninc(second_code)
            word_value = (high_byte << 8) + low_byte
            emit_word(word_value)
            write_list(word_value, Dot, lnum, line)
            Dot += 1
        elif opcode == 'END':
            # get optional start address
            if addr:
                start_address = eval_expr(addr)
                if start_address != StartAddress:
                    error("Pass 2 start address is different from pass 1, "
                          "was %s but now %s"
                          % (_pass_2_octal(StartAddress),
                             _pass_2_octal(start_address)))
                StartAddress = start_address
            write_list(None, StartAddress, lnum, line)
            end_lnum = lnum
            break                   # end of pass
        else:
            # actual machine instruction!
            # if label, check value unchanged
            if label:
                _pass_2_check_label(label, opcode)
            code = gen_code(lnum, line, Dot, label, opcode, indirect, addr)
            write_list(code, Dot, lnum, line)
            Dot += 1

    # write the final block of code and optional start address
    write_block()

    # write EOT data and end leader
    write_eot()
    write_leader()

    # check nothing after END (blank and comment-only lines are fine)
    if end_lnum is not None:
        for (lnum, line) in enumerate(lines[end_lnum:], end_lnum + 1):
            CurrentLineNumber = lnum
            CurrentLine = line
            (label, opcode, indirect, addr) = split_fields(line)
            if label or opcode:
                error("Something after the 'END' pseudo-op!?")
def _inc_sign_and_digit(code, axis):
    """Parse one '[+|-] n' part from the front of an INC sub-field.

    code  the remaining sub-field text
    axis  'X' or 'Y', used in the error message

    Returns a tuple (negative, n, rest) where 'negative' is 1 for a leading
    '-' and 0 otherwise, 'n' is the digit value and 'rest' is the unparsed
    text.  Calls error() (which exits) if the digit is missing or is not
    one of 0, 1, 2 or 3.
    """

    negative = 0
    if code[:1] in ('+', '-'):
        negative = 1 if code[0] == '-' else 0
        code = code[1:]

    # test for empty text first: indexing it would raise IndexError
    if not code or code[0] not in '0123':
        error('INC numeric %s value must be 0, 1, 2 or 3' % axis)

    return (negative, int(code[0], 8), code[1:])


def genincfield(code):
    """Generate code value of INC sub-field.

    code  the sub-field text

    The sub-field code may contain:
        [+|-] n [+|-] n
    giving the X movement followed by the Y movement.

    Returns the byte 0200 combined with the current beam state (bit 6),
    the X sign (bit 5), X value (bits 3-4), Y sign (bit 2) and Y value
    (bits 0-1).
    """

    (xneg, x, code) = _inc_sign_and_digit(code, 'X')
    (yneg, y, code) = _inc_sign_and_digit(code, 'Y')

    if len(code) > 0:
        error('INC value has trailing characters')

    return 0o200 | (BeamState << 6) | (xneg << 5) | (x << 3) | (yneg << 2) | y
def geninc(code):
    """Generate the byte value for an INC code.

    code  the text of one INC field

    Two code fields follow an INC opcode, separated by ','.
    Each field may contain:
        E | F | N | P | R | X | Y | Axxx | (B|D) (+|-) n (+|-) n
    Where 'n' must be one of 0, 1, 2 or 3 and xxx is an octal 8-bit value.
    A field starting with a digit or sign is a movement with the beam state
    unchanged.  Only the first character selects the kind of field.

    'E' is rejected if already in INC mode.  B, D, N, P, R, X and Y are
    rejected if not in INC mode.  The globals ShortVectorMode and BeamState
    are updated to track the mode.

    Returns the byte value.  Calls error() (which exits) on a bad field.
    """

    global ShortVectorMode, BeamState

    # tolerate spaces around the ',' separator
    code = code.strip()
    if not code:
        error('INC field is empty')

    kind = code[0]

    if kind == 'A':             # make an SVM byte
        try:
            byte = int(code[1:], 8)
        except ValueError:
            error("Bad octal value in INC A field: '%s'" % code)
        if byte & (~0o377):
            error('Overflow in INC A field')
        if byte & 0o100:
            ShortVectorMode = False
        return byte

    if kind == 'E':             # enter SVM mode
        if ShortVectorMode:
            error('Already in INC mode!?')
        ShortVectorMode = True
        BeamState = 1
        return 0o060

    if kind == 'F':             # exit SVM, DRJM, ++Xmsb, X&Ylsb<-0
        ShortVectorMode = False
        BeamState = 0
        return 0o171

    if kind in '0123+-':        # movement, beam state unchanged
        return genincfield(code)

    if kind not in 'BDNPRXY':
        error("Unrecognized INC code: '%s' (code=%s)" % (kind, code))

    # every remaining code is only legal inside INC mode
    if not ShortVectorMode:
        error('Not in INC mode!?')

    if kind == 'B':             # beam on
        BeamState = 1
        return genincfield(code[1:])
    if kind == 'D':             # beam off
        BeamState = 0
        return genincfield(code[1:])
    if kind == 'N':             # X&Ylsb<-0
        ShortVectorMode = False
        return 0o111
    if kind == 'P':             # pause (filler)
        return 0o200
    if kind == 'R':             # exit SVM, DRJM, X&Ylsb<-0
        ShortVectorMode = False
        return 0o151
    if kind == 'X':             # Xlsb <- 0
        return 0o010
    return 0o001                # 'Y': Ylsb <- 0
def gen_code(lnum, line, dot, label, opcode, indirect, addr):
    """Assemble one line of code.

    lnum      source file line number
    line      the actual source line (for error reporting)
    dot       current address in the assembly (not used here)
    label     optional label (not used here)
    opcode    opcode, uppercase
    indirect  True if indirect flag found
    addr      address expression, uppercase

    Puts the assembled word into the punch buffer.
    Also returns the final assembled word.

    Calls error() (which exits) if the opcode is unknown, the address value
    doesn't fit the opcode's address field, or indirection is used where it
    isn't allowed.
    """

    # get instruction coding details
    # NOTE(review): the address-state entry (AYES/ANO/AOPT) is not enforced
    try:
        (word, _addr_state, addr_mask, indirect_ok) = OpcodeData[opcode]
    except KeyError:
        error("%d: %s\nUnrecognized opcode '%s'" % (lnum, line, opcode))

    value = eval_expr(addr)

    # check if 'addr' has overflowed.  add in if OK
    if value:
        if value & addr_mask != value:
            error("Address field overflow: %06o" % value)
        word += value

    # if indirect and indirect OK, set high bit
    if indirect:
        if not indirect_ok:
            error("Indirect not allowed here")
        word += 0o100000

    emit_word(word)
    return word
def define_label(label, value, lnum):
    """Record a label in the symbol tables.

    label  the label to define
    value  dot value for the label
    lnum   line number the label is defined on

    Defining a label a second time is reported through error().
    """

    already_defined = label in SymTable
    if already_defined:
        error("Label '%s' defined twice, at lines %d and %d."
              % (label, SymTableLine[label], lnum))

    SymTable.update({label: value})
    SymTableLine.update({label: lnum})
def next_symbol(line):
    """Return next symbol and line remainder.

    line  the text to split

    Returns a two-element list [symbol, remainder].  The symbol is the first
    whitespace-delimited token and the remainder is everything after the
    whitespace following it.  Either is '' if missing, so the result can
    always be unpacked into two names.
    """

    # str.split() exists on Python 2 and 3 (string.split() is Python 2 only)
    fields = line.split(None, 1)

    # pad so that a blank line or a lone symbol still gives two fields
    fields.extend([''] * (2 - len(fields)))
    return fields
def split_fields(line):
    """Break one ASM line into its label, opcode, indirect and address parts.

    line  the source line

    Returns a tuple: (label, opcode, indirect, address).

    The label and opcode are uppercased.  An address expression is
    uppercased too, but a quoted string address keeps its case and is
    returned wrapped in double quotes.  'indirect' is True when the address
    expression starts with '*'.  Any missing field is None, so an empty
    line gives (None, None, False, None).

    The address expression is never split on spaces, so something like
        ALPHA + 100
    stays in one piece.
    """

    def starts_field(text):
        """True if 'text' has something in it that isn't a comment."""
        return bool(text) and text[0] != ';'

    if not line:
        return (None, None, False, None)

    # a label must start in the first column
    if line[0] in ' \t;':
        label = None
        rest = line.strip()
    else:
        (label, rest) = next_symbol(line)
        label = label.upper()

    # then the opcode, if any
    opcode = None
    if starts_field(rest):
        (opcode, rest) = next_symbol(rest)
        opcode = opcode.upper()

    # and finally the address, if any
    indirect = False
    address = None
    if starts_field(rest):
        if rest[0] in "'\"":
            # a string: take everything up to the matching delimiter
            (body, closing, tail) = rest[1:].partition(rest[0])
            if not closing:
                error('Unbalanced string delimiter:\n'
                      '%d: %s' % (CurrentLineNumber, CurrentLine))
            address = '"' + body.strip() + '"'
            rest = tail.strip()
        else:
            # an expression: drop any comment, then any indirect marker
            (before, semicolon, _) = rest.partition(';')
            expression = before.strip() if semicolon else rest
            if expression.startswith('*'):
                indirect = True
                expression = expression[1:]
            address = expression.strip().upper()
            rest = None

    # whatever is left over may only be a comment
    if starts_field(rest):
        error('Badly formed instruction:\n'
              '%d: %s' % (CurrentLineNumber, CurrentLine))

    return (label, opcode, indirect, address)
def assemble_file():
    """Assemble the file and produce listing & output files.

    Reads all of the file named by AsmFile, then runs pass 1 and, if that
    succeeds, pass 2.  An empty file is reported through error().
    """

    # Read all of ASM file into memory, strip trailing whitespace and EOL.
    # Text mode is used so the lines are strings on Python 3 as well
    # (binary mode would give bytes, which never compare equal to strings).
    with open(AsmFile, 'r') as fd:
        asm_lines = [line.rstrip() for line in fd]

    if not asm_lines:
        error("File '%s' is empty" % AsmFile)

    if pass_1(asm_lines):
        pass_2(asm_lines)
def main():
    """The assembler.

    Parses the command line (see the module docstring for the options),
    works out the output and listing filenames, opens both files and
    assembles the source file.

    Exits with status 10 on a command line error or if the source file
    can't be opened, and with status 0 after showing help.
    """

    global AsmFile, ListFile, OutputFile, BlockLoader
    global ListFileHandle, OutputFileHandle

    # handle the options
    # (sys.argv[0] is passed along too, so it turns up as args[0] below)
    try:
        (opts, args) = getopt.gnu_getopt(sys.argv, "hl:o:b:",
                                         ["help", "list=", "output=",
                                          "blockloader="])
    except getopt.GetoptError:
        usage()
        sys.exit(10)

    ListFile = None
    OutputFile = None
    BlockLoader = BlockLoader_C8LDS

    loaders = {'C8LDS': BlockLoader_C8LDS, 'LC16SD': BlockLoader_LC16SD}

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            sys.exit(0)
        elif opt in ('-l', '--list'):
            ListFile = arg
        elif opt in ('-o', '--output'):
            OutputFile = arg
        elif opt in ('-b', '--blockloader'):
            try:
                BlockLoader = loaders[arg.upper()]
            except KeyError:
                # stop here rather than quietly picking some loader
                usage("-b options expects either C8LDS or LC16SD param")
                sys.exit(10)

    if len(args) != 2:
        usage()
        sys.exit(10)

    # get ASM filename and make sure it exists
    AsmFile = args[1]
    try:
        with open(AsmFile, 'rb'):
            pass
    except IOError:
        print("Sorry, can't find file '%s'" % AsmFile)
        sys.exit(10)

    # default output and listing names come from the ASM filename
    (path, ext) = os.path.splitext(AsmFile)
    if OutputFile is None:
        OutputFile = path + PTPExtension
    if ListFile is None:
        ListFile = path + ListFileExtension

    # the listing is text, the tape image is binary
    ListFileHandle = open(ListFile, 'w')
    try:
        with open(OutputFile, 'wb') as OutputFileHandle:
            assemble_file()
    finally:
        # also runs when error() exits, so the listing is always closed
        ListFileHandle.close()
# run the assembler only when executed as a script, not when imported
if __name__ == '__main__':
    main()