1
0
mirror of https://github.com/rzzzwilson/pymlac.git synced 2025-06-10 09:32:41 +00:00
2021-06-14 14:26:53 +07:00

1375 lines
48 KiB
Python
Executable File

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
An assembler for the Imlac simulator.
Usage: pyasm [ -h ] [ -l <listfile> ] [ -o <outputfile> ] [ -b <loader> ] <asmfile>
Where <asmfile> is the file to assemble,
<outputfile> is the output PTP file
<listfile> is the optional listing file
If <outputfile> is not specified the output filename is the input
<asmfile> with any extension removed and a .ptp extension added.
The -b option specifies the blockloader code to use. <loader> values are either
'c8lds' or 'lc16sd'. The default is 'c8lds'.
"""
"""
This is a classic two pass assembler. The first pass reads all lines and keeps
track of the current location in memory (dot). Labels and EQU values are
remembered in the symbol table.
Pass 2 does pretty much the same thing as pass 1, except that now all labels
must be defined and code is generated.
"""
import sys
import os
import copy
import string
import getopt
import traceback
######
# Globals
######
# Module-level assembler state.  These names are rebound by the pass and
# output functions through 'global' declarations.

# the input assembler filename
AsmFile = None
# the output listing file
ListFile = None
ListFileHandle = None # open listing file
# the output PTP file
OutputFile = None
OutputFileHandle = None # open output file
# the blockloader code to emit
BlockLoader = None
# the current address during assembly
Dot = None
# the program start address (optional)
StartAddress = None
# the symbol table(s)
# {<name>: <value>, ... }
SymTable = {}
# {<name>: <line#>, ... }
SymTableLine = {}
# current line number and the line
CurrentLine = None
CurrentLineNumber = None
# Any undefined label
Undefined = None
# buffer for blocked code
BlockMaxSize = 255 # maximum size of buffer (words)
# A plain list, not a bytearray: emit_word() appends whole 16-bit words
# (a bytearray rejects anything above 255) and start_block() always
# re-creates the buffer as a list anyway.
BlockBuffer = [] # actual buffered words
BlockBufferBase = None # base address of the block
# DEIM state
ShortVectorMode = False # True if within a DEIM/INC instruction
BeamState = 0 # 1 if beam is ON, 0 if OFF
# stuff about the current code block
CodeBlockMask = 0o174000 # high 5 bit address mask
CodeBlockSize = 0o4000 # 2K code block size
CodeBlockBase = None # base of the current code block
AddressMask = 0o3777 # mask for 11 bit address
MaxMemory = 0o177777 # highest possible address
######
# Mostly constant stuff
######
# mask for 16bit values (applied to the AC value and block checksums
# before they are written to the tape)
WordMask = 0xFFFF
# high bit in 16-bit word (added to the start address by write_start())
HighBit = 0x8000
# the output PTP filename extension
PTPExtension = '.ptp'
# the output listing filename extension
ListFileExtension = '.lst'
# number of bytes in the 'zero' leader (default size for write_leader())
ZeroLeaderSize = 16
######
# dict mapping opcode to generated word, address opts, address mask & indirect allowed
######
# helper function to generate N-bit mask, right justified
def mask(n):
    """Return an integer whose low 'n' bits are all ones.

    n  number of bits in the mask; zero or a negative count gives 0
    """
    return sum(1 << bit for bit in range(n))
# address states, AYES = address required,
# ANO = address NOT required
# AOPT = address optional
(AYES, ANO, AOPT) = range(3)
# Each entry maps an opcode mnemonic to a 4-tuple which gen_code() unpacks
# as (word, aok, mask, ind):
#   word  the base instruction word before any operand is added
#   aok   one of AYES, ANO or AOPT, saying if an address field is expected
#   mask  right-justified mask for the operand field (0 if no operand)
#   ind   True if the indirect flag is allowed on this opcode
# NOTE(review): 'PUN' and 'PPC' map to the same word (0o001271) - confirm
# against the Imlac documentation that this is intended.
OpcodeData = {
'LAW': ( 0o004000, AOPT, mask(11), False),
'LWC': ( 0o104000, AOPT, mask(11), False),
'JMP': ( 0o010000, AYES, mask(11), True ),
'DAC': ( 0o020000, AYES, mask(11), True ),
'XAM': ( 0o024000, AYES, mask(11), True ),
'ISZ': ( 0o030000, AYES, mask(11), True ),
'JMS': ( 0o034000, AYES, mask(11), True ),
'AND': ( 0o044000, AYES, mask(11), True ),
'IOR': ( 0o050000, AYES, mask(11), True ),
'XOR': ( 0o054000, AYES, mask(11), True ),
'LAC': ( 0o060000, AYES, mask(11), True ),
'ADD': ( 0o064000, AYES, mask(11), True ),
'SUB': ( 0o070000, AYES, mask(11), True ),
'SAM': ( 0o074000, AYES, mask(11), True ),
'HLT': ( 0o000000, AOPT, mask(11), False),
'NOP': ( 0o100000, ANO, 0, False),
'CLA': ( 0o100001, ANO, 0, False),
'CMA': ( 0o100002, ANO, 0, False),
'STA': ( 0o100003, ANO, 0, False),
'IAC': ( 0o100004, ANO, 0, False),
'COA': ( 0o100005, ANO, 0, False),
'CIA': ( 0o100006, ANO, 0, False),
'CLL': ( 0o100010, ANO, 0, False),
'CML': ( 0o100020, ANO, 0, False),
'STL': ( 0o100030, ANO, 0, False),
'ODA': ( 0o100040, ANO, 0, False),
'LDA': ( 0o100041, ANO, 0, False),
'CAL': ( 0o100011, ANO, 0, False),
'RAL': ( 0o003000, AOPT, mask(2), False),
'RAR': ( 0o003020, AOPT, mask(2), False),
'SAL': ( 0o003040, AOPT, mask(2), False),
'SAR': ( 0o003060, AOPT, mask(2), False),
'DON': ( 0o003100, ANO, 0, False),
'ASZ': ( 0o002001, ANO, 0, False),
'ASN': ( 0o102001, ANO, 0, False),
'ASP': ( 0o002002, ANO, 0, False),
'ASM': ( 0o102002, ANO, 0, False),
'LSZ': ( 0o002004, ANO, 0, False),
'LSN': ( 0o102004, ANO, 0, False),
'DSF': ( 0o002010, ANO, 0, False),
'DSN': ( 0o102010, ANO, 0, False),
'KSF': ( 0o002020, ANO, 0, False),
'KSN': ( 0o102020, ANO, 0, False),
'RSF': ( 0o002040, ANO, 0, False),
'RSN': ( 0o102040, ANO, 0, False),
'TSF': ( 0o002100, ANO, 0, False),
'TSN': ( 0o102100, ANO, 0, False),
'SSF': ( 0o002200, ANO, 0, False),
'SSN': ( 0o102200, ANO, 0, False),
'HSF': ( 0o002400, ANO, 0, False),
'HSN': ( 0o102400, ANO, 0, False),
'DLA': ( 0o001003, ANO, 0, False),
'CTB': ( 0o001011, ANO, 0, False),
'DOF': ( 0o001012, ANO, 0, False),
'KRB': ( 0o001021, ANO, 0, False),
'KCF': ( 0o001022, ANO, 0, False),
'KRC': ( 0o001023, ANO, 0, False),
'RRB': ( 0o001031, ANO, 0, False),
'RCF': ( 0o001032, ANO, 0, False),
'RRC': ( 0o001033, ANO, 0, False),
'TPR': ( 0o001041, ANO, 0, False),
'TCF': ( 0o001042, ANO, 0, False),
'TPC': ( 0o001043, ANO, 0, False),
'HRB': ( 0o001051, ANO, 0, False),
'HOF': ( 0o001052, ANO, 0, False),
'HON': ( 0o001061, ANO, 0, False),
'STB': ( 0o001062, ANO, 0, False),
'SCF': ( 0o001071, ANO, 0, False),
'IOS': ( 0o001072, ANO, 0, False),
'IOT': ( 0o001000, AYES, mask(9), False),
'IOF': ( 0o001161, ANO, 0, False),
'ION': ( 0o001162, ANO, 0, False),
'PUN': ( 0o001271, ANO, 0, False),
'PSF': ( 0o001274, ANO, 0, False),
'PPC': ( 0o001271, ANO, 0, False),
'DLXA': (0o010000, AYES, mask(12), False),
'DLYA': (0o020000, AYES, mask(12), False),
# 'DEIM': (0o030000, AYES, mask(12), False), # handled as pseudo-op
'DJMS': (0o050000, AYES, mask(12), False),
'DJMP': (0o060000, AYES, mask(12), False),
'DOPR': (0o004000, AYES, mask(4), False),
'DHLT': (0o000000, ANO, 0, False),
'DSTS': (0o004004, AYES, mask(2), False),
'DSTB': (0o004010, AYES, mask(3), False),
'DRJM': (0o004040, ANO, 0, False),
'DIXM': (0o005000, ANO, 0, False),
'DIYM': (0o004400, ANO, 0, False),
'DDXM': (0o004200, ANO, 0, False),
'DDYM': (0o004100, ANO, 0, False),
'DHVC': (0o006000, ANO, 0, False),
'DDSP': (0o004020, ANO, 0, False),
'DNOP': (0o004000, ANO, 0, False),
}
######
# The papertape/teletype blockloader codes
######
# the LC16SD blockloader as disassembled from bin/munch.ptp
# Each list entry is one 16-bit word of loader code.  The trailing comment
# on every entry is the disassembly listing: octal address, optional label,
# instruction and remark.  write_block_loader() punches the selected loader
# list to the output tape word by word, in list order.
BlockLoader_LC16SD = [
#03700 ORG 03700 ;
#
0o001032, #03700 ldaddr RCF ;
0o037701, #03701 numwrd JMS . ; get address of 'chksum' into 'numwrd'
#
0o063701, #03702 chksum LAC numwrd ; are we running in high memory (017700+)
0o077775, #03703 SAM himem ;
0o013710, #03704 JMP rdblk ; if not, just load tape
#
0o104012, #03705 LWC 012 ; else turn on the display
0o001003, #03706 DLA ;
0o003100, #03707 DON ;
#
0o100011, #03710 rdblk CAL ; initialize block checksum
0o023702, #03711 DAC chksum ;
0o037746, #03712 JMS rdword ; get load address
0o023700, #03713 DAC ldaddr ;
0o002002, #03714 ASP ; if high bit set
0o013740, #03715 JMP ldend ; then end of tape load
0o037746, #03716 JMS rdword ; else get number of words in block
0o023701, #03717 DAC numwrd ;
0o037746, #03720 JMS rdword ; read checksum word, add to checksum
0o037746, #03721 blklp JMS rdword ; get data word
0o123700, #03722 DAC *ldaddr ; store into memory
0o063700, #03723 LAC ldaddr ; get load address
0o003063, #03724 SAR 3 ; echo load address in display (if running)
0o047765, #03725 AND low10 ;
0o053764, #03726 IOR dlya0 ;
0o023766, #03727 DAC disp ;
0o163700, #03730 LAC *ldaddr ; get last data word
0o033700, #03731 ISZ ldaddr ; move 'load address' pointer
0o033701, #03732 ISZ numwrd ; check end of block
0o013721, #03733 JMP blklp ; jump if not ended
0o067702, #03734 ADD chksum ; block end, check checksum
0o002001, #03735 ASZ ; if checksum invalid,
0o013736, #03736 JMP . ; busy wait here
0o013710, #03737 JMP rdblk ; else go get next block
# ; end of load, AC is load address, high bit set
0o001012, #03740 ldend DOF ; turn off the display
0o100004, #03741 IAC ;
0o102001, #03742 ASN ; if address is 0177777
0o000000, #03743 HLT ; then just halt
0o037746, #03744 JMS rdword ; else get AC contents
0o113700, #03745 JMP *ldaddr ; and jump to start address
# ; read a word from tape, leave in AC
0o000000, #03746 rdword DATA 0 ;
0o067702, #03747 ADD chksum ;
0o023702, #03750 DAC chksum ;
0o100011, #03751 CAL ;
0o002040, #03752 RSF ;
0o013752, #03753 JMP .-1 ;
0o001033, #03754 RRC ;
0o003003, #03755 RAL 3 ;
0o003003, #03756 RAL 3 ;
0o003002, #03757 RAL 2 ;
0o002040, #03760 RSF ;
0o013760, #03761 JMP .-1 ;
0o001033, #03762 RRC ;
0o113746, #03763 JMP *rdword ;
#
0o020000, #03764 dlya0 DLYA 0 ;
0o001777, #03765 low10 DATA 001777 ;
#
# ; display routine, used if running in extended memory
0o020000, #03766 disp DLYA 00000 ;
0o010000, #03767 DLXA 00000 ;
0o004005, #03770 DSTS 1 ;
0o046000, #03771 DLVH 02000 ;
0o021777, #03772 DLYA 01777 ;
0o000000, #03773 DHLT ;
#
0o067766, #03774 DATA 0067766 ;
0o017702, #03775 himem DATA 0017702 ;
0o010000, #03776 DATA 0010000 ;
0o177777, #03777 DATA 0177777 ;
#
# END ;
]
# the C8LDS blockloader
# Each list entry is one 16-bit word of loader code.  The trailing comment
# on every entry is the disassembly listing: octal address, listing line
# number, optional label, instruction, then the address/word pair and any
# remark from the original listing.  write_eot() and write_block() compare
# BlockLoader against this list to select the 'c8lds' tape format.
BlockLoader_C8LDS = [
# 0001: ; 'c8lds' blockloader disassembled from chars.ptp.
# 0002: ;
# 0003: ; this loader is smart - it looks at the ROM code
# 0004: ; and decides if it should use PTR or TTY.
# 0005:
#03700 0006: ORG 03700 ;
# 0007:
#03677 0008: cksum equ 03677 ; store checksum just below start address
# 0009:
0o001032, #03700 0010: start RCF ; 03700 0001032
0o013740, #03701 0011: JMP patch ; 03701 0013740 ; patch loader to use PTR or TTY
0o023677, #03702 0012: rdblok DAC cksum ; 03702 0023677 ; AC is zero at this point
0o037760, #03703 0013: skpzer JMS rdbyte ; 03703 0037760 ; wait for non-zero byte
0o102001, #03704 0014: ASN ; 03704 0102001
0o013703, #03705 0015: JMP skpzer ; 03705 0013703
0o100006, #03706 0016: CIA ; 03706 0100006
0o023777, #03707 0017: DAC wrdcnt ; 03707 0023777 ; save word count (or load address?)
0o037750, #03710 0018: JMS rdword ; 03710 0037750 ; get load address
0o023776, #03711 0019: DAC ldaddr ; 03711 0023776
0o077730, #03712 0020: SAM eolval ; 03712 0077730
0o013715, #03713 0021: JMP rddata ; 03713 0013715
0o000000, #03714 0022: HLT ; 03714 0000000
0o037750, #03715 0023: rddata JMS rdword ; 03715 0037750
0o123776, #03716 0024: DAC *ldaddr ; 03716 0123776
0o037731, #03717 0025: JMS updcks ; 03717 0037731
0o033776, #03720 0026: ISZ ldaddr ; 03720 0033776
0o033777, #03721 0027: ISZ wrdcnt ; 03721 0033777
0o013715, #03722 0028: JMP rddata ; 03722 0013715
0o037750, #03723 0029: JMS rdword ; 03723 0037750
0o073677, #03724 0030: SUB cksum ; 03724 0073677
0o102001, #03725 0031: ASN ; 03725 0102001
0o013746, #03726 0032: JMP nxtblk ; 03726 0013746
0o000000, #03727 0033: HLT ; 03727 0000000
# 0034:
0o177777, #03730 0035: eolval DATA 0177777 ; 03730 0177777 ;' end-of-load' load address value
# 0036:
# 0037: ;------------------------
# 0038: ; Update checksum. New word in AC.
# 0039: ; On exit, AC is updated checksum.
# 0040: ;------------------------
0o000000, #03731 0041: updcks DATA 0 ; 03731 0017720
0o100010, #03732 0042: CLL ; 03732 0100010
0o067677, #03733 0043: ADD cksum ; 03733 0067677
0o002004, #03734 0044: LSZ ; 03734 0002004
0o100004, #03735 0045: IAC ; 03735 0100004
0o023677, #03736 0046: DAC cksum ; 03736 0023677
0o113731, #03737 0047: JMP *updcks ; 03737 0113731
# 0048:
# 0049: ;------------------------
# 0050: ; Patch loader to use TTY or PTR.
# 0051: ;------------------------
0o001061, #03740 0052: patch HON ; 03740 0001061
0o063774, #03741 0053: LAC pattty ; 03741 0063774
0o023761, #03742 0054: DAC usetty ; 03742 0023761
0o005032, #03743 0055: LAW 01032 ; 03743 0005032 ; load RCF instruction
0o177775, #03744 0056: SAM *romflg ; 03744 0177775 ; skip if ROM is TTY (RCF @044)
0o023761, #03745 0057: DAC usetty ; 03745 0023761 ; patch this code to use PTR
0o100011, #03746 0058: nxtblk CAL ; 03746 0100011
0o013702, #03747 0059: JMP rdblok ; 03747 0013702
# 0060:
# 0061: ;------------------------
# 0062: ; Read a word from the input device.
# 0063: ;------------------------
0o000000, #03750 0064: rdword DATA 0 ; 03750 0017711
0o100011, #03751 0065: CAL ; 03751 0100011
0o037760, #03752 0066: JMS rdbyte ; 03752 0037760
0o003003, #03753 0067: RAL 3 ; 03753 0003003
0o003003, #03754 0068: RAL 3 ; 03754 0003003
0o003002, #03755 0069: RAL 2 ; 03755 0003002
0o037760, #03756 0070: JMS rdbyte ; 03756 0037760
0o113750, #03757 0071: JMP *rdword ; 03757 0113750
# 0072:
# 0073: ;------------------------
# 0074: ; Read a byte. Patched to use TTY or PTR.
# 0075: ;------------------------
0o000000, #03760 0076: rdbyte DATA 0 ; 03760 0017757
0o001032, #03761 0077: usetty RCF ; 03761 0001032
0o102400, #03762 0078: HSN ; 03762 0102400
0o013762, #03763 0079: JMP .-1 ; 03763 0013762
0o002400, #03764 0080: HSF ; 03764 0002400
0o013764, #03765 0081: JMP .-1 ; 03765 0013764
0o001051, #03766 0082: HRB ; 03766 0001051
0o113760, #03767 0083: JMP *rdbyte ; 03767 0113760
# 0084:
0o002040, #03770 0085: rdtty RSF ; 03770 0002040
0o013770, #03771 0086: JMP .-1 ; 03771 0013770
0o001033, #03772 0087: RRC ; 03772 0001033
0o113760, #03773 0088: JMP *rdbyte ; 03773 0113760
# 0089:
0o013770, #03774 0090: pattty JMP rdtty ; 03774 0013770
0o000044, #03775 0091: romflg DATA 000044 ; 03775 0000044
0o000000, #03776 0092: ldaddr DATA 000000 ; 03776 0000000
0o000000, #03777 0093: wrdcnt DATA 000000 ; 03777 0000000
# 0094:
# 0095: END ;
]
def usage(msg=None):
    """Show the module usage text, preceded by 'msg' in a banner if given.

    msg  optional error message; nothing extra is printed if it is falsy
    """
    if msg:
        banner = '*' * 60
        print(banner, msg, banner, sep='\n')
    print(__doc__)
def error(msg):
    """Report an assembly error for the current source line and abort.

    msg  the error text to display

    The report (a rule, the current line number and text, the message and a
    closing rule) is printed to stdout and, if a listing file is open, is
    also appended to it.  The process then exits with status 10, so this
    function never returns.
    """
    lnum = CurrentLineNumber if CurrentLineNumber else 1
    line = CurrentLine if CurrentLine else ''
    rule = '-' * 80
    report = (rule, "%04d: %s" % (lnum, line), msg, rule)

    for text in report:
        print(text)
    # traceback.print_stack()
    # print('SymTable=%s' % str(SymTable))

    # the listing file may not be open yet; don't let that hide the real
    # error behind an AttributeError
    if ListFileHandle is not None:
        for text in report:
            ListFileHandle.write(text + '\n')
        ListFileHandle.flush()

    sys.exit(10)
def write_byte(byte):
    """Write the low 8 bits of 'byte' to the output code file.

    NOTE(review): the value is written as a one-character str, which only
    gives one byte per value if OutputFileHandle is a text file with a
    single-byte encoding - confirm against the open() call.
    """
    low_bits = byte & 0xFF
    OutputFileHandle.write(chr(low_bits))
def write_word(word):
    """Write a 16-bit word to the output file, high byte first."""
    for shift in (8, 0):
        write_byte(word >> shift)
def write_start(address=None, ac=None):
    """Write the block that ends the tape and optionally starts the program.

    address  the desired start address, if specified
    ac       initial contents of the AC, if specified (defaults to 0)

    With no address a single 0xffff word is written.  Otherwise the address
    with the high bit added is written, followed by the AC value unless
    the address is zero.
    """
    ac_value = 0 if ac is None else ac

    if address is None:
        write_word(0xffff)
        return

    write_word(address + HighBit)
    if address != 0:
        write_word(ac_value & WordMask)
def write_leader(size=ZeroLeaderSize):
    """Write 'size' zero bytes of papertape leader to the output file."""
    bytes_left = range(size)
    for _unused in bytes_left:
        write_byte(0)
def write_block_loader():
    """Write every word of the selected block loader to the output file."""
    loader_words = iter(BlockLoader)
    for loader_word in loader_words:
        write_word(loader_word)
def start_block(addr):
    """Begin a new, empty code block whose base address is 'addr'."""
    global BlockBuffer, BlockBufferBase

    BlockBuffer, BlockBufferBase = [], addr
def emit_word(word):
    """Append 'word' to the code block buffer.

    If the buffer already holds the maximum number of words it is written
    out first and a new block is started at the current address.
    """
    buffer_is_full = len(BlockBuffer) >= BlockMaxSize
    if buffer_is_full:
        write_block()
        start_block(Dot)

    # looked up again here because start_block() rebinds the global buffer
    BlockBuffer.append(word)
def write_eot():
    """Write the End-Of-Tape data in the format of the selected loader."""
    if BlockLoader is not BlockLoader_C8LDS:
        # lc16sd format: a start block, with the start address if known
        if StartAddress is None:
            write_start()
        else:
            write_start(StartAddress, ac=0o177777)
        return

    # c8lds format: a 0377 byte followed by the word 0177777
    write_byte(0o377)
    write_word(0o177777)
def write_block():
    """Write the buffered code block in the format of the selected loader."""
    if BlockLoader is not BlockLoader_C8LDS:
        # can't write zero leader with lc16sd
        write_block_lc16sd()
        return

    write_leader()
    write_block_c8lds()
def write_block_c8lds():
    """Write the buffered code block in 'c8lds' form, then reset the buffer.

    The block layout is:
        data count      (8 bits)
        load address    (16 bits)
        first data word (16 bits)
        ...
        last data word  (16 bits)
        checksum        (16 bits)

    Note that a data block may be preceded by a zero leader.

    The checksum is accumulated over the data words only.  After each
    addition, a sum that no longer fits in 16 bits is incremented by one
    and then truncated to 16 bits.

    An empty buffer writes nothing and is left untouched.
    """
    word_count = len(BlockBuffer)
    if not word_count:
        # block buffer is empty, do nothing
        return

    # block header: word count (byte) then load address (word)
    write_byte(word_count)
    write_word(BlockBufferBase)

    # data words, building up the checksum as we go
    running = 0
    for data_word in BlockBuffer:
        write_word(data_word)
        running += data_word
        if running != running & WordMask:
            running += 1
        running &= WordMask

    write_word(running & WordMask)

    # reset the code buffer
    start_block(None)
def write_block_lc16sd():
    """Write the buffered code block in 'lc16sd' form, then reset the buffer.

    The block layout is: load address, negated word count, checksum, then
    the data words.  The checksum word is the 16-bit negation of the sum of
    the load address, the negated count and all data words.

    An empty buffer writes nothing and is left untouched.
    """
    word_count = len(BlockBuffer)
    if not word_count:
        # block buffer is empty, do nothing
        return

    # load address and two's-complement word count
    negated_count = (-word_count) & WordMask
    write_word(BlockBufferBase)
    write_word(negated_count)

    # checksum covers the header words and the data
    block_sum = (BlockBufferBase + negated_count + sum(BlockBuffer)) & WordMask
    write_word((-block_sum) & WordMask)

    # finally the data words themselves
    for data_word in BlockBuffer:
        write_word(data_word)

    # reset the code buffer
    start_block(None)
def write_list(code, addr, lnum, line):
    """Write one line to the listing file.

    code  the word of generated code, or None to leave the column blank
    addr  the address of the generated code, or None for a blank column
    lnum  source file line number (a falsy value leaves the column blank)
    line  complete text of the assembler line (falsy means blank)
    """
    columns = (
        '' if code is None else '%07o' % code,
        '' if addr is None else '%05o' % addr,
        '%04d:' % lnum if lnum else '',
        '%s' % line if line else '',
    )
    ListFileHandle.write('%7s %5s %s %s\n' % columns)
    ListFileHandle.flush()
def eval_expr(expr):
    """Evaluate an expression string.

    expr  string holding the expression, or None

    Returns the value of the expression, or None if 'expr' is None.

    Symbols are looked up in SymTable and "." stands for the current
    address (Dot).  Numbers with a leading zero are taken as octal.

    Raises NameError if the expression uses an undefined symbol; the
    symbol name is also left in the global 'Undefined'.  Any other
    evaluation problem (bad syntax, bad operand types) is reported
    through error(), which aborts the assembly.
    """
    global Undefined

    import re # not imported at file level, only needed here

    # if no expression, do nothing
    if expr is None:
        return None

    # replace any "." value with "dot" defined in the symbol table
    expr = expr.replace('.', '_D_O_T_')

    # Python 3 rejects integers with a leading zero ("0100"), so rewrite
    # them in the explicit "0o100" octal form before evaluating
    expr = re.sub(r'\b0+([0-7]+)\b', r'0o\1', expr)

    # eval() adds '__builtins__' to the dict it is given, so work on a copy;
    # a shallow copy is enough as nothing in the table is modified
    globs = dict(SymTable)
    globs['_D_O_T_'] = Dot # add in the "." address

    # evaluate the expression
    # NOTE(review): eval() executes arbitrary Python found in the assembler
    # source - acceptable for trusted input only.
    try:
        return eval(expr, globs)
    except (TypeError, NameError, SyntaxError) as e:
        Undefined = str(e)
        if 'is not defined' in Undefined:
            # pull the symbol name out of "name 'XYZ' is not defined"
            Undefined = Undefined[len("name '"):-len("' is not defined")]
            msg = "Opcode expression has '%s' undefined" % Undefined
            #error(msg)
            raise NameError(msg)
        error("Opcode expression has an error")
    return None
def num_gen_words(opcode, addr):
    """Return the number of words generated for 'opcode'.

    Every opcode is assumed to generate exactly one word; no opcode
    generates none.  'addr' is not used.
    """
    # TODO has to change when macros are implemented
    return 1 if opcode else 0
def _pass1_string_words(opcode, addr, zero_terminated):
    """Return the number of words an ASCII/ASCIIZ operand occupies.

    opcode           'ASCII' or 'ASCIIZ', used only in error messages
    addr             the raw operand, expected to be a delimited string
    zero_terminated  True if a terminating zero byte must be counted

    Two characters are packed per word, so the result is the byte count
    rounded up to a whole number of words.
    """
    if not addr:
        error("%s pseudo-op must have a data field" % opcode)
    if addr[0] not in "'\"":
        error("%s pseudo-op must data field must be a delimited string"
              % opcode)
    # a lone quote character would otherwise match itself as the closer
    if len(addr) < 2 or addr[-1] != addr[0]:
        error("%s pseudo-op has a badly delimited delimited string" % opcode)

    num_bytes = len(addr) - 2 + (1 if zero_terminated else 0)

    # integer division: Dot must stay an integer
    return (num_bytes + 1) // 2


def pass_1(lines):
    """Do pass 1 of the assembly.

    lines  lines of text with terminal EOL removed

    Just read text and fill in the symbol table, tracking the current
    address (Dot) so each label gets its value.  No code is generated.
    Processing stops at the END pseudo-op, which may set StartAddress.

    Returns True on success.  Errors are reported through error(), which
    aborts the assembly; the 'return False' statements only matter if
    error() should ever return.
    """
    global Dot, StartAddress
    global CurrentLineNumber, CurrentLine
    global SymTable, SymTableLine
    global CodeBlockBase

    # initialize things
    Dot = None
    SymTable = {}
    SymTableLine = {}

    # for each line in the file, line numbers are 1-based
    for (lnum, line) in enumerate(lines, start=1):
        CurrentLineNumber = lnum
        CurrentLine = line

        # get line fields
        (label, opcode, indirect, addr) = split_fields(line)

        if opcode:
            # we have an opcode, so code might be generated
            if opcode == 'ORG':
                value = eval_expr(addr) if addr else None
                if value is None:
                    error("ORG pseudo-op has a bad address")
                    return False
                Dot = value
                if Dot > MaxMemory:
                    error("ORG pseudo-op has an address overflow")
                if label:
                    error("ORG pseudo-op must not have a label")
            elif opcode == 'EQU':
                # no code, but we define a label
                if not label:
                    error("EQU pseudo-op must have a label")
                    return False
                value = eval_expr(addr) if addr else None
                if value is None:
                    error("EQU pseudo-op has a bad value")
                    return False
                define_label(label, value, lnum)
            elif opcode == 'BSS':
                # no code, but Dot moves
                value = eval_expr(addr) if addr else None
                if value is None:
                    error("BSS pseudo-op has a bad value")
                    return False
                if label:
                    define_label(label, Dot, lnum)
                Dot += value
            elif opcode == 'DATA':
                # a single data word, the value is only checked here
                if not addr or eval_expr(addr) is None:
                    error("DATA pseudo-op has a bad value")
                    return False
                if label:
                    define_label(label, Dot, lnum)
                Dot += 1
            elif opcode in ('ASCII', 'ASCIIZ'):
                # string packed two bytes/word; ASCIIZ ensures a zero byte
                # at the end, ASCII only zero-fills an odd last word
                num_words = _pass1_string_words(opcode, addr,
                                                opcode == 'ASCIIZ')
                if label:
                    define_label(label, Dot, lnum)
                Dot += num_words
            elif opcode == 'INC':
                # start of short vector mode
                if not addr:
                    error("INC pseudo-op must have a data field")
                if label:
                    define_label(label, Dot, lnum)
                Dot += 1
            elif opcode == 'END':
                # get the (optional) start address
                if addr:
                    StartAddress = eval_expr(addr)
                    if StartAddress is None:
                        error("END pseudo-op has a bad address")
                        return False
                return True
            else:
                # actual machine instruction!
                if label:
                    define_label(label, Dot, lnum)
                Dot += num_gen_words(opcode, addr)
        elif label:
            # label but no code generated, just set label in symtab
            if label in SymTable:
                error("Label '%s' has already been defined" % label)
                return False
            define_label(label, Dot, lnum)

        # update the code block base address
        # Dot moved either from ORG or generated code
        # ('is not None' so that address 0 also counts)
        if Dot is not None:
            CodeBlockBase = Dot & CodeBlockMask

    return True
def _pass2_octal(value):
    """Return 'value' as 6 octal digits, or str(value) if not an integer."""
    if isinstance(value, int):
        return '%06o' % value
    return str(value)


def _pass2_old_value(label, missing_msg):
    """Return the pass 1 value of 'label'.

    missing_msg  error text with one '%s' for the label name, reported
                 through error() if pass 1 never defined the label
    """
    try:
        return SymTable[label]
    except KeyError:
        error(missing_msg % label)


def _pass2_emit_string(opcode, addr, lnum, line, zero_terminated):
    """Emit and list the packed words of an ASCII/ASCIIZ operand.

    opcode           'ASCII' or 'ASCIIZ', used only in error messages
    addr             the raw operand, expected to be a delimited string
    lnum, line       source line number and text, listed with the first
                     word only
    zero_terminated  True if the text must end in a zero byte

    Two characters are packed per word, first character in the high byte.
    An odd last character gets a zero low byte.  If 'zero_terminated' and
    the text has an even length, an extra zero word is emitted.
    Dot is advanced by one for each word emitted.
    """
    global Dot

    if not addr:
        error("%s pseudo-op must have a data field" % opcode)
    if addr[0] not in "'\"":
        error("%s pseudo-op must data field must be a delimited string"
              % opcode)
    # a lone quote character would otherwise match itself as the closer
    if len(addr) < 2 or addr[-1] != addr[0]:
        error("%s pseudo-op has a badly delimited delimited string" % opcode)

    text = addr[1:-1]
    words = [(ord(text[i]) << 8) + ord(text[i+1])
             for i in range(0, len(text) - 1, 2)]
    if len(text) % 2:
        words.append(ord(text[-1]) << 8)
    elif zero_terminated:
        words.append(0)

    list_lnum = lnum
    list_line = line
    for word_value in words:
        emit_word(word_value)
        write_list(word_value, Dot, list_lnum, list_line)
        # only the first listing line shows the source text
        list_lnum = ''
        list_line = ''
        Dot += 1


def pass_2(lines):
    """Perform the second pass of the assembly.

    lines  lines of text with terminal EOL removed

    Very similar to pass_1(), but we:
        . don't define labels, that was done in pass_1()
        . check that any pass_2 labels haven't changed value
        . generate code, the PTP output and the listing

    The tape is written as: zero leader, block loader, the code blocks,
    the end-of-tape data and a final zero leader.  Processing stops at the
    END pseudo-op.

    Errors are reported through error(), which aborts the assembly; the
    'return False' statements only matter if error() should ever return.
    """
    global Dot, StartAddress, CurrentLineNumber, CurrentLine
    global CodeBlockBase

    # punch the zero leader and ptr/tty loader
    write_leader()
    write_block_loader()

    # for each line in the file, line numbers are 1-based
    Dot = None
    for (lnum, line) in enumerate(lines, start=1):
        CurrentLineNumber = lnum
        CurrentLine = line

        # get line fields
        (label, opcode, indirect, addr) = split_fields(line)

        if opcode:
            # we have an opcode, so code might be generated
            if opcode == 'ORG':
                if label:
                    error("ORG pseudo-op may not have a label")
                    return False
                value = eval_expr(addr) if addr else None
                if value is None:
                    error("ORG pseudo-op has a bad address")
                    return False
                if BlockBufferBase is not None:
                    write_block() # write any code accumulated so far
                Dot = value
                start_block(Dot)
                write_list(None, Dot, lnum, line)
            elif opcode == 'EQU':
                # no code, but we must have a label
                if not label:
                    error("EQU pseudo-op must have a label")
                    return False
                value = eval_expr(addr) if addr else None
                if value is None:
                    error("EQU pseudo-op has a bad value")
                    return False
                # check EQU value unchanged
                old_value = _pass2_old_value(
                    label, "EQU label '%s' wasn't defined in first pass!?")
                if value != old_value:
                    error("EQU value in '%s' has changed,\n"
                          "was %s, is now %s"
                          % (label, _pass2_octal(old_value),
                             _pass2_octal(value)))
                write_list(None, value, lnum, line)
            elif opcode == 'BSS':
                # no code, but Dot moves
                value = eval_expr(addr) if addr else None
                if value is None:
                    error("BSS pseudo-op has bad value")
                    return False
                if label:
                    old_dot = _pass2_old_value(
                        label, "BSS label '%s' wasn't defined in first pass!?")
                    if Dot != old_dot:
                        error("BSS dot value has changed, "
                              "was %s, is now %s"
                              % (_pass2_octal(old_dot), _pass2_octal(Dot)))
                write_block() # write any code accumulated so far
                Dot += value
                start_block(Dot)
                write_list(None, None, lnum, line)
            elif opcode == 'DATA':
                # a single data word
                value = eval_expr(addr) if addr else None
                if value is None:
                    error("DATA pseudo-op has a bad value")
                    return False
                if label:
                    old_dot = _pass2_old_value(
                        label, "DATA label '%s' wasn't defined in first pass!?")
                    if Dot != old_dot:
                        error("DATA address has changed, "
                              "was %s, is now %s"
                              % (_pass2_octal(old_dot), _pass2_octal(Dot)))
                emit_word(value)
                write_list(value, Dot, lnum, line)
                Dot += 1
            elif opcode in ('ASCII', 'ASCIIZ'):
                # 'addr' must exist and be a quote-delimited string
                _pass2_emit_string(opcode, addr, lnum, line,
                                   opcode == 'ASCIIZ')
            elif opcode == 'INC':
                if not addr:
                    error("INC pseudo-op must have a data field")
                if label:
                    old_dot = _pass2_old_value(
                        label, "INC label '%s' wasn't defined in first pass!?")
                    if Dot != old_dot:
                        error("INC dot value has changed, "
                              "was %s, is now %s"
                              % (_pass2_octal(old_dot), _pass2_octal(Dot)))
                inc_fields = addr.split(',')
                if len(inc_fields) != 2:
                    error("INC pseudo-op must have a two-part field")
                (first_code, second_code) = inc_fields
                # the first field must be handled first, it may change
                # the short vector state seen by the second
                high_byte = geninc(first_code)
                low_byte = geninc(second_code)
                word_value = (high_byte << 8) + low_byte
                emit_word(word_value)
                write_list(word_value, Dot, lnum, line)
                Dot += 1
            elif opcode == 'END':
                # get optional start address
                if addr:
                    # we have the optional start address
                    start_address = eval_expr(addr)
                    if start_address != StartAddress:
                        error("Pass 2 start address is different from pass 1, "
                              "was %s but now %s"
                              % (_pass2_octal(StartAddress),
                                 _pass2_octal(start_address)))
                    StartAddress = start_address
                write_list(None, StartAddress, lnum, line)
                break # end of pass
            else:
                # actual machine instruction!
                # if label, check value unchanged
                if label:
                    old_dot = _pass2_old_value(
                        label, "Label '%s' not defined in pass 1!?")
                    if old_dot != Dot:
                        error("Label '%s' has different value in pass 2.\n"
                              "Was %s, now %s"
                              % (label, _pass2_octal(old_dot),
                                 _pass2_octal(Dot)))
                code = gen_code(lnum, line, Dot, label, opcode, indirect, addr)
                write_list(code, Dot, lnum, line)
                Dot += 1
        elif label:
            # label but no code generated, just check Dot for label unchanged
            if label in SymTable:
                dot = SymTable[label]
                if dot != Dot:
                    error("Label '%s' has value %s, was %s in first pass"
                          % (label, _pass2_octal(Dot), _pass2_octal(dot)))
            write_list(None, Dot, lnum, line)
        else:
            write_list(None, None, lnum, line)

        # update the code block base address
        # Dot moved either from ORG or generated code
        # ('is not None' so that address 0 also counts)
        if Dot is not None:
            CodeBlockBase = Dot & CodeBlockMask

    # write the final block of code and optional start address
    write_block()

    # write EOT data and end leader
    write_eot()
    write_leader()

    # NOTE(review): the old "something after END" test compared the last
    # line number with len(lines) in a way that could never be true, and
    # raised NameError when 'lines' was empty.  It was dropped, not
    # enabled, because lines after END have always been accepted.
def genincfield(code):
    """Generate code value of INC sub-field.

    code  the sub-field text, which must have the form:
              [+|-] n [+|-] n
          where the first signed digit is the X movement, the second is
          the Y movement and each 'n' is one of 0, 1, 2 or 3.

    Returns the byte value, built from the current BeamState, the two
    sign bits and the two magnitudes.  A malformed or truncated field is
    reported through error().
    """
    def take_sign(text):
        # returns (negative flag, remaining text); the sign is optional
        if text[:1] == '-':
            return (1, text[1:])
        if text[:1] == '+':
            return (0, text[1:])
        return (0, text)

    def take_digit(text, axis):
        # returns (magnitude, remaining text); also catches a field that
        # ends before the digit
        if not text or text[0] not in '0123':
            error('INC numeric %s value must be 0, 1, 2 or 3' % axis)
        return (int(text[0], base=8), text[1:])

    (xneg, code) = take_sign(code)
    (x, code) = take_digit(code, 'X')
    (yneg, code) = take_sign(code)
    (y, code) = take_digit(code, 'Y')

    if len(code) > 0:
        error('INC value has trailing characters')

    return 0o200 | (BeamState << 6) | (xneg << 5) | (x << 3) | (yneg << 2) | y
def geninc(code):
    """Generate the byte value for an INC code.

    code  one of the two fields that follow an INC opcode (the fields are
          separated by ',').  A field may be:
              Axxx                  the literal octal byte value xxx
              (B|D) [+|-]n[+|-]n    movement with beam on (B) or off (D)
              [+|-]n[+|-]n          movement with the beam state unchanged
              E                     enter short vector mode
              F | N | R             codes that leave short vector mode
              P | X | Y             codes that stay in short vector mode
          where 'n' must be one of 0, 1, 2 or 3.

    The globals ShortVectorMode and BeamState are updated as a side effect.

    Returns the byte value.  Bad fields are reported through error().
    """
    global ShortVectorMode, BeamState

    # byte value and "leaves short vector mode" flag of the simple codes
    # that are only legal inside short vector mode
    simple_codes = {
        'N': (0o111, True), # X&Ylsb<-0
        'P': (0o200, False), # pause (filler)
        'R': (0o151, True), # exit SVM, DRJM, X&Ylsb<-0
        'X': (0o010, False), # Xlsb <- 0
        'Y': (0o001, False), # Ylsb <- 0
    }

    if not code:
        # e.g. the second field of "INC E," is empty
        error('INC field is empty')
        return None

    kind = code[0]

    if kind == 'A': # make an SVM byte
        try:
            byte = int(code[1:], base=8)
        except ValueError:
            error("Bad octal value in INC A field: '%s'" % code[1:])
            return None
        if byte & (~0o377):
            error('Overflow in INC A field')
        if byte & 0o100:
            ShortVectorMode = False
        return byte

    if kind == 'E': # enter SVM mode
        if ShortVectorMode:
            error('Already in INC mode!?')
        ShortVectorMode = True
        BeamState = 1
        return 0o060

    if kind == 'F': # exit SVM, DRJM, ++Xmsb, X&Ylsb<-0
        ShortVectorMode = False
        BeamState = 0
        return 0o171

    if kind in '0123+-':
        return genincfield(code)

    if kind in ('B', 'D'): # beam on / beam off
        if not ShortVectorMode:
            error('Not in INC mode!?')
        BeamState = 1 if kind == 'B' else 0
        return genincfield(code[1:])

    if kind in simple_codes:
        if not ShortVectorMode:
            error('Not in INC mode!?')
        (byte, leaves_svm) = simple_codes[kind]
        if leaves_svm:
            ShortVectorMode = False
        return byte

    error("Unrecognized INC code: '%s' (code=%s)" % (code[0], code))
    return None
def gen_code(lnum, line, dot, label, opcode, indirect, addr):
    """Assemble one line of code.

    lnum      source file line number
    line      the actual source line (for error reporting)
    dot       current address in the assembly (not used, the global Dot
              is the reference)
    label     optional label (not used)
    opcode    opcode, uppercase
    indirect  True if indirect flag found
    addr      address expression, uppercase

    Puts the assembled word into the punch buffer.
    Also returns the final assembled word.

    The operand is checked against the field described in OpcodeData:
        . memory reference opcodes (address required, 11 bit field) must
          address the 2K code block holding the instruction; the low 11
          bits of the address are added to the word
        . any other operand must fit in the opcode's own field mask
    Errors are reported through error().
    """
    # get instruction coding details
    try:
        (word, aok, field_mask, ind) = OpcodeData[opcode]
    except KeyError:
        error("%d: %s\nUnrecognized opcode '%s'" % (lnum, line, opcode))

    value = eval_expr(addr)

    # check if 'addr' has overflowed. add in if OK
    if value:
        if aok == AYES and field_mask == AddressMask:
            # use the block of the current address; CodeBlockBase is only
            # a fallback as it may not have been kept up to date
            if Dot is not None:
                block_base = Dot & CodeBlockMask
            else:
                block_base = CodeBlockBase
            # upper bound is exclusive, base+size is in the next block
            if not (block_base <= value < (block_base + CodeBlockSize)):
                error("Address field overflow: %06o" % value)
            # now mask address to 11 bits
            word += value & AddressMask
        elif aok in (AYES, AOPT):
            # an immediate operand, it only has to fit its own field
            if value & ~field_mask:
                error("Address field overflow: %06o" % value)
            word += value

    # if indirect and indirect OK, set high bit
    if indirect:
        if ind:
            word += 0o100000
        else:
            error("Indirect not allowed here")

    emit_word(word)
    return word
def define_label(label, value, lnum):
    """Record 'label' in the symbol tables.

    label  the label to define
    value  dot value for the label
    lnum   line number the label is defined on

    A label that is already in SymTable is reported through error(),
    quoting the line of the earlier definition and the current line.
    """

    already_defined = label in SymTable
    if already_defined:
        first_lnum = SymTableLine[label]
        message = ("Label '%s' defined twice, at lines %d and %d."
                   % (label, first_lnum, lnum))
        error(message)

    # remember where the label was defined as well as its value
    SymTableLine[label] = lnum
    SymTable[label] = value
def next_symbol(line):
    """Return next symbol and line remainder.

    line  the text to split

    Returns a two-element list [symbol, remainder].  The symbol is the first
    whitespace-delimited token of 'line'; the remainder is everything after
    the whitespace that follows it (trailing whitespace is kept).  A missing
    field is returned as '', so an empty or all-whitespace line gives
    ['', ''] and callers can always unpack two values.
    """

    fields = line.split(maxsplit=1)

    # pad to exactly two fields: split() yields 0, 1 or 2 items
    fields.extend([''] * (2 - len(fields)))

    return fields
def split_fields(line):
    """Break one ASM line into its fields.

    line  the source line to split

    Returns a tuple (label, opcode, indirect, address):

    label     uppercased token starting in the first column, else None
    opcode    uppercased next token, else None
    indirect  True if an address expression began with '*', else False
    address   None if absent; a quoted string is returned wrapped in double
              quotes with its content stripped and case preserved; any other
              expression is returned stripped and uppercased

    An empty line gives (None, None, False, None).

    The address expression is not split on whitespace, so something like
        ALPHA + 100
    comes back as a single field.
    """

    if not line:
        return (None, None, False, None)

    label = None
    opcode = None
    indirect = False
    address = None

    # a label, when present, starts in the very first column
    if line[0] in ' \t;':
        rest = line.strip()
    else:
        (label, rest) = next_symbol(line)
        label = label.upper()

    # the opcode is the next token, unless a comment starts here
    if rest and rest[0] != ';':
        (opcode, rest) = next_symbol(rest)
        opcode = opcode.upper()

    # whatever follows (up to a comment) is the address field
    if rest and rest[0] != ';':
        quote = rest[0]
        if quote in "'\"":
            # string operand: find the matching closing delimiter
            body = rest[1:]
            close = body.find(quote)
            if close == -1:
                error('Unbalanced string delimiter:\n'
                      '%d: %s' % (CurrentLineNumber, CurrentLine))
            address = '"' + body[:close].strip() + '"'
            rest = body[close+1:].strip()
        else:
            # plain expression: drop any trailing comment, then peel off
            # a leading indirect marker
            semi = rest.find(';')
            if semi != -1:
                rest = rest[:semi].strip()
            if rest[0] == '*':
                indirect = True
                rest = rest[1:]
            address = rest.strip().upper()
            rest = None     # the expression consumed the rest of the line

    # anything left over must be a comment
    if rest and rest[0] != ';':
        error('Badly formed instruction:\n'
              '%d: %s' % (CurrentLineNumber, CurrentLine))

    return (label, opcode, indirect, address)
def assemble_file():
    """Run both assembler passes over the file named by AsmFile.

    Reads every line of AsmFile with trailing whitespace removed, reports
    an empty file through error(), then calls pass_1() and, only if that
    returns a true value, pass_2() on the same lines.
    """

    # slurp the whole ASM file, dropping line endings and trailing blanks
    # NOTE(review): the file is opened in binary mode, so each line is a
    # bytes object, while split_fields() uses str operations - confirm that
    # pass_1()/pass_2() decode the lines before parsing them
    with open(AsmFile, 'rb') as source:
        asm_lines = [raw.rstrip() for raw in source]

    if not asm_lines:
        error("File '%s' is empty" % AsmFile)

    if not pass_1(asm_lines):
        return

    pass_2(asm_lines)
def main():
    """The assembler.

    Parses the command line (sys.argv), sets the module-level AsmFile,
    ListFile, OutputFile and BlockLoader values, opens the listing and
    output files and calls assemble_file().

    Exits with status 10 on a bad option, a bad -b value, a wrong number of
    file arguments or an unreadable ASM file, and with status 0 after
    showing help.
    """

    global AsmFile, ListFile, OutputFile, BlockLoader
    global ListFileHandle, OutputFileHandle

    # handle the options
    # sys.argv is passed whole, so args[0] is the program name
    try:
        (opts, args) = getopt.gnu_getopt(sys.argv, "hl:o:b:",
                                         ["help", "list=", "output=",
                                          "blockloader="])
    except getopt.GetoptError:
        usage()
        sys.exit(10)

    ListFile = None
    OutputFile = None
    BlockLoader = BlockLoader_C8LDS

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            sys.exit(0)
        elif opt in ('-l', '--list'):
            ListFile = arg
        elif opt in ('-o', '--output'):
            OutputFile = arg
        elif opt in ('-b', '--blockloader'):
            loader = arg.upper()
            if loader not in ['C8LDS', 'LC16SD']:
                usage("-b options expects either C8LDS or LC16SD param")
                # stop here like every other usage error, rather than
                # falling through and silently selecting LC16SD
                sys.exit(10)
            if loader == 'C8LDS':
                print('BlockLoader=BlockLoader_C8LDS')
                BlockLoader = BlockLoader_C8LDS
            else:
                print('BlockLoader=BlockLoader_LC16SD')
                BlockLoader = BlockLoader_LC16SD

    # expect exactly the program name plus one ASM filename
    if len(args) != 2:
        usage()
        sys.exit(10)

    # get ASM filename and make sure it exists
    AsmFile = args[1]
    try:
        with open(AsmFile, 'rb'):
            pass
    except IOError:
        print("Sorry, can't find file '%s'" % AsmFile)
        sys.exit(10)

    # default the output and listing names from the ASM filename
    (path, ext) = os.path.splitext(AsmFile)
    if OutputFile is None:
        OutputFile = path + PTPExtension
    if ListFile is None:
        ListFile = path + ListFileExtension

    # both files are closed on the way out, even if assembly exits early
    with open(ListFile, 'wb') as ListFileHandle, \
            open(OutputFile, 'wb') as OutputFileHandle:
        assemble_file()
# Run the assembler only when this file is executed as a script.
if __name__ == "__main__":
    main()