1
0
mirror of https://github.com/DoctorWkt/pdp7-unix.git synced 2026-01-11 23:53:23 +00:00

632 lines
18 KiB
Perl
Executable File

#!/usr/bin/env perl
#
# Read in files of PDP-7 assembly code in Ken Thompson's as format
# and convert them into PDP-7 machine code
#
# (c) 2016 Warren Toomey, GPL3
# Tweaked by Phil Budne (line, expression parsing, output formats)
#
use strict;
use warnings;
use Data::Dumper;
use Getopt::Long qw(GetOptions);
Getopt::Long::Configure qw(gnu_getopt);
### Global variables ###
my %Var; # Variables such as ., .., t
my %Glabel; # Global labels that are defined once
my %Llabel; # Local labels that are defined once
my %Islocal; # True if the label is a local label
my %Rlabel; # Relative labels, e.g. 1:, 2:
# with an array of locations for each label
my @Mem; # Actual PDP-7 memory locations
my @Mline; # Source lines associated with mem locations
my $origline; # The original current input line of code
my $line; # line being parsed
my $stage = 1; # Pass one or pass two
my $errors = 0; # set to non-zero on error
my $line_error = ' ';
my $file; # current file name
my $lineno; # current line number
my $OUTPUT; # output file
my $RELATIVE = 01000000; # set on non-absolute symbol values
my $BASE = 0|$RELATIVE; # starting value for "."
### Main program ###
## command line options
my $debug = 0; # Run in debug mode
my $format = 'a7out'; # output format
my $namelist = 0; # output n.out file
my $output = 'a.out'; # output file
my $no_label_warnings = 0; # suppress multiply defined label warnings
# keep this near the GetOptions call to make it easy to add documentation!
sub usage {
die("Usage: $0 [--debug] [--format=a7out|list|ptr|rim ]\n" .
"\t[-n] [--out file] file1.s [file2.s ...]\n");
}
GetOptions(
'debug|d' => \$debug,
'format|f=s' => \$format,
'namelist|n' => \$namelist,
'output|o=s' => \$output,
'no-label-warnings' => \$no_label_warnings,
) or usage();
usage() if ( @ARGV < 1 );
# http://minnie.tuhs.org/cgi-bin/utree.pl?file=V3/man/manx/as.1
# ".." is the relocation constant and is added to each relocatable
# reference. On a PDP-11 with relocation hardware, its value is 0; on
# most systems without protection, its value is 40000(8).
# PLB: "relocatable" values are flagged with $RELATIVE
# start with the location counter at zero
# predefine syscall and opcodes as variables
%Var = (
'.' => $BASE,
'..' => 4096, # output base addr?
# as.s does not have an initial symbol table
# (except for the above), so there must have been a
# user "ops" file
save => 1, # saves core dump & user area!
getuid => 2,
open => 3,
read => 4,
write => 5,
creat => 6,
seek => 7,
tell => 8,
close => 9,
link => 10,
unlink => 11,
setuid => 12,
rename => 13,
exit => 14,
time => 15,
intrp => 16,
chdir => 17,
chmod => 18,
chown => 19,
# 20 removed
sysloc => 21, # return system addresses
# 22 removed
capt => 23, # capture display?
rele => 24, # release display?
status => 25, # "stat"
smes => 27,
rmes => 28,
fork => 29,
# List of instruction names and machine code values
# These come from https://raw.githubusercontent.com/simh/
sys => 0020000, # "cal i" instruction (trap indirect thru 020)
i => 0020000, # indirect bit
# memory reference instructions
dac => 0040000, # deposit AC
jms => 0100000, # jump to subroutine
dzm => 0140000, # deposit zero in memory
lac => 0200000, # load AC
xor => 0240000, # exclusive or
add => 0300000, # one's complement add
tad => 0340000, # two's complement add
xct => 0400000, # execute
isz => 0440000, # increment and skip if zero
and => 0500000, # AND with contents of Y
sad => 0540000, # skip if AC different from content of Y
jmp => 0600000, # jump to Y
# Type 177 Extended Arithmetic Element (EAE)
eae => 0640000, # base instruction (nop)
osc => 0640001, # OR SC into AC
omq => 0640002, # OR MQ into AC
cmq => 0640004, # Complement MQ
div => 0640323, # divide
norm => 0640444, # normalize, unsigned
lls => 0640600, # long left shift
clls => 0641600, # lls but clear AC first
als => 0640700, # AC shift
lrs => 0640500, # long right shift
ecla => 0641000, # clear AC
lacs => 0641001, # load AC with SC
lacq => 0641002, # load AC with MQ
abs => 0644000, # absolute value
divs => 0644323, # divide, signed
clq => 0650000, # clear MQ
frdiv => 0650323, # fractional divide
lmq => 0652000, # load MQ from AC
mul => 0653122, # multiply
idiv => 0653323, # integer divide
idivs => 0657323, # integer divide, signed
frdivs => 0654323, # fractional divide, signed
muls => 0657122, # multiply, signed
norms => 0660444, # normalize, signed
gsm => 0664000, # get sign and magnitude
lrss => 0660500, # long right shift, signed
llss => 0660600, # long left shift, signed
alss => 0660700, # AC left shift, signed
# PLB: removed I/OT instructions: kernel uses sop.s
# Operate Instructions
# Group 1 (OPR 1) instructions
opr => 0740000, # base operate instruction (nop)
nop => 0740000,
cma => 0740001, # complement accumulator
cml => 0740002, # complement link
oas => 0740004, # inclusive or accumulator switches
ral => 0740010, # rotate (ac, link) left
rar => 0740020, # rotate (ac, link) right
hlt => 0740040, # HALT
xx => 0740040,
sma => 0740100, # skip on minus accumulator
sza => 0740200, # skip on zero accumulator
snl => 0740400, # skip on non-zero link
skp => 0741000, # unconditional skip
spa => 0741100, # skip on positive accumulator
sna => 0741200, # skip on negative accumulator
szl => 0741400, # skip on zero link
rtl => 0742010, # rotate two left (ral*2)
rtr => 0742020, # rotate two right (rar*2)
cll => 0744000, # clear link
stl => 0744002, # set link
rcl => 0744010, # clear link, rotate left
rcr => 0744020, # clear link, rotate right
cla => 0750000, # clear accumulator
clc => 0750001, # clear and complement acc
las => 0750004, # load acc from switches
glk => 0750010, # get link
# Group 2 operate
law => 0760000, # load accumulator with (instruction)
# lam => 0777777, # (load accumulator minus)
);
# Parse all the files
print STDERR "I\n"; # like the real as
foreach my $file (@ARGV) {
parse_file($file);
}
# Now do it all again, pass two
$Var{'.'} = $BASE;
$stage = 2;
open(my $OUT, ">$output") || die "$output";
print STDERR "II\n"; # like the real as
foreach my $file (@ARGV) {
print STDERR "$file\n"; # like the real as
parse_file($file);
}
if ($format eq 'a7out') {
# print out the contents of memory
for my $i ( 0 .. $#Mem ) {
if ( defined( $Mem[$i] ) ) {
printf $OUT "%06o: %06o\t%s\n", $i, $Mem[$i], ($Mline[$i] || "");
}
}
}
elsif ($format eq 'list') {
print $OUT "\n";
print $OUT "Labels:\n";
dump_labels($OUT);
}
elsif ($format eq 'ptr') { # dump absolute memory in PTR binary
for my $loc ( $Var{'..'} .. $#Mem ) {
punch($Mem[$loc] || 0);
}
}
elsif ($format eq 'rim') { # "Hardware Read In" tape
# only handles continguous memory, but no overhead
my $base = $Var{'..'};
for my $loc ( $base .. $#Mem ) {
punch($Mem[$loc] || 0);
}
# final word: command; has 0100 lit on last frame
punch(0600000 | $base, 0100 );
}
else {
die("unknown format $format");
}
close($OUT);
if ($namelist) {
# as.s writes a binary file named n.out, ours is ascii
open (my $NOUT, ">", "n.out") || die "n.out";
dump_labels($NOUT);
close($NOUT);
}
exit($errors);
# report an assmebly error:
# sets error flag
# reports filename:lineno for emacs m-x compile
sub err {
$line_error = shift;
my $msg = shift;
$errors = 1; # exit status
if ($stage == 2) {
print STDERR "$file:$lineno: $msg\n";
print $OUT "$file:$lineno: $msg\n" if ($format eq 'list');
}
return 0; # expression value
}
# Set a label, either global or local
sub set_label
{
my ($label,$loc)= @_;
# PLB: truncate to eight: moo.s declares "standing"
# but references it as "standings"
$label = substr($label, 0, 8);
# It is a local label if we're told it is, or if it starts with "L"
if ($Islocal{$file}{$label} || $label=~ m{^L}) {
# An error to have different values
if ( defined( $Llabel{$file}{$label} ) && $Llabel{$file}{$label} != $loc ) {
# non-fatal: as.s doesn't even warn!!!!
print STDERR "$file:$lineno: Local label $label multiply defined\n"
if ($stage == 2 && !$no_label_warnings);
}
else {
$Llabel{$file}{$label} = $loc;
printf( "Set local label %s to %#o\n", $label, $loc ) if ($debug);
}
} else {
# original as doesn't complain about multiple definitions of labels
# (Space Travel depends on this). Now a warning (on by default)
if ( defined( $Glabel{$label} ) && $Glabel{$label} != $loc ) {
print STDERR "$file:$lineno: Warning: Global label $label multiply defined\n"
if ($stage == 2 && !$no_label_warnings);
}
$Glabel{$label} = $loc;
printf( "Set global label %s to %#o\n", $label, $loc ) if ($debug);
}
}
# Get the value of a global or local label
sub get_label
{
my $label= shift;
# PLB: truncate to eight: moo.s declares "standing"
# but references it as "standings"
$label = substr($label, 0, 8);
return($Llabel{$file}{$label}) if (defined($Llabel{$file}{$label}));
return($Glabel{$label});
}
# Open and parse the given file
sub parse_file {
$file = shift;
open( my $IN, "<", $file ) || die("Cannot open $file: $!\n");
$lineno = 0;
while ( $line = <$IN> ) {
$lineno++;
chomp($line); # Lose the end of line
$origline = $line;
print $OUT "\t\t$line\n"
if ($stage == 2 && $line ne '' && $format eq 'list');
parse_line();
}
close($IN);
}
# process a label and set its value to the location counter
# OK for symbolic label to be entered twice, so long as it's the same value
# (ie; both passes)
sub process_label {
my $label = shift;
my $loc = $Var{'.'};
print "process_label $label\n" if ($debug);
if ( $label =~ m{^\d+$} ) { # numeric (relative) label?
if ($stage == 1) {
push( @{ $Rlabel{$label} }, $loc );
printf( "Pushing %#o for label %s\n", $loc, $label ) if ($debug);
}
} # numeric label
else { # symbolic label
set_label($label, $loc);
}
}
# Parse assembler directives. These were not in the original
# PDP-7 Unix source, but we need them so that we can write
# compilers that target this assembler.
sub parse_directive
{
my $directive= shift;
print("Got directive $directive\n") if ($debug);
# Set this as a local label
if ($directive=~ m{^\@local\s+(\S+)}) {
$Islocal{$file}{$1}=1;
}
}
sub eol {
return $line eq '' || $line =~ m{^"}; # empty or comment
}
# Blame Phil for this....
# parses global $line based on prefixes, nibbling of a bit at a time
# (: and ; can appear in char literals)
# handles multiple ';' separated words per line
# allows " in character literals (tho none appear in listings)
sub parse_line {
while (1) {
$line_error = ' '; # clear listing error indicator
return if (eol());
print "parse_line: '$line'\n" if ($debug);
# Lose any leading whitespace
$line =~ s{^\s*}{};
# Assembler directives start with an @
if ($line =~ m{^\@}) {
parse_directive($line);
return;
}
while ($line =~ s{^([A-Za-z0-9_\.]+):\s*}{}) { # labels
process_label($1);
}
return if (eol());
if ( $line =~ s{^([^;= \t]+)\s*=}{}) { # assignment
my $lhs = $1;
my $word = parse_expression();
printf( "Setting variable %s to 0%o\n", $lhs, $word ) if ($debug);
$Var{$lhs} = $word;
printf $OUT "\t%06o %s\n", $word, $line_error
if ($stage == 2 && $format eq 'list');
}
else { # bare expression (not assignment)
# Get its value on pass two and save to memory
# Also save the input line that altered memory
my $word = parse_expression();
if ( $stage == 2 ) {
my $location = $Var{'.'};
if ($location & $RELATIVE) { # non-absolute location?
$location &= 0777777;
$location += $Var{'..'} & 0777777; # relocate
# XXX check for overflow?
}
if ($word & $RELATIVE) { # word created from relative addresses?
$word &= 0777777;
$word += $Var{'..'} & 0777777; # relocate
# XXX check for overflow?
}
if ($location < 0) {
err('.', 'below base');
}
else {
$Mem[$location] = $word;
}
$Mline[$location] = $origline;
$origline = '';
if ($format eq 'list' and defined($word)) {
# show flags??
printf $OUT "%06o: %06o %s\n",
$location, $word, $line_error;
}
}
# Move up to the next location in both passes
$Var{'.'}++;
} # expr
# eat trailing whitespace and ";", if any
$line =~ s{^\s*;?}{};
} # while
}
# Blame Phil for this bit too...
# Parse an expression off $line and return a PDP-7 word
# as a series of whitespace separated "syllables"
# ORed, added, or subtracted
sub parse_expression {
my $word = 0;
my $flags = 0;
print "expression: '$line'\n" if ($debug);
while (1) {
my $syllable = 0;
my $op = '|';
$line =~ s{^\s+}{}; # as.s accepts ",' as whitespace too!
if ($line eq '' || $line =~ m{^[";]}) { # EOL ; and " terminate expr
$word |= $flags;
printf("\tparse_expression => %#o\n", $word) if ($debug);
return $word;
}
print " '$line'\n" if ($debug);
if ($line =~ s{^-}{}) {
print "\tfound -\n" if ($debug);
$op = '-';
}
elsif ($line =~ s{^\+}{}) {
print "\tfound +\n" if ($debug);
$op = '+';
}
if ($line =~ s{^<(.)}{}) { # <char
print "\tfound <x\n" if ($debug);
$syllable = ord($1) << 9; # absolute
}
elsif ($line =~ s{^(.)>}{}) { # char>
print "\tfound x>\n" if ($debug);
$syllable = ord($1) # absolute
}
elsif ($line =~ s{^>(.)}{}) { # >char !!
print "\tfound >x\n" if ($debug);
$syllable = ord($1) # absolute
}
elsif ($line =~ s{^([A-Za-z_\.][A-Za-z0-9_\.]*)}{}) {
my $sym = $1;
print "\tsym: $sym\n" if ($debug);
if (defined($Var{$sym})) {
$syllable = $Var{$sym};
printf("\tvar: %s: %#o\n", $sym, $syllable) if ($debug);
}
elsif (defined(get_label($sym))) {
$syllable = get_label($sym);
printf("\tlbl: %s: %#o\n", $sym, $syllable) if ($debug);
}
elsif ($stage == 2) {
err('U', "$sym not defined")
} # pass 2
} # symbol
elsif ( $line =~ s{^(\d+)([fb])}{} ) { # relative label
printf "\tfound relative: $1$2\n" if ($debug);
$syllable = find_relative_label( $1, $2 ) if ($stage == 2);
}
elsif ( $line =~ s{^(\d+)}{} ) { # constant
my $value = $1;
printf "\tfound constant: $value\n" if ($debug);
if ( $value =~ m{^0} ) {
# PLB 2020-10-05: behave like as.s
$syllable = 0;
for my $digit (split(//, $value)) {
$syllable = $syllable * 010 + ord($digit) - ord('0');
}
}
else {
$syllable = $value + 0;
}
$syllable &= 0777777; # absolute
}
else {
# From the BSD fortune file:
# Ken Thompson has an automobile which he helped design.
# Unlike most automobiles, it has neither speedometer,
# nor gas gauge, nor any of the numerous idiot lights
# which plague the modern driver. Rather, if the driver
# makes any mistake, a giant "?" lights up in the center
# of the dashboard. "The experienced driver",
# he says, "will usually know what's wrong.
err('?', "huh? '$line'");
$line = ''; # abort processing
return undef;
}
my $sylflags = $syllable & $RELATIVE;
$syllable &= 0777777;
if ($op eq '+') {
$word += $syllable;
$flags |= $sylflags;
}
elsif ($op eq '-') {
$word -= $syllable;
if ($flags & $RELATIVE) {
# relative-relative => absolute!
if ($sylflags & $RELATIVE) {
$flags &= ~$RELATIVE;
}
# else: relative-abs => relative (no change)
}
else { # word is absolute
if ($sylflags & $RELATIVE) {
err('A', 'absolute value minus relative??');
}
# else: absolute-absolute => absolute (no change)
}
}
else {
$word |= $syllable;
$flags |= $sylflags;
}
$word &= 0777777;
printf("\tsyllable: %#o word: %#o\n", $syllable, $word) if ($debug);
}
}
# Given a relative label number and a direction,
# return the location of this relative label or
# die if we don't have one
sub find_relative_label {
my ( $label, $direction ) = @_;
my $curlocation = $Var{'.'};
# Error check: no labels at all
if ( !defined( $Rlabel{$label} ) ) {
return err('U', "relative label $label never defined");
}
# Get the list of possible locations for this label
my $locarray = $Rlabel{$label};
# Error check: no locations
return err('U', "No relative labels") if ( @{$locarray} == 0 );
if ( $direction eq 'f' ) {
# Search forward for first location larger then the current one
foreach my $reflocation ( @{$locarray} ) {
printf("forward %#o %#o\n", $reflocation, $curlocation) if ($debug);
return ($reflocation) if ( $reflocation > $curlocation );
}
}
else {
# Search backwards for first location smaller than the current one
foreach my $reflocation ( sort( { $b <=> $a } @{$locarray} ) ) {
printf("backward %#o %#o\n", $reflocation, $curlocation) if ($debug);
return ($reflocation) if ( $reflocation < $curlocation );
}
}
return err('U', "undefined relative reference $label$direction");
}
sub punch { # output a word in paper tape binary format
my $word = shift;
my $final = shift || 0;
printf $OUT "%c%c%c",
(($word >> 12) & 077) | 0200,
(($word >> 6) & 077) | 0200,
($word & 077) | 0200 | $final;
}
sub dump_labels { # for 'list' and --namelist
my $file = shift;
foreach my $key (sort keys %Glabel) {
my $addr = $Glabel{$key};
my $flags = ($addr & $RELATIVE) ? "r" : "";
if ($addr & $RELATIVE) {
$addr &= 0777777;
$addr += $Var{'..'};
}
printf $file "%-8.8s %#06o %s\n", $key, $addr & 0777777, $flags;
}
}