1
0
mirror of https://github.com/DoctorWkt/pdp7-unix.git synced 2026-01-22 02:26:34 +00:00
2016-02-25 21:03:39 -05:00

513 lines
15 KiB
Perl
Executable File

#!/usr/bin/perl
#
# Read in files of PDP-7 assembly code in Ken Thompon's as format
# and convert them into PDP-7 machine code
#
# (c) 2016 Warren Toomey, GPL3
#
use strict;
use warnings;
use Data::Dumper;
### Global variables ###
my %Var; # Variables such as ., .., t
my %Label; # Labels that are defined once
my %Rlabel; # Relative labels, e.g. 1:, 2:
# with an array of locations for each label
my @Mem; # Actual PDP-7 memory locations
my @Mline; # Source lines associated with mem locations
my $origline; # The current input line of code
my $stage = 1; # Pass one or pass two
my $debug = 0; # Run in debug mode
my $errors = 0; # set to non-zero on error
my %Undef; # undefined symbols: only complain once
### Main program ###
# Optional debug argument
if ( ( @ARGV > 0 ) && ( $ARGV[0] eq "-d" ) ) {
$debug = 1;
shift(@ARGV);
}
# Check the arguments
die("Usage: $0 [-d] file1.s [file2.s ...]\n") if ( @ARGV < 1 );
# Define the syscalls as variables so that
# I don't have to hand-code the logic below.
# Also, start with the location counter at zero
%Var = (
'.' => 0,
save => 1,
getuid => 2,
open => 3,
read => 4,
write => 5,
creat => 6,
seek => 7,
tell => 8,
close => 9,
link => 10,
unlink => 11,
setuid => 12,
rename => 13,
exit => 14,
time => 15,
intrp => 16,
chdir => 17,
chmod => 18,
chown => 19,
badcal => 20,
syslog => 21,
capt => 23,
rele => 24,
status => 25,
smes => 27,
rmes => 28,
fork => 29
);
# Parse all the files
foreach my $file (@ARGV) {
parse_file($file);
}
# Now do it all again, pass two
$Var{'.'} = 0;
$stage = 2;
print("Now in stage 2\n") if ($debug);
foreach my $file (@ARGV) {
parse_file($file);
}
# Now print out the contents of memory
for my $i ( 0 .. $#Mem ) {
if ( defined( $Mem[$i] ) ) {
printf( "%06o: %06o\t%s\n", $i, $Mem[$i], $Mline[$i] || "" );
}
}
exit($errors);
my $file; # global for error messages
my $lineno;
sub err {
my $msg = shift;
$errors = 1; # exit status
print STDERR "$file:$lineno: $msg\n";
return 0;
}
# Open and parse the given file
sub parse_file {
$file = shift;
open( my $IN, "<", $file ) || die("Cannot read $file: $!\n");
$lineno = 0;
while ( my $line = <$IN> ) {
$lineno++;
# Lose the end of line and any leading/trailing whitespace
# Discard any comments and preceding comment whitespace
chomp($line);
$origline = $line;
$line =~ s{^\s+}{};
$line =~ s{\s+$}{};
$line =~ s{\s*\".*}{};
#print("=>$line<=\n");
# Split the line into commands that are ; separated
# and parse each one
foreach my $cmd ( split( /;\s*/, $line ) ) {
# Split into a section with possible labels and a
# statement section with definitely no labels. The ?
# makes the first pattern less greedy.
#print("cmd is >$cmd<\n");
$cmd =~ m{(.*?)([^:]*$)};
my $labelsect = $1;
my $statement = $2;
#print(">$labelsect< >$statement<\n");
# Split $labelsect into labels using the : character
my @labellist = split( /:\s*/, $labelsect );
# First pass: parse the labels
if ( $stage == 1 ) {
foreach my $l (@labellist) {
parse_label($l);
}
}
# Parse the statements on both passes
parse_statement($statement);
}
}
close($IN);
}
# Parse a label and set its value to the location counter
sub parse_label {
my $label = shift;
# It's a relative label, save its current value in a list
if ( $label =~ m{^\d+$} ) {
push( @{ $Rlabel{$label} }, $Var{'.'} );
#printf( "Pushing 0%o for relative label %s\n", $Var{'.'}, $label );
return;
}
# It's a textual label, check if it's been defined before
if ( defined( $Label{$label} ) ) {
err("Label $label defined multiple times\n") if ( $stage == 1 );
}
# Otherwise, save its value
$Label{$label} = $Var{'.'};
#printf( "Set absolute label %s to 0%o\n", $label, $Label{$label} );
}
sub parse_statement {
my $statement = shift;
my $location = $Var{'.'};
#printf( "Location: 0%o\n", $location );
# Empty statement, nothing to do
return if ( $statement =~ m{^\s*$} );
# Lose any leading whitespace
$statement =~ s{^\s*}{};
# It's an assignment statement: lhs = rhs
if ( $statement =~ m{(\S+)\s*=\s*(\S+)} ) {
my $lhs = $1;
my $rhs = $2;
#print("Assignment $statement\n");
# Save the expression's value into the variable
my $result = parse_expression($rhs);
die("expression $rhs has no value in assignment\n")
if ( !defined($result) );
#printf( "Setting variable %s to 0%o\n", $lhs, $result );
$Var{$lhs} = $result;
return;
}
# It's an expression, not a statement
# Get its value on pass two and save to memory
# Also save the input line that altered memory
if ( $stage == 2 ) {
my $val = parse_expression($statement);
$Mem[$location] = $val & 0777777;
$Mline[$location] = $origline;
#printf( "Saving 0%o into memory location 0%o\n", $val, $location );
}
# Move up to the next location in both passes
$Var{'.'}++;
}
# Parse an expression and return either:
# a single value which is a PDP-7 word
# a single value which is undef, as this was unrecognised
sub parse_expression {
my $expression = shift;
# If it's a defined variable ( . , .. , etc.)
# return the value
return ( $Var{$expression} )
if ( defined( $Var{$expression} ) );
# If it's a numeric literal, simply return it
if ( $expression =~ m{^-?\d+$} ) {
#print("Returning numeric literal $expression\n");
return ( oct($expression) ) if ( $expression =~ m{^0} );
return ($expression);
}
# List of instruction names and machine code values
# These come from https://raw.githubusercontent.com/simh/
# simtools/master/crossassemblers/macro7/macro7.c
my %inst = (
'cal' => 0000000,
'dac' => 0040000,
'jms' => 0100000,
'dzm' => 0140000,
'lac' => 0200000,
'xor' => 0240000,
'add' => 0300000,
'tad' => 0340000,
'xct' => 0400000,
'isz' => 0440000,
'and' => 0500000,
'sad' => 0540000,
'jmp' => 0600000,
'i' => 0020000,
'eae' => 0640000,
'iot' => 0700000,
'sys' => 0700000,
'opr' => 0740000,
'law' => 0760000,
'lam' => 0777777,
'osc' => 0640001,
'lacs' => 0641001,
'omq' => 0640002,
'lacq' => 0641002,
'cmq' => 0640004,
'clq' => 0650000,
'lmq' => 0652000,
'abs' => 0644000,
'gsm' => 0664000,
'mul' => 0653122,
'muls' => 0657122,
'div' => 0640323,
'divs' => 0644323,
'idiv' => 0653323,
'idivs' => 0657323,
'frdiv' => 0650323,
'frdivs' => 0654323,
'norm' => 0640444,
'norms' => 0660444,
'lrs' => 0640500,
'lrss' => 0660500,
'lls' => 0640600,
'llss' => 0660600,
'als' => 0640700,
'alss' => 0660700,
'nop' => 0740000,
'cma' => 0740001,
'cml' => 0740002,
'oas' => 0740004,
'las' => 0750004,
'ral' => 0740010,
'rcl' => 0744010,
'rtl' => 0742010,
'rar' => 0740020,
'rcr' => 0744020,
'rtr' => 0742020,
'hlt' => 0740040,
'xx' => 0740040,
'sma' => 0740100,
'sza' => 0740200,
'snl' => 0740400,
'skp' => 0741000,
'spa' => 0741100,
'sna' => 0741200,
'szl' => 0741400,
'cll' => 0744000,
'stl' => 0744002,
'cla' => 0750000,
'clc' => 0750001,
'glk' => 0750010,
'clsf' => 0700001,
'iof' => 0700002,
'ion' => 0700042,
'iton' => 0700062,
'clof' => 0700004,
'clon' => 0700044,
'tts' => 0703301,
'skp7' => 0703341,
'caf' => 0703302,
'sem' => 0707701,
'eem' => 0707702,
'emir' => 0707742,
'lem' => 0707704,
'rsf' => 0700101,
'rrb' => 0700112,
'rcf' => 0700102,
'rsa' => 0700104,
'rsb' => 0700144,
'psf' => 0700201,
'pcf' => 0700202,
'psa' => 0700204,
'pls' => 0700204,
'psb' => 0700244,
'ksf' => 0700301,
'krb' => 0700312,
'iors' => 0700314,
'tsf' => 0700401,
'tcf' => 0700402,
'tls' => 0700406,
'lpsf' => 0706501,
'lpcb' => 0706502,
'lpb1' => 0706566,
'lpb2' => 0706526,
'lpb3' => 0706546,
'lpse' => 0706601,
'lpcf' => 0706602,
'lppb' => 0706606,
'lpls' => 0706626,
'lpps' => 0706646,
'crsf' => 0706701,
'crrb' => 0706712,
'crsa' => 0706704,
'crsb' => 0706744,
'mmdf' => 0707501,
'mmef' => 0707541,
'mmrd' => 0707512,
'mmwr' => 0707504,
'mmbf' => 0707601,
'mmrs' => 0707612,
'mmlc' => 0707604,
'mmse' => 0707644,
);
# Split the expression into two or three words separated by whitespace
my ( $word1, $word2, $word3 ) = split( /\s+/, $expression );
#printf( "o>%s<o o>%s<o o>%s<o\n",
# $word1 || "", $word2 || "", $word3 || "" );
# This is a 2-character string literal, put each in separate nine bits
if ($word1=~ m{^<(.)(.)>$}) {
print("String literal <$1 and $2>\n") if ($debug);
return((ord($1) << 9) | ord($2));
}
# This is a <n string literal which might have been followed by a numeric literal
if ($word1=~ m{^<(.)}) {
print("String literal <$1 and $word2\n") if ($debug);
my $msb= $1;
my $lsb= (defined($word2)) ? parse_expression($word2) : 0;
return((ord($msb) << 9) | $lsb);
}
# This is a n> string literal preceded by a numeric literal
if (defined($word2) && ($word2=~ m{(.)>$})) {
print("String literal $word1 and $1>\n") if ($debug);
my $msb= parse_expression($word1);
my $lsb= $1;
return(($msb << 9) | ord($lsb));
}
# This is a n> string literal not preceded by a numeric literal
if ($word1=~ m{(.)>$}) {
print("String literal $1>\n") if ($debug);
return(ord($1));
}
# This a defined instruction
if ( defined( $inst{$word1} ) ) {
#printf( "Found the instruction %s: 0%o\n", $word1, $inst{$word1} );
my $instruction = $inst{$word1};
# Is this an indirect instruction?
my $indirect = defined($word3) && ( $word3 eq "i" ) ? 020000 : 0;
# We have an expression for this instruction
if ( ( $stage == 2 ) && defined($word2) ) {
#print(" and I need to parse $word2\n");
my $val = parse_expression($word2);
die("Unable to parse $word2 on pass two\n") if ( !defined($val) );
$instruction |= $val | $indirect;
}
return ($instruction);
}
# This is a defined label
if ( defined( $Label{$word1} ) ) {
#printf( "Found the label %s: 0%o\n", $word1, $Label{$word1} );
return ( $Label{$word1} );
}
# This is a defined relative label: digits followed by f or b
if ( $word1 =~ m{^(\d+)([fb])} ) {
my $val = find_relative_label( $1, $2 );
#print("Got location $val for relative label $1$2\n");
return ($val);
}
# This is an addition. Some operands may not yet be defined
if ( $word1 =~ m{(.*)\+(.*)} ) {
my $sum = add( $1, $2 );
if ( !defined($sum) ) {
die("Unable to add $word1 on pass two\n") if ( !defined($sum) );
}
#print("Did an addition and got $sum\n");
return ($sum);
}
# This is a subtraction. Some operands may not yet be defined
if ( $word1 =~ m{(.*)-(.*)} ) {
my $diff = subtract( $1, $2 );
if ( !defined($diff) ) {
die("Unable to subtract $word1 on pass two\n")
if ( !defined($diff) );
}
#print("Did a subtraction and got $diff\n");
return ($diff);
}
if ( $stage == 2 ) {
err("undefined: $expression") unless (defined $Undef{$expression});
$Undef{$expression} = 1;
}
return 0;
}
# Add two expression together
sub add {
my ( $a, $b ) = @_;
my $val1 = parse_expression($a);
my $val2 = parse_expression($b);
#print("Adding $val1 + $val2\n");
return (undef) if ( !defined($val1) || !defined($val2) );
#print( " with a good value of ", $val1 + $val2, "\n" );
return ( $val1 + $val2 );
}
# Subtract two expression
sub subtract {
my ( $a, $b ) = @_;
my $val1 = parse_expression($a);
my $val2 = parse_expression($b);
#print("Subtracting $val1 - $val2\n");
return (undef) if ( !defined($val1) || !defined($val2) );
#print( " with a good value of ", $val1 - $val2, "\n" );
return ( $val1 - $val2 );
}
# Given a relative label number and a direction,
# return the location of this relative label or
# die if we don't have one
sub find_relative_label {
my ( $label, $direction ) = @_;
my $curlocation = $Var{'.'};
# Error check: no labels at all
if ( !defined( $Rlabel{$label} ) ) {
return err("relative label $label not defined\n");
}
# Get the list of possible locations for this label
my $locarray = $Rlabel{$label};
# Error check: no locations
return err("No relative labels") if ( @{$locarray} == 0 );
# Error check: forward but no next location, or backward but no previous
return err("No forward label $label")
if ( ( $direction eq 'f' ) && ( $curlocation > $locarray->[-1] ) );
return err("No backward label $label")
if ( ( $direction eq 'b' ) && ( $curlocation < $locarray->[0] ) );
# Search forward for a location larger then the current one
if ( $direction eq 'f' ) {
foreach my $reflocation ( @{$locarray} ) {
return ($reflocation) if ( $reflocation > $curlocation );
}
}
else {
# Search backwards for a location smaller than the current one
foreach my $reflocation ( sort( { $b <=> $b } @{$locarray} ) ) {
return ($reflocation) if ( $reflocation < $curlocation );
}
}
}