mirror of
https://github.com/livingcomputermuseum/pdp7-unix.git
synced 2026-01-22 10:41:05 +00:00
syntax. I think it's OK, but it probably needs more testing. write_test.s was changed to exercise the string parsing.
493 lines
14 KiB
Perl
Executable File
493 lines
14 KiB
Perl
Executable File
#!/usr/bin/perl
|
|
#
|
|
# Read in files of PDP-7 assembly code in Ken Thompon's as format
|
|
# and convert them into PDP-7 machine code
|
|
#
|
|
# (c) 2016 Warren Toomey, GPL3
|
|
#
|
|
use strict;
|
|
use warnings;
|
|
use Data::Dumper;
|
|
|
|
### Global variables ###
|
|
my %Var; # Variables such as ., .., t
|
|
my %Label; # Labels that are defined once
|
|
my %Rlabel; # Relative labels, e.g. 1:, 2:
|
|
# with an array of locations for each label
|
|
|
|
my @Mem; # Actual PDP-7 memory locations
|
|
my @Mline; # Source lines associated with mem locations
|
|
my $origline; # The current input line of code
|
|
|
|
my $stage = 1; # Pass one or pass two
|
|
my $debug = 0; # Run in debug mode
|
|
|
|
### Main program ###
|
|
|
|
# Optional debug argument
|
|
if ( ( @ARGV > 0 ) && ( $ARGV[0] eq "-d" ) ) {
|
|
$debug = 1;
|
|
shift(@ARGV);
|
|
}
|
|
|
|
# Check the arguments
|
|
die("Usage: $0 [-d] file1.s [file2.s ...]\n") if ( @ARGV < 1 );
|
|
|
|
# Define the syscalls as variables so that
|
|
# I don't have to hand-code the logic below.
|
|
# Also, start with the location counter at zero
|
|
%Var = (
|
|
'.' => 0,
|
|
save => 1,
|
|
getuid => 2,
|
|
open => 3,
|
|
read => 4,
|
|
write => 5,
|
|
creat => 6,
|
|
seek => 7,
|
|
tell => 8,
|
|
close => 9,
|
|
link => 10,
|
|
unlink => 11,
|
|
setuid => 12,
|
|
rename => 13,
|
|
exit => 14,
|
|
time => 15,
|
|
intrp => 16,
|
|
chdir => 17,
|
|
chmod => 18,
|
|
chown => 19,
|
|
badcal => 20,
|
|
syslog => 21,
|
|
capt => 23,
|
|
rele => 24,
|
|
status => 25,
|
|
smes => 27,
|
|
rmes => 28,
|
|
fork => 29
|
|
);
|
|
|
|
# Parse all the files
|
|
foreach my $file (@ARGV) {
|
|
parse_file($file);
|
|
}
|
|
|
|
# Now do it all again, pass two
|
|
$Var{'.'} = 0;
|
|
$stage = 2;
|
|
print("Now in stage 2\n") if ($debug);
|
|
foreach my $file (@ARGV) {
|
|
parse_file($file);
|
|
}
|
|
|
|
# Now print out the contents of memory
|
|
for my $i ( 0 .. $#Mem ) {
|
|
if ( defined( $Mem[$i] ) ) {
|
|
printf( "%06o: %06o\t%s\n", $i, $Mem[$i], $Mline[$i] || "" );
|
|
}
|
|
}
|
|
|
|
exit(0);
|
|
|
|
# Open and parse the given file
|
|
sub parse_file {
|
|
my $file = shift;
|
|
open( my $IN, "<", $file ) || die("Cannot read $file: $!\n");
|
|
while ( my $line = <$IN> ) {
|
|
|
|
# Lose the end of line and any leading/trailing whitespace
|
|
# Discard any comments and preceding comment whitespace
|
|
chomp($line);
|
|
$origline = $line;
|
|
$line =~ s{^\s+}{};
|
|
$line =~ s{\s+$}{};
|
|
$line =~ s{\s*\".*}{};
|
|
#print("=>$line<=\n");
|
|
|
|
# Split the line into commands that are ; separated
|
|
# and parse each one
|
|
foreach my $cmd ( split( /;\s*/, $line ) ) {
|
|
|
|
# Split into a section with possible labels and a
|
|
# statement section with definitely no labels. The ?
|
|
# makes the first pattern less greedy.
|
|
#print("cmd is >$cmd<\n");
|
|
$cmd =~ m{(.*?)([^:]*$)};
|
|
my $labelsect = $1;
|
|
my $statement = $2;
|
|
|
|
#print(">$labelsect< >$statement<\n");
|
|
|
|
# Split $labelsect into labels using the : character
|
|
my @labellist = split( /:\s*/, $labelsect );
|
|
|
|
# First pass: parse the labels
|
|
if ( $stage == 1 ) {
|
|
foreach my $l (@labellist) {
|
|
parse_label($l);
|
|
}
|
|
}
|
|
|
|
# Parse the statements on both passes
|
|
parse_statement($statement);
|
|
}
|
|
}
|
|
close($IN);
|
|
}
|
|
|
|
# Parse a label and set its value to the location counter
|
|
sub parse_label {
|
|
my $label = shift;
|
|
|
|
# It's a relative label, save its current value in a list
|
|
if ( $label =~ m{^\d+$} ) {
|
|
push( @{ $Rlabel{$label} }, $Var{'.'} );
|
|
#printf( "Pushing 0%o for relative label %s\n", $Var{'.'}, $label );
|
|
return;
|
|
}
|
|
|
|
# It's a textual label, check if it's been defined before
|
|
die("Label $label defined multiple times\n")
|
|
if ( defined( $Label{$label} ) );
|
|
|
|
# Otherwise, save its value
|
|
$Label{$label} = $Var{'.'};
|
|
#printf( "Set absolute label %s to 0%o\n", $label, $Label{$label} );
|
|
}
|
|
|
|
sub parse_statement {
|
|
my $statement = shift;
|
|
my $location = $Var{'.'};
|
|
#printf( "Location: 0%o\n", $location );
|
|
|
|
# Empty statement, nothing to do
|
|
return if ( $statement =~ m{^\s*$} );
|
|
|
|
# Lose any leading whitespace
|
|
$statement =~ s{^\s*}{};
|
|
|
|
# It's an assignment statement: lhs = rhs
|
|
if ( $statement =~ m{(\S+)\s*=\s*(\S+)} ) {
|
|
my $lhs = $1;
|
|
my $rhs = $2;
|
|
#print("Assignment $statement\n");
|
|
|
|
# Save the expression's value into the variable
|
|
my $result = parse_expression($rhs);
|
|
die("expression $rhs has no value in assignment\n")
|
|
if ( !defined($result) );
|
|
|
|
#printf( "Setting variable %s to 0%o\n", $lhs, $result );
|
|
$Var{$lhs} = $result;
|
|
return;
|
|
}
|
|
|
|
# It's an expression, not a statement
|
|
# Get its value on pass two and save to memory
|
|
# Also save the input line that altered memory
|
|
if ( $stage == 2 ) {
|
|
my $val = parse_expression($statement);
|
|
$Mem[$location] = $val & 0777777;
|
|
$Mline[$location] = $origline;
|
|
#printf( "Saving 0%o into memory location 0%o\n", $val, $location );
|
|
}
|
|
|
|
# Move up to the next location in both passes
|
|
$Var{'.'}++;
|
|
}
|
|
|
|
# Parse an expression and return either:
|
|
# a single value which is a PDP-7 word
|
|
# a single value which is undef, as this was unrecognised
|
|
sub parse_expression {
|
|
my $expression = shift;
|
|
|
|
# If it's a defined variable ( . , .. , etc.)
|
|
# return the value
|
|
return ( $Var{$expression} )
|
|
if ( defined( $Var{$expression} ) );
|
|
|
|
# If it's a numeric literal, simply return it
|
|
if ( $expression =~ m{^-?\d+$} ) {
|
|
#print("Returning numeric literal $expression\n");
|
|
return ( oct($expression) ) if ( $expression =~ m{^0} );
|
|
return ($expression);
|
|
}
|
|
|
|
# List of instruction names and machine code values
|
|
# These come from https://raw.githubusercontent.com/simh/
|
|
# simtools/master/crossassemblers/macro7/macro7.c
|
|
my %inst = (
|
|
'cal' => 0000000,
|
|
'dac' => 0040000,
|
|
'jms' => 0100000,
|
|
'dzm' => 0140000,
|
|
'lac' => 0200000,
|
|
'xor' => 0240000,
|
|
'add' => 0300000,
|
|
'tad' => 0340000,
|
|
'xct' => 0400000,
|
|
'isz' => 0440000,
|
|
'and' => 0500000,
|
|
'sad' => 0540000,
|
|
'jmp' => 0600000,
|
|
'i' => 0020000,
|
|
'eae' => 0640000,
|
|
'iot' => 0700000,
|
|
'sys' => 0700000,
|
|
'opr' => 0740000,
|
|
'law' => 0760000,
|
|
'lam' => 0777777,
|
|
'osc' => 0640001,
|
|
'lacs' => 0641001,
|
|
'omq' => 0640002,
|
|
'lacq' => 0641002,
|
|
'cmq' => 0640004,
|
|
'clq' => 0650000,
|
|
'lmq' => 0652000,
|
|
'abs' => 0644000,
|
|
'gsm' => 0664000,
|
|
'mul' => 0653122,
|
|
'muls' => 0657122,
|
|
'div' => 0640323,
|
|
'divs' => 0644323,
|
|
'idiv' => 0653323,
|
|
'idivs' => 0657323,
|
|
'frdiv' => 0650323,
|
|
'frdivs' => 0654323,
|
|
'norm' => 0640444,
|
|
'norms' => 0660444,
|
|
'lrs' => 0640500,
|
|
'lrss' => 0660500,
|
|
'lls' => 0640600,
|
|
'llss' => 0660600,
|
|
'als' => 0640700,
|
|
'alss' => 0660700,
|
|
'nop' => 0740000,
|
|
'cma' => 0740001,
|
|
'cml' => 0740002,
|
|
'oas' => 0740004,
|
|
'las' => 0750004,
|
|
'ral' => 0740010,
|
|
'rcl' => 0744010,
|
|
'rtl' => 0742010,
|
|
'rar' => 0740020,
|
|
'rcr' => 0744020,
|
|
'rtr' => 0742020,
|
|
'hlt' => 0740040,
|
|
'xx' => 0740040,
|
|
'sma' => 0740100,
|
|
'sza' => 0740200,
|
|
'snl' => 0740400,
|
|
'skp' => 0741000,
|
|
'spa' => 0741100,
|
|
'sna' => 0741200,
|
|
'szl' => 0741400,
|
|
'cll' => 0744000,
|
|
'stl' => 0744002,
|
|
'cla' => 0750000,
|
|
'clc' => 0750001,
|
|
'glk' => 0750010,
|
|
'clsf' => 0700001,
|
|
'iof' => 0700002,
|
|
'ion' => 0700042,
|
|
'iton' => 0700062,
|
|
'clof' => 0700004,
|
|
'clon' => 0700044,
|
|
'tts' => 0703301,
|
|
'skp7' => 0703341,
|
|
'caf' => 0703302,
|
|
'sem' => 0707701,
|
|
'eem' => 0707702,
|
|
'emir' => 0707742,
|
|
'lem' => 0707704,
|
|
'rsf' => 0700101,
|
|
'rrb' => 0700112,
|
|
'rcf' => 0700102,
|
|
'rsa' => 0700104,
|
|
'rsb' => 0700144,
|
|
'psf' => 0700201,
|
|
'pcf' => 0700202,
|
|
'psa' => 0700204,
|
|
'pls' => 0700204,
|
|
'psb' => 0700244,
|
|
'ksf' => 0700301,
|
|
'krb' => 0700312,
|
|
'iors' => 0700314,
|
|
'tsf' => 0700401,
|
|
'tcf' => 0700402,
|
|
'tls' => 0700406,
|
|
'lpsf' => 0706501,
|
|
'lpcb' => 0706502,
|
|
'lpb1' => 0706566,
|
|
'lpb2' => 0706526,
|
|
'lpb3' => 0706546,
|
|
'lpse' => 0706601,
|
|
'lpcf' => 0706602,
|
|
'lppb' => 0706606,
|
|
'lpls' => 0706626,
|
|
'lpps' => 0706646,
|
|
'crsf' => 0706701,
|
|
'crrb' => 0706712,
|
|
'crsa' => 0706704,
|
|
'crsb' => 0706744,
|
|
'mmdf' => 0707501,
|
|
'mmef' => 0707541,
|
|
'mmrd' => 0707512,
|
|
'mmwr' => 0707504,
|
|
'mmbf' => 0707601,
|
|
'mmrs' => 0707612,
|
|
'mmlc' => 0707604,
|
|
'mmse' => 0707644,
|
|
);
|
|
|
|
# Split the expression into two or three words separated by whitespace
|
|
my ( $word1, $word2, $word3 ) = split( /\s+/, $expression );
|
|
#printf( "o>%s<o o>%s<o o>%s<o\n",
|
|
# $word1 || "", $word2 || "", $word3 || "" );
|
|
|
|
# This is a 2-character string literal, put each in separate nine bits
|
|
if ($word1=~ m{^<(.)(.)>$}) {
|
|
print("String literal <$1 and $2>\n") if ($debug);
|
|
return((ord($1) << 9) | ord($2));
|
|
}
|
|
|
|
# This is a <n string literal which might have been followed by a numeric literal
|
|
if ($word1=~ m{^<(.)}) {
|
|
print("String literal <$1 and $word2\n") if ($debug);
|
|
my $msb= $1;
|
|
my $lsb= (defined($word2)) ? parse_expression($word2) : 0;
|
|
return((ord($msb) << 9) | $lsb);
|
|
}
|
|
|
|
# This is a n> string literal preceded by a numeric literal
|
|
if (defined($word2) && ($word2=~ m{(.)>$})) {
|
|
print("String literal $word1 and $1>\n") if ($debug);
|
|
my $msb= parse_expression($word1);
|
|
my $lsb= $1;
|
|
return(($msb << 9) | ord($lsb));
|
|
}
|
|
|
|
# This is a n> string literal not preceded by a numeric literal
|
|
if ($word1=~ m{(.)>$}) {
|
|
print("String literal $1>\n") if ($debug);
|
|
return(ord($1));
|
|
}
|
|
|
|
# This a defined instruction
|
|
if ( defined( $inst{$word1} ) ) {
|
|
#printf( "Found the instruction %s: 0%o\n", $word1, $inst{$word1} );
|
|
my $instruction = $inst{$word1};
|
|
|
|
# Is this an indirect instruction?
|
|
my $indirect = defined($word3) && ( $word3 eq "i" ) ? 020000 : 0;
|
|
|
|
# We have an expression for this instruction
|
|
if ( ( $stage == 2 ) && defined($word2) ) {
|
|
#print(" and I need to parse $word2\n");
|
|
my $val = parse_expression($word2);
|
|
die("Unable to parse $word2 on pass two\n") if ( !defined($val) );
|
|
$instruction |= $val | $indirect;
|
|
}
|
|
return ($instruction);
|
|
}
|
|
|
|
# This is a defined label
|
|
if ( defined( $Label{$word1} ) ) {
|
|
#printf( "Found the label %s: 0%o\n", $word1, $Label{$word1} );
|
|
return ( $Label{$word1} );
|
|
}
|
|
|
|
# This is a defined relative label: digits followed by f or b
|
|
if ( $word1 =~ m{^(\d+)([fb])} ) {
|
|
my $val = find_relative_label( $1, $2 );
|
|
#print("Got location $val for relative label $1$2\n");
|
|
return ($val);
|
|
}
|
|
|
|
# This is an addition. Some operands may not yet be defined
|
|
if ( $word1 =~ m{(.*)\+(.*)} ) {
|
|
my $sum = add( $1, $2 );
|
|
if ( !defined($sum) ) {
|
|
die("Unable to add $word1 on pass two\n") if ( !defined($sum) );
|
|
}
|
|
#print("Did an addition and got $sum\n");
|
|
return ($sum);
|
|
}
|
|
|
|
# This is a subtraction. Some operands may not yet be defined
|
|
if ( $word1 =~ m{(.*)-(.*)} ) {
|
|
my $diff = subtract( $1, $2 );
|
|
if ( !defined($diff) ) {
|
|
die("Unable to subtract $word1 on pass two\n")
|
|
if ( !defined($diff) );
|
|
}
|
|
|
|
#print("Did a subtraction and got $diff\n");
|
|
return ($diff);
|
|
}
|
|
die("I have no idea what $expression is in pass two\n") if ( $stage == 2 );
|
|
}
|
|
|
|
# Add two expression together
|
|
sub add {
|
|
my ( $a, $b ) = @_;
|
|
|
|
my $val1 = parse_expression($a);
|
|
my $val2 = parse_expression($b);
|
|
#print("Adding $val1 + $val2\n");
|
|
return (undef) if ( !defined($val1) || !defined($val2) );
|
|
|
|
#print( " with a good value of ", $val1 + $val2, "\n" );
|
|
return ( $val1 + $val2 );
|
|
}
|
|
|
|
# Subtract two expression
|
|
sub subtract {
|
|
my ( $a, $b ) = @_;
|
|
|
|
my $val1 = parse_expression($a);
|
|
my $val2 = parse_expression($b);
|
|
#print("Subtracting $val1 - $val2\n");
|
|
return (undef) if ( !defined($val1) || !defined($val2) );
|
|
|
|
#print( " with a good value of ", $val1 - $val2, "\n" );
|
|
return ( $val1 - $val2 );
|
|
}
|
|
|
|
# Given a relative label number and a direction,
|
|
# return the location of this relative label or
|
|
# die if we don't have one
|
|
sub find_relative_label {
|
|
my ( $label, $direction ) = @_;
|
|
my $curlocation = $Var{'.'};
|
|
|
|
# Error check: no labels at all
|
|
die("No relative labels\n") if ( !defined( $Rlabel{$label} ) );
|
|
|
|
# Get the list of possible locations for this label
|
|
my $locarray = $Rlabel{$label};
|
|
|
|
# Error check: no locations
|
|
die("No relative labels\n") if ( @{$locarray} == 0 );
|
|
|
|
# Error check: forward but no next location, or backward but no previous
|
|
die("No forward label\n")
|
|
if ( ( $direction eq 'f' ) && ( $curlocation > $locarray->[-1] ) );
|
|
die("No backward label\n")
|
|
if ( ( $direction eq 'b' ) && ( $curlocation < $locarray->[0] ) );
|
|
|
|
# Search forward for a location larger then the current one
|
|
if ( $direction eq 'f' ) {
|
|
foreach my $reflocation ( @{$locarray} ) {
|
|
return ($reflocation) if ( $reflocation > $curlocation );
|
|
}
|
|
}
|
|
else {
|
|
# Search backwards for a location smaller than the current one
|
|
foreach my $reflocation ( sort( { $b <=> $b } @{$locarray} ) ) {
|
|
return ($reflocation) if ( $reflocation < $curlocation );
|
|
}
|
|
}
|
|
}
|