#!/usr/bin/perl # # Read in files of PDP-7 assembly code in Ken Thompon's as format # and convert them into PDP-7 machine code # # (c) 2016 Warren Toomey, GPL3 # use strict; use warnings; use Data::Dumper; ### Global variables ### my %Var; # Variables such as ., .., t my %Label; # Labels that are defined once my %Rlabel; # Relative labels, e.g. 1:, 2: # with an array of locations for each label my @Mem; # Actual PDP-7 memory locations my @Mline; # Source lines associated with mem locations my $origline; # The current input line of code my $stage = 1; # Pass one or pass two my $debug = 0; # Run in debug mode my $errors = 0; # set to non-zero on error my %Undef; # undefined symbols: only complain once ### Main program ### # Optional debug argument if ( ( @ARGV > 0 ) && ( $ARGV[0] eq "-d" ) ) { $debug = 1; shift(@ARGV); } # Check the arguments die("Usage: $0 [-d] file1.s [file2.s ...]\n") if ( @ARGV < 1 ); # Define the syscalls as variables so that # I don't have to hand-code the logic below. # Also, start with the location counter at zero %Var = ( '.' => 0, save => 1, getuid => 2, open => 3, read => 4, write => 5, creat => 6, seek => 7, tell => 8, close => 9, link => 10, unlink => 11, setuid => 12, rename => 13, exit => 14, time => 15, intrp => 16, chdir => 17, chmod => 18, chown => 19, badcal => 20, syslog => 21, capt => 23, rele => 24, status => 25, smes => 27, rmes => 28, fork => 29 ); # Parse all the files foreach my $file (@ARGV) { parse_file($file); } # Now do it all again, pass two $Var{'.'} = 0; $stage = 2; print("Now in stage 2\n") if ($debug); foreach my $file (@ARGV) { parse_file($file); } # Now print out the contents of memory for my $i ( 0 .. $#Mem ) { if ( defined( $Mem[$i] ) ) { printf( "%06o: %06o\t%s\n", $i, $Mem[$i], $Mline[$i] || "" ); } } exit($errors); my $file; # global for error messages my $lineno; sub err { my $msg = shift; $errors = 1; # exit status print STDERR "$file:$lineno: $msg\n"; return 0; } # Open and parse the given file sub parse_file { $file = shift; open( my $IN, "<", $file ) || die("Cannot read $file: $!\n"); $lineno = 0; while ( my $line = <$IN> ) { $lineno++; # Lose the end of line and any leading/trailing whitespace # Discard any comments and preceding comment whitespace chomp($line); $origline = $line; $line =~ s{^\s+}{}; $line =~ s{\s+$}{}; $line =~ s{\s*\".*}{}; #print("=>$line<=\n"); # Split the line into commands that are ; separated # and parse each one foreach my $cmd ( split( /;\s*/, $line ) ) { # Split into a section with possible labels and a # statement section with definitely no labels. The ? # makes the first pattern less greedy. #print("cmd is >$cmd<\n"); $cmd =~ m{(.*?)([^:]*$)}; my $labelsect = $1; my $statement = $2; #print(">$labelsect< >$statement<\n"); # Split $labelsect into labels using the : character my @labellist = split( /:\s*/, $labelsect ); # First pass: parse the labels if ( $stage == 1 ) { foreach my $l (@labellist) { parse_label($l); } } # Parse the statements on both passes parse_statement($statement); } } close($IN); } # Parse a label and set its value to the location counter sub parse_label { my $label = shift; # It's a relative label, save its current value in a list if ( $label =~ m{^\d+$} ) { push( @{ $Rlabel{$label} }, $Var{'.'} ); #printf( "Pushing 0%o for relative label %s\n", $Var{'.'}, $label ); return; } # It's a textual label, check if it's been defined before if ( defined( $Label{$label} ) ) { err("Label $label defined multiple times\n") if ( $stage == 1 ); } # Otherwise, save its value $Label{$label} = $Var{'.'}; #printf( "Set absolute label %s to 0%o\n", $label, $Label{$label} ); } sub parse_statement { my $statement = shift; my $location = $Var{'.'}; #printf( "Location: 0%o\n", $location ); # Empty statement, nothing to do return if ( $statement =~ m{^\s*$} ); # Lose any leading whitespace $statement =~ s{^\s*}{}; # It's an assignment statement: lhs = rhs if ( $statement =~ m{(\S+)\s*=\s*(\S+)} ) { my $lhs = $1; my $rhs = $2; #print("Assignment $statement\n"); # Save the expression's value into the variable my $result = parse_expression($rhs); die("expression $rhs has no value in assignment\n") if ( !defined($result) ); #printf( "Setting variable %s to 0%o\n", $lhs, $result ); $Var{$lhs} = $result; return; } # It's an expression, not a statement # Get its value on pass two and save to memory # Also save the input line that altered memory if ( $stage == 2 ) { my $val = parse_expression($statement); $Mem[$location] = $val & 0777777; $Mline[$location] = $origline; #printf( "Saving 0%o into memory location 0%o\n", $val, $location ); } # Move up to the next location in both passes $Var{'.'}++; } # Parse an expression and return either: # a single value which is a PDP-7 word # a single value which is undef, as this was unrecognised sub parse_expression { my $expression = shift; # If it's a defined variable ( . , .. , etc.) # return the value return ( $Var{$expression} ) if ( defined( $Var{$expression} ) ); # If it's a numeric literal, simply return it if ( $expression =~ m{^-?\d+$} ) { #print("Returning numeric literal $expression\n"); return ( oct($expression) ) if ( $expression =~ m{^0} ); return ($expression); } # List of instruction names and machine code values # These come from https://raw.githubusercontent.com/simh/ # simtools/master/crossassemblers/macro7/macro7.c my %inst = ( 'cal' => 0000000, 'dac' => 0040000, 'jms' => 0100000, 'dzm' => 0140000, 'lac' => 0200000, 'xor' => 0240000, 'add' => 0300000, 'tad' => 0340000, 'xct' => 0400000, 'isz' => 0440000, 'and' => 0500000, 'sad' => 0540000, 'jmp' => 0600000, 'i' => 0020000, 'eae' => 0640000, 'iot' => 0700000, 'sys' => 0700000, 'opr' => 0740000, 'law' => 0760000, 'lam' => 0777777, 'osc' => 0640001, 'lacs' => 0641001, 'omq' => 0640002, 'lacq' => 0641002, 'cmq' => 0640004, 'clq' => 0650000, 'lmq' => 0652000, 'abs' => 0644000, 'gsm' => 0664000, 'mul' => 0653122, 'muls' => 0657122, 'div' => 0640323, 'divs' => 0644323, 'idiv' => 0653323, 'idivs' => 0657323, 'frdiv' => 0650323, 'frdivs' => 0654323, 'norm' => 0640444, 'norms' => 0660444, 'lrs' => 0640500, 'lrss' => 0660500, 'lls' => 0640600, 'llss' => 0660600, 'als' => 0640700, 'alss' => 0660700, 'nop' => 0740000, 'cma' => 0740001, 'cml' => 0740002, 'oas' => 0740004, 'las' => 0750004, 'ral' => 0740010, 'rcl' => 0744010, 'rtl' => 0742010, 'rar' => 0740020, 'rcr' => 0744020, 'rtr' => 0742020, 'hlt' => 0740040, 'xx' => 0740040, 'sma' => 0740100, 'sza' => 0740200, 'snl' => 0740400, 'skp' => 0741000, 'spa' => 0741100, 'sna' => 0741200, 'szl' => 0741400, 'cll' => 0744000, 'stl' => 0744002, 'cla' => 0750000, 'clc' => 0750001, 'glk' => 0750010, 'clsf' => 0700001, 'iof' => 0700002, 'ion' => 0700042, 'iton' => 0700062, 'clof' => 0700004, 'clon' => 0700044, 'tts' => 0703301, 'skp7' => 0703341, 'caf' => 0703302, 'sem' => 0707701, 'eem' => 0707702, 'emir' => 0707742, 'lem' => 0707704, 'rsf' => 0700101, 'rrb' => 0700112, 'rcf' => 0700102, 'rsa' => 0700104, 'rsb' => 0700144, 'psf' => 0700201, 'pcf' => 0700202, 'psa' => 0700204, 'pls' => 0700204, 'psb' => 0700244, 'ksf' => 0700301, 'krb' => 0700312, 'iors' => 0700314, 'tsf' => 0700401, 'tcf' => 0700402, 'tls' => 0700406, 'lpsf' => 0706501, 'lpcb' => 0706502, 'lpb1' => 0706566, 'lpb2' => 0706526, 'lpb3' => 0706546, 'lpse' => 0706601, 'lpcf' => 0706602, 'lppb' => 0706606, 'lpls' => 0706626, 'lpps' => 0706646, 'crsf' => 0706701, 'crrb' => 0706712, 'crsa' => 0706704, 'crsb' => 0706744, 'mmdf' => 0707501, 'mmef' => 0707541, 'mmrd' => 0707512, 'mmwr' => 0707504, 'mmbf' => 0707601, 'mmrs' => 0707612, 'mmlc' => 0707604, 'mmse' => 0707644, ); # Split the expression into two or three words separated by whitespace my ( $word1, $word2, $word3 ) = split( /\s+/, $expression ); #printf( "o>%s%s%s$}) { print("String literal <$1 and $2>\n") if ($debug); return((ord($1) << 9) | ord($2)); } # This is a string literal preceded by a numeric literal if (defined($word2) && ($word2=~ m{(.)>$})) { print("String literal $word1 and $1>\n") if ($debug); my $msb= parse_expression($word1); my $lsb= $1; return(($msb << 9) | ord($lsb)); } # This is a n> string literal not preceded by a numeric literal if ($word1=~ m{(.)>$}) { print("String literal $1>\n") if ($debug); return(ord($1)); } # This a defined instruction if ( defined( $inst{$word1} ) ) { #printf( "Found the instruction %s: 0%o\n", $word1, $inst{$word1} ); my $instruction = $inst{$word1}; # Is this an indirect instruction? my $indirect = defined($word3) && ( $word3 eq "i" ) ? 020000 : 0; # We have an expression for this instruction if ( ( $stage == 2 ) && defined($word2) ) { #print(" and I need to parse $word2\n"); my $val = parse_expression($word2); die("Unable to parse $word2 on pass two\n") if ( !defined($val) ); $instruction |= $val | $indirect; } return ($instruction); } # This is a defined label if ( defined( $Label{$word1} ) ) { #printf( "Found the label %s: 0%o\n", $word1, $Label{$word1} ); return ( $Label{$word1} ); } # This is a defined relative label: digits followed by f or b if ( $word1 =~ m{^(\d+)([fb])} ) { my $val = find_relative_label( $1, $2 ); #print("Got location $val for relative label $1$2\n"); return ($val); } # This is an addition. Some operands may not yet be defined if ( $word1 =~ m{(.*)\+(.*)} ) { my $sum = add( $1, $2 ); if ( !defined($sum) ) { die("Unable to add $word1 on pass two\n") if ( !defined($sum) ); } #print("Did an addition and got $sum\n"); return ($sum); } # This is a subtraction. Some operands may not yet be defined if ( $word1 =~ m{(.*)-(.*)} ) { my $diff = subtract( $1, $2 ); if ( !defined($diff) ) { die("Unable to subtract $word1 on pass two\n") if ( !defined($diff) ); } #print("Did a subtraction and got $diff\n"); return ($diff); } if ( $stage == 2 ) { err("undefined: $expression") unless (defined $Undef{$expression}); $Undef{$expression} = 1; } return 0; } # Add two expression together sub add { my ( $a, $b ) = @_; my $val1 = parse_expression($a); my $val2 = parse_expression($b); #print("Adding $val1 + $val2\n"); return (undef) if ( !defined($val1) || !defined($val2) ); #print( " with a good value of ", $val1 + $val2, "\n" ); return ( $val1 + $val2 ); } # Subtract two expression sub subtract { my ( $a, $b ) = @_; my $val1 = parse_expression($a); my $val2 = parse_expression($b); #print("Subtracting $val1 - $val2\n"); return (undef) if ( !defined($val1) || !defined($val2) ); #print( " with a good value of ", $val1 - $val2, "\n" ); return ( $val1 - $val2 ); } # Given a relative label number and a direction, # return the location of this relative label or # die if we don't have one sub find_relative_label { my ( $label, $direction ) = @_; my $curlocation = $Var{'.'}; # Error check: no labels at all if ( !defined( $Rlabel{$label} ) ) { return err("relative label $label not defined\n"); } # Get the list of possible locations for this label my $locarray = $Rlabel{$label}; # Error check: no locations return err("No relative labels") if ( @{$locarray} == 0 ); # Error check: forward but no next location, or backward but no previous return err("No forward label $label") if ( ( $direction eq 'f' ) && ( $curlocation > $locarray->[-1] ) ); return err("No backward label $label") if ( ( $direction eq 'b' ) && ( $curlocation < $locarray->[0] ) ); # Search forward for a location larger then the current one if ( $direction eq 'f' ) { foreach my $reflocation ( @{$locarray} ) { return ($reflocation) if ( $reflocation > $curlocation ); } } else { # Search backwards for a location smaller than the current one foreach my $reflocation ( sort( { $b <=> $b } @{$locarray} ) ) { return ($reflocation) if ( $reflocation < $curlocation ); } } }