From ce0595ef44ceb9efb50dfc3bb76d0b04528d5f5c Mon Sep 17 00:00:00 2001 From: Mikael Pettersson Date: Sun, 11 Aug 2019 21:50:14 +0200 Subject: [PATCH] as: rewrite in Erlang, initial version --- erlang/Makefile | 2 +- erlang/apps/as/priv/test1.s | 31 ++ erlang/apps/as/src/as.app.src | 25 ++ erlang/apps/as/src/as.erl | 75 ++++ erlang/apps/as/src/assemble.erl | 102 ++++++ erlang/apps/as/src/input.erl | 243 +++++++++++++ erlang/apps/as/src/output.erl | 560 ++++++++++++++++++++++++++++++ erlang/apps/as/src/parse.erl | 312 +++++++++++++++++ erlang/apps/as/src/scan.erl | 232 +++++++++++++ erlang/apps/as/src/scan_state.erl | 171 +++++++++ erlang/apps/as/src/token.erl | 62 ++++ erlang/apps/as/src/token.hrl | 76 ++++ erlang/apps/as/src/tunit.erl | 50 +++ erlang/apps/as/src/tunit.hrl | 116 +++++++ erlang/rebar.config | 1 + 15 files changed, 2057 insertions(+), 1 deletion(-) create mode 100644 erlang/apps/as/priv/test1.s create mode 100644 erlang/apps/as/src/as.app.src create mode 100644 erlang/apps/as/src/as.erl create mode 100644 erlang/apps/as/src/assemble.erl create mode 100644 erlang/apps/as/src/input.erl create mode 100644 erlang/apps/as/src/output.erl create mode 100644 erlang/apps/as/src/parse.erl create mode 100644 erlang/apps/as/src/scan.erl create mode 100644 erlang/apps/as/src/scan_state.erl create mode 100644 erlang/apps/as/src/token.erl create mode 100644 erlang/apps/as/src/token.hrl create mode 100644 erlang/apps/as/src/tunit.erl create mode 100644 erlang/apps/as/src/tunit.hrl diff --git a/erlang/Makefile b/erlang/Makefile index 48579e9..c4dfab2 100644 --- a/erlang/Makefile +++ b/erlang/Makefile @@ -21,7 +21,7 @@ REBAR3=$(shell type -p rebar3 || echo ./rebar3) REBAR3_GIT=https://github.com/erlang/rebar3.git REBAR3_VSN=3.7.5 -PROGRAMS=8to9 ar nm od +PROGRAMS=8to9 ar as nm od default: compile link diff --git a/erlang/apps/as/priv/test1.s b/erlang/apps/as/priv/test1.s new file mode 100644 index 0000000..457ad39 --- /dev/null +++ b/erlang/apps/as/priv/test1.s @@ -0,0 
+1,31 @@ +/* + * test1.s + * Copyright (C) 2013-2019 Mikael Pettersson + * + * This file is part of pdp10-tools. + * + * pdp10-tools is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * pdp10-tools is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with pdp10-tools. If not, see . + */ + +/* int foo(void) { return 27; } + */ + .file "test1.c" + .text + .globl foo + .type foo,@function +foo: + movei 1,033 + popj 017, + .size foo,.-foo + .ident "GCC: (GNU) 4.3.0.- for XKL-2 (XKL LLC, Kirkland, WA, USA) Built 2013-08-15 23:03 +0200 on porter by mikpe" diff --git a/erlang/apps/as/src/as.app.src b/erlang/apps/as/src/as.app.src new file mode 100644 index 0000000..2f4de0b --- /dev/null +++ b/erlang/apps/as/src/as.app.src @@ -0,0 +1,25 @@ +%%% Copyright (C) 2019 Mikael Pettersson +%%% +%%% This file is part of pdp10-tools. +%%% +%%% pdp10-tools is free software: you can redistribute it and/or modify +%%% it under the terms of the GNU General Public License as published by +%%% the Free Software Foundation, either version 3 of the License, or +%%% (at your option) any later version. +%%% +%%% pdp10-tools is distributed in the hope that it will be useful, +%%% but WITHOUT ANY WARRANTY; without even the implied warranty of +%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +%%% GNU General Public License for more details. +%%% +%%% You should have received a copy of the GNU General Public License +%%% along with pdp10-tools. If not, see . 
+
+{application, as,
+ [{description, "'as' clone for pdp10-elf"},
+  {vsn, "0.1.0"},
+  {registered, []},
+  {applications, [kernel, stdlib, lib]},
+  {env, []},
+  {modules, []}
+ ]}.
diff --git a/erlang/apps/as/src/as.erl b/erlang/apps/as/src/as.erl
new file mode 100644
index 0000000..88b3742
--- /dev/null
+++ b/erlang/apps/as/src/as.erl
@@ -0,0 +1,75 @@
+%%% -*- erlang-indent-level: 2 -*-
+%%%
+%%% 'as' clone for pdp10-elf
+%%% Copyright (C) 2013-2019 Mikael Pettersson
+%%%
+%%% This file is part of pdp10-tools.
+%%%
+%%% pdp10-tools is free software: you can redistribute it and/or modify
+%%% it under the terms of the GNU General Public License as published by
+%%% the Free Software Foundation, either version 3 of the License, or
+%%% (at your option) any later version.
+%%%
+%%% pdp10-tools is distributed in the hope that it will be useful,
+%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
+%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+%%% GNU General Public License for more details.
+%%%
+%%% You should have received a copy of the GNU General Public License
+%%% along with pdp10-tools. If not, see .
+
+-module(as).
+-export([main/1]).
+
+%% Command-line interface ======================================================
+
+%% Escript entry point: hands the real work to main_/1 via escript_runtime.
+main(Argv) ->
+  escript_runtime:start(fun main_/1, Argv).
+
+%% Parse the command line (short options "vo:", long option "version"),
+%% then assemble the input files into the selected output file.
+main_(Argv) ->
+  LongOpts = [ {"version", no, version}
+             ],
+  case getopt:parse(Argv, "vo:", LongOpts) of
+    {error, ErrMsg} ->
+      escript_runtime:errmsg("~s\n", [ErrMsg]),
+      usage();
+    {ok, {Options, Files}} ->
+      OutFile = scan_options(Options),
+      case as(Files, OutFile) of
+        ok -> halt(0);
+        {error, _Reason} = Error -> escript_runtime:fatal("~p\n", [Error])
+      end
+  end.
+
+%% Walk the parsed options left to right; the result is the output file
+%% name, which starts out as "a.out".
+scan_options(Options) ->
+  scan_options(Options, "a.out").
+
+scan_options([], OutFile) -> OutFile;
+scan_options([Option | Options], OutFile) ->
+  scan_options(Options, scan_option(Option, OutFile)).
+
+%% -v prints the version and carries on, --version prints it and exits,
+%% -o replaces the output file name chosen so far.
+scan_option($v, CurOutFile) ->
+  version(),
+  CurOutFile;
+scan_option(version, _CurOutFile) ->
+  version(),
+  halt(0);
+scan_option({$o, NewOutFile}, _CurOutFile) ->
+  NewOutFile.
+
+%% Report the usage summary through escript_runtime:fmterr/2, then exit
+%% with status 1.
+usage() ->
+  ProgName = escript_runtime:progname(),
+  escript_runtime:fmterr("Usage: ~s [-v] [-o objfile] [files..]\n", [ProgName]),
+  halt(1).
+
+%% Print the version banner on standard output.
+version() ->
+  io:format(standard_io, "pdp10-tools as version 0.2\n", []).
+
+%% As ==========================================================================
+
+%% Run the phases in order: read and interpret the inputs, assemble the
+%% sections, write the object file. The first {error, Reason} is returned.
+as(Files, OutFile) ->
+  case input:files(Files) of
+    {error, _Reason} = Error -> Error;
+    {ok, Tunit0} -> assemble_and_emit(Tunit0, OutFile)
+  end.
+
+assemble_and_emit(Tunit0, OutFile) ->
+  case assemble:tunit(Tunit0) of
+    {error, _Reason} = Error -> Error;
+    {ok, Tunit} -> output:tunit(Tunit, OutFile)
+  end.
diff --git a/erlang/apps/as/src/assemble.erl b/erlang/apps/as/src/assemble.erl
new file mode 100644
index 0000000..d7f2bb9
--- /dev/null
+++ b/erlang/apps/as/src/assemble.erl
@@ -0,0 +1,102 @@
+%%% -*- erlang-indent-level: 2 -*-
+%%%
+%%% sections assembler for pdp10-elf as
+%%% Copyright (C) 2013-2019 Mikael Pettersson
+%%%
+%%% This file is part of pdp10-tools.
+%%%
+%%% pdp10-tools is free software: you can redistribute it and/or modify
+%%% it under the terms of the GNU General Public License as published by
+%%% the Free Software Foundation, either version 3 of the License, or
+%%% (at your option) any later version.
+%%%
+%%% pdp10-tools is distributed in the hope that it will be useful,
+%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
+%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+%%% GNU General Public License for more details.
+%%%
+%%% You should have received a copy of the GNU General Public License
+%%% along with pdp10-tools. If not, see .
+
+-module(assemble).
+
+-export([ tunit/1
+        ]).
+
+-include("tunit.hrl").
+-include_lib("lib/include/pdp10_elf36.hrl").
+
+%% Assemble every section of the translation unit. Returns {ok, NewTunit}
+%% or the first {error, Reason} reported by section/2.
+tunit(Tunit) ->
+  AllSections = maps:values(Tunit#tunit.sections),
+  sections(AllSections, Tunit).
+
+%% Thread the translation unit through section/2, one section at a time.
+sections([], Tunit) -> {ok, Tunit};
+sections([Section | Rest], Tunit) ->
+  case section(Section, Tunit) of
+    {error, _Reason} = Error -> Error;
+    {ok, NewTunit} -> sections(Rest, NewTunit)
+  end.
+
+%% Pick the assembler for a section from its name, type and flags.
+%% Returns {ok, NewTunit}, or {error, Reason} for a section this version
+%% does not know how to assemble.
+section(Section, Tunit) ->
+  case Section of
+    #section{ name = ".text" ++ _
+            , sh_type = ?SHT_PROGBITS
+            , sh_flags = ?SHF_ALLOC bor ?SHF_EXECINSTR
+            } -> text(Section, Tunit);
+    #section{ name = ".comment"
+            , sh_type = ?SHT_PROGBITS
+            , sh_flags = ?SHF_MERGE bor ?SHF_STRINGS
+            } -> comment(Section, Tunit);
+    #section{ name = Name } ->
+      {error, io_lib:format("don't know how to assemble section ~s", [Name])}
+  end.
+
+%% Assemble .comment -----------------------------------------------------------
+%%
+%% The image starts with a NUL, followed by the strings, all NUL-terminated.
+
+%% Replace the section's statement list by its byte image and record the
+%% image size as the section's dot.
+comment(Section = #section{data = {stmts, Stmts}}, Tunit) ->
+  Image = comment_image(Stmts),
+  NewSection = Section#section{data = {image, Image}, dot = image_size(Image)},
+  {ok, tunit:put_section(Tunit, NewSection)}.
+
+%% Build the .comment image with the strings in source order.
+%%
+%% The statements arrive newest-first, because input:dot_ident/3 prepends
+%% each .ident to the section's list. Prepending each string (followed by
+%% its terminating NUL) while walking that list therefore restores source
+%% order, so no final reverse is needed; text_image/2 relies on the same
+%% property. An empty statement list yields an empty image.
+comment_image(Stmts) -> comment_image(Stmts, []).
+
+comment_image([], []) -> [];
+comment_image([], Acc) -> [0 | Acc]; % leading NUL before the first string
+comment_image([#s_dot_ident{string = String} | Stmts], Acc) ->
+  comment_image(Stmts, [String, 0 | Acc]).
+
+%% FIXME: duplicated
+%% Count the nonets (integers in 0..511) in an arbitrarily nested list.
+image_size(Image) -> image_size(Image, 0).
+
+image_size([H | T], Acc) -> image_size(T, image_size(H, Acc));
+image_size([], Acc) -> Acc;
+image_size(TByte, Acc) when is_integer(TByte), 0 =< TByte, TByte =< 511 -> Acc + 1.
+
+%% Assemble .text --------------------------------------------------------------
+
+%% Replace the section's instruction statements by their byte image. The
+%% section's dot is left as tracked by the input phase.
+text(Section = #section{data = {stmts, Stmts}}, Tunit) ->
+  Image = text_image(Stmts),
+  {ok, tunit:put_section(Tunit, Section#section{data = {image, Image}})}.
+
+text_image(Stmts) -> text_image(Stmts, []).
+
+text_image([], Acc) -> Acc; % the input Stmts were in reverse order
+text_image([Stmt | Stmts], Acc) ->
+  text_image(Stmts, [insn_image(Stmt) | Acc]).
+
+%% Encode one instruction statement as a 36-bit word, returned as four
+%% 9-bit bytes, most significant first.
+insn_image(Insn) ->
+  #s_insn{ high13 = High13
+         , at = At
+         , address = Address
+         , index = Index
+         } = Insn,
+  HighBits = (High13 band 16#1FFF) bsl 23,   % 13 bits at the top of the word
+  AtBit = (case At of true -> 1; false -> 0 end) bsl 22,
+  IndexBits = (Index band 16#F) bsl 18,       % 4-bit index field
+  AddressBits = Address band 16#3FFFF,        % 18-bit address field
+  Word = HighBits bor AtBit bor IndexBits bor AddressBits,
+  %% big-endian conversion
+  [(Word bsr Shift) band 511 || Shift <- [27, 18, 9, 0]].
diff --git a/erlang/apps/as/src/input.erl b/erlang/apps/as/src/input.erl
new file mode 100644
index 0000000..a3ba055
--- /dev/null
+++ b/erlang/apps/as/src/input.erl
@@ -0,0 +1,243 @@
+%%% -*- erlang-indent-level: 2 -*-
+%%%
+%%% input processing phase for pdp10-elf as
+%%% Copyright (C) 2013-2019 Mikael Pettersson
+%%%
+%%% This file is part of pdp10-tools.
+%%%
+%%% pdp10-tools is free software: you can redistribute it and/or modify
+%%% it under the terms of the GNU General Public License as published by
+%%% the Free Software Foundation, either version 3 of the License, or
+%%% (at your option) any later version.
+%%%
+%%% pdp10-tools is distributed in the hope that it will be useful,
+%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
+%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+%%% GNU General Public License for more details.
+%%%
+%%% You should have received a copy of the GNU General Public License
+%%% along with pdp10-tools. If not, see .
+
+-module(input).
+
+-export([ files/1
+        ]).
+
+-include("tunit.hrl").
+-include_lib("lib/include/pdp10_elf36.hrl").
+
+%% Process all input files into one translation unit. With no files given,
+%% the single pseudo-file "--" (stdin) is read instead.
+files([]) -> files(["--"], tunit_init());
+files(Files) -> files(Files, tunit_init()).
+
+%% Fold file/2 over the file names, stopping at the first error.
+files([], Tunit) -> {ok, Tunit};
+files([File | Rest], Tunit) ->
+  case file(File, Tunit) of
+    {error, _Reason} = Error -> Error;
+    {ok, NewTunit} -> files(Rest, NewTunit)
+  end.
+
+%% Open one input, interpret all its statements, and always close the scan
+%% state again, even if processing raises.
+file(File, Tunit) ->
+  case scan_state_open(File) of
+    {error, _Reason} = Error -> Error;
+    {ok, ScanState} ->
+      try process(ScanState, Tunit)
+      after scan_state:fclose(ScanState)
+      end
+  end.
+
+%% Open next input file, support "--" and "-" as aliases for stdin.
+scan_state_open("--") -> scan_state:stdin();
+scan_state_open("-") -> scan_state:stdin();
+scan_state_open(File) -> scan_state:fopen(File).
+
+%% Parse and interpret statements until end of file or the first error.
+process(ScanState, Tunit) ->
+  case parse:stmt(ScanState) of
+    {error, _Reason} = Error -> Error;
+    eof -> {ok, Tunit};
+    {ok, Stmt} ->
+      case interpret(ScanState, Tunit, Stmt) of
+        {error, _Reason} = Error -> Error;
+        {ok, NewTunit} -> process(ScanState, NewTunit)
+      end
+  end.
+
+%% Dispatch a parsed statement to its handler.
+interpret(ScanState, Tunit, Stmt) ->
+  case Stmt of
+    #s_dot_file{} -> dot_file(ScanState, Tunit, Stmt);
+    #s_dot_globl{} -> dot_globl(ScanState, Tunit, Stmt);
+    #s_dot_ident{} -> dot_ident(ScanState, Tunit, Stmt);
+    #s_dot_size{} -> dot_size(ScanState, Tunit, Stmt);
+    #s_dot_text{} -> dot_text(ScanState, Tunit, Stmt);
+    #s_dot_type{} -> dot_type(ScanState, Tunit, Stmt);
+    #s_label{} -> label(ScanState, Tunit, Stmt);
+    #s_insn{} -> insn(ScanState, Tunit, Stmt)
+  end.
+
+%% .file: record a local STT_FILE symbol named after the string.
+dot_file(_ScanState, Tunit, #s_dot_file{string = FileName}) ->
+  FileSymbol =
+    #symbol{ name = FileName
+           , section = false
+           , st_value = 0
+           , st_size = 0
+           , st_info = ?ELF_ST_INFO(?STB_LOCAL, ?STT_FILE)
+           , st_name = 0
+           , st_shndx = 0
+           },
+  {ok, tunit:put_symbol(Tunit, FileSymbol)}.
+
+%% .globl: create the symbol as an undefined global, or promote an existing
+%% local symbol to global while keeping its type.
+dot_globl(ScanState, Tunit, #s_dot_globl{name = Name}) ->
+  case tunit:get_symbol(Tunit, Name) of
+    false ->
+      Fresh =
+        #symbol{ name = Name
+               , section = false
+               , st_value = false
+               , st_size = false
+               , st_info = ?ELF_ST_INFO(?STB_GLOBAL, ?STT_NOTYPE)
+               , st_name = 0
+               , st_shndx = 0
+               },
+      {ok, tunit:put_symbol(Tunit, Fresh)};
+    #symbol{st_info = StInfo} = Existing ->
+      case ?ELF_ST_BIND(StInfo) of
+        ?STB_GLOBAL -> {ok, Tunit};
+        ?STB_LOCAL -> % FIXME: assumed local-by-default, are there hard-local symbols?
+          GlobalInfo = ?ELF_ST_INFO(?STB_GLOBAL, ?ELF_ST_TYPE(StInfo)),
+          {ok, tunit:put_symbol(Tunit, Existing#symbol{st_info = GlobalInfo})};
+        Bind ->
+          fmterr(ScanState, "symbol ~s has previous incompatible binding type ~p", [Name, Bind])
+      end
+  end.
+
+%% .ident: prepend the statement to the .comment section's statement list,
+%% creating that section on first use.
+dot_ident(_ScanState, Tunit, #s_dot_ident{} = Stmt) ->
+  Comment =
+    case tunit:get_section(Tunit, ".comment") of
+      false -> section_dot_comment();
+      Existing -> Existing
+    end,
+  #section{data = {stmts, Stmts}} = Comment,
+  Updated = Comment#section{data = {stmts, [Stmt | Stmts]}},
+  {ok, tunit:put_section(Tunit, Updated)}.
+
+%% .size: set the symbol's size to the distance from its value to the
+%% current section's dot. The clause order below is significant.
+dot_size(ScanState, Tunit, #s_dot_size{name = Name}) ->
+  #tunit{cursect = Cursect} = Tunit,
+  #section{dot = Dot} = tunit:get_section(Tunit, Cursect),
+  case tunit:get_symbol(Tunit, Name) of
+    #symbol{st_size = OldSize} when OldSize =/= false ->
+      fmterr(ScanState, "size of symbol ~s already defined", [Name]);
+    #symbol{section = SymSect} when SymSect =/= Cursect ->
+      fmterr(ScanState, "symbol ~s not defined in same section as dot", [Name]);
+    #symbol{st_value = Value} = Sym when Value =< Dot -> % note: false > integer()
+      {ok, tunit:put_symbol(Tunit, Sym#symbol{st_size = Dot - Value})};
+    #symbol{st_value = Value} when Value =/= false, Value > Dot ->
+      fmterr(ScanState, "cannot make symbol ~s negative size", [Name]);
+    _ ->
+      fmterr(ScanState, "symbol ~s not defined", [Name])
+  end.
+
+%% .text: make ".text" the current section.
+dot_text(_ScanState, Tunit, #s_dot_text{}) ->
+  %% just check that .text has been pre-created
+  #section{} = tunit:get_section(Tunit, ".text"),
+  {ok, Tunit#tunit{cursect = ".text"}}.
+
+%% .type: create the symbol as an undefined local function, or turn an
+%% existing untyped symbol into a function while keeping its binding.
+dot_type(ScanState, Tunit, #s_dot_type{name = Name}) ->
+  case tunit:get_symbol(Tunit, Name) of
+    false ->
+      Fresh =
+        #symbol{ name = Name
+               , section = false
+               , st_value = false
+               , st_size = false
+               , st_info = ?ELF_ST_INFO(?STB_LOCAL, ?STT_FUNC)
+               , st_name = 0
+               , st_shndx = 0
+               },
+      {ok, tunit:put_symbol(Tunit, Fresh)};
+    #symbol{st_info = StInfo} = Existing ->
+      case ?ELF_ST_TYPE(StInfo) of
+        ?STT_FUNC -> {ok, Tunit};
+        ?STT_NOTYPE ->
+          FuncInfo = ?ELF_ST_INFO(?ELF_ST_BIND(StInfo), ?STT_FUNC),
+          {ok, tunit:put_symbol(Tunit, Existing#symbol{st_info = FuncInfo})};
+        Type ->
+          fmterr(ScanState, "symbol ~s has previous incompatible type ~p", [Name, Type])
+      end
+  end.
+
+%% A label defines a symbol at the current dot. It may complete a symbol
+%% that was only declared so far (no section, no value); defining a symbol
+%% twice is an error.
+label(ScanState, Tunit, #s_label{name = Name}) ->
+  case tunit:get_symbol(Tunit, Name) of
+    false -> label2(Tunit, #symbol{name = Name, st_size = false, st_info = 0});
+    #symbol{section = false, st_value = false} = Declared -> label2(Tunit, Declared);
+    #symbol{} -> fmterr(ScanState, "label ~s already defined", [Name])
+  end.
+
+%% Bind the symbol to the current section and that section's dot.
+label2(Tunit, Symbol) ->
+  Cursect = Tunit#tunit.cursect,
+  #section{dot = Dot} = tunit:get_section(Tunit, Cursect),
+  Defined = Symbol#symbol{section = Cursect, st_value = Dot},
+  {ok, tunit:put_symbol(Tunit, Defined)}.
+
+%% Prepend an instruction to the current section and advance its dot by
+%% four; dot must be a multiple of four.
+insn(ScanState, Tunit, #s_insn{} = Stmt) ->
+  #tunit{cursect = Cursect} = Tunit,
+  #section{data = {stmts, Stmts}, dot = Dot} = Section = tunit:get_section(Tunit, Cursect),
+  case Dot rem 4 =:= 0 of % FIXME: target-specific
+    true ->
+      Advanced =
+        Section#section{ data = {stmts, [Stmt | Stmts]}
+                       , dot = Dot + 4 % FIXME: target-specific
+                       },
+      {ok, tunit:put_section(Tunit, Advanced)};
+    false -> fmterr(ScanState, "misaligned address for instruction", [])
+  end.
+
+%% Initialization --------------------------------------------------------------
+
+%% A new translation unit holding only ".text", which is also the current
+%% section.
+tunit_init() ->
+  TextSection = section_dot_text(),
+  Tunit0 = tunit:new(),
+  Tunit1 = tunit:put_section(Tunit0, TextSection),
+  Tunit1#tunit{cursect = TextSection#section.name}.
+
+%% Predefined Sections ---------------------------------------------------------
+
+%% Template for an empty ".comment" section (mergeable strings, byte
+%% aligned).
+section_dot_comment() -> % ".comment"
+  #section{ name = ".comment"
+          , sh_type = ?SHT_PROGBITS
+          , sh_flags = ?SHF_MERGE bor ?SHF_STRINGS
+          , sh_addralign = 1
+          , sh_entsize = 1
+          , sh_link = 0
+          , sh_name = 0
+          , sh_offset = 0
+          , shndx = 0
+          , data = {stmts, []}
+          , dot = false % do not allow dot or labels here
+          }.
+
+%% Template for an empty ".text" section (allocated, executable).
+section_dot_text() -> % ".text"
+  #section{ name = ".text"
+          , sh_type = ?SHT_PROGBITS
+          , sh_flags = ?SHF_ALLOC bor ?SHF_EXECINSTR
+          , sh_addralign = 4 % FIXME: target-specific
+          , sh_entsize = 0
+          , sh_link = 0
+          , sh_name = 0
+          , sh_offset = 0
+          , shndx = 0
+          , data = {stmts, []}
+          , dot = 0
+          }.
+
+%% Error reporting -------------------------------------------------------------
+
+%% FIXME: this is duplicated a few times, move it to scan_state.erl
+%% Build {error, Message} where Message is a flat string prefixed with the
+%% current file name and line number of the scan state.
+fmterr(ScanState, Fmt, Args) ->
+  {ok, FileName} = scan_state:filename(ScanState),
+  {ok, LineNr} = scan_state:linenr(ScanState),
+  Message = io_lib:format("file ~s line ~p: " ++ Fmt, [FileName, LineNr | Args]),
+  {error, lists:flatten(Message)}.
diff --git a/erlang/apps/as/src/output.erl b/erlang/apps/as/src/output.erl
new file mode 100644
index 0000000..3279914
--- /dev/null
+++ b/erlang/apps/as/src/output.erl
@@ -0,0 +1,560 @@
+%%% -*- erlang-indent-level: 2 -*-
+%%%
+%%% ELF output for pdp10-elf as
+%%% Copyright (C) 2013-2019 Mikael Pettersson
+%%%
+%%% This file is part of pdp10-tools.
+%%%
+%%% pdp10-tools is free software: you can redistribute it and/or modify
+%%% it under the terms of the GNU General Public License as published by
+%%% the Free Software Foundation, either version 3 of the License, or
+%%% (at your option) any later version.
+%%%
+%%% pdp10-tools is distributed in the hope that it will be useful,
+%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
+%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+%%% GNU General Public License for more details.
+%%% +%%% You should have received a copy of the GNU General Public License +%%% along with pdp10-tools. If not, see . +%%% +%%%----------------------------------------------------------------------------- +%%% +%%% Output file layout: +%%% +%%% ELF header +%%% +%%% <.strtab, if any symbols> +%%% <.symtab, if any symbols> +%%% <.shstrtab, if any sections> +%%%
+%%%
+%%% Processing steps:
+%%%
+%%% initialize context
+%%% for each section:
+%%% - add name to .shstrtab, assign sh_name
+%%% - assign sh_offset and shndx
+%%% - update context
+%%% for each symbol:
+%%% - add name to .strtab, assign st_name
+%%% - assign st_shndx
+%%% append .strtab to list of sections
+%%% append .symtab to list of sections
+%%% append .shstrtab to list of sections
+
+-module(output).
+
+-export([ tunit/2
+        ]).
+
+-include("tunit.hrl").
+-include_lib("lib/include/pdp10_elf36.hrl").
+
+%% A string table under construction: maps each string to its offset in
+%% the table; dot is the offset the next new string will get.
+-record(strtab,
+        { map :: #{string() => non_neg_integer()}
+        , dot :: pos_integer()
+        }).
+
+%% Layout state threaded through the layout steps.
+%% shnum and offset are non_neg_integer(): align_shtab/1 resets both to 0
+%% when the output has no sections, so pos_integer() would be wrong.
+-record(context,
+        { tunit :: #tunit{}
+        , shnum :: non_neg_integer()
+        , offset :: non_neg_integer()
+        , shstrtab :: #strtab{}
+        , strtab :: #strtab{}
+        }).
+
+%% Lay out the translation unit as an ELF object and write it to File.
+%% Returns ok or {error, Reason}.
+tunit(Tunit, File) ->
+  emit(layout(Tunit), File).
+
+%% LAYOUT ======================================================================
+
+%% Run the layout steps in order, each one transforming the context.
+layout(Tunit) ->
+  lists:foldl(
+    fun(Fun, Context) -> Fun(Context) end,
+    context_new(Tunit),
+    [ fun process_sections/1
+    , fun process_symbols/1
+    , fun create_strtab/1
+    , fun create_symtab/1
+    , fun create_shstrtab/1
+    , fun align_shtab/1
+    ]).
+
+%% Initial context: section index 0 is reserved, so numbering starts at 1,
+%% and section data starts right after the ELF header.
+context_new(Tunit) ->
+  #context{ tunit = Tunit
+          , shnum = 1
+          , offset = ?ELF36_EHDR_SIZEOF
+          , shstrtab = strtab_new()
+          , strtab = strtab_new()
+          }.
+
+%% Sections --------------------------------------------------------------------
+
+%% Assign names, offsets and indices to all sections of the input tunit.
+process_sections(Context) ->
+  #context{tunit = #tunit{sections = Sections}} = Context,
+  lists:foldl(fun process_section/2, Context, maps:values(Sections)).
+
+%% lists:foldl/3 adapter: same as append_section/2 with arguments swapped.
+process_section(Section, Context) ->
+  append_section(Context, Section).
+
+%% Add a non-empty section to the output: enter its name in .shstrtab,
+%% align the file offset to sh_addralign, assign the next section index,
+%% and advance the context past the section's data. Sections whose dot is
+%% 0 are left out and the context is returned unchanged.
+append_section(Context, Section) ->
+  #section{ dot = Dot
+          , name = Name
+          , sh_addralign = ShAddrAlign
+          } = Section,
+  case Dot of
+    0 -> Context;
+    _ ->
+      #context{ tunit = Tunit
+              , shnum = ShNum
+              , offset = Offset
+              , shstrtab = ShStrTab
+              } = Context,
+      {ShName, NewShStrTab} = strtab_enter(ShStrTab, Name),
+      %% round Offset up to the next multiple of ShAddrAlign (a power of two)
+      ShOffset = (Offset + ShAddrAlign - 1) band bnot (ShAddrAlign - 1),
+      NewSection =
+        Section#section{sh_name = ShName, sh_offset = ShOffset, shndx = ShNum},
+      NewTunit = tunit:put_section(Tunit, NewSection),
+      Context#context{ tunit = NewTunit
+                     , shnum = ShNum + 1
+                     , offset = ShOffset + Dot
+                     , shstrtab = NewShStrTab
+                     }
+  end.
+
+%% Symbols ---------------------------------------------------------------------
+
+%% Assign st_name and st_shndx to every symbol.
+process_symbols(Context) ->
+  #context{tunit = #tunit{symbols = Symbols}} = Context,
+  lists:foldl(fun process_symbol/2, Context, maps:values(Symbols)).
+
+%% Enter the symbol's name in .strtab and resolve its section index:
+%% - defined in a section: that section's index
+%% - no section and no value (declared by .globl/.type but never defined):
+%%   SHN_UNDEF, so the symbol stays an undefined reference
+%% - no section but a value (e.g. the .file symbol): SHN_ABS
+process_symbol(Symbol, Context) ->
+  #symbol{name = Name, section = Section, st_value = StValue} = Symbol,
+  #context{tunit = Tunit, strtab = StrTab} = Context,
+  {StName, NewStrTab} = strtab_enter(StrTab, Name),
+  StShndx =
+    case tunit:get_section(Tunit, Section) of
+      #section{shndx = Shndx} -> Shndx; % assigned in append_section/2 above
+      false when StValue =:= false -> ?SHN_UNDEF;
+      false -> ?SHN_ABS
+    end,
+  NewSymbol = Symbol#symbol{st_name = StName, st_shndx = StShndx},
+  NewTunit = tunit:put_symbol(Tunit, NewSymbol),
+  Context#context{tunit = NewTunit, strtab = NewStrTab}.
+
+%% Symbol string table (.strtab) -----------------------------------------------
+
+%% Append a .strtab section holding the symbol names, unless there are no
+%% symbols at all.
+create_strtab(Context) ->
+  case maps:size(Context#context.tunit#tunit.symbols) of
+    0 -> Context;
+    _ ->
+      StrTab = Context#context.strtab,
+      Image = strtab_image(StrTab),
+      Section =
+        #section{ name = ".strtab"
+                , data = {image, Image}
+                , dot = image_size(Image)
+                , sh_type = ?SHT_STRTAB
+                , sh_flags = ?SHF_MERGE bor ?SHF_STRINGS % FIXME: check
+                , sh_link = 0
+                , sh_addralign = 1 % FIXME: check
+                , sh_entsize = 1 % FIXME: check
+                },
+      append_section(Context, Section)
+  end.
+
+%% Symbol table (.symtab) ------------------------------------------------------
+
+%% Append a .symtab section linked to .strtab, unless there are no symbols.
+create_symtab(Context) ->
+  #context{tunit = Tunit} = Context,
+  #tunit{symbols = Symbols} = Tunit,
+  case maps:size(Symbols) of
+    0 -> Context;
+    NrSyms ->
+      #section{shndx = StrTabShndx} = tunit:get_section(Tunit, ".strtab"),
+      Image = symbols_image(Symbols),
+      Size = (NrSyms + 1) * ?ELF36_SYM_SIZEOF, % +1 for the initial null entry
+      Size = image_size(Image), % consistency check
+      Section =
+        #section{ name = ".symtab"
+                , data = {image, Image}
+                , dot = Size
+                , sh_type = ?SHT_SYMTAB
+                , sh_flags = 0
+                , sh_link = StrTabShndx
+                , sh_addralign = 4 % FIXME: check
+                , sh_entsize = ?ELF36_SYM_SIZEOF
+                },
+      append_section(Context, Section)
+  end.
+
+%% Image of the whole symbol table: the null entry followed by one entry
+%% per symbol.
+symbols_image(Symbols) ->
+  ElfSym0 =
+    #elf36_Sym{ st_name = 0
+              , st_value = 0
+              , st_size = 0
+              , st_info = ?ELF36_ST_INFO(?STB_LOCAL, ?STT_NOTYPE)
+              , st_other = 0
+              , st_shndx = ?SHN_UNDEF
+              },
+  [elf36_Sym_image(ElfSym0) |
+   lists:map(fun symbol_image/1, maps:values(Symbols))].
+
+%% Convert an assembler symbol to the image of its ELF symbol table entry.
+symbol_image(Symbol) ->
+  #symbol{ st_value = StValue
+         , st_size = StSize
+         , st_info = StInfo
+         , st_name = StName
+         , st_shndx = StShndx
+         } = Symbol,
+  ElfSym =
+    #elf36_Sym{ st_name = StName
+              , st_value = StValue
+              , st_size = StSize
+              , st_info = StInfo
+              , st_other = ?STV_DEFAULT % FIXME: should be set earlier
+              , st_shndx = StShndx
+              },
+  elf36_Sym_image(ElfSym).
+
+%% FIXME: the code below belongs in a library
+
+%% Serialize a symbol table entry.
+%%
+%% The input phase uses 'false' for a value or size that was never defined
+%% (symbols declared by .globl/.type but never given a label or .size).
+%% Both are written as 0; without this an undefined st_value would reach
+%% uint36_image/1 and crash with badarith.
+elf36_Sym_image(ElfSym) ->
+  #elf36_Sym{ st_name = StName
+            , st_value = StValue
+            , st_size = StSize
+            , st_info = StInfo
+            , st_other = StOther
+            , st_shndx = StShndx
+            } = ElfSym,
+  [ elf36_Word_image(StName)
+  , elf36_Addr_image(undefined_as_zero(StValue))
+  , elf36_Word_image(undefined_as_zero(StSize))
+  , elf36_Uchar_image(StInfo)
+  , elf36_Uchar_image(StOther)
+  , elf36_Half_image(StShndx)
+  ].
+
+%% Map the "not defined" marker 'false' to 0, pass anything else through.
+undefined_as_zero(false) -> 0;
+undefined_as_zero(Value) -> Value.
+
+elf36_Addr_image(Addr) -> uint36_image(Addr).
+elf36_Half_image(Half) -> uint18_image(Half).
+elf36_Off_image(Off) -> uint36_image(Off).
+elf36_Word_image(Word) -> uint36_image(Word).
+elf36_Uchar_image(Uchar) -> uint9_image(Uchar).
+
+%% Low 9 bits of the value, as a single byte.
+uint9_image(Value) ->
+  Value band 511.
+
+%% Low 18 bits of the value, as two 9-bit bytes, most significant first.
+uint18_image(Value) ->
+  [(Value bsr Shift) band 511 || Shift <- [9, 0]].
+
+%% Low 36 bits of the value, as four 9-bit bytes, most significant first.
+uint36_image(Value) ->
+  [(Value bsr Shift) band 511 || Shift <- [27, 18, 9, 0]].
+
+%% Count the 9-bit bytes (integers in 0..511) in a nested list.
+image_size(Image) -> image_size(Image, 0).
+
+image_size(TByte, Count) when is_integer(TByte), 0 =< TByte, TByte =< 511 -> Count + 1;
+image_size([], Count) -> Count;
+image_size([Head | Tail], Count) -> image_size(Tail, image_size(Head, Count)).
+
+%% Section Header String Table (.shstrtab) -------------------------------------
+
+%% Append the .shstrtab section, unless no section was emitted (shnum
+%% still 1).
+create_shstrtab(Context) ->
+  case Context#context.shnum of
+    1 -> Context;
+    _ ->
+      %% Note that append_section/2 enters the section's name to shstrtab,
+      %% updating its contents if the name wasn't already there, which would
+      %% invalidate the image recorded in the section. To avoid that, enter
+      %% the name first.
+      {_ShName, ShStrTab} = strtab_enter(Context#context.shstrtab, ".shstrtab"),
+      Image = strtab_image(ShStrTab),
+      Section =
+        #section{ name = ".shstrtab"
+                , data = {image, Image}
+                , dot = image_size(Image)
+                , sh_type = ?SHT_STRTAB
+                , sh_flags = ?SHF_MERGE bor ?SHF_STRINGS % FIXME: check
+                , sh_link = 0
+                , sh_addralign = 1 % FIXME: check
+                , sh_entsize = 1 % FIXME: check
+                },
+      append_section(Context#context{shstrtab = ShStrTab}, Section)
+  end.
+
+%% Align Section Header Table --------------------------------------------------
+
+%% Round the final offset up to a multiple of 4 for the section header
+%% table; with no sections, both shnum and offset become 0.
+align_shtab(Context) ->
+  case Context#context.shnum of
+    1 ->
+      Context#context{shnum = 0, offset = 0};
+    _ ->
+      Unaligned = Context#context.offset,
+      Context#context{offset = (Unaligned + 3) band bnot 3}
+  end.
+
+%% String Tables ---------------------------------------------------------------
+%% FIXME: duplicates code for .ident directive / .comment section
+
+%% An empty string table.
+strtab_new() ->
+  #strtab{map = maps:new(), dot = 1}. % 1 due to NUL before 1st string
+
+%% Look up String, adding it at the current dot if it is new.
+%% Returns {Offset, StrTab}.
+strtab_enter(StrTab = #strtab{map = Map, dot = Dot}, String) ->
+  case maps:find(String, Map) of
+    {ok, Offset} -> {Offset, StrTab};
+    error ->
+      NextDot = Dot + length(String) + 1, % +1 for terminating NUL
+      {Dot, StrTab#strtab{map = maps:put(String, Dot, Map), dot = NextDot}}
+  end.
+
+%% Image of the table: a leading NUL, then every string in offset order,
+%% each followed by a NUL.
+strtab_image(#strtab{map = Map}) ->
+  ByOffset = lists:keysort(2, maps:to_list(Map)), % offsets are unique
+  [0 | [String ++ [0] || {String, _Offset} <- ByOffset]].
+
+%% EMIT ========================================================================
+
+%% Write the laid-out object to File: ELF header, section data, section
+%% header table. The file is closed even if a step raises.
+emit(Context, File) ->
+  case pdp10_stdio:fopen(File, [raw, write, delayed_write]) of
+    {error, Reason} -> {error, io_lib:format("opening ~s: ~p", [File, Reason])};
+    {ok, FP} ->
+      Steps =
+        [ fun emit_elf_header/3
+        , fun emit_sections/3
+        , fun emit_shtab/3
+        ],
+      try emit(Steps, Context, FP, 0)
+      after pdp10_stdio:fclose(FP)
+      end
+  end.
+
+%% Run the emit steps in order, threading the file offset through them
+%% and stopping at the first error.
+emit([], _Context, _FP, _Offset) -> ok;
+emit([Step | Steps], Context, FP, Offset) ->
+  case Step(Context, FP, Offset) of
+    {error, _Reason} = Error -> Error;
+    {ok, NewOffset} -> emit(Steps, Context, FP, NewOffset)
+  end.
+
+%% Write the ELF header at offset 0. e_shstrndx is the index of
+%% .shstrtab, or 0 if that section does not exist.
+emit_elf_header(Context, FP, Offset = 0) ->
+  #context{tunit = Tunit, offset = ShTabOffset, shnum = ShNum} = Context,
+  ShStrNdx =
+    case tunit:get_section(Tunit, ".shstrtab") of
+      false -> 0;
+      #section{shndx = Shndx} -> Shndx
+    end,
+  ElfHdr =
+    #elf36_Ehdr{ e_ident = e_ident()
+               , e_type = ?ET_REL
+               , e_machine = ?EM_PDP10 % FIXME: target-specific
+               , e_version = ?EV_CURRENT
+               , e_entry = 0
+               , e_phoff = 0
+               , e_shoff = ShTabOffset
+               , e_flags = 0
+               , e_ehsize = ?ELF36_EHDR_SIZEOF
+               , e_phentsize = 0
+               , e_phnum = 0
+               , e_shentsize = ?ELF36_SHDR_SIZEOF
+               , e_shnum = ShNum
+               , e_shstrndx = ShStrNdx
+               },
+  emit_image(elf36_Ehdr_image(ElfHdr), ?ELF36_EHDR_SIZEOF, FP, Offset).
+
+%% Write the data of all sections in file-offset order; must start right
+%% after the ELF header.
+emit_sections(Context, FP, Offset = ?ELF36_EHDR_SIZEOF) ->
+  SectionsMap = Context#context.tunit#tunit.sections,
+  InFileOrder = lists:sort(fun order_by_sh_offset/2, maps:values(SectionsMap)),
+  emit_sections2(InFileOrder, FP, Offset).
+
+emit_sections2([], _FP, Offset) -> {ok, Offset};
+emit_sections2([Section | Rest], FP, Offset) ->
+  case emit_section(Section, FP, Offset) of
+    {error, _Reason} = Error -> Error;
+    {ok, NewOffset} -> emit_sections2(Rest, FP, NewOffset)
+  end.
+
+%% Write one section's image, preceded by padding up to its sh_offset.
+%% Empty sections (dot 0) are skipped.
+emit_section(Section, FP, Offset) ->
+  case Section#section.dot of
+    0 -> {ok, Offset};
+    Size ->
+      ShOffset = Section#section.sh_offset,
+      case emit_padding(ShOffset - Offset, FP) of
+        {error, _Reason} = Error -> Error;
+        ok ->
+          {image, Image} = Section#section.data,
+          emit_image(Image, Size, FP, ShOffset)
+      end
+  end.
+
+%% Write the section header table at the offset chosen during layout:
+%% padding, the null header, then one header per non-empty section in
+%% index order. Nothing is written when the layout offset is 0.
+emit_shtab(Context, FP, Offset) ->
+  case Context#context.offset of
+    0 -> {ok, Offset};
+    ShTabOffset ->
+      case emit_padding(ShTabOffset - Offset, FP) of
+        {error, _Reason} = Error -> Error;
+        ok -> emit_shtab_entries(Context, FP, ShTabOffset)
+      end
+  end.
+
+emit_shtab_entries(Context, FP, ShTabOffset) ->
+  case emit_shdr0(FP, ShTabOffset) of
+    {error, _Reason} = Error -> Error;
+    {ok, NewOffset} ->
+      #context{tunit = #tunit{sections = SectionsMap}} = Context,
+      InIndexOrder = lists:sort(fun order_by_shndx/2, maps:values(SectionsMap)),
+      emit_shdrs(InIndexOrder, FP, NewOffset)
+  end.
+
+emit_shdrs([], _FP, Offset) -> {ok, Offset};
+emit_shdrs([Section | Rest], FP, Offset) ->
+  case emit_shdr(Section, FP, Offset) of
+    {error, _Reason} = Error -> Error;
+    {ok, NewOffset} -> emit_shdrs(Rest, FP, NewOffset)
+  end.
+
+%% Write the header of one section; empty sections (dot 0) have none.
+emit_shdr(Section, FP, Offset) ->
+  case Section#section.dot of
+    0 -> {ok, Offset};
+    Size ->
+      #section{ sh_name = ShName
+              , sh_type = ShType
+              , sh_offset = ShOffset
+              , sh_flags = ShFlags
+              , sh_link = ShLink
+              , sh_addralign = ShAddrAlign
+              , sh_entsize = ShEntSize
+              } = Section,
+      emit_elf36_Shdr(
+        #elf36_Shdr{ sh_name = ShName
+                   , sh_type = ShType
+                   , sh_flags = ShFlags
+                   , sh_addr = 0
+                   , sh_offset = ShOffset
+                   , sh_size = Size
+                   , sh_link = ShLink
+                   , sh_info = 0 % FIXME: for symtab, LAST_LOCAL + 1
+                   , sh_addralign = ShAddrAlign
+                   , sh_entsize = ShEntSize
+                   },
+        FP, Offset)
+  end.
+
+%% Write the all-zero SHT_NULL header that occupies index 0.
+emit_shdr0(FP, Offset) ->
+  NullShdr =
+    #elf36_Shdr{ sh_name = 0
+               , sh_type = ?SHT_NULL
+               , sh_flags = 0
+               , sh_addr = 0
+               , sh_offset = 0
+               , sh_size = 0
+               , sh_link = 0
+               , sh_info = 0
+               , sh_addralign = 0
+               , sh_entsize = 0
+               },
+  emit_elf36_Shdr(NullShdr, FP, Offset).
+
+emit_elf36_Shdr(Shdr, FP, Offset) ->
+  Image = elf36_Shdr_image(Shdr),
+  emit_image(Image, ?ELF36_SHDR_SIZEOF, FP, Offset).
+
+%% Serialize a section header, field by field in record order.
+elf36_Shdr_image(ElfShdr) ->
+  #elf36_Shdr{ sh_name = ShName
+             , sh_type = ShType
+             , sh_flags = ShFlags
+             , sh_addr = ShAddr
+             , sh_offset = ShOffset
+             , sh_size = ShSize
+             , sh_link = ShLink
+             , sh_info = ShInfo
+             , sh_addralign = ShAddrAlign
+             , sh_entsize = ShEntSize
+             } = ElfShdr,
+  NameImage = elf36_Word_image(ShName),
+  TypeImage = elf36_Word_image(ShType),
+  FlagsImage = elf36_Word_image(ShFlags),
+  AddrImage = elf36_Addr_image(ShAddr),
+  OffsetImage = elf36_Off_image(ShOffset),
+  SizeImage = elf36_Word_image(ShSize),
+  LinkImage = elf36_Word_image(ShLink),
+  InfoImage = elf36_Word_image(ShInfo),
+  AddrAlignImage = elf36_Word_image(ShAddrAlign),
+  EntSizeImage = elf36_Word_image(ShEntSize),
+  [ NameImage, TypeImage, FlagsImage, AddrImage, OffsetImage
+  , SizeImage, LinkImage, InfoImage, AddrAlignImage, EntSizeImage
+  ].
+
+%% Write N zero bytes; N must not be negative.
+emit_padding(0, _FP) -> ok;
+emit_padding(N, FP) when N > 0 ->
+  case pdp10_stdio:fputc(0, FP) of
+    {error, _Reason} = Error -> Error;
+    ok -> emit_padding(N - 1, FP)
+  end.
+
+%% lists:sort/2 ordering functions for sections.
+order_by_sh_offset(#section{sh_offset = Offset1}, #section{sh_offset = Offset2}) ->
+  Offset1 =< Offset2.
+
+order_by_shndx(#section{shndx = Shndx1}, #section{shndx = Shndx2}) ->
+  Shndx1 =< Shndx2.
+
+%% Serialize the ELF header, field by field in record order.
+elf36_Ehdr_image(ElfHdr) ->
+  #elf36_Ehdr{ e_ident = EIdent
+             , e_type = EType
+             , e_machine = EMachine
+             , e_version = EVersion
+             , e_entry = EEntry
+             , e_phoff = EPhOff
+             , e_shoff = EShOff
+             , e_flags = EFlags
+             , e_ehsize = EEhSize
+             , e_phentsize = EPhEntSize
+             , e_phnum = EPhNum
+             , e_shentsize = EShEntSize
+             , e_shnum = EShNum
+             , e_shstrndx = EShStrNdx
+             } = ElfHdr,
+  TypeImage = elf36_Half_image(EType),
+  MachineImage = elf36_Half_image(EMachine),
+  VersionImage = elf36_Word_image(EVersion),
+  EntryImage = elf36_Addr_image(EEntry),
+  PhOffImage = elf36_Off_image(EPhOff),
+  ShOffImage = elf36_Off_image(EShOff),
+  FlagsImage = elf36_Word_image(EFlags),
+  EhSizeImage = elf36_Half_image(EEhSize),
+  PhEntSizeImage = elf36_Half_image(EPhEntSize),
+  PhNumImage = elf36_Half_image(EPhNum),
+  ShEntSizeImage = elf36_Half_image(EShEntSize),
+  ShNumImage = elf36_Half_image(EShNum),
+  ShStrNdxImage = elf36_Half_image(EShStrNdx),
+  [ EIdent % already a list of bytes
+  , TypeImage, MachineImage, VersionImage, EntryImage, PhOffImage
+  , ShOffImage, FlagsImage, EhSizeImage, PhEntSizeImage, PhNumImage
+  , ShEntSizeImage, ShNumImage, ShStrNdxImage
+  ].
+
+%% The e_ident array: EI_NIDENT bytes, all zero except the listed slots.
+%% The EI_* indices are zero-based, tuple positions are one-based.
+e_ident() ->
+  Slots =
+    [ {1 + ?EI_MAG0, ?ELFMAG0}
+    , {1 + ?EI_MAG1, ?ELFMAG1}
+    , {1 + ?EI_MAG2, ?ELFMAG2}
+    , {1 + ?EI_MAG3, ?ELFMAG3}
+    , {1 + ?EI_CLASS, ?ELFCLASS36}
+    , {1 + ?EI_DATA, ?ELFDATA2MSB}
+    , {1 + ?EI_VERSION, ?EV_CURRENT}
+    , {1 + ?EI_OSABI, ?ELFOSABI_NONE} % TODO: ELFOSABI_LINUX instead?
+    , {1 + ?EI_ABIVERSION, 0}
+    ],
+  Ident = erlang:make_tuple(?EI_NIDENT, 0, Slots),
+  tuple_to_list(Ident).
+
+%% Write an image that must be exactly NrBytes long; returns the offset
+%% just past it.
+emit_image(Image, NrBytes, FP, Offset) ->
+  NrBytes = image_size(Image), % assert
+  case image_write(Image, FP) of
+    {error, _Reason} = Error -> Error;
+    ok -> {ok, Offset + NrBytes}
+  end.
+
+%% Write a nested list of 9-bit bytes in order, stopping at the first
+%% error.
+image_write(TByte, FP) when is_integer(TByte), 0 =< TByte, TByte =< 511 ->
+  pdp10_stdio:fputc(TByte, FP);
+image_write([], _FP) -> ok;
+image_write([Head | Tail], FP) ->
+  case image_write(Head, FP) of
+    {error, _Reason} = Error -> Error;
+    ok -> image_write(Tail, FP)
+  end.
diff --git a/erlang/apps/as/src/parse.erl b/erlang/apps/as/src/parse.erl
new file mode 100644
index 0000000..7eeda67
--- /dev/null
+++ b/erlang/apps/as/src/parse.erl
@@ -0,0 +1,312 @@
+%%% -*- erlang-indent-level: 2 -*-
+%%%
+%%% parser for pdp10-elf as
+%%% Copyright (C) 2013-2019 Mikael Pettersson
+%%%
+%%% This file is part of pdp10-tools.
+%%%
+%%% pdp10-tools is free software: you can redistribute it and/or modify
+%%% it under the terms of the GNU General Public License as published by
+%%% the Free Software Foundation, either version 3 of the License, or
+%%% (at your option) any later version.
+%%%
+%%% pdp10-tools is distributed in the hope that it will be useful,
+%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
+%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+%%% GNU General Public License for more details.
+%%%
+%%% You should have received a copy of the GNU General Public License
+%%% along with pdp10-tools. If not, see .
+
+-module(parse).
+
+-export([ stmt/1
+        ]).
+
+-include("token.hrl").
+-include("tunit.hrl").
+-include_lib("lib/include/pdp10_opcodes.hrl").
+
+%% Parse the next statement from the scan state.
+%%
+%% Reads one token and dispatches on it: directive tokens go to their
+%% directive parsers, a symbol starts a label or an instruction, newlines
+%% are skipped, and end of file yields the atom 'eof'. Any other scan
+%% result (an unexpected token or a scanner error) is handed to badtok/3.
+stmt(ScanState) ->
+  case scan:token(ScanState) of
+    {ok, ?T_DOT_FILE} -> dot_file(ScanState);
+    {ok, ?T_DOT_GLOBL} -> dot_globl(ScanState);
+    {ok, ?T_DOT_IDENT} -> dot_ident(ScanState);
+    {ok, ?T_DOT_SIZE} -> dot_size(ScanState);
+    {ok, ?T_DOT_TEXT} -> dot_text(ScanState);
+    {ok, ?T_DOT_TYPE} -> dot_type(ScanState);
+    {ok, {?T_SYMBOL, Name}} -> stmt_after_symbol(ScanState, Name);
+    {ok, ?T_NEWLINE} -> stmt(ScanState); % blank line: try the next token
+    {ok, ?T_EOF} -> eof;
+    ScanRes -> badtok(ScanState, "expected directive, label, or instruction", ScanRes)
+  end.
+
+%% Instructions and labels -----------------------------------------------------
+%%
+%% Recognize:
+%%
+%%