as: scan: move scan_state handling here; scan_state: delete

This commit is contained in:
Mikael Pettersson 2019-12-27 21:39:13 +01:00
parent 0f121165fc
commit ac426a91f8
2 changed files with 98 additions and 152 deletions

View File

@ -29,24 +29,94 @@
-include("token.hrl").
-type scan_state() :: scan_state:scan_state().
-type location() :: scan_state:location().
-type scan_state() :: {scan_state, reference()}.
-type location() :: {Filename :: string(), LineNr :: pos_integer()}.
-export_type([scan_state/0, location/0]).
%% Scan State ------------------------------------------------------------------
-spec fclose(scan_state()) -> ok | {error, {module(), term()}}.
fclose(ScanState) ->
scan_state:fclose(ScanState).
%% The scanner state records the I/O handle, implements a one-character
%% pushback buffer, and maintains the current line number.
%% TODO: maintain column number too?
-record(scan_state,
%% filename: name that location/1 pairs with the current line number
{ filename :: string()
%% iodev: device fgetc/1 reads from; fclose/1 closes it unless it is standard_io
, iodev :: file:fd() | standard_io
%% ungetc: one-character pushback buffer, [] when empty
, ungetc :: [] | byte()
%% linenr: set to 1 by do_fopen/2, incremented by fgetc/1 for each newline read
, linenr :: pos_integer()
}).
%% Close the scanner identified by Handle and drop its state from the
%% process dictionary.  The device is closed unless it is standard_io;
%% the result of file:close/1 is discarded, so the return value is always ok.
%% Crashes with badmatch if no scan state is stored under Handle.
-spec fclose(scan_state()) -> ok.
fclose(Handle) ->
  #scan_state{iodev = IoDev} = get(Handle),
  if IoDev =:= standard_io -> ok;
     true -> file:close(IoDev)
  end,
  erase(Handle),
  ok.
%% Return the next character from the scanner identified by Handle.
%% A pushed-back character (see ungetc/2) is returned first and the pushback
%% buffer is emptied.  Otherwise one character is read from the device; the
%% line number is bumped only when a newline is read from the device, not
%% when one is returned from the pushback buffer.
%% Returns {ok, Byte}, eof, or {error, {file, Reason}} on a read error.
-spec fgetc(scan_state()) -> {ok, byte()} | eof | {error, {module(), term()}}.
fgetc(Handle) ->
  #scan_state{ungetc = Pushback} = ScanState = get(Handle),
  case Pushback of
    [] ->
      case file:read(ScanState#scan_state.iodev, 1) of
        {ok, [$\n]} ->
          NextLineNr = ScanState#scan_state.linenr + 1,
          put(Handle, ScanState#scan_state{linenr = NextLineNr}),
          {ok, $\n};
        {ok, [Byte]} ->
          {ok, Byte};
        eof ->
          eof;
        {error, Reason} ->
          {error, {file, Reason}}
      end;
    _ ->
      put(Handle, ScanState#scan_state{ungetc = []}),
      {ok, Pushback}
  end.
-spec fopen(string()) -> {ok, scan_state()} | {error, {module(), term()}}.
fopen(File) ->
scan_state:fopen(File).
fopen(Filename) ->
case file:open(Filename, [raw, read, read_ahead]) of
{ok, IoDev} -> do_fopen(Filename, IoDev);
{error, Reason} -> {error, {file, Reason}}
end.
-spec stdin() -> {ok, scan_state()}.
stdin() ->
scan_state:stdin().
do_fopen(_Filename = "<stdin>", _IoDev = standard_io).
%% Create a fresh scan state for IoDev (empty pushback buffer, line 1),
%% store it in the process dictionary under a unique handle, and return
%% {ok, Handle}.
do_fopen(Filename, IoDev) ->
  Handle = {scan_state, make_ref()},
  put(Handle, #scan_state{ filename = Filename
                         , iodev = IoDev
                         , ungetc = []
                         , linenr = 1
                         }),
  {ok, Handle}.
%% Push Ch back so that the next fgetc/1 on Handle returns it.
%% Only one character can be pending: if the pushback buffer is already
%% occupied, nothing is changed and {error, {?MODULE, ungetc}} is returned.
-spec ungetc(byte(), scan_state()) -> ok | {error, {module(), term()}}.
ungetc(Ch, Handle) ->
  #scan_state{ungetc = Pending} = ScanState = get(Handle),
  if Pending =:= [] ->
       put(Handle, ScanState#scan_state{ungetc = Ch}),
       ok;
     true ->
       {error, {?MODULE, ungetc}}
  end.
%% Return {ok, {Filename, LineNr}} for the scanner identified by Handle,
%% taken from the state currently stored in the process dictionary.
-spec location(scan_state()) -> {ok, location()}.
location(Handle) ->
  #scan_state{filename = Filename, linenr = LineNr} = get(Handle),
  {ok, {Filename, LineNr}}.
%% Scanner ---------------------------------------------------------------------
@ -54,8 +124,8 @@ stdin() ->
-> {ok, {location(), token()}} | {error, {module(), term()}}.
token(ScanState) ->
%% TODO: optimize
{ok, Location} = scan_state:location(ScanState),
case scan_state:fgetc(ScanState) of
{ok, Location} = location(ScanState),
case fgetc(ScanState) of
{error, _Reason} = Error -> Error;
eof -> {ok, {Location, ?T_EOF}};
{ok, Ch} ->
@ -89,20 +159,20 @@ token(ScanState) ->
%% Scan after seeing '#'.
%% Discards characters up to and including the terminating newline and then
%% returns {ok, {Location, ?T_NEWLINE}}.  Read errors are passed through
%% unchanged; end of file inside the comment is reported via badchar/3.
do_line_comment(ScanState) ->
  case fgetc(ScanState) of
    {error, _Reason} = Error -> Error;
    eof -> badchar(ScanState, eof, "in line comment");
    {ok, $\n} ->
      %% location/1 returns {ok, Location}; unwrap it so the token carries a
      %% bare location() rather than a nested {ok, _} tuple.
      {ok, Location} = location(ScanState),
      {ok, {Location, ?T_NEWLINE}};
    {ok, _Ch} -> do_line_comment(ScanState)
  end.
%% Scan after seeing '/'.
do_slash(ScanState) ->
case scan_state:fgetc(ScanState) of
case fgetc(ScanState) of
{error, _Reason} = Error -> Error;
{ok, $*} -> do_c_comment(ScanState, false);
{ok, Ch} ->
scan_state:ungetc(Ch, ScanState),
ungetc(Ch, ScanState),
badchar(ScanState, Ch, "after /"); % TODO: NYI: T_DIV
eof ->
badchar(ScanState, eof, "after /")
@ -110,7 +180,7 @@ do_slash(ScanState) ->
%% Scan after seeing '/* ...'.
do_c_comment(ScanState, PrevWasStar) ->
case scan_state:fgetc(ScanState) of
case fgetc(ScanState) of
{error, _Reason} = Error -> Error;
eof -> badchar(ScanState, eof, "in /*...*/ comment");
{ok, $*} -> do_c_comment(ScanState, true);
@ -120,7 +190,7 @@ do_c_comment(ScanState, PrevWasStar) ->
%% Scan after seeing '"'.
do_string(ScanState, Location, Chars) ->
case scan_state:fgetc(ScanState) of
case fgetc(ScanState) of
{error, _Reason} = Error -> Error;
eof -> badchar(ScanState, eof, "in string literal");
{ok, $\n} -> badchar(ScanState, $\n, "in string literal");
@ -135,7 +205,7 @@ do_string(ScanState, Location, Chars) ->
%% Scan after seeing '\' in a string literal.
do_escape(ScanState) ->
case scan_state:fgetc(ScanState) of
case fgetc(ScanState) of
{error, _Reason} = Error -> Error;
eof -> badchar(ScanState, eof, "in \\ character escape");
{ok, Ch} ->
@ -157,13 +227,13 @@ do_escape(ScanState) ->
do_octal_escape(_ScanState, Val, 0) -> {ok, Val};
do_octal_escape(ScanState, Val, N) ->
case scan_state:fgetc(ScanState) of
case fgetc(ScanState) of
{error, _Reason} = Error -> Error;
eof -> badchar(ScanState, eof, "in \\ character escape");
{ok, Ch} ->
%% Only '0'..'7' are octal digits; any other character ends the escape
%% and is pushed back by the fallback branch below.
if $0 =< Ch, Ch =< $7 -> do_octal_escape(ScanState, Val * 8 + (Ch - $0), N - 1);
true ->
case scan_state:ungetc(Ch, ScanState) of
case ungetc(Ch, ScanState) of
{error, _Reason} = Error -> Error;
ok -> {ok, Val}
end
@ -171,7 +241,7 @@ do_octal_escape(ScanState, Val, N) ->
end.
do_symbol(ScanState, Location, Chars) ->
case scan_state:fgetc(ScanState) of
case fgetc(ScanState) of
{error, _Reason} = Error -> Error;
eof -> do_symbol(Location, lists:reverse(Chars));
{ok, Ch} ->
@ -182,7 +252,7 @@ do_symbol(ScanState, Location, Chars) ->
Ch =:= $$ orelse
Ch =:= $_ -> do_symbol(ScanState, Location, [Ch | Chars]);
true ->
case scan_state:ungetc(Ch, ScanState) of
case ungetc(Ch, ScanState) of
{error, _Reason} = Error -> Error;
ok -> do_symbol(Location, lists:reverse(Chars))
end
@ -199,13 +269,13 @@ do_symbol(Location, Chars) ->
do_number(ScanState, Location, Dig0) ->
case Dig0 of
$0 ->
case scan_state:fgetc(ScanState) of
case fgetc(ScanState) of
{error, _Reason} = Error -> Error;
eof -> {ok, {Location, {?T_UINTEGER, Dig0 - $0}}};
{ok, Ch} ->
if Ch =:= $x; Ch =:= $X ->
%% must have hex digit after 0x
case scan_state:fgetc(ScanState) of
case fgetc(ScanState) of
{error, _Reason} = Error -> Error;
eof -> badchar(ScanState, eof, "after 0x in number");
{ok, Ch} ->
@ -216,7 +286,7 @@ do_number(ScanState, Location, Dig0) ->
end
end;
true ->
case scan_state:ungetc(Ch, ScanState) of
case ungetc(Ch, ScanState) of
{error, _Reason} = Error -> Error;
ok -> do_number(ScanState, Location, _Base = 8, _Val = 0)
end
@ -226,7 +296,7 @@ do_number(ScanState, Location, Dig0) ->
end.
do_number(ScanState, Location, Base, Val) ->
case scan_state:fgetc(ScanState) of
case fgetc(ScanState) of
{error, _Reason} = Error -> Error;
eof -> {ok, {Location, {?T_UINTEGER, Val}}};
{ok, Ch} ->
@ -236,7 +306,7 @@ do_number(ScanState, Location, Base, Val) ->
_ChVal when Base =< 10 andalso (Ch =:= $b orelse Ch =:= $f) ->
{ok, {Location, {?T_LOCAL_LABEL, Val, Ch}}};
_ChVal ->
case scan_state:ungetc(Ch, ScanState) of
case ungetc(Ch, ScanState) of
{error, _Reason} = Error -> Error;
ok -> {ok, {Location, {?T_UINTEGER, Val}}}
end
@ -251,10 +321,11 @@ chval(Ch) ->
end.
badchar(ScanState, Ch, Context) ->
{ok, {FileName, LineNr}} = scan_state:location(ScanState),
{ok, {FileName, LineNr}} = location(ScanState),
{error, {?MODULE, {FileName, LineNr, Ch, Context}}}.
%% Render an error reason produced by this module as printable text.
%%   ungetc                         - ungetc/2 was called while a character
%%                                    was already pending
%%   {FileName, LineNr, Ch, Context} - invalid character, as built by badchar/3
-spec format_error(term()) -> io_lib:chars().
format_error(ungetc) ->
  "internal error: invalid ungetc";
format_error({FileName, LineNr, Ch, Context}) ->
  Args = [FileName, LineNr, char_to_string(Ch), Context],
  io_lib:format("~s line ~p: invalid character '~s' ~s", Args).

View File

@ -1,125 +0,0 @@
%%% -*- erlang-indent-level: 2 -*-
%%%
%%% scanner state manager for pdp10-elf as
%%% Copyright (C) 2019 Mikael Pettersson
%%%
%%% This file is part of pdp10-tools.
%%%
%%% pdp10-tools is free software: you can redistribute it and/or modify
%%% it under the terms of the GNU General Public License as published by
%%% the Free Software Foundation, either version 3 of the License, or
%%% (at your option) any later version.
%%%
%%% pdp10-tools is distributed in the hope that it will be useful,
%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
%%% GNU General Public License for more details.
%%%
%%% You should have received a copy of the GNU General Public License
%%% along with pdp10-tools. If not, see <http://www.gnu.org/licenses/>.
-module(scan_state).
%% API
-export([ % file I/O wrappers
fclose/1
, fgetc/1
, fopen/1
, stdin/0
, ungetc/2
% meta-data accessors
, location/1
, format_error/1
]).
%% The scanner state records the I/O handle, implements a one-character
%% pushback buffer, and maintains the current line number.
%% TODO: maintain column number too?
-record(state,
%% filename: name that location/1 pairs with the current line number
{ filename :: string()
%% iodev: device fgetc/1 reads from; fclose/1 closes it unless it is standard_io
, iodev :: file:fd() | standard_io
%% ungetc: one-character pushback buffer, [] when empty
, ungetc :: [] | byte()
%% linenr: set to 1 by do_fopen/2, incremented by fgetc/1 for each newline read
, linenr :: pos_integer()
}).
-type scan_state() :: {scan_state, reference()}.
-type location() :: {Filename :: string(), LineNr :: pos_integer()}.
-export_type([scan_state/0, location/0]).
%% API -------------------------------------------------------------------------
%% Close the scanner identified by Handle and drop its state from the
%% process dictionary.  The device is closed unless it is standard_io;
%% the result of file:close/1 is discarded, so the return value is always ok.
%% Crashes with badmatch if no state is stored under Handle.
-spec fclose(scan_state()) -> ok.
fclose(Handle) ->
  #state{iodev = IoDev} = get(Handle),
  if IoDev =:= standard_io -> ok;
     true -> file:close(IoDev)
  end,
  erase(Handle),
  ok.
%% Return the next character from the scanner identified by Handle.
%% A pushed-back character (see ungetc/2) is returned first and the pushback
%% buffer is emptied.  Otherwise one character is read from the device; the
%% line number is bumped only when a newline is read from the device, not
%% when one is returned from the pushback buffer.
%% Returns {ok, Byte}, eof, or {error, {file, Reason}} on a read error.
-spec fgetc(scan_state()) -> {ok, byte()} | eof | {error, {module(), term()}}.
fgetc(Handle) ->
  #state{ungetc = Pushback} = State = get(Handle),
  case Pushback of
    [] ->
      case file:read(State#state.iodev, 1) of
        {ok, [$\n]} ->
          NextLineNr = State#state.linenr + 1,
          put(Handle, State#state{linenr = NextLineNr}),
          {ok, $\n};
        {ok, [Byte]} ->
          {ok, Byte};
        eof ->
          eof;
        {error, Reason} ->
          {error, {file, Reason}}
      end;
    _ ->
      put(Handle, State#state{ungetc = []}),
      {ok, Pushback}
  end.
%% Open Filename for reading (raw mode with read-ahead buffering) and wrap
%% the device in a new scanner handle.  An open failure is returned as
%% {error, {file, Reason}}.
-spec fopen(string()) -> {ok, scan_state()} | {error, {module(), term()}}.
fopen(Filename) ->
  OpenModes = [raw, read, read_ahead],
  case file:open(Filename, OpenModes) of
    {error, Reason} -> {error, {file, Reason}};
    {ok, IoDev} -> do_fopen(Filename, IoDev)
  end.
%% Create a scanner handle that reads from standard_io; locations for it
%% report the file name "<stdin>".
-spec stdin() -> {ok, scan_state()}.
stdin() ->
  Filename = "<stdin>",
  IoDev = standard_io,
  do_fopen(Filename, IoDev).
%% Create a fresh state for IoDev (empty pushback buffer, line 1), store it
%% in the process dictionary under a unique handle, and return {ok, Handle}.
do_fopen(Filename, IoDev) ->
  Handle = {scan_state, make_ref()},
  put(Handle, #state{ filename = Filename
                    , iodev = IoDev
                    , ungetc = []
                    , linenr = 1
                    }),
  {ok, Handle}.
%% Push Ch back so that the next fgetc/1 on Handle returns it.
%% Only one character can be pending: if the pushback buffer is already
%% occupied, nothing is changed and {error, {?MODULE, ungetc}} is returned.
-spec ungetc(byte(), scan_state()) -> ok | {error, {module(), term()}}.
ungetc(Ch, Handle) ->
  #state{ungetc = Pending} = State = get(Handle),
  if Pending =:= [] ->
       put(Handle, State#state{ungetc = Ch}),
       ok;
     true ->
       {error, {?MODULE, ungetc}}
  end.
%% Return {ok, {Filename, LineNr}} for the scanner identified by Handle,
%% taken from the state currently stored in the process dictionary.
-spec location(scan_state()) -> {ok, location()}.
location(Handle) ->
  #state{filename = Filename, linenr = LineNr} = get(Handle),
  {ok, {Filename, LineNr}}.
%% Render an error reason produced by this module as printable text.
%% The only reason defined here is ungetc, returned by ungetc/2 when a
%% character is already pending; any other term raises function_clause.
-spec format_error(term()) -> io_lib:chars().
format_error(ungetc) ->
  "internal error: invalid ungetc".