1
0
mirror of https://github.com/PDP-10/its.git synced 2026-03-25 17:58:40 +00:00
Files
PDP-10.its/c20/lex/lex.old
2018-05-15 07:06:17 +02:00

661 lines
13 KiB
Plaintext

/*
* Copyright (c) 1978 Charles H. Forsyth
*/
/*
* lex -- initialisation, allocation, set creation
*
* Revised for PDP-11 (Decus) C by Martin Minow
*/
/* Modified 02-Dec-80 Bob Denny -- Conditionalized debug code for smaller size
* 01 -- Moved calls to dfa build, min, print, write
* and to stat, and code for ending() into
* this module so that 'ytab' could be put
* into overlay region.
* 29-May-81 Bob Denny -- More extern hacking for RSX overlaying.
* More 19-Mar-82 Bob Denny -- New C library & compiler
* More 03-May-82 Bob Denny -- Final touches, remove unreferenced autos
* 28-Aug-82 Bob Denny -- Add "-s" switch to supress references to
* "stdio.h" in generated code. Add switch
* comments in code. Add -e for "easy" com-
* mand line. "lex -e file" is the short way
* of saying:
* "lex -i file.lxi -o file.c -t file"
* More(!) 30-Oct-82 Bob Denny -- Fix RSX ODL to put lots of FCS junk into
* overlay, pick up (badly needed) 3KW for
* NFA nodes, etc. Change static allocations
* in LEXLEX.H for RSX so can do non-trivial
* things. Task is now big on RSX and grows
* from big to huge as it runs.
* Fix "-s" support so it is again possible
* to do a lexswitch() (dumb!).
*/
/*)BUILD $(PROGRAM) = lex
$(INCLUDE) = { lexlex.h ytab.h }
$(FILES) = { base dfa eclosu lex impure
min out1 out2 lexsrt ytab }
$(STACK) = 4000
$(TKBOPTIONS) = {
STACK = 3000
TASK = ...LEX
;DON'T TRY TO USE FCSRES (RBD)
}
$(ODL) = {
;
; ODL FOR BUILDING LEX ON RSX11M.
; BOB DENNY 29-MAY-81
; BOB DENNY 19-MAR-82
; BOB DENNY 03-MAY-82
; BOB DENNY 30-OCT-82 PUT SOME FCS JUNK IN OVERLAY. REORGANIZE FOR
; SIMPLICITY.
;
.NAME OVR1
.NAME FCSJNK
.NAME LXPROC
.NAME PARSER
.ROOT L0,L1
;
L0: .FCTR LEX-IMPURE-OUT2-LB:[1,1]C/LB
;
L1: .FCTR OVR1-*(FCS,STAT,PARS)
;
FCS: .FCTR FCSJNK-(F1,F2)
;
F1: .FCTR LB:[1,1]SYSLIB/LB:.CSI1:.CSI2
F2: .FCTR LB:[1,1]SYSLIB/LB:OPEN-(OP1,OP2)
;
OP1: .FCTR LB:[1,1]SYSLIB/LB:CREATE:CLOSE:DEL:FINIT:MKDL:OPFNB:RQLCB
OP2: .FCTR LB:[1,1]SYSLIB/LB:PARSE
;
STAT: .FCTR LXPROC-LEXSRT-DFA-BASE-ECLOSU-MIN-OUT1-LB:[1,1]C/LB
;
PARS: .FCTR PARSER-YTAB-LB:[1,1]C/LB
;
.END
}
$(OVR) = {
LEX
LEXSRT
IMPURE
$(SUPORT)
$(RTLIB)
DFA/O:1
MIN/O:1
OUT1/O:1
YTAB/O:1
BASE/O:2
OUT2/O:2
ECLOSU/O:2
}
*/
#ifdef DOCUMENTATION
title lex A Lexical Analyser Generator
index A Lexical Analyser Generator
synopsis
lex [-options] [-i grammar] [-o outfile] [-t table]
description
Lex compiles a lexical analyser from a grammar and description of
actions. It is described more fully in lex.doc: only usage is
described. The following options are available:
.lm +16
.s.i-16;-a Disable recognition of non-ASCII characters
(codes > 177 octal) for exception character classes (form [^ ...]).
.s.i-16;-d Enable debugging code within lex. Normally
needed only for debugging lex.
.s.i-16;-e "Easy" command line. Saying "lex#-e#name" is the
same as saying:
.s.i 4;"lex -i name.lxi -o name.c -t name"
.s
Do not include devices or an extension on "name" or make it longer
than 8 characters, or you'll get several error messages.
.s.i-16;-i file Read the grammar from the file. If "-i" is not
specified, input will be read from the standard input.
.s.i-16;-m Enable state minimization. Currently not
implemented, switch is a no-op.
.s.i-16;-o file Write the output to the file. If "-o" is not
specified, output will be written to file "lextab.c".
.s.i-16;-s "Stand-alone" switch. Supresses the line
"#include <stdio.h>" normally generated in the lex output. Use this
if LEX is generating a module to be used in a program which does not
use the "standard I/O" package.
.s.i-16;-t table Name the recognizer "table" instead of the
default "lextab". If -o is not given, output will be written to file
"table.c".
.s.i-16;-v [file] Verify -- write internal tables to the
indicated file. If "-v" is given without a file name argument,
tables will be written to "lex.out".
.lm -16
diagnostics
The following error messages may occur on invocation. See lex
documentation for information on compilation errors.
.lm +8
.s.i -8;Can't create ...
.s.i -8;Cannot open ...
.s.i -8;Illegal option.
.s.i -8;Illegal switch combination.
.s
"-i", "-o" or "-t" given with "-e" or vice-versa
.s.i -8;Table name too long.
.s
The table name (argument to "-t") must not be longer than 8 bytes.
.s.i -8;Missing table name.
.s.i -8;Missing input file.
.s.i -8;Missing output file.
.s.i -8;Missing name.
.lm -8
author
Charles Forsyth
Modified by Bob Denny
bugs
#endif
#include <stdio.h>
#include "lexlex.h"
extern char *lalloc();
extern char *tolower();
struct nfa nfa[MAXNFA];
struct nfa *nfap = &nfa[1];
struct xset sets[NCHARS];
char insets[NCHARS];
struct trans trans[NTRANS];
struct trans *transp = &trans[0];
char ccls[NCCLS][(NCHARS+1)/NBPC];
int nccls;
int ndfa;
struct dfa dfa[MAXDFA];
struct move move[NNEXT];
char *tabname = "lextab";
char tabfile[15];
char *infile = NULL;
char *outfile = NULL;
#ifdef DEBUG
char *dumpfile = "lex.out";
int lldebug = 0;
#endif
int llnxtmax = 0;
FILE *llout;
FILE *lexin;
FILE *lexlog;
/*
* Flags. Allow globals only for
* those requiring same. Some only
* used for checking for bad combos.
*/
int aflag = 0; /* Ignore non-ASCII in [^ ...] */
static int eflag = 0; /* Easy command line */
static int iflag = 0; /* "-i" given */
int mflag = 0; /* Enable state minimization (not imp.) */
static int oflag = 0; /* "-o" given */
int sflag = 0; /* Supress "#include <stdio.h>" in output */
static int tflag = 0; /* "-t" given */
struct set *setlist = 0;
main(argc, argv)
char **argv;
{
register char *cp, *cp2;
#ifdef DEBUG
int vflag;
vflag = 0;
#endif
for (; argc>1 && *argv[1]=='-'; argv++, argc--)
switch (tolower(argv[1][1])) {
#ifdef DEBUG
/*
* Create "verification" file, describing the
* scanner.
*/
case 'v': /* -v => lex.out */
vflag++; /* -v x.out => x.out */
if (argc > 2 && argv[2][1] != '1') {
--argc;
dumpfile = (++argv)[1];
}
break;
/*
* Enable debug displays
*/
case 'd':
lldebug++;
break;
#endif
/*
* Enable state minimization. Currently not
* implemented.
*/
case 'm':
mflag++;
break;
/*
* Disable matching of non-ASCII characters (codes > 177(8))
* for exception character classes (form "[^ ...]").
*/
case 'a':
aflag++;
break;
/*
* Supress "#include <stdio.h>" in generated
* code for programs not using standard I/O.
*/
case 's':
sflag++;
break;
/*
* "Easy" command line
*/
case 'e':
if(iflag || oflag || tflag) {
error("Illegal switch combination\n");
exit(1);
}
if (--argc <= 1) {
error("Missing name\n");
exit(1);
}
if (strlen(tabname = (++argv)[1]) > 8) {
error("Name too long\n");
exit(1);
}
infile = malloc(14);
outfile = malloc(12);
concat(infile, tabname, ".lxi", 0);
if (freopen(infile, "r", stdin) == NULL) {
error("Cannot open input \"%s\"\n", infile);
exit(1);
}
concat(outfile, tabname, ".c", 0);
break;
/*
* Specify input file name. Default = terminal (stdin)
*/
case 'i':
if (eflag) {
error("Illegal switch combination\n");
exit(1);
}
iflag++;
if (--argc <= 1) {
error("Missing input file\n");
exit(1);
}
infile = (++argv)[1];
if (freopen(infile, "r", stdin) == NULL) {
error("Cannot open input \"%s\"\n", infile);
exit(1);
}
break;
/*
* Specify output file name. Default = "lextab.c"
*/
case 'o':
if (eflag) {
error("Illegal switch combination\n");
exit(1);
}
oflag++;
if (--argc <= 1) {
error("Missing output file");
exit(1);
}
outfile = (++argv)[1];
break;
/*
* Specify table name. Default = "lextab". If "-o"
* not given, output will go to "tabname.c".
*/
case 't':
if (eflag) {
error("Illegal switch combination\n");
exit(1);
}
tflag++;
if (--argc <= 1) {
error("Missing table name");
exit(1);
}
if (strlen(tabname = (++argv)[1]) > 8) {
error("Table name too long\n");
exit(1);
}
break;
default:
error("Illegal option: %s\n", argv[1]);
exit(1);
}
lexin = stdin;
#ifdef DEBUG
cp = (vflag) ? dumpfile : "nl:"; /* Dec specific */
if ((lexlog = fopen(cp, "w")) == NULL) {
error("Cannot open \"%s\"", cp);
exit(1);
}
#endif
if (infile == NULL) {
infile = malloc(31);
fgetname(lexin, infile);
}
cp = infile; /* Fold infile to lower case */
while(*cp)
*cp++ = tolower(*cp);
cp = tabname; /* Fold tabname to lower case */
while(*cp)
*cp++ = tolower(*cp);
if (outfile == NULL) {
/*
* Typical hacker's idiom!
*/
for (cp = tabname, cp2 = tabfile; *cp2 = *cp++;)
cp2++;
for (cp = ".c"; *cp2++ = *cp++;)
;
outfile = tabfile;
}
if ((llout = freopen(outfile, "w", stdout))==NULL) {
error("Can't create %s\n", outfile);
exit(1);
}
heading();
if (yyparse())
error("Parse failed\n");
dfabuild(); /* 01+ */
dfamin();
dfaprint();
dfawrite();
#ifdef DEBUG
stats();
#endif /* 01- */
} /** END OF MAIN **/
/*
* This module was moved here from out.c so it could be called from
* ytab.c residing in same overlay region as out.c.
* 02-Dec-80 Bob Denny.
*/
/* 01+ */
ending()
{
static int ended;
if (ended++)
return;
fprintf(llout, "\t}\n\treturn(LEXSKIP);\n}\n");
setline();
}
/* 01- */
#ifdef DEBUG
stats()
{
fprintf(lexlog, "\n");
fprintf(lexlog, "%d/%d NFA states, %d/%d DFA states\n",
nfap-nfa, MAXNFA, ndfa, MAXDFA);
fprintf(lexlog, "%d/%d entries in move vectors\n", llnxtmax, NNEXT);
}
/*
* Print a state set on { ... } form on lexlog.
*/
pset(t, nf)
register struct set *t;
{
register i;
fprintf(lexlog, "{");
for (i = 0; i < t->s_len; i++)
if (nf)
fprintf(lexlog, " %d", t->s_els[i]-nfa); else
fprintf(lexlog, " %d", t->s_els[i]);
fprintf(lexlog, "}");
}
/*
* Print a character to lexlog in readable form.
* Returns the number of characters generated.
*/
chprint(ch)
{
register char *s;
ch &= 0377;
switch (ch) {
case '\t':
s = "\\t";
break;
case '\n':
s = "\\n";
break;
case '\b':
s = "\\b";
break;
case '\r':
s = "\\r";
break;
default:
if(ch<040 || ch>=0177)
{
fprintf(lexlog, "\\%03o", ch);
return(4);
}
else
{
putc(ch, lexlog);
return(1);
}
}
fprintf(lexlog, s);
return(2);
}
#endif
/*
* The following functions simply
* allocate various kinds of
* structures.
*/
struct nfa *
newnfa(ch, nf1, nf2)
struct nfa *nf1, *nf2;
{
register struct nfa *nf;
if ((nf = nfap++) >= &nfa[MAXNFA]) {
error("Too many NFA states");
exit(1);
}
nf->n_char = ch;
nf->n_succ[0] = nf1;
nf->n_succ[1] = nf2;
nf->n_trans = 0;
nf->n_flag = 0;
nf->n_look = 0;
return(nf);
}
newdfa()
{
register struct dfa *df;
if ((df = &dfa[ndfa++]) >= &dfa[MAXDFA]) {
error("Out of dfa states");
exit(1);
}
return(df);
}
char *
newccl(ccl)
char *ccl;
{
register char *p, *q;
register i;
int j;
for (j = 0; j < nccls; j++) {
p = ccl;
q = ccls[j];
for (i = sizeof(ccls[j]); i--;)
if (*p++ != *q++)
goto cont;
return(ccls[j]);
cont:;
}
if (nccls >= NCCLS) {
error("Too many character classes");
exit(1);
}
p = ccl;
q = ccls[j = nccls++];
for (i = sizeof(ccls[j]); i--;)
*q++ = *p++;
return(ccls[j]);
}
struct trans *
newtrans(st, en)
struct nfa *st, *en;
{
register struct trans *tp;
if ((tp = transp++) >= &trans[NTRANS]) {
error("Too many translations");
exit(1);
}
tp->t_start = st;
tp->t_final = en;
en->n_trans = tp;
return(tp);
}
/*
* create a new set.
* `sf', if set, indicates
* that the elements of the
* set are states of an NFA).
* If `sf' is not set, the
* elements are state numbers of
* a DFA.
*/
struct set *
newset(v, i, sf)
register struct nfa **v;
register i;
{
register struct set *t;
register k;
int setcomp();
qsort(v, i, sizeof(*v), setcomp);
for (t = setlist; t; t = t->s_next)
if (t->s_len==i && eqvec(t->s_els, v, i))
return(t);
t = lalloc(1, sizeof(*t)+i*sizeof(t->s_els[0]), "set nodes");
t->s_next = setlist;
setlist = t;
t->s_final = 0;
t->s_state = 0;
t->s_flag = 0;
t->s_len = i;
t->s_group = 0;
t->s_look = 0;
for (v += i; i;) {
--v;
if (sf) {
if ((*v)->n_char==FIN)
t->s_final = (*v)-nfa;
if ((*v)->n_flag&LOOK)
t->s_look |= 1<<(*v)->n_look;
} else {
k = *v;
dfa[k].df_name->s_group = t;
}
t->s_els[--i] = *v;
}
return(t);
}
setcomp(n1p, n2p)
struct nfa **n1p, **n2p;
{
register struct nfa *n1, *n2;
n1 = *n1p;
n2 = *n2p;
if (n1 > n2)
return(1);
if (n1==n2)
return(0);
return(-1);
}
eqvec(a, b, i)
register *a, *b, i;
{
if (i)
do {
if (*a++ != *b++)
return(0);
} while (--i);
return(1);
}
/*
* Ask for core, and
* complain if there is no more.
*/
char *
lalloc(n, s, w)
char *w;
{
register char *cp;
if ((cp = calloc(n, s)) == NULL) {
fprintf(stderr, "No space for %s", w);
#ifdef DEBUG
if (lldebug)
dfaprint();
#endif
exit(1);
}
return(cp);
}