540 lines
11 KiB
C
540 lines
11 KiB
C
#ifndef lint
|
|
static char sccsid[] = "@(#)b.c 1.1 94/10/31 SMI"; /* from S5R2 */
|
|
#endif
|
|
|
|
#include "awk.def"
|
|
#include "stdio.h"
|
|
#include "awk.h"
|
|
|
|
extern NODE *op2();
|
|
extern struct fa *cgotofn();
|
|
#define MAXLIN 256
|
|
#define NCHARS 128
|
|
#define NSTATES 256
|
|
|
|
#define type(v) v->nobj
|
|
#define left(v) v->narg[0]
|
|
#define right(v) v->narg[1]
|
|
#define parent(v) v->nnext
|
|
|
|
#define LEAF case CCL: case NCCL: case CHAR: case DOT:
|
|
#define UNARY case FINAL: case STAR: case PLUS: case QUEST:
|
|
|
|
/* encoding in tree NODEs:
|
|
leaf (CCL, NCCL, CHAR, DOT): left is index, right contains value or pointer to value
|
|
unary (FINAL, STAR, PLUS, QUEST): left is child, right is null
|
|
binary (CAT, OR): left and right are children
|
|
parent contains pointer to parent
|
|
*/
|
|
|
|
struct fa {
|
|
int cch;
|
|
struct fa *st;
|
|
};
|
|
|
|
int *state[NSTATES];
|
|
int *foll[MAXLIN];
|
|
char chars[MAXLIN];
|
|
int setvec[MAXLIN];
|
|
NODE *point[MAXLIN];
|
|
|
|
int setcnt;
|
|
int line;
|
|
|
|
|
|
struct fa *makedfa(p) /* returns dfa for tree pointed to by p */
|
|
NODE *p;
|
|
{
|
|
NODE *p1;
|
|
struct fa *fap;
|
|
p1 = op2(CAT, op2(STAR, op2(DOT, (NODE *) 0, (NODE *) 0), (NODE *) 0), p);
|
|
/* put DOT STAR in front of reg. exp. */
|
|
p1 = op2(FINAL, p1, (NODE *) 0); /* install FINAL NODE */
|
|
|
|
line = 0;
|
|
penter(p1); /* enter parent pointers and leaf indices */
|
|
point[line] = p1; /* FINAL NODE */
|
|
setvec[0] = 1; /* for initial DOT STAR */
|
|
cfoll(p1); /* set up follow sets */
|
|
fap = cgotofn();
|
|
freetr(p1); /* add this when alloc works */
|
|
return(fap);
|
|
}
|
|
|
|
penter(p) /* set up parent pointers and leaf indices */
|
|
NODE *p;
|
|
{
|
|
switch(type(p)) {
|
|
LEAF
|
|
left(p) = (NODE *) line;
|
|
point[line++] = p;
|
|
break;
|
|
UNARY
|
|
penter(left(p));
|
|
parent(left(p)) = p;
|
|
break;
|
|
case CAT:
|
|
case OR:
|
|
penter(left(p));
|
|
penter(right(p));
|
|
parent(left(p)) = p;
|
|
parent(right(p)) = p;
|
|
break;
|
|
default:
|
|
error(FATAL, "unknown type %d in penter\n", type(p));
|
|
break;
|
|
}
|
|
}
|
|
|
|
freetr(p) /* free parse tree and follow sets */
|
|
NODE *p;
|
|
{
|
|
switch(type(p)) {
|
|
LEAF
|
|
xfree(foll[(int) left(p)]);
|
|
xfree(p);
|
|
break;
|
|
UNARY
|
|
freetr(left(p));
|
|
xfree(p);
|
|
break;
|
|
case CAT:
|
|
case OR:
|
|
freetr(left(p));
|
|
freetr(right(p));
|
|
xfree(p);
|
|
break;
|
|
default:
|
|
error(FATAL, "unknown type %d in freetr", type(p));
|
|
break;
|
|
}
|
|
}
|
|
char *cclenter(p)
|
|
register char *p;
|
|
{
|
|
register i, c;
|
|
char *op;
|
|
|
|
op = p;
|
|
i = 0;
|
|
while ((c = *p++) != 0) {
|
|
if (c == '-' && i > 0 && chars[i-1] != 0) {
|
|
if (*p != 0) {
|
|
c = chars[i-1];
|
|
while (c < *p) {
|
|
if (i >= MAXLIN)
|
|
overflo();
|
|
chars[i++] = ++c;
|
|
}
|
|
p++;
|
|
continue;
|
|
}
|
|
}
|
|
if (i >= MAXLIN)
|
|
overflo();
|
|
chars[i++] = c;
|
|
}
|
|
chars[i++] = '\0';
|
|
dprintf("cclenter: in = |%s|, out = |%s|\n", op, chars, NULL);
|
|
xfree(op);
|
|
return(tostring(chars));
|
|
}
|
|
|
|
overflo()
|
|
{
|
|
error(FATAL, "regular expression too long\n");
|
|
}
|
|
|
|
cfoll(v) /* enter follow set of each leaf of vertex v into foll[leaf] */
|
|
register NODE *v;
|
|
{
|
|
register i;
|
|
int prev;
|
|
int *add();
|
|
|
|
switch(type(v)) {
|
|
LEAF
|
|
setcnt = 0;
|
|
for (i=1; i<=line; i++)
|
|
setvec[i] = 0;
|
|
follow(v);
|
|
if (notin(foll, ( (int) left(v))-1, &prev)) {
|
|
foll[(int) left(v)] = add(setcnt);
|
|
}
|
|
else
|
|
foll[ (int) left(v)] = foll[prev];
|
|
break;
|
|
UNARY
|
|
cfoll(left(v));
|
|
break;
|
|
case CAT:
|
|
case OR:
|
|
cfoll(left(v));
|
|
cfoll(right(v));
|
|
break;
|
|
default:
|
|
error(FATAL, "unknown type %d in cfoll", type(v));
|
|
}
|
|
}
|
|
|
|
first(p) /* collects initially active leaves of p into setvec */
|
|
register NODE *p; /* returns 0 or 1 depending on whether p matches empty string */
|
|
{
|
|
register b;
|
|
|
|
switch(type(p)) {
|
|
LEAF
|
|
if (setvec[(int) left(p)] != 1) {
|
|
setvec[(int) left(p)] = 1;
|
|
setcnt++;
|
|
}
|
|
if (type(p) == CCL && (*(char *) right(p)) == '\0')
|
|
return(0); /* empty CCL */
|
|
else return(1);
|
|
case FINAL:
|
|
case PLUS:
|
|
if (first(left(p)) == 0) return(0);
|
|
return(1);
|
|
case STAR:
|
|
case QUEST:
|
|
first(left(p));
|
|
return(0);
|
|
case CAT:
|
|
if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
|
|
return(1);
|
|
case OR:
|
|
b = first(right(p));
|
|
if (first(left(p)) == 0 || b == 0) return(0);
|
|
return(1);
|
|
}
|
|
error(FATAL, "unknown type %d in first\n", type(p));
|
|
return(-1);
|
|
}
|
|
|
|
follow(v)
|
|
NODE *v; /* collects leaves that can follow v into setvec */
|
|
{
|
|
NODE *p;
|
|
|
|
if (type(v) == FINAL)
|
|
return;
|
|
p = parent(v);
|
|
switch (type(p)) {
|
|
case STAR:
|
|
case PLUS: first(v);
|
|
follow(p);
|
|
return;
|
|
|
|
case OR:
|
|
case QUEST: follow(p);
|
|
return;
|
|
|
|
case CAT: if (v == left(p)) { /* v is left child of p */
|
|
if (first(right(p)) == 0) {
|
|
follow(p);
|
|
return;
|
|
}
|
|
}
|
|
else /* v is right child */
|
|
follow(p);
|
|
return;
|
|
case FINAL: if (setvec[line] != 1) {
|
|
setvec[line] = 1;
|
|
setcnt++;
|
|
}
|
|
return;
|
|
}
|
|
}
|
|
|
|
member(c, s) /* is c in s? */
|
|
register char c, *s;
|
|
{
|
|
while (*s)
|
|
if (c == *s++)
|
|
return(1);
|
|
return(0);
|
|
}
|
|
|
|
notin(array, n, prev) /* is setvec in array[0] thru array[n]? */
|
|
int **array;
|
|
int *prev; {
|
|
register i, j;
|
|
int *ptr;
|
|
for (i=0; i<=n; i++) {
|
|
ptr = array[i];
|
|
if (*ptr == setcnt) {
|
|
for (j=0; j < setcnt; j++)
|
|
if (setvec[*(++ptr)] != 1) goto nxt;
|
|
*prev = i;
|
|
return(0);
|
|
}
|
|
nxt: ;
|
|
}
|
|
return(1);
|
|
}
|
|
|
|
int *add(n) { /* remember setvec */
|
|
int *ptr, *p;
|
|
register i;
|
|
if ((p = ptr = (int *) malloc((n+1)*sizeof(int))) == NULL)
|
|
overflo();
|
|
*ptr = n;
|
|
dprintf("add(%d)\n", n, NULL, NULL);
|
|
for (i=1; i <= line; i++)
|
|
if (setvec[i] == 1) {
|
|
*(++ptr) = i;
|
|
dprintf(" ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i);
|
|
}
|
|
dprintf("\n", NULL, NULL, NULL);
|
|
return(p);
|
|
}
|
|
|
|
struct fa *cgotofn()
|
|
{
|
|
register i, k;
|
|
register int *ptr;
|
|
char c;
|
|
char *p;
|
|
NODE *cp;
|
|
int j, n, s, ind, numtrans;
|
|
int finflg;
|
|
int curpos, num, prev;
|
|
struct fa *where[NSTATES];
|
|
|
|
int fatab[257];
|
|
struct fa *pfa;
|
|
|
|
char index[MAXLIN];
|
|
char iposns[MAXLIN];
|
|
int sposns[MAXLIN];
|
|
int spmax, spinit;
|
|
|
|
char symbol[NCHARS];
|
|
char isyms[NCHARS];
|
|
char ssyms[NCHARS];
|
|
int ssmax, ssinit;
|
|
|
|
for (i=0; i<=line; i++) index[i] = iposns[i] = setvec[i] = 0;
|
|
for (i=0; i<NCHARS; i++) isyms[i] = symbol[i] = 0;
|
|
setcnt = 0;
|
|
/* compute initial positions and symbols of state 0 */
|
|
ssmax = 0;
|
|
ptr = state[0] = foll[0];
|
|
spinit = *ptr;
|
|
for (i=0; i<spinit; i++) {
|
|
curpos = *(++ptr);
|
|
sposns[i] = curpos;
|
|
iposns[curpos] = 1;
|
|
cp = point[curpos];
|
|
dprintf("i = %d, spinit = %d, curpos = %d\n", i, spinit, curpos);
|
|
switch (type(cp)) {
|
|
case CHAR:
|
|
k = (int) right(cp);
|
|
if (isyms[k] != 1) {
|
|
isyms[k] = 1;
|
|
ssyms[ssmax++] = k;
|
|
}
|
|
break;
|
|
case DOT:
|
|
for (k=1; k<NCHARS; k++) {
|
|
if (k != HAT) {
|
|
if (isyms[k] != 1) {
|
|
isyms[k] = 1;
|
|
ssyms[ssmax++] = k;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
case CCL:
|
|
for (p = (char *) right(cp); *p; p++) {
|
|
if (*p != HAT) {
|
|
if (isyms[*p] != 1) {
|
|
isyms[*p] = 1;
|
|
ssyms[ssmax++] = *p;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
case NCCL:
|
|
for (k=1; k<NCHARS; k++) {
|
|
if (k != HAT && !member(k, (char *) right(cp))) {
|
|
if (isyms[k] != 1) {
|
|
isyms[k] = 1;
|
|
ssyms[ssmax++] = k;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
ssinit = ssmax;
|
|
n = 0;
|
|
for (s=0; s<=n; s++) {
|
|
dprintf("s = %d\n", s, NULL, NULL);
|
|
ind = 0;
|
|
numtrans = 0;
|
|
finflg = 0;
|
|
if (*(state[s] + *state[s]) == line) { /* s final? */
|
|
finflg = 1;
|
|
goto tenter;
|
|
}
|
|
spmax = spinit;
|
|
ssmax = ssinit;
|
|
ptr = state[s];
|
|
num = *ptr;
|
|
for (i=0; i<num; i++) {
|
|
curpos = *(++ptr);
|
|
if (iposns[curpos] != 1 && index[curpos] != 1) {
|
|
index[curpos] = 1;
|
|
sposns[spmax++] = curpos;
|
|
}
|
|
cp = point[curpos];
|
|
switch (type(cp)) {
|
|
case CHAR:
|
|
k = (int) right(cp);
|
|
if (isyms[k] == 0 && symbol[k] == 0) {
|
|
symbol[k] = 1;
|
|
ssyms[ssmax++] = k;
|
|
}
|
|
break;
|
|
case DOT:
|
|
for (k=1; k<NCHARS; k++) {
|
|
if (k != HAT) {
|
|
if (isyms[k] == 0 && symbol[k] == 0) {
|
|
symbol[k] = 1;
|
|
ssyms[ssmax++] = k;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
case CCL:
|
|
for (p = (char *) right(cp); *p; p++) {
|
|
if (*p != HAT) {
|
|
if (isyms[*p] == 0 && symbol[*p] == 0) {
|
|
symbol[*p] = 1;
|
|
ssyms[ssmax++] = *p;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
case NCCL:
|
|
for (k=1; k<NCHARS; k++) {
|
|
if (k != HAT && !member(k, (char *) right(cp))) {
|
|
if (isyms[k] == 0 && symbol[k] == 0) {
|
|
symbol[k] = 1;
|
|
ssyms[ssmax++] = k;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
for (j=0; j<ssmax; j++) { /* nextstate(s, ssyms[j]) */
|
|
c = ssyms[j];
|
|
symbol[c] = 0;
|
|
setcnt = 0;
|
|
for (k=0; k<=line; k++) setvec[k] = 0;
|
|
for (i=0; i<spmax; i++) {
|
|
index[sposns[i]] = 0;
|
|
cp = point[sposns[i]];
|
|
if ((k = type(cp)) != FINAL)
|
|
if (k == CHAR && c == (int) right(cp)
|
|
|| k == DOT
|
|
|| k == CCL && member(c, (char *) right(cp))
|
|
|| k == NCCL && !member(c, (char *) right(cp))) {
|
|
ptr = foll[sposns[i]];
|
|
num = *ptr;
|
|
for (k=0; k<num; k++) {
|
|
if (setvec[*(++ptr)] != 1
|
|
&& iposns[*ptr] != 1) {
|
|
setvec[*ptr] = 1;
|
|
setcnt++;
|
|
}
|
|
}
|
|
}
|
|
} /* end nextstate */
|
|
if (notin(state, n, &prev)) {
|
|
if (n >= NSTATES - 1) {
|
|
dprintf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL);
|
|
overflo();
|
|
}
|
|
state[++n] = add(setcnt);
|
|
dprintf(" delta(%d,%o) = %d", s,c,n);
|
|
dprintf(", ind = %d\n", ind+1, NULL, NULL);
|
|
fatab[++ind] = c;
|
|
fatab[++ind] = n;
|
|
numtrans++;
|
|
}
|
|
else {
|
|
if (prev != 0) {
|
|
dprintf(" delta(%d,%o) = %d", s,c,prev);
|
|
dprintf(", ind = %d\n", ind+1, NULL, NULL);
|
|
fatab[++ind] = c;
|
|
fatab[++ind] = prev;
|
|
numtrans++;
|
|
}
|
|
}
|
|
}
|
|
tenter:
|
|
if ((pfa = (struct fa *) malloc((numtrans + 1) * sizeof(struct fa))) == NULL)
|
|
overflo();
|
|
where[s] = pfa;
|
|
if (finflg)
|
|
pfa->cch = -1; /* s is a final state */
|
|
else
|
|
pfa->cch = numtrans;
|
|
pfa->st = 0;
|
|
for (i=1, pfa += 1; i<=numtrans; i++, pfa++) {
|
|
pfa->cch = fatab[2*i-1];
|
|
pfa->st = (struct fa *) fatab[2*i];
|
|
}
|
|
}
|
|
for (i=0; i<=n; i++) {
|
|
xfree(state[i]); /* free state[i] */
|
|
pfa = where[i];
|
|
pfa->st = where[0];
|
|
dprintf("state %d: (%o)\n", i, pfa, NULL);
|
|
dprintf(" numtrans = %d, default = %o\n", pfa->cch, pfa->st, NULL);
|
|
for (k=1; k<=pfa->cch; k++) {
|
|
(pfa+k)->st = where[ (int) (pfa+k)->st];
|
|
dprintf(" char = %o, nextstate = %o\n",(pfa+k)->cch, (pfa+k)->st, NULL);
|
|
}
|
|
}
|
|
pfa = where[0];
|
|
if ((num = pfa->cch) < 0)
|
|
return(where[0]);
|
|
for (pfa += num; num; num--, pfa--)
|
|
if (pfa->cch == HAT) {
|
|
return(pfa->st);
|
|
}
|
|
return(where[0]);
|
|
}
|
|
|
|
match(pfa, p)
|
|
register struct fa *pfa;
|
|
register char *p;
|
|
{
|
|
register count;
|
|
char c;
|
|
if (p == 0) return(0);
|
|
if (pfa->cch == 1) { /* fast test for first character, if possible */
|
|
c = (++pfa)->cch;
|
|
do
|
|
if (c == *p) {
|
|
p++;
|
|
pfa = pfa->st;
|
|
goto adv;
|
|
}
|
|
while (*p++ != 0);
|
|
return(0);
|
|
}
|
|
adv: if ((count = pfa->cch) < 0) return(1);
|
|
do {
|
|
for (pfa += count; count; count--, pfa--)
|
|
if (pfa->cch == *p) {
|
|
break;
|
|
}
|
|
pfa = pfa->st;
|
|
if ((count = pfa->cch) < 0) return(1);
|
|
} while(*p++ != 0);
|
|
return(0);
|
|
}
|