471 lines
8.3 KiB
C
471 lines
8.3 KiB
C
/*% cc -c -O %
|
|
*/
|
|
#include "vars.h"
|
|
#define ESIZE 128 /* ESIZE-1 must fit in a signed byte */
|
|
char expbuf[ESIZE+4];
|
|
int expgood /*0*/; /* flag indicating if compiled exp is good */
|
|
#define CCHR 2
|
|
#define CDOT 4
|
|
#define CCL 6
|
|
#define NCCL 8
|
|
#define CFUNNY 10
|
|
#define CALT 12
|
|
#define CBACK 14
|
|
|
|
#define STAR 01
|
|
#define STARABLE CBACK
|
|
|
|
#define CKET 16
|
|
#define CDOL 17
|
|
#define CEOF 18
|
|
#define CBRA 19
|
|
#define CBOI 20
|
|
#define CEOI 21
|
|
#define CSPACE 22
|
|
int circfl;
|
|
char pmagic[] = "/.$^*+\\()<|>{}[!_123456789";
|
|
void
|
|
compile(eof)
|
|
char eof;
|
|
{
|
|
register c;
|
|
register char *ep, *penultep;
|
|
char *lastep, *bracketp, bracket[NBRA];
|
|
int getsvc();
|
|
int getchar();
|
|
struct{
|
|
char *althd; /* start of code for < ... > */
|
|
char *altlast; /* start of code for last < or | */
|
|
char *bpstart; /* bracketp at start of < and | */
|
|
char *bpend; /* bracketp at end of > or | */
|
|
int nbstart; /* nbra at start of < and | */
|
|
int nbend; /* nbra at end of > or | */
|
|
int firstalt; /* is this the first alternative? */
|
|
} *asp, altstk[NBRA];
|
|
|
|
if(eof == '\n')
|
|
error('x');
|
|
pmagic[0] = eof;
|
|
if ((c=nextchar()) == eof || c=='\n') {
|
|
if (!expgood)
|
|
goto cerror;
|
|
if(c!='\n')
|
|
getchar(); /* eat the eof character */
|
|
return;
|
|
}
|
|
expgood = FALSE;
|
|
ep = expbuf;
|
|
lastep = 0;
|
|
bracketp = bracket;
|
|
nbra = 0;
|
|
asp = &altstk[-1];
|
|
startstring(); /* for the saved pattern register */
|
|
circfl = 0;
|
|
if (c=='^') {
|
|
getsvc(); /* save the caret */
|
|
circfl++;
|
|
}
|
|
for (;;) {
|
|
c = getquote(pmagic, getsvc);
|
|
if (c==eof || c=='\n') {
|
|
if (bracketp!=bracket || asp>=altstk)
|
|
goto cerror;
|
|
*ep++ = CEOF;
|
|
expgood = TRUE;
|
|
dropstring(); /* lose the eof character */
|
|
setstring(SAVPAT);
|
|
if(c=='\n')
|
|
ungetchar(c);
|
|
return;
|
|
}
|
|
if (ep >= &expbuf[ESIZE-5])
|
|
goto cerror;
|
|
penultep = lastep;
|
|
lastep = ep;
|
|
|
|
if(c != (eof|0200)) switch (c) {
|
|
case '('|0200:
|
|
if (nbra >= NBRA)
|
|
goto cerror;
|
|
*bracketp++ = nbra;
|
|
*ep++ = CBRA;
|
|
*ep++ = nbra++;
|
|
continue;
|
|
case ')'|0200:
|
|
if (bracketp <= bracket)
|
|
goto cerror;
|
|
*ep++ = CKET;
|
|
*ep++ = *--bracketp;
|
|
continue;
|
|
case '{'|0200:
|
|
*ep++ = CBOI;
|
|
continue;
|
|
case '}'|0200:
|
|
*ep++ = CEOI;
|
|
continue;
|
|
case '_'|0200:
|
|
*ep++ = CSPACE;
|
|
continue;
|
|
case '!'|0200:
|
|
*ep++ = CFUNNY;
|
|
continue;
|
|
case '<':
|
|
if (++asp >= &altstk[NBRA])
|
|
goto cerror;
|
|
*ep++ = CALT;
|
|
asp->althd = ep;
|
|
ep++;
|
|
asp->bpstart = bracketp;
|
|
asp->nbstart = nbra;
|
|
asp->firstalt = TRUE;
|
|
asp->altlast = ep++;
|
|
lastep = 0;
|
|
continue;
|
|
case '|':
|
|
if (asp<altstk)
|
|
break;
|
|
if (asp->firstalt) {
|
|
asp->bpend = bracketp;
|
|
asp->nbend = nbra;
|
|
}
|
|
if (bracketp!=asp->bpend || nbra!=asp->nbend)
|
|
goto cerror;
|
|
*ep++ = CEOF;
|
|
asp->altlast[0] = ep-asp->altlast;
|
|
asp->firstalt = FALSE;
|
|
bracketp = asp->bpstart;
|
|
nbra = asp->nbstart;
|
|
asp->altlast = ep++;
|
|
lastep = 0;
|
|
continue;
|
|
case '>':
|
|
if (asp<altstk)
|
|
break;
|
|
if (!asp->firstalt &&
|
|
(bracketp!=asp->bpend || nbra!=asp->nbend))
|
|
goto cerror;
|
|
*ep++ = CEOF;
|
|
asp->altlast[0] = ep-asp->altlast;
|
|
lastep = asp->althd;
|
|
*lastep = ep-lastep;
|
|
lastep--;
|
|
if (bracketp!=asp->bpstart || nbra!=asp->nbstart)
|
|
lastep = 0;
|
|
asp--;
|
|
continue;
|
|
case '*':
|
|
case '+':
|
|
if (penultep==0){
|
|
*ep++ = CCHR;
|
|
*ep++ = c;
|
|
} else {
|
|
if(*penultep>STARABLE)
|
|
goto cerror;
|
|
if(c == '+'){
|
|
if((ep-penultep)+ep >= &expbuf[ESIZE-1])
|
|
goto cerror;
|
|
do
|
|
*ep++ = *penultep++;
|
|
while (penultep!=lastep);
|
|
}
|
|
*penultep |= STAR;
|
|
lastep = 0;
|
|
}
|
|
continue;
|
|
case '.':
|
|
*ep++ = CDOT;
|
|
continue;
|
|
|
|
case '[':
|
|
penultep = ep;
|
|
*ep++ = CCL;
|
|
*ep++ = 0;
|
|
if ((c=getsvc()) == '^') {
|
|
c = getsvc();
|
|
ep[-2] = NCCL;
|
|
}
|
|
do {
|
|
if (c == EOF || c == '\n')
|
|
goto cerror;
|
|
*ep++ = c;
|
|
if ((lastc=getsvc()) == '-') {
|
|
c=getsvc();
|
|
if (c == EOF || c == '\n' || c<=ep[-1])
|
|
goto cerror;
|
|
ep[-1] |= 0200;
|
|
*ep++ = c;
|
|
lastc = getsvc(); /* prime lastc */
|
|
} else if (dflag&&'a'<=(c|' ')&&(c|' ')<='z')
|
|
*ep++ = c^' ';
|
|
if (ep >= &expbuf[ESIZE-1])
|
|
goto cerror;
|
|
} while ((c=lastc) != ']');
|
|
penultep[1] = ep-penultep-1;
|
|
continue;
|
|
|
|
|
|
case '$':
|
|
if (nextchar() == eof || peekc=='\n') {
|
|
*ep++ = CDOL;
|
|
continue;
|
|
}
|
|
/* fall through */
|
|
default:
|
|
break;
|
|
}
|
|
/* if fell through switch, match literal character */
|
|
/* Goddamned sign extension! */
|
|
if ((c&0200) && (c&0177)>='1' && (c&0177)<='9') {
|
|
*ep++ = CBACK;
|
|
*ep++ = c-('1'|0200);
|
|
continue;
|
|
}
|
|
c &= ~0200;
|
|
if(dflag && c|' '>='a' && c|' '<='z'){
|
|
*ep++ = CCL;
|
|
*ep++ = 3;
|
|
*ep++ = c;
|
|
*ep++ = c^' ';
|
|
}
|
|
else{
|
|
*ep++ = CCHR;
|
|
*ep++ = c;
|
|
}
|
|
}
|
|
cerror:
|
|
error('p');
|
|
}
|
|
getsvc(){
|
|
register c;
|
|
addstring(c=getchar());
|
|
return(c);
|
|
}
|
|
int
|
|
execute(addr)
|
|
int *addr;
|
|
{
|
|
register char *p1, *p2;
|
|
|
|
if (addr==0) {
|
|
if((p1=loc2) == 0) /* G command */
|
|
p1 = linebuf;
|
|
else if (circfl) /* not first search in substitute */
|
|
return(FALSE);
|
|
} else {
|
|
if (addr==zero)
|
|
return(FALSE);
|
|
p1 = getline(*addr, linebuf);
|
|
}
|
|
p2 = expbuf;
|
|
if (circfl) {
|
|
loc1 = p1;
|
|
return(advance(p1, p2));
|
|
}
|
|
do {
|
|
if (*p2 != CCHR || p2[1] == *p1) {
|
|
if (advance(p1, p2)) {
|
|
loc1 = p1;
|
|
return(TRUE);
|
|
}
|
|
}
|
|
} while (*p1++);
|
|
return(FALSE);
|
|
}
|
|
|
|
int
|
|
advance(lp, ep)
|
|
register char *lp, *ep;
|
|
{
|
|
register char *curlp;
|
|
char *althd, *altend;
|
|
|
|
for (;;) {
|
|
curlp = lp;
|
|
switch (*ep++) {
|
|
|
|
case CCHR:
|
|
if (*ep++ == *lp++)
|
|
continue;
|
|
return(FALSE);
|
|
|
|
case CCHR|STAR:
|
|
do ; while (*lp++ == *ep);
|
|
ep++;
|
|
break;
|
|
|
|
case CDOT:
|
|
if (*lp++)
|
|
continue;
|
|
return(FALSE);
|
|
|
|
case CDOT|STAR:
|
|
do ; while (*lp++);
|
|
break;
|
|
|
|
case CCL:
|
|
case NCCL:
|
|
if (cclass(ep, *lp++, ep[-1]==CCL)) {
|
|
ep += *ep;
|
|
continue;
|
|
}
|
|
return(FALSE);
|
|
|
|
case CCL|STAR:
|
|
case NCCL|STAR:
|
|
do ; while (cclass(ep, *lp++, ep[-1]==(CCL|STAR)));
|
|
ep += *ep;
|
|
break;
|
|
|
|
case CFUNNY:
|
|
if (*lp>=' ' && *lp!='\177' || *lp=='\t' || *lp=='\0')
|
|
return(FALSE);
|
|
lp++;
|
|
continue;
|
|
|
|
case CFUNNY|STAR:
|
|
while (*lp<' ' && *lp && *lp!='\t' || *lp=='\177')
|
|
lp++;
|
|
lp++;
|
|
break;
|
|
|
|
case CBACK:
|
|
if (braelist[*ep]==0)
|
|
error('p');
|
|
if (backref(*ep++, lp)) {
|
|
lp += braelist[ep[-1]] - braslist[ep[-1]];
|
|
continue;
|
|
}
|
|
return(FALSE);
|
|
|
|
case CBACK|STAR:
|
|
if (braelist[*ep] == 0)
|
|
error('p');
|
|
curlp = lp;
|
|
while (backref(*ep, lp))
|
|
lp += braelist[*ep] - braslist[*ep];
|
|
while (lp >= curlp) {
|
|
if (advance(lp, ep+1))
|
|
return(TRUE);
|
|
lp -= braelist[*ep] - braslist[*ep];
|
|
}
|
|
ep++;
|
|
continue;
|
|
|
|
case CBRA:
|
|
braslist[*ep++] = lp;
|
|
continue;
|
|
|
|
case CKET:
|
|
braelist[*ep++] = lp;
|
|
continue;
|
|
|
|
case CDOL:
|
|
if (*lp==0)
|
|
continue;
|
|
return(FALSE);
|
|
|
|
case CEOF:
|
|
loc2 = lp;
|
|
return(TRUE);
|
|
|
|
case CBOI:
|
|
if (alfmatch(*lp,0)
|
|
&& (lp==linebuf || !alfmatch(lp[-1],1)))
|
|
continue;
|
|
return(FALSE);
|
|
|
|
case CEOI:
|
|
if (!alfmatch(*lp,1)
|
|
&& lp!=linebuf && alfmatch(lp[-1],1))
|
|
continue;
|
|
return(FALSE);
|
|
|
|
case CSPACE:
|
|
if (*lp==' ' || *lp=='\t') {
|
|
while (*lp == ' ' || *lp=='\t')
|
|
lp++;
|
|
continue;
|
|
}
|
|
return(FALSE);
|
|
|
|
case CALT:
|
|
althd = ep-1;
|
|
altend = ep + *ep;
|
|
for(ep++; ; ep+= *ep) {
|
|
if(ep == altend)
|
|
return(FALSE);
|
|
if(advance(lp,ep+1) && advance(loc2,altend))
|
|
return(TRUE);
|
|
}
|
|
|
|
case CALT|STAR:
|
|
althd = ep-1;
|
|
altend = ep + *ep;
|
|
for(ep++; ep!=altend; ep+= *ep){
|
|
if(advance(lp, ep+1)){
|
|
if(loc2 == lp)
|
|
break;
|
|
if(advance(loc2, althd))
|
|
return(TRUE);
|
|
}
|
|
}
|
|
/* return (advance(lp,altend)) */
|
|
continue;
|
|
|
|
default:
|
|
error('!');
|
|
}
|
|
/* star logic: executed by falling out of switch */
|
|
do {
|
|
lp--;
|
|
if (advance(lp, ep))
|
|
return(TRUE);
|
|
} while (lp > curlp);
|
|
return(FALSE);
|
|
}
|
|
}
|
|
|
|
backref(i, lp)
|
|
register i;
|
|
register char *lp;
|
|
{
|
|
register char *bp;
|
|
|
|
bp = braslist[i];
|
|
while (*bp++ == *lp++)
|
|
if (bp >= braelist[i])
|
|
return(TRUE);
|
|
return(FALSE);
|
|
}
|
|
int alfmatch(c,tail)
|
|
register char c;
|
|
{
|
|
return (('a' <= c && c <= 'z') ||
|
|
('A' <= c && c <= 'Z') ||
|
|
(c == '_') ||
|
|
(tail && '0' <= c && c<= '9'));
|
|
}
|
|
|
|
|
|
cclass(set, c, f)
|
|
register char *set;
|
|
register c;
|
|
{
|
|
register n;
|
|
if (c == 0)
|
|
return(0);
|
|
n = *set++;
|
|
while (--n) {
|
|
if (*set&0200) {
|
|
if ((*set++ & 0177) <= c) {
|
|
if (c <= *set++)
|
|
return(f);
|
|
} else
|
|
set++;
|
|
--n;
|
|
} else if (*set++ == c)
|
|
return(f);
|
|
}
|
|
return(!f);
|
|
}
|