1425 lines
51 KiB
C
1425 lines
51 KiB
C
/* @(#)53 1.28 src/bos/usr/include/NLregexp.h, libcnls, bos411, 9428A410j 5/9/94 10:16:51 */
|
|
#ifndef _H_NLREGEXP
|
|
#define _H_NLREGEXP
|
|
/*
|
|
* COMPONENT_NAME: libcnls
|
|
*
|
|
* FUNCTIONS: __ecmp, __getintvl, __isthere, advance,
|
|
* compile, step
|
|
*
|
|
* ORIGINS: 3,27
|
|
*
|
|
* (C) COPYRIGHT International Business Machines Corp. 1989, 1994
|
|
* All Rights Reserved
|
|
* Licensed Materials - Property of IBM
|
|
*
|
|
* US Government Users Restricted Rights - Use, duplication or
|
|
* disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
|
|
*
|
|
* Copyright (c) 1984 AT&T
|
|
* All Rights Reserved
|
|
*
|
|
* THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T
|
|
* The copyright notice above does not evidence any
|
|
* actual or intended publication of such source code.
|
|
*/
|
|
|
|
/*
|
|
* For C++ compilers.
|
|
*/
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
#include <NLchar.h>
|
|
#include <values.h>
|
|
|
|
|
|
#ifndef RE_DUP_MAX /* sys/limits.h */
|
|
#define RE_DUP_MAX 255
|
|
#endif
|
|
|
|
/* This is the new (or large charset) version of regexp.h. */
|
|
/* The main differences are that the [bracket] range expression bitmap */
|
|
/* is replaced by a straight list for multibyte languages, and that a */
|
|
/* [:charclass:] definition is allowed within brackets. Ranges are */
|
|
/* handled as a "substring" of entries, with an "or" rather than */
|
|
/* "and" relationship. Note also that compares within brackets */
|
|
/* is done on character values, except for dashranges, where collation */
|
|
/* values are used. */
|
|
/* The normal method for defining character classes ([a-z]) does not */
|
|
/* work well in an international environment; the new charclass element */
|
|
/* (with syntax "[:" name ":]", e.g. [:upper:]) provides the needed */
|
|
/* capability. */
|
|
/* */
|
|
/* Hex compile codes: */
|
|
/****************************************************/
|
|
/* function normal normal normal */
|
|
/* + STAR +INTVL */
|
|
/* CCHR normal char 04 05 06 */
|
|
/* CDOT dot in this pos 08 09 0a */
|
|
/* CENT end group \) 0c 0d 0e */
|
|
/* CBACK \[1-9] indicator 10 11 12 */
|
|
/* CDOL EOL anchor ($) 14 */
|
|
/* CCEOF end compiled pat 16 */
|
|
/* CBRA new [ string 18 19 1a */
|
|
/* CNEG new [^ string 1c 1d 1e */
|
|
/* CLASS [:cclass:] 20 */
|
|
/* CEQV [=x=] (not [=.=]) 22 */
|
|
/* CELEM [.xx.] 24 */
|
|
/* CRNGE new range (a-z) 26 */
|
|
/* CKET new ] 28 */
|
|
/* CPAR start group \( 2c 2d 2e */
|
|
/* CBIT [ bitmap ] 30 31 32 */
|
|
/****************************************************/
|
|
/* A "typical" regular expression, e.g. */
|
|
/* 'ab*[a[.LL.]c-f[:digit:]].*\(x\)\{1,2\}' (LANG set to Sp_SP) */
|
|
/* would be compiled into (hex values): */
|
|
/* */
|
|
/* 04 61 05 62 18 00 10 04 00 61 24 01 7d 26 01 50 01 */
|
|
/* 65 20 03 28 09 2e 00 00 06 04 78 0e 00 01 02 16 */
|
|
/* */
|
|
/* which is: */
|
|
/* */
|
|
/* a b* [ a [.LL.] c - */
|
|
/* 04 61 05 62 18 00 10 04 00 61 24 01 7d 26 01 50 */
|
|
/* CCHR a CCHR b CBRA length CCHR a CELEM LL CRNGE c */
|
|
/* STAR in bytes */
|
|
/* */
|
|
/* f [:digit:] .* \( x */
|
|
/* 01 65 20 03 28 09 2e 00 00 06 04 78 */
|
|
/* f CLASS digit CKET CDOT CPAR group length CCHR x */
|
|
/* STAR INTVL zero in bytes */
|
|
/* */
|
|
/* \)\{ */
|
|
/* 0e 01 02 16 */
|
|
/* CENT lower upper CCEOF */
|
|
/* INTVL bound bound */
|
|
/* */
|
|
/* Note that character values are one or two bytes outside brackets, */
|
|
/* two bytes inside brackets. */
|
|
/* Also, a subexpression followed by a star or interval (e.g. \(ab\)* or */
|
|
/* \(ab\)\{1,2\}) will have the STAR or INTVL flag set on both the CPAR */
|
|
/* and CENT elements. */
|
|
/* */
|
|
/* The error numbers generated have the following meaning: */
|
|
/* Note that 70 is new!!!! */
|
|
/* ERROR(11) Interval endpoint too large */
|
|
/* ERROR(16) Bad number */
|
|
/* ERROR(25) "\digit" out of range */
|
|
/* ERROR(36) Illegal or missing delimiter */
|
|
/* ERROR(41) No remembered match string */
|
|
/* ERROR(42) \( \) imbalance */
|
|
/* ERROR(43) Too many \( */
|
|
/* ERROR(44) More than 2 numbers given in interval */
|
|
/* ERROR(45) } expected after \ */
|
|
/* ERROR(46) First number exceeds second in interval */
|
|
/* ERROR(49) [] imbalance */
|
|
/* ERROR(50) Regular expression overflow */
|
|
/* ERROR(70) Invalid endpoint in range */
|
|
/* ERROR(80) Star and interval on same expression */
|
|
/* */
|
|
|
|
|
|
#define _CCHR 0x04 /* normal char follows */
|
|
#define _CDOT 0x08 /* dot: any char... */
|
|
#define _CENT 0x0c /* end group - \) - here */
|
|
#define _CBACK 0x10 /* \number; n follows */
|
|
#define _CDOL 0x14 /* end-of-line anchor ($) */
|
|
#define _CCEOF 0x16 /* end-of-line seen */
|
|
#ifdef _KJI
|
|
#define _CBRA 0x18 /* start new []; count & items follow */
|
|
#define _CNEG 0x1c /* start [^; count & items follow */
|
|
#endif
|
|
#define _CLASS 0x20 /* charclass follows */
|
|
#define _CEQV 0x22 /* equiv class value follows */
|
|
#define _CELEM 0x24 /* collation element follows */
|
|
#define _CRNGE 0x26 /* range start and end chars follow */
|
|
#ifdef _KJI
|
|
#define _CKET 0x28 /* end new brackets */
|
|
#endif
|
|
#define _CPAR 0x2c /* start group - \( - next is group # */
|
|
#ifndef _KJI
|
|
#define _CBIT 0x30 /* [bitmap] using unique collating value */
|
|
#endif
|
|
|
|
#define _STAR 0x01 /* asterisk, i.e., 0 or more */
|
|
#define _INTVL 0x02 /* range \{m,n\} follows */
|
|
#define _NEG 0x04 /* bracket expr negation */
|
|
|
|
#define _NBRA 9 /* count of groups \(..\) */
|
|
|
|
#define _PLACE(c) *ep++ = (c >> 8), *ep++ = c
|
|
#define _GETWC(sp) ((sp[0] << 8) | sp[1])
|
|
|
|
/* The following macro will return a wchar_t
|
|
and place the char * pointer pas the
|
|
converted single- or multibyte character
|
|
It is used by _GETVAL.
|
|
*/
|
|
#ifdef _KJI
|
|
#define _CHNEXT(s) (NCisshift(s[0]) ? s+=2, _NCd2(s[-2], s[-1]) : *s++)
|
|
#else
|
|
#define _CHNEXT(s) (*s++)
|
|
#endif
|
|
|
|
/* The following macro is called with a
|
|
char * pointer and returns a collating
|
|
value and a coluniq value. The char *
|
|
is bumped to past the element (e.g.,
|
|
past the "ch"). If 1-to-n, return -1
|
|
and the coluniq value for the "repla-
|
|
ced" character.
|
|
*/
|
|
#define _GETVAL(co,cu,s,p,ch) \
|
|
if ( ((co = ((cu = NCcoluniq(ch = _CHNEXT(s))), NCcollate(ch))) < 0) \
|
|
&& ((co = _NLxcolu(co, (unsigned char**)&s, &p, &cu)) == -1) )
|
|
#ifndef _KJI
|
|
#define MIN_UNIQ_COLVAL 257
|
|
#define _SETBIT(c) \
|
|
{ \
|
|
__delta = NCcoluniq((wchar_t)c) - MIN_UNIQ_COLVAL; \
|
|
*(ep+((__delta >> 3) & 0x1fff)) |= __bits[__delta & 7]; \
|
|
}
|
|
#define _SETBITU(c) \
|
|
{ \
|
|
__delta = c - MIN_UNIQ_COLVAL; \
|
|
*(ep+((__delta >> 3) & 0x1fff)) |= __bits[__delta & 7]; \
|
|
}
|
|
#define _BITSET \
|
|
next_character = lp; \
|
|
_GETVAL(cvalue,uvalue,next_character,pwc,wc); \
|
|
__delta = uvalue - MIN_UNIQ_COLVAL; \
|
|
if ((*(ep+((__delta >> 3) & 0x1fff)) & __bits[__delta & 7]) == 0)
|
|
static int __delta; /* used by _SETBIT and _BITSET */
|
|
static char __bits[8] = {1,2,4,8,16,32,64,128};
|
|
#endif
|
|
|
|
/* Following variable names required by spec. */
|
|
char *loc1, *loc2;
|
|
int circf;
|
|
int sed, nbra;
|
|
|
|
/* Following variable names are undocumented, but required by sed. */
|
|
int nodelim;
|
|
char *locs;
|
|
char *braslist[_NBRA];
|
|
char *braelist[_NBRA];
|
|
|
|
#include <NLctype.h>
|
|
|
|
#define __CHECK_FOR_NULL(character,eof,errornum) \
|
|
{ if (!(character) && (character) != (eof) ) \
|
|
ERROR((errornum)); }
|
|
|
|
|
|
/* As the "is" functions aren't functions, but macros, we cannot put */
|
|
/* the "function" in the array below; thus another layer of indirection */
|
|
/* Wrap _NO_PROTO around these - p46680 */
|
|
#ifdef _NO_PROTO
|
|
_ALPHA(c) {return(NCisalpha(c));}
|
|
_UPPER(c) {return(NCisupper(c));}
|
|
_LOWER(c) {return(NCislower(c));}
|
|
_DIGIT(c) {return(NCisdigit(c));}
|
|
_ALNUM(c) {return(NCisalnum(c));}
|
|
_SPACE(c) {return(NCisspace(c));}
|
|
_PRINT(c) {return(NCisprint(c));}
|
|
_PUNCT(c) {return(NCispunct(c));}
|
|
_XDIGIT(c) {return(isascii(c) && isxdigit(c));}
|
|
_CNTRL(c) {return(NCiscntrl(c));}
|
|
_GRAPH(c) {return(NCisgraph(c));}
|
|
#ifdef _KJI
|
|
_JALPHA(c) {return(isjalpha(c));}
|
|
_JDIGIT(c) {return(isjdigit(c));}
|
|
_JSPACE(c) {return(isjspace(c));}
|
|
_JPUNCT(c) {return(isjpunct(c));}
|
|
_JPAREN(c) {return(isjparen(c));}
|
|
_JKANJI(c) {return(isjkanji(c));}
|
|
_JHIRA(c) {return(isjhira(c));}
|
|
_JKATA(c) {return(isjkata(c));}
|
|
_JXDIGIT(c) {return(isjxdigit(c));}
|
|
#endif
|
|
#else
|
|
extern int NCisalpha(int), NCisupper(int), NCislower(int), NCisdigit(int),
|
|
NCisalnum(int), NCisspace(int), NCisprint(int), NCispunct(int),
|
|
NCiscntrl(int), NCisgraph(int);
|
|
extern wchar_t NCcoluniq (wchar_t);
|
|
extern int NCcollate(wchar_t);
|
|
extern short _NLxcolu(short, unsigned char **, wchar_t**, wchar_t*);
|
|
|
|
_ALPHA(int c) {return(NCisalpha(c));}
|
|
_UPPER(int c) {return(NCisupper(c));}
|
|
_LOWER(int c) {return(NCislower(c));}
|
|
_DIGIT(int c) {return(NCisdigit(c));}
|
|
_ALNUM(int c) {return(NCisalnum(c));}
|
|
_SPACE(int c) {return(NCisspace(c));}
|
|
_PRINT(int c) {return(NCisprint(c));}
|
|
_PUNCT(int c) {return(NCispunct(c));}
|
|
_XDIGIT(int c) {return(isascii(c) && isxdigit(c));}
|
|
_CNTRL(int c) {return(NCiscntrl(c));}
|
|
_GRAPH(int c) {return(NCisgraph(c));}
|
|
#ifdef _KJI
|
|
extern int isjalpha(int), isjdigit(int), isjspace(int), isjpunct(int),
|
|
isjparen(int), isjkanji(int), isjhira(int), isjkata(int),
|
|
isjxdigit(int);
|
|
_JALPHA(int c) {return(isjalpha(c));}
|
|
_JDIGIT(int c) {return(isjdigit(c));}
|
|
_JSPACE(int c) {return(isjspace(c));}
|
|
_JPUNCT(int c) {return(isjpunct(c));}
|
|
_JPAREN(int c) {return(isjparen(c));}
|
|
_JKANJI(int c) {return(isjkanji(c));}
|
|
_JHIRA(int c) {return(isjhira(c));}
|
|
_JKATA(int c) {return(isjkata(c));}
|
|
_JXDIGIT(int c) {return(isjxdigit(c));}
|
|
#endif
|
|
#endif
|
|
static struct __isarray {
|
|
char *isstr;
|
|
#ifdef _NO_PROTO
|
|
int (*isfunc)();
|
|
#else
|
|
int (*isfunc)(int);
|
|
#endif
|
|
} __istab[] = {
|
|
{ "alpha", _ALPHA },
|
|
{ "upper", _UPPER },
|
|
{ "lower", _LOWER },
|
|
{ "digit", _DIGIT },
|
|
{ "alnum", _ALNUM },
|
|
{ "space", _SPACE },
|
|
{ "print", _PRINT },
|
|
{ "punct", _PUNCT },
|
|
{ "xdigit", _XDIGIT },
|
|
{ "cntrl", _CNTRL },
|
|
{ "graph", _GRAPH }
|
|
|
|
#ifdef _KJI
|
|
,
|
|
{ "jalpha", _JALPHA },
|
|
{ "jdigit", _JDIGIT },
|
|
{ "jspace", _JSPACE },
|
|
{ "jpunct", _JPUNCT },
|
|
{ "jparen", _JPAREN },
|
|
{ "jkanji", _JKANJI },
|
|
{ "jhira", _JHIRA },
|
|
{ "jkata", _JKATA },
|
|
{ "jxdigit", _JXDIGIT }
|
|
#endif
|
|
|
|
#define _NISTAB (sizeof(__istab) / sizeof(struct __isarray))
|
|
};
|
|
#define _IFBUFLEN 16
|
|
static char __ifbuf[_IFBUFLEN];
|
|
|
|
static int __ebra;
|
|
static unsigned int __low;
|
|
static unsigned int __ssize;
|
|
#ifndef _KJI
|
|
#define _BRACKET_LEN 48
|
|
#endif
|
|
|
|
#ifdef _NO_PROTO
|
|
int advance();
|
|
static void __getintvl();
|
|
static int __isthere();
|
|
static int __ecmp();
|
|
#else
|
|
int advance(char *lp, register char *ep);
|
|
static void __getintvl(char *str);
|
|
static int __isthere(char *sp, char *bp, char **next_character);
|
|
static __ecmp(char *a, char *b, int count);
|
|
#endif
|
|
|
|
|
|
char *
|
|
compile(char *instring,
|
|
register char *ep,
|
|
const char *endbuf,
|
|
int eof)
|
|
{
|
|
INIT /* Dependent declarations and initializations */
|
|
register c;
|
|
wchar_t wchr;
|
|
wchar_t *p;
|
|
char *lastep = 0; /* addr of start of simple r-e, */
|
|
/* for or-ing _INTVL or _STAR flag */
|
|
int cclcnt;
|
|
char bracket[_NBRA], *bracketp;
|
|
char *subexpr_start_location[_NBRA];
|
|
char *ib;
|
|
int dashfl;
|
|
struct nextelt {
|
|
char Class;
|
|
char rangeable;
|
|
short cvalue;
|
|
wchar_t uvalue;
|
|
} next, prev;
|
|
|
|
int closed;
|
|
#ifndef _KJI
|
|
int neg; /* [bitmap] is negated */
|
|
#else
|
|
char *cnclptr; /* addr of _CBRA's count bytes */
|
|
#endif
|
|
int lc;
|
|
int i, cflg;
|
|
unsigned int subexpr_length;
|
|
|
|
c = GETC();
|
|
__CHECK_FOR_NULL(c,eof,36);
|
|
|
|
if(c == eof || c == '\n') {
|
|
if(c == '\n') {
|
|
/* This apparently superfluous logic
|
|
* is required by sed
|
|
*/
|
|
UNGETC(c);
|
|
nodelim = 1;
|
|
}
|
|
if(*ep == 0 && !sed) /* WRONG *ep uninitialized! */
|
|
ERROR(41);
|
|
RETURN(ep);
|
|
}
|
|
bracketp = bracket;
|
|
|
|
circf = closed = nbra = __ebra = 0;
|
|
if(c == '^')
|
|
circf++;
|
|
else
|
|
UNGETC(c);
|
|
while(1) {
|
|
/* Will we overflow ep with this element?
|
|
* Aside from bracket lists, one r.e. element
|
|
* can produce no more than 3 bytes of compiled text.
|
|
*/
|
|
if(ep >= endbuf-3) ERROR(50);
|
|
|
|
c = GETC();
|
|
__CHECK_FOR_NULL(c,eof,36);
|
|
|
|
if(c != '*' && ((c != '\\') || (PEEKC() != '{'))) /*}*/
|
|
lastep = ep;
|
|
if(c == eof) {
|
|
*ep++ = _CCEOF;
|
|
RETURN(ep);
|
|
}
|
|
switch(c) {
|
|
|
|
case '.':
|
|
*ep++ = _CDOT;
|
|
continue;
|
|
|
|
case '\n':
|
|
if(!sed) {
|
|
UNGETC(c);
|
|
*ep++ = _CCEOF;
|
|
nodelim = 1;
|
|
RETURN(ep);
|
|
}
|
|
else ERROR(36);
|
|
case '*':
|
|
/* Accept * as ordinary character if first in
|
|
* pattern or if following \(.
|
|
* Undocumented, possibly POSIX-conflicting:
|
|
* also accept * following \).
|
|
*/
|
|
if(lastep == 0 || *lastep == _CPAR ) /* BDN */
|
|
goto defchar;
|
|
|
|
if(*lastep == _CENT )
|
|
{
|
|
/* In a subexpression, set the STAR on CPAR '\(' as well */
|
|
/* as CENT '\)'. */
|
|
/* The pointer 'lastep' points to the _CENT and one past */
|
|
/* it is the the number of that subreference. */
|
|
|
|
if (*(subexpr_start_location[(int) *(lastep+1)]) & _INTVL)
|
|
/* Do not allow STAR and INTVL on the same code */
|
|
ERROR(80);
|
|
|
|
*(subexpr_start_location[(int) *(lastep+1)]) |= _STAR;
|
|
}
|
|
|
|
if (*lastep & _INTVL)
|
|
/* Do not allow STAR and INTVL on the same code */
|
|
ERROR(80);
|
|
|
|
*lastep |= _STAR;
|
|
|
|
|
|
continue;
|
|
|
|
case '$':
|
|
if(PEEKC() != eof && PEEKC() != '\n')
|
|
goto defchar;
|
|
*ep++ = _CDOL;
|
|
continue;
|
|
|
|
case '[':
|
|
/* Bracket expressions are converted to a bitmask for single
|
|
* byte languages. This allows advance() to determine whether
|
|
* a character matches by a simple bitmap test. Internationalized
|
|
* classes result in bits being set for all characters within
|
|
* that class. Multibyte languages use a list or range of
|
|
* characters which must be sequentially searched for a match.
|
|
*
|
|
* Support for Posix NL bracket extensions, including
|
|
* equivalence classes and collating symbols.
|
|
* Syntactic rules for dash ranges are simplified: '-' is
|
|
* ordinary after '[', before ']', and immediately following
|
|
* a dashrange '-'.
|
|
* Any element may appear syntactically as a dashrange endpoint,
|
|
* including those that turn out to be semantically illegal:
|
|
* noncollating char; [:class:]; start>end; or previous endpoint
|
|
* as starting point, e.g. a-m-z.
|
|
*/
|
|
#ifndef _KJI
|
|
if (ep >= endbuf - _BRACKET_LEN)
|
|
ERROR(50);
|
|
neg = 0;
|
|
*ep++ = _CBIT;
|
|
if ((c=PEEKC()) == '^') {
|
|
neg++;
|
|
GETC();
|
|
}
|
|
bzero((char *)ep, _BRACKET_LEN);
|
|
#else
|
|
if((c = PEEKC()) == '^') {
|
|
*ep++ = _CBRA|_NEG; /* Bracket-^ start */
|
|
GETC();
|
|
}
|
|
else
|
|
*ep++ = _CBRA; /* Bracket start, no ^ */
|
|
cnclptr = ep;
|
|
*ep++ = 0; /* Space for count, */
|
|
*ep++ = 0; /* filled in at ] */
|
|
#endif
|
|
prev.Class = 0;
|
|
next.Class = 0;
|
|
dashfl = 0;
|
|
if ((c = PEEKC()) == '-' || c == ']') {
|
|
prev.Class = _CCHR;
|
|
prev.rangeable = 1;
|
|
prev.uvalue = c;
|
|
GETC();
|
|
}
|
|
|
|
while (1) { /* Iterate over elements of bracket list */
|
|
if(ep >= endbuf-6)
|
|
ERROR(50);
|
|
|
|
/* Worst case: 6 bytes could be added to
|
|
* ep in the case of ... - ]
|
|
*/
|
|
|
|
c = GETC();
|
|
__CHECK_FOR_NULL(c,eof,49);
|
|
|
|
if (c == '\0') ERROR(49); /* Stop when NUL is found*/
|
|
if (c == ']') {
|
|
if (prev.Class != 0) {
|
|
UNGETC(c);
|
|
goto stuffp;
|
|
}
|
|
if (dashfl) { /* Trailing dash is ordinary character */
|
|
#ifndef _KJI
|
|
_SETBIT('-')
|
|
#else
|
|
*ep++ = _CCHR;
|
|
wchr = '-';
|
|
_PLACE(wchr);
|
|
#endif
|
|
}
|
|
break;
|
|
}
|
|
else if (c == '-' && !dashfl) {
|
|
dashfl = 1;
|
|
continue;
|
|
}
|
|
/* Get next element into structure
|
|
next. It may be a:
|
|
|
|
_CLASS [:class:]
|
|
_CEQV [=collating-element=]
|
|
_CDOT [=.=]
|
|
_CELEM [.xx.] (collating-symbol)
|
|
_CCHR character
|
|
*/
|
|
else if (c == '[' &&
|
|
((lc=PEEKC()) == ':' || lc == '.' || lc == '=')) {
|
|
ib = __ifbuf;
|
|
GETC();
|
|
while ( (c = GETC()) != lc || PEEKC() != ']') {
|
|
__CHECK_FOR_NULL(c,eof,49);
|
|
|
|
if (c == '\n' || c == eof) ERROR(49);
|
|
*ib++ = c;
|
|
#ifdef _KJI
|
|
if (NCisshift(c)) *ib++ = GETC();
|
|
#endif
|
|
if (ib>__ifbuf+_IFBUFLEN-2)
|
|
ib-=2;
|
|
/* ifbuf is long enough that if we
|
|
* discard characters here, the contents
|
|
* are already known to be invalid.
|
|
*/
|
|
|
|
}
|
|
*ib = '\0';
|
|
ib = __ifbuf;
|
|
GETC(); /* Advance over trailing ] */
|
|
if (lc == ':') {
|
|
for (i = 0; i < _NISTAB; i++) {
|
|
if((strcmp((char *)__ifbuf,__istab[i].isstr))==0)
|
|
break;
|
|
}
|
|
if (i >= _NISTAB) ERROR(49);
|
|
next.Class = _CLASS;
|
|
next.rangeable = 0;
|
|
next.uvalue = i;
|
|
}
|
|
else if (lc == '.') {
|
|
next.Class = _CELEM;
|
|
next.rangeable = 1;
|
|
|
|
_GETVAL(next.cvalue,next.uvalue,ib,p,wchr);
|
|
if ((next.cvalue == 0) || (ib[0] != '\0'))
|
|
ERROR(36);
|
|
}
|
|
else {
|
|
/* Equivalence class. Special-case '.'
|
|
* to mean any char with a collating value;
|
|
* represent as CDOT in compiled string.
|
|
*/
|
|
if ((__ifbuf[0] == '.') && (__ifbuf[1] == '\0')){
|
|
next.Class = _CDOT;
|
|
next.rangeable = 0;
|
|
}
|
|
else {
|
|
next.Class = _CEQV;
|
|
_GETVAL(next.cvalue,next.uvalue,ib,p,wchr);
|
|
next.rangeable = 1;
|
|
if ((next.cvalue == 0) || (ib[0] != '\0'))
|
|
ERROR(36);
|
|
if (next.cvalue == next.uvalue)
|
|
next.Class = _CELEM;
|
|
|
|
|
|
}
|
|
}
|
|
}
|
|
else { /* Ordinary character,
|
|
* including [ followed by
|
|
* anything but :=.
|
|
*/
|
|
next.Class = _CCHR;
|
|
next.rangeable = 1;
|
|
#ifdef _KJI
|
|
if (NCisshift(c))
|
|
_NCdec2(c, GETC(), c);
|
|
#endif
|
|
next.uvalue = c;
|
|
}
|
|
/* Next element has been built and placed in next.
|
|
* Now dispose of it. */
|
|
if (dashfl) {
|
|
dashfl = 0;
|
|
/*
|
|
* '-' seen, not immediately following '['.
|
|
* The element preceding '-' is in struct prev and
|
|
* the element following is in struct next.
|
|
* It's legal if both prev and next are collatable
|
|
* and prev <= next.
|
|
*/
|
|
if (prev.Class == 0 ||
|
|
(!prev.rangeable || !next.rangeable))
|
|
ERROR(70);
|
|
/* one end of range was char-class
|
|
* or noncollating char, or 'start'
|
|
* of range was really endpoint of
|
|
* a preceding range, e.g. [a-m-z]
|
|
*/
|
|
prev.rangeable = 0;
|
|
/* Inhibit [a-m-z] */
|
|
if (prev.Class == _CCHR) {
|
|
ib = __ifbuf;
|
|
#ifdef _KJI
|
|
_NCe2(prev.uvalue, ib[0], ib[1]);
|
|
#else
|
|
*ib = prev.uvalue;
|
|
#endif
|
|
_GETVAL(prev.cvalue,prev.uvalue,ib,p,wchr);
|
|
if (prev.cvalue == 0)
|
|
ERROR(70);
|
|
}
|
|
if (next.Class == _CCHR) {
|
|
ib = __ifbuf;
|
|
#ifdef _KJI
|
|
_NCe2(next.uvalue, ib[0], ib[1]);
|
|
#else
|
|
*ib = next.uvalue;
|
|
#endif
|
|
_GETVAL(next.cvalue,next.uvalue,ib,p,wchr);
|
|
if (next.cvalue == 0)
|
|
ERROR(70);
|
|
}
|
|
if (next.uvalue < prev.uvalue)
|
|
ERROR(70);
|
|
|
|
#ifndef _KJI
|
|
for (i=prev.uvalue; i<=next.uvalue; i++) {
|
|
_SETBITU(i)
|
|
}
|
|
#else
|
|
*ep++ = _CRNGE;
|
|
if (prev.Class == _CEQV)
|
|
_PLACE(prev.cvalue);
|
|
else
|
|
_PLACE(prev.uvalue);
|
|
_PLACE(next.uvalue);
|
|
if (next.Class == _CEQV) {
|
|
*ep++ = _CEQV;
|
|
_PLACE(next.cvalue);
|
|
}
|
|
#endif
|
|
prev.Class = 0;
|
|
}
|
|
else { /* not a range */
|
|
if (prev.Class != 0) {
|
|
/* Insert class and value in ep.
|
|
* If [:class:], 1 byte of value;
|
|
* if [=.=], no value;
|
|
* otherwise 2 bytes of value.
|
|
*/
|
|
stuffp:
|
|
#ifndef _KJI
|
|
switch (prev.Class) {
|
|
case _CLASS:
|
|
for (i=1; i<256; i++) {
|
|
if ((*__istab[prev.uvalue].isfunc)(i) != 0) {
|
|
_SETBIT(i)
|
|
}
|
|
}
|
|
break;
|
|
case _CEQV:
|
|
for (i=1; i<256; i++) {
|
|
if (NCcollate(i) == prev.cvalue) {
|
|
_SETBIT(i)
|
|
}
|
|
}
|
|
break;
|
|
case _CELEM:
|
|
case _CCHR:
|
|
_SETBIT(prev.uvalue)
|
|
case _CDOT:
|
|
break;
|
|
}
|
|
#else
|
|
*ep++ = prev.Class;
|
|
switch (prev.Class) {
|
|
case _CLASS:
|
|
*ep++ = _NCbot(prev.uvalue);
|
|
break;
|
|
case _CEQV:
|
|
_PLACE(prev.cvalue);
|
|
break;
|
|
case _CELEM:
|
|
case _CCHR:
|
|
_PLACE(prev.uvalue);
|
|
case _CDOT:
|
|
break;
|
|
}
|
|
#endif
|
|
}
|
|
prev=next;
|
|
next.Class = 0;
|
|
}
|
|
}
|
|
|
|
#ifndef _KJI
|
|
if (neg != 0) {
|
|
for (i=0; i<_BRACKET_LEN; i++)
|
|
*ep++ = ~*ep;
|
|
*(ep-_BRACKET_LEN) &= 0xfe; /* eliminate NUL */
|
|
}
|
|
else
|
|
ep += _BRACKET_LEN;
|
|
#else
|
|
*ep++ = _CKET; /* trailing sentinel */
|
|
wchr = ep-cnclptr; /* Store [] string length */
|
|
*cnclptr = _NCtop(wchr); /* at head of string */
|
|
*(cnclptr+1) = _NCbot(wchr);
|
|
#endif
|
|
|
|
continue;
|
|
|
|
case '\\':
|
|
if ((c = GETC()) == '\0') ERROR(36);
|
|
switch(c) {
|
|
|
|
case '(':
|
|
if(nbra >= _NBRA)
|
|
ERROR(43);
|
|
*bracketp++ = nbra;
|
|
subexpr_start_location[nbra] = (char *) ep;
|
|
*ep++ = _CPAR;
|
|
*ep++ = nbra++;
|
|
*ep++ = 0; /* Space for count, */
|
|
*ep++ = 0; /* filled in at /) */
|
|
continue;
|
|
|
|
case ')':
|
|
if(bracketp <= bracket )
|
|
ERROR(42);
|
|
*ep++ = _CENT;
|
|
*ep = *--bracketp;
|
|
subexpr_length =
|
|
(char *) ep - subexpr_start_location[*ep] - 1;
|
|
|
|
if (subexpr_length > 0xffff)
|
|
/* Overflow, subexpr to long */
|
|
ERROR(50);
|
|
|
|
|
|
/* Now set the length of the subexpression */
|
|
*(subexpr_start_location[*ep] + 2) =
|
|
subexpr_length >> 8;
|
|
|
|
*(subexpr_start_location[*ep] + 3) =
|
|
subexpr_length & 0x00ff ;
|
|
|
|
|
|
ep++;
|
|
closed++;
|
|
continue;
|
|
|
|
case '{': /*}*/
|
|
if(lastep == 0)
|
|
goto defchar;
|
|
|
|
if(*lastep == _CENT)
|
|
{
|
|
if (*(subexpr_start_location[*(lastep+1)]) & _STAR)
|
|
/* Do not allow STAR and INTVL on the same code */
|
|
ERROR(80);
|
|
|
|
/* Set INTVL on the CPAR and CENT */
|
|
*(subexpr_start_location[*(lastep+1)]) |= _INTVL;
|
|
}
|
|
|
|
if (*lastep & _STAR)
|
|
/* Do not allow STAR and INTVL on the same code */
|
|
ERROR(80);
|
|
|
|
*lastep |= _INTVL;
|
|
|
|
cflg = 0;
|
|
c = GETC();
|
|
nlim:
|
|
i = 0;
|
|
do {
|
|
if('0' <= c && c <= '9')
|
|
i = 10 * i + c - '0';
|
|
else
|
|
ERROR(16);
|
|
} while(((c = GETC()) != '\\') && (c != ','));
|
|
if(i > RE_DUP_MAX)
|
|
ERROR(11);
|
|
*ep++ = i;
|
|
if(c == ',') {
|
|
if(cflg++)
|
|
ERROR(44);
|
|
if((c = GETC()) == '\\')
|
|
*ep++ = RE_DUP_MAX;
|
|
else goto nlim; /* get 2'nd number */
|
|
} /*{*/
|
|
if(GETC() != '}')
|
|
ERROR(45);
|
|
if(!cflg) /* one number */
|
|
*ep++ = i;
|
|
else if( *(ep -1)
|
|
< *(ep -2) || *(ep -1) == 0)
|
|
ERROR(46);
|
|
continue;
|
|
|
|
case '\n':
|
|
ERROR(36);
|
|
|
|
case 'n':
|
|
c = '\n';
|
|
goto defchar;
|
|
|
|
default:
|
|
if(c >= '1' && c <= '9') {
|
|
if((c -= '1') >= closed)
|
|
ERROR(25);
|
|
*ep++ = _CBACK;
|
|
*ep++ = c;
|
|
continue;
|
|
}
|
|
}
|
|
/* Drop through to default to use \ to turn off special chars */
|
|
defchar:
|
|
default:
|
|
lastep = ep;
|
|
*ep++ = _CCHR;
|
|
*ep++ = c;
|
|
#ifdef _KJI
|
|
if (NCisshift(c))
|
|
*ep++ = GETC();
|
|
#endif
|
|
}
|
|
}
|
|
}
|
|
|
|
step(register char *p1, register char *p2)
|
|
{
|
|
register unsigned c;
|
|
|
|
if(circf) {
|
|
loc1 = p1;
|
|
return(advance(p1, p2));
|
|
}
|
|
/* fast check for first character */
|
|
if(*p2 == _CCHR) {
|
|
c = p2[1];
|
|
do {
|
|
if (*p1==c)
|
|
if(advance(p1, p2)) {
|
|
loc1 = p1;
|
|
return(1);
|
|
}
|
|
#ifdef _KJI
|
|
if (NCisshift(*p1)) p1++;
|
|
#endif
|
|
} while(*p1++);
|
|
return(0);
|
|
}
|
|
/* regular algorithm */
|
|
do {
|
|
if(advance(p1, p2)) {
|
|
loc1 = p1;
|
|
return(1);
|
|
}
|
|
#ifdef _KJI
|
|
if (NCisshift(*p1)) p1++;
|
|
#endif
|
|
} while(*p1++);
|
|
return(0);
|
|
}
|
|
|
|
advance(char *lp, register char *ep)
|
|
{
|
|
char *curlp, *nxtep, *curwp;
|
|
int c2, lc;
|
|
register int c;
|
|
|
|
char *bbeg;
|
|
int subexpr_index;
|
|
int ct;
|
|
char RE_code;
|
|
#ifndef _KJI
|
|
short cvalue;
|
|
wchar_t uvalue;
|
|
wchar_t *pwc, wc;
|
|
#endif
|
|
|
|
char *next_character; /* This is used to point to the */
|
|
/* next 'character' in the 'lp' */
|
|
/* string. The __isthere() */
|
|
/* routine will pass this value */
|
|
/* back since it knows how big */
|
|
/* the 'character' is. */
|
|
|
|
while(1) {
|
|
switch(*ep++) {
|
|
|
|
case _CCHR:
|
|
#ifdef _KJI
|
|
c = *ep++;
|
|
if(c == *lp++)
|
|
if (!NCisshift(c) || *ep++ == *lp++)
|
|
continue;
|
|
#else
|
|
if (*ep++ == *lp++)
|
|
continue;
|
|
#endif
|
|
return(0);
|
|
|
|
case _CDOT:
|
|
#ifdef _KJI
|
|
if (*lp==0)
|
|
return(0);
|
|
else lp +=NLchrlen(lp);
|
|
continue;
|
|
#else
|
|
if (*lp++ != '\0')
|
|
continue;
|
|
return(0);
|
|
#endif
|
|
|
|
case _CDOL:
|
|
if(*lp == 0)
|
|
continue;
|
|
return(0);
|
|
|
|
case _CCEOF:
|
|
loc2 = lp;
|
|
return(1);
|
|
|
|
case _CPAR:
|
|
braslist[*ep++] = lp;
|
|
ep += 2;
|
|
|
|
continue;
|
|
|
|
case _CENT:
|
|
braelist[*ep++] = lp;
|
|
continue;
|
|
|
|
case _CPAR | _INTVL:
|
|
case _CPAR | _STAR:
|
|
subexpr_index = *ep++;
|
|
braslist[subexpr_index] = lp;
|
|
nxtep = ep + _GETWC(ep); /* Point at the INTVL range */
|
|
|
|
ep +=2; /* Move past the subexpr length */
|
|
if (*(ep - 4) == (_CPAR | _INTVL ) )
|
|
{
|
|
/* Get the interval info */
|
|
|
|
__getintvl(nxtep);
|
|
nxtep +=2; /* Point past the interval range */
|
|
|
|
/* If the __low is > 0, then the _CENT | _INTVL */
|
|
/* will take care of everything. */
|
|
if (__low > 0 )
|
|
continue;
|
|
}
|
|
|
|
/* Advance() must be called from here because */
|
|
/* even if the subexpression in not matched, we still */
|
|
/* need to continue. */
|
|
|
|
if (advance(lp,ep) == 1)
|
|
return(1); /* Matched the expression */
|
|
|
|
braelist[subexpr_index] = lp;
|
|
/* Skip past the RE for the subexpression and continue */
|
|
ep = nxtep;
|
|
if (lp != locs || *nxtep != _CCEOF)
|
|
continue;
|
|
|
|
return(0);
|
|
|
|
|
|
case _CBACK | _INTVL:
|
|
case _CBACK | _STAR:
|
|
case _CENT | _INTVL:
|
|
case _CENT | _STAR:
|
|
|
|
RE_code = *(ep -1 );
|
|
subexpr_index = *ep++;
|
|
|
|
if (RE_code & _CENT )
|
|
/* Set the end of the expression for \) */
|
|
braelist[subexpr_index] = lp;
|
|
|
|
bbeg = braslist[subexpr_index];
|
|
ct = braelist[subexpr_index] - bbeg;
|
|
|
|
if (ct == 0)
|
|
{
|
|
/* The subexpression matched a NULL string */
|
|
/* If we cannot advance from here, the */
|
|
/* pattern cannot be matched */
|
|
|
|
if (lp != locs && advance(lp,ep))
|
|
return(1);
|
|
|
|
return(0);
|
|
}
|
|
|
|
if ( RE_code & _INTVL )
|
|
{
|
|
/* Get the interval values */
|
|
__getintvl(ep); /* Point at the interval values */
|
|
ep +=2; /* Move past the interval values */
|
|
}
|
|
else
|
|
{
|
|
/* For the STAR, act as if the user had done an */
|
|
/* expression like \{0,\} */
|
|
|
|
__low = 0;
|
|
__ssize = MAXINT;
|
|
} /* endif */
|
|
|
|
|
|
if (RE_code == (_CENT | _INTVL ) )
|
|
/* Decrement __low because we already found one */
|
|
__low--;
|
|
|
|
while (__low--)
|
|
{
|
|
|
|
if (!__ecmp(bbeg, lp, ct))
|
|
return(0);
|
|
|
|
lp += ct;
|
|
}
|
|
|
|
curlp = lp;
|
|
|
|
while(__ssize-- && __ecmp(bbeg,lp,ct))
|
|
lp += ct ;
|
|
|
|
|
|
if (lp != locs)
|
|
{
|
|
while(lp >= curlp) {
|
|
if(advance(lp, ep)) return(1);
|
|
lp -= ct;
|
|
}
|
|
} /* endwhile */
|
|
|
|
return(0);
|
|
|
|
|
|
|
|
case _CCHR | _INTVL:
|
|
c = *ep++;
|
|
#ifdef _KJI
|
|
if (NCisshift(c)) {
|
|
c2 = *ep++;
|
|
__getintvl(ep);
|
|
while(__low--) {
|
|
if(*lp++ != c || *lp++ != c2)
|
|
return(0);
|
|
}
|
|
curlp = lp;
|
|
while (__ssize-- && *lp == c && lp[1] == c2) lp += 2;
|
|
} else {
|
|
#endif
|
|
__getintvl(ep);
|
|
while(__low--) {
|
|
if(*lp++ != c)
|
|
return(0);
|
|
}
|
|
curlp = lp;
|
|
while (__ssize-- && *lp == c) lp++;
|
|
#ifdef _KJI
|
|
}
|
|
#endif
|
|
ep += 2;
|
|
goto star;
|
|
|
|
case _CDOT | _INTVL:
|
|
__getintvl(ep);
|
|
while(__low--) {
|
|
#ifdef _KJI
|
|
if (NCisshift(*lp)) lp++;
|
|
#endif
|
|
if(*lp++ == '\0')
|
|
return(0);
|
|
}
|
|
curlp = lp;
|
|
while(__ssize-- && *lp != '\0') {
|
|
#ifdef _KJI
|
|
lp += (NCisshift(*lp) ? 2 : 1);
|
|
#else
|
|
lp++;
|
|
#endif
|
|
}
|
|
ep += 2;
|
|
goto star;
|
|
|
|
case _CBACK:
|
|
bbeg = braslist[*ep];
|
|
ct = braelist[*ep++] - bbeg;
|
|
|
|
if(__ecmp(bbeg, lp, ct)) {
|
|
lp += ct;
|
|
continue;
|
|
}
|
|
return(0);
|
|
|
|
case _CDOT | _STAR:
|
|
curlp = lp;
|
|
while(*lp) lp++;
|
|
goto star;
|
|
|
|
case _CCHR | _STAR:
|
|
curlp = lp;
|
|
c = *ep++;
|
|
#ifdef _KJI
|
|
if (NCisshift(c)){
|
|
c2 = *ep++;
|
|
while(*lp == c && lp[1] == c2) lp += 2;
|
|
}else {
|
|
while (*lp == c) lp++;
|
|
}
|
|
#else
|
|
while (*lp++ == c);
|
|
--lp;
|
|
#endif
|
|
goto star;
|
|
|
|
#ifndef _KJI
|
|
case _CBIT:
|
|
_BITSET
|
|
return(0);
|
|
ep += _BRACKET_LEN;
|
|
lp = next_character;
|
|
continue;
|
|
|
|
case _CBIT | _INTVL:
|
|
nxtep = ep + _BRACKET_LEN;
|
|
__getintvl(nxtep);
|
|
while (__low--) {
|
|
_BITSET
|
|
return(0);
|
|
lp = next_character;
|
|
}
|
|
curlp = lp;
|
|
while (__ssize--) {
|
|
_BITSET
|
|
break;
|
|
lp = next_character;
|
|
}
|
|
ep = nxtep += 2;
|
|
goto star;
|
|
|
|
case _CBIT | _STAR:
|
|
nxtep = ep + _BRACKET_LEN;
|
|
curlp = lp;
|
|
while (1) {
|
|
_BITSET
|
|
break;
|
|
lp = next_character;
|
|
}
|
|
ep = nxtep;
|
|
goto star;
|
|
#else
|
|
|
|
case _CBRA:
|
|
case _CBRA | _NEG:
|
|
nxtep = ep + _GETWC(ep);
|
|
ep += 2;
|
|
if(!__isthere(lp, ep,&next_character)) return(0);
|
|
|
|
lp = next_character;
|
|
ep = nxtep;
|
|
continue;
|
|
|
|
case _CBRA | _INTVL:
|
|
case _CBRA | _INTVL | _NEG:
|
|
nxtep = ep + _GETWC(ep);
|
|
ep += 2;
|
|
__getintvl(nxtep);
|
|
while (__low--) {
|
|
if(!__isthere(lp, ep,&next_character)) return(0);
|
|
lp = next_character;
|
|
}
|
|
curlp = lp;
|
|
while(__ssize-- && __isthere(lp, ep, &next_character))
|
|
lp = next_character;
|
|
ep = nxtep += 2;
|
|
goto star;
|
|
|
|
case _CBRA | _STAR:
|
|
case _CBRA | _STAR | _NEG:
|
|
nxtep = ep + _GETWC(ep);
|
|
ep += 2;
|
|
curlp = lp;
|
|
while(__isthere(lp, ep,&next_character))
|
|
lp = next_character;
|
|
|
|
ep = nxtep;
|
|
goto star;
|
|
#endif
|
|
|
|
star:
|
|
|
|
/* The logic of the backtracking done in this routine is based on the
|
|
* characteristics of the SJIS code set; where a single-byte character must
|
|
* be in the range 0x00-0xff, 0xa0-0xdf, and the first byte ("shift byte")
|
|
* of a 2-byte character must be in the range 0x80-0x9f, 0xe0-0xfc.
|
|
*
|
|
* Let "N" denote a non-shift byte (ASCII or katakana), and "S" denote a
|
|
* shift byte. If the byte stream ends with "...S", it must end with a
|
|
* two-byte character (otherwise we would not be at this point in the stream).
|
|
* Otherwise, "...NSS...SSN" parses as "...N(SS)...(SS)(N)" if there are an
|
|
* even number (including zero) of shift bytes preceding the last N, or
|
|
* "...N(SS)...(SS)(SN)" if there are an odd number.
|
|
* (And similarly if we reach the anchor point, curlp, instead of finding
|
|
* an N).
|
|
* In the worst case, this algorithm has to back up all the way to the
|
|
* beginning, but it will only have to do so once. (After backstepping the
|
|
* last character, the preceding string of (SS) characters can be
|
|
* backstepped quickly.) Thus, we can process an entire ".*" in linear time.
|
|
*
|
|
* Note that this routine also works well in the NLS case, as we never
|
|
* will find a shift byte; so it will just step back once...
|
|
*/
|
|
while (lp != locs) {
|
|
if (advance(lp, ep)) return (1);
|
|
if (lp <= curlp) return (0);
|
|
--lp;
|
|
#ifdef _KJI
|
|
if (NCisshift(*lp)) lp--;
|
|
else {
|
|
for (curwp = lp;
|
|
curwp > curlp && NCisshift(curwp[-1]);
|
|
--curwp);
|
|
if ((lp-curwp) & 1) --lp;
|
|
}
|
|
#endif
|
|
}
|
|
return (0);
|
|
|
|
}
|
|
}
|
|
}
|
|
/* this routine gets the low (or only) value into
|
|
* __low, and the delta to the high value into
|
|
* __ssize (for \{m,\}, set _ssize to max.)
|
|
* RE_DUP_MAX is a POSIX variable.
|
|
*/
|
|
static void
|
|
__getintvl(char *str)
|
|
{
|
|
__low = *str++;
|
|
__ssize = (*str == RE_DUP_MAX)? MAXINT: *str - __low;
|
|
}
|
|
|
|
static __ecmp(char *a, char *b, register int count)
|
|
{
|
|
while(count--)
|
|
if(*a++ != *b++)
|
|
return(0);
|
|
return(1);
|
|
}
|
|
|
|
#ifdef _KJI
|
|
/* This routine replaces the _ISTHERE macro; it matches the pattern */
|
|
/* within brackets (bp) against the char in sp. It will advance in the bp */
|
|
/* expression until a match occurs or the pattern is empty. */
|
|
/* The _NEG case is handled by switching the return codes. */
|
|
|
|
static int
|
|
__isthere(char *sp, char *bp, char **next_character)
|
|
{
|
|
int c, lc;
|
|
short co;
|
|
wchar_t cu;
|
|
wchar_t w;
|
|
wchar_t *p;
|
|
int ishere, nothere;
|
|
char *sa = sp;
|
|
|
|
nothere = (bp[-3] >> 2) & 1;
|
|
ishere = nothere ^ 1;
|
|
|
|
if(sp == (char *)0 || *sp == '\0' ||
|
|
next_character == (char **)0)
|
|
return(0);
|
|
|
|
*next_character = sp;
|
|
|
|
/* Set *next_character to the next 'character' */
|
|
/* in the string. This is so that when there */
|
|
/* are multiple character 'characters' like LL */
|
|
/* the string pointer can be properly incremented. */
|
|
/* The value *next_character is incemented here */
|
|
/* so that if 'sa' is not incremented, */
|
|
/* *next_character will still point to the next */
|
|
/* 'character'. */
|
|
|
|
_GETVAL(co, cu, (*next_character), p, w);
|
|
|
|
if(NCisshift(*sa))
|
|
c = _GETWC(sa);
|
|
else c = sa[0];
|
|
|
|
do {
|
|
|
|
sa = sp;
|
|
switch(*bp++) {
|
|
|
|
case _CCHR:
|
|
lc = _GETWC(bp);
|
|
if(lc == c) {
|
|
return(ishere);
|
|
}
|
|
bp += 2;
|
|
break;
|
|
|
|
case _CRNGE:
|
|
_GETVAL(co, cu, sa, p, w);
|
|
lc = _GETWC(bp);
|
|
if(cu >= lc) {
|
|
lc = ((bp[2] << 8) | bp[3]);
|
|
|
|
if(cu <= lc) {
|
|
*next_character = sa;
|
|
return(ishere);
|
|
}
|
|
}
|
|
bp += 4;
|
|
break;
|
|
|
|
case _CELEM:
|
|
_GETVAL(co, cu, sa, p, w);
|
|
if(cu == _GETWC(bp)) {
|
|
*next_character = sa;
|
|
return(ishere);
|
|
}
|
|
bp += 2;
|
|
break;
|
|
|
|
case _CEQV:
|
|
_GETVAL(co, cu, sa, p, w);
|
|
if(co == _GETWC(bp)) {
|
|
*next_character = sa;
|
|
return(ishere);
|
|
}
|
|
bp += 2;
|
|
break;
|
|
|
|
case _CDOT:
|
|
_GETVAL(co, cu, sa, p, w);
|
|
if (co != 0) {
|
|
*next_character = sa;
|
|
return(ishere);
|
|
}
|
|
break;
|
|
|
|
case _CLASS:
|
|
if((*__istab[*bp++].isfunc)(c)) {
|
|
return(ishere);
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
} while( *bp != _CKET);
|
|
|
|
/* If the pointer (sa) has been incremented, then */
|
|
/* set *next_character to point to the new location */
|
|
if (sa != sp)
|
|
*next_character = sa;
|
|
|
|
return(nothere);
|
|
}
|
|
#endif /* _KJI */
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif /* _H_NLREGEXP */
|