diff options
Diffstat (limited to 'usr/src/cmd/awk/b.c')
| -rw-r--r-- | usr/src/cmd/awk/b.c | 576 | 
1 files changed, 392 insertions, 184 deletions
| diff --git a/usr/src/cmd/awk/b.c b/usr/src/cmd/awk/b.c index 9caee4e9d3..adca0cb633 100644 --- a/usr/src/cmd/awk/b.c +++ b/usr/src/cmd/awk/b.c @@ -1,4 +1,28 @@  /* + * Copyright (C) Lucent Technologies 1997 + * All Rights Reserved + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby + * granted, provided that the above copyright notice appear in all + * copies and that both that the copyright notice and this + * permission notice and warranty disclaimer appear in supporting + * documentation, and that the name Lucent Technologies or any of + * its entities not be used in advertising or publicity pertaining + * to distribution of the software without specific, written prior + * permission. + * + * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. + * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER + * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, + * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF + * THIS SOFTWARE. + */ + +/*   * CDDL HEADER START   *   * The contents of this file are subject to the terms of the @@ -28,6 +52,8 @@  /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/  /*	  All Rights Reserved  	*/ +/* lasciate ogne speranza, voi ch'intrate. */ +  #define	DEBUG  #include "awk.h" @@ -37,74 +63,86 @@  				/* NCHARS is 2**n */  #define	MAXLIN (3 * LINE_MAX) -#define	type(v)		(v)->nobj +#define	type(v)		(v)->nobj	/* badly overloaded here */ +#define	info(v)		(v)->ntype	/* badly overloaded here */  #define	left(v)		(v)->narg[0]  #define	right(v)	(v)->narg[1]  #define	parent(v)	(v)->nnext  #define	LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL: +#define	ELEAF	case EMPTYRE:		/* empty string in regexp */  #define	UNARY	case STAR: case PLUS: case QUEST:  /*   * encoding in tree Nodes: - *	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL): + *	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):   *		left is index, right contains value or pointer to value   *	unary (STAR, PLUS, QUEST): left is child, right is null   *	binary (CAT, OR): left and right are children   *	parent contains pointer to parent   */ -int	setvec[MAXLIN]; -int	tmpset[MAXLIN]; -Node	*point[MAXLIN]; +int	*setvec; +int	*tmpset; +int	maxsetvec = 0;  int	rtok;		/* next token in current re */  int	rlxval; -uchar	*rlxstr; -uchar	*prestr;	/* current position in current re */ -uchar	*lastre;	/* origin of last re */ +static uschar	*rlxstr; +static uschar	*prestr;	/* current position in current re */ +static uschar	*lastre;	/* origin of last re */  static	int setcnt;  static	int poscnt; -uchar	*patbeg; +char	*patbeg;  int	patlen;  #define	NFA	20	/* cache this many dynamic fa's */  fa	*fatab[NFA];  int	nfatab	= 0;	/* entries in fatab */ -static fa	*mkdfa(uchar *, int); +static fa	*mkdfa(const char *, int);  static int	makeinit(fa *, int);  static void	penter(Node *);  static void	freetr(Node *); -static void	overflo(char *); +static void	overflo(const char *); +static void	growvec(const char *);  static void	cfoll(fa *, Node *);  static void	follow(Node *); -static Node	*reparse(uchar *); +static Node	*reparse(const char *);  static int	relex(void);  static void	freefa(fa *);  static int	cgoto(fa *, int, int);  fa * -makedfa(uchar *s, int anchor)	/* returns dfa for reg expr s */ +makedfa(const char *s, int anchor)	/* returns dfa for reg expr s */  {  	int i, use, nuse;  	fa *pfa; +	static int now = 1; + +	if (setvec == NULL) {	/* first time through any RE */ +		maxsetvec = MAXLIN; +		setvec = (int *)malloc(maxsetvec * sizeof (int)); +		tmpset = (int *)malloc(maxsetvec * sizeof (int)); +		if (setvec == NULL || tmpset == NULL) +			overflo("out of space initializing makedfa"); +	}  	if (compile_time)	/* a constant for sure */  		return (mkdfa(s, anchor));  	for (i = 0; i < nfatab; i++) {	/* is it there already? */  		if (fatab[i]->anchor == anchor && -		    strcmp((char *)fatab[i]->restr, (char *)s) == 0) { -			fatab[i]->use++; +		    strcmp((const char *)fatab[i]->restr, s) == 0) { +			fatab[i]->use = now++;  			return (fatab[i]);  		}  	}  	pfa = mkdfa(s, anchor);  	if (nfatab < NFA) {	/* room for another */  		fatab[nfatab] = pfa; -		fatab[nfatab]->use = 1; +		fatab[nfatab]->use = now++;  		nfatab++;  		return (pfa);  	} @@ -117,13 +155,16 @@ makedfa(uchar *s, int anchor)	/* returns dfa for reg expr s */  		}  	freefa(fatab[nuse]);  	fatab[nuse] = pfa; -	pfa->use = 1; +	pfa->use = now++;  	return (pfa);  } +/* + * does the real work of making a dfa + * anchor = 1 for anchored matches, else 0 + */  fa * -mkdfa(uchar *s, int anchor)	/* does the real work of making a dfa */ -	/* anchor = 1 for anchored matches, else 0 */ +mkdfa(const char *s, int anchor)  {  	Node *p, *p1;  	fa *f; @@ -137,7 +178,7 @@ mkdfa(uchar *s, int anchor)	/* does the real work of making a dfa */  	poscnt = 0;  	penter(p1);	/* enter parent pointers and leaf indices */  	if ((f = (fa *)calloc(1, sizeof (fa) + poscnt * sizeof (rrow))) == NULL) -		overflo("no room for fa"); +		overflo("out of space for fa");  	/* penter has computed number of positions in re */  	f->accept = poscnt-1;  	cfoll(f, p1);	/* set up follow sets */ @@ -151,14 +192,14 @@ mkdfa(uchar *s, int anchor)	/* does the real work of making a dfa */  	*f->posns[1] = 0;  	f->initstat = makeinit(f, anchor);  	f->anchor = anchor; -	f->restr = tostring(s); +	f->restr = (uschar *)tostring(s);  	return (f);  }  static int  makeinit(fa *f, int anchor)  { -	register int i, k; +	int i, k;  	f->curstat = 2;  	f->out[2] = 0; @@ -192,9 +233,10 @@ void  penter(Node *p)	/* set up parent pointers and leaf indices */  {  	switch (type(p)) { +	ELEAF  	LEAF -		left(p) = (Node *) poscnt; -		point[poscnt++] = p; +		info(p) = poscnt; +		poscnt++;  		break;  	UNARY  		penter(left(p)); @@ -207,8 +249,8 @@ penter(Node *p)	/* set up parent pointers and leaf indices */  		parent(left(p)) = p;  		parent(right(p)) = p;  		break; -	default: -		ERROR "unknown type %d in penter", type(p) FATAL; +	default:	/* can't happen */ +		FATAL("can't happen: unknown type %d in penter", type(p));  		break;  	}  } @@ -217,6 +259,7 @@ static void  freetr(Node *p)	/* free parse tree */  {  	switch (type(p)) { +	ELEAF  	LEAF  		xfree(p);  		break; @@ -230,92 +273,168 @@ freetr(Node *p)	/* free parse tree */  		freetr(right(p));  		xfree(p);  		break; -	default: -		ERROR "unknown type %d in freetr", type(p) FATAL; +	default:	/* can't happen */ +		FATAL("can't happen: unknown type %d in freetr", type(p));  		break;  	}  } -uchar * -cclenter(uchar *p) +static void +growvec(const char *msg) +{ +	maxsetvec *= 4; +	setvec = (int *)realloc(setvec, maxsetvec * sizeof (int)); +	tmpset = (int *)realloc(tmpset, maxsetvec * sizeof (int)); +	if (setvec == NULL || tmpset == NULL) +		overflo(msg); +} + +/* + * in the parsing of regular expressions, metacharacters like . have + * to be seen literally; \056 is not a metacharacter. + */ + +/* + * find and eval hex string at pp, return new p; only pick up one 8-bit + * byte (2 chars). + */ +int +hexstr(uschar **pp) +{ +	uschar *p; +	int n = 0; +	int i; + +	for (i = 0, p = (uschar *)*pp; i < 2 && isxdigit(*p); i++, p++) { +		if (isdigit(*p)) +			n = 16 * n + *p - '0'; +		else if (*p >= 'a' && *p <= 'f') +			n = 16 * n + *p - 'a' + 10; +		else if (*p >= 'A' && *p <= 'F') +			n = 16 * n + *p - 'A' + 10; +	} +	*pp = (uschar *)p; +	return (n); +} + +#define	isoctdigit(c) ((c) >= '0' && (c) <= '7') + +/* pick up next thing after a \\ and increment *pp */ +int +quoted(uschar **pp)  { -	register int i, c; -	uchar *op, *chars, *ret; -	size_t	bsize; +	uschar *p = *pp; +	int c; + +	if ((c = *p++) == 't') +		c = '\t'; +	else if (c == 'n') +		c = '\n'; +	else if (c == 'f') +		c = '\f'; +	else if (c == 'r') +		c = '\r'; +	else if (c == 'b') +		c = '\b'; +	else if (c == '\\') +		c = '\\'; +	else if (c == 'x') {	/* hexadecimal goo follows */ +		c = hexstr(&p);	/* this adds a null if number is invalid */ +	} else if (isoctdigit(c)) {	/* \d \dd \ddd */ +		int n = c - '0'; +		if (isoctdigit(*p)) { +			n = 8 * n + *p++ - '0'; +			if (isoctdigit(*p)) +				n = 8 * n + *p++ - '0'; +		} +		c = n; +	} /* else */ +		/* c = c; */ +	*pp = p; +	return (c); +} + +char * +cclenter(const char *argp)	/* add a character class */ +{ +	int i, c, c2; +	uschar *p = (uschar *)argp; +	uschar *op, *bp; +	static uschar *buf = NULL; +	static size_t bufsz = 100; -	init_buf(&chars, &bsize, LINE_INCR);  	op = p; -	i = 0; -	while ((c = *p++) != 0) { +	if (buf == NULL && (buf = (uschar *)malloc(bufsz)) == NULL) +		FATAL("out of space for character class [%.10s...] 1", p); +	bp = buf; +	for (i = 0; (c = *p++) != 0; ) {  		if (c == '\\') { -			if ((c = *p++) == 't') -				c = '\t'; -			else if (c == 'n') -				c = '\n'; -			else if (c == 'f') -				c = '\f'; -			else if (c == 'r') -				c = '\r'; -			else if (c == 'b') -				c = '\b'; -			else if (c == '\\') -				c = '\\'; -			else if (isdigit(c)) { -				int n = c - '0'; -				if (isdigit(*p)) { -					n = 8 * n + *p++ - '0'; -					if (isdigit(*p)) -						n = 8 * n + *p++ - '0'; -				} -				c = n; -			} /* else */ -				/* c = c; */ -		} else if (c == '-' && i > 0 && chars[i-1] != 0) { +			c = quoted(&p); +		} else if (c == '-' && i > 0 && bp[-1] != 0) {  			if (*p != 0) { -				c = chars[i-1]; -				while ((uchar)c < *p) {	/* fails if *p is \\ */ -					expand_buf(&chars, &bsize, i); -					chars[i++] = ++c; +				c = bp[-1]; +				c2 = *p++; +				if (c2 == '\\') +					c2 = quoted(&p); +				if (c > c2) {	/* empty; ignore */ +					bp--; +					i--; +					continue; +				} +				while (c < c2) { +					if (!adjbuf((char **)&buf, &bufsz, +					    bp-buf+2, 100, (char **)&bp, +					    "cclenter1")) { +						FATAL( +			"out of space for character class [%.10s...] 2", p); +					} +					*bp++ = ++c; +					i++;  				} -				p++;  				continue;  			}  		} -		expand_buf(&chars, &bsize, i); -		chars[i++] = c; +		if (!adjbuf((char **)&buf, &bufsz, bp-buf+2, 100, (char **)&bp, +		    "cclenter2")) +			FATAL( +			    "out of space for character class [%.10s...] 3", p); +		*bp++ = c; +		i++;  	} -	chars[i++] = '\0'; -	dprintf(("cclenter: in = |%s|, out = |%s|\n", op, chars)); +	*bp = '\0'; +	dprintf(("cclenter: in = |%s|, out = |%s|\n", op, buf));  	xfree(op); -	ret = tostring(chars); -	free(chars); -	return (ret); +	return ((char *)tostring((char *)buf));  }  static void -overflo(char *s) +overflo(const char *s)  { -	ERROR "regular expression too big: %s", gettext((char *)s) FATAL; +	FATAL("regular expression too big: %.30s...", gettext((char *)s));  }  /* enter follow set of each leaf of vertex v into lfollow[leaf] */  static void  cfoll(fa *f, Node *v)  { -	register int i; -	register int *p; +	int i; +	int *p;  	switch (type(v)) { +	ELEAF  	LEAF -		f->re[(int)left(v)].ltype = type(v); -		f->re[(int)left(v)].lval = (int)right(v); +		f->re[info(v)].ltype = type(v); +		f->re[info(v)].lval.np = right(v); +		while (f->accept >= maxsetvec) {	/* guessing here! */ +			growvec("out of space in cfoll()"); +		}  		for (i = 0; i <= f->accept; i++)  			setvec[i] = 0;  		setcnt = 0;  		follow(v);	/* computes setvec and setcnt */  		if ((p = (int *)calloc(1, (setcnt+1) * sizeof (int))) == NULL) -			overflo("follow set overflow"); -		f->re[(int)left(v)].lfollow = p; +			overflo("out of space building follow set"); +		f->re[info(v)].lfollow = p;  		*p = setcnt;  		for (i = f->accept; i >= 0; i--) {  			if (setvec[i] == 1) @@ -330,8 +449,8 @@ cfoll(fa *f, Node *v)  		cfoll(f, left(v));  		cfoll(f, right(v));  		break; -	default: -		ERROR "unknown type %d in cfoll", type(v) FATAL; +	default:	/* can't happen */ +		FATAL("can't happen: unknown type %d in cfoll", type(v));  	}  } @@ -342,15 +461,25 @@ cfoll(fa *f, Node *v)  static int  first(Node *p)  { -	register int b; +	int b, lp;  	switch (type(p)) { +	ELEAF  	LEAF -		if (setvec[(int)left(p)] != 1) { -			setvec[(int)left(p)] = 1; +		lp = info(p);	/* look for high-water mark of subscripts */ +		while (setcnt >= maxsetvec || lp >= maxsetvec) { +			/* guessing here! */ +			growvec("out of space in first()"); +		} +		if (type(p) == EMPTYRE) { +			setvec[lp] = 0; +			return (0); +		} +		if (setvec[lp] != 1) { +			setvec[lp] = 1;  			setcnt++;  		} -		if (type(p) == CCL && (*(uchar *)right(p)) == '\0') +		if (type(p) == CCL && (*(char *)right(p)) == '\0')  			return (0);		/* empty CCL */  		else  			return (1); @@ -372,8 +501,7 @@ first(Node *p)  			return (0);  		return (1);  	} -	ERROR "unknown type %d in first", type(p) FATAL; -	return (-1); +	FATAL("can't happen: unknown type %d in first", type(p));  }  /* collects leaves that can follow v into setvec */ @@ -407,14 +535,16 @@ follow(Node *v)  			follow(p);  		return;  	default: -		ERROR "unknown type %d in follow", type(p) FATAL; +		FATAL("unknown type %d in follow", type(p));  		break;  	}  }  static int -member(uchar c, uchar *s)	/* is c in s? */ +member(int c, const char *sarg)	/* is c in s? */  { +	uschar *s = (uschar *)sarg; +  	while (*s)  		if (c == *s++)  			return (1); @@ -423,9 +553,10 @@ member(uchar c, uchar *s)	/* is c in s? */  int -match(fa *f, uchar *p) +match(fa *f, const char *p0)	/* shortest match ? */  { -	register int s, ns; +	int s, ns; +	uschar *p = (uschar *)p0;  	s = f->reset ? makeinit(f, 0) : f->initstat;  	if (f->out[s]) @@ -442,10 +573,11 @@ match(fa *f, uchar *p)  }  int -pmatch(fa *f, uchar *p) +pmatch(fa *f, const char *p0)	/* longest match, for sub */  { -	register int s, ns; -	register uchar *q; +	int s, ns; +	uschar *p = (uschar *)p0; +	uschar *q;  	int i, k;  	if (f->reset) { @@ -453,7 +585,7 @@ pmatch(fa *f, uchar *p)  	} else {  		s = f->initstat;  	} -	patbeg = p; +	patbeg = (char *)p;  	patlen = -1;  	do {  		q = p; @@ -466,16 +598,17 @@ pmatch(fa *f, uchar *p)  				s = cgoto(f, s, *q);  			if (s == 1) {	/* no transition */  				if (patlen >= 0) { -					patbeg = p; +					patbeg = (char *)p;  					return (1); -				} else +				} else {  					goto nextin;	/* no match */ +				}  			}  		} while (*q++ != 0);  		if (f->out[s])  			patlen = q - p - 1;	/* don't count $ */  		if (patlen >= 0) { -			patbeg = p; +			patbeg = (char *)p;  			return (1);  		}  	nextin: @@ -485,7 +618,7 @@ pmatch(fa *f, uchar *p)  				xfree(f->posns[i]);  			k = *f->posns[0];  			if ((f->posns[2] = -			    (int *)calloc(1, (k + 1) * sizeof (int))) == NULL) { +			    (int *)calloc(k + 1, sizeof (int))) == NULL) {  				overflo("out of space in pmatch");  			}  			for (i = 0; i <= k; i++) @@ -500,10 +633,11 @@ pmatch(fa *f, uchar *p)  }  int -nematch(fa *f, uchar *p) +nematch(fa *f, const char *p0)	/* non-empty match, for sub */  { -	register int s, ns; -	register uchar *q; +	int s, ns; +	uschar *p = (uschar *)p0; +	uschar *q;  	int i, k;  	if (f->reset) { @@ -523,7 +657,7 @@ nematch(fa *f, uchar *p)  				s = cgoto(f, s, *q);  			if (s == 1) {	/* no transition */  				if (patlen > 0) { -					patbeg = p; +					patbeg = (char *)p;  					return (1);  				} else  					goto nnextin;	/* no nonempty match */ @@ -532,7 +666,7 @@ nematch(fa *f, uchar *p)  		if (f->out[s])  			patlen = q - p - 1;	/* don't count $ */  		if (patlen > 0) { -			patbeg = p; +			patbeg = (char *)p;  			return (1);  		}  	nnextin: @@ -542,7 +676,7 @@ nematch(fa *f, uchar *p)  				xfree(f->posns[i]);  			k = *f->posns[0];  			if ((f->posns[2] = -			    (int *)calloc(1, (k + 1) * sizeof (int))) == NULL) { +			    (int *)calloc(k + 1, sizeof (int))) == NULL) {  				overflo("out of state space");  			}  			for (i = 0; i <= k; i++) @@ -560,31 +694,31 @@ nematch(fa *f, uchar *p)  static Node *regexp(void), *primary(void), *concat(Node *);  static Node *alt(Node *), *unary(Node *); +/* parses regular expression pointed to by p */ +/* uses relex() to scan regular expression */  static Node * -reparse(uchar *p) +reparse(const char *p)  { -	/* parses regular expression pointed to by p */ -	/* uses relex() to scan regular expression */  	Node *np;  	dprintf(("reparse <%s>\n", p)); -	lastre = prestr = p;	/* prestr points to string to be parsed */ + +	/* prestr points to string to be parsed */ +	lastre = prestr = (uschar *)p;  	rtok = relex(); -	if (rtok == '\0') -		ERROR "empty regular expression" FATAL; -	np = regexp(); +	/* GNU compatibility: an empty regexp matches anything */  	if (rtok == '\0') { -		return (np); -	} else { -		ERROR "syntax error in regular expression %s at %s", -		    lastre, prestr FATAL; +		return (op2(EMPTYRE, NIL, NIL));  	} -	/*NOTREACHED*/ -	return (NULL); +	np = regexp(); +	if (rtok != '\0') +		FATAL("syntax error in regular expression %s at %s", +		    lastre, prestr); +	return (np);  }  static Node * -regexp(void) +regexp(void)	/* top-level parse of reg expr */  {  	return (alt(concat(primary())));  } @@ -596,28 +730,31 @@ primary(void)  	switch (rtok) {  	case CHAR: -		np = op2(CHAR, NIL, (Node *)rlxval); +		np = op2(CHAR, NIL, itonp(rlxval));  		rtok = relex();  		return (unary(np));  	case ALL:  		rtok = relex();  		return (unary(op2(ALL, NIL, NIL))); +	case EMPTYRE: +		rtok = relex(); +		return (unary(op2(ALL, NIL, NIL)));  	case DOT:  		rtok = relex();  		return (unary(op2(DOT, NIL, NIL)));  	case CCL:  		/*LINTED align*/ -		np = op2(CCL, NIL, (Node *)cclenter(rlxstr)); +		np = op2(CCL, NIL, (Node *)cclenter((char *)rlxstr));  		rtok = relex();  		return (unary(np));  	case NCCL:  		/*LINTED align*/ -		np = op2(NCCL, NIL, (Node *)cclenter(rlxstr)); +		np = op2(NCCL, NIL, (Node *)cclenter((char *)rlxstr));  		rtok = relex();  		return (unary(np));  	case '^':  		rtok = relex(); -		return (unary(op2(CHAR, NIL, (Node *)HAT))); +		return (unary(op2(CHAR, NIL, itonp(HAT))));  	case '$':  		rtok = relex();  		return (unary(op2(CHAR, NIL, NIL))); @@ -627,20 +764,20 @@ primary(void)  			rtok = relex();  			return (unary(op2(CCL, NIL,  			    /*LINTED align*/ -			    (Node *)tostring((uchar *)"")))); +			    (Node *)tostring(""))));  		}  		np = regexp();  		if (rtok == ')') {  			rtok = relex();  			return (unary(np));  		} else { -			ERROR "syntax error in regular expression %s at %s", -			    lastre, prestr FATAL; +			FATAL("syntax error in regular expression %s at %s", +			    lastre, prestr);  		}  		/* FALLTHROUGH */  	default: -		ERROR "illegal primary in regular expression %s at %s", -		    lastre, prestr FATAL; +		FATAL("illegal primary in regular expression %s at %s", +		    lastre, prestr);  	}  	/*NOTREACHED*/  	return (NULL); @@ -650,7 +787,14 @@ static Node *  concat(Node *np)  {  	switch (rtok) { -	case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(': +	case EMPTYRE: +	case CHAR: +	case DOT: +	case ALL: +	case CCL: +	case NCCL: +	case '$': +	case '(':  		return (concat(op2(CAT, np, primary())));  	default:  		return (np); @@ -685,12 +829,48 @@ unary(Node *np)  	}  } +/* + * Character class definitions conformant to the POSIX locale as + * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source + * and operating character sets are both ASCII (ISO646) or supersets + * thereof. + * + * Note that to avoid overflowing the temporary buffer used in + * relex(), the expanded character class (prior to range expansion) + * must be less than twice the size of their full name. + */ + +struct charclass { +	const char *cc_name; +	int cc_namelen; +	int (*cc_func)(int); +} charclasses[] = { +	{ "alnum",	5,	isalnum }, +	{ "alpha",	5,	isalpha }, +	{ "blank",	5,	isblank }, +	{ "cntrl",	5,	iscntrl }, +	{ "digit",	5,	isdigit }, +	{ "graph",	5,	isgraph }, +	{ "lower",	5,	islower }, +	{ "print",	5,	isprint }, +	{ "punct",	5,	ispunct }, +	{ "space",	5,	isspace }, +	{ "upper",	5,	isupper }, +	{ "xdigit",	6,	isxdigit }, +	{ NULL,		0,	NULL }, +}; + +  static int  relex(void)		/* lexical analyzer for reparse */  { -	register int c; -	uchar *cbuf; -	int clen, cflag; +	int c, n; +	int cflag; +	static uschar *buf = 0; +	static size_t bufsz = 100; +	uschar *bp; +	struct charclass *cc; +	int i;  	switch (c = *prestr++) {  	case '|': return OR; @@ -705,64 +885,82 @@ relex(void)		/* lexical analyzer for reparse */  	case ')':  		return (c);  	case '\\': -		if ((c = *prestr++) == 't') -			c = '\t'; -		else if (c == 'n') -			c = '\n'; -		else if (c == 'f') -			c = '\f'; -		else if (c == 'r') -			c = '\r'; -		else if (c == 'b') -			c = '\b'; -		else if (c == '\\') -			c = '\\'; -		else if (isdigit(c)) { -			int n = c - '0'; -			if (isdigit(*prestr)) { -				n = 8 * n + *prestr++ - '0'; -				if (isdigit(*prestr)) -					n = 8 * n + *prestr++ - '0'; -			} -			c = n; -		} /* else it's now in c */ -		rlxval = c; +		rlxval = quoted(&prestr);  		return (CHAR);  	default:  		rlxval = c;  		return (CHAR);  	case '[': -		clen = 0; +		if (buf == NULL && (buf = (uschar *)malloc(bufsz)) == NULL) +			FATAL("out of space in reg expr %.10s..", lastre); +		bp = buf;  		if (*prestr == '^') {  			cflag = 1;  			prestr++;  		} else  			cflag = 0; -		init_buf(&cbuf, NULL, strlen((char *)prestr) * 2 + 1); +		n = 2 * strlen((const char *)prestr) + 1; +		if (!adjbuf((char **)&buf, &bufsz, n, n, (char **)&bp, +		    "relex1")) +			FATAL("out of space for reg expr %.10s...", lastre);  		for (;;) {  			if ((c = *prestr++) == '\\') { -				cbuf[clen++] = '\\'; +				*bp++ = '\\';  				if ((c = *prestr++) == '\0') { -					ERROR -			"nonterminated character class %s", lastre FATAL; +					FATAL("nonterminated character class " +					    "%.20s...", lastre); +				} +				*bp++ = c; +			} else if (c == '[' && *prestr == ':') { +				/* +				 * Handle POSIX character class names. +				 * Dag-Erling Smorgrav, des@ofug.org +				 */ +				for (cc = charclasses; cc->cc_name; cc++) +					if (strncmp((const char *)prestr + 1, +					    (const char *)cc->cc_name, +					    cc->cc_namelen) == 0) +						break; + +				if (cc->cc_name == NULL || +				    prestr[1 + cc->cc_namelen] != ':' || +				    prestr[2 + cc->cc_namelen] != ']') { +					*bp++ = c; +					continue;  				} -				cbuf[clen++] = c; + +				prestr += cc->cc_namelen + 3; +				/* +				 * BUG: We begin at 1, instead of 0, since we +				 * would otherwise prematurely terminate the +				 * string for classes like [[:cntrl:]]. This +				 * means that we can't match the NUL character, +				 * not without first adapting the entire +				 * program to track each string's length. +				 */ +				for (i = 1; i < NCHARS; i++) { +					(void) adjbuf((char **)&buf, &bufsz, +					    bp - buf + 1, 100, (char **)&bp, +					    "relex2"); +					if (cc->cc_func(i)) { +						*bp++ = i; +						n++; +					} +				} +			} else if (c == '\0') { +				FATAL("nonterminated character class %.20s", +				    lastre); +			} else if (bp == buf) {	/* 1st char is special */ +				*bp++ = c;  			} else if (c == ']') { -				cbuf[clen] = 0; -				rlxstr = tostring(cbuf); -				free(cbuf); +				*bp++ = '\0'; +				rlxstr = (uschar *)tostring((char *)buf);  				if (cflag == 0)  					return (CCL);  				else  					return (NCCL); -			} else if (c == '\n') { -				ERROR "newline in character class %s...", -				    lastre FATAL; -			} else if (c == '\0') { -				ERROR "nonterminated character class %s", -				    lastre FATAL;  			} else -				cbuf[clen++] = c; +				*bp++ = c;  		}  		/*NOTREACHED*/  	} @@ -772,9 +970,13 @@ relex(void)		/* lexical analyzer for reparse */  static int  cgoto(fa *f, int s, int c)  { -	register int i, j, k; -	register int *p, *q; +	int i, j, k; +	int *p, *q; +	assert(c == HAT || c < NCHARS); +	while (f->accept >= maxsetvec) {	/* guessing here! */ +		growvec("out of space in cgoto()"); +	}  	for (i = 0; i <= f->accept; i++)  		setvec[i] = 0;  	setcnt = 0; @@ -782,16 +984,20 @@ cgoto(fa *f, int s, int c)  	p = f->posns[s];  	for (i = 1; i <= *p; i++) {  		if ((k = f->re[p[i]].ltype) != FINAL) { -			if (k == CHAR && c == f->re[p[i]].lval || -			    k == DOT && c != 0 && c != HAT || -			    k == ALL && c != 0 || -			    k == CCL && -			    member(c, (uchar *)f->re[p[i]].lval) || -			    k == NCCL && -			    !member(c, (uchar *)f->re[p[i]].lval) && -			    c != 0 && c != HAT) { +			if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np)) || +			    (k == DOT && c != 0 && c != HAT) || +			    (k == ALL && c != 0) || +			    (k == EMPTYRE && c != 0) || +			    (k == CCL && +			    member(c, (char *)f->re[p[i]].lval.up)) || +			    (k == NCCL && +			    !member(c, (char *)f->re[p[i]].lval.up) && +			    c != 0 && c != HAT)) {  				q = f->re[p[i]].lfollow;  				for (j = 1; j <= *q; j++) { +					if (q[j] >= maxsetvec) { +						growvec("cgoto overflow"); +					}  					if (setvec[q[j]] == 0) {  						setcnt++;  						setvec[q[j]] = 1; @@ -847,17 +1053,19 @@ cgoto(fa *f, int s, int c)  }  static void -freefa(fa *f) +freefa(fa *f)	/* free a finite automaton */  { - -	register int i; +	int i;  	if (f == NULL)  		return;  	for (i = 0; i <= f->curstat; i++)  		xfree(f->posns[i]); -	for (i = 0; i <= f->accept; i++) +	for (i = 0; i <= f->accept; i++) {  		xfree(f->re[i].lfollow); +		if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL) +			xfree((f->re[i].lval.np)); +	}  	xfree(f->restr);  	xfree(f);  } | 
