summaryrefslogtreecommitdiff
path: root/usr/src/cmd/awk/b.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/cmd/awk/b.c')
-rw-r--r--usr/src/cmd/awk/b.c576
1 files changed, 392 insertions, 184 deletions
diff --git a/usr/src/cmd/awk/b.c b/usr/src/cmd/awk/b.c
index 9caee4e9d3..adca0cb633 100644
--- a/usr/src/cmd/awk/b.c
+++ b/usr/src/cmd/awk/b.c
@@ -1,4 +1,28 @@
/*
+ * Copyright (C) Lucent Technologies 1997
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that the above copyright notice appear in all
+ * copies and that both that the copyright notice and this
+ * permission notice and warranty disclaimer appear in supporting
+ * documentation, and that the name Lucent Technologies or any of
+ * its entities not be used in advertising or publicity pertaining
+ * to distribution of the software without specific, written prior
+ * permission.
+ *
+ * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+ * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+ * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+ * THIS SOFTWARE.
+ */
+
+/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
@@ -28,6 +52,8 @@
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
+/* lasciate ogne speranza, voi ch'intrate. */
+
#define DEBUG
#include "awk.h"
@@ -37,74 +63,86 @@
/* NCHARS is 2**n */
#define MAXLIN (3 * LINE_MAX)
-#define type(v) (v)->nobj
+#define type(v) (v)->nobj /* badly overloaded here */
+#define info(v) (v)->ntype /* badly overloaded here */
#define left(v) (v)->narg[0]
#define right(v) (v)->narg[1]
#define parent(v) (v)->nnext
#define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
+#define ELEAF case EMPTYRE: /* empty string in regexp */
#define UNARY case STAR: case PLUS: case QUEST:
/*
* encoding in tree Nodes:
- * leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
+ * leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
* left is index, right contains value or pointer to value
* unary (STAR, PLUS, QUEST): left is child, right is null
* binary (CAT, OR): left and right are children
* parent contains pointer to parent
*/
-int setvec[MAXLIN];
-int tmpset[MAXLIN];
-Node *point[MAXLIN];
+int *setvec;
+int *tmpset;
+int maxsetvec = 0;
int rtok; /* next token in current re */
int rlxval;
-uchar *rlxstr;
-uchar *prestr; /* current position in current re */
-uchar *lastre; /* origin of last re */
+static uschar *rlxstr;
+static uschar *prestr; /* current position in current re */
+static uschar *lastre; /* origin of last re */
static int setcnt;
static int poscnt;
-uchar *patbeg;
+char *patbeg;
int patlen;
#define NFA 20 /* cache this many dynamic fa's */
fa *fatab[NFA];
int nfatab = 0; /* entries in fatab */
-static fa *mkdfa(uchar *, int);
+static fa *mkdfa(const char *, int);
static int makeinit(fa *, int);
static void penter(Node *);
static void freetr(Node *);
-static void overflo(char *);
+static void overflo(const char *);
+static void growvec(const char *);
static void cfoll(fa *, Node *);
static void follow(Node *);
-static Node *reparse(uchar *);
+static Node *reparse(const char *);
static int relex(void);
static void freefa(fa *);
static int cgoto(fa *, int, int);
fa *
-makedfa(uchar *s, int anchor) /* returns dfa for reg expr s */
+makedfa(const char *s, int anchor) /* returns dfa for reg expr s */
{
int i, use, nuse;
fa *pfa;
+ static int now = 1;
+
+ if (setvec == NULL) { /* first time through any RE */
+ maxsetvec = MAXLIN;
+ setvec = (int *)malloc(maxsetvec * sizeof (int));
+ tmpset = (int *)malloc(maxsetvec * sizeof (int));
+ if (setvec == NULL || tmpset == NULL)
+ overflo("out of space initializing makedfa");
+ }
if (compile_time) /* a constant for sure */
return (mkdfa(s, anchor));
for (i = 0; i < nfatab; i++) { /* is it there already? */
if (fatab[i]->anchor == anchor &&
- strcmp((char *)fatab[i]->restr, (char *)s) == 0) {
- fatab[i]->use++;
+ strcmp((const char *)fatab[i]->restr, s) == 0) {
+ fatab[i]->use = now++;
return (fatab[i]);
}
}
pfa = mkdfa(s, anchor);
if (nfatab < NFA) { /* room for another */
fatab[nfatab] = pfa;
- fatab[nfatab]->use = 1;
+ fatab[nfatab]->use = now++;
nfatab++;
return (pfa);
}
@@ -117,13 +155,16 @@ makedfa(uchar *s, int anchor) /* returns dfa for reg expr s */
}
freefa(fatab[nuse]);
fatab[nuse] = pfa;
- pfa->use = 1;
+ pfa->use = now++;
return (pfa);
}
+/*
+ * does the real work of making a dfa
+ * anchor = 1 for anchored matches, else 0
+ */
fa *
-mkdfa(uchar *s, int anchor) /* does the real work of making a dfa */
- /* anchor = 1 for anchored matches, else 0 */
+mkdfa(const char *s, int anchor)
{
Node *p, *p1;
fa *f;
@@ -137,7 +178,7 @@ mkdfa(uchar *s, int anchor) /* does the real work of making a dfa */
poscnt = 0;
penter(p1); /* enter parent pointers and leaf indices */
if ((f = (fa *)calloc(1, sizeof (fa) + poscnt * sizeof (rrow))) == NULL)
- overflo("no room for fa");
+ overflo("out of space for fa");
/* penter has computed number of positions in re */
f->accept = poscnt-1;
cfoll(f, p1); /* set up follow sets */
@@ -151,14 +192,14 @@ mkdfa(uchar *s, int anchor) /* does the real work of making a dfa */
*f->posns[1] = 0;
f->initstat = makeinit(f, anchor);
f->anchor = anchor;
- f->restr = tostring(s);
+ f->restr = (uschar *)tostring(s);
return (f);
}
static int
makeinit(fa *f, int anchor)
{
- register int i, k;
+ int i, k;
f->curstat = 2;
f->out[2] = 0;
@@ -192,9 +233,10 @@ void
penter(Node *p) /* set up parent pointers and leaf indices */
{
switch (type(p)) {
+ ELEAF
LEAF
- left(p) = (Node *) poscnt;
- point[poscnt++] = p;
+ info(p) = poscnt;
+ poscnt++;
break;
UNARY
penter(left(p));
@@ -207,8 +249,8 @@ penter(Node *p) /* set up parent pointers and leaf indices */
parent(left(p)) = p;
parent(right(p)) = p;
break;
- default:
- ERROR "unknown type %d in penter", type(p) FATAL;
+ default: /* can't happen */
+ FATAL("can't happen: unknown type %d in penter", type(p));
break;
}
}
@@ -217,6 +259,7 @@ static void
freetr(Node *p) /* free parse tree */
{
switch (type(p)) {
+ ELEAF
LEAF
xfree(p);
break;
@@ -230,92 +273,168 @@ freetr(Node *p) /* free parse tree */
freetr(right(p));
xfree(p);
break;
- default:
- ERROR "unknown type %d in freetr", type(p) FATAL;
+ default: /* can't happen */
+ FATAL("can't happen: unknown type %d in freetr", type(p));
break;
}
}
-uchar *
-cclenter(uchar *p)
+static void
+growvec(const char *msg)
+{
+ maxsetvec *= 4;
+ setvec = (int *)realloc(setvec, maxsetvec * sizeof (int));
+ tmpset = (int *)realloc(tmpset, maxsetvec * sizeof (int));
+ if (setvec == NULL || tmpset == NULL)
+ overflo(msg);
+}
+
+/*
+ * in the parsing of regular expressions, metacharacters like . have
+ * to be seen literally; \056 is not a metacharacter.
+ */
+
+/*
+ * find and eval hex string at pp, return new p; only pick up one 8-bit
+ * byte (2 chars).
+ */
+int
+hexstr(uschar **pp)
+{
+ uschar *p;
+ int n = 0;
+ int i;
+
+ for (i = 0, p = (uschar *)*pp; i < 2 && isxdigit(*p); i++, p++) {
+ if (isdigit(*p))
+ n = 16 * n + *p - '0';
+ else if (*p >= 'a' && *p <= 'f')
+ n = 16 * n + *p - 'a' + 10;
+ else if (*p >= 'A' && *p <= 'F')
+ n = 16 * n + *p - 'A' + 10;
+ }
+ *pp = (uschar *)p;
+ return (n);
+}
+
+#define isoctdigit(c) ((c) >= '0' && (c) <= '7')
+
+/* pick up next thing after a \\ and increment *pp */
+int
+quoted(uschar **pp)
{
- register int i, c;
- uchar *op, *chars, *ret;
- size_t bsize;
+ uschar *p = *pp;
+ int c;
+
+ if ((c = *p++) == 't')
+ c = '\t';
+ else if (c == 'n')
+ c = '\n';
+ else if (c == 'f')
+ c = '\f';
+ else if (c == 'r')
+ c = '\r';
+ else if (c == 'b')
+ c = '\b';
+ else if (c == '\\')
+ c = '\\';
+ else if (c == 'x') { /* hexadecimal goo follows */
+ c = hexstr(&p); /* this adds a null if number is invalid */
+ } else if (isoctdigit(c)) { /* \d \dd \ddd */
+ int n = c - '0';
+ if (isoctdigit(*p)) {
+ n = 8 * n + *p++ - '0';
+ if (isoctdigit(*p))
+ n = 8 * n + *p++ - '0';
+ }
+ c = n;
+ } /* else */
+ /* c = c; */
+ *pp = p;
+ return (c);
+}
+
+char *
+cclenter(const char *argp) /* add a character class */
+{
+ int i, c, c2;
+ uschar *p = (uschar *)argp;
+ uschar *op, *bp;
+ static uschar *buf = NULL;
+ static size_t bufsz = 100;
- init_buf(&chars, &bsize, LINE_INCR);
op = p;
- i = 0;
- while ((c = *p++) != 0) {
+ if (buf == NULL && (buf = (uschar *)malloc(bufsz)) == NULL)
+ FATAL("out of space for character class [%.10s...] 1", p);
+ bp = buf;
+ for (i = 0; (c = *p++) != 0; ) {
if (c == '\\') {
- if ((c = *p++) == 't')
- c = '\t';
- else if (c == 'n')
- c = '\n';
- else if (c == 'f')
- c = '\f';
- else if (c == 'r')
- c = '\r';
- else if (c == 'b')
- c = '\b';
- else if (c == '\\')
- c = '\\';
- else if (isdigit(c)) {
- int n = c - '0';
- if (isdigit(*p)) {
- n = 8 * n + *p++ - '0';
- if (isdigit(*p))
- n = 8 * n + *p++ - '0';
- }
- c = n;
- } /* else */
- /* c = c; */
- } else if (c == '-' && i > 0 && chars[i-1] != 0) {
+ c = quoted(&p);
+ } else if (c == '-' && i > 0 && bp[-1] != 0) {
if (*p != 0) {
- c = chars[i-1];
- while ((uchar)c < *p) { /* fails if *p is \\ */
- expand_buf(&chars, &bsize, i);
- chars[i++] = ++c;
+ c = bp[-1];
+ c2 = *p++;
+ if (c2 == '\\')
+ c2 = quoted(&p);
+ if (c > c2) { /* empty; ignore */
+ bp--;
+ i--;
+ continue;
+ }
+ while (c < c2) {
+ if (!adjbuf((char **)&buf, &bufsz,
+ bp-buf+2, 100, (char **)&bp,
+ "cclenter1")) {
+ FATAL(
+ "out of space for character class [%.10s...] 2", p);
+ }
+ *bp++ = ++c;
+ i++;
}
- p++;
continue;
}
}
- expand_buf(&chars, &bsize, i);
- chars[i++] = c;
+ if (!adjbuf((char **)&buf, &bufsz, bp-buf+2, 100, (char **)&bp,
+ "cclenter2"))
+ FATAL(
+ "out of space for character class [%.10s...] 3", p);
+ *bp++ = c;
+ i++;
}
- chars[i++] = '\0';
- dprintf(("cclenter: in = |%s|, out = |%s|\n", op, chars));
+ *bp = '\0';
+ dprintf(("cclenter: in = |%s|, out = |%s|\n", op, buf));
xfree(op);
- ret = tostring(chars);
- free(chars);
- return (ret);
+ return ((char *)tostring((char *)buf));
}
static void
-overflo(char *s)
+overflo(const char *s)
{
- ERROR "regular expression too big: %s", gettext((char *)s) FATAL;
+ FATAL("regular expression too big: %.30s...", gettext((char *)s));
}
/* enter follow set of each leaf of vertex v into lfollow[leaf] */
static void
cfoll(fa *f, Node *v)
{
- register int i;
- register int *p;
+ int i;
+ int *p;
switch (type(v)) {
+ ELEAF
LEAF
- f->re[(int)left(v)].ltype = type(v);
- f->re[(int)left(v)].lval = (int)right(v);
+ f->re[info(v)].ltype = type(v);
+ f->re[info(v)].lval.np = right(v);
+ while (f->accept >= maxsetvec) { /* guessing here! */
+ growvec("out of space in cfoll()");
+ }
for (i = 0; i <= f->accept; i++)
setvec[i] = 0;
setcnt = 0;
follow(v); /* computes setvec and setcnt */
if ((p = (int *)calloc(1, (setcnt+1) * sizeof (int))) == NULL)
- overflo("follow set overflow");
- f->re[(int)left(v)].lfollow = p;
+ overflo("out of space building follow set");
+ f->re[info(v)].lfollow = p;
*p = setcnt;
for (i = f->accept; i >= 0; i--) {
if (setvec[i] == 1)
@@ -330,8 +449,8 @@ cfoll(fa *f, Node *v)
cfoll(f, left(v));
cfoll(f, right(v));
break;
- default:
- ERROR "unknown type %d in cfoll", type(v) FATAL;
+ default: /* can't happen */
+ FATAL("can't happen: unknown type %d in cfoll", type(v));
}
}
@@ -342,15 +461,25 @@ cfoll(fa *f, Node *v)
static int
first(Node *p)
{
- register int b;
+ int b, lp;
switch (type(p)) {
+ ELEAF
LEAF
- if (setvec[(int)left(p)] != 1) {
- setvec[(int)left(p)] = 1;
+ lp = info(p); /* look for high-water mark of subscripts */
+ while (setcnt >= maxsetvec || lp >= maxsetvec) {
+ /* guessing here! */
+ growvec("out of space in first()");
+ }
+ if (type(p) == EMPTYRE) {
+ setvec[lp] = 0;
+ return (0);
+ }
+ if (setvec[lp] != 1) {
+ setvec[lp] = 1;
setcnt++;
}
- if (type(p) == CCL && (*(uchar *)right(p)) == '\0')
+ if (type(p) == CCL && (*(char *)right(p)) == '\0')
return (0); /* empty CCL */
else
return (1);
@@ -372,8 +501,7 @@ first(Node *p)
return (0);
return (1);
}
- ERROR "unknown type %d in first", type(p) FATAL;
- return (-1);
+ FATAL("can't happen: unknown type %d in first", type(p));
}
/* collects leaves that can follow v into setvec */
@@ -407,14 +535,16 @@ follow(Node *v)
follow(p);
return;
default:
- ERROR "unknown type %d in follow", type(p) FATAL;
+ FATAL("unknown type %d in follow", type(p));
break;
}
}
static int
-member(uchar c, uchar *s) /* is c in s? */
+member(int c, const char *sarg) /* is c in s? */
{
+ uschar *s = (uschar *)sarg;
+
while (*s)
if (c == *s++)
return (1);
@@ -423,9 +553,10 @@ member(uchar c, uchar *s) /* is c in s? */
int
-match(fa *f, uchar *p)
+match(fa *f, const char *p0) /* shortest match ? */
{
- register int s, ns;
+ int s, ns;
+ uschar *p = (uschar *)p0;
s = f->reset ? makeinit(f, 0) : f->initstat;
if (f->out[s])
@@ -442,10 +573,11 @@ match(fa *f, uchar *p)
}
int
-pmatch(fa *f, uchar *p)
+pmatch(fa *f, const char *p0) /* longest match, for sub */
{
- register int s, ns;
- register uchar *q;
+ int s, ns;
+ uschar *p = (uschar *)p0;
+ uschar *q;
int i, k;
if (f->reset) {
@@ -453,7 +585,7 @@ pmatch(fa *f, uchar *p)
} else {
s = f->initstat;
}
- patbeg = p;
+ patbeg = (char *)p;
patlen = -1;
do {
q = p;
@@ -466,16 +598,17 @@ pmatch(fa *f, uchar *p)
s = cgoto(f, s, *q);
if (s == 1) { /* no transition */
if (patlen >= 0) {
- patbeg = p;
+ patbeg = (char *)p;
return (1);
- } else
+ } else {
goto nextin; /* no match */
+ }
}
} while (*q++ != 0);
if (f->out[s])
patlen = q - p - 1; /* don't count $ */
if (patlen >= 0) {
- patbeg = p;
+ patbeg = (char *)p;
return (1);
}
nextin:
@@ -485,7 +618,7 @@ pmatch(fa *f, uchar *p)
xfree(f->posns[i]);
k = *f->posns[0];
if ((f->posns[2] =
- (int *)calloc(1, (k + 1) * sizeof (int))) == NULL) {
+ (int *)calloc(k + 1, sizeof (int))) == NULL) {
overflo("out of space in pmatch");
}
for (i = 0; i <= k; i++)
@@ -500,10 +633,11 @@ pmatch(fa *f, uchar *p)
}
int
-nematch(fa *f, uchar *p)
+nematch(fa *f, const char *p0) /* non-empty match, for sub */
{
- register int s, ns;
- register uchar *q;
+ int s, ns;
+ uschar *p = (uschar *)p0;
+ uschar *q;
int i, k;
if (f->reset) {
@@ -523,7 +657,7 @@ nematch(fa *f, uchar *p)
s = cgoto(f, s, *q);
if (s == 1) { /* no transition */
if (patlen > 0) {
- patbeg = p;
+ patbeg = (char *)p;
return (1);
} else
goto nnextin; /* no nonempty match */
@@ -532,7 +666,7 @@ nematch(fa *f, uchar *p)
if (f->out[s])
patlen = q - p - 1; /* don't count $ */
if (patlen > 0) {
- patbeg = p;
+ patbeg = (char *)p;
return (1);
}
nnextin:
@@ -542,7 +676,7 @@ nematch(fa *f, uchar *p)
xfree(f->posns[i]);
k = *f->posns[0];
if ((f->posns[2] =
- (int *)calloc(1, (k + 1) * sizeof (int))) == NULL) {
+ (int *)calloc(k + 1, sizeof (int))) == NULL) {
overflo("out of state space");
}
for (i = 0; i <= k; i++)
@@ -560,31 +694,31 @@ nematch(fa *f, uchar *p)
static Node *regexp(void), *primary(void), *concat(Node *);
static Node *alt(Node *), *unary(Node *);
+/* parses regular expression pointed to by p */
+/* uses relex() to scan regular expression */
static Node *
-reparse(uchar *p)
+reparse(const char *p)
{
- /* parses regular expression pointed to by p */
- /* uses relex() to scan regular expression */
Node *np;
dprintf(("reparse <%s>\n", p));
- lastre = prestr = p; /* prestr points to string to be parsed */
+
+ /* prestr points to string to be parsed */
+ lastre = prestr = (uschar *)p;
rtok = relex();
- if (rtok == '\0')
- ERROR "empty regular expression" FATAL;
- np = regexp();
+ /* GNU compatibility: an empty regexp matches anything */
if (rtok == '\0') {
- return (np);
- } else {
- ERROR "syntax error in regular expression %s at %s",
- lastre, prestr FATAL;
+ return (op2(EMPTYRE, NIL, NIL));
}
- /*NOTREACHED*/
- return (NULL);
+ np = regexp();
+ if (rtok != '\0')
+ FATAL("syntax error in regular expression %s at %s",
+ lastre, prestr);
+ return (np);
}
static Node *
-regexp(void)
+regexp(void) /* top-level parse of reg expr */
{
return (alt(concat(primary())));
}
@@ -596,28 +730,31 @@ primary(void)
switch (rtok) {
case CHAR:
- np = op2(CHAR, NIL, (Node *)rlxval);
+ np = op2(CHAR, NIL, itonp(rlxval));
rtok = relex();
return (unary(np));
case ALL:
rtok = relex();
return (unary(op2(ALL, NIL, NIL)));
+ case EMPTYRE:
+ rtok = relex();
+ return (unary(op2(ALL, NIL, NIL)));
case DOT:
rtok = relex();
return (unary(op2(DOT, NIL, NIL)));
case CCL:
/*LINTED align*/
- np = op2(CCL, NIL, (Node *)cclenter(rlxstr));
+ np = op2(CCL, NIL, (Node *)cclenter((char *)rlxstr));
rtok = relex();
return (unary(np));
case NCCL:
/*LINTED align*/
- np = op2(NCCL, NIL, (Node *)cclenter(rlxstr));
+ np = op2(NCCL, NIL, (Node *)cclenter((char *)rlxstr));
rtok = relex();
return (unary(np));
case '^':
rtok = relex();
- return (unary(op2(CHAR, NIL, (Node *)HAT)));
+ return (unary(op2(CHAR, NIL, itonp(HAT))));
case '$':
rtok = relex();
return (unary(op2(CHAR, NIL, NIL)));
@@ -627,20 +764,20 @@ primary(void)
rtok = relex();
return (unary(op2(CCL, NIL,
/*LINTED align*/
- (Node *)tostring((uchar *)""))));
+ (Node *)tostring(""))));
}
np = regexp();
if (rtok == ')') {
rtok = relex();
return (unary(np));
} else {
- ERROR "syntax error in regular expression %s at %s",
- lastre, prestr FATAL;
+ FATAL("syntax error in regular expression %s at %s",
+ lastre, prestr);
}
/* FALLTHROUGH */
default:
- ERROR "illegal primary in regular expression %s at %s",
- lastre, prestr FATAL;
+ FATAL("illegal primary in regular expression %s at %s",
+ lastre, prestr);
}
/*NOTREACHED*/
return (NULL);
@@ -650,7 +787,14 @@ static Node *
concat(Node *np)
{
switch (rtok) {
- case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
+ case EMPTYRE:
+ case CHAR:
+ case DOT:
+ case ALL:
+ case CCL:
+ case NCCL:
+ case '$':
+ case '(':
return (concat(op2(CAT, np, primary())));
default:
return (np);
@@ -685,12 +829,48 @@ unary(Node *np)
}
}
+/*
+ * Character class definitions conformant to the POSIX locale as
+ * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
+ * and operating character sets are both ASCII (ISO646) or supersets
+ * thereof.
+ *
+ * Note that to avoid overflowing the temporary buffer used in
+ * relex(), the expanded character class (prior to range expansion)
+ * must be less than twice the size of their full name.
+ */
+
+struct charclass {
+ const char *cc_name;
+ int cc_namelen;
+ int (*cc_func)(int);
+} charclasses[] = {
+ { "alnum", 5, isalnum },
+ { "alpha", 5, isalpha },
+ { "blank", 5, isblank },
+ { "cntrl", 5, iscntrl },
+ { "digit", 5, isdigit },
+ { "graph", 5, isgraph },
+ { "lower", 5, islower },
+ { "print", 5, isprint },
+ { "punct", 5, ispunct },
+ { "space", 5, isspace },
+ { "upper", 5, isupper },
+ { "xdigit", 6, isxdigit },
+ { NULL, 0, NULL },
+};
+
+
static int
relex(void) /* lexical analyzer for reparse */
{
- register int c;
- uchar *cbuf;
- int clen, cflag;
+ int c, n;
+ int cflag;
+ static uschar *buf = 0;
+ static size_t bufsz = 100;
+ uschar *bp;
+ struct charclass *cc;
+ int i;
switch (c = *prestr++) {
case '|': return OR;
@@ -705,64 +885,82 @@ relex(void) /* lexical analyzer for reparse */
case ')':
return (c);
case '\\':
- if ((c = *prestr++) == 't')
- c = '\t';
- else if (c == 'n')
- c = '\n';
- else if (c == 'f')
- c = '\f';
- else if (c == 'r')
- c = '\r';
- else if (c == 'b')
- c = '\b';
- else if (c == '\\')
- c = '\\';
- else if (isdigit(c)) {
- int n = c - '0';
- if (isdigit(*prestr)) {
- n = 8 * n + *prestr++ - '0';
- if (isdigit(*prestr))
- n = 8 * n + *prestr++ - '0';
- }
- c = n;
- } /* else it's now in c */
- rlxval = c;
+ rlxval = quoted(&prestr);
return (CHAR);
default:
rlxval = c;
return (CHAR);
case '[':
- clen = 0;
+ if (buf == NULL && (buf = (uschar *)malloc(bufsz)) == NULL)
+ FATAL("out of space in reg expr %.10s..", lastre);
+ bp = buf;
if (*prestr == '^') {
cflag = 1;
prestr++;
} else
cflag = 0;
- init_buf(&cbuf, NULL, strlen((char *)prestr) * 2 + 1);
+ n = 2 * strlen((const char *)prestr) + 1;
+ if (!adjbuf((char **)&buf, &bufsz, n, n, (char **)&bp,
+ "relex1"))
+ FATAL("out of space for reg expr %.10s...", lastre);
for (;;) {
if ((c = *prestr++) == '\\') {
- cbuf[clen++] = '\\';
+ *bp++ = '\\';
if ((c = *prestr++) == '\0') {
- ERROR
- "nonterminated character class %s", lastre FATAL;
+ FATAL("nonterminated character class "
+ "%.20s...", lastre);
+ }
+ *bp++ = c;
+ } else if (c == '[' && *prestr == ':') {
+ /*
+ * Handle POSIX character class names.
+ * Dag-Erling Smorgrav, des@ofug.org
+ */
+ for (cc = charclasses; cc->cc_name; cc++)
+ if (strncmp((const char *)prestr + 1,
+ (const char *)cc->cc_name,
+ cc->cc_namelen) == 0)
+ break;
+
+ if (cc->cc_name == NULL ||
+ prestr[1 + cc->cc_namelen] != ':' ||
+ prestr[2 + cc->cc_namelen] != ']') {
+ *bp++ = c;
+ continue;
}
- cbuf[clen++] = c;
+
+ prestr += cc->cc_namelen + 3;
+ /*
+ * BUG: We begin at 1, instead of 0, since we
+ * would otherwise prematurely terminate the
+ * string for classes like [[:cntrl:]]. This
+ * means that we can't match the NUL character,
+ * not without first adapting the entire
+ * program to track each string's length.
+ */
+ for (i = 1; i < NCHARS; i++) {
+ (void) adjbuf((char **)&buf, &bufsz,
+ bp - buf + 1, 100, (char **)&bp,
+ "relex2");
+ if (cc->cc_func(i)) {
+ *bp++ = i;
+ n++;
+ }
+ }
+ } else if (c == '\0') {
+ FATAL("nonterminated character class %.20s",
+ lastre);
+ } else if (bp == buf) { /* 1st char is special */
+ *bp++ = c;
} else if (c == ']') {
- cbuf[clen] = 0;
- rlxstr = tostring(cbuf);
- free(cbuf);
+ *bp++ = '\0';
+ rlxstr = (uschar *)tostring((char *)buf);
if (cflag == 0)
return (CCL);
else
return (NCCL);
- } else if (c == '\n') {
- ERROR "newline in character class %s...",
- lastre FATAL;
- } else if (c == '\0') {
- ERROR "nonterminated character class %s",
- lastre FATAL;
} else
- cbuf[clen++] = c;
+ *bp++ = c;
}
/*NOTREACHED*/
}
@@ -772,9 +970,13 @@ relex(void) /* lexical analyzer for reparse */
static int
cgoto(fa *f, int s, int c)
{
- register int i, j, k;
- register int *p, *q;
+ int i, j, k;
+ int *p, *q;
+ assert(c == HAT || c < NCHARS);
+ while (f->accept >= maxsetvec) { /* guessing here! */
+ growvec("out of space in cgoto()");
+ }
for (i = 0; i <= f->accept; i++)
setvec[i] = 0;
setcnt = 0;
@@ -782,16 +984,20 @@ cgoto(fa *f, int s, int c)
p = f->posns[s];
for (i = 1; i <= *p; i++) {
if ((k = f->re[p[i]].ltype) != FINAL) {
- if (k == CHAR && c == f->re[p[i]].lval ||
- k == DOT && c != 0 && c != HAT ||
- k == ALL && c != 0 ||
- k == CCL &&
- member(c, (uchar *)f->re[p[i]].lval) ||
- k == NCCL &&
- !member(c, (uchar *)f->re[p[i]].lval) &&
- c != 0 && c != HAT) {
+ if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np)) ||
+ (k == DOT && c != 0 && c != HAT) ||
+ (k == ALL && c != 0) ||
+ (k == EMPTYRE && c != 0) ||
+ (k == CCL &&
+ member(c, (char *)f->re[p[i]].lval.up)) ||
+ (k == NCCL &&
+ !member(c, (char *)f->re[p[i]].lval.up) &&
+ c != 0 && c != HAT)) {
q = f->re[p[i]].lfollow;
for (j = 1; j <= *q; j++) {
+ if (q[j] >= maxsetvec) {
+ growvec("cgoto overflow");
+ }
if (setvec[q[j]] == 0) {
setcnt++;
setvec[q[j]] = 1;
@@ -847,17 +1053,19 @@ cgoto(fa *f, int s, int c)
}
static void
-freefa(fa *f)
+freefa(fa *f) /* free a finite automaton */
{
-
- register int i;
+ int i;
if (f == NULL)
return;
for (i = 0; i <= f->curstat; i++)
xfree(f->posns[i]);
- for (i = 0; i <= f->accept; i++)
+ for (i = 0; i <= f->accept; i++) {
xfree(f->re[i].lfollow);
+ if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
+ xfree((f->re[i].lval.np));
+ }
xfree(f->restr);
xfree(f);
}