diff options
Diffstat (limited to 'usr/src/cmd/awk/b.c')
-rw-r--r-- | usr/src/cmd/awk/b.c | 337 |
1 files changed, 191 insertions, 146 deletions
diff --git a/usr/src/cmd/awk/b.c b/usr/src/cmd/awk/b.c index 9caee4e9d3..d44372393b 100644 --- a/usr/src/cmd/awk/b.c +++ b/usr/src/cmd/awk/b.c @@ -1,4 +1,28 @@ /* + * Copyright (C) Lucent Technologies 1997 + * All Rights Reserved + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby + * granted, provided that the above copyright notice appear in all + * copies and that both that the copyright notice and this + * permission notice and warranty disclaimer appear in supporting + * documentation, and that the name Lucent Technologies or any of + * its entities not be used in advertising or publicity pertaining + * to distribution of the software without specific, written prior + * permission. + * + * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. + * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER + * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, + * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF + * THIS SOFTWARE. + */ + +/* * CDDL HEADER START * * The contents of this file are subject to the terms of the @@ -28,6 +52,8 @@ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ +/* lasciate ogne speranza, voi ch'intrate. */ + #define DEBUG #include "awk.h" @@ -37,7 +63,7 @@ /* NCHARS is 2**n */ #define MAXLIN (3 * LINE_MAX) -#define type(v) (v)->nobj +#define type(v) (v)->nobj /* badly overloaded here */ #define left(v) (v)->narg[0] #define right(v) (v)->narg[1] #define parent(v) (v)->nnext @@ -60,34 +86,34 @@ Node *point[MAXLIN]; int rtok; /* next token in current re */ int rlxval; -uchar *rlxstr; -uchar *prestr; /* current position in current re */ -uchar *lastre; /* origin of last re */ +static uschar *rlxstr; +static uschar *prestr; /* current position in current re */ +static uschar *lastre; /* origin of last re */ static int setcnt; static int poscnt; -uchar *patbeg; +char *patbeg; int patlen; #define NFA 20 /* cache this many dynamic fa's */ fa *fatab[NFA]; int nfatab = 0; /* entries in fatab */ -static fa *mkdfa(uchar *, int); +static fa *mkdfa(const char *, int); static int makeinit(fa *, int); static void penter(Node *); static void freetr(Node *); static void overflo(char *); static void cfoll(fa *, Node *); static void follow(Node *); -static Node *reparse(uchar *); +static Node *reparse(const char *); static int relex(void); static void freefa(fa *); static int cgoto(fa *, int, int); fa * -makedfa(uchar *s, int anchor) /* returns dfa for reg expr s */ +makedfa(const char *s, int anchor) /* returns dfa for reg expr s */ { int i, use, nuse; fa *pfa; @@ -96,7 +122,7 @@ makedfa(uchar *s, int anchor) /* returns dfa for reg expr s */ return (mkdfa(s, anchor)); for (i = 0; i < nfatab; i++) { /* is it there already? */ if (fatab[i]->anchor == anchor && - strcmp((char *)fatab[i]->restr, (char *)s) == 0) { + strcmp((const char *)fatab[i]->restr, s) == 0) { fatab[i]->use++; return (fatab[i]); } @@ -121,9 +147,12 @@ makedfa(uchar *s, int anchor) /* returns dfa for reg expr s */ return (pfa); } +/* + * does the real work of making a dfa + * anchor = 1 for anchored matches, else 0 + */ fa * -mkdfa(uchar *s, int anchor) /* does the real work of making a dfa */ - /* anchor = 1 for anchored matches, else 0 */ +mkdfa(const char *s, int anchor) { Node *p, *p1; fa *f; @@ -137,7 +166,7 @@ mkdfa(uchar *s, int anchor) /* does the real work of making a dfa */ poscnt = 0; penter(p1); /* enter parent pointers and leaf indices */ if ((f = (fa *)calloc(1, sizeof (fa) + poscnt * sizeof (rrow))) == NULL) - overflo("no room for fa"); + overflo("out of space for fa"); /* penter has computed number of positions in re */ f->accept = poscnt-1; cfoll(f, p1); /* set up follow sets */ @@ -151,14 +180,14 @@ mkdfa(uchar *s, int anchor) /* does the real work of making a dfa */ *f->posns[1] = 0; f->initstat = makeinit(f, anchor); f->anchor = anchor; - f->restr = tostring(s); + f->restr = (uschar *)tostring(s); return (f); } static int makeinit(fa *f, int anchor) { - register int i, k; + int i, k; f->curstat = 2; f->out[2] = 0; @@ -207,8 +236,8 @@ penter(Node *p) /* set up parent pointers and leaf indices */ parent(left(p)) = p; parent(right(p)) = p; break; - default: - ERROR "unknown type %d in penter", type(p) FATAL; + default: /* can't happen */ + FATAL("can't happen: unknown type %d in penter", type(p)); break; } } @@ -230,80 +259,112 @@ freetr(Node *p) /* free parse tree */ freetr(right(p)); xfree(p); break; - default: - ERROR "unknown type %d in freetr", type(p) FATAL; + default: /* can't happen */ + FATAL("can't happen: unknown type %d in freetr", type(p)); break; } } -uchar * -cclenter(uchar *p) +#define isoctdigit(c) ((c) >= '0' && (c) <= '7') + +/* pick up next thing after a \\ and increment *pp */ +int +quoted(uschar **pp) { - register int i, c; - uchar *op, *chars, *ret; - size_t bsize; + uschar *p = *pp; + int c; + + if ((c = *p++) == 't') + c = '\t'; + else if (c == 'n') + c = '\n'; + else if (c == 'f') + c = '\f'; + else if (c == 'r') + c = '\r'; + else if (c == 'b') + c = '\b'; + else if (c == '\\') + c = '\\'; + else if (isoctdigit(c)) { /* \d \dd \ddd */ + int n = c - '0'; + if (isoctdigit(*p)) { + n = 8 * n + *p++ - '0'; + if (isoctdigit(*p)) + n = 8 * n + *p++ - '0'; + } + c = n; + } /* else */ + /* c = c; */ + *pp = p; + return (c); +} + +char * +cclenter(const char *argp) /* add a character class */ +{ + int i, c, c2; + uschar *p = (uschar *)argp; + uschar *op, *bp; + static uschar *buf = NULL; + static size_t bufsz = 100; - init_buf(&chars, &bsize, LINE_INCR); op = p; - i = 0; - while ((c = *p++) != 0) { + if (buf == NULL && (buf = (uschar *)malloc(bufsz)) == NULL) + FATAL("out of space for character class [%.10s...] 1", p); + bp = buf; + for (i = 0; (c = *p++) != 0; ) { if (c == '\\') { - if ((c = *p++) == 't') - c = '\t'; - else if (c == 'n') - c = '\n'; - else if (c == 'f') - c = '\f'; - else if (c == 'r') - c = '\r'; - else if (c == 'b') - c = '\b'; - else if (c == '\\') - c = '\\'; - else if (isdigit(c)) { - int n = c - '0'; - if (isdigit(*p)) { - n = 8 * n + *p++ - '0'; - if (isdigit(*p)) - n = 8 * n + *p++ - '0'; - } - c = n; - } /* else */ - /* c = c; */ - } else if (c == '-' && i > 0 && chars[i-1] != 0) { + c = quoted(&p); + } else if (c == '-' && i > 0 && bp[-1] != 0) { if (*p != 0) { - c = chars[i-1]; - while ((uchar)c < *p) { /* fails if *p is \\ */ - expand_buf(&chars, &bsize, i); - chars[i++] = ++c; + c = bp[-1]; + c2 = *p++; + if (c2 == '\\') + c2 = quoted(&p); + if (c > c2) { /* empty; ignore */ + bp--; + i--; + continue; + } + while (c < c2) { + if (!adjbuf((char **)&buf, &bufsz, + bp-buf+2, 100, (char **)&bp, + "cclenter1")) { + FATAL( + "out of space for character class [%.10s...] 2", p); + } + *bp++ = ++c; + i++; } - p++; continue; } } - expand_buf(&chars, &bsize, i); - chars[i++] = c; + if (!adjbuf((char **)&buf, &bufsz, bp-buf+2, 100, (char **)&bp, + "cclenter2")) + FATAL( + "out of space for character class [%.10s...] 3", p); + *bp++ = c; + i++; } - chars[i++] = '\0'; - dprintf(("cclenter: in = |%s|, out = |%s|\n", op, chars)); + *bp = '\0'; + dprintf(("cclenter: in = |%s|, out = |%s|\n", op, buf)); xfree(op); - ret = tostring(chars); - free(chars); - return (ret); + return (char *)tostring((char *)buf); } static void overflo(char *s) { - ERROR "regular expression too big: %s", gettext((char *)s) FATAL; + FATAL("regular expression too big: %.30s...", gettext((char *)s)); } /* enter follow set of each leaf of vertex v into lfollow[leaf] */ static void cfoll(fa *f, Node *v) { - register int i; - register int *p; + int i; + int *p; switch (type(v)) { LEAF @@ -330,8 +391,8 @@ cfoll(fa *f, Node *v) cfoll(f, left(v)); cfoll(f, right(v)); break; - default: - ERROR "unknown type %d in cfoll", type(v) FATAL; + default: /* can't happen */ + FATAL("can't happen: unknown type %d in cfoll", type(v)); } } @@ -342,7 +403,7 @@ cfoll(fa *f, Node *v) static int first(Node *p) { - register int b; + int b; switch (type(p)) { LEAF @@ -350,7 +411,7 @@ first(Node *p) setvec[(int)left(p)] = 1; setcnt++; } - if (type(p) == CCL && (*(uchar *)right(p)) == '\0') + if (type(p) == CCL && (*(uschar *)right(p)) == '\0') return (0); /* empty CCL */ else return (1); @@ -372,8 +433,7 @@ first(Node *p) return (0); return (1); } - ERROR "unknown type %d in first", type(p) FATAL; - return (-1); + FATAL("can't happen: unknown type %d in first", type(p)); } /* collects leaves that can follow v into setvec */ @@ -407,14 +467,16 @@ follow(Node *v) follow(p); return; default: - ERROR "unknown type %d in follow", type(p) FATAL; + FATAL("unknown type %d in follow", type(p)); break; } } static int -member(uchar c, uchar *s) /* is c in s? */ +member(int c, const char *sarg) /* is c in s? */ { + uschar *s = (uschar *)sarg; + while (*s) if (c == *s++) return (1); @@ -423,9 +485,10 @@ member(uchar c, uchar *s) /* is c in s? */ int -match(fa *f, uchar *p) +match(fa *f, const char *p0) /* shortest match ? */ { - register int s, ns; + int s, ns; + uschar *p = (uschar *)p0; s = f->reset ? makeinit(f, 0) : f->initstat; if (f->out[s]) @@ -442,10 +505,11 @@ match(fa *f, uchar *p) } int -pmatch(fa *f, uchar *p) +pmatch(fa *f, const char *p0) /* longest match, for sub */ { - register int s, ns; - register uchar *q; + int s, ns; + uschar *p = (uschar *)p0; + uschar *q; int i, k; if (f->reset) { @@ -453,7 +517,7 @@ pmatch(fa *f, uchar *p) } else { s = f->initstat; } - patbeg = p; + patbeg = (char *)p; patlen = -1; do { q = p; @@ -466,16 +530,17 @@ pmatch(fa *f, uchar *p) s = cgoto(f, s, *q); if (s == 1) { /* no transition */ if (patlen >= 0) { - patbeg = p; + patbeg = (char *)p; return (1); - } else + } else { goto nextin; /* no match */ + } } } while (*q++ != 0); if (f->out[s]) patlen = q - p - 1; /* don't count $ */ if (patlen >= 0) { - patbeg = p; + patbeg = (char *)p; return (1); } nextin: @@ -500,10 +565,11 @@ pmatch(fa *f, uchar *p) } int -nematch(fa *f, uchar *p) +nematch(fa *f, const char *p0) /* non-empty match, for sub */ { - register int s, ns; - register uchar *q; + int s, ns; + uschar *p = (uschar *)p0; + uschar *q; int i, k; if (f->reset) { @@ -523,7 +589,7 @@ nematch(fa *f, uchar *p) s = cgoto(f, s, *q); if (s == 1) { /* no transition */ if (patlen > 0) { - patbeg = p; + patbeg = (char *)p; return (1); } else goto nnextin; /* no nonempty match */ @@ -532,7 +598,7 @@ nematch(fa *f, uchar *p) if (f->out[s]) patlen = q - p - 1; /* don't count $ */ if (patlen > 0) { - patbeg = p; + patbeg = (char *)p; return (1); } nnextin: @@ -561,30 +627,29 @@ static Node *regexp(void), *primary(void), *concat(Node *); static Node *alt(Node *), *unary(Node *); static Node * -reparse(uchar *p) +reparse(const char *p) { /* parses regular expression pointed to by p */ /* uses relex() to scan regular expression */ Node *np; dprintf(("reparse <%s>\n", p)); - lastre = prestr = p; /* prestr points to string to be parsed */ + + /* prestr points to string to be parsed */ + lastre = prestr = (uschar *)p; rtok = relex(); - if (rtok == '\0') - ERROR "empty regular expression" FATAL; - np = regexp(); if (rtok == '\0') { - return (np); - } else { - ERROR "syntax error in regular expression %s at %s", - lastre, prestr FATAL; + FATAL("empty regular expression"); } - /*NOTREACHED*/ - return (NULL); + np = regexp(); + if (rtok != '\0') + FATAL("syntax error in regular expression %s at %s", + lastre, prestr); + return (np); } static Node * -regexp(void) +regexp(void) /* top-level parse of reg expr */ { return (alt(concat(primary()))); } @@ -596,7 +661,7 @@ primary(void) switch (rtok) { case CHAR: - np = op2(CHAR, NIL, (Node *)rlxval); + np = op2(CHAR, NIL, itonp(rlxval)); rtok = relex(); return (unary(np)); case ALL: @@ -607,17 +672,17 @@ primary(void) return (unary(op2(DOT, NIL, NIL))); case CCL: /*LINTED align*/ - np = op2(CCL, NIL, (Node *)cclenter(rlxstr)); + np = op2(CCL, NIL, (Node *)cclenter((char *)rlxstr)); rtok = relex(); return (unary(np)); case NCCL: /*LINTED align*/ - np = op2(NCCL, NIL, (Node *)cclenter(rlxstr)); + np = op2(NCCL, NIL, (Node *)cclenter((char *)rlxstr)); rtok = relex(); return (unary(np)); case '^': rtok = relex(); - return (unary(op2(CHAR, NIL, (Node *)HAT))); + return (unary(op2(CHAR, NIL, itonp(HAT)))); case '$': rtok = relex(); return (unary(op2(CHAR, NIL, NIL))); @@ -627,20 +692,20 @@ primary(void) rtok = relex(); return (unary(op2(CCL, NIL, /*LINTED align*/ - (Node *)tostring((uchar *)"")))); + (Node *)tostring("")))); } np = regexp(); if (rtok == ')') { rtok = relex(); return (unary(np)); } else { - ERROR "syntax error in regular expression %s at %s", - lastre, prestr FATAL; + FATAL("syntax error in regular expression %s at %s", + lastre, prestr); } /* FALLTHROUGH */ default: - ERROR "illegal primary in regular expression %s at %s", - lastre, prestr FATAL; + FATAL("illegal primary in regular expression %s at %s", + lastre, prestr); } /*NOTREACHED*/ return (NULL); @@ -688,8 +753,8 @@ unary(Node *np) static int relex(void) /* lexical analyzer for reparse */ { - register int c; - uchar *cbuf; + int c; + uschar *cbuf; int clen, cflag; switch (c = *prestr++) { @@ -705,28 +770,7 @@ relex(void) /* lexical analyzer for reparse */ case ')': return (c); case '\\': - if ((c = *prestr++) == 't') - c = '\t'; - else if (c == 'n') - c = '\n'; - else if (c == 'f') - c = '\f'; - else if (c == 'r') - c = '\r'; - else if (c == 'b') - c = '\b'; - else if (c == '\\') - c = '\\'; - else if (isdigit(c)) { - int n = c - '0'; - if (isdigit(*prestr)) { - n = 8 * n + *prestr++ - '0'; - if (isdigit(*prestr)) - n = 8 * n + *prestr++ - '0'; - } - c = n; - } /* else it's now in c */ - rlxval = c; + rlxval = quoted(&prestr); return (CHAR); default: rlxval = c; @@ -738,29 +782,31 @@ relex(void) /* lexical analyzer for reparse */ prestr++; } else cflag = 0; - init_buf(&cbuf, NULL, strlen((char *)prestr) * 2 + 1); + init_buf((char **)&cbuf, NULL, strlen((char *)prestr) * 2 + 1); for (;;) { if ((c = *prestr++) == '\\') { cbuf[clen++] = '\\'; if ((c = *prestr++) == '\0') { - ERROR - "nonterminated character class %s", lastre FATAL; + FATAL("nonterminated character class " + "%.20s...", lastre); } cbuf[clen++] = c; + } else if (clen == 0) { + cbuf[clen++] = c; } else if (c == ']') { cbuf[clen] = 0; - rlxstr = tostring(cbuf); + rlxstr = (uschar *)tostring((char *)cbuf); free(cbuf); if (cflag == 0) return (CCL); else return (NCCL); } else if (c == '\n') { - ERROR "newline in character class %s...", - lastre FATAL; + FATAL("newline in character class %s...", + lastre); } else if (c == '\0') { - ERROR "nonterminated character class %s", - lastre FATAL; + FATAL("nonterminated character class %s", + lastre); } else cbuf[clen++] = c; } @@ -772,8 +818,8 @@ relex(void) /* lexical analyzer for reparse */ static int cgoto(fa *f, int s, int c) { - register int i, j, k; - register int *p, *q; + int i, j, k; + int *p, *q; for (i = 0; i <= f->accept; i++) setvec[i] = 0; @@ -786,9 +832,9 @@ cgoto(fa *f, int s, int c) k == DOT && c != 0 && c != HAT || k == ALL && c != 0 || k == CCL && - member(c, (uchar *)f->re[p[i]].lval) || + member(c, (char *)f->re[p[i]].lval) || k == NCCL && - !member(c, (uchar *)f->re[p[i]].lval) && + !member(c, (char *)f->re[p[i]].lval) && c != 0 && c != HAT) { q = f->re[p[i]].lfollow; for (j = 1; j <= *q; j++) { @@ -847,10 +893,9 @@ cgoto(fa *f, int s, int c) } static void -freefa(fa *f) +freefa(fa *f) /* free a finite automaton */ { - - register int i; + int i; if (f == NULL) return; |