summaryrefslogtreecommitdiff
path: root/usr/src/cmd/awk/b.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/cmd/awk/b.c')
-rw-r--r--usr/src/cmd/awk/b.c337
1 files changed, 191 insertions, 146 deletions
diff --git a/usr/src/cmd/awk/b.c b/usr/src/cmd/awk/b.c
index 9caee4e9d3..d44372393b 100644
--- a/usr/src/cmd/awk/b.c
+++ b/usr/src/cmd/awk/b.c
@@ -1,4 +1,28 @@
/*
+ * Copyright (C) Lucent Technologies 1997
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that the above copyright notice appear in all
+ * copies and that both that the copyright notice and this
+ * permission notice and warranty disclaimer appear in supporting
+ * documentation, and that the name Lucent Technologies or any of
+ * its entities not be used in advertising or publicity pertaining
+ * to distribution of the software without specific, written prior
+ * permission.
+ *
+ * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+ * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+ * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+ * THIS SOFTWARE.
+ */
+
+/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
@@ -28,6 +52,8 @@
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
+/* lasciate ogne speranza, voi ch'intrate. */
+
#define DEBUG
#include "awk.h"
@@ -37,7 +63,7 @@
/* NCHARS is 2**n */
#define MAXLIN (3 * LINE_MAX)
-#define type(v) (v)->nobj
+#define type(v) (v)->nobj /* badly overloaded here */
#define left(v) (v)->narg[0]
#define right(v) (v)->narg[1]
#define parent(v) (v)->nnext
@@ -60,34 +86,34 @@ Node *point[MAXLIN];
int rtok; /* next token in current re */
int rlxval;
-uchar *rlxstr;
-uchar *prestr; /* current position in current re */
-uchar *lastre; /* origin of last re */
+static uschar *rlxstr;
+static uschar *prestr; /* current position in current re */
+static uschar *lastre; /* origin of last re */
static int setcnt;
static int poscnt;
-uchar *patbeg;
+char *patbeg;
int patlen;
#define NFA 20 /* cache this many dynamic fa's */
fa *fatab[NFA];
int nfatab = 0; /* entries in fatab */
-static fa *mkdfa(uchar *, int);
+static fa *mkdfa(const char *, int);
static int makeinit(fa *, int);
static void penter(Node *);
static void freetr(Node *);
static void overflo(char *);
static void cfoll(fa *, Node *);
static void follow(Node *);
-static Node *reparse(uchar *);
+static Node *reparse(const char *);
static int relex(void);
static void freefa(fa *);
static int cgoto(fa *, int, int);
fa *
-makedfa(uchar *s, int anchor) /* returns dfa for reg expr s */
+makedfa(const char *s, int anchor) /* returns dfa for reg expr s */
{
int i, use, nuse;
fa *pfa;
@@ -96,7 +122,7 @@ makedfa(uchar *s, int anchor) /* returns dfa for reg expr s */
return (mkdfa(s, anchor));
for (i = 0; i < nfatab; i++) { /* is it there already? */
if (fatab[i]->anchor == anchor &&
- strcmp((char *)fatab[i]->restr, (char *)s) == 0) {
+ strcmp((const char *)fatab[i]->restr, s) == 0) {
fatab[i]->use++;
return (fatab[i]);
}
@@ -121,9 +147,12 @@ makedfa(uchar *s, int anchor) /* returns dfa for reg expr s */
return (pfa);
}
+/*
+ * does the real work of making a dfa
+ * anchor = 1 for anchored matches, else 0
+ */
fa *
-mkdfa(uchar *s, int anchor) /* does the real work of making a dfa */
- /* anchor = 1 for anchored matches, else 0 */
+mkdfa(const char *s, int anchor)
{
Node *p, *p1;
fa *f;
@@ -137,7 +166,7 @@ mkdfa(uchar *s, int anchor) /* does the real work of making a dfa */
poscnt = 0;
penter(p1); /* enter parent pointers and leaf indices */
if ((f = (fa *)calloc(1, sizeof (fa) + poscnt * sizeof (rrow))) == NULL)
- overflo("no room for fa");
+ overflo("out of space for fa");
/* penter has computed number of positions in re */
f->accept = poscnt-1;
cfoll(f, p1); /* set up follow sets */
@@ -151,14 +180,14 @@ mkdfa(uchar *s, int anchor) /* does the real work of making a dfa */
*f->posns[1] = 0;
f->initstat = makeinit(f, anchor);
f->anchor = anchor;
- f->restr = tostring(s);
+ f->restr = (uschar *)tostring(s);
return (f);
}
static int
makeinit(fa *f, int anchor)
{
- register int i, k;
+ int i, k;
f->curstat = 2;
f->out[2] = 0;
@@ -207,8 +236,8 @@ penter(Node *p) /* set up parent pointers and leaf indices */
parent(left(p)) = p;
parent(right(p)) = p;
break;
- default:
- ERROR "unknown type %d in penter", type(p) FATAL;
+ default: /* can't happen */
+ FATAL("can't happen: unknown type %d in penter", type(p));
break;
}
}
@@ -230,80 +259,112 @@ freetr(Node *p) /* free parse tree */
freetr(right(p));
xfree(p);
break;
- default:
- ERROR "unknown type %d in freetr", type(p) FATAL;
+ default: /* can't happen */
+ FATAL("can't happen: unknown type %d in freetr", type(p));
break;
}
}
-uchar *
-cclenter(uchar *p)
+#define isoctdigit(c) ((c) >= '0' && (c) <= '7')
+
+/* pick up next thing after a \\ and increment *pp */
+int
+quoted(uschar **pp)
{
- register int i, c;
- uchar *op, *chars, *ret;
- size_t bsize;
+ uschar *p = *pp;
+ int c;
+
+ if ((c = *p++) == 't')
+ c = '\t';
+ else if (c == 'n')
+ c = '\n';
+ else if (c == 'f')
+ c = '\f';
+ else if (c == 'r')
+ c = '\r';
+ else if (c == 'b')
+ c = '\b';
+ else if (c == '\\')
+ c = '\\';
+ else if (isoctdigit(c)) { /* \d \dd \ddd */
+ int n = c - '0';
+ if (isoctdigit(*p)) {
+ n = 8 * n + *p++ - '0';
+ if (isoctdigit(*p))
+ n = 8 * n + *p++ - '0';
+ }
+ c = n;
+ } /* else */
+ /* c = c; */
+ *pp = p;
+ return (c);
+}
+
+char *
+cclenter(const char *argp) /* add a character class */
+{
+ int i, c, c2;
+ uschar *p = (uschar *)argp;
+ uschar *op, *bp;
+ static uschar *buf = NULL;
+ static size_t bufsz = 100;
- init_buf(&chars, &bsize, LINE_INCR);
op = p;
- i = 0;
- while ((c = *p++) != 0) {
+ if (buf == NULL && (buf = (uschar *)malloc(bufsz)) == NULL)
+ FATAL("out of space for character class [%.10s...] 1", p);
+ bp = buf;
+ for (i = 0; (c = *p++) != 0; ) {
if (c == '\\') {
- if ((c = *p++) == 't')
- c = '\t';
- else if (c == 'n')
- c = '\n';
- else if (c == 'f')
- c = '\f';
- else if (c == 'r')
- c = '\r';
- else if (c == 'b')
- c = '\b';
- else if (c == '\\')
- c = '\\';
- else if (isdigit(c)) {
- int n = c - '0';
- if (isdigit(*p)) {
- n = 8 * n + *p++ - '0';
- if (isdigit(*p))
- n = 8 * n + *p++ - '0';
- }
- c = n;
- } /* else */
- /* c = c; */
- } else if (c == '-' && i > 0 && chars[i-1] != 0) {
+ c = quoted(&p);
+ } else if (c == '-' && i > 0 && bp[-1] != 0) {
if (*p != 0) {
- c = chars[i-1];
- while ((uchar)c < *p) { /* fails if *p is \\ */
- expand_buf(&chars, &bsize, i);
- chars[i++] = ++c;
+ c = bp[-1];
+ c2 = *p++;
+ if (c2 == '\\')
+ c2 = quoted(&p);
+ if (c > c2) { /* empty; ignore */
+ bp--;
+ i--;
+ continue;
+ }
+ while (c < c2) {
+ if (!adjbuf((char **)&buf, &bufsz,
+ bp-buf+2, 100, (char **)&bp,
+ "cclenter1")) {
+ FATAL(
+ "out of space for character class [%.10s...] 2", p);
+ }
+ *bp++ = ++c;
+ i++;
}
- p++;
continue;
}
}
- expand_buf(&chars, &bsize, i);
- chars[i++] = c;
+ if (!adjbuf((char **)&buf, &bufsz, bp-buf+2, 100, (char **)&bp,
+ "cclenter2"))
+ FATAL(
+ "out of space for character class [%.10s...] 3", p);
+ *bp++ = c;
+ i++;
}
- chars[i++] = '\0';
- dprintf(("cclenter: in = |%s|, out = |%s|\n", op, chars));
+ *bp = '\0';
+ dprintf(("cclenter: in = |%s|, out = |%s|\n", op, buf));
xfree(op);
- ret = tostring(chars);
- free(chars);
- return (ret);
+ return (char *)tostring((char *)buf);
}
static void
overflo(char *s)
{
- ERROR "regular expression too big: %s", gettext((char *)s) FATAL;
+ FATAL("regular expression too big: %.30s...", gettext((char *)s));
}
/* enter follow set of each leaf of vertex v into lfollow[leaf] */
static void
cfoll(fa *f, Node *v)
{
- register int i;
- register int *p;
+ int i;
+ int *p;
switch (type(v)) {
LEAF
@@ -330,8 +391,8 @@ cfoll(fa *f, Node *v)
cfoll(f, left(v));
cfoll(f, right(v));
break;
- default:
- ERROR "unknown type %d in cfoll", type(v) FATAL;
+ default: /* can't happen */
+ FATAL("can't happen: unknown type %d in cfoll", type(v));
}
}
@@ -342,7 +403,7 @@ cfoll(fa *f, Node *v)
static int
first(Node *p)
{
- register int b;
+ int b;
switch (type(p)) {
LEAF
@@ -350,7 +411,7 @@ first(Node *p)
setvec[(int)left(p)] = 1;
setcnt++;
}
- if (type(p) == CCL && (*(uchar *)right(p)) == '\0')
+ if (type(p) == CCL && (*(uschar *)right(p)) == '\0')
return (0); /* empty CCL */
else
return (1);
@@ -372,8 +433,7 @@ first(Node *p)
return (0);
return (1);
}
- ERROR "unknown type %d in first", type(p) FATAL;
- return (-1);
+ FATAL("can't happen: unknown type %d in first", type(p));
}
/* collects leaves that can follow v into setvec */
@@ -407,14 +467,16 @@ follow(Node *v)
follow(p);
return;
default:
- ERROR "unknown type %d in follow", type(p) FATAL;
+ FATAL("unknown type %d in follow", type(p));
break;
}
}
static int
-member(uchar c, uchar *s) /* is c in s? */
+member(int c, const char *sarg) /* is c in s? */
{
+ uschar *s = (uschar *)sarg;
+
while (*s)
if (c == *s++)
return (1);
@@ -423,9 +485,10 @@ member(uchar c, uchar *s) /* is c in s? */
int
-match(fa *f, uchar *p)
+match(fa *f, const char *p0) /* shortest match ? */
{
- register int s, ns;
+ int s, ns;
+ uschar *p = (uschar *)p0;
s = f->reset ? makeinit(f, 0) : f->initstat;
if (f->out[s])
@@ -442,10 +505,11 @@ match(fa *f, uchar *p)
}
int
-pmatch(fa *f, uchar *p)
+pmatch(fa *f, const char *p0) /* longest match, for sub */
{
- register int s, ns;
- register uchar *q;
+ int s, ns;
+ uschar *p = (uschar *)p0;
+ uschar *q;
int i, k;
if (f->reset) {
@@ -453,7 +517,7 @@ pmatch(fa *f, uchar *p)
} else {
s = f->initstat;
}
- patbeg = p;
+ patbeg = (char *)p;
patlen = -1;
do {
q = p;
@@ -466,16 +530,17 @@ pmatch(fa *f, uchar *p)
s = cgoto(f, s, *q);
if (s == 1) { /* no transition */
if (patlen >= 0) {
- patbeg = p;
+ patbeg = (char *)p;
return (1);
- } else
+ } else {
goto nextin; /* no match */
+ }
}
} while (*q++ != 0);
if (f->out[s])
patlen = q - p - 1; /* don't count $ */
if (patlen >= 0) {
- patbeg = p;
+ patbeg = (char *)p;
return (1);
}
nextin:
@@ -500,10 +565,11 @@ pmatch(fa *f, uchar *p)
}
int
-nematch(fa *f, uchar *p)
+nematch(fa *f, const char *p0) /* non-empty match, for sub */
{
- register int s, ns;
- register uchar *q;
+ int s, ns;
+ uschar *p = (uschar *)p0;
+ uschar *q;
int i, k;
if (f->reset) {
@@ -523,7 +589,7 @@ nematch(fa *f, uchar *p)
s = cgoto(f, s, *q);
if (s == 1) { /* no transition */
if (patlen > 0) {
- patbeg = p;
+ patbeg = (char *)p;
return (1);
} else
goto nnextin; /* no nonempty match */
@@ -532,7 +598,7 @@ nematch(fa *f, uchar *p)
if (f->out[s])
patlen = q - p - 1; /* don't count $ */
if (patlen > 0) {
- patbeg = p;
+ patbeg = (char *)p;
return (1);
}
nnextin:
@@ -561,30 +627,29 @@ static Node *regexp(void), *primary(void), *concat(Node *);
static Node *alt(Node *), *unary(Node *);
static Node *
-reparse(uchar *p)
+reparse(const char *p)
{
/* parses regular expression pointed to by p */
/* uses relex() to scan regular expression */
Node *np;
dprintf(("reparse <%s>\n", p));
- lastre = prestr = p; /* prestr points to string to be parsed */
+
+ /* prestr points to string to be parsed */
+ lastre = prestr = (uschar *)p;
rtok = relex();
- if (rtok == '\0')
- ERROR "empty regular expression" FATAL;
- np = regexp();
if (rtok == '\0') {
- return (np);
- } else {
- ERROR "syntax error in regular expression %s at %s",
- lastre, prestr FATAL;
+ FATAL("empty regular expression");
}
- /*NOTREACHED*/
- return (NULL);
+ np = regexp();
+ if (rtok != '\0')
+ FATAL("syntax error in regular expression %s at %s",
+ lastre, prestr);
+ return (np);
}
static Node *
-regexp(void)
+regexp(void) /* top-level parse of reg expr */
{
return (alt(concat(primary())));
}
@@ -596,7 +661,7 @@ primary(void)
switch (rtok) {
case CHAR:
- np = op2(CHAR, NIL, (Node *)rlxval);
+ np = op2(CHAR, NIL, itonp(rlxval));
rtok = relex();
return (unary(np));
case ALL:
@@ -607,17 +672,17 @@ primary(void)
return (unary(op2(DOT, NIL, NIL)));
case CCL:
/*LINTED align*/
- np = op2(CCL, NIL, (Node *)cclenter(rlxstr));
+ np = op2(CCL, NIL, (Node *)cclenter((char *)rlxstr));
rtok = relex();
return (unary(np));
case NCCL:
/*LINTED align*/
- np = op2(NCCL, NIL, (Node *)cclenter(rlxstr));
+ np = op2(NCCL, NIL, (Node *)cclenter((char *)rlxstr));
rtok = relex();
return (unary(np));
case '^':
rtok = relex();
- return (unary(op2(CHAR, NIL, (Node *)HAT)));
+ return (unary(op2(CHAR, NIL, itonp(HAT))));
case '$':
rtok = relex();
return (unary(op2(CHAR, NIL, NIL)));
@@ -627,20 +692,20 @@ primary(void)
rtok = relex();
return (unary(op2(CCL, NIL,
/*LINTED align*/
- (Node *)tostring((uchar *)""))));
+ (Node *)tostring(""))));
}
np = regexp();
if (rtok == ')') {
rtok = relex();
return (unary(np));
} else {
- ERROR "syntax error in regular expression %s at %s",
- lastre, prestr FATAL;
+ FATAL("syntax error in regular expression %s at %s",
+ lastre, prestr);
}
/* FALLTHROUGH */
default:
- ERROR "illegal primary in regular expression %s at %s",
- lastre, prestr FATAL;
+ FATAL("illegal primary in regular expression %s at %s",
+ lastre, prestr);
}
/*NOTREACHED*/
return (NULL);
@@ -688,8 +753,8 @@ unary(Node *np)
static int
relex(void) /* lexical analyzer for reparse */
{
- register int c;
- uchar *cbuf;
+ int c;
+ uschar *cbuf;
int clen, cflag;
switch (c = *prestr++) {
@@ -705,28 +770,7 @@ relex(void) /* lexical analyzer for reparse */
case ')':
return (c);
case '\\':
- if ((c = *prestr++) == 't')
- c = '\t';
- else if (c == 'n')
- c = '\n';
- else if (c == 'f')
- c = '\f';
- else if (c == 'r')
- c = '\r';
- else if (c == 'b')
- c = '\b';
- else if (c == '\\')
- c = '\\';
- else if (isdigit(c)) {
- int n = c - '0';
- if (isdigit(*prestr)) {
- n = 8 * n + *prestr++ - '0';
- if (isdigit(*prestr))
- n = 8 * n + *prestr++ - '0';
- }
- c = n;
- } /* else it's now in c */
- rlxval = c;
+ rlxval = quoted(&prestr);
return (CHAR);
default:
rlxval = c;
@@ -738,29 +782,31 @@ relex(void) /* lexical analyzer for reparse */
prestr++;
} else
cflag = 0;
- init_buf(&cbuf, NULL, strlen((char *)prestr) * 2 + 1);
+ init_buf((char **)&cbuf, NULL, strlen((char *)prestr) * 2 + 1);
for (;;) {
if ((c = *prestr++) == '\\') {
cbuf[clen++] = '\\';
if ((c = *prestr++) == '\0') {
- ERROR
- "nonterminated character class %s", lastre FATAL;
+ FATAL("nonterminated character class "
+ "%.20s...", lastre);
}
cbuf[clen++] = c;
+ } else if (clen == 0) {
+ cbuf[clen++] = c;
} else if (c == ']') {
cbuf[clen] = 0;
- rlxstr = tostring(cbuf);
+ rlxstr = (uschar *)tostring((char *)cbuf);
free(cbuf);
if (cflag == 0)
return (CCL);
else
return (NCCL);
} else if (c == '\n') {
- ERROR "newline in character class %s...",
- lastre FATAL;
+ FATAL("newline in character class %s...",
+ lastre);
} else if (c == '\0') {
- ERROR "nonterminated character class %s",
- lastre FATAL;
+ FATAL("nonterminated character class %s",
+ lastre);
} else
cbuf[clen++] = c;
}
@@ -772,8 +818,8 @@ relex(void) /* lexical analyzer for reparse */
static int
cgoto(fa *f, int s, int c)
{
- register int i, j, k;
- register int *p, *q;
+ int i, j, k;
+ int *p, *q;
for (i = 0; i <= f->accept; i++)
setvec[i] = 0;
@@ -786,9 +832,9 @@ cgoto(fa *f, int s, int c)
k == DOT && c != 0 && c != HAT ||
k == ALL && c != 0 ||
k == CCL &&
- member(c, (uchar *)f->re[p[i]].lval) ||
+ member(c, (char *)f->re[p[i]].lval) ||
k == NCCL &&
- !member(c, (uchar *)f->re[p[i]].lval) &&
+ !member(c, (char *)f->re[p[i]].lval) &&
c != 0 && c != HAT) {
q = f->re[p[i]].lfollow;
for (j = 1; j <= *q; j++) {
@@ -847,10 +893,9 @@ cgoto(fa *f, int s, int c)
}
static void
-freefa(fa *f)
+freefa(fa *f) /* free a finite automaton */
{
-
- register int i;
+ int i;
if (f == NULL)
return;