summary refs log tree commit diff
path: root/usr/src/cmd/awk
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/cmd/awk')
-rw-r--r-- usr/src/cmd/awk/DIVERGENCES 5
-rw-r--r-- usr/src/cmd/awk/Makefile 13
-rw-r--r-- usr/src/cmd/awk/awk.g.y 223
-rw-r--r-- usr/src/cmd/awk/awk.h 252
-rw-r--r-- usr/src/cmd/awk/awk.lx.l 306
-rw-r--r-- usr/src/cmd/awk/b.c 576
-rw-r--r-- usr/src/cmd/awk/lex.c 637
-rw-r--r-- usr/src/cmd/awk/lib.c 813
-rw-r--r-- usr/src/cmd/awk/main.c 150
-rw-r--r-- usr/src/cmd/awk/maketab.c 78
-rw-r--r-- usr/src/cmd/awk/parse.c 157
-rw-r--r-- usr/src/cmd/awk/run.c 1732
-rw-r--r-- usr/src/cmd/awk/tran.c 518
13 files changed, 3442 insertions, 2018 deletions
diff --git a/usr/src/cmd/awk/DIVERGENCES b/usr/src/cmd/awk/DIVERGENCES
new file mode 100644
index 0000000000..ebc13cb036
--- /dev/null
+++ b/usr/src/cmd/awk/DIVERGENCES
@@ -0,0 +1,5 @@
+The illumos nawk(1) is slightly divergent from upstream:
+- We allow an unlimited number of input program files
+- We allow an unlimited number of "/pat/, /pat/" expressions
+- Some of the code has been altered to track the length of strings
+ better so that we can avoid repeatedly calling strlen(3C)
diff --git a/usr/src/cmd/awk/Makefile b/usr/src/cmd/awk/Makefile
index c49c932558..046f0b739e 100644
--- a/usr/src/cmd/awk/Makefile
+++ b/usr/src/cmd/awk/Makefile
@@ -30,14 +30,13 @@
PROG= nawk
-OBJ1= b.o lib.o main.o parse.o proctab.o run.o tran.o
-OBJ2= awk.g.o awk.lx.o
+OBJ1= b.o lib.o main.o parse.o proctab.o run.o tran.o lex.o
+OBJ2= awk.g.o
OBJS= $(OBJ2) $(OBJ1)
SRCS= $(OBJ1:%.o=%.c)
include ../Makefile.cmd
-CERRWARN += -_gcc=-Wno-implicit-function-declaration
CERRWARN += -_gcc=-Wno-unused-label
CERRWARN += -_gcc=-Wno-parentheses
CERRWARN += -_gcc=-Wno-unused-variable
@@ -56,13 +55,13 @@ XGETFLAGS += -a -x awk.xcl
CPPFLAGS += -D_FILE_OFFSET_BITS=64
YFLAGS += -d
-LDLIBS += -lm
+LDLIBS += -lm -lumem
LINTFLAGS += -u
-CLEANFILES= maketab proctab.c awk.g.c awk.lx.c y.tab.h
+CLEANFILES= maketab proctab.c awk.g.c y.tab.h
.KEEP_STATE:
-all: $(PROG)
+all: $(PROG)
$(PROG): $(OBJS)
$(LINK.c) $(OBJS) -o $@ $(LDLIBS)
@@ -94,8 +93,6 @@ awk.g.c + y.tab.h: awk.g.y
awk.g.o: awk.g.c
-awk.lx.c: awk.lx.l
-
proctab.o: proctab.c
$(COMPILE.c) proctab.c
$(POST_PROCESS_O)
diff --git a/usr/src/cmd/awk/awk.g.y b/usr/src/cmd/awk/awk.g.y
index 21bc8b6dc8..3b5efb3d7d 100644
--- a/usr/src/cmd/awk/awk.g.y
+++ b/usr/src/cmd/awk/awk.g.y
@@ -1,5 +1,29 @@
%{
/*
+ * Copyright (C) Lucent Technologies 1997
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that the above copyright notice appear in all
+ * copies and that both that the copyright notice and this
+ * permission notice and warranty disclaimer appear in supporting
+ * documentation, and that the name Lucent Technologies or any of
+ * its entities not be used in advertising or publicity pertaining
+ * to distribution of the software without specific, written prior
+ * permission.
+ *
+ * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+ * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+ * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+ * THIS SOFTWARE.
+ */
+
+/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
@@ -30,30 +54,28 @@
/* All Rights Reserved */
%{
-#ident "%Z%%M% %I% %E% SMI" /* SVr4.0 2.10 */
-%}
-
-%{
#include "awk.h"
+
+void checkdup(Node *list, Cell *item);
int yywrap(void) { return(1); }
-#ifndef DEBUG
-# define PUTS(x)
-#endif
-Node *beginloc = 0, *endloc = 0;
-int infunc = 0; /* = 1 if in arglist or body of func */
-uchar *curfname = 0;
-Node *arglist = 0; /* list of args for current function */
+
+Node *beginloc = NULL;
+Node *endloc = NULL;
+int infunc = 0; /* = 1 if in arglist or body of func */
+int inloop = 0; /* = 1 if in while, for, do */
+char *curfname = NULL; /* current function name */
+Node *arglist = NULL; /* list of args for current function */
static void setfname(Cell *);
static int constnode(Node *);
-static uchar *strnode(Node *);
-static Node *notnull();
+static char *strnode(Node *);
+static Node *notnull(Node *);
%}
%union {
Node *p;
Cell *cp;
int i;
- uchar *s;
+ char *s;
}
%token <i> FIRSTTOKEN /* must be first */
@@ -61,25 +83,26 @@ static Node *notnull();
%token <i> NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
%token <i> ARRAY
%token <i> MATCH NOTMATCH MATCHOP
-%token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS
+%token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE
%token <i> AND BOR APPEND EQ GE GT LE LT NE IN
-%token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
-%token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT
+%token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
+%token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
%token <i> ADD MINUS MULT DIVIDE MOD
%token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
%token <i> PRINT PRINTF SPRINTF
%token <p> ELSE INTEST CONDEXPR
%token <i> POSTINCR PREINCR POSTDECR PREDECR
-%token <cp> VAR IVAR VARNF CALL NUMBER STRING FIELD
+%token <cp> VAR IVAR VARNF CALL NUMBER STRING
%token <s> REGEXPR
-%type <p> pas pattern ppattern plist pplist patlist prarg term
+%type <p> pas pattern ppattern plist pplist patlist prarg term re
%type <p> pa_pat pa_stat pa_stats
%type <s> reg_expr
%type <p> simple_stmt opt_simple_stmt stmt stmtlist
%type <p> var varname funcname varlist
-%type <p> for if while
-%type <i> pst opt_pst lbrace rparen comma nl opt_nl and bor
+%type <p> for if else while
+%type <i> do st
+%type <i> pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
%type <i> subop print
%right ASGNOP
@@ -89,14 +112,14 @@ static Node *notnull();
%left AND
%left GETLINE
%nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
-%left ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FIELD FUNC
+%left ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%left GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
%left PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
%left REGEXPR VAR VARNF IVAR WHILE '('
%left CAT
%left '+' '-'
%left '*' '/' '%'
-%left NOT UMINUS
+%left NOT UMINUS UPLUS
%right POWER
%right DECR INCR
%left INDIRECT
@@ -107,7 +130,7 @@ static Node *notnull();
program:
pas { if (errorflag==0)
winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
- | error { yyclearin; bracecheck(); ERROR "bailing out" SYNTAX; }
+ | error { yyclearin; bracecheck(); SYNTAX("bailing out"); }
;
and:
@@ -131,12 +154,12 @@ else:
;
for:
- FOR '(' opt_simple_stmt ';' pattern ';' opt_simple_stmt rparen stmt
- { $$ = stat4(FOR, $3, notnull($5), $7, $9); }
- | FOR '(' opt_simple_stmt ';' ';' opt_simple_stmt rparen stmt
- { $$ = stat4(FOR, $3, NIL, $6, $8); }
- | FOR '(' varname IN varname rparen stmt
- { $$ = stat3(IN, $3, makearr($5), $7); }
+ FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
+ { --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); }
+ | FOR '(' opt_simple_stmt ';' ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
+ { --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); }
+ | FOR '(' varname IN varname rparen {inloop++;} stmt
+ { --inloop; $$ = stat3(IN, $3, makearr($5), $8); }
;
funcname:
@@ -184,8 +207,8 @@ pa_pat:
pa_stat:
pa_pat { $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
| pa_pat lbrace stmtlist '}' { $$ = stat2(PASTAT, $1, $3); }
- | pa_pat ',' pa_pat { $$ = pa2stat($1, $3, stat2(PRINT, rectonode(), NIL)); }
- | pa_pat ',' pa_pat lbrace stmtlist '}' { $$ = pa2stat($1, $3, $5); }
+ | pa_pat ',' opt_nl pa_pat { $$ = pa2stat($1, $4, stat2(PRINT, rectonode(), NIL)); }
+ | pa_pat ',' opt_nl pa_pat lbrace stmtlist '}' { $$ = pa2stat($1, $4, $6); }
| lbrace stmtlist '}' { $$ = stat2(PASTAT, NIL, $2); }
| XBEGIN lbrace stmtlist '}'
{ beginloc = linkum(beginloc, $3); $$ = 0; }
@@ -202,19 +225,17 @@ pa_stats:
patlist:
pattern
- | patlist comma pattern { $$ = linkum($1, $3); }
+ | patlist comma pattern { $$ = linkum($1, $3); }
;
ppattern:
var ASGNOP ppattern { $$ = op2($2, $1, $3); }
| ppattern '?' ppattern ':' ppattern %prec '?'
- { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
+ { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
| ppattern bor ppattern %prec BOR
{ $$ = op2(BOR, notnull($1), notnull($3)); }
| ppattern and ppattern %prec AND
{ $$ = op2(AND, notnull($1), notnull($3)); }
- | NOT ppattern
- { $$ = op1(NOT, notnull($2)); }
| ppattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
| ppattern MATCHOP ppattern
{ if (constnode($3))
@@ -224,21 +245,18 @@ ppattern:
| ppattern IN varname { $$ = op2(INTEST, $1, makearr($3)); }
| '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); }
| ppattern term %prec CAT { $$ = op2(CAT, $1, $2); }
- | reg_expr
- { $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
+ | re
| term
;
pattern:
var ASGNOP pattern { $$ = op2($2, $1, $3); }
| pattern '?' pattern ':' pattern %prec '?'
- { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
+ { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
| pattern bor pattern %prec BOR
{ $$ = op2(BOR, notnull($1), notnull($3)); }
| pattern and pattern %prec AND
{ $$ = op2(AND, notnull($1), notnull($3)); }
- | NOT pattern
- { $$ = op1(NOT, op2(NE,$2,valtonode(lookup((uchar *)"$zero&null",symtab),CCON))); }
| pattern EQ pattern { $$ = op2($2, $1, $3); }
| pattern GE pattern { $$ = op2($2, $1, $3); }
| pattern GT pattern { $$ = op2($2, $1, $3); }
@@ -253,11 +271,14 @@ pattern:
$$ = op3($2, (Node *)1, $1, $3); }
| pattern IN varname { $$ = op2(INTEST, $1, makearr($3)); }
| '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); }
- | pattern '|' GETLINE var { $$ = op3(GETLINE, $4, (Node*)$2, $1); }
- | pattern '|' GETLINE { $$ = op3(GETLINE, (Node*)0, (Node*)$2, $1); }
+ | pattern '|' GETLINE var {
+ if (safe) SYNTAX("cmd | getline is unsafe");
+ else $$ = op3(GETLINE, $4, itonp($2), $1); }
+ | pattern '|' GETLINE {
+ if (safe) SYNTAX("cmd | getline is unsafe");
+ else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); }
| pattern term %prec CAT { $$ = op2(CAT, $1, $2); }
- | reg_expr
- { $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
+ | re
| term
;
@@ -269,6 +290,7 @@ plist:
pplist:
ppattern
| pplist comma ppattern { $$ = linkum($1, $3); }
+ ;
prarg:
/* empty */ { $$ = rectonode(); }
@@ -288,6 +310,12 @@ rbrace:
'}' | rbrace NL
;
+re:
+ reg_expr
+ { $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
+ | NOT re { $$ = op1(NOT, notnull($2)); }
+ ;
+
reg_expr:
'/' {startreg();} REGEXPR '/' { $$ = $3; }
;
@@ -297,26 +325,34 @@ rparen:
;
simple_stmt:
- print prarg '|' term { $$ = stat3($1, $2, (Node *) $3, $4); }
- | print prarg APPEND term { $$ = stat3($1, $2, (Node *) $3, $4); }
- | print prarg GT term { $$ = stat3($1, $2, (Node *) $3, $4); }
+ print prarg '|' term {
+ if (safe) SYNTAX("print | is unsafe");
+ else $$ = stat3($1, $2, itonp($3), $4); }
+ | print prarg APPEND term {
+ if (safe) SYNTAX("print >> is unsafe");
+ else $$ = stat3($1, $2, itonp($3), $4); }
+ | print prarg GT term {
+ if (safe) SYNTAX("print > is unsafe");
+ else $$ = stat3($1, $2, itonp($3), $4); }
| print prarg { $$ = stat3($1, $2, NIL, NIL); }
| DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
- | DELETE varname { yyclearin; ERROR "you can only delete array[element]" SYNTAX; $$ = stat1(DELETE, $2); }
+ | DELETE varname { $$ = stat2(DELETE, makearr($2), 0); }
| pattern { $$ = exptostat($1); }
- | error { yyclearin; ERROR "illegal statement" SYNTAX; }
+ | error { yyclearin; SYNTAX("illegal statement"); }
;
st:
- nl | ';' opt_nl
+ nl
+ | ';' opt_nl
;
stmt:
- BREAK st { $$ = stat1(BREAK, NIL); }
- | CLOSE pattern st { $$ = stat1(CLOSE, $2); }
- | CONTINUE st { $$ = stat1(CONTINUE, NIL); }
- | do stmt WHILE '(' pattern ')' st
- { $$ = stat2(DO, $2, notnull($5)); }
+ BREAK st { if (!inloop) SYNTAX("break illegal outside of loops");
+ $$ = stat1(BREAK, NIL); }
+ | CONTINUE st { if (!inloop) SYNTAX("continue illegal outside of loops");
+ $$ = stat1(CONTINUE, NIL); }
+ | do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
+ { $$ = stat2(DO, $3, notnull($7)); }
| EXIT pattern st { $$ = stat1(EXIT, $2); }
| EXIT st { $$ = stat1(EXIT, NIL); }
| for
@@ -324,12 +360,15 @@ stmt:
| if stmt { $$ = stat3(IF, $1, $2, NIL); }
| lbrace stmtlist rbrace { $$ = $2; }
| NEXT st { if (infunc)
- ERROR "next is illegal inside a function" SYNTAX;
+ SYNTAX("next is illegal inside a function");
$$ = stat1(NEXT, NIL); }
+ | NEXTFILE st { if (infunc)
+ SYNTAX("nextfile is illegal inside a function");
+ $$ = stat1(NEXTFILE, NIL); }
| RETURN pattern st { $$ = stat1(RETURN, $2); }
| RETURN st { $$ = stat1(RETURN, NIL); }
| simple_stmt st
- | while stmt { $$ = stat2(WHILE, $1, $2); }
+ | while {inloop++;} stmt { --inloop; $$ = stat2(WHILE, $1, $3); }
| ';' opt_nl { $$ = 0; }
;
@@ -343,31 +382,34 @@ subop:
;
term:
- term '+' term { $$ = op2(ADD, $1, $3); }
+ term '/' ASGNOP term { $$ = op2(DIVEQ, $1, $4); }
+ | term '+' term { $$ = op2(ADD, $1, $3); }
| term '-' term { $$ = op2(MINUS, $1, $3); }
| term '*' term { $$ = op2(MULT, $1, $3); }
| term '/' term { $$ = op2(DIVIDE, $1, $3); }
| term '%' term { $$ = op2(MOD, $1, $3); }
| term POWER term { $$ = op2(POWER, $1, $3); }
| '-' term %prec UMINUS { $$ = op1(UMINUS, $2); }
- | '+' term %prec UMINUS { $$ = $2; }
- | BLTIN '(' ')' { $$ = op2(BLTIN, (Node *) $1, rectonode()); }
- | BLTIN '(' patlist ')' { $$ = op2(BLTIN, (Node *) $1, $3); }
- | BLTIN { $$ = op2(BLTIN, (Node *) $1, rectonode()); }
- | CALL '(' ')' { $$ = op2(CALL, valtonode($1,CVAR), NIL); }
- | CALL '(' patlist ')' { $$ = op2(CALL, valtonode($1,CVAR), $3); }
+ | '+' term %prec UMINUS { $$ = op1(UPLUS, $2); }
+ | NOT term %prec UMINUS { $$ = op1(NOT, notnull($2)); }
+ | BLTIN '(' ')' { $$ = op2(BLTIN, itonp($1), rectonode()); }
+ | BLTIN '(' patlist ')' { $$ = op2(BLTIN, itonp($1), $3); }
+ | BLTIN { $$ = op2(BLTIN, itonp($1), rectonode()); }
+ | CALL '(' ')' { $$ = op2(CALL, celltonode($1,CVAR), NIL); }
+ | CALL '(' patlist ')' { $$ = op2(CALL, celltonode($1,CVAR), $3); }
+ | CLOSE term { $$ = op1(CLOSE, $2); }
| DECR var { $$ = op1(PREDECR, $2); }
| INCR var { $$ = op1(PREINCR, $2); }
| var DECR { $$ = op1(POSTDECR, $1); }
| var INCR { $$ = op1(POSTINCR, $1); }
- | GETLINE var LT term { $$ = op3(GETLINE, $2, (Node *)$3, $4); }
- | GETLINE LT term { $$ = op3(GETLINE, NIL, (Node *)$2, $3); }
+ | GETLINE var LT term { $$ = op3(GETLINE, $2, itonp($3), $4); }
+ | GETLINE LT term { $$ = op3(GETLINE, NIL, itonp($2), $3); }
| GETLINE var { $$ = op3(GETLINE, $2, NIL, NIL); }
| GETLINE { $$ = op3(GETLINE, NIL, NIL, NIL); }
| INDEX '(' pattern comma pattern ')'
{ $$ = op2(INDEX, $3, $5); }
| INDEX '(' pattern comma reg_expr ')'
- { ERROR "index() doesn't permit regular expressions" SYNTAX;
+ { SYNTAX("index() doesn't permit regular expressions");
$$ = op2(INDEX, $3, (Node*)$5); }
| '(' pattern ')' { $$ = $2; }
| MATCHFCN '(' pattern comma reg_expr ')'
@@ -377,7 +419,7 @@ term:
$$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
else
$$ = op3(MATCHFCN, (Node *)1, $3, $5); }
- | NUMBER { $$ = valtonode($1, CCON); }
+ | NUMBER { $$ = celltonode($1, CCON); }
| SPLIT '(' pattern comma varname comma pattern ')' /* string */
{ $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
| SPLIT '(' pattern comma varname comma reg_expr ')' /* const /regexp/ */
@@ -385,7 +427,7 @@ term:
| SPLIT '(' pattern comma varname ')'
{ $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); } /* default */
| SPRINTF '(' patlist ')' { $$ = op1($1, $3); }
- | STRING { $$ = valtonode($1, CCON); }
+ | STRING { $$ = celltonode($1, CCON); }
| subop '(' reg_expr comma pattern ')'
{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
| subop '(' pattern comma pattern ')'
@@ -410,20 +452,21 @@ term:
var:
varname
| varname '[' patlist ']' { $$ = op2(ARRAY, makearr($1), $3); }
- | FIELD { $$ = valtonode($1, CFLD); }
- | IVAR { $$ = op1(INDIRECT, valtonode($1, CVAR)); }
- | INDIRECT term { $$ = op1(INDIRECT, $2); }
- ;
+ | IVAR { $$ = op1(INDIRECT, celltonode($1, CVAR)); }
+ | INDIRECT term { $$ = op1(INDIRECT, $2); }
+ ;
varlist:
/* nothing */ { arglist = $$ = 0; }
- | VAR { arglist = $$ = valtonode($1,CVAR); }
- | varlist comma VAR { arglist = $$ = linkum($1,valtonode($3,CVAR)); }
+ | VAR { arglist = $$ = celltonode($1,CVAR); }
+ | varlist comma VAR {
+ checkdup($1, $3);
+ arglist = $$ = linkum($1,celltonode($3,CVAR)); }
;
varname:
- VAR { $$ = valtonode($1, CVAR); }
- | ARG { $$ = op1(ARG, (Node *) $1); }
+ VAR { $$ = celltonode($1, CVAR); }
+ | ARG { $$ = op1(ARG, itonp($1)); }
| VARNF { $$ = op1(VARNF, (Node *) $1); }
;
@@ -438,20 +481,20 @@ static void
setfname(Cell *p)
{
if (isarr(p))
- ERROR "%s is an array, not a function", p->nval SYNTAX;
- else if (isfunc(p))
- ERROR "you can't define function %s more than once", p->nval SYNTAX;
+ SYNTAX("%s is an array, not a function", p->nval);
+ else if (isfcn(p))
+ SYNTAX("you can't define function %s more than once", p->nval);
curfname = p->nval;
+ p->tval |= FCN;
}
-
static int
constnode(Node *p)
{
- return p->ntype == NVALUE && ((Cell *) (p->narg[0]))->csub == CCON;
+ return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON;
}
-static uchar *
+static char *
strnode(Node *p)
{
return ((Cell *)(p->narg[0]))->sval;
@@ -468,3 +511,15 @@ notnull(Node *n)
return op2(NE, n, nullnode);
}
}
+
+void
+checkdup(Node *vl, Cell *cp) /* check if name already in list */
+{
+ char *s = cp->nval;
+ for (; vl; vl = vl->nnext) {
+ if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
+ SYNTAX("duplicate argument %s", s);
+ break;
+ }
+ }
+}
diff --git a/usr/src/cmd/awk/awk.h b/usr/src/cmd/awk/awk.h
index 987028fea2..dfbed45e9d 100644
--- a/usr/src/cmd/awk/awk.h
+++ b/usr/src/cmd/awk/awk.h
@@ -1,4 +1,28 @@
/*
+ * Copyright (C) Lucent Technologies 1997
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that the above copyright notice appear in all
+ * copies and that both that the copyright notice and this
+ * permission notice and warranty disclaimer appear in supporting
+ * documentation, and that the name Lucent Technologies or any of
+ * its entities not be used in advertising or publicity pertaining
+ * to distribution of the software without specific, written prior
+ * permission.
+ *
+ * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+ * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+ * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+ * THIS SOFTWARE.
+ */
+
+/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
@@ -29,6 +53,7 @@
#ifndef AWK_H
#define AWK_H
+#include <assert.h>
#include <sys/types.h>
#include <ctype.h>
#include <stdio.h>
@@ -38,10 +63,15 @@
#include <limits.h>
typedef double Awkfloat;
-typedef unsigned char uchar;
-#define xfree(a) { if ((a) != NULL) { free(a); a = NULL; } }
+/* unsigned char is more trouble than it's worth */
+
+typedef unsigned char uschar;
+#define xfree(a) { if ((a) != NULL) { free((void *)(a)); (a) = NULL; } }
+
+/* guaranteed non-null for dprintf */
+#define NN(p) ((p) ? (p) : "(null)")
#define DEBUG
#ifdef DEBUG
/* uses have to be doubly parenthesized */
@@ -50,63 +80,56 @@ typedef unsigned char uchar;
#define dprintf(x)
#endif
-extern char errbuf[200];
-extern void error(int, char *);
-#define ERROR (void) snprintf(errbuf, sizeof (errbuf),
-/*CSTYLED*/
-#define FATAL ), error(1, errbuf)
-/*CSTYLED*/
-#define WARNING ), error(0, errbuf)
-/*CSTYLED*/
-#define SYNTAX ), yyerror(errbuf)
-/*CSTYLED*/
-#define CONT )
-
extern int compile_time; /* 1 if compiling, 0 if running */
+extern int safe; /* 0 => unsafe, 1 => safe */
#define FLD_INCR 64
#define LINE_INCR 256
+#define RECSIZE (8 * 1024) /* sets limit on records, fields, etc., etc. */
+extern size_t recsize; /* size of current record, orig RECSIZE */
/* ensure that there is extra 1 byte in the buffer */
#define expand_buf(p, n, r) \
if (*(n) == 0 || (r) >= (*(n) - 1)) r_expand_buf(p, n, r)
-extern uchar **FS;
-extern uchar **RS;
-extern uchar **ORS;
-extern uchar **OFS;
-extern uchar **OFMT;
+extern char **FS;
+extern char **RS;
+extern char **ORS;
+extern char **OFS;
+extern char **OFMT;
extern Awkfloat *NR;
extern Awkfloat *FNR;
extern Awkfloat *NF;
-extern uchar **FILENAME;
-extern uchar **SUBSEP;
+extern char **FILENAME;
+extern char **SUBSEP;
extern Awkfloat *RSTART;
extern Awkfloat *RLENGTH;
-extern uchar *record;
-extern size_t record_size;
-extern int errorflag;
+extern char *record; /* points to $0 */
+extern size_t recsize;
+extern int errorflag; /* 1 if error has occurred */
extern int donefld; /* 1 if record broken into fields */
extern int donerec; /* 1 if record is valid (no fld has changed */
-extern uchar *patbeg; /* beginning of pattern matched */
-extern int patlen; /* length. set in b.c */
+extern char *patbeg; /* beginning of pattern matched */
+extern int patlen; /* length of pattern matched. set in b.c */
/* Cell: all information about a variable or constant */
typedef struct Cell {
- uchar ctype; /* OCELL, OBOOL, OJUMP, etc. */
- uchar csub; /* CCON, CTEMP, CFLD, etc. */
- uchar *nval; /* name, for variables only */
- uchar *sval; /* string value */
+ uschar ctype; /* OCELL, OBOOL, OJUMP, etc. */
+ uschar csub; /* CCON, CTEMP, CFLD, etc. */
+ char *nval; /* name, for variables only */
+ char *sval; /* string value */
Awkfloat fval; /* value as number */
- unsigned tval;
- /* type info: STR|NUM|ARR|FCN|FLD|CON|DONTFREE */
+ int tval;
+ /* type info: STR|NUM|ARR|FCN|FLD|CON|DONTFREE|CONVC|CONVO */
+ char *fmt;
+ /* CONVFMT/OFMT value used to convert from number */
struct Cell *cnext; /* ptr to next if chained */
} Cell;
-typedef struct { /* symbol table array */
+typedef struct Array { /* symbol table array */
int nelem; /* elements in table right now */
int size; /* size of tab */
Cell **tab; /* hash table pointers */
@@ -114,15 +137,23 @@ typedef struct { /* symbol table array */
#define NSYMTAB 50 /* initial size of a symbol table */
extern Array *symtab, *makesymtab(int);
-extern Cell *setsymtab(uchar *, uchar *, Awkfloat, unsigned int, Array *);
-extern Cell *lookup(uchar *, Array *);
+extern Cell *setsymtab(const char *, const char *, Awkfloat,
+ unsigned int, Array *);
+extern Cell *lookup(const char *, Array *);
extern Cell *recloc; /* location of input record */
extern Cell *nrloc; /* NR */
extern Cell *fnrloc; /* FNR */
+extern Cell *fsloc; /* FS */
extern Cell *nfloc; /* NF */
+extern Cell *ofsloc; /* OFS */
+extern Cell *orsloc; /* ORS */
+extern Cell *rsloc; /* RS */
+extern Cell *rtloc; /* RT */
extern Cell *rstartloc; /* RSTART */
extern Cell *rlengthloc; /* RLENGTH */
+extern Cell *subseploc; /* SUBSEP */
+extern Cell *symtabloc; /* SYMTAB */
/* Cell.tval values: */
#define NUM 01 /* number value is valid */
@@ -133,17 +164,18 @@ extern Cell *rlengthloc; /* RLENGTH */
#define FCN 040 /* this is a function name */
#define FLD 0100 /* this is a field $1, $2, ... */
#define REC 0200 /* this is $0 */
+#define CONVC 0400 /* string was converted from number via CONVFMT */
+#define CONVO 01000 /* string was converted from number via OFMT */
-#define freeable(p) (!((p)->tval & DONTFREE))
-extern Awkfloat setfval(Cell *, Awkfloat), getfval(Cell *), r_getfval(Cell *);
-extern uchar *setsval(Cell *, uchar *), *getsval(Cell *), *r_getsval(Cell *);
-extern uchar *tostring(uchar *), *tokname(int), *qstring(uchar *, int);
-
-#define getfval(p) \
- (((p)->tval & (ARR|FLD|REC|NUM)) == NUM ? (p)->fval : r_getfval(p))
-#define getsval(p) \
- (((p)->tval & (ARR|FLD|REC|STR)) == STR ? (p)->sval : r_getsval(p))
+extern Awkfloat setfval(Cell *, Awkfloat);
+extern Awkfloat getfval(Cell *);
+extern char *setsval(Cell *, const char *);
+extern char *getsval(Cell *);
+extern char *getpssval(Cell *); /* for print */
+extern char *tostring(const char *);
+extern char *tokname(int);
+extern char *qstring(const char *, int);
/* function types */
#define FLENGTH 1
@@ -159,15 +191,16 @@ extern uchar *tostring(uchar *), *tokname(int), *qstring(uchar *, int);
#define FATAN 11
#define FTOUPPER 12
#define FTOLOWER 13
+#define FFLUSH 14
/* Node: parse tree is made of nodes, with Cell's at bottom */
typedef struct Node {
int ntype;
struct Node *nnext;
- off_t lineno;
+ off_t lineno;
int nobj;
- struct Node *narg[1];
+ struct Node *narg[1];
/* variable: actual size set by calling malloc */
} Node;
@@ -190,6 +223,7 @@ extern Node *nullnode;
#define CNAME 3
#define CVAR 2
#define CFLD 1
+#define CUNK 0
/* bool subtypes */
#define BTRUE 11
@@ -201,6 +235,7 @@ extern Node *nullnode;
#define JBREAK 23
#define JCONT 24
#define JRET 25
+#define JNEXTFILE 26
/* node types */
#define NVALUE 1
@@ -210,7 +245,7 @@ extern Node *nullnode;
extern Cell *(*proctab[])(Node **, int);
extern Cell *nullproc(Node **, int);
-extern int pairstack[], paircnt;
+extern int *pairstack, paircnt;
extern Node *stat1(int, Node *), *stat2(int, Node *, Node *);
extern Node *stat3(int, Node *, Node *, Node *);
@@ -219,7 +254,7 @@ extern Node *pa2stat(Node *, Node *, Node *);
extern Node *op1(int, Node *), *op2(int, Node *, Node *);
extern Node *op3(int, Node *, Node *, Node *);
extern Node *op4(int, Node *, Node *, Node *, Node *);
-extern Node *linkum(Node *, Node *), *valtonode(Cell *, int);
+extern Node *linkum(Node *, Node *), *celltonode(Cell *, int);
extern Node *rectonode(void), *exptostat(Node *);
extern Node *makearr(Node *);
@@ -231,89 +266,124 @@ extern Node *makearr(Node *);
#define isexit(n) ((n)->csub == JEXIT)
#define isbreak(n) ((n)->csub == JBREAK)
#define iscont(n) ((n)->csub == JCONT)
-#define isnext(n) ((n)->csub == JNEXT)
+#define isnext(n) ((n)->csub == JNEXT || (n)->csub == JNEXTFILE)
#define isret(n) ((n)->csub == JRET)
+#define isrec(n) ((n)->tval & REC)
+#define isfld(n) ((n)->tval & FLD)
#define isstr(n) ((n)->tval & STR)
#define isnum(n) ((n)->tval & NUM)
#define isarr(n) ((n)->tval & ARR)
-#define isfunc(n) ((n)->tval & FCN)
+#define isfcn(n) ((n)->tval & FCN)
#define istrue(n) ((n)->csub == BTRUE)
#define istemp(n) ((n)->csub == CTEMP)
+#define freeable(p) (((p)->tval & (STR|DONTFREE)) == STR)
+
+/* structures used by regular expression matching machinery, mostly b.c: */
-#define NCHARS (256+1)
+/* 256 handles 8-bit chars; 128 does 7-bit */
+/* watch out in match(), etc. */
+#define NCHARS (256+3)
#define NSTATES 32
typedef struct rrow {
- int ltype;
- int lval;
+ long ltype; /* long avoids pointer warnings on 64-bit */
+ union {
+ int i;
+ Node *np;
+ uschar *up;
+ } lval; /* because Al stores a pointer in it! */
int *lfollow;
} rrow;
typedef struct fa {
- uchar *restr;
+ uschar gototab[NSTATES][NCHARS];
+ uschar out[NSTATES];
+ uschar *restr;
+ int *posns[NSTATES];
int anchor;
int use;
- uchar gototab[NSTATES][NCHARS];
- int *posns[NSTATES];
- uchar out[NSTATES];
int initstat;
int curstat;
int accept;
int reset;
+ /* re is variable: actual size set by calling malloc */
struct rrow re[1];
} fa;
+/* lex.c */
+extern int yylex(void);
+extern void startreg(void);
+extern int input(void);
+extern void unput(int);
+extern void unputstr(const char *);
+extern int yylook(void);
+extern int yyback(int *, int);
+extern int yyinput(void);
+
+/* parse.c */
+extern void defn(Cell *, Node *, Node *);
+extern int ptoi(void *);
+extern Node *itonp(int);
+extern int isarg(const char *);
+
/* b.c */
-extern fa *makedfa(uchar *, int);
-extern int nematch(fa *, uchar *);
-extern int match(fa *, uchar *);
-extern int pmatch(fa *, uchar *);
+extern fa *makedfa(const char *, int);
+extern int nematch(fa *, const char *);
+extern int match(fa *, const char *);
+extern int pmatch(fa *, const char *);
/* lib.c */
-extern int isclvar(uchar *);
-extern int is_number(uchar *);
-extern void setclvar(uchar *);
-extern int readrec(uchar **, size_t *, FILE *);
+
+extern void SYNTAX(const char *, ...);
+extern void FATAL(const char *, ...) __attribute__((__noreturn__));
+extern void WARNING(const char *, ...);
+extern void error(void);
+extern void nextfile(void);
+extern void savefs(void);
+
+extern int isclvar(const char *);
+extern int is_number(const char *);
+extern void setclvar(char *);
+extern int readrec(char **, size_t *, FILE *);
extern void bracecheck(void);
+extern void recinit(unsigned int n);
extern void syminit(void);
-extern void yyerror(char *);
+extern void yyerror(const char *);
extern void fldbld(void);
extern void recbld(void);
-extern int getrec(uchar **, size_t *);
+extern int getrec(char **, size_t *, int);
extern Cell *fieldadr(int);
extern void newfld(int);
-extern Cell *getfld(int);
extern int fldidx(Cell *);
-extern double errcheck(double, char *);
+extern double errcheck(double, const char *);
extern void fpecatch(int);
-extern void init_buf(uchar **, size_t *, size_t);
-extern void adjust_buf(uchar **, size_t);
-extern void r_expand_buf(uchar **, size_t *, size_t);
-
-extern int donefld;
-extern int donerec;
-extern uchar *record;
-extern size_t record_size;
+extern void r_expand_buf(char **, size_t *, size_t);
+extern void makefields(int, int);
+extern void growfldtab(int n);
+extern void setlastfld(int n);
/* main.c */
extern int dbg;
-extern uchar *cmdname;
-extern uchar *lexprog;
+extern char *lexprog;
extern int compile_time;
-extern char radixpoint;
+extern char *cursource(void);
+extern int pgetc(void);
/* tran.c */
extern void syminit(void);
-extern void arginit(int, uchar **);
-extern void envinit(uchar **);
+extern void arginit(int, char **);
+extern void envinit(char **);
extern void freesymtab(Cell *);
-extern void freeelem(Cell *, uchar *);
-extern void funnyvar(Cell *, char *);
-extern int hash(uchar *, int);
+extern void freeelem(Cell *, const char *);
+extern void funnyvar(Cell *, const char *);
+extern int hash(const char *, int);
extern Awkfloat *ARGC;
/* run.c */
-extern void run(Node *);
+extern void run(Node *);
+extern const char *filename(FILE *);
+extern int adjbuf(char **pb, size_t *sz, size_t min, size_t q,
+ char **pbp, const char *what);
extern int paircnt;
extern Node *winner;
@@ -336,7 +406,7 @@ extern Cell *substr(Node **, int);
extern Cell *sub(Node **, int);
extern Cell *gsub(Node **, int);
extern Cell *sindex(Node **, int);
-extern Cell *a_sprintf(Node **, int);
+extern Cell *awksprintf(Node **, int);
extern Cell *arith(Node **, int);
extern Cell *incrdecr(Node **, int);
extern Cell *cat(Node **, int);
@@ -344,10 +414,10 @@ extern Cell *pastat(Node **, int);
extern Cell *dopa2(Node **, int);
extern Cell *matchop(Node **, int);
extern Cell *intest(Node **, int);
-extern Cell *aprintf(Node **, int);
-extern Cell *print(Node **, int);
+extern Cell *awkprintf(Node **, int);
+extern Cell *printstat(Node **, int);
extern Cell *closefile(Node **, int);
-extern Cell *delete(Node **, int);
+extern Cell *awkdelete(Node **, int);
extern Cell *split(Node **, int);
extern Cell *assign(Node **, int);
extern Cell *condexpr(Node **, int);
@@ -361,6 +431,6 @@ extern Cell *bltin(Node **, int);
extern Cell *call(Node **, int);
extern Cell *arg(Node **, int);
extern Cell *getnf(Node **, int);
-extern Cell *getaline(Node **, int);
+extern Cell *awkgetline(Node **, int);
#endif /* AWK_H */
diff --git a/usr/src/cmd/awk/awk.lx.l b/usr/src/cmd/awk/awk.lx.l
deleted file mode 100644
index a7e8185832..0000000000
--- a/usr/src/cmd/awk/awk.lx.l
+++ /dev/null
@@ -1,306 +0,0 @@
-%{
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
-/* All Rights Reserved */
-%}
-
-%{
-#pragma ident "%Z%%M% %I% %E% SMI"
-%}
-
-%Start A str sc reg comment
-
-%{
-
-#include <sys/types.h>
-#include "awk.h"
-#include "y.tab.h"
-
-#undef input /* defeat lex */
-#undef unput
-
-static void unput(int);
-static void unputstr(char *);
-
-extern YYSTYPE yylval;
-extern int infunc;
-
-off_t lineno = 1;
-int bracecnt = 0;
-int brackcnt = 0;
-int parencnt = 0;
-#define DEBUG
-#ifdef DEBUG
-# define RET(x) {if(dbg)printf("lex %s [%s]\n", tokname(x), yytext); return(x); }
-#else
-# define RET(x) return(x)
-#endif
-
-/*
- * The standards (SUSV2) requires that Record size be atleast LINE_MAX.
- * LINE_MAX is a standard variable defined in limits.h.
- * Though nawk is not standards compliant, we let RECSIZE
- * grow with LINE_MAX instead of the magic number 1024.
- */
-#define CBUFLEN (3 * LINE_MAX)
-
-#define CADD cbuf[clen++] = yytext[0]; \
- if (clen >= CBUFLEN-1) { \
- ERROR "string/reg expr %.10s... too long", cbuf SYNTAX; \
- BEGIN A; \
- }
-
-static uchar cbuf[CBUFLEN];
-static uchar *s;
-static int clen, cflag;
-%}
-
-A [a-zA-Z_]
-B [a-zA-Z0-9_]
-D [0-9]
-O [0-7]
-H [0-9a-fA-F]
-WS [ \t]
-
-%%
- switch (yybgin-yysvec-1) { /* witchcraft */
- case 0:
- BEGIN A;
- break;
- case sc:
- BEGIN A;
- RET('}');
- }
-
-<A>\n { lineno++; RET(NL); }
-<A>#.* { ; } /* strip comments */
-<A>{WS}+ { ; }
-<A>; { RET(';'); }
-
-<A>"\\"\n { lineno++; }
-<A>BEGIN { RET(XBEGIN); }
-<A>END { RET(XEND); }
-<A>func(tion)? { if (infunc) ERROR "illegal nested function" SYNTAX; RET(FUNC); }
-<A>return { if (!infunc) ERROR "return not in function" SYNTAX; RET(RETURN); }
-<A>"&&" { RET(AND); }
-<A>"||" { RET(BOR); }
-<A>"!" { RET(NOT); }
-<A>"!=" { yylval.i = NE; RET(NE); }
-<A>"~" { yylval.i = MATCH; RET(MATCHOP); }
-<A>"!~" { yylval.i = NOTMATCH; RET(MATCHOP); }
-<A>"<" { yylval.i = LT; RET(LT); }
-<A>"<=" { yylval.i = LE; RET(LE); }
-<A>"==" { yylval.i = EQ; RET(EQ); }
-<A>">=" { yylval.i = GE; RET(GE); }
-<A>">" { yylval.i = GT; RET(GT); }
-<A>">>" { yylval.i = APPEND; RET(APPEND); }
-<A>"++" { yylval.i = INCR; RET(INCR); }
-<A>"--" { yylval.i = DECR; RET(DECR); }
-<A>"+=" { yylval.i = ADDEQ; RET(ASGNOP); }
-<A>"-=" { yylval.i = SUBEQ; RET(ASGNOP); }
-<A>"*=" { yylval.i = MULTEQ; RET(ASGNOP); }
-<A>"/=" { yylval.i = DIVEQ; RET(ASGNOP); }
-<A>"%=" { yylval.i = MODEQ; RET(ASGNOP); }
-<A>"^=" { yylval.i = POWEQ; RET(ASGNOP); }
-<A>"**=" { yylval.i = POWEQ; RET(ASGNOP); }
-<A>"=" { yylval.i = ASSIGN; RET(ASGNOP); }
-<A>"**" { RET(POWER); }
-<A>"^" { RET(POWER); }
-
-<A>"$"{D}+ { yylval.cp = fieldadr(atoi(yytext+1)); RET(FIELD); }
-<A>"$NF" { unputstr("(NF)"); return(INDIRECT); }
-<A>"$"{A}{B}* { int c, n;
- c = input(); unput(c);
- if (c == '(' || c == '[' || infunc && (n=isarg(yytext+1)) >= 0) {
- unputstr(yytext+1);
- return(INDIRECT);
- } else {
- yylval.cp = setsymtab((uchar *)yytext+1,
- (uchar *)"",0.0,STR|NUM,symtab);
- RET(IVAR);
- }
- }
-<A>"$" { RET(INDIRECT); }
-<A>NF { yylval.cp = setsymtab((uchar *)yytext, (uchar *)"", 0.0, NUM, symtab); RET(VARNF); }
-
-<A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)? {
- yylval.cp = setsymtab((uchar *)yytext, tostring((uchar *)yytext), atof(yytext), CON|NUM, symtab);
- RET(NUMBER); }
-
-<A>while { RET(WHILE); }
-<A>for { RET(FOR); }
-<A>do { RET(DO); }
-<A>if { RET(IF); }
-<A>else { RET(ELSE); }
-<A>next { RET(NEXT); }
-<A>exit { RET(EXIT); }
-<A>break { RET(BREAK); }
-<A>continue { RET(CONTINUE); }
-<A>print { yylval.i = PRINT; RET(PRINT); }
-<A>printf { yylval.i = PRINTF; RET(PRINTF); }
-<A>sprintf { yylval.i = SPRINTF; RET(SPRINTF); }
-<A>split { yylval.i = SPLIT; RET(SPLIT); }
-<A>substr { RET(SUBSTR); }
-<A>sub { yylval.i = SUB; RET(SUB); }
-<A>gsub { yylval.i = GSUB; RET(GSUB); }
-<A>index { RET(INDEX); }
-<A>match { RET(MATCHFCN); }
-<A>in { RET(IN); }
-<A>getline { RET(GETLINE); }
-<A>close { RET(CLOSE); }
-<A>delete { RET(DELETE); }
-<A>length { yylval.i = FLENGTH; RET(BLTIN); }
-<A>log { yylval.i = FLOG; RET(BLTIN); }
-<A>int { yylval.i = FINT; RET(BLTIN); }
-<A>exp { yylval.i = FEXP; RET(BLTIN); }
-<A>sqrt { yylval.i = FSQRT; RET(BLTIN); }
-<A>sin { yylval.i = FSIN; RET(BLTIN); }
-<A>cos { yylval.i = FCOS; RET(BLTIN); }
-<A>atan2 { yylval.i = FATAN; RET(BLTIN); }
-<A>system { yylval.i = FSYSTEM; RET(BLTIN); }
-<A>rand { yylval.i = FRAND; RET(BLTIN); }
-<A>srand { yylval.i = FSRAND; RET(BLTIN); }
-<A>toupper { yylval.i = FTOUPPER; RET(BLTIN); }
-<A>tolower { yylval.i = FTOLOWER; RET(BLTIN); }
-
-<A>{A}{B}* { int n, c;
- c = input(); unput(c); /* look for '(' */
- if (c != '(' && infunc && (n=isarg(yytext)) >= 0) {
- yylval.i = n;
- RET(ARG);
- } else {
- yylval.cp = setsymtab((uchar *)yytext,
- (uchar *)"",0.0,STR|NUM,symtab);
- if (c == '(') {
- RET(CALL);
- } else {
- RET(VAR);
- }
- }
- }
-<A>\" { BEGIN str; clen = 0; }
-
-<A>"}" { if (--bracecnt < 0) ERROR "extra }" SYNTAX; BEGIN sc; RET(';'); }
-<A>"]" { if (--brackcnt < 0) ERROR "extra ]" SYNTAX; RET(']'); }
-<A>")" { if (--parencnt < 0) ERROR "extra )" SYNTAX; RET(')'); }
-
-<A>. { if (yytext[0] == '{') bracecnt++;
- else if (yytext[0] == '[') brackcnt++;
- else if (yytext[0] == '(') parencnt++;
- RET(yylval.i = yytext[0]); /* everything else */ }
-
-<reg>\\. { cbuf[clen++] = '\\'; cbuf[clen++] = yytext[1]; }
-<reg>\n { ERROR "newline in regular expression %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
-<reg>"/" { BEGIN A;
- cbuf[clen] = 0;
- yylval.s = tostring(cbuf);
- unput('/');
- RET(REGEXPR); }
-<reg>. { CADD; }
-
-<str>\" { BEGIN A;
- cbuf[clen] = 0; s = tostring(cbuf);
- cbuf[clen] = ' '; cbuf[++clen] = 0;
- yylval.cp = setsymtab(cbuf, s, 0.0, CON|STR, symtab);
- RET(STRING); }
-<str>\n { ERROR "newline in string %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
-<str>"\\\"" { cbuf[clen++] = '"'; }
-<str>"\\"n { cbuf[clen++] = '\n'; }
-<str>"\\"t { cbuf[clen++] = '\t'; }
-<str>"\\"f { cbuf[clen++] = '\f'; }
-<str>"\\"r { cbuf[clen++] = '\r'; }
-<str>"\\"b { cbuf[clen++] = '\b'; }
-<str>"\\"v { cbuf[clen++] = '\v'; } /* these ANSIisms may not be known by */
-<str>"\\"a { cbuf[clen++] = '\007'; } /* your compiler. hence 007 for bell */
-<str>"\\\\" { cbuf[clen++] = '\\'; }
-<str>"\\"({O}{O}{O}|{O}{O}|{O}) { int n;
- sscanf(yytext+1, "%o", &n); cbuf[clen++] = n; }
-<str>"\\"x({H}+) { int n; /* ANSI permits any number! */
- sscanf(yytext+2, "%x", &n); cbuf[clen++] = n; }
-<str>"\\". { cbuf[clen++] = yytext[1]; }
-<str>. { CADD; }
-
-%%
-
-void
-startreg()
-{
- BEGIN reg;
- clen = 0;
-}
-
-/* input() and unput() are transcriptions of the standard lex
- macros for input and output with additions for error message
- printing. God help us all if someone changes how lex works.
-*/
-
-uchar ebuf[300];
-uchar *ep = ebuf;
-
-int
-input(void)
-{
- register int c;
- extern uchar *lexprog;
-
- if (yysptr > yysbuf)
- c = U(*--yysptr);
- else if (lexprog != NULL) /* awk '...' */
- c = *lexprog++;
- else /* awk -f ... */
- c = pgetc();
- if (c == '\n')
- yylineno++;
- else if (c == EOF)
- c = 0;
- if (ep >= ebuf + sizeof ebuf)
- ep = ebuf;
- return *ep++ = c;
-}
-
-static void
-unput(int c)
-{
- yytchar = c;
- if (yytchar == '\n')
- yylineno--;
- *yysptr++ = yytchar;
- if (--ep < ebuf)
- ep = ebuf + sizeof(ebuf) - 1;
-}
-
-
-static void
-unputstr(char *s)
-{
- int i;
-
- for (i = strlen(s)-1; i >= 0; i--)
- unput(s[i]);
-}
diff --git a/usr/src/cmd/awk/b.c b/usr/src/cmd/awk/b.c
index 9caee4e9d3..adca0cb633 100644
--- a/usr/src/cmd/awk/b.c
+++ b/usr/src/cmd/awk/b.c
@@ -1,4 +1,28 @@
/*
+ * Copyright (C) Lucent Technologies 1997
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that the above copyright notice appear in all
+ * copies and that both that the copyright notice and this
+ * permission notice and warranty disclaimer appear in supporting
+ * documentation, and that the name Lucent Technologies or any of
+ * its entities not be used in advertising or publicity pertaining
+ * to distribution of the software without specific, written prior
+ * permission.
+ *
+ * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+ * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+ * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+ * THIS SOFTWARE.
+ */
+
+/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
@@ -28,6 +52,8 @@
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
+/* lasciate ogne speranza, voi ch'intrate. */
+
#define DEBUG
#include "awk.h"
@@ -37,74 +63,86 @@
/* NCHARS is 2**n */
#define MAXLIN (3 * LINE_MAX)
-#define type(v) (v)->nobj
+#define type(v) (v)->nobj /* badly overloaded here */
+#define info(v) (v)->ntype /* badly overloaded here */
#define left(v) (v)->narg[0]
#define right(v) (v)->narg[1]
#define parent(v) (v)->nnext
#define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
+#define ELEAF case EMPTYRE: /* empty string in regexp */
#define UNARY case STAR: case PLUS: case QUEST:
/*
* encoding in tree Nodes:
- * leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
+ * leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
* left is index, right contains value or pointer to value
* unary (STAR, PLUS, QUEST): left is child, right is null
* binary (CAT, OR): left and right are children
* parent contains pointer to parent
*/
-int setvec[MAXLIN];
-int tmpset[MAXLIN];
-Node *point[MAXLIN];
+int *setvec;
+int *tmpset;
+int maxsetvec = 0;
int rtok; /* next token in current re */
int rlxval;
-uchar *rlxstr;
-uchar *prestr; /* current position in current re */
-uchar *lastre; /* origin of last re */
+static uschar *rlxstr;
+static uschar *prestr; /* current position in current re */
+static uschar *lastre; /* origin of last re */
static int setcnt;
static int poscnt;
-uchar *patbeg;
+char *patbeg;
int patlen;
#define NFA 20 /* cache this many dynamic fa's */
fa *fatab[NFA];
int nfatab = 0; /* entries in fatab */
-static fa *mkdfa(uchar *, int);
+static fa *mkdfa(const char *, int);
static int makeinit(fa *, int);
static void penter(Node *);
static void freetr(Node *);
-static void overflo(char *);
+static void overflo(const char *);
+static void growvec(const char *);
static void cfoll(fa *, Node *);
static void follow(Node *);
-static Node *reparse(uchar *);
+static Node *reparse(const char *);
static int relex(void);
static void freefa(fa *);
static int cgoto(fa *, int, int);
fa *
-makedfa(uchar *s, int anchor) /* returns dfa for reg expr s */
+makedfa(const char *s, int anchor) /* returns dfa for reg expr s */
{
int i, use, nuse;
fa *pfa;
+ static int now = 1;
+
+ if (setvec == NULL) { /* first time through any RE */
+ maxsetvec = MAXLIN;
+ setvec = (int *)malloc(maxsetvec * sizeof (int));
+ tmpset = (int *)malloc(maxsetvec * sizeof (int));
+ if (setvec == NULL || tmpset == NULL)
+ overflo("out of space initializing makedfa");
+ }
if (compile_time) /* a constant for sure */
return (mkdfa(s, anchor));
for (i = 0; i < nfatab; i++) { /* is it there already? */
if (fatab[i]->anchor == anchor &&
- strcmp((char *)fatab[i]->restr, (char *)s) == 0) {
- fatab[i]->use++;
+ strcmp((const char *)fatab[i]->restr, s) == 0) {
+ fatab[i]->use = now++;
return (fatab[i]);
}
}
pfa = mkdfa(s, anchor);
if (nfatab < NFA) { /* room for another */
fatab[nfatab] = pfa;
- fatab[nfatab]->use = 1;
+ fatab[nfatab]->use = now++;
nfatab++;
return (pfa);
}
@@ -117,13 +155,16 @@ makedfa(uchar *s, int anchor) /* returns dfa for reg expr s */
}
freefa(fatab[nuse]);
fatab[nuse] = pfa;
- pfa->use = 1;
+ pfa->use = now++;
return (pfa);
}
+/*
+ * does the real work of making a dfa
+ * anchor = 1 for anchored matches, else 0
+ */
fa *
-mkdfa(uchar *s, int anchor) /* does the real work of making a dfa */
- /* anchor = 1 for anchored matches, else 0 */
+mkdfa(const char *s, int anchor)
{
Node *p, *p1;
fa *f;
@@ -137,7 +178,7 @@ mkdfa(uchar *s, int anchor) /* does the real work of making a dfa */
poscnt = 0;
penter(p1); /* enter parent pointers and leaf indices */
if ((f = (fa *)calloc(1, sizeof (fa) + poscnt * sizeof (rrow))) == NULL)
- overflo("no room for fa");
+ overflo("out of space for fa");
/* penter has computed number of positions in re */
f->accept = poscnt-1;
cfoll(f, p1); /* set up follow sets */
@@ -151,14 +192,14 @@ mkdfa(uchar *s, int anchor) /* does the real work of making a dfa */
*f->posns[1] = 0;
f->initstat = makeinit(f, anchor);
f->anchor = anchor;
- f->restr = tostring(s);
+ f->restr = (uschar *)tostring(s);
return (f);
}
static int
makeinit(fa *f, int anchor)
{
- register int i, k;
+ int i, k;
f->curstat = 2;
f->out[2] = 0;
@@ -192,9 +233,10 @@ void
penter(Node *p) /* set up parent pointers and leaf indices */
{
switch (type(p)) {
+ ELEAF
LEAF
- left(p) = (Node *) poscnt;
- point[poscnt++] = p;
+ info(p) = poscnt;
+ poscnt++;
break;
UNARY
penter(left(p));
@@ -207,8 +249,8 @@ penter(Node *p) /* set up parent pointers and leaf indices */
parent(left(p)) = p;
parent(right(p)) = p;
break;
- default:
- ERROR "unknown type %d in penter", type(p) FATAL;
+ default: /* can't happen */
+ FATAL("can't happen: unknown type %d in penter", type(p));
break;
}
}
@@ -217,6 +259,7 @@ static void
freetr(Node *p) /* free parse tree */
{
switch (type(p)) {
+ ELEAF
LEAF
xfree(p);
break;
@@ -230,92 +273,168 @@ freetr(Node *p) /* free parse tree */
freetr(right(p));
xfree(p);
break;
- default:
- ERROR "unknown type %d in freetr", type(p) FATAL;
+ default: /* can't happen */
+ FATAL("can't happen: unknown type %d in freetr", type(p));
break;
}
}
-uchar *
-cclenter(uchar *p)
+static void
+growvec(const char *msg)
+{
+ maxsetvec *= 4;
+ setvec = (int *)realloc(setvec, maxsetvec * sizeof (int));
+ tmpset = (int *)realloc(tmpset, maxsetvec * sizeof (int));
+ if (setvec == NULL || tmpset == NULL)
+ overflo(msg);
+}
+
+/*
+ * in the parsing of regular expressions, metacharacters like . have
+ * to be seen literally; \056 is not a metacharacter.
+ */
+
+/*
+ * find and eval hex string at pp, return new p; only pick up one 8-bit
+ * byte (2 chars).
+ */
+int
+hexstr(uschar **pp)
+{
+ uschar *p;
+ int n = 0;
+ int i;
+
+ for (i = 0, p = (uschar *)*pp; i < 2 && isxdigit(*p); i++, p++) {
+ if (isdigit(*p))
+ n = 16 * n + *p - '0';
+ else if (*p >= 'a' && *p <= 'f')
+ n = 16 * n + *p - 'a' + 10;
+ else if (*p >= 'A' && *p <= 'F')
+ n = 16 * n + *p - 'A' + 10;
+ }
+ *pp = (uschar *)p;
+ return (n);
+}
+
+#define isoctdigit(c) ((c) >= '0' && (c) <= '7')
+
+/* pick up next thing after a \\ and increment *pp */
+int
+quoted(uschar **pp)
{
- register int i, c;
- uchar *op, *chars, *ret;
- size_t bsize;
+ uschar *p = *pp;
+ int c;
+
+ if ((c = *p++) == 't')
+ c = '\t';
+ else if (c == 'n')
+ c = '\n';
+ else if (c == 'f')
+ c = '\f';
+ else if (c == 'r')
+ c = '\r';
+ else if (c == 'b')
+ c = '\b';
+ else if (c == '\\')
+ c = '\\';
+ else if (c == 'x') { /* hexadecimal goo follows */
+ c = hexstr(&p); /* this adds a null if number is invalid */
+ } else if (isoctdigit(c)) { /* \d \dd \ddd */
+ int n = c - '0';
+ if (isoctdigit(*p)) {
+ n = 8 * n + *p++ - '0';
+ if (isoctdigit(*p))
+ n = 8 * n + *p++ - '0';
+ }
+ c = n;
+ } /* else */
+ /* c = c; */
+ *pp = p;
+ return (c);
+}
+
+char *
+cclenter(const char *argp) /* add a character class */
+{
+ int i, c, c2;
+ uschar *p = (uschar *)argp;
+ uschar *op, *bp;
+ static uschar *buf = NULL;
+ static size_t bufsz = 100;
- init_buf(&chars, &bsize, LINE_INCR);
op = p;
- i = 0;
- while ((c = *p++) != 0) {
+ if (buf == NULL && (buf = (uschar *)malloc(bufsz)) == NULL)
+ FATAL("out of space for character class [%.10s...] 1", p);
+ bp = buf;
+ for (i = 0; (c = *p++) != 0; ) {
if (c == '\\') {
- if ((c = *p++) == 't')
- c = '\t';
- else if (c == 'n')
- c = '\n';
- else if (c == 'f')
- c = '\f';
- else if (c == 'r')
- c = '\r';
- else if (c == 'b')
- c = '\b';
- else if (c == '\\')
- c = '\\';
- else if (isdigit(c)) {
- int n = c - '0';
- if (isdigit(*p)) {
- n = 8 * n + *p++ - '0';
- if (isdigit(*p))
- n = 8 * n + *p++ - '0';
- }
- c = n;
- } /* else */
- /* c = c; */
- } else if (c == '-' && i > 0 && chars[i-1] != 0) {
+ c = quoted(&p);
+ } else if (c == '-' && i > 0 && bp[-1] != 0) {
if (*p != 0) {
- c = chars[i-1];
- while ((uchar)c < *p) { /* fails if *p is \\ */
- expand_buf(&chars, &bsize, i);
- chars[i++] = ++c;
+ c = bp[-1];
+ c2 = *p++;
+ if (c2 == '\\')
+ c2 = quoted(&p);
+ if (c > c2) { /* empty; ignore */
+ bp--;
+ i--;
+ continue;
+ }
+ while (c < c2) {
+ if (!adjbuf((char **)&buf, &bufsz,
+ bp-buf+2, 100, (char **)&bp,
+ "cclenter1")) {
+ FATAL(
+ "out of space for character class [%.10s...] 2", p);
+ }
+ *bp++ = ++c;
+ i++;
}
- p++;
continue;
}
}
- expand_buf(&chars, &bsize, i);
- chars[i++] = c;
+ if (!adjbuf((char **)&buf, &bufsz, bp-buf+2, 100, (char **)&bp,
+ "cclenter2"))
+ FATAL(
+ "out of space for character class [%.10s...] 3", p);
+ *bp++ = c;
+ i++;
}
- chars[i++] = '\0';
- dprintf(("cclenter: in = |%s|, out = |%s|\n", op, chars));
+ *bp = '\0';
+ dprintf(("cclenter: in = |%s|, out = |%s|\n", op, buf));
xfree(op);
- ret = tostring(chars);
- free(chars);
- return (ret);
+ return ((char *)tostring((char *)buf));
}
static void
-overflo(char *s)
+overflo(const char *s)
{
- ERROR "regular expression too big: %s", gettext((char *)s) FATAL;
+ FATAL("regular expression too big: %.30s...", gettext((char *)s));
}
/* enter follow set of each leaf of vertex v into lfollow[leaf] */
static void
cfoll(fa *f, Node *v)
{
- register int i;
- register int *p;
+ int i;
+ int *p;
switch (type(v)) {
+ ELEAF
LEAF
- f->re[(int)left(v)].ltype = type(v);
- f->re[(int)left(v)].lval = (int)right(v);
+ f->re[info(v)].ltype = type(v);
+ f->re[info(v)].lval.np = right(v);
+ while (f->accept >= maxsetvec) { /* guessing here! */
+ growvec("out of space in cfoll()");
+ }
for (i = 0; i <= f->accept; i++)
setvec[i] = 0;
setcnt = 0;
follow(v); /* computes setvec and setcnt */
if ((p = (int *)calloc(1, (setcnt+1) * sizeof (int))) == NULL)
- overflo("follow set overflow");
- f->re[(int)left(v)].lfollow = p;
+ overflo("out of space building follow set");
+ f->re[info(v)].lfollow = p;
*p = setcnt;
for (i = f->accept; i >= 0; i--) {
if (setvec[i] == 1)
@@ -330,8 +449,8 @@ cfoll(fa *f, Node *v)
cfoll(f, left(v));
cfoll(f, right(v));
break;
- default:
- ERROR "unknown type %d in cfoll", type(v) FATAL;
+ default: /* can't happen */
+ FATAL("can't happen: unknown type %d in cfoll", type(v));
}
}
@@ -342,15 +461,25 @@ cfoll(fa *f, Node *v)
static int
first(Node *p)
{
- register int b;
+ int b, lp;
switch (type(p)) {
+ ELEAF
LEAF
- if (setvec[(int)left(p)] != 1) {
- setvec[(int)left(p)] = 1;
+ lp = info(p); /* look for high-water mark of subscripts */
+ while (setcnt >= maxsetvec || lp >= maxsetvec) {
+ /* guessing here! */
+ growvec("out of space in first()");
+ }
+ if (type(p) == EMPTYRE) {
+ setvec[lp] = 0;
+ return (0);
+ }
+ if (setvec[lp] != 1) {
+ setvec[lp] = 1;
setcnt++;
}
- if (type(p) == CCL && (*(uchar *)right(p)) == '\0')
+ if (type(p) == CCL && (*(char *)right(p)) == '\0')
return (0); /* empty CCL */
else
return (1);
@@ -372,8 +501,7 @@ first(Node *p)
return (0);
return (1);
}
- ERROR "unknown type %d in first", type(p) FATAL;
- return (-1);
+ FATAL("can't happen: unknown type %d in first", type(p));
}
/* collects leaves that can follow v into setvec */
@@ -407,14 +535,16 @@ follow(Node *v)
follow(p);
return;
default:
- ERROR "unknown type %d in follow", type(p) FATAL;
+ FATAL("unknown type %d in follow", type(p));
break;
}
}
static int
-member(uchar c, uchar *s) /* is c in s? */
+member(int c, const char *sarg) /* is c in s? */
{
+ uschar *s = (uschar *)sarg;
+
while (*s)
if (c == *s++)
return (1);
@@ -423,9 +553,10 @@ member(uchar c, uchar *s) /* is c in s? */
int
-match(fa *f, uchar *p)
+match(fa *f, const char *p0) /* shortest match ? */
{
- register int s, ns;
+ int s, ns;
+ uschar *p = (uschar *)p0;
s = f->reset ? makeinit(f, 0) : f->initstat;
if (f->out[s])
@@ -442,10 +573,11 @@ match(fa *f, uchar *p)
}
int
-pmatch(fa *f, uchar *p)
+pmatch(fa *f, const char *p0) /* longest match, for sub */
{
- register int s, ns;
- register uchar *q;
+ int s, ns;
+ uschar *p = (uschar *)p0;
+ uschar *q;
int i, k;
if (f->reset) {
@@ -453,7 +585,7 @@ pmatch(fa *f, uchar *p)
} else {
s = f->initstat;
}
- patbeg = p;
+ patbeg = (char *)p;
patlen = -1;
do {
q = p;
@@ -466,16 +598,17 @@ pmatch(fa *f, uchar *p)
s = cgoto(f, s, *q);
if (s == 1) { /* no transition */
if (patlen >= 0) {
- patbeg = p;
+ patbeg = (char *)p;
return (1);
- } else
+ } else {
goto nextin; /* no match */
+ }
}
} while (*q++ != 0);
if (f->out[s])
patlen = q - p - 1; /* don't count $ */
if (patlen >= 0) {
- patbeg = p;
+ patbeg = (char *)p;
return (1);
}
nextin:
@@ -485,7 +618,7 @@ pmatch(fa *f, uchar *p)
xfree(f->posns[i]);
k = *f->posns[0];
if ((f->posns[2] =
- (int *)calloc(1, (k + 1) * sizeof (int))) == NULL) {
+ (int *)calloc(k + 1, sizeof (int))) == NULL) {
overflo("out of space in pmatch");
}
for (i = 0; i <= k; i++)
@@ -500,10 +633,11 @@ pmatch(fa *f, uchar *p)
}
int
-nematch(fa *f, uchar *p)
+nematch(fa *f, const char *p0) /* non-empty match, for sub */
{
- register int s, ns;
- register uchar *q;
+ int s, ns;
+ uschar *p = (uschar *)p0;
+ uschar *q;
int i, k;
if (f->reset) {
@@ -523,7 +657,7 @@ nematch(fa *f, uchar *p)
s = cgoto(f, s, *q);
if (s == 1) { /* no transition */
if (patlen > 0) {
- patbeg = p;
+ patbeg = (char *)p;
return (1);
} else
goto nnextin; /* no nonempty match */
@@ -532,7 +666,7 @@ nematch(fa *f, uchar *p)
if (f->out[s])
patlen = q - p - 1; /* don't count $ */
if (patlen > 0) {
- patbeg = p;
+ patbeg = (char *)p;
return (1);
}
nnextin:
@@ -542,7 +676,7 @@ nematch(fa *f, uchar *p)
xfree(f->posns[i]);
k = *f->posns[0];
if ((f->posns[2] =
- (int *)calloc(1, (k + 1) * sizeof (int))) == NULL) {
+ (int *)calloc(k + 1, sizeof (int))) == NULL) {
overflo("out of state space");
}
for (i = 0; i <= k; i++)
@@ -560,31 +694,31 @@ nematch(fa *f, uchar *p)
static Node *regexp(void), *primary(void), *concat(Node *);
static Node *alt(Node *), *unary(Node *);
+/* parses regular expression pointed to by p */
+/* uses relex() to scan regular expression */
static Node *
-reparse(uchar *p)
+reparse(const char *p)
{
- /* parses regular expression pointed to by p */
- /* uses relex() to scan regular expression */
Node *np;
dprintf(("reparse <%s>\n", p));
- lastre = prestr = p; /* prestr points to string to be parsed */
+
+ /* prestr points to string to be parsed */
+ lastre = prestr = (uschar *)p;
rtok = relex();
- if (rtok == '\0')
- ERROR "empty regular expression" FATAL;
- np = regexp();
+ /* GNU compatibility: an empty regexp matches anything */
if (rtok == '\0') {
- return (np);
- } else {
- ERROR "syntax error in regular expression %s at %s",
- lastre, prestr FATAL;
+ return (op2(EMPTYRE, NIL, NIL));
}
- /*NOTREACHED*/
- return (NULL);
+ np = regexp();
+ if (rtok != '\0')
+ FATAL("syntax error in regular expression %s at %s",
+ lastre, prestr);
+ return (np);
}
static Node *
-regexp(void)
+regexp(void) /* top-level parse of reg expr */
{
return (alt(concat(primary())));
}
@@ -596,28 +730,31 @@ primary(void)
switch (rtok) {
case CHAR:
- np = op2(CHAR, NIL, (Node *)rlxval);
+ np = op2(CHAR, NIL, itonp(rlxval));
rtok = relex();
return (unary(np));
case ALL:
rtok = relex();
return (unary(op2(ALL, NIL, NIL)));
+ case EMPTYRE:
+ rtok = relex();
+ return (unary(op2(ALL, NIL, NIL)));
case DOT:
rtok = relex();
return (unary(op2(DOT, NIL, NIL)));
case CCL:
/*LINTED align*/
- np = op2(CCL, NIL, (Node *)cclenter(rlxstr));
+ np = op2(CCL, NIL, (Node *)cclenter((char *)rlxstr));
rtok = relex();
return (unary(np));
case NCCL:
/*LINTED align*/
- np = op2(NCCL, NIL, (Node *)cclenter(rlxstr));
+ np = op2(NCCL, NIL, (Node *)cclenter((char *)rlxstr));
rtok = relex();
return (unary(np));
case '^':
rtok = relex();
- return (unary(op2(CHAR, NIL, (Node *)HAT)));
+ return (unary(op2(CHAR, NIL, itonp(HAT))));
case '$':
rtok = relex();
return (unary(op2(CHAR, NIL, NIL)));
@@ -627,20 +764,20 @@ primary(void)
rtok = relex();
return (unary(op2(CCL, NIL,
/*LINTED align*/
- (Node *)tostring((uchar *)""))));
+ (Node *)tostring(""))));
}
np = regexp();
if (rtok == ')') {
rtok = relex();
return (unary(np));
} else {
- ERROR "syntax error in regular expression %s at %s",
- lastre, prestr FATAL;
+ FATAL("syntax error in regular expression %s at %s",
+ lastre, prestr);
}
/* FALLTHROUGH */
default:
- ERROR "illegal primary in regular expression %s at %s",
- lastre, prestr FATAL;
+ FATAL("illegal primary in regular expression %s at %s",
+ lastre, prestr);
}
/*NOTREACHED*/
return (NULL);
@@ -650,7 +787,14 @@ static Node *
concat(Node *np)
{
switch (rtok) {
- case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
+ case EMPTYRE:
+ case CHAR:
+ case DOT:
+ case ALL:
+ case CCL:
+ case NCCL:
+ case '$':
+ case '(':
return (concat(op2(CAT, np, primary())));
default:
return (np);
@@ -685,12 +829,48 @@ unary(Node *np)
}
}
+/*
+ * Character class definitions conformant to the POSIX locale as
+ * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
+ * and operating character sets are both ASCII (ISO646) or supersets
+ * thereof.
+ *
+ * Note that to avoid overflowing the temporary buffer used in
+ * relex(), the expanded character class (prior to range expansion)
+ * must be less than twice the size of their full name.
+ */
+
+struct charclass {
+ const char *cc_name;
+ int cc_namelen;
+ int (*cc_func)(int);
+} charclasses[] = {
+ { "alnum", 5, isalnum },
+ { "alpha", 5, isalpha },
+ { "blank", 5, isblank },
+ { "cntrl", 5, iscntrl },
+ { "digit", 5, isdigit },
+ { "graph", 5, isgraph },
+ { "lower", 5, islower },
+ { "print", 5, isprint },
+ { "punct", 5, ispunct },
+ { "space", 5, isspace },
+ { "upper", 5, isupper },
+ { "xdigit", 6, isxdigit },
+ { NULL, 0, NULL },
+};
+
+
static int
relex(void) /* lexical analyzer for reparse */
{
- register int c;
- uchar *cbuf;
- int clen, cflag;
+ int c, n;
+ int cflag;
+ static uschar *buf = 0;
+ static size_t bufsz = 100;
+ uschar *bp;
+ struct charclass *cc;
+ int i;
switch (c = *prestr++) {
case '|': return OR;
@@ -705,64 +885,82 @@ relex(void) /* lexical analyzer for reparse */
case ')':
return (c);
case '\\':
- if ((c = *prestr++) == 't')
- c = '\t';
- else if (c == 'n')
- c = '\n';
- else if (c == 'f')
- c = '\f';
- else if (c == 'r')
- c = '\r';
- else if (c == 'b')
- c = '\b';
- else if (c == '\\')
- c = '\\';
- else if (isdigit(c)) {
- int n = c - '0';
- if (isdigit(*prestr)) {
- n = 8 * n + *prestr++ - '0';
- if (isdigit(*prestr))
- n = 8 * n + *prestr++ - '0';
- }
- c = n;
- } /* else it's now in c */
- rlxval = c;
+ rlxval = quoted(&prestr);
return (CHAR);
default:
rlxval = c;
return (CHAR);
case '[':
- clen = 0;
+ if (buf == NULL && (buf = (uschar *)malloc(bufsz)) == NULL)
+ FATAL("out of space in reg expr %.10s..", lastre);
+ bp = buf;
if (*prestr == '^') {
cflag = 1;
prestr++;
} else
cflag = 0;
- init_buf(&cbuf, NULL, strlen((char *)prestr) * 2 + 1);
+ n = 2 * strlen((const char *)prestr) + 1;
+ if (!adjbuf((char **)&buf, &bufsz, n, n, (char **)&bp,
+ "relex1"))
+ FATAL("out of space for reg expr %.10s...", lastre);
for (;;) {
if ((c = *prestr++) == '\\') {
- cbuf[clen++] = '\\';
+ *bp++ = '\\';
if ((c = *prestr++) == '\0') {
- ERROR
- "nonterminated character class %s", lastre FATAL;
+ FATAL("nonterminated character class "
+ "%.20s...", lastre);
+ }
+ *bp++ = c;
+ } else if (c == '[' && *prestr == ':') {
+ /*
+ * Handle POSIX character class names.
+ * Dag-Erling Smorgrav, des@ofug.org
+ */
+ for (cc = charclasses; cc->cc_name; cc++)
+ if (strncmp((const char *)prestr + 1,
+ (const char *)cc->cc_name,
+ cc->cc_namelen) == 0)
+ break;
+
+ if (cc->cc_name == NULL ||
+ prestr[1 + cc->cc_namelen] != ':' ||
+ prestr[2 + cc->cc_namelen] != ']') {
+ *bp++ = c;
+ continue;
}
- cbuf[clen++] = c;
+
+ prestr += cc->cc_namelen + 3;
+ /*
+ * BUG: We begin at 1, instead of 0, since we
+ * would otherwise prematurely terminate the
+ * string for classes like [[:cntrl:]]. This
+ * means that we can't match the NUL character,
+ * not without first adapting the entire
+ * program to track each string's length.
+ */
+ for (i = 1; i < NCHARS; i++) {
+ (void) adjbuf((char **)&buf, &bufsz,
+ bp - buf + 1, 100, (char **)&bp,
+ "relex2");
+ if (cc->cc_func(i)) {
+ *bp++ = i;
+ n++;
+ }
+ }
+ } else if (c == '\0') {
+ FATAL("nonterminated character class %.20s",
+ lastre);
+ } else if (bp == buf) { /* 1st char is special */
+ *bp++ = c;
} else if (c == ']') {
- cbuf[clen] = 0;
- rlxstr = tostring(cbuf);
- free(cbuf);
+ *bp++ = '\0';
+ rlxstr = (uschar *)tostring((char *)buf);
if (cflag == 0)
return (CCL);
else
return (NCCL);
- } else if (c == '\n') {
- ERROR "newline in character class %s...",
- lastre FATAL;
- } else if (c == '\0') {
- ERROR "nonterminated character class %s",
- lastre FATAL;
} else
- cbuf[clen++] = c;
+ *bp++ = c;
}
/*NOTREACHED*/
}
@@ -772,9 +970,13 @@ relex(void) /* lexical analyzer for reparse */
static int
cgoto(fa *f, int s, int c)
{
- register int i, j, k;
- register int *p, *q;
+ int i, j, k;
+ int *p, *q;
+ assert(c == HAT || c < NCHARS);
+ while (f->accept >= maxsetvec) { /* guessing here! */
+ growvec("out of space in cgoto()");
+ }
for (i = 0; i <= f->accept; i++)
setvec[i] = 0;
setcnt = 0;
@@ -782,16 +984,20 @@ cgoto(fa *f, int s, int c)
p = f->posns[s];
for (i = 1; i <= *p; i++) {
if ((k = f->re[p[i]].ltype) != FINAL) {
- if (k == CHAR && c == f->re[p[i]].lval ||
- k == DOT && c != 0 && c != HAT ||
- k == ALL && c != 0 ||
- k == CCL &&
- member(c, (uchar *)f->re[p[i]].lval) ||
- k == NCCL &&
- !member(c, (uchar *)f->re[p[i]].lval) &&
- c != 0 && c != HAT) {
+ if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np)) ||
+ (k == DOT && c != 0 && c != HAT) ||
+ (k == ALL && c != 0) ||
+ (k == EMPTYRE && c != 0) ||
+ (k == CCL &&
+ member(c, (char *)f->re[p[i]].lval.up)) ||
+ (k == NCCL &&
+ !member(c, (char *)f->re[p[i]].lval.up) &&
+ c != 0 && c != HAT)) {
q = f->re[p[i]].lfollow;
for (j = 1; j <= *q; j++) {
+ if (q[j] >= maxsetvec) {
+ growvec("cgoto overflow");
+ }
if (setvec[q[j]] == 0) {
setcnt++;
setvec[q[j]] = 1;
@@ -847,17 +1053,19 @@ cgoto(fa *f, int s, int c)
}
static void
-freefa(fa *f)
+freefa(fa *f) /* free a finite automaton */
{
-
- register int i;
+ int i;
if (f == NULL)
return;
for (i = 0; i <= f->curstat; i++)
xfree(f->posns[i]);
- for (i = 0; i <= f->accept; i++)
+ for (i = 0; i <= f->accept; i++) {
xfree(f->re[i].lfollow);
+ if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
+ xfree((f->re[i].lval.np));
+ }
xfree(f->restr);
xfree(f);
}
diff --git a/usr/src/cmd/awk/lex.c b/usr/src/cmd/awk/lex.c
new file mode 100644
index 0000000000..ad0575f3cb
--- /dev/null
+++ b/usr/src/cmd/awk/lex.c
@@ -0,0 +1,637 @@
+/*
+ * Copyright (C) Lucent Technologies 1997
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that the above copyright notice appear in all
+ * copies and that both that the copyright notice and this
+ * permission notice and warranty disclaimer appear in supporting
+ * documentation, and that the name Lucent Technologies or any of
+ * its entities not be used in advertising or publicity pertaining
+ * to distribution of the software without specific, written prior
+ * permission.
+ *
+ * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+ * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+ * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+ * THIS SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include "awk.h"
+#include "y.tab.h"
+
+/* token value slot; yylex() and its helpers fill it in before returning */
+extern YYSTYPE yylval;
+/* tested by word() and yylex(); presumably nonzero inside a function body */
+extern int infunc;
+
+/* current line number; yylex() and string() bump it on newlines */
+off_t lineno = 1;
+/* nesting depth counters: yylex() increments on an opener, decrements on a closer */
+int bracecnt = 0;
+int brackcnt = 0;
+int parencnt = 0;
+
+/* One reserved word or builtin name recognised by word(). */
+typedef struct Keyword {
+ /* spelling, compared with strcmp() by binsearch() */
+ const char *word;
+ /* value word() stores in yylval.i */
+ int sub;
+ /* token word() returns to the parser */
+ int type;
+} Keyword;
+
+/*
+ * Entries must stay in strcmp() order (upper case sorts before lower
+ * case) because word() looks names up with binsearch().
+ */
+Keyword keywords[] = { /* keep sorted: binary searched */
+ { "BEGIN", XBEGIN, XBEGIN },
+ { "END", XEND, XEND },
+ { "NF", VARNF, VARNF },
+ { "atan2", FATAN, BLTIN },
+ { "break", BREAK, BREAK },
+ { "close", CLOSE, CLOSE },
+ { "continue", CONTINUE, CONTINUE },
+ { "cos", FCOS, BLTIN },
+ { "delete", DELETE, DELETE },
+ { "do", DO, DO },
+ { "else", ELSE, ELSE },
+ { "exit", EXIT, EXIT },
+ { "exp", FEXP, BLTIN },
+ { "fflush", FFLUSH, BLTIN },
+ { "for", FOR, FOR },
+ { "func", FUNC, FUNC },
+ { "function", FUNC, FUNC },
+ { "getline", GETLINE, GETLINE },
+ { "gsub", GSUB, GSUB },
+ { "if", IF, IF },
+ { "in", IN, IN },
+ { "index", INDEX, INDEX },
+ { "int", FINT, BLTIN },
+ { "length", FLENGTH, BLTIN },
+ { "log", FLOG, BLTIN },
+ { "match", MATCHFCN, MATCHFCN },
+ { "next", NEXT, NEXT },
+ { "nextfile", NEXTFILE, NEXTFILE },
+ { "print", PRINT, PRINT },
+ { "printf", PRINTF, PRINTF },
+ { "rand", FRAND, BLTIN },
+ { "return", RETURN, RETURN },
+ { "sin", FSIN, BLTIN },
+ { "split", SPLIT, SPLIT },
+ { "sprintf", SPRINTF, SPRINTF },
+ { "sqrt", FSQRT, BLTIN },
+ { "srand", FSRAND, BLTIN },
+ { "sub", SUB, SUB },
+ { "substr", SUBSTR, SUBSTR },
+ { "system", FSYSTEM, BLTIN },
+ { "tolower", FTOLOWER, BLTIN },
+ { "toupper", FTOUPPER, BLTIN },
+ { "while", WHILE, WHILE },
+};
+
+/*
+ * Return token x from the enclosing function, first printing its name
+ * (via tokname()) when dbg is set.  Expands to a brace block and
+ * evaluates x twice, so x must be free of side effects.
+ */
+#define RET(x) { if (dbg) (void) printf("lex %s\n", tokname(x)); return (x); }
+
+/*
+ * Look at the next input character without consuming it: fetch it with
+ * input() and hand it straight back through unput().
+ */
+int
+peek(void)
+{
+	int next = input();
+
+	unput(next);
+	return (next);
+}
+
+/*
+ * Read the next raw token into *pbuf as a NUL-terminated string.  The
+ * buffer is grown with adjbuf() when needed and the (possibly moved)
+ * buffer and its size are written back through pbuf and psz.  The
+ * caller's buffer must hold at least two bytes on entry.
+ *
+ * Returns 0 at end of input, 'a' for a name (a letter or '_' followed by
+ * letters, digits or '_'), '0' for a number, and otherwise the single
+ * character that was read, which is also left in the buffer.
+ */
+int
+gettok(char **pbuf, size_t *psz)	/* get next input token */
+{
+	int c, retc;
+	char *buf = *pbuf;
+	size_t sz = *psz;
+	char *bp = buf;
+
+	c = input();
+	if (c == 0)
+		return (0);
+	buf[0] = c;
+	buf[1] = 0;
+	if (!isalnum(c) && c != '.' && c != '_')
+		return (c);
+
+	*bp++ = c;
+	if (isalpha(c) || c == '_') {	/* it's a varname */
+		for (; (c = input()) != 0; ) {
+			/*
+			 * Keep room for this character AND the terminator:
+			 * the loop can end on end-of-input without passing
+			 * through this check again, and "*bp = 0" below must
+			 * still land inside the buffer.
+			 */
+			if ((size_t)(bp - buf) + 2 > sz &&
+			    !adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
+				FATAL("out of space for name %.10s...", buf);
+			if (isalnum(c) || c == '_')
+				*bp++ = c;
+			else {
+				*bp = 0;
+				unput(c);
+				break;
+			}
+		}
+		*bp = 0;
+		retc = 'a';	/* alphanumeric */
+	} else {	/* maybe it's a number, but could be . */
+		char *rem;
+		/* read input until can't be a number */
+		for (; (c = input()) != 0; ) {
+			/* same two-byte reserve as in the name loop above */
+			if ((size_t)(bp - buf) + 2 > sz &&
+			    !adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
+				FATAL("out of space for number %.10s...", buf);
+			if (isdigit(c) || c == 'e' || c == 'E' ||
+			    c == '.' || c == '+' || c == '-')
+				*bp++ = c;
+			else {
+				unput(c);
+				break;
+			}
+		}
+		*bp = 0;
+		(void) strtod(buf, &rem);	/* parse the number */
+		if (rem == buf) {	/* it wasn't a valid number at all */
+			retc = buf[0];	/* character is its own type */
+			/*
+			 * Push the tail back BEFORE truncating: rem+1 is
+			 * buf+1, so cutting the buffer first would leave
+			 * nothing to push back and drop the input.
+			 */
+			unputstr(rem+1);	/* put rest back for later */
+			buf[1] = 0;	/* return one character as token */
+		} else {	/* some prefix was a number */
+			unputstr(rem);	/* put rest back for later */
+			rem[0] = 0;	/* truncate buf after number part */
+			retc = '0';	/* type is number */
+		}
+	}
+	*pbuf = buf;
+	*psz = sz;
+	return (retc);
+}
+
+/* forward declarations for token helpers defined further down */
+int word(char *);
+int string(void);
+int regexpr(void);
+/* one-shot flags checked at the top of yylex(): sc is set by its '}' case, */
+/* reg is set by startreg() */
+int sc = 0; /* 1 => return a } right now */
+int reg = 0; /* 1 => return a REGEXPR now */
+
+/*
+ * Lexical analyzer entry point: return the next token, or 0 at end of
+ * input.  A pending sc or reg request is honoured before any new input
+ * is read.  Otherwise a raw token is fetched with gettok(): names are
+ * handed to word(), numbers are entered in symtab as constants, and
+ * anything else is dispatched on its first character.  yylval carries
+ * the token's value where one exists.  White space, comments and
+ * backslash-newline fall out of the switch and loop for another token.
+ */
+int
+yylex(void)
+{
+ int c;
+ static char *buf = NULL;
+ /* BUG: setting this small causes core dump! */
+ static size_t bufsize = 5;
+
+ if (buf == NULL && (buf = (char *)malloc(bufsize)) == NULL)
+ FATAL("out of space in yylex");
+ if (sc) {
+ sc = 0;
+ RET('}');
+ }
+ if (reg) {
+ reg = 0;
+ return (regexpr());
+ }
+ for (;;) {
+ c = gettok(&buf, &bufsize);
+ if (c == 0)
+ return (0);
+ if (isalpha(c) || c == '_')
+ return (word(buf));
+ if (isdigit(c)) {
+ yylval.cp = setsymtab(
+ buf, tostring(buf), atof(buf), CON|NUM, symtab);
+ /* should this also have STR set? */
+ RET(NUMBER);
+ }
+
+ yylval.i = c;
+ switch (c) {
+ case '\n': /* {EOL} */
+ lineno++;
+ RET(NL);
+ case '\r': /* assume \n is coming */
+ case ' ': /* {WS}+ */
+ case '\t':
+ break;
+ case '#': /* #.* strip comments */
+ while ((c = input()) != '\n' && c != 0)
+ ;
+ /* hand the terminating newline (or 0) back to be lexed */
+ unput(c);
+ break;
+ case ';':
+ RET(';');
+ case '\\':
+ /* backslash-newline is a line continuation: swallow it */
+ if (peek() == '\n') {
+ (void) input();
+ lineno++;
+ } else if (peek() == '\r') {
+ (void) input();
+ (void) input(); /* BUG: check for \n */
+ lineno++;
+ } else {
+ RET(c);
+ }
+ break;
+ case '&':
+ if (peek() == '&') {
+ (void) input();
+ RET(AND);
+ } else
+ RET('&');
+ case '|':
+ if (peek() == '|') {
+ (void) input();
+ RET(BOR);
+ } else
+ RET('|');
+ case '!':
+ if (peek() == '=') {
+ (void) input();
+ yylval.i = NE;
+ RET(NE);
+ } else if (peek() == '~') {
+ (void) input();
+ yylval.i = NOTMATCH;
+ RET(MATCHOP);
+ } else
+ RET(NOT);
+ case '~':
+ yylval.i = MATCH;
+ RET(MATCHOP);
+ case '<':
+ if (peek() == '=') {
+ (void) input();
+ yylval.i = LE;
+ RET(LE);
+ } else {
+ yylval.i = LT;
+ RET(LT);
+ }
+ case '=':
+ if (peek() == '=') {
+ (void) input();
+ yylval.i = EQ;
+ RET(EQ);
+ } else {
+ yylval.i = ASSIGN;
+ RET(ASGNOP);
+ }
+ case '>':
+ if (peek() == '=') {
+ (void) input();
+ yylval.i = GE;
+ RET(GE);
+ } else if (peek() == '>') {
+ (void) input();
+ yylval.i = APPEND;
+ RET(APPEND);
+ } else {
+ yylval.i = GT;
+ RET(GT);
+ }
+ case '+':
+ if (peek() == '+') {
+ (void) input();
+ yylval.i = INCR;
+ RET(INCR);
+ } else if (peek() == '=') {
+ (void) input();
+ yylval.i = ADDEQ;
+ RET(ASGNOP);
+ } else
+ RET('+');
+ case '-':
+ if (peek() == '-') {
+ (void) input();
+ yylval.i = DECR;
+ RET(DECR);
+ } else if (peek() == '=') {
+ (void) input();
+ yylval.i = SUBEQ;
+ RET(ASGNOP);
+ } else
+ RET('-');
+ case '*':
+ if (peek() == '=') { /* *= */
+ (void) input();
+ yylval.i = MULTEQ;
+ RET(ASGNOP);
+ } else if (peek() == '*') { /* ** or **= */
+ (void) input(); /* eat 2nd * */
+ if (peek() == '=') {
+ (void) input();
+ yylval.i = POWEQ;
+ RET(ASGNOP);
+ } else {
+ RET(POWER);
+ }
+ } else
+ RET('*');
+ case '/':
+ RET('/');
+ case '%':
+ if (peek() == '=') {
+ (void) input();
+ yylval.i = MODEQ;
+ RET(ASGNOP);
+ } else
+ RET('%');
+ case '^':
+ if (peek() == '=') {
+ (void) input();
+ yylval.i = POWEQ;
+ RET(ASGNOP);
+ } else
+ RET(POWER);
+
+ case '$':
+ /* BUG: awkward, if not wrong */
+ /* read the token after '$' to decide between IVAR and INDIRECT */
+ c = gettok(&buf, &bufsize);
+ if (isalpha(c)) {
+ if (strcmp(buf, "NF") == 0) {
+ /* very special */
+ unputstr("(NF)");
+ RET(INDIRECT);
+ }
+ c = peek();
+ if (c == '(' || c == '[' ||
+ (infunc && isarg(buf) >= 0)) {
+ unputstr(buf);
+ RET(INDIRECT);
+ }
+ yylval.cp = setsymtab(
+ buf, "", 0.0, STR|NUM, symtab);
+ RET(IVAR);
+ } else if (c == 0) { /* */
+ SYNTAX("unexpected end of input after $");
+ RET(';');
+ } else {
+ unputstr(buf);
+ RET(INDIRECT);
+ }
+
+ case '}':
+ if (--bracecnt < 0)
+ SYNTAX("extra }");
+ /* deliver ';' now; sc makes the next call return the '}' */
+ sc = 1;
+ RET(';');
+ case ']':
+ if (--brackcnt < 0)
+ SYNTAX("extra ]");
+ RET(']');
+ case ')':
+ if (--parencnt < 0)
+ SYNTAX("extra )");
+ RET(')');
+ case '{':
+ bracecnt++;
+ RET('{');
+ case '[':
+ brackcnt++;
+ RET('[');
+ case '(':
+ parencnt++;
+ RET('(');
+
+ case '"':
+ /* BUG: should be like tran.c ? */
+ return (string());
+
+ default:
+ RET(c);
+ }
+ }
+}
+
+/*
+ * Lex the remainder of a string literal; the opening '"' has already
+ * been consumed by yylex().  Escape sequences are decoded into a static,
+ * growable buffer, the decoded text is entered in symtab as a string
+ * constant (its symbol name is the text plus one trailing blank), and
+ * STRING is returned with yylval.cp pointing at that cell.
+ *
+ * A newline or carriage return inside the literal is reported with
+ * SYNTAX() and scanning continues; end of input is FATAL.  An unknown
+ * escape, and "\x" with no hex digits after it, yields the character
+ * following the backslash.
+ */
+int
+string(void)
+{
+	int c, n;
+	char *s, *bp;
+	static char *buf = NULL;
+	static size_t bufsz = 500;
+
+	if (buf == NULL && (buf = (char *)malloc(bufsz)) == NULL)
+		FATAL("out of space for strings");
+	for (bp = buf; (c = input()) != '"'; ) {
+		if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
+			FATAL("out of space for string %.10s...", buf);
+		switch (c) {
+		case '\n':
+		case '\r':
+		case 0:
+			*bp = '\0';
+			SYNTAX("non-terminated string %.10s...", buf);
+			if (c == 0)	/* hopeless */
+				FATAL("giving up");
+			lineno++;
+			break;
+		case '\\':
+			c = input();
+			switch (c) {
+			case '"': *bp++ = '"'; break;
+			case 'n': *bp++ = '\n'; break;
+			case 't': *bp++ = '\t'; break;
+			case 'f': *bp++ = '\f'; break;
+			case 'r': *bp++ = '\r'; break;
+			case 'b': *bp++ = '\b'; break;
+			case 'v': *bp++ = '\v'; break;
+			case 'a': *bp++ = '\007'; break;
+			case '\\': *bp++ = '\\'; break;
+
+			case '0': case '1': case '2': /* octal: \d \dd \ddd */
+			case '3': case '4': case '5': case '6': case '7':
+				n = c - '0';
+				if ((c = peek()) >= '0' && c < '8') {
+					n = 8 * n + input() - '0';
+					if ((c = peek()) >= '0' && c < '8')
+						n = 8 * n + input() - '0';
+				}
+				*bp++ = n;
+				break;
+
+			case 'x': {	/* hex \x0-9a-fA-F + */
+				char xbuf[100], *px;
+				px = xbuf;
+				while ((c = input()) != 0 && px-xbuf < 100-2) {
+					if (isdigit(c) ||
+					    (c >= 'a' && c <= 'f') ||
+					    (c >= 'A' && c <= 'F'))
+						*px++ = c;
+					else
+						break;
+				}
+				*px = 0;
+				unput(c);
+				if (px == xbuf) {
+					/*
+					 * No hex digits: sscanf() would fail
+					 * and leave n unset, so treat "\x"
+					 * like any other unknown escape.
+					 */
+					*bp++ = 'x';
+				} else {
+					n = 0;
+					(void) sscanf(xbuf, "%x",
+					    (unsigned int *)&n);
+					*bp++ = n;
+				}
+				break;
+			}
+
+			default:
+				*bp++ = c;
+				break;
+			}
+			break;
+		default:
+			*bp++ = c;
+			break;
+		}
+	}
+	/*
+	 * The loop only guarantees one free byte at bp, but two are written
+	 * below (the blank and its terminator), so make room first.
+	 */
+	if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
+		FATAL("out of space for string %.10s...", buf);
+	*bp = 0;
+	s = tostring(buf);
+	*bp++ = ' '; *bp++ = 0;
+	yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
+	RET(STRING);
+}
+
+
+/*
+ * Binary-search the n entries of kp, which must be sorted by strcmp()
+ * order of their word field, for the string w.  Returns the index of the
+ * matching entry, or -1 when w is not present.
+ */
+int
+binsearch(char *w, Keyword *kp, int n)
+{
+	int lo = 0;
+	int hi = n - 1;
+
+	while (lo <= hi) {
+		int probe = (lo + hi) / 2;
+		int order = strcmp(w, kp[probe].word);
+
+		if (order == 0)
+			return (probe);
+		if (order < 0)
+			hi = probe - 1;
+		else
+			lo = probe + 1;
+	}
+	return (-1);
+}
+
+/*
+ * Classify the name w.  A name found in keywords[] returns that entry's
+ * token with yylval.i set to its sub value (NF instead gets a symtab
+ * cell in yylval.cp); misuse of system, function and return is reported
+ * with SYNTAX() but the token is still returned.  Any other name is an
+ * ARG when it is a parameter of the function being defined and is not
+ * followed by '(', otherwise a symtab entry returned as CALL (next
+ * character is '(') or VAR.
+ */
+int
+word(char *w)
+{
+	Keyword *kp;
+	int c, n;
+
+	n = binsearch(w, keywords, sizeof (keywords) / sizeof (keywords[0]));
+	if (n != -1) {	/* found in table */
+		kp = &keywords[n];
+		yylval.i = kp->sub;
+		if (kp->type == VARNF) {
+			yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
+			RET(VARNF);
+		}
+		/* context checks; the token is returned either way */
+		if (kp->type == BLTIN) {
+			if (kp->sub == FSYSTEM && safe)
+				SYNTAX("system is unsafe");
+		} else if (kp->type == FUNC) {
+			if (infunc)
+				SYNTAX("illegal nested function");
+		} else if (kp->type == RETURN) {
+			if (!infunc)
+				SYNTAX("return not in function");
+		}
+		RET(kp->type);
+	}
+
+	c = peek();	/* look for '(' */
+	if (c != '(' && infunc && (n = isarg(w)) >= 0) {
+		yylval.i = n;
+		RET(ARG);
+	}
+	yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
+	if (c == '(') {
+		RET(CALL);
+	}
+	RET(VAR);
+}
+
+/*
+ * Arm the reg flag: the next yylex() call clears it and returns the
+ * result of regexpr() instead of lexing an ordinary token.
+ */
+void
+startreg(void) /* next call to yylex will return a regular expression */
+{
+ reg = 1;
+}
+
+/*
+ * Collect the text of a regular expression up to, but not including,
+ * the closing '/'.  A backslash and the character after it are copied
+ * through unchanged.  A newline or end of input before the '/' is
+ * reported with SYNTAX().  The text is returned as a fresh string in
+ * yylval.s, a '/' is pushed back on the input, and REGEXPR is returned.
+ */
+int
+regexpr(void)
+{
+	int c;
+	static char *buf = NULL;
+	static size_t bufsz = 500;
+	char *bp;
+
+	if (buf == NULL && (buf = (char *)malloc(bufsz)) == NULL)
+		FATAL("out of space for rex expr");
+	bp = buf;
+	for (;;) {
+		c = input();
+		if (c == '/' || c == 0)
+			break;
+		/* room for an escape pair plus the terminator */
+		if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
+			FATAL("out of space for reg expr %.10s...", buf);
+		if (c == '\n') {
+			*bp = '\0';
+			SYNTAX("newline in regular expression %.10s...", buf);
+			unput('\n');
+			break;
+		}
+		if (c == '\\') {
+			*bp++ = '\\';
+			*bp++ = input();
+			continue;
+		}
+		*bp++ = c;
+	}
+	*bp = 0;
+	if (c == 0)
+		SYNTAX("non-terminated regular expression %.10s...", buf);
+	yylval.s = tostring(buf);
+	unput('/');
+	RET(REGEXPR);
+}
+
+/* low-level lexical stuff, sort of inherited from lex */
+
+/* circular record of the characters most recently returned by input() */
+char ebuf[300];
+/* next write position in ebuf; advanced by input(), stepped back by unput() */
+char *ep = ebuf;
+char yysbuf[100]; /* pushback buffer */
+/* one past the newest pushed-back character; equals yysbuf when empty */
+char *yysptr = yysbuf;
+/* NOTE(review): not referenced by the lexer code visible here */
+FILE *yyin = NULL;
+
+/*
+ * Return the next input character, or 0 at end of input.  Pushed-back
+ * characters are served first (newest first), then the in-memory program
+ * text when lexprog is set, otherwise pgetc().  Every character returned
+ * is also recorded in the circular ebuf.
+ */
+int
+input(void)	/* get next lexical input character */
+{
+	int c;
+	extern char *lexprog;
+
+	if (yysptr > yysbuf) {
+		c = (uschar)*--yysptr;
+	} else if (lexprog == NULL) {	/* awk -f ... */
+		c = pgetc();
+	} else {	/* awk '...' */
+		c = (uschar)*lexprog;
+		if (c != 0)
+			lexprog++;
+	}
+	if (c == EOF)
+		c = 0;
+
+	/* wrap around before storing; only real characters advance ep */
+	if (ep >= ebuf + sizeof (ebuf))
+		ep = ebuf;
+	*ep = c;
+	if (c != 0)
+		ep++;
+	return (c);
+}
+
+/*
+ * Push character c back so the next input() call returns it, and step
+ * the ebuf write position back by one.  FATAL when the pushback buffer
+ * is already full.
+ */
+void
+unput(int c)	/* put lexical character back on input */
+{
+	if (yysptr >= yysbuf + sizeof (yysbuf))
+		FATAL("pushed back too much: %.20s...", yysbuf);
+	*yysptr++ = c;
+	/*
+	 * Step ep back circularly.  Test before decrementing: forming a
+	 * pointer before the start of ebuf is undefined behaviour in C.
+	 */
+	if (ep == ebuf)
+		ep = ebuf + sizeof (ebuf) - 1;
+	else
+		ep--;
+}
+
+/*
+ * Push the whole string s back on the input.  Characters go through
+ * unput() last-to-first so that input() hands them out in their original
+ * order.  An empty string pushes nothing.
+ */
+void
+unputstr(const char *s)	/* put a string back on input */
+{
+	const char *p = s + strlen(s);
+
+	while (p > s)
+		unput(*--p);
+}
diff --git a/usr/src/cmd/awk/lib.c b/usr/src/cmd/awk/lib.c
index ded064c6c3..fedd5d5137 100644
--- a/usr/src/cmd/awk/lib.c
+++ b/usr/src/cmd/awk/lib.c
@@ -1,4 +1,28 @@
/*
+ * Copyright (C) Lucent Technologies 1997
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that the above copyright notice appear in all
+ * copies and that both that the copyright notice and this
+ * permission notice and warranty disclaimer appear in supporting
+ * documentation, and that the name Lucent Technologies or any of
+ * its entities not be used in advertising or publicity pertaining
+ * to distribution of the software without specific, written prior
+ * permission.
+ *
+ * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+ * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+ * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+ * THIS SOFTWARE.
+ */
+
+/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
@@ -27,63 +51,131 @@
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
-#pragma ident "%Z%%M% %I% %E% SMI"
+/* Copyright (c) Lucent Technologies 1997 */
+/* All Rights Reserved */
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
#include <errno.h>
+#include <stdlib.h>
+#include <stdarg.h>
#include "awk.h"
#include "y.tab.h"
-uchar *record;
-size_t record_size;
-
-int donefld; /* 1 = implies rec broken into fields */
-int donerec; /* 1 = record is valid (no flds have changed) */
+static FILE *infile = NULL;
+static char *file = "";
+char *record;
+size_t recsize = RECSIZE;
+static char *fields;
+static size_t fieldssize = RECSIZE;
+static char *rtbuf;
+static size_t rtbufsize = RECSIZE;
-static struct fldtab_chunk {
- struct fldtab_chunk *next;
- Cell fields[FLD_INCR];
-} *fldtab_head, *fldtab_tail;
+Cell **fldtab; /* pointers to Cells */
+char inputFS[100] = " ";
-static size_t fldtab_maxidx;
+#define MAXFLD 2
+int nfields = MAXFLD; /* last allocated slot for $i */
-static FILE *infile = NULL;
-static uchar *file = (uchar*) "";
-static uchar *fields;
-static size_t fields_size = LINE_INCR;
+int donefld; /* 1 = implies rec broken into fields */
+int donerec; /* 1 = record is valid (no flds have changed) */
-static int maxfld = 0; /* last used field */
+static int lastfld = 0; /* last used field */
static int argno = 1; /* current input argument number */
-static uchar *getargv(int);
+static Cell dollar0 = { OCELL, CFLD, NULL, "", 0.0, REC|STR|DONTFREE };
+static Cell dollar1 = { OCELL, CFLD, NULL, "", 0.0, FLD|STR|DONTFREE };
+
+static char *getargv(int);
static void cleanfld(int, int);
-static int refldbld(uchar *, uchar *);
+static int refldbld(const char *, const char *);
static void bcheck2(int, int, int);
static void eprint(void);
static void bclass(int);
+/*
+ * Allocate the record buffer (n bytes), the fields buffer, the fldtab
+ * pointer table and the $0 cell, then create cells $1..$nfields with
+ * makefields().  FATAL when any allocation fails.
+ */
+void
+recinit(unsigned int n)
+{
+	if ((record = (char *)malloc(n)) == NULL ||
+	    (fields = (char *)malloc(n+2)) == NULL ||
+	    (fldtab = (Cell **)malloc((nfields+1) * sizeof (Cell *))) == NULL ||
+	    (fldtab[0] = (Cell *)malloc(sizeof (Cell))) == NULL)
+		FATAL("out of space for $0 and fields");
+	/*
+	 * Start with an empty record: the buffer becomes $0's string value
+	 * just below, and getrec() reads its first byte before storing
+	 * anything there, so it must not be left uninitialised.
+	 */
+	if (n > 0)
+		record[0] = '\0';
+	*fldtab[0] = dollar0;
+	fldtab[0]->sval = record;
+	fldtab[0]->nval = tostring("0");
+	makefields(1, nfields);
+}
+
+/*
+ * Allocate a fresh cell for each of fldtab[n1] .. fldtab[n2] inclusive.
+ * Each cell starts as a copy of dollar1 and is named with its field
+ * number in decimal.  FATAL when an allocation fails.
+ */
+void
+makefields(int n1, int n2)	/* create $n1..$n2 inclusive */
+{
+	char name[50];
+	int fld;
+
+	for (fld = n1; fld <= n2; fld++) {
+		Cell *cell = (Cell *)malloc(sizeof (Cell));
+
+		fldtab[fld] = cell;
+		if (cell == NULL)
+			FATAL("out of space in makefields %d", fld);
+		*cell = dollar1;
+		(void) sprintf(name, "%d", fld);
+		cell->nval = tostring(name);
+	}
+}
+
static void
initgetrec(void)
{
int i;
- uchar *p;
+ char *p;
for (i = 1; i < *ARGC; i++) {
- if (!isclvar(p = getargv(i))) /* find 1st real filename */
+ p = getargv(i); /* find 1st real filename */
+ if (p == NULL || *p == '\0') { /* deleted or zapped */
+ argno++;
+ continue;
+ }
+ if (!isclvar(p)) {
+ (void) setsval(lookup("FILENAME", symtab), p);
return;
+ }
setclvar(p); /* a commandline assignment before filename */
argno++;
}
infile = stdin; /* no filenames, so use stdin */
- /* *FILENAME = file = (uchar*) "-"; */
}
+/*
+ * POSIX specifies that fields are supposed to be evaluated as if they were
+ * split using the value of FS at the time that the record's value ($0) was
+ * read.
+ *
+ * Since field-splitting is done lazily, we save the current value of FS
+ * whenever a new record is read in (implicitly or via getline), or when
+ * a new value is assigned to $0.
+ */
+void
+savefs(void)
+{
+ /* inputFS is a fixed 100-byte array: refuse anything that will not fit */
+ /* NOTE(review): the length is measured on getsval(fsloc) but the copy */
+ /* reads *FS; presumably both name the same string -- confirm */
+ if (strlen(getsval(fsloc)) >= sizeof (inputFS))
+ FATAL("field separator %.10s... is too long", *FS);
+ (void) strcpy(inputFS, *FS);
+}
+
+static int firsttime = 1;
+
+/*
+ * get next input record
+ * note: cares whether buf == record
+ */
int
-getrec(uchar **bufp, size_t *bufsizep)
+getrec(char **pbuf, size_t *pbufsize, int isrecord)
{
int c;
- static int firsttime = 1;
- uchar_t *buf, *nbuf;
- size_t len;
+ char *buf = *pbuf;
+ uschar saveb0;
+ size_t bufsize = *pbufsize, savebufsize = bufsize;
if (firsttime) {
firsttime = 0;
@@ -91,17 +183,24 @@ getrec(uchar **bufp, size_t *bufsizep)
}
dprintf(("RS=<%s>, FS=<%s>, ARGC=%f, FILENAME=%s\n",
*RS, *FS, *ARGC, *FILENAME));
- donefld = 0;
- donerec = 1;
+ if (isrecord) {
+ donefld = 0;
+ donerec = 1;
+ savefs();
+ }
+ saveb0 = buf[0];
+ buf[0] = '\0';
while (argno < *ARGC || infile == stdin) {
dprintf(("argno=%d, file=|%s|\n", argno, file));
if (infile == NULL) { /* have to open a new file */
file = getargv(argno);
- if (*file == '\0') { /* it's been zapped */
+ if (file == NULL || *file == '\0') {
+ /* deleted or zapped */
argno++;
continue;
}
- if (isclvar(file)) { /* a var=value arg */
+ if (isclvar(file)) {
+ /* a var=value arg */
setclvar(file);
argno++;
continue;
@@ -110,31 +209,28 @@ getrec(uchar **bufp, size_t *bufsizep)
dprintf(("opening file %s\n", file));
if (*file == '-' && *(file+1) == '\0')
infile = stdin;
- else if ((infile = fopen((char *)file, "r")) == NULL)
- ERROR "can't open file %s", file FATAL;
+ else if ((infile = fopen(file, "rF")) == NULL)
+ FATAL("can't open file %s", file);
(void) setfval(fnrloc, 0.0);
}
- c = readrec(&nbuf, &len, infile);
- expand_buf(bufp, bufsizep, len);
- buf = *bufp;
- (void) memcpy(buf, nbuf, len);
- buf[len] = '\0';
- free(nbuf);
+ c = readrec(&buf, &bufsize, infile);
if (c != 0 || buf[0] != '\0') { /* normal record */
- if (bufp == &record) {
- if (!(recloc->tval & DONTFREE))
+ if (isrecord) {
+ if (freeable(recloc))
xfree(recloc->sval);
- recloc->sval = record;
+ recloc->sval = buf; /* buf == record */
recloc->tval = REC | STR | DONTFREE;
if (is_number(recloc->sval)) {
recloc->fval =
- atof((const char *)recloc->sval);
+ atof(recloc->sval);
recloc->tval |= NUM;
}
}
(void) setfval(nrloc, nrloc->fval+1);
(void) setfval(fnrloc, fnrloc->fval+1);
+ *pbuf = buf;
+ *pbufsize = bufsize;
return (1);
}
/* EOF arrived on this file; set up next */
@@ -143,19 +239,39 @@ getrec(uchar **bufp, size_t *bufsizep)
infile = NULL;
argno++;
}
+ buf[0] = saveb0;
+ *pbuf = buf;
+ *pbufsize = savebufsize;
return (0); /* true end of file */
}
+/*
+ * Abandon the current input file: close it unless it is stdin, clear
+ * infile so that getrec() opens a new file, and advance argno to the
+ * next command-line argument.
+ */
+void
+nextfile(void)
+{
+	FILE *cur = infile;
+
+	infile = NULL;
+	argno++;
+	if (cur != NULL && cur != stdin)
+		(void) fclose(cur);
+}
+
+/*
+ * read one record into buf
+ */
int
-readrec(uchar **bufp, size_t *sizep, FILE *inf) /* read one record into buf */
+readrec(char **pbuf, size_t *pbufsize, FILE *inf)
{
int sep, c;
- uchar *buf;
- int count;
- size_t bufsize;
+ char *rr, *rt, *buf = *pbuf;
+ size_t bufsize = *pbufsize;
+ char *rs = getsval(rsloc);
+
+ if (rtbuf == NULL && (rtbuf = malloc(rtbufsize)) == NULL)
+ FATAL("out of memory in readrec");
- init_buf(&buf, &bufsize, LINE_INCR);
- if ((sep = **RS) == 0) {
+ rr = buf;
+ rt = rtbuf;
+
+ if ((sep = *rs) == '\0') {
sep = '\n';
/* skip leading \n's */
while ((c = getc(inf)) == '\n' && c != EOF)
@@ -163,47 +279,90 @@ readrec(uchar **bufp, size_t *sizep, FILE *inf) /* read one record into buf */
if (c != EOF)
(void) ungetc(c, inf);
}
- count = 0;
- for (;;) {
- while ((c = getc(inf)) != sep && c != EOF) {
- expand_buf(&buf, &bufsize, count);
- buf[count++] = c;
+ while ((c = getc(inf)) != EOF) {
+ if (c != sep) {
+ if (rr-buf+1 > bufsize) {
+ (void) adjbuf(&buf, &bufsize,
+ 1+rr-buf, recsize, &rr, "readrec1");
+ }
+ *rr++ = c;
+ continue;
+ }
+
+ /*
+ * Ensure enough space for either a single separator
+ * character, or at least two '\n' chars (when RS is
+ * the empty string).
+ */
+ (void) adjbuf(&rtbuf, &rtbufsize,
+ 2+rt-rtbuf, recsize, &rt, "readrec2");
+
+ if (*rs == sep) {
+ *rt++ = sep;
+ break;
}
- if (**RS == sep || c == EOF)
+
+ if ((c = getc(inf)) == '\n') { /* 2 in a row */
+ *rt++ = '\n';
+ *rt++ = '\n';
+ while ((c = getc(inf)) == '\n' && c != EOF) {
+ /* Read any further \n's and add them to RT. */
+ (void) adjbuf(&rtbuf, &rtbufsize,
+ 1+rt-rtbuf, recsize, &rt, "readrec3");
+ *rt++ = '\n';
+ }
+ if (c != EOF)
+ (void) ungetc(c, inf);
break;
- if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
+ }
+
+ if (c == EOF) {
+ *rt++ = '\n';
break;
- expand_buf(&buf, &bufsize, count + 1);
- buf[count++] = '\n';
- buf[count++] = c;
+ }
+
+ (void) adjbuf(&buf, &bufsize,
+ 2+rr-buf, recsize, &rr, "readrec4");
+ *rr++ = '\n';
+ *rr++ = c;
}
- buf[count] = '\0';
+ (void) adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec5");
+ (void) adjbuf(&rtbuf, &rtbufsize, 1+rt-rtbuf, recsize, &rt, "readrec6");
+ *rr = '\0';
+ *rt = '\0';
dprintf(("readrec saw <%s>, returns %d\n",
- buf, c == EOF && count == 0 ? 0 : 1));
- *bufp = buf;
- *sizep = count;
- return (c == EOF && count == 0 ? 0 : 1);
+ buf, c == EOF && rr == buf ? 0 : 1));
+ *pbuf = buf;
+ *pbufsize = bufsize;
+ if (c == EOF && rr == buf) {
+ return (0);
+ } else {
+ (void) setsval(rtloc, rtbuf);
+ return (1);
+ }
}
/* get ARGV[n] */
-static uchar *
+static char *
getargv(int n)
{
Cell *x;
- uchar *s, temp[11];
+ char *s, temp[50];
extern Array *ARGVtab;
- (void) sprintf((char *)temp, "%d", n);
- x = setsymtab(temp, (uchar *)"", 0.0, STR, ARGVtab);
+ (void) sprintf(temp, "%d", n);
+ if (lookup(temp, ARGVtab) == NULL)
+ return (NULL);
+ x = setsymtab(temp, "", 0.0, STR, ARGVtab);
s = getsval(x);
dprintf(("getargv(%d) returns |%s|\n", n, s));
return (s);
}
void
-setclvar(uchar *s) /* set var=value from s */
+setclvar(char *s) /* set var=value from s */
{
- uchar *p;
+ char *p;
Cell *q;
for (p = s; *p != '='; p++)
@@ -213,7 +372,7 @@ setclvar(uchar *s) /* set var=value from s */
q = setsymtab(s, p, 0.0, STR, symtab);
(void) setsval(q, p);
if (is_number(q->sval)) {
- q->fval = atof((const char *)q->sval);
+ q->fval = atof(q->sval);
q->tval |= NUM;
}
dprintf(("command line set %s to |%s|\n", s, p));
@@ -221,236 +380,232 @@ setclvar(uchar *s) /* set var=value from s */
}
void
-fldbld(void)
+fldbld(void) /* create fields from current record */
{
- uchar *r, *fr, sep;
+ /* this relies on having fields[] the same length as $0 */
+ /* the fields are all stored in this one array with \0's */
+ /* possibly with a final trailing \0 not associated with any field */
+ char *r, *fr, sep;
Cell *p;
- int i;
- size_t len;
+ int i, j, n;
if (donefld)
return;
- if (!(recloc->tval & STR))
- (void) getsval(recloc);
- r = recloc->sval; /* was record! */
-
- /* make sure fields is always allocated */
- adjust_buf(&fields, fields_size);
-
- /*
- * make sure fields has enough size. We don't expand the buffer
- * in the middle of the loop, since p->sval has already pointed
- * the address in the fields.
- */
- len = strlen((char *)r) + 1;
- expand_buf(&fields, &fields_size, len);
+ if (!isstr(fldtab[0]))
+ (void) getsval(fldtab[0]);
+ r = fldtab[0]->sval;
+ n = strlen(r);
+ if (n > fieldssize) {
+ xfree(fields);
+ /* possibly 2 final \0s */
+ if ((fields = (char *)malloc(n + 2)) == NULL)
+ FATAL("out of space for fields in fldbld %d", n);
+ fieldssize = n;
+ }
fr = fields;
i = 0; /* number of fields accumulated here */
- if (strlen((char *)*FS) > 1) { /* it's a regular expression */
- i = refldbld(r, *FS);
- } else if ((sep = **FS) == ' ') {
+ if (strlen(inputFS) > 1) { /* it's a regular expression */
+ i = refldbld(r, inputFS);
+ } else if ((sep = *inputFS) == ' ') { /* default whitespace */
for (i = 0; ; ) {
while (*r == ' ' || *r == '\t' || *r == '\n')
r++;
- if (*r == 0)
+ if (*r == '\0')
break;
i++;
- p = getfld(i);
- if (!(p->tval & DONTFREE))
- xfree(p->sval);
- p->sval = fr;
- p->tval = FLD | STR | DONTFREE;
+ if (i > nfields)
+ growfldtab(i);
+ if (freeable(fldtab[i]))
+ xfree(fldtab[i]->sval);
+ fldtab[i]->sval = fr;
+ fldtab[i]->tval = FLD | STR | DONTFREE;
do
*fr++ = *r++;
while (*r != ' ' && *r != '\t' && *r != '\n' &&
*r != '\0')
;
- *fr++ = 0;
+ *fr++ = '\0';
+ }
+ *fr = '\0';
+ } else if ((sep = *inputFS) == '\0') {
+ /* new: FS="" => 1 char/field */
+ for (i = 0; *r != '\0'; r++) {
+ char buf[2];
+ i++;
+ if (i > nfields)
+ growfldtab(i);
+ if (freeable(fldtab[i]))
+ xfree(fldtab[i]->sval);
+ buf[0] = *r;
+ buf[1] = '\0';
+ fldtab[i]->sval = tostring(buf);
+ fldtab[i]->tval = FLD | STR;
}
- *fr = 0;
- } else if (*r != 0) { /* if 0, it's a null field */
+ *fr = '\0';
+ } else if (*r != '\0') { /* if 0, it's a null field */
+ /*
+ * subtle case: if length(FS) == 1 && length(RS) > 0
+ * \n is NOT a field separator (cf awk book 61,84).
+ * this variable is tested in the inner while loop.
+ */
+ int rtest = '\n'; /* normal case */
+ if (strlen(*RS) > 0)
+ rtest = '\0';
for (;;) {
i++;
- p = getfld(i);
- if (!(p->tval & DONTFREE))
- xfree(p->sval);
- p->sval = fr;
- p->tval = FLD | STR | DONTFREE;
- /* \n always a separator */
- while (*r != sep && *r != '\n' && *r != '\0')
+ if (i > nfields)
+ growfldtab(i);
+ if (freeable(fldtab[i]))
+ xfree(fldtab[i]->sval);
+ fldtab[i]->sval = fr;
+ fldtab[i]->tval = FLD | STR | DONTFREE;
+ /* \n is a separator only when RS is empty (see rtest) */
+ while (*r != sep && *r != rtest && *r != '\0')
*fr++ = *r++;
- *fr++ = 0;
- if (*r++ == 0)
+ *fr++ = '\0';
+ if (*r++ == '\0')
break;
}
- *fr = 0;
+ *fr = '\0';
}
+ if (i > nfields)
+ FATAL("record `%.30s...' has too many fields; can't happen", r);
/* clean out junk from previous record */
- cleanfld(i, maxfld);
- maxfld = i;
+ cleanfld(i+1, lastfld);
+ lastfld = i;
donefld = 1;
- for (i = 1; i <= maxfld; i++) {
- p = getfld(i);
+ for (j = 1; j <= lastfld; j++) {
+ p = fldtab[j];
if (is_number(p->sval)) {
- p->fval = atof((const char *)p->sval);
+ p->fval = atof(p->sval);
p->tval |= NUM;
}
}
-
- (void) setfval(nfloc, (Awkfloat) maxfld);
+ (void) setfval(nfloc, (Awkfloat)lastfld);
+ donerec = 1; /* restore */
if (dbg) {
- for (i = 0; i <= maxfld; i++) {
- p = getfld(i);
- (void) printf("field %d: |%s|\n", i, p->sval);
+ for (j = 0; j <= lastfld; j++) {
+ p = fldtab[j];
+ (void) printf("field %d (%s): |%s|\n",
+ j, p->nval, p->sval);
}
}
}
+/* clean out fields n1 .. n2 inclusive; nvals remain intact */
static void
-cleanfld(int n1, int n2) /* clean out fields n1..n2 inclusive */
+cleanfld(int n1, int n2)
{
- static uchar *nullstat = (uchar *) "";
Cell *p;
- int i;
+ int i;
- for (i = n2; i > n1; i--) {
- p = getfld(i);
- if (!(p->tval & DONTFREE))
+ for (i = n1; i <= n2; i++) {
+ p = fldtab[i];
+ if (freeable(p))
xfree(p->sval);
+ p->sval = "";
p->tval = FLD | STR | DONTFREE;
- p->sval = nullstat;
}
}
void
-newfld(int n) /* add field n (after end) */
+newfld(int n) /* add field n after end of existing lastfld */
{
- if (n < 0)
- ERROR "accessing invalid field", record FATAL;
- (void) getfld(n);
- cleanfld(maxfld, n);
- maxfld = n;
- (void) setfval(nfloc, (Awkfloat) n);
+ if (n > nfields)
+ growfldtab(n);
+ cleanfld(lastfld+1, n);
+ lastfld = n;
+ (void) setfval(nfloc, (Awkfloat)n);
}
-/*
- * allocate field table. We don't reallocate the table since there
- * might be somewhere recording the address of the table.
- */
-static void
-morefld(void)
+void
+setlastfld(int n) /* set lastfld cleaning fldtab cells if necessary */
{
- int i;
- struct fldtab_chunk *fldcp;
- Cell *newfld;
-
- if ((fldcp = calloc(sizeof (struct fldtab_chunk), 1)) == NULL)
- ERROR "out of space in morefld" FATAL;
-
- newfld = &fldcp->fields[0];
- for (i = 0; i < FLD_INCR; i++) {
- newfld[i].ctype = OCELL;
- newfld[i].csub = CFLD;
- newfld[i].nval = NULL;
- newfld[i].sval = (uchar *)"";
- newfld[i].fval = 0.0;
- newfld[i].tval = FLD|STR|DONTFREE;
- newfld[i].cnext = NULL;
- }
- /*
- * link this field chunk
- */
- if (fldtab_head == NULL)
- fldtab_head = fldcp;
+ if (n < 0)
+ FATAL("cannot set NF to a negative value");
+ if (n > nfields)
+ growfldtab(n);
+
+ if (lastfld < n)
+ cleanfld(lastfld+1, n);
else
- fldtab_tail->next = fldcp;
- fldtab_tail = fldcp;
- fldcp->next = NULL;
+ cleanfld(n+1, lastfld);
- fldtab_maxidx += FLD_INCR;
+ lastfld = n;
}
Cell *
-getfld(int idx)
+fieldadr(int n) /* get nth field */
{
- struct fldtab_chunk *fldcp;
- int cbase;
-
- if (idx < 0)
- ERROR "trying to access field %d", idx FATAL;
- while (idx >= fldtab_maxidx)
- morefld();
- cbase = 0;
- for (fldcp = fldtab_head; fldcp != NULL; fldcp = fldcp->next) {
- if (idx < (cbase + FLD_INCR))
- return (&fldcp->fields[idx - cbase]);
- cbase += FLD_INCR;
- }
- /* should never happen */
- ERROR "trying to access invalid field %d", idx FATAL;
- return (NULL);
+ if (n < 0)
+ FATAL("trying to access out of range field %d", n);
+ if (n > nfields) /* fields after NF are empty */
+ growfldtab(n); /* but does not increase NF */
+ return (fldtab[n]);
}
-int
-fldidx(Cell *vp)
+void
+growfldtab(int n) /* make new fields up to at least $n */
{
- struct fldtab_chunk *fldcp;
- Cell *tbl;
- int cbase;
-
- cbase = 0;
- for (fldcp = fldtab_head; fldcp != NULL; fldcp = fldcp->next) {
- tbl = &fldcp->fields[0];
- if (vp >= tbl && vp < (tbl + FLD_INCR))
- return (cbase + (vp - tbl));
- cbase += FLD_INCR;
- }
- /* should never happen */
- ERROR "trying to access unknown field" FATAL;
- return (0);
+ int nf = 2 * nfields;
+ size_t s;
+
+ if (n > nf)
+ nf = n;
+ s = (nf+1) * (sizeof (Cell *)); /* freebsd: how much do we need? */
+ if (s / sizeof (Cell *) - 1 == nf) /* didn't overflow */
+ fldtab = (Cell **)realloc(fldtab, s);
+ else /* overflow sizeof int */
+ xfree(fldtab); /* make it null */
+ if (fldtab == NULL)
+ FATAL("out of space creating %d fields", nf);
+ makefields(nfields+1, nf);
+ nfields = nf;
}
+/* build fields from reg expr in FS */
static int
-refldbld(uchar *rec, uchar *fs) /* build fields from reg expr in FS */
+refldbld(const char *rec, const char *fs)
{
- uchar *fr;
- int i, tempstat;
+ /* this relies on having fields[] the same length as $0 */
+ /* the fields are all stored in this one array with \0's */
+ char *fr;
+ int i, tempstat, n;
fa *pfa;
- Cell *p;
- size_t len;
- /* make sure fields is allocated */
- adjust_buf(&fields, fields_size);
+ n = strlen(rec);
+ if (n > fieldssize) {
+ xfree(fields);
+ if ((fields = (char *)malloc(n+1)) == NULL)
+ FATAL("out of space for fields in refldbld %d", n);
+ fieldssize = n;
+ }
fr = fields;
*fr = '\0';
if (*rec == '\0')
return (0);
-
- len = strlen((char *)rec) + 1;
- expand_buf(&fields, &fields_size, len);
- fr = fields;
-
pfa = makedfa(fs, 1);
dprintf(("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs));
tempstat = pfa->initstat;
for (i = 1; ; i++) {
- p = getfld(i);
- if (!(p->tval & DONTFREE))
- xfree(p->sval);
- p->tval = FLD | STR | DONTFREE;
- p->sval = fr;
+ if (i > nfields)
+ growfldtab(i);
+ if (freeable(fldtab[i]))
+ xfree(fldtab[i]->sval);
+ fldtab[i]->tval = FLD | STR | DONTFREE;
+ fldtab[i]->sval = fr;
dprintf(("refldbld: i=%d\n", i));
if (nematch(pfa, rec)) {
- pfa->initstat = 2;
+ pfa->initstat = 2; /* horrible coupling to b.c */
dprintf(("match %s (%d chars)\n", patbeg, patlen));
- (void) strncpy((char *)fr, (char *)rec, patbeg-rec);
+ (void) strncpy(fr, rec, patbeg-rec);
fr += patbeg - rec + 1;
*(fr-1) = '\0';
rec = patbeg + patlen;
} else {
dprintf(("no match %s\n", rec));
- (void) strcpy((char *)fr, (char *)rec);
+ (void) strcpy(fr, rec);
pfa->initstat = tempstat;
break;
}
@@ -459,71 +614,74 @@ refldbld(uchar *rec, uchar *fs) /* build fields from reg expr in FS */
}
void
-recbld(void)
+recbld(void) /* create $0 from $1..$NF if necessary */
{
int i;
- uchar *p;
+ char *p;
size_t cnt, len, olen;
+ char *sep = getsval(ofsloc);
if (donerec == 1)
return;
cnt = 0;
- olen = strlen((char *)*OFS);
+ olen = strlen(sep);
for (i = 1; i <= *NF; i++) {
- p = getsval(getfld(i));
- len = strlen((char *)p);
- expand_buf(&record, &record_size, cnt + len + olen);
+ p = getsval(fldtab[i]);
+ len = strlen(p);
+ expand_buf(&record, &recsize, cnt + len + olen);
(void) memcpy(&record[cnt], p, len);
cnt += len;
if (i < *NF) {
- (void) memcpy(&record[cnt], *OFS, olen);
+ (void) memcpy(&record[cnt], sep, olen);
cnt += olen;
}
}
record[cnt] = '\0';
- dprintf(("in recbld FS=%o, recloc=%p\n", **FS, (void *)recloc));
- if (!(recloc->tval & DONTFREE))
+ dprintf(("in recbld inputFS=%s, recloc=%p\n", inputFS, (void *)recloc));
+ if (freeable(recloc))
xfree(recloc->sval);
recloc->tval = REC | STR | DONTFREE;
recloc->sval = record;
- dprintf(("in recbld FS=%o, recloc=%p\n", **FS, (void *)recloc));
+ dprintf(("in recbld inputFS=%s, recloc=%p\n", inputFS, (void *)recloc));
dprintf(("recbld = |%s|\n", record));
donerec = 1;
}
-Cell *
-fieldadr(int n)
+int errorflag = 0;
+
+void
+yyerror(const char *s)
{
- if (n < 0)
- ERROR "trying to access field %d", n FATAL;
- return (getfld(n));
+ SYNTAX("%s", s);
}
-int errorflag = 0;
-char errbuf[200];
-
void
-yyerror(char *s)
+SYNTAX(const char *fmt, ...)
{
- extern uchar *cmdname, *curfname;
+ extern char *cmdname, *curfname;
static int been_here = 0;
+ va_list varg;
if (been_here++ > 2)
return;
- (void) fprintf(stderr, "%s: %s", cmdname, s);
- (void) fprintf(stderr, gettext(" at source line %lld"), lineno);
+ (void) fprintf(stderr, "%s: ", cmdname);
+ va_start(varg, fmt);
+ (void) vfprintf(stderr, fmt, varg);
+ va_end(varg);
+ (void) fprintf(stderr, " at source line %lld", lineno);
if (curfname != NULL)
- (void) fprintf(stderr, gettext(" in function %s"), curfname);
+ (void) fprintf(stderr, " in function %s", curfname);
+ if (compile_time == 1 && cursource() != NULL)
+ (void) fprintf(stderr, " source file %s", cursource());
(void) fprintf(stderr, "\n");
errorflag = 2;
eprint();
}
-/*ARGSUSED*/
void
-fpecatch(int sig)
+fpecatch(int n)
{
- ERROR "floating point exception" FATAL;
+ FATAL("floating point exception %d", n);
}
extern int bracecnt, brackcnt, parencnt;
@@ -558,47 +716,74 @@ bcheck2(int n, int c1, int c2)
}
void
-error(int f, char *s)
+FATAL(const char *fmt, ...)
{
- extern Node *curnode;
- extern uchar *cmdname;
+ extern char *cmdname;
+ va_list varg;
+
+ (void) fflush(stdout);
+ (void) fprintf(stderr, "%s: ", cmdname);
+ va_start(varg, fmt);
+ (void) vfprintf(stderr, fmt, varg);
+ va_end(varg);
+ error();
+ if (dbg > 1) /* core dump if serious debugging on */
+ abort();
+ exit(2);
+}
+
+void
+WARNING(const char *fmt, ...)
+{
+ extern char *cmdname;
+ va_list varg;
(void) fflush(stdout);
(void) fprintf(stderr, "%s: ", cmdname);
- (void) fprintf(stderr, "%s", s);
+ va_start(varg, fmt);
+ (void) vfprintf(stderr, fmt, varg);
+ va_end(varg);
+ error();
+}
+
+void
+error(void)
+{
+ extern Node *curnode;
+
(void) fprintf(stderr, "\n");
if (compile_time != 2 && NR && *NR > 0) {
(void) fprintf(stderr,
gettext(" input record number %g"), *FNR);
- if (strcmp((char *)*FILENAME, "-") != 0)
+ if (strcmp(*FILENAME, "-") != 0)
(void) fprintf(stderr, gettext(", file %s"), *FILENAME);
(void) fprintf(stderr, "\n");
}
if (compile_time != 2 && curnode)
- (void) fprintf(stderr, gettext(" source line number %lld\n"),
+ (void) fprintf(stderr, gettext(" source line number %lld"),
curnode->lineno);
else if (compile_time != 2 && lineno) {
(void) fprintf(stderr,
- gettext(" source line number %lld\n"), lineno);
+ gettext(" source line number %lld"), lineno);
}
+ if (compile_time == 1 && cursource() != NULL)
+ (void) fprintf(stderr, gettext(" source file %s"), cursource());
+ (void) fprintf(stderr, "\n");
eprint();
- if (f) {
- if (dbg)
- abort();
- exit(2);
- }
}
static void
eprint(void) /* try to print context around error */
{
- uchar *p, *q;
+ char *p, *q;
int c;
static int been_here = 0;
- extern uchar ebuf[300], *ep;
+ extern char ebuf[], *ep;
if (compile_time == 2 || compile_time == 0 || been_here++ > 0)
return;
+ if (ebuf == ep)
+ return;
p = ep - 1;
if (p > ebuf && *p == '\n')
p--;
@@ -640,30 +825,22 @@ bclass(int c)
}
double
-errcheck(double x, char *s)
+errcheck(double x, const char *s)
{
- extern int errno;
-
if (errno == EDOM) {
errno = 0;
- ERROR "%s argument out of domain", s WARNING;
+ WARNING("%s argument out of domain", s);
x = 1;
} else if (errno == ERANGE) {
errno = 0;
- ERROR "%s result out of range", s WARNING;
+ WARNING("%s result out of range", s);
x = 1;
}
return (x);
}
-void
-PUTS(uchar *s)
-{
- dprintf(("%s\n", s));
-}
-
int
-isclvar(uchar *s) /* is s of form var=something? */
+isclvar(const char *s) /* is s of form var=something ? */
{
if (s != NULL) {
@@ -686,88 +863,28 @@ isclvar(uchar *s) /* is s of form var=something? */
return (0);
}
-#define MAXEXPON 38 /* maximum exponent for fp number */
-
+#include <math.h>
int
-is_number(uchar *s)
+is_number(const char *s)
{
- int d1, d2;
- int point;
- uchar *es;
- extern char radixpoint;
-
- d1 = d2 = point = 0;
- while (*s == ' ' || *s == '\t' || *s == '\n')
- s++;
- if (*s == '\0')
- return (0); /* empty stuff isn't number */
- if (*s == '+' || *s == '-')
- s++;
- if (!isdigit(*s) && *s != radixpoint)
- return (0);
- if (isdigit(*s)) {
- do {
- d1++;
- s++;
- } while (isdigit(*s));
- }
- if (d1 >= MAXEXPON)
- return (0); /* too many digits to convert */
- if (*s == radixpoint) {
- point++;
- s++;
- }
- if (isdigit(*s)) {
- d2++;
- do {
- s++;
- } while (isdigit(*s));
- }
- if (!(d1 || point && d2))
+ double r;
+ char *ep;
+ errno = 0;
+ r = strtod(s, &ep);
+ if (ep == s || r == HUGE_VAL || errno == ERANGE)
return (0);
- if (*s == 'e' || *s == 'E') {
- s++;
- if (*s == '+' || *s == '-')
- s++;
- if (!isdigit(*s))
- return (0);
- es = s;
- do {
- s++;
- } while (isdigit(*s));
- if (s - es > 2) {
- return (0);
- } else if (s - es == 2 &&
- (int)(10 * (*es-'0') + *(es+1)-'0') >= MAXEXPON) {
- return (0);
- }
- }
- while (*s == ' ' || *s == '\t' || *s == '\n')
- s++;
- if (*s == '\0')
+ while (*ep == ' ' || *ep == '\t' || *ep == '\n')
+ ep++;
+ if (*ep == '\0')
return (1);
else
return (0);
}
void
-init_buf(uchar **optr, size_t *sizep, size_t amt)
+r_expand_buf(char **optr, size_t *sizep, size_t req)
{
- uchar *nptr = NULL;
-
- if ((nptr = malloc(amt)) == NULL)
- ERROR "out of space in init_buf" FATAL;
- /* initial buffer should have NULL terminated */
- *nptr = '\0';
- if (sizep != NULL)
- *sizep = amt;
- *optr = nptr;
-}
-
-void
-r_expand_buf(uchar **optr, size_t *sizep, size_t req)
-{
- uchar *nptr;
+ char *nptr;
size_t amt, size = *sizep;
if (size != 0 && req < (size - 1))
@@ -776,20 +893,10 @@ r_expand_buf(uchar **optr, size_t *sizep, size_t req)
amt = (amt / LINE_INCR + 1) * LINE_INCR;
if ((nptr = realloc(*optr, size + amt)) == NULL)
- ERROR "out of space in expand_buf" FATAL;
+ FATAL("out of space in expand_buf");
/* initial buffer should have NULL terminated */
if (size == 0)
*nptr = '\0';
*sizep += amt;
*optr = nptr;
}
-
-void
-adjust_buf(uchar **optr, size_t size)
-{
- uchar *nptr;
-
- if ((nptr = realloc(*optr, size)) == NULL)
- ERROR "out of space in adjust_buf" FATAL;
- *optr = nptr;
-}
diff --git a/usr/src/cmd/awk/main.c b/usr/src/cmd/awk/main.c
index b0c9d5ae98..ff004daf65 100644
--- a/usr/src/cmd/awk/main.c
+++ b/usr/src/cmd/awk/main.c
@@ -1,4 +1,28 @@
/*
+ * Copyright (C) Lucent Technologies 1997
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that the above copyright notice appear in all
+ * copies and that both that the copyright notice and this
+ * permission notice and warranty disclaimer appear in supporting
+ * documentation, and that the name Lucent Technologies or any of
+ * its entities not be used in advertising or publicity pertaining
+ * to distribution of the software without specific, written prior
+ * permission.
+ *
+ * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+ * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+ * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+ * THIS SOFTWARE.
+ */
+
+/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
@@ -39,24 +63,25 @@
#include "awk.h"
#include "y.tab.h"
-char *version = "version Oct 11, 1989";
+char *version = "version Aug 27, 2018";
int dbg = 0;
-uchar *cmdname; /* gets argv[0] for error messages */
-uchar *lexprog; /* points to program argument if it exists */
+Awkfloat srand_seed = 1;
+char *cmdname; /* gets argv[0] for error messages */
+char *lexprog; /* points to program argument if it exists */
int compile_time = 2; /* for error printing: */
/* 2 = cmdline, 1 = compile, 0 = running */
-char radixpoint = '.';
-static uchar **pfile = NULL; /* program filenames from -f's */
+static char **pfile = NULL; /* program filenames from -f's */
static int npfile = 0; /* number of filenames */
static int curpfile = 0; /* current filename */
+int safe = 0; /* 1 => "safe" mode */
+
int
main(int argc, char *argv[], char *envp[])
{
- uchar *fs = NULL;
- char *nl_radix;
+ const char *fs = NULL;
/*
* At this point, numbers are still scanned as in
* the POSIX locale.
@@ -68,7 +93,7 @@ main(int argc, char *argv[], char *envp[])
#define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */
#endif
(void) textdomain(TEXT_DOMAIN);
- cmdname = (uchar *)argv[0];
+ cmdname = argv[0];
if (argc == 1) {
(void) fprintf(stderr, gettext(
"Usage: %s [-f programfile | 'program'] [-Ffieldsep] "
@@ -76,9 +101,19 @@ main(int argc, char *argv[], char *envp[])
exit(1);
}
(void) signal(SIGFPE, fpecatch);
+
+ srand_seed = 1;
+ srand((unsigned int)srand_seed);
+
yyin = NULL;
- syminit();
+ symtab = makesymtab(NSYMTAB/NSYMTAB);
while (argc > 1 && argv[1][0] == '-' && argv[1][1] != '\0') {
+ if (strcmp(argv[1], "-version") == 0 ||
+ strcmp(argv[1], "--version") == 0) {
+ (void) printf("awk %s\n", version);
+ exit(0);
+ break;
+ }
if (strcmp(argv[1], "--") == 0) {
/* explicit end of args */
argc--;
@@ -86,41 +121,66 @@ main(int argc, char *argv[], char *envp[])
break;
}
switch (argv[1][1]) {
+ case 's':
+ if (strcmp(argv[1], "-safe") == 0)
+ safe = 1;
+ break;
case 'f': /* next argument is program filename */
- argc--;
- argv++;
- if (argc <= 1)
- ERROR "no program filename" FATAL;
- pfile = realloc(pfile, sizeof (uchar *) * (npfile + 1));
- if (pfile == NULL)
- ERROR "out of space in main" FATAL;
- pfile[npfile++] = (uchar *)argv[1];
+ if (argv[1][2] != 0) { /* arg is -fsomething */
+ pfile = realloc(pfile,
+ sizeof (char *) * (npfile + 1));
+ if (pfile == NULL)
+ FATAL("out of space in main");
+ pfile[npfile++] = &argv[1][2];
+ } else { /* arg is -f something */
+ argc--; argv++;
+ if (argc <= 1)
+ FATAL("no program filename");
+ pfile = realloc(pfile,
+ sizeof (char *) * (npfile + 1));
+ if (pfile == NULL)
+ FATAL("out of space in main");
+ pfile[npfile++] = argv[1];
+ }
break;
case 'F': /* set field separator */
if (argv[1][2] != 0) { /* arg is -Fsomething */
/* wart: t=>\t */
if (argv[1][2] == 't' && argv[1][3] == 0)
- fs = (uchar *) "\t";
+ fs = "\t";
else if (argv[1][2] != 0)
- fs = (uchar *)&argv[1][2];
+ fs = &argv[1][2];
} else { /* arg is -F something */
argc--; argv++;
if (argc > 1) {
/* wart: t=>\t */
if (argv[1][0] == 't' &&
argv[1][1] == 0)
- fs = (uchar *) "\t";
+ fs = "\t";
else if (argv[1][0] != 0)
- fs = (uchar *)&argv[1][0];
+ fs = &argv[1][0];
}
}
if (fs == NULL || *fs == '\0')
- ERROR "field separator FS is empty" WARNING;
+ WARNING("field separator FS is empty");
break;
case 'v': /* -v a=1 to be done NOW. one -v for each */
- if (argv[1][2] == '\0' && --argc > 1 &&
- isclvar((uchar *)(++argv)[1]))
- setclvar((uchar *)argv[1]);
+ if (argv[1][2] != 0) { /* arg is -vsomething */
+ if (isclvar(&argv[1][2]))
+ setclvar(&argv[1][2]);
+ else
+ FATAL("invalid -v option argument: %s",
+ &argv[1][2]);
+ } else { /* arg is -v something */
+ argc--; argv++;
+ if (argc <= 1)
+ FATAL("no variable name");
+ if (isclvar(argv[1]))
+ setclvar(argv[1]);
+ else
+ FATAL("invalid -v option argument: %s",
+ argv[1]);
+ }
break;
case 'd':
dbg = atoi(&argv[1][2]);
@@ -129,7 +189,7 @@ main(int argc, char *argv[], char *envp[])
(void) printf("awk %s\n", version);
break;
default:
- ERROR "unknown option %s ignored", argv[1] WARNING;
+ WARNING("unknown option %s ignored", argv[1]);
break;
}
argc--;
@@ -140,18 +200,21 @@ main(int argc, char *argv[], char *envp[])
if (argc <= 1) {
if (dbg)
exit(0);
- ERROR "no program given" FATAL;
+ FATAL("no program given");
}
dprintf(("program = |%s|\n", argv[1]));
- lexprog = (uchar *)argv[1];
+ lexprog = argv[1];
argc--;
argv++;
}
+ recinit(recsize);
+ syminit();
compile_time = 1;
- argv[0] = (char *)cmdname; /* put prog name at front of arglist */
+ argv[0] = cmdname; /* put prog name at front of arglist */
dprintf(("argc=%d, argv[0]=%s\n", argc, argv[0]));
- arginit(argc, (uchar **)argv);
- envinit((uchar **)envp);
+ arginit(argc, argv);
+ if (!safe)
+ envinit(envp);
(void) yyparse();
if (fs)
*FS = qstring(fs, '\0');
@@ -160,9 +223,6 @@ main(int argc, char *argv[], char *envp[])
* done parsing, so now activate the LC_NUMERIC
*/
(void) setlocale(LC_ALL, "");
- nl_radix = nl_langinfo(RADIXCHAR);
- if (nl_radix)
- radixpoint = *nl_radix;
if (errorflag == 0) {
compile_time = 0;
@@ -173,7 +233,7 @@ main(int argc, char *argv[], char *envp[])
}
int
-pgetc(void) /* get program character */
+pgetc(void) /* get 1 character from awk program */
{
int c;
@@ -181,17 +241,27 @@ pgetc(void) /* get program character */
if (yyin == NULL) {
if (curpfile >= npfile)
return (EOF);
- yyin = (strcmp((char *)pfile[curpfile], "-") == 0) ?
- stdin : fopen((char *)pfile[curpfile], "r");
+ yyin = (strcmp(pfile[curpfile], "-") == 0) ?
+ stdin : fopen(pfile[curpfile], "rF");
if (yyin == NULL) {
- ERROR "can't open file %s",
- pfile[curpfile] FATAL;
+ FATAL("can't open file %s", pfile[curpfile]);
}
+ lineno = 1;
}
if ((c = getc(yyin)) != EOF)
return (c);
- (void) fclose(yyin);
+ if (yyin != stdin)
+ (void) fclose(yyin);
yyin = NULL;
curpfile++;
}
}
+
+char *
+cursource(void) /* current source file name */
+{
+ if (curpfile < npfile)
+ return (pfile[curpfile]);
+ else
+ return (NULL);
+}
diff --git a/usr/src/cmd/awk/maketab.c b/usr/src/cmd/awk/maketab.c
index 5c7d8601ea..9c625aabcc 100644
--- a/usr/src/cmd/awk/maketab.c
+++ b/usr/src/cmd/awk/maketab.c
@@ -1,4 +1,28 @@
/*
+ * Copyright (C) Lucent Technologies 1997
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that the above copyright notice appear in all
+ * copies and that both that the copyright notice and this
+ * permission notice and warranty disclaimer appear in supporting
+ * documentation, and that the name Lucent Technologies or any of
+ * its entities not be used in advertising or publicity pertaining
+ * to distribution of the software without specific, written prior
+ * permission.
+ *
+ * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+ * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+ * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+ * THIS SOFTWARE.
+ */
+
+/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
@@ -26,6 +50,12 @@
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
+/*
+ * this program makes the table to link function names
+ * and type indices that is used by execute() in run.c.
+ * it finds the indices in y.tab.h, produced by yacc.
+ */
+
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -35,8 +65,8 @@
struct xx {
int token;
- char *name;
- char *pname;
+ const char *name;
+ const char *pname;
} proc[] = {
{ PROGRAM, "program", NULL },
{ BOR, "boolop", " || " },
@@ -54,13 +84,14 @@ struct xx {
{ SUB, "sub", "sub" },
{ GSUB, "gsub", "gsub" },
{ INDEX, "sindex", "sindex" },
- { SPRINTF, "a_sprintf", "sprintf " },
+ { SPRINTF, "awksprintf", "sprintf " },
{ ADD, "arith", " + " },
{ MINUS, "arith", " - " },
{ MULT, "arith", " * " },
{ DIVIDE, "arith", " / " },
{ MOD, "arith", " % " },
{ UMINUS, "arith", " -" },
+ { UPLUS, "arith", " +" },
{ POWER, "arith", " **" },
{ PREINCR, "incrdecr", "++" },
{ POSTINCR, "incrdecr", "++" },
@@ -73,10 +104,10 @@ struct xx {
{ NOTMATCH, "matchop", " !~ " },
{ MATCHFCN, "matchop", "matchop" },
{ INTEST, "intest", "intest" },
- { PRINTF, "aprintf", "printf" },
- { PRINT, "print", "print" },
+ { PRINTF, "awkprintf", "printf" },
+ { PRINT, "printstat", "print" },
{ CLOSE, "closefile", "closefile" },
- { DELETE, "delete", "delete" },
+ { DELETE, "awkdelete", "awkdelete" },
{ SPLIT, "split", "split" },
{ ASSIGN, "assign", " = " },
{ ADDEQ, "assign", " += " },
@@ -92,6 +123,7 @@ struct xx {
{ DO, "dostat", "do" },
{ IN, "instat", "instat" },
{ NEXT, "jump", "next" },
+ { NEXTFILE, "jump", "nextfile" },
{ EXIT, "jump", "exit" },
{ BREAK, "jump", "break" },
{ CONTINUE, "jump", "continue" },
@@ -100,23 +132,24 @@ struct xx {
{ CALL, "call", "call" },
{ ARG, "arg", "arg" },
{ VARNF, "getnf", "NF" },
- { GETLINE, "getaline", "getline" },
+ { GETLINE, "awkgetline", "getline" },
{ 0, "", "" },
};
-#define SIZE LASTTOKEN - FIRSTTOKEN + 1
-char *table[SIZE];
+#define SIZE (LASTTOKEN - FIRSTTOKEN + 1)
+const char *table[SIZE];
char *names[SIZE];
int
-main()
+main(int argc, char *argv[])
{
- struct xx *p;
+ const struct xx *p;
int i, n, tok;
char c;
FILE *fp;
- char buf[100], name[100], def[100];
+ char buf[200], name[200], def[200];
+ printf("#include <stdio.h>\n");
printf("#include \"awk.h\"\n");
printf("#include \"y.tab.h\"\n\n");
@@ -124,28 +157,29 @@ main()
fprintf(stderr, gettext("maketab can't open y.tab.h!\n"));
exit(1);
}
- printf("static uchar *printname[%d] = {\n", SIZE);
+ printf("static char *printname[%d] = {\n", SIZE);
i = 0;
while (fgets(buf, sizeof (buf), fp) != NULL) {
n = sscanf(buf, "%1c %s %s %d", &c, def, name, &tok);
- /* not a valid #define? */
- if (c != '#' || n != 4 && strcmp(def, "define") != 0)
+ if (c != '#' || (n != 4 && strcmp(def, "define") != 0)) {
+ /* not a valid #define */
continue;
+ }
if (tok < FIRSTTOKEN || tok > LASTTOKEN) {
fprintf(stderr, gettext("maketab funny token %d %s\n"),
tok, buf);
exit(1);
}
- names[tok-FIRSTTOKEN] = malloc(strlen(name)+1);
+ names[tok-FIRSTTOKEN] = (char *)malloc(strlen(name)+1);
strcpy(names[tok-FIRSTTOKEN], name);
- printf("\t(uchar *) \"%s\",\t/* %d */\n", name, tok);
+ printf("\t(char *) \"%s\",\t/* %d */\n", name, tok);
i++;
}
printf("};\n\n");
for (p = proc; p->token != 0; p++)
table[p->token-FIRSTTOKEN] = p->name;
- printf("\nCell *(*proctab[%d])() = {\n", SIZE);
+ printf("\nCell *(*proctab[%d])(Node **, int) = {\n", SIZE);
for (i = 0; i < SIZE; i++)
if (table[i] == 0)
printf("\tnullproc,\t/* %s */\n", names[i]);
@@ -153,14 +187,14 @@ main()
printf("\t%s,\t/* %s */\n", table[i], names[i]);
printf("};\n\n");
- printf("uchar *\ntokname(int n)\n"); /* print a tokname() function */
+ printf("char *\ntokname(int n)\n"); /* print a tokname() function */
printf("{\n");
printf(" static char buf[100];\n\n");
printf(" if (n < FIRSTTOKEN || n > LASTTOKEN) {\n");
printf(" (void) sprintf(buf, \"token %%d\", n);\n");
- printf(" return ((uchar *)buf);\n");
+ printf(" return (buf);\n");
printf(" }\n");
- printf(" return printname[n-257];\n");
+ printf(" return printname[n-FIRSTTOKEN];\n");
printf("}\n");
- exit(0);
+ return (0);
}
diff --git a/usr/src/cmd/awk/parse.c b/usr/src/cmd/awk/parse.c
index 909977f10f..2afcf1e78f 100644
--- a/usr/src/cmd/awk/parse.c
+++ b/usr/src/cmd/awk/parse.c
@@ -1,4 +1,28 @@
/*
+ * Copyright (C) Lucent Technologies 1997
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that the above copyright notice appear in all
+ * copies and that both that the copyright notice and this
+ * permission notice and warranty disclaimer appear in supporting
+ * documentation, and that the name Lucent Technologies or any of
+ * its entities not be used in advertising or publicity pertaining
+ * to distribution of the software without specific, written prior
+ * permission.
+ *
+ * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+ * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+ * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+ * THIS SOFTWARE.
+ */
+
+/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
@@ -28,8 +52,6 @@
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#define DEBUG
#include "awk.h"
#include "y.tab.h"
@@ -37,11 +59,11 @@
Node *
nodealloc(int n)
{
- register Node *x;
+ Node *x;
x = (Node *)malloc(sizeof (Node) + (n - 1) * sizeof (Node *));
if (x == NULL)
- ERROR "out of space in nodealloc" FATAL;
+ FATAL("out of space in nodealloc");
x->nnext = NULL;
x->lineno = lineno;
return (x);
@@ -57,7 +79,7 @@ exptostat(Node *a)
Node *
node1(int a, Node *b)
{
- register Node *x;
+ Node *x;
x = nodealloc(1);
x->nobj = a;
@@ -68,7 +90,7 @@ node1(int a, Node *b)
Node *
node2(int a, Node *b, Node *c)
{
- register Node *x;
+ Node *x;
x = nodealloc(2);
x->nobj = a;
@@ -80,7 +102,7 @@ node2(int a, Node *b, Node *c)
Node *
node3(int a, Node *b, Node *c, Node *d)
{
- register Node *x;
+ Node *x;
x = nodealloc(3);
x->nobj = a;
@@ -93,7 +115,8 @@ node3(int a, Node *b, Node *c, Node *d)
Node *
node4(int a, Node *b, Node *c, Node *d, Node *e)
{
- register Node *x;
+ Node *x;
+
x = nodealloc(4);
x->nobj = a;
x->narg[0] = b;
@@ -104,89 +127,89 @@ node4(int a, Node *b, Node *c, Node *d, Node *e)
}
Node *
-stat3(int a, Node *b, Node *c, Node *d)
+stat1(int a, Node *b)
{
- register Node *x;
+ Node *x;
- x = node3(a, b, c, d);
+ x = node1(a, b);
x->ntype = NSTAT;
return (x);
}
Node *
-op2(int a, Node *b, Node *c)
+stat2(int a, Node *b, Node *c)
{
- register Node *x;
+ Node *x;
x = node2(a, b, c);
- x->ntype = NEXPR;
+ x->ntype = NSTAT;
return (x);
}
Node *
-op1(int a, Node *b)
+stat3(int a, Node *b, Node *c, Node *d)
{
- register Node *x;
+ Node *x;
- x = node1(a, b);
- x->ntype = NEXPR;
+ x = node3(a, b, c, d);
+ x->ntype = NSTAT;
return (x);
}
Node *
-stat1(int a, Node *b)
+stat4(int a, Node *b, Node *c, Node *d, Node *e)
{
- register Node *x;
+ Node *x;
- x = node1(a, b);
+ x = node4(a, b, c, d, e);
x->ntype = NSTAT;
return (x);
}
Node *
-op3(int a, Node *b, Node *c, Node *d)
+op1(int a, Node *b)
{
- register Node *x;
+ Node *x;
- x = node3(a, b, c, d);
+ x = node1(a, b);
x->ntype = NEXPR;
return (x);
}
Node *
-op4(int a, Node *b, Node *c, Node *d, Node *e)
+op2(int a, Node *b, Node *c)
{
- register Node *x;
+ Node *x;
- x = node4(a, b, c, d, e);
+ x = node2(a, b, c);
x->ntype = NEXPR;
return (x);
}
Node *
-stat2(int a, Node *b, Node *c)
+op3(int a, Node *b, Node *c, Node *d)
{
- register Node *x;
+ Node *x;
- x = node2(a, b, c);
- x->ntype = NSTAT;
+ x = node3(a, b, c, d);
+ x->ntype = NEXPR;
return (x);
}
Node *
-stat4(int a, Node *b, Node *c, Node *d, Node *e)
+op4(int a, Node *b, Node *c, Node *d, Node *e)
{
- register Node *x;
+ Node *x;
x = node4(a, b, c, d, e);
- x->ntype = NSTAT;
+ x->ntype = NEXPR;
return (x);
}
Node *
-valtonode(Cell *a, int b)
+celltonode(Cell *a, int b)
{
- register Node *x;
+ Node *x;
a->ctype = OCELL;
a->csub = b;
@@ -196,10 +219,10 @@ valtonode(Cell *a, int b)
}
Node *
-rectonode(void)
+rectonode(void) /* make $0 into a Node */
{
- /* return valtonode(lookup("$0", symtab), CFLD); */
- return (valtonode(recloc, CFLD));
+ extern Cell *literal0;
+ return (op1(INDIRECT, celltonode(literal0, CUNK)));
}
Node *
@@ -209,23 +232,26 @@ makearr(Node *p)
if (isvalue(p)) {
cp = (Cell *)(p->narg[0]);
- if (isfunc(cp))
- ERROR "%s is a function, not an array", cp->nval SYNTAX;
+ if (isfcn(cp))
+ SYNTAX("%s is a function, not an array", cp->nval);
else if (!isarr(cp)) {
xfree(cp->sval);
- cp->sval = (uchar *)makesymtab(NSYMTAB);
+ cp->sval = (char *)makesymtab(NSYMTAB);
cp->tval = ARR;
}
}
return (p);
}
+int paircnt; /* number of them in use */
+int *pairstack; /* state of each pat,pat */
+
Node *
-pa2stat(Node *a, Node *b, Node *c)
+pa2stat(Node *a, Node *b, Node *c) /* pat, pat {...} */
{
- register Node *x;
+ Node *x;
- x = node4(PASTAT2, a, b, c, (Node *)paircnt);
+ x = node4(PASTAT2, a, b, c, itonp(paircnt));
paircnt++;
x->ntype = NSTAT;
return (x);
@@ -234,7 +260,7 @@ pa2stat(Node *a, Node *b, Node *c)
Node *
linkum(Node *a, Node *b)
{
- register Node *c;
+ Node *c;
if (errorflag) /* don't link things that are wrong */
return (a);
@@ -248,38 +274,55 @@ linkum(Node *a, Node *b)
return (a);
}
+/* turn on FCN bit in definition of v; */
+/* vl is the arglist, st is the body of function */
void
-defn(Cell *v, Node *vl, Node *st) /* turn on FCN bit in definition */
+defn(Cell *v, Node *vl, Node *st)
{
Node *p;
int n;
if (isarr(v)) {
- ERROR "`%s' is an array name and a function name",
- v->nval SYNTAX;
+ SYNTAX("`%s' is an array name and a function name", v->nval);
+ return;
+ }
+ if (isarg(v->nval) != -1) {
+ SYNTAX("`%s' is both function name and argument name", v->nval);
return;
}
+
v->tval = FCN;
- v->sval = (uchar *)st;
+ v->sval = (char *)st;
n = 0; /* count arguments */
- for (p = vl; p; p = p->nnext)
+ for (p = vl; p != NULL; p = p->nnext)
n++;
v->fval = n;
dprintf(("defining func %s (%d args)\n", v->nval, n));
}
+/* is s in argument list for current function? */
+/* return -1 if not, otherwise arg # */
int
-isarg(uchar *s) /* is s in argument list for current function? */
+isarg(const char *s)
{
extern Node *arglist;
Node *p = arglist;
int n;
- for (n = 0; p != 0; p = p->nnext, n++) {
- if (strcmp((char *)((Cell *)(p->narg[0]))->nval,
- (char *)s) == 0) {
+ for (n = 0; p != NULL; p = p->nnext, n++)
+ if (strcmp(((Cell *)(p->narg[0]))->nval, s) == 0)
return (n);
- }
- }
return (-1);
}
+
+int
+ptoi(void *p) /* convert pointer to integer */
+{
+ return ((int)(long)p); /* swearing that p fits, of course */
+}
+
+Node *
+itonp(int i) /* and vice versa */
+{
+ return ((Node *)(long)i);
+}
diff --git a/usr/src/cmd/awk/run.c b/usr/src/cmd/awk/run.c
index 3cc8341d84..5226d43ed2 100644
--- a/usr/src/cmd/awk/run.c
+++ b/usr/src/cmd/awk/run.c
@@ -1,4 +1,28 @@
/*
+ * Copyright (C) Lucent Technologies 1997
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that the above copyright notice appear in all
+ * copies and that both that the copyright notice and this
+ * permission notice and warranty disclaimer appear in supporting
+ * documentation, and that the name Lucent Technologies or any of
+ * its entities not be used in advertising or publicity pertaining
+ * to distribution of the software without specific, written prior
+ * permission.
+ *
+ * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+ * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+ * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+ * THIS SOFTWARE.
+ */
+
+/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
@@ -26,178 +50,221 @@
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
-#define tempfree(x, s) if (istemp(x)) tfree(x, s)
-
-#define execute(p) r_execute(p)
-
#define DEBUG
-#include "awk.h"
-#include <math.h>
-#include "y.tab.h"
#include <stdio.h>
#include <ctype.h>
#include <setjmp.h>
+#include <math.h>
#include <time.h>
+#include <sys/wait.h>
+#include "awk.h"
+#include "y.tab.h"
-#ifndef FOPEN_MAX
-#define FOPEN_MAX 15 /* max number of open files, from ANSI std. */
-#endif
-
+#define tempfree(x) if (istemp(x)) tfree(x)
static jmp_buf env;
+extern Awkfloat srand_seed;
-static Cell *r_execute(Node *);
-static Cell *gettemp(char *), *copycell(Cell *);
-static FILE *openfile(int, uchar *), *redirect(int, Node *);
-
-int paircnt;
-Node *winner = NULL;
+static Cell *execute(Node *);
+static Cell *gettemp(void), *copycell(Cell *);
+static FILE *openfile(int, const char *), *redirect(int, Node *);
-static Cell *tmps;
+Node *winner = NULL; /* root of parse tree */
+static Cell *tmps; /* free temporary cells for execution */
-static Cell truecell = { OBOOL, BTRUE, 0, 0, 1.0, NUM };
-Cell *true = &truecell;
-static Cell falsecell = { OBOOL, BFALSE, 0, 0, 0.0, NUM };
-Cell *false = &falsecell;
-static Cell breakcell = { OJUMP, JBREAK, 0, 0, 0.0, NUM };
+static Cell truecell = { OBOOL, BTRUE, NULL, NULL, 1.0, NUM, NULL };
+Cell *True = &truecell;
+static Cell falsecell = { OBOOL, BFALSE, NULL, NULL, 0.0, NUM, NULL };
+Cell *False = &falsecell;
+static Cell breakcell = { OJUMP, JBREAK, NULL, NULL, 0.0, NUM, NULL };
Cell *jbreak = &breakcell;
-static Cell contcell = { OJUMP, JCONT, 0, 0, 0.0, NUM };
+static Cell contcell = { OJUMP, JCONT, NULL, NULL, 0.0, NUM, NULL };
Cell *jcont = &contcell;
-static Cell nextcell = { OJUMP, JNEXT, 0, 0, 0.0, NUM };
+static Cell nextcell = { OJUMP, JNEXT, NULL, NULL, 0.0, NUM, NULL };
Cell *jnext = &nextcell;
-static Cell exitcell = { OJUMP, JEXIT, 0, 0, 0.0, NUM };
+static Cell nextfilecell = { OJUMP, JNEXTFILE, NULL, NULL, 0.0,
+ NUM, NULL };
+Cell *jnextfile = &nextfilecell;
+static Cell exitcell = { OJUMP, JEXIT, NULL, NULL, 0.0, NUM, NULL };
Cell *jexit = &exitcell;
-static Cell retcell = { OJUMP, JRET, 0, 0, 0.0, NUM };
+static Cell retcell = { OJUMP, JRET, NULL, NULL, 0.0, NUM, NULL };
Cell *jret = &retcell;
-static Cell tempcell = { OCELL, CTEMP, 0, 0, 0.0, NUM };
+static Cell tempcell = { OCELL, CTEMP, NULL, "", 0.0,
+ NUM|STR|DONTFREE, NULL };
Node *curnode = NULL; /* the node being executed, for debugging */
-static void tfree(Cell *, char *);
+static void tfree(Cell *);
static void closeall(void);
static double ipow(double, int);
+static void backsub(char **pb_ptr, char **sptr_ptr);
+
+
+/*
+ * buffer memory management
+ *
+ * Grows *pbuf with realloc() when *psiz is smaller than minlen; a buffer
+ * that is already large enough is left untouched.
+ *
+ * pbuf: address of pointer to buffer being managed
+ * psiz: address of buffer size variable; updated to the new size on growth
+ * minlen: minimum length of buffer needed
+ * quantum: buffer size quantum; minlen is rounded up to the next multiple
+ *	of it (0 means no rounding)
+ * pbptr: address of movable pointer into buffer, or 0 if none; it is
+ *	re-based onto the new buffer when the buffer is reallocated
+ * whatrtn: name of the calling routine if failure should cause fatal error,
+ *	or NULL to have the failure reported through the return value
+ *
+ * return 0 for realloc failure, !=0 for success
+ */
+int
+adjbuf(char **pbuf, size_t *psiz, size_t minlen, size_t quantum, char **pbptr,
+    const char *whatrtn)
+{
+	if (minlen > *psiz) {
+		char *tbuf;
+		/* keep sizes and offsets in size_t; int would truncate */
+		size_t rminlen = quantum ? minlen % quantum : 0;
+		size_t boff = pbptr ? (size_t)(*pbptr - *pbuf) : 0;
+		/* round up to next multiple of quantum */
+		if (rminlen)
+			minlen += quantum - rminlen;
+		tbuf = (char *)realloc(*pbuf, minlen);
+		/* whatrtn may be NULL (see below), so never hand it to %s raw */
+		dprintf(("adjbuf %s: %zu %zu (pbuf=%p, tbuf=%p)\n",
+		    whatrtn ? whatrtn : "(null)",
+		    *psiz, minlen, (void *)*pbuf, (void *)tbuf));
+		if (tbuf == NULL) {
+			/* only callers that named themselves want a fatal error */
+			if (whatrtn)
+				FATAL("out of memory in %s", whatrtn);
+			return (0);
+		}
+		*pbuf = tbuf;
+		*psiz = minlen;
+		if (pbptr)
+			*pbptr = tbuf + boff;
+	}
+	return (1);
+}
void
-run(Node *a)
+run(Node *a) /* execution of parse tree starts here */
{
+ extern void stdinit(void);
+
+ stdinit();
(void) execute(a);
closeall();
}
static Cell *
-r_execute(Node *u)
+execute(Node *u) /* execute a node of the parse tree */
{
- register Cell *(*proc)();
- register Cell *x;
- register Node *a;
+ Cell *(*proc)(Node **, int);
+ Cell *x;
+ Node *a;
if (u == NULL)
- return (true);
+ return (True);
for (a = u; ; a = a->nnext) {
curnode = a;
if (isvalue(a)) {
x = (Cell *) (a->narg[0]);
- if ((x->tval & FLD) && !donefld)
+ if (isfld(x) && !donefld)
fldbld();
- else if ((x->tval & REC) && !donerec)
+ else if (isrec(x) && !donerec)
recbld();
return (x);
}
/* probably a Cell* but too risky to print */
if (notlegal(a->nobj))
- ERROR "illegal statement" FATAL;
+ FATAL("illegal statement");
proc = proctab[a->nobj-FIRSTTOKEN];
x = (*proc)(a->narg, a->nobj);
- if ((x->tval & FLD) && !donefld)
+ if (isfld(x) && !donefld)
fldbld();
- else if ((x->tval & REC) && !donerec)
+ else if (isrec(x) && !donerec)
recbld();
if (isexpr(a))
return (x);
/* a statement, goto next statement */
if (isjump(x))
return (x);
- if (a->nnext == (Node *)NULL)
+ if (a->nnext == NULL)
return (x);
- tempfree(x, "execute");
+ tempfree(x);
}
}
+/* execute an awk program */
+/* a[0] = BEGIN, a[1] = body, a[2] = END */
/*ARGSUSED*/
Cell *
program(Node **a, int n)
{
- register Cell *x;
+ Cell *x;
if (setjmp(env) != 0)
goto ex;
if (a[0]) { /* BEGIN */
x = execute(a[0]);
if (isexit(x))
- return (true);
+ return (True);
if (isjump(x)) {
- ERROR "illegal break, continue or next from BEGIN"
- FATAL;
+ FATAL("illegal break, continue, next or nextfile "
+ "from BEGIN");
}
- tempfree(x, "");
+ tempfree(x);
}
-loop:
if (a[1] || a[2])
- while (getrec(&record, &record_size) > 0) {
+ while (getrec(&record, &recsize, 1) > 0) {
x = execute(a[1]);
if (isexit(x))
break;
- tempfree(x, "");
+ tempfree(x);
}
ex:
- if (setjmp(env) != 0)
+ if (setjmp(env) != 0) /* handles exit within END */
goto ex1;
if (a[2]) { /* END */
x = execute(a[2]);
- if (iscont(x)) /* read some more */
- goto loop;
- if (isbreak(x) || isnext(x))
- ERROR "illegal break or next from END" FATAL;
- tempfree(x, "");
+ if (isbreak(x) || isnext(x) || iscont(x))
+ FATAL("illegal break, continue, next or nextfile "
+ "from END");
+ tempfree(x);
}
ex1:
- return (true);
+ return (True);
}
-struct Frame {
+struct Frame { /* stack frame for awk function calls */
int nargs; /* number of arguments in this call */
Cell *fcncell; /* pointer to Cell for function */
Cell **args; /* pointer to array of arguments after execute */
Cell *retval; /* return value */
};
-#define NARGS 30
+#define NARGS 50 /* max args in a call */
-struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */
+struct Frame *frame = NULL; /* base of stack frames; dynamically alloc'd */
int nframe = 0; /* number of frames allocated */
struct Frame *fp = NULL; /* frame pointer. bottom level unused */
/*ARGSUSED*/
Cell *
-call(Node **a, int n)
+call(Node **a, int n) /* function call. very kludgy and fragile */
{
static Cell newcopycell =
- { OCELL, CCOPY, 0, (uchar *) "", 0.0, NUM|STR|DONTFREE };
- int i, ncall, ndef, freed = 0;
+ { OCELL, CCOPY, 0, "", 0.0, NUM|STR|DONTFREE, NULL };
+ int i, ncall, ndef;
+ /* handles potential double freeing when fcn & param share a tempcell */
+ int freed = 0;
Node *x;
- Cell *args[NARGS], *oargs[NARGS], *y, *z, *fcn;
- uchar *s;
+ Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */
+ Cell *y, *z, *fcn;
+ char *s;
fcn = execute(a[0]); /* the function itself */
s = fcn->nval;
- if (!isfunc(fcn))
- ERROR "calling undefined function %s", s FATAL;
+ if (!isfcn(fcn))
+ FATAL("calling undefined function %s", s);
if (frame == NULL) {
fp = frame = (struct Frame *)calloc(nframe += 100,
sizeof (struct Frame));
if (frame == NULL) {
- ERROR "out of space for stack frames calling %s",
- s FATAL;
+ FATAL("out of space for stack frames calling %s", s);
}
}
for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */
@@ -206,12 +273,12 @@ call(Node **a, int n)
dprintf(("calling %s, %d args (%d in defn), fp=%d\n",
s, ncall, ndef, fp-frame));
if (ncall > ndef) {
- ERROR "function %s called with %d args, uses only %d",
- s, ncall, ndef WARNING;
+ WARNING("function %s called with %d args, uses only %d",
+ s, ncall, ndef);
}
if (ncall + ndef > NARGS) {
- ERROR "function %s has %d arguments, limit %d",
- s, ncall+ndef, NARGS FATAL;
+ FATAL("function %s has %d arguments, limit %d",
+ s, ncall+ndef, NARGS);
}
for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) {
/* get call args */
@@ -219,20 +286,20 @@ call(Node **a, int n)
y = execute(x);
oargs[i] = y;
dprintf(("args[%d]: %s %f <%s>, t=%o\n",
- i, y->nval, y->fval,
- isarr(y) ? "(array)" : (char *)y->sval, y->tval));
- if (isfunc(y)) {
- ERROR "can't use function %s as argument in %s",
- y->nval, s FATAL;
+ i, NN(y->nval), y->fval,
+ isarr(y) ? "(array)" : NN(y->sval), y->tval));
+ if (isfcn(y)) {
+ FATAL("can't use function %s as argument in %s",
+ y->nval, s);
}
if (isarr(y))
args[i] = y; /* arrays by ref */
else
args[i] = copycell(y);
- tempfree(y, "callargs");
+ tempfree(y);
}
- for (; i < ndef; i++) { /* add null args for ones not provided */
- args[i] = gettemp("nullargs");
+ for (; i < ndef; i++) { /* add null args for ones not provided */
+ args[i] = gettemp();
*args[i] = newcopycell;
}
fp++; /* now ok to up frame */
@@ -241,13 +308,13 @@ call(Node **a, int n)
frame = (struct Frame *)
realloc(frame, (nframe += 100) * sizeof (struct Frame));
if (frame == NULL)
- ERROR "out of space for stack frames in %s", s FATAL;
+ FATAL("out of space for stack frames in %s", s);
fp = frame + dfp;
}
fp->fcncell = fcn;
fp->args = args;
fp->nargs = ndef; /* number defined with (excess are locals) */
- fp->retval = gettemp("retval");
+ fp->retval = gettemp();
dprintf(("start exec of %s, fp=%d\n", s, fp-frame));
/*LINTED align*/
@@ -261,24 +328,29 @@ call(Node **a, int n)
if (i >= ncall) {
freesymtab(t);
t->csub = CTEMP;
+ tempfree(t);
} else {
oargs[i]->tval = t->tval;
oargs[i]->tval &= ~(STR|NUM|DONTFREE);
oargs[i]->sval = t->sval;
- tempfree(t, "oargsarr");
+ tempfree(t);
}
}
- } else {
+ } else if (t != y) { /* kludge to prevent freeing twice */
t->csub = CTEMP;
- tempfree(t, "fp->args");
- if (t == y) freed = 1;
+ tempfree(t);
+ } else if (t == y && t->csub == CCOPY) {
+ t->csub = CTEMP;
+ tempfree(t);
+ freed = 1;
}
}
- tempfree(fcn, "call.fcn");
+ tempfree(fcn);
if (isexit(y) || isnext(y))
return (y);
- if (!freed)
- tempfree(y, "fcn ret"); /* this can free twice! */
+ if (freed == 0) {
+ tempfree(y); /* don't free twice! */
+ }
z = fp->retval; /* return value */
dprintf(("%s returns %g |%s| %o\n",
s, getfval(z), getsval(z), z->tval));
@@ -291,42 +363,47 @@ copycell(Cell *x) /* make a copy of a cell in a temp */
{
Cell *y;
- y = gettemp("copycell");
+ /* copy is not constant or field */
+
+ y = gettemp();
+ y->tval = x->tval & ~(CON|FLD|REC);
y->csub = CCOPY; /* prevents freeing until call is over */
- y->nval = x->nval;
- y->sval = x->sval ? tostring(x->sval) : NULL;
+ y->nval = x->nval; /* BUG? */
+ if (isstr(x)) {
+ y->sval = tostring(x->sval);
+ y->tval &= ~DONTFREE;
+ } else
+ y->tval |= DONTFREE;
y->fval = x->fval;
- /* copy is not constant or field is DONTFREE right? */
- y->tval = x->tval & ~(CON|FLD|REC|DONTFREE);
return (y);
}
/*ARGSUSED*/
Cell *
-arg(Node **a, int nnn)
+arg(Node **a, int nnn) /* nth argument of a function */
{
int n;
- n = (int)a[0]; /* argument number, counting from 0 */
+ n = ptoi(a[0]); /* argument number, counting from 0 */
dprintf(("arg(%d), fp->nargs=%d\n", n, fp->nargs));
if (n+1 > fp->nargs) {
- ERROR "argument #%d of function %s was not supplied",
- n+1, fp->fcncell->nval FATAL;
+ FATAL("argument #%d of function %s was not supplied",
+ n+1, fp->fcncell->nval);
}
return (fp->args[n]);
}
Cell *
-jump(Node **a, int n)
+jump(Node **a, int n) /* break, continue, next, nextfile, return */
{
- register Cell *y;
+ Cell *y;
switch (n) {
case EXIT:
if (a[0] != NULL) {
y = execute(a[0]);
errorflag = (int)getfval(y);
- tempfree(y, "");
+ tempfree(y);
}
longjmp(env, 1);
/*NOTREACHED*/
@@ -341,77 +418,85 @@ jump(Node **a, int n)
(void) setsval(fp->retval, getsval(y));
else if (y->tval & NUM)
(void) setfval(fp->retval, getfval(y));
- tempfree(y, "");
+ else /* can't happen */
+ FATAL("bad type variable %d", y->tval);
+ tempfree(y);
}
return (jret);
case NEXT:
return (jnext);
+ case NEXTFILE:
+ nextfile();
+ return (jnextfile);
case BREAK:
return (jbreak);
case CONTINUE:
return (jcont);
default: /* can't happen */
- ERROR "illegal jump type %d", n FATAL;
+ FATAL("illegal jump type %d", n);
}
/*NOTREACHED*/
return (NULL);
}
Cell *
-getaline(Node **a, int n)
+awkgetline(Node **a, int n) /* get next line from specific input */
{
/* a[0] is variable, a[1] is operator, a[2] is filename */
- register Cell *r, *x;
- uchar *buf;
+ Cell *r, *x;
FILE *fp;
- size_t len;
+ char *buf;
+ size_t bufsize = recsize;
+ int mode;
+
+ if ((buf = (char *)malloc(bufsize)) == NULL)
+ FATAL("out of memory in getline");
(void) fflush(stdout); /* in case someone is waiting for a prompt */
- r = gettemp("");
+ r = gettemp();
if (a[1] != NULL) { /* getline < file */
x = execute(a[2]); /* filename */
- if ((int)a[1] == '|') /* input pipe */
- a[1] = (Node *)LE; /* arbitrary flag */
- fp = openfile((int)a[1], getsval(x));
- tempfree(x, "");
- buf = NULL;
+ mode = ptoi(a[1]);
+ if (mode == '|') /* input pipe */
+ mode = LE; /* arbitrary flag */
+ fp = openfile(mode, getsval(x));
+ tempfree(x);
if (fp == NULL)
n = -1;
else
- n = readrec(&buf, &len, fp);
- if (n > 0) {
- if (a[0] != NULL) { /* getline var <file */
- (void) setsval(execute(a[0]), buf);
- } else { /* getline <file */
- if (!(recloc->tval & DONTFREE))
- xfree(recloc->sval);
- expand_buf(&record, &record_size, len);
- (void) memcpy(record, buf, len);
- record[len] = '\0';
- recloc->sval = record;
- recloc->tval = REC | STR | DONTFREE;
- donerec = 1; donefld = 0;
+ n = readrec(&buf, &bufsize, fp);
+ /*LINTED if*/
+ if (n <= 0) {
+ ;
+ } else if (a[0] != NULL) { /* getline var <file */
+ x = execute(a[0]);
+ (void) setsval(x, buf);
+ tempfree(x);
+ } else { /* getline <file */
+ (void) setsval(recloc, buf);
+ if (is_number(recloc->sval)) {
+ recloc->fval = atof(recloc->sval);
+ recloc->tval |= NUM;
}
}
- if (buf != NULL)
- free(buf);
} else { /* bare getline; use current input */
if (a[0] == NULL) /* getline */
- n = getrec(&record, &record_size);
+ n = getrec(&record, &recsize, 1);
else { /* getline var */
- init_buf(&buf, &len, LINE_INCR);
- n = getrec(&buf, &len);
- (void) setsval(execute(a[0]), buf);
- free(buf);
+ n = getrec(&buf, &bufsize, 0);
+ x = execute(a[0]);
+ (void) setsval(x, buf);
+ tempfree(x);
}
}
(void) setfval(r, (Awkfloat)n);
+ free(buf);
return (r);
}
/*ARGSUSED*/
Cell *
-getnf(Node **a, int n)
+getnf(Node **a, int n) /* get NF */
{
if (donefld == 0)
fldbld();
@@ -420,208 +505,241 @@ getnf(Node **a, int n)
/*ARGSUSED*/
Cell *
-array(Node **a, int n)
+array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
{
- register Cell *x, *y, *z;
- register uchar *s;
- register Node *np;
- uchar *buf;
- size_t bsize, tlen, len, slen;
+ Cell *x, *y, *z;
+ char *s;
+ Node *np;
+ char *buf;
+ size_t bufsz = recsize;
+ size_t tlen = 0, len, nsub;
+
+ if ((buf = (char *)malloc(bufsz)) == NULL)
+ FATAL("out of memory in array");
x = execute(a[0]); /* Cell* for symbol table */
- init_buf(&buf, &bsize, LINE_INCR);
buf[0] = '\0';
- tlen = 0;
- slen = strlen((char *)*SUBSEP);
- for (np = a[1]; np; np = np->nnext) {
+ for (np = a[1]; np != NULL; np = np->nnext) {
y = execute(np); /* subscript */
s = getsval(y);
- len = strlen((char *)s);
- expand_buf(&buf, &bsize, tlen + len + slen);
+ len = strlen(s);
+ nsub = strlen(getsval(subseploc));
+ (void) adjbuf(&buf, &bufsz, tlen + len + nsub + 1,
+ recsize, 0, "array");
(void) memcpy(&buf[tlen], s, len);
tlen += len;
if (np->nnext) {
- (void) memcpy(&buf[tlen], *SUBSEP, slen);
- tlen += slen;
+ (void) memcpy(&buf[tlen], *SUBSEP, nsub);
+ tlen += nsub;
}
buf[tlen] = '\0';
- tempfree(y, "");
+ tempfree(y);
}
if (!isarr(x)) {
- dprintf(("making %s into an array\n", x->nval));
+ dprintf(("making %s into an array\n", NN(x->nval)));
if (freeable(x))
xfree(x->sval);
x->tval &= ~(STR|NUM|DONTFREE);
x->tval |= ARR;
- x->sval = (uchar *) makesymtab(NSYMTAB);
+ x->sval = (char *)makesymtab(NSYMTAB);
}
/*LINTED align*/
- z = setsymtab(buf, (uchar *)"", 0.0, STR|NUM, (Array *)x->sval);
+ z = setsymtab(buf, "", 0.0, STR|NUM, (Array *)x->sval);
z->ctype = OCELL;
z->csub = CVAR;
- tempfree(x, "");
+ tempfree(x);
free(buf);
return (z);
}
/*ARGSUSED*/
Cell *
-delete(Node **a, int n)
+awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
{
Cell *x, *y;
Node *np;
- uchar *buf, *s;
- size_t bsize, tlen, slen, len;
+ char *s;
+ size_t nsub;
+ size_t tlen = 0, len;
x = execute(a[0]); /* Cell* for symbol table */
- if (!isarr(x))
- return (true);
- init_buf(&buf, &bsize, LINE_INCR);
- buf[0] = '\0';
- tlen = 0;
- slen = strlen((char *)*SUBSEP);
- for (np = a[1]; np; np = np->nnext) {
- y = execute(np); /* subscript */
- s = getsval(y);
- len = strlen((char *)s);
- expand_buf(&buf, &bsize, tlen + len + slen);
- (void) memcpy(&buf[tlen], s, len);
- tlen += len;
- if (np->nnext) {
- (void) memcpy(&buf[tlen], *SUBSEP, slen);
- tlen += slen;
+ if (x == symtabloc) {
+ FATAL("cannot delete SYMTAB or its elements");
+ }
+ if (!isarr(x)) {
+ dprintf(("making %s into an array\n", x->nval));
+ if (freeable(x))
+ xfree(x->sval);
+ x->tval &= ~(STR|NUM|DONTFREE);
+ x->tval |= ARR;
+ x->sval = (char *)makesymtab(NSYMTAB);
+ }
+ if (a[1] == NULL) { /* delete the elements, not the table */
+ freesymtab(x);
+ x->tval &= ~STR;
+ x->tval |= ARR;
+ x->sval = (char *)makesymtab(NSYMTAB);
+ } else {
+ size_t bufsz = recsize;
+ char *buf;
+ if ((buf = (char *)malloc(bufsz)) == NULL)
+ FATAL("out of memory in awkdelete");
+ buf[0] = '\0';
+ for (np = a[1]; np != NULL; np = np->nnext) {
+ y = execute(np); /* subscript */
+ s = getsval(y);
+ len = strlen(s);
+ nsub = strlen(getsval(subseploc));
+ (void) adjbuf(&buf, &bufsz, tlen + len + nsub + 1,
+ recsize, 0, "awkdelete");
+ (void) memcpy(&buf[tlen], s, len);
+ tlen += len;
+ if (np->nnext) {
+ (void) memcpy(&buf[tlen], *SUBSEP, nsub);
+ tlen += nsub;
+ }
+ buf[tlen] = '\0';
+ tempfree(y);
}
- buf[tlen] = '\0';
- tempfree(y, "");
+ freeelem(x, buf);
+ free(buf);
}
- freeelem(x, buf);
- tempfree(x, "");
- free(buf);
- return (true);
+ tempfree(x);
+ return (True);
}
/*ARGSUSED*/
Cell *
-intest(Node **a, int n)
+intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */
{
- register Cell *x, *ap, *k;
+ Cell *x, *ap, *k;
Node *p;
- uchar *buf;
- uchar *s;
- size_t bsize, tlen, slen, len;
+ char *buf;
+ char *s;
+ size_t bufsz = recsize;
+ size_t nsub;
+ size_t tlen = 0, len;
ap = execute(a[1]); /* array name */
- if (!isarr(ap))
- ERROR "%s is not an array", ap->nval FATAL;
- init_buf(&buf, &bsize, LINE_INCR);
- buf[0] = 0;
- tlen = 0;
- slen = strlen((char *)*SUBSEP);
- for (p = a[0]; p; p = p->nnext) {
+ if (!isarr(ap)) {
+ dprintf(("making %s into an array\n", ap->nval));
+ if (freeable(ap))
+ xfree(ap->sval);
+ ap->tval &= ~(STR|NUM|DONTFREE);
+ ap->tval |= ARR;
+ ap->sval = (char *)makesymtab(NSYMTAB);
+ }
+ if ((buf = (char *)malloc(bufsz)) == NULL) {
+ FATAL("out of memory in intest");
+ }
+ buf[0] = '\0';
+ for (p = a[0]; p != NULL; p = p->nnext) {
x = execute(p); /* expr */
s = getsval(x);
- len = strlen((char *)s);
- expand_buf(&buf, &bsize, tlen + len + slen);
+ len = strlen(s);
+ nsub = strlen(getsval(subseploc));
+ (void) adjbuf(&buf, &bufsz, tlen + len + nsub + 1,
+ recsize, 0, "intest");
(void) memcpy(&buf[tlen], s, len);
tlen += len;
- tempfree(x, "");
+ tempfree(x);
if (p->nnext) {
- (void) memcpy(&buf[tlen], *SUBSEP, slen);
- tlen += slen;
+ (void) memcpy(&buf[tlen], *SUBSEP, nsub);
+ tlen += nsub;
}
buf[tlen] = '\0';
}
/*LINTED align*/
k = lookup(buf, (Array *)ap->sval);
- tempfree(ap, "");
+ tempfree(ap);
free(buf);
if (k == NULL)
- return (false);
+ return (False);
else
- return (true);
+ return (True);
}
Cell *
-matchop(Node **a, int n)
+matchop(Node **a, int n) /* ~ and match() */
{
- register Cell *x, *y;
- register uchar *s, *t;
- register int i;
+ Cell *x, *y;
+ char *s, *t;
+ int i;
fa *pfa;
- int (*mf)() = match, mode = 0;
+ int (*mf)(fa *, const char *) = match, mode = 0;
if (n == MATCHFCN) {
mf = pmatch;
mode = 1;
}
- x = execute(a[1]);
+ x = execute(a[1]); /* a[1] = target text */
s = getsval(x);
- if (a[0] == 0)
- i = (*mf)(a[2], s);
+ if (a[0] == NULL) /* a[0] == NULL: a[2] is an already-compiled reg expr */
+ i = (*mf)((fa *)a[2], s);
else {
- y = execute(a[2]);
+ y = execute(a[2]); /* a[2] = regular expr */
t = getsval(y);
pfa = makedfa(t, mode);
i = (*mf)(pfa, s);
- tempfree(y, "");
+ tempfree(y);
}
- tempfree(x, "");
+ tempfree(x);
if (n == MATCHFCN) {
int start = patbeg - s + 1;
if (patlen < 0)
start = 0;
(void) setfval(rstartloc, (Awkfloat)start);
(void) setfval(rlengthloc, (Awkfloat)patlen);
- x = gettemp("");
+ x = gettemp();
x->tval = NUM;
x->fval = start;
return (x);
- } else if (n == MATCH && i == 1 || n == NOTMATCH && i == 0)
- return (true);
+ } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0))
+ return (True);
else
- return (false);
+ return (False);
}
Cell *
-boolop(Node **a, int n)
+boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */
{
- register Cell *x, *y;
- register int i;
+ Cell *x, *y;
+ int i;
x = execute(a[0]);
i = istrue(x);
- tempfree(x, "");
+ tempfree(x);
switch (n) {
case BOR:
if (i)
- return (true);
+ return (True);
y = execute(a[1]);
i = istrue(y);
- tempfree(y, "");
- return (i ? true : false);
+ tempfree(y);
+ return (i ? True : False);
case AND:
if (!i)
- return (false);
+ return (False);
y = execute(a[1]);
i = istrue(y);
- tempfree(y, "");
- return (i ? true : false);
+ tempfree(y);
+ return (i ? True : False);
case NOT:
- return (i ? false : true);
+ return (i ? False : True);
default: /* can't happen */
- ERROR "unknown boolean operator %d", n FATAL;
+ FATAL("unknown boolean operator %d", n);
}
/*NOTREACHED*/
return (NULL);
}
Cell *
-relop(Node **a, int n)
+relop(Node **a, int n) /* a[0] < a[1], etc. */
{
- register int i;
- register Cell *x, *y;
+ int i;
+ Cell *x, *y;
Awkfloat j;
x = execute(a[0]);
@@ -630,102 +748,108 @@ relop(Node **a, int n)
j = x->fval - y->fval;
i = j < 0 ? -1: (j > 0 ? 1: 0);
} else {
- i = strcmp((char *)getsval(x), (char *)getsval(y));
+ i = strcmp(getsval(x), getsval(y));
}
- tempfree(x, "");
- tempfree(y, "");
+ tempfree(x);
+ tempfree(y);
switch (n) {
- case LT: return (i < 0 ? true : false);
- case LE: return (i <= 0 ? true : false);
- case NE: return (i != 0 ? true : false);
- case EQ: return (i == 0 ? true : false);
- case GE: return (i >= 0 ? true : false);
- case GT: return (i > 0 ? true : false);
+ case LT: return (i < 0 ? True : False);
+ case LE: return (i <= 0 ? True : False);
+ case NE: return (i != 0 ? True : False);
+ case EQ: return (i == 0 ? True : False);
+ case GE: return (i >= 0 ? True : False);
+ case GT: return (i > 0 ? True : False);
default: /* can't happen */
- ERROR "unknown relational operator %d", n FATAL;
+ FATAL("unknown relational operator %d", n);
}
/*NOTREACHED*/
- return (false);
+ return (False);
}
static void
-tfree(Cell *a, char *s)
+tfree(Cell *a) /* free a tempcell */
{
- if (dbg > 1) {
- (void) printf("## tfree %.8s %06lo %s\n",
- s, (ulong_t)a, a->sval ? a->sval : (uchar *)"");
- }
- if (freeable(a))
+ if (freeable(a)) {
+ dprintf(("freeing %s %s %o\n",
+ NN(a->nval), NN(a->sval), a->tval));
xfree(a->sval);
+ }
if (a == tmps)
- ERROR "tempcell list is curdled" FATAL;
+ FATAL("tempcell list is curdled");
a->cnext = tmps;
tmps = a;
}
static Cell *
-gettemp(char *s)
+gettemp(void) /* get a tempcell */
{
int i;
- register Cell *x;
+ Cell *x;
if (!tmps) {
tmps = (Cell *)calloc(100, sizeof (Cell));
if (!tmps)
- ERROR "no space for temporaries" FATAL;
+ FATAL("out of space for temporaries");
for (i = 1; i < 100; i++)
tmps[i-1].cnext = &tmps[i];
- tmps[i-1].cnext = 0;
+ tmps[i-1].cnext = NULL;
}
x = tmps;
tmps = x->cnext;
*x = tempcell;
- if (dbg > 1)
- (void) printf("## gtemp %.8s %06lo\n", s, (ulong_t)x);
+ dprintf(("gtemp %.8s %06lo\n", NN(x->nval), (ulong_t)x));
return (x);
}
/*ARGSUSED*/
Cell *
-indirect(Node **a, int n)
+indirect(Node **a, int n) /* $( a[0] ) */
{
- register Cell *x;
- register int m;
- register uchar *s;
+ Awkfloat val;
+ Cell *x;
+ int m;
+ char *s;
x = execute(a[0]);
- m = (int)getfval(x);
+
+ /* freebsd: defend against super large field numbers */
+ val = getfval(x);
+ if ((Awkfloat)INT_MAX < val)
+ FATAL("trying to access out of range field %s", x->nval);
+ m = (int)val;
if (m == 0 && !is_number(s = getsval(x))) /* suspicion! */
- ERROR "illegal field $(%s)", s FATAL;
- tempfree(x, "");
+ FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
+ /* BUG: can x->nval ever be null??? */
+ tempfree(x);
x = fieldadr(m);
- x->ctype = OCELL;
+ x->ctype = OCELL; /* BUG? why are these needed? */
x->csub = CFLD;
return (x);
}
/*ARGSUSED*/
Cell *
-substr(Node **a, int nnn)
+substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
{
- register int k, m, n;
- register uchar *s;
+ int k, m, n;
+ char *s;
int temp;
- register Cell *x, *y, *z;
+ Cell *x, *y, *z = NULL;
x = execute(a[0]);
y = execute(a[1]);
- if (a[2] != 0)
+ if (a[2] != NULL)
z = execute(a[2]);
s = getsval(x);
- k = strlen((char *)s) + 1;
+ k = strlen(s) + 1;
if (k <= 1) {
- tempfree(x, "");
- tempfree(y, "");
- if (a[2] != 0)
- tempfree(z, "");
- x = gettemp("");
- (void) setsval(x, (uchar *)"");
+ tempfree(x);
+ tempfree(y);
+ if (a[2] != NULL) {
+ tempfree(z);
+ }
+ x = gettemp();
+ (void) setsval(x, "");
return (x);
}
m = (int)getfval(y);
@@ -733,10 +857,10 @@ substr(Node **a, int nnn)
m = 1;
else if (m > k)
m = k;
- tempfree(y, "");
- if (a[2] != 0) {
+ tempfree(y);
+ if (a[2] != NULL) {
n = (int)getfval(z);
- tempfree(z, "");
+ tempfree(z);
} else
n = k - 1;
if (n < 0)
@@ -744,21 +868,21 @@ substr(Node **a, int nnn)
else if (n > k - m)
n = k - m;
dprintf(("substr: m=%d, n=%d, s=%s\n", m, n, s));
- y = gettemp("");
+ y = gettemp();
temp = s[n + m - 1]; /* with thanks to John Linderman */
s[n + m - 1] = '\0';
(void) setsval(y, s + m - 1);
s[n + m - 1] = temp;
- tempfree(x, "");
+ tempfree(x);
return (y);
}
/*ARGSUSED*/
Cell *
-sindex(Node **a, int nnn)
+sindex(Node **a, int nnn) /* index(a[0], a[1]) */
{
- register Cell *x, *y, *z;
- register uchar *s1, *s2, *p1, *p2, *q;
+ Cell *x, *y, *z;
+ char *s1, *s2, *p1, *p2, *q;
Awkfloat v = 0.0;
x = execute(a[0]);
@@ -766,7 +890,7 @@ sindex(Node **a, int nnn)
y = execute(a[1]);
s2 = getsval(y);
- z = gettemp("");
+ z = gettemp();
for (p1 = s1; *p1 != '\0'; p1++) {
for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++)
;
@@ -775,26 +899,32 @@ sindex(Node **a, int nnn)
break;
}
}
- tempfree(x, "");
- tempfree(y, "");
+ tempfree(x);
+ tempfree(y);
(void) setfval(z, v);
return (z);
}
-void
-format(uchar **bufp, uchar *s, Node *a)
+#define MAXNUMSIZE 50
+
+/* printf-like conversions */
+int
+format(char **pbuf, int *pbufsize, const char *s, Node *a)
{
- uchar *fmt;
- register uchar *os;
- register Cell *x;
- int flag = 0, len;
- uchar_t *buf;
- size_t bufsize, fmtsize, cnt, tcnt, ret;
-
- init_buf(&buf, &bufsize, LINE_INCR);
- init_buf(&fmt, &fmtsize, LINE_INCR);
+ char *fmt;
+ const char *os;
+ Cell *x;
+ int flag = 0, n, len;
+ int fmtwd; /* format width */
+ char *buf = *pbuf;
+ size_t bufsize = *pbufsize;
+ size_t fmtsz = recsize;
+ size_t cnt, tcnt, ret;
+
os = s;
cnt = 0;
+ if ((fmt = (char *)malloc(fmtsz)) == NULL)
+ FATAL("out of memory in format()");
while (*s) {
if (*s != '%') {
expand_buf(&buf, &bufsize, cnt);
@@ -807,58 +937,77 @@ format(uchar **bufp, uchar *s, Node *a)
s += 2;
continue;
}
+ /*
+ * have to be real careful in case this is a huge number,
+ * eg, "%100000d".
+ */
+ fmtwd = atoi(s+1);
+ if (fmtwd < 0)
+ fmtwd = -fmtwd;
for (tcnt = 0; ; s++) {
- expand_buf(&fmt, &fmtsize, tcnt);
+ expand_buf(&fmt, &fmtsz, tcnt);
fmt[tcnt++] = *s;
if (*s == '\0')
break;
- if (isalpha(*s) && *s != 'l' && *s != 'h' && *s != 'L')
+ if (isalpha((uschar)*s) &&
+ *s != 'l' && *s != 'h' && *s != 'L')
break; /* the ansi panoply */
+ if (*s == '$') {
+ FATAL("'$' not permitted in awk formats");
+ }
if (*s == '*') {
if (a == NULL) {
- ERROR
- "not enough args in printf(%s) or sprintf(%s)", os, os FATAL;
+ FATAL("not enough args in printf(%s) "
+ "or sprintf(%s)", os, os);
}
x = execute(a);
a = a->nnext;
tcnt--;
- expand_buf(&fmt, &fmtsize, tcnt + 12);
- ret = sprintf((char *)&fmt[tcnt], "%d",
- (int)getfval(x));
+ expand_buf(&fmt, &fmtsz, tcnt + 12);
+ fmtwd = (int)getfval(x);
+ ret = sprintf(&fmt[tcnt], "%d", fmtwd);
+ if (fmtwd < 0)
+ fmtwd = -fmtwd;
tcnt += ret;
- tempfree(x, "");
+ tempfree(x);
}
}
fmt[tcnt] = '\0';
+ if (fmtwd < 0)
+ fmtwd = -fmtwd;
switch (*s) {
+ case 'a': case 'A':
+ flag = *s;
+ break;
case 'f': case 'e': case 'g': case 'E': case 'G':
- flag = 1;
+ flag = 'f';
break;
case 'd': case 'i':
- flag = 2;
+ flag = 'd';
if (*(s-1) == 'l')
break;
fmt[tcnt - 1] = 'l';
- expand_buf(&fmt, &fmtsize, tcnt);
+ expand_buf(&fmt, &fmtsz, tcnt);
fmt[tcnt++] = 'd';
fmt[tcnt] = '\0';
break;
case 'o': case 'x': case 'X': case 'u':
- flag = *(s-1) == 'l' ? 2 : 3;
+ flag = *(s-1) == 'l' ? 'd' : 'u';
break;
case 's':
- flag = 4;
+ flag = 's';
break;
case 'c':
- flag = 5;
+ flag = 'c';
break;
default:
- flag = 0;
+ WARNING("weird printf conversion %s", fmt);
+ flag = '?';
break;
}
- if (flag == 0) {
- len = strlen((char *)fmt);
+ if (flag == '?') {
+ len = strlen(fmt);
expand_buf(&buf, &bufsize, cnt + len);
(void) memcpy(&buf[cnt], fmt, len);
cnt += len;
@@ -866,79 +1015,100 @@ format(uchar **bufp, uchar *s, Node *a)
continue;
}
if (a == NULL) {
- ERROR
- "not enough args in printf(%s) or sprintf(%s)", os, os FATAL;
+ FATAL("not enough args in printf(%s) "
+ "or sprintf(%s)", os, os);
}
x = execute(a);
a = a->nnext;
- for (;;) {
- /* make sure we have at least 1 byte space */
- expand_buf(&buf, &bufsize, cnt + 1);
- len = bufsize - cnt;
- switch (flag) {
- case 1:
- /*LINTED*/
- ret = snprintf((char *)&buf[cnt], len,
- (char *)fmt, getfval(x));
- break;
- case 2:
- /*LINTED*/
- ret = snprintf((char *)&buf[cnt], len,
- (char *)fmt, (long)getfval(x));
- break;
- case 3:
+ n = MAXNUMSIZE;
+ if (fmtwd > n)
+ n = fmtwd;
+retry:
+ /* make sure we have at least 1 byte space */
+ (void) adjbuf(&buf, &bufsize, 1 + n + cnt,
+ recsize, NULL, "format5");
+ len = bufsize - cnt;
+ switch (flag) {
+ case 'a':
+ case 'A':
+ case 'f':
+ /*LINTED*/
+ ret = snprintf(&buf[cnt], len,
+ fmt, getfval(x));
+ break;
+ case 'd':
+ /*LINTED*/
+ ret = snprintf(&buf[cnt], len,
+ fmt, (long)getfval(x));
+ break;
+ case 'u':
+ /*LINTED*/
+ ret = snprintf(&buf[cnt], len,
+ fmt, (int)getfval(x));
+ break;
+ case 's':
+ /*LINTED*/
+ ret = snprintf(&buf[cnt], len,
+ fmt, getsval(x));
+ break;
+ case 'c':
+ if (!isnum(x)) {
/*LINTED*/
- ret = snprintf((char *)&buf[cnt], len,
- (char *)fmt, (int)getfval(x));
+ ret = snprintf(&buf[cnt], len,
+ fmt, getsval(x)[0]);
break;
- case 4:
+ }
+ if (getfval(x)) {
/*LINTED*/
- ret = snprintf((char *)&buf[cnt], len,
- (char *)fmt, getsval(x));
- break;
- case 5:
- if (isnum(x)) {
- /*LINTED*/
- ret = snprintf((char *)&buf[cnt], len,
- (char *)fmt, (int)getfval(x));
- } else {
- /*LINTED*/
- ret = snprintf((char *)&buf[cnt], len,
- (char *)fmt, getsval(x)[0]);
- }
- break;
- default:
- ret = 0;
+ ret = snprintf(&buf[cnt], len,
+ fmt, (int)getfval(x));
+ } else {
+ /* explicit null byte */
+ buf[cnt] = '\0';
+ /* next output will start here */
+ buf[cnt + 1] = '\0';
+ ret = 1;
}
- if (ret < len)
- break;
- expand_buf(&buf, &bufsize, cnt + ret);
+ break;
+ default:
+ FATAL("can't happen: "
+ "bad conversion %c in format()", flag);
+ }
+ if (ret >= len) {
+ (void) adjbuf(&buf, &bufsize, cnt + ret + 1,
+ recsize, NULL, "format6");
+ goto retry;
}
- tempfree(x, "");
+ tempfree(x);
cnt += ret;
s++;
}
buf[cnt] = '\0';
- for (; a; a = a->nnext) /* evaluate any remaining args */
- (void) execute(a);
- *bufp = tostring(buf);
- free(buf);
free(fmt);
+ for (; a != NULL; a = a->nnext) /* evaluate any remaining args */
+ (void) execute(a);
+ *pbuf = buf;
+ *pbufsize = bufsize;
+ return (cnt);
}
/*ARGSUSED*/
Cell *
-a_sprintf(Node **a, int n)
+awksprintf(Node **a, int n) /* sprintf(a[0]) */
{
- register Cell *x;
- register Node *y;
- uchar *buf;
+ Cell *x;
+ Node *y;
+ char *buf;
+ int bufsz = 3 * recsize;
+ if ((buf = (char *)malloc(bufsz)) == NULL)
+ FATAL("out of memory in awksprintf");
y = a[0]->nnext;
x = execute(a[0]);
- format(&buf, getsval(x), y);
- tempfree(x, "");
- x = gettemp("");
+ if (format(&buf, &bufsz, getsval(x), y) == -1)
+ FATAL("sprintf string %.30s... too long. can't happen.", buf);
+ tempfree(x);
+ x = gettemp();
x->sval = buf;
x->tval = STR;
return (x);
@@ -946,44 +1116,55 @@ a_sprintf(Node **a, int n)
/*ARGSUSED*/
Cell *
-aprintf(Node **a, int n)
+awkprintf(Node **a, int n) /* printf */
{
+ /* a[0] is list of args, starting with format string */
+ /* a[1] is redirection operator, a[2] is redirection file */
FILE *fp;
- register Cell *x;
- register Node *y;
- uchar *buf;
+ Cell *x;
+ Node *y;
+ char *buf;
+ int len;
+ int bufsz = 3 * recsize;
+ if ((buf = (char *)malloc(bufsz)) == NULL)
+ FATAL("out of memory in awkprintf");
y = a[0]->nnext;
x = execute(a[0]);
- format(&buf, getsval(x), y);
- tempfree(x, "");
- if (a[1] == NULL)
- (void) fputs((char *)buf, stdout);
- else {
- fp = redirect((int)a[1], a[2]);
- (void) fputs((char *)buf, fp);
+ if ((len = format(&buf, &bufsz, getsval(x), y)) == -1)
+ FATAL("printf string %.30s... too long. can't happen.", buf);
+ tempfree(x);
+ if (a[1] == NULL) {
+ (void) fwrite(buf, len, 1, stdout);
+ if (ferror(stdout))
+ FATAL("write error on stdout");
+ } else {
+ fp = redirect(ptoi(a[1]), a[2]);
+ (void) fwrite(buf, len, 1, fp);
(void) fflush(fp);
+ if (ferror(fp))
+ FATAL("write error on %s", filename(fp));
}
free(buf);
- return (true);
+ return (True);
}
Cell *
-arith(Node **a, int n)
+arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */
{
- Awkfloat i, j;
+ Awkfloat i, j = 0;
double v;
- register Cell *x, *y, *z;
+ Cell *x, *y, *z;
x = execute(a[0]);
i = getfval(x);
- tempfree(x, "");
- if (n != UMINUS) {
+ tempfree(x);
+ if (n != UMINUS && n != UPLUS) {
y = execute(a[1]);
j = getfval(y);
- tempfree(y, "");
+ tempfree(y);
}
- z = gettemp("");
+ z = gettemp();
switch (n) {
case ADD:
i += j;
@@ -996,18 +1177,20 @@ arith(Node **a, int n)
break;
case DIVIDE:
if (j == 0)
- ERROR "division by zero" FATAL;
+ FATAL("division by zero");
i /= j;
break;
case MOD:
if (j == 0)
- ERROR "division by zero in mod" FATAL;
+ FATAL("division by zero in mod");
(void) modf(i/j, &v);
i = i - j * v;
break;
case UMINUS:
i = -i;
break;
+ case UPLUS: /* handled by getfval(), above */
+ break;
case POWER:
if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */
i = ipow(i, (int)j);
@@ -1015,14 +1198,14 @@ arith(Node **a, int n)
i = errcheck(pow(i, j), "pow");
break;
default: /* can't happen */
- ERROR "illegal arithmetic operator %d", n FATAL;
+ FATAL("illegal arithmetic operator %d", n);
}
(void) setfval(z, i);
return (z);
}
static double
-ipow(double x, int n)
+ipow(double x, int n) /* x**n. ought to be done by pow, but isn't always */
{
double v;
@@ -1036,10 +1219,10 @@ ipow(double x, int n)
}
Cell *
-incrdecr(Node **a, int n)
+incrdecr(Node **a, int n) /* a[0]++, etc. */
{
- register Cell *x, *z;
- register int k;
+ Cell *x, *z;
+ int k;
Awkfloat xf;
x = execute(a[0]);
@@ -1049,34 +1232,42 @@ incrdecr(Node **a, int n)
(void) setfval(x, xf + k);
return (x);
}
- z = gettemp("");
+ z = gettemp();
(void) setfval(z, xf);
(void) setfval(x, xf + k);
- tempfree(x, "");
+ tempfree(x);
return (z);
}
+/* a[0] = a[1], a[0] += a[1], etc. */
+/* this is subtle; don't muck with it. */
Cell *
assign(Node **a, int n)
{
- register Cell *x, *y;
+ Cell *x, *y;
Awkfloat xf, yf;
double v;
y = execute(a[1]);
x = execute(a[0]); /* order reversed from before... */
if (n == ASSIGN) { /* ordinary assignment */
- if ((y->tval & (STR|NUM)) == (STR|NUM)) {
+ /*LINTED if*/
+ if (x == y && !(x->tval & (FLD|REC)) && x != nfloc) {
+ /*
+ * If this is a self-assignment, we leave things alone,
+ * unless it's a field or NF.
+ */
+ } else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
(void) setsval(x, getsval(y));
x->fval = getfval(y);
x->tval |= NUM;
- } else if (y->tval & STR)
+ } else if (isstr(y))
(void) setsval(x, getsval(y));
- else if (y->tval & NUM)
+ else if (isnum(y))
(void) setfval(x, getfval(y));
else
funnyvar(y, "read value of");
- tempfree(y, "");
+ tempfree(y);
return (x);
}
xf = getfval(x);
@@ -1093,12 +1284,12 @@ assign(Node **a, int n)
break;
case DIVEQ:
if (yf == 0)
- ERROR "division by zero in /=" FATAL;
+ FATAL("division by zero in /=");
xf /= yf;
break;
case MODEQ:
if (yf == 0)
- ERROR "division by zero in %%=" FATAL;
+ FATAL("division by zero in %%=");
(void) modf(xf/yf, &v);
xf = xf - yf * v;
break;
@@ -1109,55 +1300,55 @@ assign(Node **a, int n)
xf = errcheck(pow(xf, yf), "pow");
break;
default:
- ERROR "illegal assignment operator %d", n FATAL;
+ FATAL("illegal assignment operator %d", n);
break;
}
- tempfree(y, "");
+ tempfree(y);
(void) setfval(x, xf);
return (x);
}
/*ARGSUSED*/
Cell *
-cat(Node **a, int q)
+cat(Node **a, int q) /* a[0] cat a[1] */
{
- register Cell *x, *y, *z;
- register int n1, n2;
- register uchar *s;
+ Cell *x, *y, *z;
+ int n1, n2;
+ char *s = NULL;
+ size_t ssz = 0;
x = execute(a[0]);
+ n1 = strlen(getsval(x));
+ (void) adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1");
+ (void) strncpy(s, x->sval, ssz);
+
y = execute(a[1]);
- (void) getsval(x);
- (void) getsval(y);
- n1 = strlen((char *)x->sval);
- n2 = strlen((char *)y->sval);
- s = (uchar *)malloc(n1 + n2 + 1);
- if (s == NULL) {
- ERROR "out of space concatenating %.15s and %.15s",
- x->sval, y->sval FATAL;
- }
- (void) strcpy((char *)s, (char *)x->sval);
- (void) strcpy((char *)s + n1, (char *)y->sval);
- tempfree(y, "");
- z = gettemp("");
+ n2 = strlen(getsval(y));
+ (void) adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2");
+ (void) strncpy(s + n1, y->sval, ssz - n1);
+
+ tempfree(x);
+ tempfree(y);
+
+ z = gettemp();
z->sval = s;
z->tval = STR;
- tempfree(x, "");
+
return (z);
}
/*ARGSUSED*/
Cell *
-pastat(Node **a, int n)
+pastat(Node **a, int n) /* a[0] { a[1] } */
{
- register Cell *x;
+ Cell *x;
- if (a[0] == 0)
+ if (a[0] == NULL)
x = execute(a[1]);
else {
x = execute(a[0]);
if (istrue(x)) {
- tempfree(x, "");
+ tempfree(x);
x = execute(a[1]);
}
}
@@ -1166,73 +1357,83 @@ pastat(Node **a, int n)
/*ARGSUSED*/
Cell *
-dopa2(Node **a, int n)
+dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */
{
Cell *x;
int pair;
- static int *pairstack = NULL;
if (!pairstack) {
/* first time */
dprintf(("paircnt: %d\n", paircnt));
- pairstack = (int *)malloc(sizeof (int) * paircnt);
- if (!pairstack)
- ERROR "out of space in dopa2" FATAL;
- (void) memset(pairstack, 0, sizeof (int) * paircnt);
+ pairstack = (int *)calloc(paircnt, sizeof (int));
+ if (pairstack == NULL)
+ FATAL("out of space in dopa2");
}
- pair = (int)a[3];
+ pair = ptoi(a[3]);
if (pairstack[pair] == 0) {
x = execute(a[0]);
if (istrue(x))
pairstack[pair] = 1;
- tempfree(x, "");
+ tempfree(x);
}
if (pairstack[pair] == 1) {
x = execute(a[1]);
if (istrue(x))
pairstack[pair] = 0;
- tempfree(x, "");
+ tempfree(x);
x = execute(a[2]);
return (x);
}
- return (false);
+ return (False);
}
/*ARGSUSED*/
Cell *
-split(Node **a, int nnn)
+split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
{
- Cell *x, *y, *ap;
- register uchar *s;
- register int sep;
- uchar *t, temp, num[11], *fs;
- int n, tempstat;
+ Cell *x = NULL, *y, *ap;
+ char *s, *origs;
+ char *fs, *origfs = NULL;
+ int sep;
+ char *t, temp, num[50];
+ int n, tempstat, arg3type;
y = execute(a[0]); /* source string */
- s = getsval(y);
- if (a[2] == 0) /* fs string */
- fs = *FS;
- else if ((int)a[3] == STRING) { /* split(str,arr,"string") */
+ origs = s = tostring(getsval(y));
+ arg3type = ptoi(a[3]);
+ if (a[2] == NULL) /* fs string */
+ fs = getsval(fsloc);
+ else if (arg3type == STRING) { /* split(str,arr,"string") */
x = execute(a[2]);
- fs = getsval(x);
- } else if ((int)a[3] == REGEXPR)
- fs = (uchar *)"(regexpr)"; /* split(str,arr,/regexpr/) */
+ origfs = fs = tostring(getsval(x));
+ tempfree(x);
+ } else if (arg3type == REGEXPR)
+ fs = "(regexpr)"; /* split(str,arr,/regexpr/) */
else
- ERROR "illegal type of split()" FATAL;
+ FATAL("illegal type of split");
sep = *fs;
ap = execute(a[1]); /* array name */
freesymtab(ap);
- dprintf(("split: s=|%s|, a=%s, sep=|%s|\n", s, ap->nval, fs));
+ dprintf(("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs));
ap->tval &= ~STR;
ap->tval |= ARR;
- ap->sval = (uchar *)makesymtab(NSYMTAB);
+ ap->sval = (char *)makesymtab(NSYMTAB);
n = 0;
- if (*s != '\0' && strlen((char *)fs) > 1 || (int)a[3] == REGEXPR) {
+ if (arg3type == REGEXPR && strlen((char *)((fa*)a[2])->restr) == 0) {
+ /*
+ * split(s, a, //); have to arrange things such that it looks
+ * like an empty separator.
+ */
+ arg3type = 0;
+ fs = "";
+ sep = 0;
+ }
+ if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) {
/* reg expr */
fa *pfa;
- if ((int)a[3] == REGEXPR) { /* it's ready already */
+ if (arg3type == REGEXPR) { /* it's ready already */
pfa = (fa *)a[2];
} else {
pfa = makedfa(fs, 1);
@@ -1242,12 +1443,12 @@ split(Node **a, int nnn)
pfa->initstat = 2;
do {
n++;
- (void) sprintf((char *)num, "%d", n);
+ (void) sprintf(num, "%d", n);
temp = *patbeg;
*patbeg = '\0';
if (is_number(s)) {
(void) setsymtab(num, s,
- atof((char *)s),
+ atof(s),
/*LINTED align*/
STR|NUM, (Array *)ap->sval);
} else {
@@ -1259,19 +1460,22 @@ split(Node **a, int nnn)
s = patbeg + patlen;
if (*(patbeg+patlen-1) == 0 || *s == 0) {
n++;
- (void) sprintf((char *)num, "%d", n);
- (void) setsymtab(num, (uchar *)"", 0.0,
+ (void) sprintf(num, "%d", n);
+ (void) setsymtab(num, "", 0.0,
/*LINTED align*/
STR, (Array *)ap->sval);
pfa->initstat = tempstat;
goto spdone;
}
} while (nematch(pfa, s));
+ /* bwk: has to be here to reset */
+ /* cf gsub and refldbld */
+ pfa->initstat = tempstat;
}
n++;
- (void) sprintf((char *)num, "%d", n);
+ (void) sprintf(num, "%d", n);
if (is_number(s)) {
- (void) setsymtab(num, s, atof((char *)s),
+ (void) setsymtab(num, s, atof(s),
/*LINTED align*/
STR|NUM, (Array *)ap->sval);
} else {
@@ -1284,7 +1488,7 @@ spdone:
for (n = 0; ; ) {
while (*s == ' ' || *s == '\t' || *s == '\n')
s++;
- if (*s == 0)
+ if (*s == '\0')
break;
n++;
t = s;
@@ -1295,9 +1499,9 @@ spdone:
;
temp = *s;
*s = '\0';
- (void) sprintf((char *)num, "%d", n);
+ (void) sprintf(num, "%d", n);
if (is_number(t)) {
- (void) setsymtab(num, t, atof((char *)t),
+ (void) setsymtab(num, t, atof(t),
/*LINTED align*/
STR|NUM, (Array *)ap->sval);
} else {
@@ -1306,10 +1510,27 @@ spdone:
STR, (Array *)ap->sval);
}
*s = temp;
- if (*s != 0)
+ if (*s != '\0')
s++;
}
- } else if (*s != 0) {
+ } else if (sep == '\0') { /* split(s, a, "") => 1 char/elem */
+ for (n = 0; *s != 0; s++) {
+ char buf[2];
+ n++;
+ (void) sprintf(num, "%d", n);
+ buf[0] = *s;
+ buf[1] = '\0';
+ if (isdigit((uschar)buf[0])) {
+ (void) setsymtab(num, buf, atof(buf),
+ /*LINTED align*/
+ STR|NUM, (Array *)ap->sval);
+ } else {
+ (void) setsymtab(num, buf, 0.0,
+ /*LINTED align*/
+ STR, (Array *)ap->sval);
+ }
+ }
+ } else if (*s != '\0') {
for (;;) {
n++;
t = s;
@@ -1317,9 +1538,9 @@ spdone:
s++;
temp = *s;
*s = '\0';
- (void) sprintf((char *)num, "%d", n);
+ (void) sprintf(num, "%d", n);
if (is_number(t)) {
- (void) setsymtab(num, t, atof((char *)t),
+ (void) setsymtab(num, t, atof(t),
/*LINTED align*/
STR|NUM, (Array *)ap->sval);
} else {
@@ -1328,15 +1549,15 @@ spdone:
STR, (Array *)ap->sval);
}
*s = temp;
- if (*s++ == 0)
+ if (*s++ == '\0')
break;
}
}
- tempfree(ap, "");
- tempfree(y, "");
- if (a[2] != 0 && (int)a[3] == STRING)
- tempfree(x, "");
- x = gettemp("");
+ tempfree(ap);
+ tempfree(y);
+ free(origs);
+ free(origfs);
+ x = gettemp();
x->tval = NUM;
x->fval = n;
return (x);
@@ -1344,16 +1565,16 @@ spdone:
/*ARGSUSED*/
Cell *
-condexpr(Node **a, int n)
+condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */
{
- register Cell *x;
+ Cell *x;
x = execute(a[0]);
if (istrue(x)) {
- tempfree(x, "");
+ tempfree(x);
x = execute(a[1]);
} else {
- tempfree(x, "");
+ tempfree(x);
x = execute(a[2]);
}
return (x);
@@ -1361,16 +1582,16 @@ condexpr(Node **a, int n)
/*ARGSUSED*/
Cell *
-ifstat(Node **a, int n)
+ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */
{
- register Cell *x;
+ Cell *x;
x = execute(a[0]);
if (istrue(x)) {
- tempfree(x, "");
+ tempfree(x);
x = execute(a[1]);
- } else if (a[2] != 0) {
- tempfree(x, "");
+ } else if (a[2] != NULL) {
+ tempfree(x);
x = execute(a[2]);
}
return (x);
@@ -1378,123 +1599,139 @@ ifstat(Node **a, int n)
/*ARGSUSED*/
Cell *
-whilestat(Node **a, int n)
+whilestat(Node **a, int n) /* while (a[0]) a[1] */
{
- register Cell *x;
+ Cell *x;
for (;;) {
x = execute(a[0]);
if (!istrue(x))
return (x);
- tempfree(x, "");
+ tempfree(x);
x = execute(a[1]);
if (isbreak(x)) {
- x = true;
+ x = True;
return (x);
}
if (isnext(x) || isexit(x) || isret(x))
return (x);
- tempfree(x, "");
+ tempfree(x);
}
}
/*ARGSUSED*/
Cell *
-dostat(Node **a, int n)
+dostat(Node **a, int n) /* do a[0]; while(a[1]) */
{
- register Cell *x;
+ Cell *x;
for (;;) {
x = execute(a[0]);
if (isbreak(x))
- return (true);
+ return (True);
if (isnext(x) || isexit(x) || isret(x))
return (x);
- tempfree(x, "");
+ tempfree(x);
x = execute(a[1]);
if (!istrue(x))
return (x);
- tempfree(x, "");
+ tempfree(x);
}
}
/*ARGSUSED*/
Cell *
-forstat(Node **a, int n)
+forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */
{
- register Cell *x;
+ Cell *x;
x = execute(a[0]);
- tempfree(x, "");
+ tempfree(x);
for (;;) {
- if (a[1] != 0) {
+ if (a[1] != NULL) {
x = execute(a[1]);
if (!istrue(x))
return (x);
else
- tempfree(x, "");
+ tempfree(x);
}
x = execute(a[3]);
if (isbreak(x)) /* turn off break */
- return (true);
+ return (True);
if (isnext(x) || isexit(x) || isret(x))
return (x);
- tempfree(x, "");
+ tempfree(x);
x = execute(a[2]);
- tempfree(x, "");
+ tempfree(x);
}
}
/*ARGSUSED*/
Cell *
-instat(Node **a, int n)
+instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */
{
- register Cell *x, *vp, *arrayp, *cp, *ncp;
+ Cell *x, *vp, *arrayp, *cp, *ncp;
Array *tp;
int i;
vp = execute(a[0]);
arrayp = execute(a[1]);
- if (!isarr(arrayp))
- ERROR "%s is not an array", arrayp->nval FATAL;
+ if (!isarr(arrayp)) {
+ dprintf(("making %s into an array\n", arrayp->nval));
+ if (freeable(arrayp))
+ xfree(arrayp->sval);
+ arrayp->tval &= ~(STR|NUM|DONTFREE);
+ arrayp->tval |= ARR;
+ arrayp->sval = (char *)makesymtab(NSYMTAB);
+ }
/*LINTED align*/
tp = (Array *)arrayp->sval;
- tempfree(arrayp, "");
+ tempfree(arrayp);
for (i = 0; i < tp->size; i++) { /* this routine knows too much */
for (cp = tp->tab[i]; cp != NULL; cp = ncp) {
(void) setsval(vp, cp->nval);
ncp = cp->cnext;
x = execute(a[2]);
if (isbreak(x)) {
- tempfree(vp, "");
- return (true);
+ tempfree(vp);
+ return (True);
}
if (isnext(x) || isexit(x) || isret(x)) {
- tempfree(vp, "");
+ tempfree(vp);
return (x);
}
- tempfree(x, "");
+ tempfree(x);
}
}
- return (true);
+ return (True);
}
/*ARGSUSED*/
Cell *
-bltin(Node **a, int n)
+bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */
{
- register Cell *x, *y;
+ Cell *x, *y;
Awkfloat u;
- register int t;
- uchar *p, *buf;
+ int t;
+ Awkfloat tmp;
+ char *p, *buf;
Node *nextarg;
+ FILE *fp;
+ void flush_all(void);
+ int status = 0;
- t = (int)a[0];
+ t = ptoi(a[0]);
x = execute(a[1]);
nextarg = a[1]->nnext;
switch (t) {
case FLENGTH:
- u = (Awkfloat)strlen((char *)getsval(x)); break;
+ if (isarr(x)) {
+ /* LINTED align */
+ u = ((Array *)x->sval)->nelem;
+ } else {
+ u = strlen(getsval(x));
+ }
+ break;
case FLOG:
u = errcheck(log(getfval(x)), "log"); break;
case FINT:
@@ -1508,60 +1745,82 @@ bltin(Node **a, int n)
case FCOS:
u = cos(getfval(x)); break;
case FATAN:
- if (nextarg == 0) {
- ERROR "atan2 requires two arguments; returning 1.0"
- WARNING;
+ if (nextarg == NULL) {
+ WARNING("atan2 requires two arguments; returning 1.0");
u = 1.0;
} else {
y = execute(a[1]->nnext);
u = atan2(getfval(x), getfval(y));
- tempfree(y, "");
+ tempfree(y);
nextarg = nextarg->nnext;
}
break;
case FSYSTEM:
/* in case something is buffered already */
(void) fflush(stdout);
- /* 256 is unix-dep */
- u = (Awkfloat)system((char *)getsval(x)) / 256;
+ status = system(getsval(x));
+ u = status;
+ if (status != -1) {
+ if (WIFEXITED(status)) {
+ u = WEXITSTATUS(status);
+ } else if (WIFSIGNALED(status)) {
+ u = WTERMSIG(status) + 256;
+ if (WCOREDUMP(status))
+ u += 256;
+ } else /* something else?!? */
+ u = 0;
+ }
break;
case FRAND:
- u = (Awkfloat)(rand() % 32767) / 32767.0;
+ /* in principle, rand() returns something in 0..RAND_MAX */
+ u = (Awkfloat) (rand() % RAND_MAX) / RAND_MAX;
break;
case FSRAND:
- if (x->tval & REC) /* no argument provided */
+ if (isrec(x)) /* no argument provided */
u = time((time_t *)0);
else
u = getfval(x);
- srand((int)u); u = (int)u;
+ tmp = u;
+ srand((unsigned int) u);
+ u = srand_seed;
+ srand_seed = tmp;
break;
case FTOUPPER:
case FTOLOWER:
buf = tostring(getsval(x));
if (t == FTOUPPER) {
for (p = buf; *p; p++)
- if (islower(*p))
- *p = toupper(*p);
+ if (islower((uschar)*p))
+ *p = toupper((uschar)*p);
} else {
for (p = buf; *p; p++)
- if (isupper(*p))
- *p = tolower(*p);
+ if (isupper((uschar)*p))
+ *p = tolower((uschar)*p);
}
- tempfree(x, "");
- x = gettemp("");
+ tempfree(x);
+ x = gettemp();
(void) setsval(x, buf);
free(buf);
return (x);
+ case FFLUSH:
+ if (isrec(x) || strlen(getsval(x)) == 0) {
+ flush_all(); /* fflush() or fflush("") -> all */
+ u = 0;
+ } else if ((fp = openfile(FFLUSH, getsval(x))) == NULL)
+ u = EOF;
+ else
+ u = fflush(fp);
+ break;
default: /* can't happen */
- ERROR "illegal function type %d", t FATAL;
+ FATAL("illegal function type %d", t);
break;
}
- tempfree(x, "");
- x = gettemp("");
+ tempfree(x);
+ x = gettemp();
(void) setfval(x, u);
- if (nextarg != 0) {
- ERROR "warning: function has too many arguments" WARNING;
- for (; nextarg; nextarg = nextarg->nnext)
+ if (nextarg != NULL) {
+ WARNING("warning: function has too many arguments");
+ for (; nextarg != NULL; nextarg = nextarg->nnext)
(void) execute(nextarg);
}
return (x);
@@ -1569,28 +1828,30 @@ bltin(Node **a, int n)
/*ARGSUSED*/
Cell *
-print(Node **a, int n)
+printstat(Node **a, int n) /* print a[0] */
{
- register Node *x;
- register Cell *y;
+ Node *x;
+ Cell *y;
FILE *fp;
- if (a[1] == 0)
+ if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */
fp = stdout;
else
- fp = redirect((int)a[1], a[2]);
+ fp = redirect(ptoi(a[1]), a[2]);
for (x = a[0]; x != NULL; x = x->nnext) {
y = execute(x);
- (void) fputs((char *)getsval(y), fp);
- tempfree(y, "");
+ (void) fputs(getpssval(y), fp);
+ tempfree(y);
if (x->nnext == NULL)
- (void) fputs((char *)*ORS, fp);
+ (void) fputs(getsval(orsloc), fp);
else
- (void) fputs((char *)*OFS, fp);
+ (void) fputs(getsval(ofsloc), fp);
}
- if (a[1] != 0)
+ if (a[1] != NULL)
(void) fflush(fp);
- return (true);
+ if (ferror(fp))
+ FATAL("write error on %s", filename(fp));
+ return (True);
}
/*ARGSUSED*/
@@ -1600,67 +1861,100 @@ nullproc(Node **a, int n)
return (0);
}
-struct {
- FILE *fp;
- uchar *fname;
- int mode; /* '|', 'a', 'w' */
-} files[FOPEN_MAX];
static FILE *
-redirect(int a, Node *b)
+redirect(int a, Node *b) /* set up all i/o redirections */
{
FILE *fp;
Cell *x;
- uchar *fname;
+ char *fname;
x = execute(b);
fname = getsval(x);
fp = openfile(a, fname);
if (fp == NULL)
- ERROR "can't open file %s", fname FATAL;
- tempfree(x, "");
+ FATAL("can't open file %s", fname);
+ tempfree(x);
return (fp);
}
+struct files {
+ FILE *fp;
+ const char *fname;
+ int mode; /* '|', 'a', 'w' => LE/LT, GT */
+} *files;
+
+int nfiles;
+
+void
+stdinit(void) /* in case stdin, etc., are not constants */
+{
+ nfiles = FOPEN_MAX;
+ files = calloc(nfiles, sizeof (*files));
+ if (files == NULL)
+ FATAL("can't allocate file memory for %u files", nfiles);
+ files[0].fp = stdin;
+ files[0].fname = "/dev/stdin";
+ files[0].mode = LT;
+ files[1].fp = stdout;
+ files[1].fname = "/dev/stdout";
+ files[1].mode = GT;
+ files[2].fp = stderr;
+ files[2].fname = "/dev/stderr";
+ files[2].mode = GT;
+}
+
static FILE *
-openfile(int a, uchar *s)
+openfile(int a, const char *s)
{
- register int i, m;
- register FILE *fp;
+ int i, m;
+ FILE *fp = NULL;
if (*s == '\0')
- ERROR "null file name in print or getline" FATAL;
- for (i = 0; i < FOPEN_MAX; i++) {
- if (files[i].fname &&
- strcmp((char *)s, (char *)files[i].fname) == 0) {
+ FATAL("null file name in print or getline");
+ for (i = 0; i < nfiles; i++) {
+ if (files[i].fname && strcmp(s, files[i].fname) == 0) {
if (a == files[i].mode ||
- a == APPEND && files[i].mode == GT) {
+ (a == APPEND && files[i].mode == GT)) {
return (files[i].fp);
}
+ if (a == FFLUSH)
+ return (files[i].fp);
}
}
- for (i = 0; i < FOPEN_MAX; i++) {
+ if (a == FFLUSH) /* didn't find it, so don't create it! */
+ return (NULL);
+
+ for (i = 0; i < nfiles; i++) {
if (files[i].fp == 0)
break;
}
- if (i >= FOPEN_MAX)
- ERROR "%s makes too many open files", s FATAL;
+ if (i >= nfiles) {
+ struct files *nf;
+ int nnf = nfiles + FOPEN_MAX;
+ nf = realloc(files, nnf * sizeof (*nf));
+ if (nf == NULL)
+ FATAL("cannot grow files for %s and %d files", s, nnf);
+ (void) memset(&nf[nfiles], 0, FOPEN_MAX * sizeof (*nf));
+ nfiles = nnf;
+ files = nf;
+ }
(void) fflush(stdout); /* force a semblance of order */
m = a;
if (a == GT) {
- fp = fopen((char *)s, "w");
+ fp = fopen(s, "wF");
} else if (a == APPEND) {
- fp = fopen((char *)s, "a");
+ fp = fopen(s, "aF");
m = GT; /* so can mix > and >> */
} else if (a == '|') { /* output pipe */
- fp = popen((char *)s, "w");
+ fp = popen(s, "wF");
} else if (a == LE) { /* input pipe */
- fp = popen((char *)s, "r");
+ fp = popen(s, "rF");
} else if (a == LT) { /* getline <file */
- fp = strcmp((char *)s, "-") == 0 ?
- stdin : fopen((char *)s, "r"); /* "-" is stdin */
+ fp = strcmp(s, "-") == 0 ?
+ stdin : fopen(s, "rF"); /* "-" is stdin */
} else /* can't happen */
- ERROR "illegal redirection" FATAL;
+ FATAL("illegal redirection %d", a);
if (fp != NULL) {
files[i].fname = tostring(s);
files[i].fp = fp;
@@ -1669,38 +1963,52 @@ openfile(int a, uchar *s)
return (fp);
}
+const char *
+filename(FILE *fp)
+{
+ int i;
+
+ for (i = 0; i < nfiles; i++)
+ if (fp == files[i].fp)
+ return (files[i].fname);
+ return ("???");
+}
+
/*ARGSUSED*/
Cell *
closefile(Node **a, int n)
{
- register Cell *x;
+ Cell *x;
int i, stat;
x = execute(a[0]);
(void) getsval(x);
- for (i = 0; i < FOPEN_MAX; i++) {
- if (files[i].fname &&
- strcmp((char *)x->sval, (char *)files[i].fname) == 0) {
+ stat = -1;
+ for (i = 0; i < nfiles; i++) {
+ if (files[i].fname && strcmp(x->sval, files[i].fname) == 0) {
if (ferror(files[i].fp)) {
- ERROR "i/o error occurred on %s",
- files[i].fname WARNING;
+ WARNING("i/o error occurred on %s",
+ files[i].fname);
}
if (files[i].mode == '|' || files[i].mode == LE)
stat = pclose(files[i].fp);
else
stat = fclose(files[i].fp);
if (stat == EOF) {
- ERROR "i/o error occurred closing %s",
- files[i].fname WARNING;
+ WARNING("i/o error occurred closing %s",
+ files[i].fname);
}
- xfree(files[i].fname);
+ if (i > 2) /* don't do /dev/std... */
+ xfree(files[i].fname);
/* watch out for ref thru this */
files[i].fname = NULL;
files[i].fp = NULL;
}
}
- tempfree(x, "close");
- return (true);
+ tempfree(x);
+ x = gettemp();
+ (void) setfval(x, (Awkfloat) stat);
+ return (x);
}
static void
@@ -1708,197 +2016,257 @@ closeall(void)
{
int i, stat;
- for (i = 0; i < FOPEN_MAX; i++) {
+ for (i = 0; i < nfiles; i++) {
if (files[i].fp) {
if (ferror(files[i].fp)) {
- ERROR "i/o error occurred on %s",
- files[i].fname WARNING;
+ WARNING("i/o error occurred on %s",
+ files[i].fname);
}
if (files[i].mode == '|' || files[i].mode == LE)
stat = pclose(files[i].fp);
else
stat = fclose(files[i].fp);
if (stat == EOF) {
- ERROR "i/o error occurred while closing %s",
- files[i].fname WARNING;
+ WARNING("i/o error occurred while closing %s",
+ files[i].fname);
}
}
}
}
+void
+flush_all(void)
+{
+ int i;
+
+ for (i = 0; i < nfiles; i++)
+ if (files[i].fp)
+ (void) fflush(files[i].fp);
+}
+
/*ARGSUSED*/
Cell *
-sub(Node **a, int nnn)
+sub(Node **a, int nnn) /* substitute command */
{
- register uchar *sptr;
- register Cell *x, *y, *result;
- uchar *buf, *t;
+ char *sptr, *pb, *q;
+ Cell *x, *y, *result;
+ char *t, *buf;
fa *pfa;
- size_t bsize, cnt, len;
+ size_t bufsz = recsize;
+ if ((buf = (char *)malloc(bufsz)) == NULL)
+ FATAL("out of memory in sub");
x = execute(a[3]); /* target string */
t = getsval(x);
- if (a[0] == 0)
+ if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */
pfa = (fa *)a[1]; /* regular expression */
else {
y = execute(a[1]);
pfa = makedfa(getsval(y), 1);
- tempfree(y, "");
+ tempfree(y);
}
y = execute(a[2]); /* replacement string */
- result = false;
+ result = False;
if (pmatch(pfa, t)) {
- init_buf(&buf, &bsize, LINE_INCR);
- cnt = 0;
sptr = t;
- len = patbeg - sptr;
- if (len > 0) {
- expand_buf(&buf, &bsize, cnt + len);
- (void) memcpy(buf, sptr, len);
- cnt += len;
- }
+ (void) adjbuf(&buf, &bufsz,
+ 1 + patbeg - sptr, recsize, 0, "sub");
+ pb = buf;
+ while (sptr < patbeg)
+ *pb++ = *sptr++;
sptr = getsval(y);
- while (*sptr != 0) {
- expand_buf(&buf, &bsize, cnt);
- if (*sptr == '\\' &&
- (*(sptr+1) == '&' || *(sptr+1) == '\\')) {
- sptr++; /* skip \, */
- buf[cnt++] = *sptr++; /* add & or \ */
+ while (*sptr != '\0') {
+ (void) adjbuf(&buf, &bufsz, 5 + pb - buf,
+ recsize, &pb, "sub");
+ if (*sptr == '\\') {
+ backsub(&pb, &sptr);
} else if (*sptr == '&') {
- expand_buf(&buf, &bsize, cnt + patlen);
sptr++;
- (void) memcpy(&buf[cnt], patbeg, patlen);
- cnt += patlen;
+ (void) adjbuf(&buf, &bufsz,
+ 1 + patlen + pb - buf, recsize, &pb, "sub");
+ for (q = patbeg; q < patbeg+patlen; )
+ *pb++ = *q++;
} else {
- buf[cnt++] = *sptr++;
+ *pb++ = *sptr++;
}
}
+ *pb = '\0';
+ if (pb > buf + bufsz)
+ FATAL("sub result1 %.30s too big; can't happen", buf);
sptr = patbeg + patlen;
if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) {
- len = strlen((char *)sptr);
- expand_buf(&buf, &bsize, cnt + len);
- (void) memcpy(&buf[cnt], sptr, len);
- cnt += len;
+ (void) adjbuf(&buf, &bufsz,
+ 1 + strlen(sptr) + pb - buf, 0, &pb, "sub");
+ while ((*pb++ = *sptr++) != '\0')
+ ;
}
- buf[cnt] = '\0';
- (void) setsval(x, buf);
- free(buf);
- result = true;
+ if (pb > buf + bufsz)
+ FATAL("sub result2 %.30s too big; can't happen", buf);
+ (void) setsval(x, buf); /* BUG: should be able to avoid copy */
+ result = True;
}
- tempfree(x, "");
- tempfree(y, "");
+ tempfree(x);
+ tempfree(y);
+ free(buf);
return (result);
}
/*ARGSUSED*/
Cell *
-gsub(Node **a, int nnn)
+gsub(Node **a, int nnn) /* global substitute */
{
- register Cell *x, *y;
- register uchar *rptr, *sptr, *t;
- uchar *buf;
- register fa *pfa;
+ Cell *x, *y;
+ char *rptr, *sptr, *t, *pb, *q;
+ char *buf;
+ fa *pfa;
int mflag, tempstat, num;
- size_t bsize, cnt, len;
+ size_t bufsz = recsize;
+ if ((buf = (char *)malloc(bufsz)) == NULL)
+ FATAL("out of memory in gsub");
mflag = 0; /* if mflag == 0, can replace empty string */
num = 0;
x = execute(a[3]); /* target string */
t = getsval(x);
- if (a[0] == 0)
- pfa = (fa *) a[1]; /* regular expression */
+ if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */
+ pfa = (fa *)a[1]; /* regular expression */
else {
y = execute(a[1]);
pfa = makedfa(getsval(y), 1);
- tempfree(y, "");
+ tempfree(y);
}
y = execute(a[2]); /* replacement string */
if (pmatch(pfa, t)) {
tempstat = pfa->initstat;
pfa->initstat = 2;
- init_buf(&buf, &bsize, LINE_INCR);
+ pb = buf;
rptr = getsval(y);
- cnt = 0;
do {
- if (patlen == 0 && *patbeg != 0) {
+ if (patlen == 0 && *patbeg != '\0') {
/* matched empty string */
if (mflag == 0) { /* can replace empty */
num++;
sptr = rptr;
- while (*sptr != 0) {
- expand_buf(&buf, &bsize, cnt);
- if (*sptr == '\\' &&
- (*(sptr+1) == '&' ||
- *(sptr+1) == '\\')) {
- sptr++;
- buf[cnt++] = *sptr++;
+ while (*sptr != '\0') {
+ (void) adjbuf(&buf, &bufsz,
+ 5 + pb - buf, recsize,
+ &pb, "gsub");
+ if (*sptr == '\\') {
+ backsub(&pb, &sptr);
} else if (*sptr == '&') {
- expand_buf(&buf,
- &bsize,
- cnt + patlen);
sptr++;
- (void) memcpy(&buf[cnt],
- patbeg, patlen);
- cnt += patlen;
+ (void) adjbuf(&buf,
+ &bufsz,
+ 1+patlen+pb-buf,
+ recsize,
+ &pb, "gsub");
+ for (
+ q = patbeg;
+ q < patbeg+patlen;
+ *pb++ = *q++)
+ ;
} else {
- buf[cnt++] = *sptr++;
+ *pb++ = *sptr++;
}
}
}
- if (*t == 0) /* at end */
+ if (*t == '\0') /* at end */
goto done;
- expand_buf(&buf, &bsize, cnt);
- buf[cnt++] = *t++;
+ (void) adjbuf(&buf, &bufsz,
+ 2 + pb - buf, recsize, &pb, "gsub");
+ *pb++ = *t++;
+ /* BUG: not sure of this test */
+ if (pb > buf + bufsz)
+ FATAL("gsub result0 %.30s too big; "
+ "can't happen", buf);
mflag = 0;
} else { /* matched nonempty string */
num++;
sptr = t;
- len = patbeg - sptr;
- if (len > 0) {
- expand_buf(&buf, &bsize, cnt + len);
- (void) memcpy(&buf[cnt], sptr, len);
- cnt += len;
- }
+ (void) adjbuf(&buf, &bufsz,
+ 1 + (patbeg - sptr) + pb - buf,
+ recsize, &pb, "gsub");
+ while (sptr < patbeg)
+ *pb++ = *sptr++;
sptr = rptr;
- while (*sptr != 0) {
- expand_buf(&buf, &bsize, cnt);
- if (*sptr == '\\' &&
- (*(sptr+1) == '&' ||
- *(sptr+1) == '\\')) {
- sptr++;
- buf[cnt++] = *sptr++;
+ while (*sptr != '\0') {
+ (void) adjbuf(&buf, &bufsz,
+ 5 + pb - buf, recsize, &pb, "gsub");
+ if (*sptr == '\\') {
+ backsub(&pb, &sptr);
} else if (*sptr == '&') {
- expand_buf(&buf, &bsize,
- cnt + patlen);
sptr++;
- (void) memcpy(&buf[cnt],
- patbeg, patlen);
- cnt += patlen;
+ (void) adjbuf(&buf, &bufsz,
+ 1 + patlen + pb - buf,
+ recsize, &pb, "gsub");
+ for (
+ q = patbeg;
+ q < patbeg+patlen;
+ *pb++ = *q++)
+ ;
} else {
- buf[cnt++] = *sptr++;
+ *pb++ = *sptr++;
}
}
t = patbeg + patlen;
- if ((*(t-1) == 0) || (*t == 0))
+ if (patlen == 0 || *(t-1) == '\0' || *t == '\0')
goto done;
+ if (pb > buf + bufsz)
+ FATAL("gsub result1 %.30s too big; "
+ "can't happen", buf);
mflag = 1;
}
} while (pmatch(pfa, t));
sptr = t;
- len = strlen((char *)sptr);
- expand_buf(&buf, &bsize, len + cnt);
- (void) memcpy(&buf[cnt], sptr, len);
- cnt += len;
+ (void) adjbuf(&buf, &bufsz,
+ 1 + strlen(sptr) + pb - buf, 0, &pb, "gsub");
+ while ((*pb++ = *sptr++) != '\0')
+ ;
done:
- buf[cnt] = '\0';
+ if (pb < buf + bufsz)
+ *pb = '\0';
+ else if (*(pb-1) != '\0')
+ FATAL("gsub result2 %.30s truncated; "
+ "can't happen", buf);
+ /* BUG: should be able to avoid copy + free */
(void) setsval(x, buf);
- free(buf);
pfa->initstat = tempstat;
}
- tempfree(x, "");
- tempfree(y, "");
- x = gettemp("");
+ tempfree(x);
+ tempfree(y);
+ x = gettemp();
x->tval = NUM;
x->fval = num;
+ free(buf);
return (x);
}
+
+/*
+ * handle \\& variations; sptr[0] == '\\'
+ */
+static void
+backsub(char **pb_ptr, char **sptr_ptr)
+{
+ char *pb = *pb_ptr, *sptr = *sptr_ptr;
+
+ if (sptr[1] == '\\') {
+ if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */
+ *pb++ = '\\';
+ *pb++ = '&';
+ sptr += 4;
+ } else if (sptr[2] == '&') { /* \\& -> \ + matched */
+ *pb++ = '\\';
+ sptr += 2;
+ } else { /* \\x -> \\x */
+ *pb++ = *sptr++;
+ *pb++ = *sptr++;
+ }
+ } else if (sptr[1] == '&') { /* literal & */
+ sptr++;
+ *pb++ = *sptr++;
+ } else /* literal \ */
+ *pb++ = *sptr++;
+
+ *pb_ptr = pb;
+ *sptr_ptr = sptr;
+}
diff --git a/usr/src/cmd/awk/tran.c b/usr/src/cmd/awk/tran.c
index e8e42d780e..ba9a685d93 100644
--- a/usr/src/cmd/awk/tran.c
+++ b/usr/src/cmd/awk/tran.c
@@ -1,4 +1,28 @@
/*
+ * Copyright (C) Lucent Technologies 1997
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that the above copyright notice appear in all
+ * copies and that both that the copyright notice and this
+ * permission notice and warranty disclaimer appear in supporting
+ * documentation, and that the name Lucent Technologies or any of
+ * its entities not be used in advertising or publicity pertaining
+ * to distribution of the software without specific, written prior
+ * permission.
+ *
+ * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
+ * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+ * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+ * THIS SOFTWARE.
+ */
+
+/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
@@ -27,13 +51,12 @@
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#define DEBUG
#include <stdio.h>
-#include <stdlib.h>
+#include <math.h>
#include <ctype.h>
#include <string.h>
+#include <stdlib.h>
#include "awk.h"
#include "y.tab.h"
@@ -42,107 +65,114 @@
Array *symtab; /* main symbol table */
-uchar **FS; /* initial field sep */
-uchar **RS; /* initial record sep */
-uchar **OFS; /* output field sep */
-uchar **ORS; /* output record sep */
-uchar **OFMT; /* output format for numbers */
+char **FS; /* initial field sep */
+char **RS; /* initial record sep */
+char **OFS; /* output field sep */
+char **ORS; /* output record sep */
+char **OFMT; /* output format for numbers */
+char **CONVFMT; /* format for conversions in getsval */
Awkfloat *NF; /* number of fields in current record */
Awkfloat *NR; /* number of current record */
Awkfloat *FNR; /* number of current record in current file */
-uchar **FILENAME; /* current filename argument */
+char **FILENAME; /* current filename argument */
Awkfloat *ARGC; /* number of arguments from command line */
-uchar **SUBSEP; /* subscript separator for a[i,j,k]; default \034 */
+char **SUBSEP; /* subscript separator for a[i,j,k]; default \034 */
Awkfloat *RSTART; /* start of re matched with ~; origin 1 (!) */
Awkfloat *RLENGTH; /* length of same */
Cell *recloc; /* location of record */
+Cell *fsloc; /* FS */
Cell *nrloc; /* NR */
Cell *nfloc; /* NF */
Cell *fnrloc; /* FNR */
+Cell *ofsloc; /* OFS */
+Cell *orsloc; /* ORS */
+Cell *rsloc; /* RS */
+Cell *rtloc; /* RT */
Array *ARGVtab; /* symbol table containing ARGV[...] */
Array *ENVtab; /* symbol table containing ENVIRON[...] */
Cell *rstartloc; /* RSTART */
Cell *rlengthloc; /* RLENGTH */
+Cell *subseploc; /* SUBSEP */
Cell *symtabloc; /* SYMTAB */
-Cell *nullloc;
+Cell *nullloc; /* a guaranteed empty cell */
Node *nullnode; /* zero&null, converted into a node for comparisons */
+Cell *literal0;
static void rehash(Array *);
-void
-syminit(void)
+static void
+setfree(Cell *vp)
{
- init_buf(&record, &record_size, LINE_INCR);
+ if (&vp->sval == FS || &vp->sval == RS ||
+ &vp->sval == OFS || &vp->sval == ORS ||
+ &vp->sval == OFMT || &vp->sval == CONVFMT ||
+ &vp->sval == FILENAME || &vp->sval == SUBSEP)
+ vp->tval |= DONTFREE;
+ else
+ vp->tval &= ~DONTFREE;
+}
+void
+syminit(void) /* initialize symbol table with builtin vars */
+{
/* initialize $0 */
- recloc = getfld(0);
- recloc->nval = (uchar *)"$0";
+ recloc = fieldadr(0);
+ recloc->nval = "$0";
recloc->sval = record;
recloc->tval = REC|STR|DONTFREE;
- symtab = makesymtab(NSYMTAB);
- (void) setsymtab((uchar *)"0", (uchar *)"0", 0.0,
- NUM|STR|CON|DONTFREE, symtab);
+ literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
/* this is used for if(x)... tests: */
- nullloc = setsymtab((uchar *)"$zero&null", (uchar *)"", 0.0,
+ nullloc = setsymtab("$zero&null", "", 0.0,
NUM|STR|CON|DONTFREE, symtab);
- nullnode = valtonode(nullloc, CCON);
- FS = &setsymtab((uchar *)"FS", (uchar *)" ", 0.0,
- STR|DONTFREE, symtab)->sval;
- RS = &setsymtab((uchar *)"RS", (uchar *)"\n", 0.0,
- STR|DONTFREE, symtab)->sval;
- OFS = &setsymtab((uchar *)"OFS", (uchar *)" ", 0.0,
- STR|DONTFREE, symtab)->sval;
- ORS = &setsymtab((uchar *)"ORS", (uchar *)"\n", 0.0,
+ nullnode = celltonode(nullloc, CCON);
+
+ fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
+ FS = &fsloc->sval;
+ rsloc = setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab);
+ RS = &rsloc->sval;
+ rtloc = setsymtab("RT", "", 0.0, STR|DONTFREE, symtab);
+ ofsloc = setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab);
+ OFS = &ofsloc->sval;
+ orsloc = setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab);
+ ORS = &orsloc->sval;
+ OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
+ CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0,
STR|DONTFREE, symtab)->sval;
- OFMT = &setsymtab((uchar *)"OFMT", (uchar *)"%.6g", 0.0,
- STR|DONTFREE, symtab)->sval;
- FILENAME = &setsymtab((uchar *)"FILENAME", (uchar *)"-", 0.0,
- STR|DONTFREE, symtab)->sval;
- nfloc = setsymtab((uchar *)"NF", (uchar *)"", 0.0, NUM, symtab);
+ FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
+ nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
NF = &nfloc->fval;
- nrloc = setsymtab((uchar *)"NR", (uchar *)"", 0.0, NUM, symtab);
+ nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
NR = &nrloc->fval;
- fnrloc = setsymtab((uchar *)"FNR", (uchar *)"", 0.0, NUM, symtab);
+ fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
FNR = &fnrloc->fval;
- SUBSEP = &setsymtab((uchar *)"SUBSEP", (uchar *)"\034", 0.0,
- STR|DONTFREE, symtab)->sval;
- rstartloc = setsymtab((uchar *)"RSTART", (uchar *)"", 0.0,
- NUM, symtab);
+ subseploc = setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab);
+ SUBSEP = &subseploc->sval;
+ rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
RSTART = &rstartloc->fval;
- rlengthloc = setsymtab((uchar *)"RLENGTH", (uchar *)"", 0.0,
- NUM, symtab);
+ rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
RLENGTH = &rlengthloc->fval;
- symtabloc = setsymtab((uchar *)"SYMTAB", (uchar *)"", 0.0, ARR, symtab);
- symtabloc->sval = (uchar *)symtab;
+ symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
+ symtabloc->sval = (char *)symtab;
}
void
-arginit(int ac, uchar *av[])
+arginit(int ac, char **av) /* set up ARGV and ARGC */
{
Cell *cp;
int i;
- uchar temp[11];
-
- /* first make FILENAME first real argument */
- for (i = 1; i < ac; i++) {
- if (!isclvar(av[i])) {
- (void) setsval(lookup((uchar *)"FILENAME", symtab),
- av[i]);
- break;
- }
- }
- ARGC = &setsymtab((uchar *)"ARGC", (uchar *)"", (Awkfloat)ac,
- NUM, symtab)->fval;
- cp = setsymtab((uchar *)"ARGV", (uchar *)"", 0.0, ARR, symtab);
+ char temp[50];
+
+ ARGC = &setsymtab("ARGC", "", (Awkfloat)ac, NUM, symtab)->fval;
+ cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
ARGVtab = makesymtab(NSYMTAB); /* could be (int) ARGC as well */
- cp->sval = (uchar *) ARGVtab;
+ cp->sval = (char *)ARGVtab;
for (i = 0; i < ac; i++) {
- (void) sprintf((char *)temp, "%d", i);
+ (void) sprintf(temp, "%d", i);
if (is_number(*av)) {
- (void) setsymtab(temp, *av, atof((const char *)*av),
+ (void) setsymtab(temp, *av, atof(*av),
STR|NUM, ARGVtab);
} else {
(void) setsymtab(temp, *av, 0.0, STR, ARGVtab);
@@ -152,20 +182,22 @@ arginit(int ac, uchar *av[])
}
void
-envinit(uchar *envp[])
+envinit(char **envp) /* set up ENVIRON variable */
{
Cell *cp;
- uchar *p;
+ char *p;
- cp = setsymtab((uchar *)"ENVIRON", (uchar *)"", 0.0, ARR, symtab);
+ cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
ENVtab = makesymtab(NSYMTAB);
- cp->sval = (uchar *) ENVtab;
+ cp->sval = (char *)ENVtab;
for (; *envp; envp++) {
- if ((p = (uchar *)strchr((char *)*envp, '=')) == NULL)
+ if ((p = strchr(*envp, '=')) == NULL)
+ continue;
+ if (p == *envp) /* no left hand side name in env string */
continue;
*p++ = 0; /* split into two strings at = */
if (is_number(p)) {
- (void) setsymtab(*envp, p, atof((const char *)p),
+ (void) setsymtab(*envp, p, atof(p),
STR|NUM, ENVtab);
} else {
(void) setsymtab(*envp, p, 0.0, STR, ENVtab);
@@ -176,7 +208,7 @@ envinit(uchar *envp[])
}
Array *
-makesymtab(int n)
+makesymtab(int n) /* make a new symbol table */
{
Array *ap;
Cell **tp;
@@ -184,7 +216,7 @@ makesymtab(int n)
ap = (Array *)malloc(sizeof (Array));
tp = (Cell **)calloc(n, sizeof (Cell *));
if (ap == NULL || tp == NULL)
- ERROR "out of space in makesymtab" FATAL;
+ FATAL("out of space in makesymtab");
ap->nelem = 0;
ap->size = n;
ap->tab = tp;
@@ -192,9 +224,9 @@ makesymtab(int n)
}
void
-freesymtab(Cell *ap) /* free symbol table */
+freesymtab(Cell *ap) /* free a symbol table */
{
- Cell *cp, *next;
+ Cell *cp, *temp;
Array *tp;
int i;
@@ -205,20 +237,26 @@ freesymtab(Cell *ap) /* free symbol table */
if (tp == NULL)
return;
for (i = 0; i < tp->size; i++) {
- for (cp = tp->tab[i]; cp != NULL; cp = next) {
- next = cp->cnext;
+ for (cp = tp->tab[i]; cp != NULL; cp = temp) {
xfree(cp->nval);
if (freeable(cp))
xfree(cp->sval);
+ temp = cp->cnext; /* avoids freeing then using */
free(cp);
+ tp->nelem--;
}
+ tp->tab[i] = 0;
+ }
+ if (tp->nelem != 0) {
+ WARNING("can't happen: inconsistent element count freeing %s",
+ ap->nval);
}
free(tp->tab);
free(tp);
}
void
-freeelem(Cell *ap, uchar *s) /* free elem s from ap (i.e., ap["s"] */
+freeelem(Cell *ap, const char *s) /* free elem s from ap (i.e., ap["s"]) */
{
Array *tp;
Cell *p, *prev = NULL;
@@ -228,7 +266,7 @@ freeelem(Cell *ap, uchar *s) /* free elem s from ap (i.e., ap["s"] */
tp = (Array *)ap->sval;
h = hash(s, tp->size);
for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
- if (strcmp((char *)s, (char *)p->nval) == 0) {
+ if (strcmp(s, p->nval) == 0) {
if (prev == NULL) /* 1st one */
tp->tab[h] = p->cnext;
else /* middle somewhere */
@@ -243,41 +281,40 @@ freeelem(Cell *ap, uchar *s) /* free elem s from ap (i.e., ap["s"] */
}
Cell *
-setsymtab(uchar *n, uchar *s, Awkfloat f, unsigned int t, Array *tp)
+setsymtab(const char *n, const char *s, Awkfloat f, unsigned int t, Array *tp)
{
- register int h;
- register Cell *p;
+ int h;
+ Cell *p;
if (n != NULL && (p = lookup(n, tp)) != NULL) {
- dprintf(("setsymtab found %p: n=%s", (void *)p, p->nval));
- dprintf((" s=\"%s\" f=%g t=%p\n",
- p->sval, p->fval, (void *)p->tval));
+ dprintf(("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
+ (void *)p, NN(p->nval), NN(p->sval), p->fval, p->tval));
return (p);
}
p = (Cell *)malloc(sizeof (Cell));
if (p == NULL)
- ERROR "symbol table overflow at %s", n FATAL;
+ FATAL("out of space for symbol table at %s", n);
p->nval = tostring(n);
- p->sval = s ? tostring(s) : tostring((uchar *)"");
+ p->sval = s ? tostring(s) : tostring("");
p->fval = f;
p->tval = t;
- p->csub = 0;
-
+ p->csub = CUNK;
+ p->ctype = OCELL;
tp->nelem++;
if (tp->nelem > FULLTAB * tp->size)
rehash(tp);
h = hash(n, tp->size);
p->cnext = tp->tab[h];
tp->tab[h] = p;
- dprintf(("setsymtab set %p: n=%s", (void *)p, p->nval));
- dprintf((" s=\"%s\" f=%g t=%p\n", p->sval, p->fval, (void *)p->tval));
+ dprintf(("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
+ (void *)p, p->nval, p->sval, p->fval, p->tval));
return (p);
}
int
-hash(uchar *s, int n) /* form hash value for string s */
+hash(const char *s, int n) /* form hash value for string s */
{
- register unsigned hashval;
+ unsigned int hashval;
for (hashval = 0; *s != '\0'; s++)
hashval = (*s + 31 * hashval);
@@ -292,10 +329,10 @@ rehash(Array *tp) /* rehash items in small table into big one */
nsz = GROWTAB * tp->size;
np = (Cell **)calloc(nsz, sizeof (Cell *));
- if (np == NULL)
- ERROR "out of space in rehash" FATAL;
+ if (np == NULL) /* can't do it, but can keep running. */
+ return; /* someone else will run out later. */
for (i = 0; i < tp->size; i++) {
- for (cp = tp->tab[i]; cp; cp = op) {
+ for (cp = tp->tab[i]; cp != NULL; cp = op) {
op = cp->cnext;
nh = hash(cp->nval, nsz);
cp->cnext = np[nh];
@@ -308,177 +345,278 @@ rehash(Array *tp) /* rehash items in small table into big one */
}
Cell *
-lookup(uchar *s, Array *tp) /* look for s in tp */
+lookup(const char *s, Array *tp) /* look for s in tp */
{
- register Cell *p;
+ Cell *p;
int h;
h = hash(s, tp->size);
for (p = tp->tab[h]; p != NULL; p = p->cnext) {
- if (strcmp((char *)s, (char *)p->nval) == 0)
+ if (strcmp(s, p->nval) == 0)
return (p); /* found it */
}
return (NULL); /* not found */
}
Awkfloat
-setfval(Cell *vp, Awkfloat f)
+setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
{
- int i;
+ int fldno;
+ f += 0.0; /* normalise negative zero to positive zero */
if ((vp->tval & (NUM | STR)) == 0)
funnyvar(vp, "assign to");
- if (vp->tval & FLD) {
+ if (isfld(vp)) {
donerec = 0; /* mark $0 invalid */
- i = fldidx(vp);
- if (i > *NF)
- newfld(i);
- dprintf(("setting field %d to %g\n", i, f));
- } else if (vp->tval & REC) {
+ fldno = atoi(vp->nval);
+ if (fldno > *NF)
+ newfld(fldno);
+ dprintf(("setting field %d to %g\n", fldno, f));
+ } else if (&vp->fval == NF) {
+ donerec = 0; /* mark $0 invalid */
+ setlastfld((int)f);
+ dprintf(("setting NF to %g\n", f));
+ } else if (isrec(vp)) {
donefld = 0; /* mark $1... invalid */
donerec = 1;
+ savefs();
+ } else if (vp == ofsloc) {
+ if (donerec == 0)
+ recbld();
}
- vp->tval &= ~STR; /* mark string invalid */
+ if (freeable(vp))
+ xfree(vp->sval); /* free any previous string */
+ vp->tval &= ~(STR|CONVC|CONVO); /* mark string invalid */
+ vp->fmt = NULL;
vp->tval |= NUM; /* mark number ok */
- dprintf(("setfval %p: %s = %g, t=%p\n", (void *)vp,
- vp->nval ? vp->nval : (unsigned char *)"NULL",
- f, (void *)vp->tval));
+ if (f == -0) /* who would have thought this possible? */
+ f = 0;
+ dprintf(("setfval %p: %s = %g, t=%o\n", (void *)vp,
+ NN(vp->nval), f, vp->tval));
return (vp->fval = f);
}
void
-funnyvar(Cell *vp, char *rw)
+funnyvar(Cell *vp, const char *rw)
{
- if (vp->tval & ARR)
- ERROR "can't %s %s; it's an array name.", rw, vp->nval FATAL;
- if (vp->tval & FCN)
- ERROR "can't %s %s; it's a function.", rw, vp->nval FATAL;
- ERROR "funny variable %o: n=%s s=\"%s\" f=%g t=%o",
- vp, vp->nval, vp->sval, vp->fval, vp->tval CONT;
+ if (isarr(vp))
+ FATAL("can't %s %s; it's an array name.", rw, vp->nval);
+ if (isfcn(vp))
+ FATAL("can't %s %s; it's a function.", rw, vp->nval);
+ WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
+ vp, vp->nval, vp->sval, vp->fval, vp->tval);
}
-uchar *
-setsval(Cell *vp, uchar *s)
+char *
+setsval(Cell *vp, const char *s) /* set string val of a Cell */
{
- int i;
+ char *t;
+ int fldno;
+ Awkfloat f;
+ dprintf(("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n",
+ (void *)vp, NN(vp->nval), s, vp->tval, donerec, donefld));
if ((vp->tval & (NUM | STR)) == 0)
funnyvar(vp, "assign to");
- if (vp->tval & FLD) {
+ if (isfld(vp)) {
donerec = 0; /* mark $0 invalid */
- i = fldidx(vp);
- if (i > *NF)
- newfld(i);
- dprintf(("setting field %d to %s\n", i, s));
- } else if (vp->tval & REC) {
+ fldno = atoi(vp->nval);
+ if (fldno > *NF)
+ newfld(fldno);
+ dprintf(("setting field %d to %s (%p)\n", fldno, s, (void *)s));
+ } else if (isrec(vp)) {
donefld = 0; /* mark $1... invalid */
donerec = 1;
+ savefs();
+ } else if (vp == ofsloc) {
+ if (donerec == 0)
+ recbld();
}
- vp->tval &= ~NUM;
- vp->tval |= STR;
+ t = s ? tostring(s) : tostring(""); /* in case it's self-assign */
if (freeable(vp))
xfree(vp->sval);
- vp->tval &= ~DONTFREE;
- dprintf(("setsval %p: %s = \"%s\", t=%p\n",
- (void *)vp,
- vp->nval ? (char *)vp->nval : "",
- s,
- (void *)(vp->tval ? (char *)vp->tval : "")));
- return (vp->sval = tostring(s));
+ vp->tval &= ~(NUM|CONVC|CONVO);
+ vp->tval |= STR;
+ vp->fmt = NULL;
+ setfree(vp);
+ dprintf(("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n",
+ (void *)vp, NN(vp->nval), t, (void *)t,
+ vp->tval, donerec, donefld));
+ vp->sval = t;
+ if (&vp->fval == NF) {
+ donerec = 0; /* mark $0 invalid */
+ f = getfval(vp);
+ setlastfld((int)f);
+ dprintf(("setting NF to %g\n", f));
+ }
+
+ return (vp->sval);
}
Awkfloat
-r_getfval(Cell *vp)
+getfval(Cell *vp) /* get float val of a Cell */
{
if ((vp->tval & (NUM | STR)) == 0)
funnyvar(vp, "read value of");
- if ((vp->tval & FLD) && donefld == 0)
+ if (isfld(vp) && donefld == 0)
fldbld();
- else if ((vp->tval & REC) && donerec == 0)
+ else if (isrec(vp) && donerec == 0)
recbld();
if (!isnum(vp)) { /* not a number */
- vp->fval = atof((const char *)vp->sval); /* best guess */
+ vp->fval = atof(vp->sval); /* best guess */
if (is_number(vp->sval) && !(vp->tval&CON))
vp->tval |= NUM; /* make NUM only sparingly */
}
- dprintf(("getfval %p: %s = %g, t=%p\n",
- (void *)vp, vp->nval, vp->fval, (void *)vp->tval));
+ dprintf(("getfval %p: %s = %g, t=%o\n",
+ (void *)vp, NN(vp->nval), vp->fval, vp->tval));
return (vp->fval);
}
-uchar *
-r_getsval(Cell *vp)
+static char *
+get_str_val(Cell *vp, char **fmt) /* get string val of a Cell */
{
- uchar s[256];
+ char s[256];
+ double dtemp;
if ((vp->tval & (NUM | STR)) == 0)
funnyvar(vp, "read value of");
- if ((vp->tval & FLD) && donefld == 0)
+ if (isfld(vp) && donefld == 0)
fldbld();
- else if ((vp->tval & REC) && donerec == 0)
+ else if (isrec(vp) && donerec == 0)
recbld();
- if ((vp->tval & STR) == 0) {
- if (!(vp->tval&DONTFREE))
- xfree(vp->sval);
- if ((long long)vp->fval == vp->fval) {
- (void) snprintf((char *)s, sizeof (s),
- "%.20g", vp->fval);
+
+ /*
+ * ADR: This is complicated and more fragile than is desirable.
+ * Retrieving a string value for a number associates the string
+ * value with the scalar. Previously, the string value was
+ * sticky, meaning if converted via OFMT that became the value
+ * (even though POSIX wants it to be via CONVFMT). Or if CONVFMT
+ * changed after a string value was retrieved, the original value
+ * was maintained and used. Also not per POSIX.
+ *
+ * We work around this design by adding two additional flags,
+ * CONVC and CONVO, indicating how the string value was
+ * obtained (via CONVFMT or OFMT) and _also_ maintaining a copy
+ * of the pointer to the xFMT format string used for the
+ * conversion. This pointer is only read, **never** dereferenced.
+ * The next time we do a conversion, if it's coming from the same
+ * xFMT as last time, and the pointer value is different, we
+ * know that the xFMT format string changed, and we need to
+ * redo the conversion. If it's the same, we don't have to.
+ *
+ * There are also several cases where we don't do a conversion,
+ * such as for a field (see the checks below).
+ */
+
+ /* Don't duplicate the code for actually updating the value */
+#define update_str_val(vp) \
+ { \
+ if (freeable(vp)) \
+ xfree(vp->sval); \
+ if (modf(vp->fval, &dtemp) == 0) /* it's integral */ \
+ (void) snprintf(s, sizeof (s), "%.30g", vp->fval); \
+ else \
+ (void) snprintf(s, sizeof (s), *fmt, vp->fval); \
+ vp->sval = tostring(s); \
+ vp->tval &= ~DONTFREE; \
+ vp->tval |= STR; \
+ }
+
+ if (isstr(vp) == 0) {
+ /*LINTED*/
+ update_str_val(vp);
+ if (fmt == OFMT) {
+ vp->tval &= ~CONVC;
+ vp->tval |= CONVO;
} else {
- /*LINTED*/
- (void) snprintf((char *)s, sizeof (s),
- (char *)*OFMT, vp->fval);
+ /* CONVFMT */
+ vp->tval &= ~CONVO;
+ vp->tval |= CONVC;
+ }
+ vp->fmt = *fmt;
+ } else if ((vp->tval & DONTFREE) != 0 || !isnum(vp) || isfld(vp)) {
+ goto done;
+ } else if (isstr(vp)) {
+ if (fmt == OFMT) {
+ if ((vp->tval & CONVC) != 0 ||
+ ((vp->tval & CONVO) != 0 && vp->fmt != *fmt)) {
+ /*LINTED*/
+ update_str_val(vp);
+ vp->tval &= ~CONVC;
+ vp->tval |= CONVO;
+ vp->fmt = *fmt;
+ }
+ } else {
+ /* CONVFMT */
+ if ((vp->tval & CONVO) != 0 ||
+ ((vp->tval & CONVC) != 0 && vp->fmt != *fmt)) {
+ /*LINTED*/
+ update_str_val(vp);
+ vp->tval &= ~CONVO;
+ vp->tval |= CONVC;
+ vp->fmt = *fmt;
+ }
}
- vp->sval = tostring(s);
- vp->tval &= ~DONTFREE;
- vp->tval |= STR;
}
- dprintf(("getsval %p: %s = \"%s\", t=%p\n",
- (void *)vp,
- vp->nval ? (char *)vp->nval : "",
- vp->sval ? (char *)vp->sval : "",
- (void *)vp->tval));
+done:
+ dprintf(("getsval %p: %s = \"%s (%p)\", t=%o\n",
+ (void *)vp, NN(vp->nval), vp->sval, (void *)vp->sval, vp->tval));
return (vp->sval);
}
-uchar *
-tostring(uchar *s)
+char *
+getsval(Cell *vp) /* get string val of a Cell */
{
- register uchar *p;
+ return (get_str_val(vp, CONVFMT));
+}
- p = (uchar *)malloc(strlen((char *)s)+1);
+char *
+getpssval(Cell *vp) /* get string val of a Cell for print */
+{
+ return (get_str_val(vp, OFMT));
+}
+
+
+char *
+tostring(const char *s) /* make a copy of string s */
+{
+ char *p = strdup(s);
if (p == NULL)
- ERROR "out of space in tostring on %s", s FATAL;
- (void) strcpy((char *)p, (char *)s);
+ FATAL("out of space in tostring on %s", s);
return (p);
}
-uchar *
-qstring(uchar *s, int delim) /* collect string up to delim */
+char *
+qstring(const char *is, int delim) /* collect string up to next delim */
{
- uchar *cbuf, *ret;
+ const char *os = is;
int c, n;
- size_t cbufsz, cnt;
-
- init_buf(&cbuf, &cbufsz, LINE_INCR);
+ uschar *s = (uschar *)is;
+ uschar *buf, *bp;
- for (cnt = 0; (c = *s) != delim; s++) {
+ if ((buf = (uschar *)malloc(strlen(is)+3)) == NULL)
+ FATAL("out of space in qstring(%s)", s);
+ for (bp = buf; (c = *s) != delim; s++) {
if (c == '\n') {
- ERROR "newline in string %.10s...", cbuf SYNTAX;
- } else if (c != '\\') {
- expand_buf(&cbuf, &cbufsz, cnt);
- cbuf[cnt++] = c;
- } else { /* \something */
- expand_buf(&cbuf, &cbufsz, cnt);
- switch (c = *++s) {
- case '\\': cbuf[cnt++] = '\\'; break;
- case 'n': cbuf[cnt++] = '\n'; break;
- case 't': cbuf[cnt++] = '\t'; break;
- case 'b': cbuf[cnt++] = '\b'; break;
- case 'f': cbuf[cnt++] = '\f'; break;
- case 'r': cbuf[cnt++] = '\r'; break;
+ SYNTAX("newline in string %.20s...", os);
+ } else if (c != '\\')
+ *bp++ = c;
+ else { /* \something */
+ c = *++s;
+ if (c == 0) { /* \ at end */
+ *bp++ = '\\';
+ break; /* for loop */
+ }
+ switch (c) {
+ case '\\': *bp++ = '\\'; break;
+ case 'n': *bp++ = '\n'; break;
+ case 't': *bp++ = '\t'; break;
+ case 'b': *bp++ = '\b'; break;
+ case 'f': *bp++ = '\f'; break;
+ case 'r': *bp++ = '\r'; break;
default:
if (!isdigit(c)) {
- cbuf[cnt++] = c;
+ *bp++ = c;
break;
}
n = c - '0';
@@ -487,13 +625,11 @@ qstring(uchar *s, int delim) /* collect string up to delim */
if (isdigit(s[1]))
n = 8 * n + *++s - '0';
}
- cbuf[cnt++] = n;
+ *bp++ = n;
break;
}
}
}
- cbuf[cnt] = '\0';
- ret = tostring(cbuf);
- free(cbuf);
- return (ret);
+ *bp++ = 0;
+ return ((char *)buf);
}