diff options
Diffstat (limited to 'usr/src/cmd/awk')
-rw-r--r-- | usr/src/cmd/awk/DIVERGENCES | 5
-rw-r--r-- | usr/src/cmd/awk/Makefile | 13
-rw-r--r-- | usr/src/cmd/awk/awk.g.y | 223
-rw-r--r-- | usr/src/cmd/awk/awk.h | 252
-rw-r--r-- | usr/src/cmd/awk/awk.lx.l | 306
-rw-r--r-- | usr/src/cmd/awk/b.c | 576
-rw-r--r-- | usr/src/cmd/awk/lex.c | 637
-rw-r--r-- | usr/src/cmd/awk/lib.c | 813
-rw-r--r-- | usr/src/cmd/awk/main.c | 150
-rw-r--r-- | usr/src/cmd/awk/maketab.c | 78
-rw-r--r-- | usr/src/cmd/awk/parse.c | 157
-rw-r--r-- | usr/src/cmd/awk/run.c | 1732
-rw-r--r-- | usr/src/cmd/awk/tran.c | 518
13 files changed, 3442 insertions, 2018 deletions
diff --git a/usr/src/cmd/awk/DIVERGENCES b/usr/src/cmd/awk/DIVERGENCES new file mode 100644 index 0000000000..ebc13cb036 --- /dev/null +++ b/usr/src/cmd/awk/DIVERGENCES @@ -0,0 +1,5 @@ +The illumos nawk(1) is slightly divergent from upstream: +- We allow an unlimited number of input program files +- We allow an unlimited number of "/pat/, /pat/" expressions +- Some of the code has been altered to track the length of strings + better so that we can avoid repeatedly calling strlen(3C) diff --git a/usr/src/cmd/awk/Makefile b/usr/src/cmd/awk/Makefile index c49c932558..046f0b739e 100644 --- a/usr/src/cmd/awk/Makefile +++ b/usr/src/cmd/awk/Makefile @@ -30,14 +30,13 @@ PROG= nawk -OBJ1= b.o lib.o main.o parse.o proctab.o run.o tran.o -OBJ2= awk.g.o awk.lx.o +OBJ1= b.o lib.o main.o parse.o proctab.o run.o tran.o lex.o +OBJ2= awk.g.o OBJS= $(OBJ2) $(OBJ1) SRCS= $(OBJ1:%.o=%.c) include ../Makefile.cmd -CERRWARN += -_gcc=-Wno-implicit-function-declaration CERRWARN += -_gcc=-Wno-unused-label CERRWARN += -_gcc=-Wno-parentheses CERRWARN += -_gcc=-Wno-unused-variable @@ -56,13 +55,13 @@ XGETFLAGS += -a -x awk.xcl CPPFLAGS += -D_FILE_OFFSET_BITS=64 YFLAGS += -d -LDLIBS += -lm +LDLIBS += -lm -lumem LINTFLAGS += -u -CLEANFILES= maketab proctab.c awk.g.c awk.lx.c y.tab.h +CLEANFILES= maketab proctab.c awk.g.c y.tab.h .KEEP_STATE: -all: $(PROG) +all: $(PROG) $(PROG): $(OBJS) $(LINK.c) $(OBJS) -o $@ $(LDLIBS) @@ -94,8 +93,6 @@ awk.g.c + y.tab.h: awk.g.y awk.g.o: awk.g.c -awk.lx.c: awk.lx.l - proctab.o: proctab.c $(COMPILE.c) proctab.c $(POST_PROCESS_O) diff --git a/usr/src/cmd/awk/awk.g.y b/usr/src/cmd/awk/awk.g.y index 21bc8b6dc8..3b5efb3d7d 100644 --- a/usr/src/cmd/awk/awk.g.y +++ b/usr/src/cmd/awk/awk.g.y @@ -1,5 +1,29 @@ %{ /* + * Copyright (C) Lucent Technologies 1997 + * All Rights Reserved + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby + * granted, provided that the above copyright notice 
appear in all + * copies and that both that the copyright notice and this + * permission notice and warranty disclaimer appear in supporting + * documentation, and that the name Lucent Technologies or any of + * its entities not be used in advertising or publicity pertaining + * to distribution of the software without specific, written prior + * permission. + * + * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. + * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER + * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, + * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF + * THIS SOFTWARE. + */ + +/* * CDDL HEADER START * * The contents of this file are subject to the terms of the @@ -30,30 +54,28 @@ /* All Rights Reserved */ %{ -#ident "%Z%%M% %I% %E% SMI" /* SVr4.0 2.10 */ -%} - -%{ #include "awk.h" + +void checkdup(Node *list, Cell *item); int yywrap(void) { return(1); } -#ifndef DEBUG -# define PUTS(x) -#endif -Node *beginloc = 0, *endloc = 0; -int infunc = 0; /* = 1 if in arglist or body of func */ -uchar *curfname = 0; -Node *arglist = 0; /* list of args for current function */ + +Node *beginloc = NULL; +Node *endloc = NULL; +int infunc = 0; /* = 1 if in arglist or body of func */ +int inloop = 0; /* = 1 if in while, for, do */ +char *curfname = NULL; /* current function name */ +Node *arglist = NULL; /* list of args for current function */ static void setfname(Cell *); static int constnode(Node *); -static uchar *strnode(Node *); -static Node *notnull(); +static char *strnode(Node *); +static Node *notnull(Node *); %} %union { Node *p; Cell *cp; int i; - uchar *s; + char *s; } %token <i> FIRSTTOKEN /* must be first */ @@ -61,25 +83,26 @@ static Node *notnull(); %token <i> NL ',' '{' '(' '|' ';' '/' ')' '}' 
'[' ']' %token <i> ARRAY %token <i> MATCH NOTMATCH MATCHOP -%token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS +%token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE %token <i> AND BOR APPEND EQ GE GT LE LT NE IN -%token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC -%token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT +%token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC +%token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE %token <i> ADD MINUS MULT DIVIDE MOD %token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ %token <i> PRINT PRINTF SPRINTF %token <p> ELSE INTEST CONDEXPR %token <i> POSTINCR PREINCR POSTDECR PREDECR -%token <cp> VAR IVAR VARNF CALL NUMBER STRING FIELD +%token <cp> VAR IVAR VARNF CALL NUMBER STRING %token <s> REGEXPR -%type <p> pas pattern ppattern plist pplist patlist prarg term +%type <p> pas pattern ppattern plist pplist patlist prarg term re %type <p> pa_pat pa_stat pa_stats %type <s> reg_expr %type <p> simple_stmt opt_simple_stmt stmt stmtlist %type <p> var varname funcname varlist -%type <p> for if while -%type <i> pst opt_pst lbrace rparen comma nl opt_nl and bor +%type <p> for if else while +%type <i> do st +%type <i> pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor %type <i> subop print %right ASGNOP @@ -89,14 +112,14 @@ static Node *notnull(); %left AND %left GETLINE %nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|' -%left ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FIELD FUNC +%left ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC %left GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER %left PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR %left REGEXPR VAR VARNF IVAR WHILE '(' %left CAT %left '+' '-' %left '*' '/' '%' -%left NOT UMINUS +%left NOT UMINUS UPLUS %right POWER %right DECR INCR %left INDIRECT @@ -107,7 +130,7 @@ static Node *notnull(); program: pas { if (errorflag==0) winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); } - | error { 
yyclearin; bracecheck(); ERROR "bailing out" SYNTAX; } + | error { yyclearin; bracecheck(); SYNTAX("bailing out"); } ; and: @@ -131,12 +154,12 @@ else: ; for: - FOR '(' opt_simple_stmt ';' pattern ';' opt_simple_stmt rparen stmt - { $$ = stat4(FOR, $3, notnull($5), $7, $9); } - | FOR '(' opt_simple_stmt ';' ';' opt_simple_stmt rparen stmt - { $$ = stat4(FOR, $3, NIL, $6, $8); } - | FOR '(' varname IN varname rparen stmt - { $$ = stat3(IN, $3, makearr($5), $7); } + FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt + { --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); } + | FOR '(' opt_simple_stmt ';' ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt + { --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); } + | FOR '(' varname IN varname rparen {inloop++;} stmt + { --inloop; $$ = stat3(IN, $3, makearr($5), $8); } ; funcname: @@ -184,8 +207,8 @@ pa_pat: pa_stat: pa_pat { $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); } | pa_pat lbrace stmtlist '}' { $$ = stat2(PASTAT, $1, $3); } - | pa_pat ',' pa_pat { $$ = pa2stat($1, $3, stat2(PRINT, rectonode(), NIL)); } - | pa_pat ',' pa_pat lbrace stmtlist '}' { $$ = pa2stat($1, $3, $5); } + | pa_pat ',' opt_nl pa_pat { $$ = pa2stat($1, $4, stat2(PRINT, rectonode(), NIL)); } + | pa_pat ',' opt_nl pa_pat lbrace stmtlist '}' { $$ = pa2stat($1, $4, $6); } | lbrace stmtlist '}' { $$ = stat2(PASTAT, NIL, $2); } | XBEGIN lbrace stmtlist '}' { beginloc = linkum(beginloc, $3); $$ = 0; } @@ -202,19 +225,17 @@ pa_stats: patlist: pattern - | patlist comma pattern { $$ = linkum($1, $3); } + | patlist comma pattern { $$ = linkum($1, $3); } ; ppattern: var ASGNOP ppattern { $$ = op2($2, $1, $3); } | ppattern '?' ppattern ':' ppattern %prec '?' 
- { $$ = op3(CONDEXPR, notnull($1), $3, $5); } + { $$ = op3(CONDEXPR, notnull($1), $3, $5); } | ppattern bor ppattern %prec BOR { $$ = op2(BOR, notnull($1), notnull($3)); } | ppattern and ppattern %prec AND { $$ = op2(AND, notnull($1), notnull($3)); } - | NOT ppattern - { $$ = op1(NOT, notnull($2)); } | ppattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); } | ppattern MATCHOP ppattern { if (constnode($3)) @@ -224,21 +245,18 @@ ppattern: | ppattern IN varname { $$ = op2(INTEST, $1, makearr($3)); } | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); } | ppattern term %prec CAT { $$ = op2(CAT, $1, $2); } - | reg_expr - { $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); } + | re | term ; pattern: var ASGNOP pattern { $$ = op2($2, $1, $3); } | pattern '?' pattern ':' pattern %prec '?' - { $$ = op3(CONDEXPR, notnull($1), $3, $5); } + { $$ = op3(CONDEXPR, notnull($1), $3, $5); } | pattern bor pattern %prec BOR { $$ = op2(BOR, notnull($1), notnull($3)); } | pattern and pattern %prec AND { $$ = op2(AND, notnull($1), notnull($3)); } - | NOT pattern - { $$ = op1(NOT, op2(NE,$2,valtonode(lookup((uchar *)"$zero&null",symtab),CCON))); } | pattern EQ pattern { $$ = op2($2, $1, $3); } | pattern GE pattern { $$ = op2($2, $1, $3); } | pattern GT pattern { $$ = op2($2, $1, $3); } @@ -253,11 +271,14 @@ pattern: $$ = op3($2, (Node *)1, $1, $3); } | pattern IN varname { $$ = op2(INTEST, $1, makearr($3)); } | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); } - | pattern '|' GETLINE var { $$ = op3(GETLINE, $4, (Node*)$2, $1); } - | pattern '|' GETLINE { $$ = op3(GETLINE, (Node*)0, (Node*)$2, $1); } + | pattern '|' GETLINE var { + if (safe) SYNTAX("cmd | getline is unsafe"); + else $$ = op3(GETLINE, $4, itonp($2), $1); } + | pattern '|' GETLINE { + if (safe) SYNTAX("cmd | getline is unsafe"); + else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); } | pattern term %prec CAT { $$ = op2(CAT, $1, $2); } - | reg_expr - { $$ = op3(MATCH, NIL, 
rectonode(), (Node*)makedfa($1, 0)); } + | re | term ; @@ -269,6 +290,7 @@ plist: pplist: ppattern | pplist comma ppattern { $$ = linkum($1, $3); } + ; prarg: /* empty */ { $$ = rectonode(); } @@ -288,6 +310,12 @@ rbrace: '}' | rbrace NL ; +re: + reg_expr + { $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); } + | NOT re { $$ = op1(NOT, notnull($2)); } + ; + reg_expr: '/' {startreg();} REGEXPR '/' { $$ = $3; } ; @@ -297,26 +325,34 @@ rparen: ; simple_stmt: - print prarg '|' term { $$ = stat3($1, $2, (Node *) $3, $4); } - | print prarg APPEND term { $$ = stat3($1, $2, (Node *) $3, $4); } - | print prarg GT term { $$ = stat3($1, $2, (Node *) $3, $4); } + print prarg '|' term { + if (safe) SYNTAX("print | is unsafe"); + else $$ = stat3($1, $2, itonp($3), $4); } + | print prarg APPEND term { + if (safe) SYNTAX("print >> is unsafe"); + else $$ = stat3($1, $2, itonp($3), $4); } + | print prarg GT term { + if (safe) SYNTAX("print > is unsafe"); + else $$ = stat3($1, $2, itonp($3), $4); } | print prarg { $$ = stat3($1, $2, NIL, NIL); } | DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); } - | DELETE varname { yyclearin; ERROR "you can only delete array[element]" SYNTAX; $$ = stat1(DELETE, $2); } + | DELETE varname { $$ = stat2(DELETE, makearr($2), 0); } | pattern { $$ = exptostat($1); } - | error { yyclearin; ERROR "illegal statement" SYNTAX; } + | error { yyclearin; SYNTAX("illegal statement"); } ; st: - nl | ';' opt_nl + nl + | ';' opt_nl ; stmt: - BREAK st { $$ = stat1(BREAK, NIL); } - | CLOSE pattern st { $$ = stat1(CLOSE, $2); } - | CONTINUE st { $$ = stat1(CONTINUE, NIL); } - | do stmt WHILE '(' pattern ')' st - { $$ = stat2(DO, $2, notnull($5)); } + BREAK st { if (!inloop) SYNTAX("break illegal outside of loops"); + $$ = stat1(BREAK, NIL); } + | CONTINUE st { if (!inloop) SYNTAX("continue illegal outside of loops"); + $$ = stat1(CONTINUE, NIL); } + | do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st + { $$ = stat2(DO, $3, 
notnull($7)); } | EXIT pattern st { $$ = stat1(EXIT, $2); } | EXIT st { $$ = stat1(EXIT, NIL); } | for @@ -324,12 +360,15 @@ stmt: | if stmt { $$ = stat3(IF, $1, $2, NIL); } | lbrace stmtlist rbrace { $$ = $2; } | NEXT st { if (infunc) - ERROR "next is illegal inside a function" SYNTAX; + SYNTAX("next is illegal inside a function"); $$ = stat1(NEXT, NIL); } + | NEXTFILE st { if (infunc) + SYNTAX("nextfile is illegal inside a function"); + $$ = stat1(NEXTFILE, NIL); } | RETURN pattern st { $$ = stat1(RETURN, $2); } | RETURN st { $$ = stat1(RETURN, NIL); } | simple_stmt st - | while stmt { $$ = stat2(WHILE, $1, $2); } + | while {inloop++;} stmt { --inloop; $$ = stat2(WHILE, $1, $3); } | ';' opt_nl { $$ = 0; } ; @@ -343,31 +382,34 @@ subop: ; term: - term '+' term { $$ = op2(ADD, $1, $3); } + term '/' ASGNOP term { $$ = op2(DIVEQ, $1, $4); } + | term '+' term { $$ = op2(ADD, $1, $3); } | term '-' term { $$ = op2(MINUS, $1, $3); } | term '*' term { $$ = op2(MULT, $1, $3); } | term '/' term { $$ = op2(DIVIDE, $1, $3); } | term '%' term { $$ = op2(MOD, $1, $3); } | term POWER term { $$ = op2(POWER, $1, $3); } | '-' term %prec UMINUS { $$ = op1(UMINUS, $2); } - | '+' term %prec UMINUS { $$ = $2; } - | BLTIN '(' ')' { $$ = op2(BLTIN, (Node *) $1, rectonode()); } - | BLTIN '(' patlist ')' { $$ = op2(BLTIN, (Node *) $1, $3); } - | BLTIN { $$ = op2(BLTIN, (Node *) $1, rectonode()); } - | CALL '(' ')' { $$ = op2(CALL, valtonode($1,CVAR), NIL); } - | CALL '(' patlist ')' { $$ = op2(CALL, valtonode($1,CVAR), $3); } + | '+' term %prec UMINUS { $$ = op1(UPLUS, $2); } + | NOT term %prec UMINUS { $$ = op1(NOT, notnull($2)); } + | BLTIN '(' ')' { $$ = op2(BLTIN, itonp($1), rectonode()); } + | BLTIN '(' patlist ')' { $$ = op2(BLTIN, itonp($1), $3); } + | BLTIN { $$ = op2(BLTIN, itonp($1), rectonode()); } + | CALL '(' ')' { $$ = op2(CALL, celltonode($1,CVAR), NIL); } + | CALL '(' patlist ')' { $$ = op2(CALL, celltonode($1,CVAR), $3); } + | CLOSE term { $$ = op1(CLOSE, $2); } | DECR var 
{ $$ = op1(PREDECR, $2); } | INCR var { $$ = op1(PREINCR, $2); } | var DECR { $$ = op1(POSTDECR, $1); } | var INCR { $$ = op1(POSTINCR, $1); } - | GETLINE var LT term { $$ = op3(GETLINE, $2, (Node *)$3, $4); } - | GETLINE LT term { $$ = op3(GETLINE, NIL, (Node *)$2, $3); } + | GETLINE var LT term { $$ = op3(GETLINE, $2, itonp($3), $4); } + | GETLINE LT term { $$ = op3(GETLINE, NIL, itonp($2), $3); } | GETLINE var { $$ = op3(GETLINE, $2, NIL, NIL); } | GETLINE { $$ = op3(GETLINE, NIL, NIL, NIL); } | INDEX '(' pattern comma pattern ')' { $$ = op2(INDEX, $3, $5); } | INDEX '(' pattern comma reg_expr ')' - { ERROR "index() doesn't permit regular expressions" SYNTAX; + { SYNTAX("index() doesn't permit regular expressions"); $$ = op2(INDEX, $3, (Node*)$5); } | '(' pattern ')' { $$ = $2; } | MATCHFCN '(' pattern comma reg_expr ')' @@ -377,7 +419,7 @@ term: $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1)); else $$ = op3(MATCHFCN, (Node *)1, $3, $5); } - | NUMBER { $$ = valtonode($1, CCON); } + | NUMBER { $$ = celltonode($1, CCON); } | SPLIT '(' pattern comma varname comma pattern ')' /* string */ { $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); } | SPLIT '(' pattern comma varname comma reg_expr ')' /* const /regexp/ */ @@ -385,7 +427,7 @@ term: | SPLIT '(' pattern comma varname ')' { $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); } /* default */ | SPRINTF '(' patlist ')' { $$ = op1($1, $3); } - | STRING { $$ = valtonode($1, CCON); } + | STRING { $$ = celltonode($1, CCON); } | subop '(' reg_expr comma pattern ')' { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); } | subop '(' pattern comma pattern ')' @@ -410,20 +452,21 @@ term: var: varname | varname '[' patlist ']' { $$ = op2(ARRAY, makearr($1), $3); } - | FIELD { $$ = valtonode($1, CFLD); } - | IVAR { $$ = op1(INDIRECT, valtonode($1, CVAR)); } - | INDIRECT term { $$ = op1(INDIRECT, $2); } - ; + | IVAR { $$ = op1(INDIRECT, celltonode($1, CVAR)); } + | INDIRECT term { $$ = op1(INDIRECT, 
$2); } + ; varlist: /* nothing */ { arglist = $$ = 0; } - | VAR { arglist = $$ = valtonode($1,CVAR); } - | varlist comma VAR { arglist = $$ = linkum($1,valtonode($3,CVAR)); } + | VAR { arglist = $$ = celltonode($1,CVAR); } + | varlist comma VAR { + checkdup($1, $3); + arglist = $$ = linkum($1,celltonode($3,CVAR)); } ; varname: - VAR { $$ = valtonode($1, CVAR); } - | ARG { $$ = op1(ARG, (Node *) $1); } + VAR { $$ = celltonode($1, CVAR); } + | ARG { $$ = op1(ARG, itonp($1)); } | VARNF { $$ = op1(VARNF, (Node *) $1); } ; @@ -438,20 +481,20 @@ static void setfname(Cell *p) { if (isarr(p)) - ERROR "%s is an array, not a function", p->nval SYNTAX; - else if (isfunc(p)) - ERROR "you can't define function %s more than once", p->nval SYNTAX; + SYNTAX("%s is an array, not a function", p->nval); + else if (isfcn(p)) + SYNTAX("you can't define function %s more than once", p->nval); curfname = p->nval; + p->tval |= FCN; } - static int constnode(Node *p) { - return p->ntype == NVALUE && ((Cell *) (p->narg[0]))->csub == CCON; + return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON; } -static uchar * +static char * strnode(Node *p) { return ((Cell *)(p->narg[0]))->sval; @@ -468,3 +511,15 @@ notnull(Node *n) return op2(NE, n, nullnode); } } + +void +checkdup(Node *vl, Cell *cp) /* check if name already in list */ +{ + char *s = cp->nval; + for (; vl; vl = vl->nnext) { + if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) { + SYNTAX("duplicate argument %s", s); + break; + } + } +} diff --git a/usr/src/cmd/awk/awk.h b/usr/src/cmd/awk/awk.h index 987028fea2..dfbed45e9d 100644 --- a/usr/src/cmd/awk/awk.h +++ b/usr/src/cmd/awk/awk.h @@ -1,4 +1,28 @@ /* + * Copyright (C) Lucent Technologies 1997 + * All Rights Reserved + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby + * granted, provided that the above copyright notice appear in all + * copies and that both that the copyright notice and this + * 
permission notice and warranty disclaimer appear in supporting + * documentation, and that the name Lucent Technologies or any of + * its entities not be used in advertising or publicity pertaining + * to distribution of the software without specific, written prior + * permission. + * + * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. + * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER + * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, + * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF + * THIS SOFTWARE. + */ + +/* * CDDL HEADER START * * The contents of this file are subject to the terms of the @@ -29,6 +53,7 @@ #ifndef AWK_H #define AWK_H +#include <assert.h> #include <sys/types.h> #include <ctype.h> #include <stdio.h> @@ -38,10 +63,15 @@ #include <limits.h> typedef double Awkfloat; -typedef unsigned char uchar; -#define xfree(a) { if ((a) != NULL) { free(a); a = NULL; } } +/* unsigned char is more trouble than it's worth */ + +typedef unsigned char uschar; +#define xfree(a) { if ((a) != NULL) { free((void *)(a)); (a) = NULL; } } + +/* guaranteed non-null for dprintf */ +#define NN(p) ((p) ? 
(p) : "(null)") #define DEBUG #ifdef DEBUG /* uses have to be doubly parenthesized */ @@ -50,63 +80,56 @@ typedef unsigned char uchar; #define dprintf(x) #endif -extern char errbuf[200]; -extern void error(int, char *); -#define ERROR (void) snprintf(errbuf, sizeof (errbuf), -/*CSTYLED*/ -#define FATAL ), error(1, errbuf) -/*CSTYLED*/ -#define WARNING ), error(0, errbuf) -/*CSTYLED*/ -#define SYNTAX ), yyerror(errbuf) -/*CSTYLED*/ -#define CONT ) - extern int compile_time; /* 1 if compiling, 0 if running */ +extern int safe; /* 0 => unsafe, 1 => safe */ #define FLD_INCR 64 #define LINE_INCR 256 +#define RECSIZE (8 * 1024) /* sets limit on records, fields, etc., etc. */ +extern size_t recsize; /* size of current record, orig RECSIZE */ /* ensure that there is extra 1 byte in the buffer */ #define expand_buf(p, n, r) \ if (*(n) == 0 || (r) >= (*(n) - 1)) r_expand_buf(p, n, r) -extern uchar **FS; -extern uchar **RS; -extern uchar **ORS; -extern uchar **OFS; -extern uchar **OFMT; +extern char **FS; +extern char **RS; +extern char **ORS; +extern char **OFS; +extern char **OFMT; extern Awkfloat *NR; extern Awkfloat *FNR; extern Awkfloat *NF; -extern uchar **FILENAME; -extern uchar **SUBSEP; +extern char **FILENAME; +extern char **SUBSEP; extern Awkfloat *RSTART; extern Awkfloat *RLENGTH; -extern uchar *record; -extern size_t record_size; -extern int errorflag; +extern char *record; /* points to $0 */ +extern size_t recsize; +extern int errorflag; /* 1 if error has occurred */ extern int donefld; /* 1 if record broken into fields */ extern int donerec; /* 1 if record is valid (no fld has changed */ -extern uchar *patbeg; /* beginning of pattern matched */ -extern int patlen; /* length. set in b.c */ +extern char *patbeg; /* beginning of pattern matched */ +extern int patlen; /* length of pattern matched. set in b.c */ /* Cell: all information about a variable or constant */ typedef struct Cell { - uchar ctype; /* OCELL, OBOOL, OJUMP, etc. 
*/ - uchar csub; /* CCON, CTEMP, CFLD, etc. */ - uchar *nval; /* name, for variables only */ - uchar *sval; /* string value */ + uschar ctype; /* OCELL, OBOOL, OJUMP, etc. */ + uschar csub; /* CCON, CTEMP, CFLD, etc. */ + char *nval; /* name, for variables only */ + char *sval; /* string value */ Awkfloat fval; /* value as number */ - unsigned tval; - /* type info: STR|NUM|ARR|FCN|FLD|CON|DONTFREE */ + int tval; + /* type info: STR|NUM|ARR|FCN|FLD|CON|DONTFREE|CONVC|CONVO */ + char *fmt; + /* CONVFMT/OFMT value used to convert from number */ struct Cell *cnext; /* ptr to next if chained */ } Cell; -typedef struct { /* symbol table array */ +typedef struct Array { /* symbol table array */ int nelem; /* elements in table right now */ int size; /* size of tab */ Cell **tab; /* hash table pointers */ @@ -114,15 +137,23 @@ typedef struct { /* symbol table array */ #define NSYMTAB 50 /* initial size of a symbol table */ extern Array *symtab, *makesymtab(int); -extern Cell *setsymtab(uchar *, uchar *, Awkfloat, unsigned int, Array *); -extern Cell *lookup(uchar *, Array *); +extern Cell *setsymtab(const char *, const char *, Awkfloat, + unsigned int, Array *); +extern Cell *lookup(const char *, Array *); extern Cell *recloc; /* location of input record */ extern Cell *nrloc; /* NR */ extern Cell *fnrloc; /* FNR */ +extern Cell *fsloc; /* FS */ extern Cell *nfloc; /* NF */ +extern Cell *ofsloc; /* OFS */ +extern Cell *orsloc; /* ORS */ +extern Cell *rsloc; /* RS */ +extern Cell *rtloc; /* RT */ extern Cell *rstartloc; /* RSTART */ extern Cell *rlengthloc; /* RLENGTH */ +extern Cell *subseploc; /* SUBSEP */ +extern Cell *symtabloc; /* SYMTAB */ /* Cell.tval values: */ #define NUM 01 /* number value is valid */ @@ -133,17 +164,18 @@ extern Cell *rlengthloc; /* RLENGTH */ #define FCN 040 /* this is a function name */ #define FLD 0100 /* this is a field $1, $2, ... 
*/ #define REC 0200 /* this is $0 */ +#define CONVC 0400 /* string was converted from number via CONVFMT */ +#define CONVO 01000 /* string was converted from number via OFMT */ -#define freeable(p) (!((p)->tval & DONTFREE)) -extern Awkfloat setfval(Cell *, Awkfloat), getfval(Cell *), r_getfval(Cell *); -extern uchar *setsval(Cell *, uchar *), *getsval(Cell *), *r_getsval(Cell *); -extern uchar *tostring(uchar *), *tokname(int), *qstring(uchar *, int); - -#define getfval(p) \ - (((p)->tval & (ARR|FLD|REC|NUM)) == NUM ? (p)->fval : r_getfval(p)) -#define getsval(p) \ - (((p)->tval & (ARR|FLD|REC|STR)) == STR ? (p)->sval : r_getsval(p)) +extern Awkfloat setfval(Cell *, Awkfloat); +extern Awkfloat getfval(Cell *); +extern char *setsval(Cell *, const char *); +extern char *getsval(Cell *); +extern char *getpssval(Cell *); /* for print */ +extern char *tostring(const char *); +extern char *tokname(int); +extern char *qstring(const char *, int); /* function types */ #define FLENGTH 1 @@ -159,15 +191,16 @@ extern uchar *tostring(uchar *), *tokname(int), *qstring(uchar *, int); #define FATAN 11 #define FTOUPPER 12 #define FTOLOWER 13 +#define FFLUSH 14 /* Node: parse tree is made of nodes, with Cell's at bottom */ typedef struct Node { int ntype; struct Node *nnext; - off_t lineno; + off_t lineno; int nobj; - struct Node *narg[1]; + struct Node *narg[1]; /* variable: actual size set by calling malloc */ } Node; @@ -190,6 +223,7 @@ extern Node *nullnode; #define CNAME 3 #define CVAR 2 #define CFLD 1 +#define CUNK 0 /* bool subtypes */ #define BTRUE 11 @@ -201,6 +235,7 @@ extern Node *nullnode; #define JBREAK 23 #define JCONT 24 #define JRET 25 +#define JNEXTFILE 26 /* node types */ #define NVALUE 1 @@ -210,7 +245,7 @@ extern Node *nullnode; extern Cell *(*proctab[])(Node **, int); extern Cell *nullproc(Node **, int); -extern int pairstack[], paircnt; +extern int *pairstack, paircnt; extern Node *stat1(int, Node *), *stat2(int, Node *, Node *); extern Node *stat3(int, Node *, 
Node *, Node *); @@ -219,7 +254,7 @@ extern Node *pa2stat(Node *, Node *, Node *); extern Node *op1(int, Node *), *op2(int, Node *, Node *); extern Node *op3(int, Node *, Node *, Node *); extern Node *op4(int, Node *, Node *, Node *, Node *); -extern Node *linkum(Node *, Node *), *valtonode(Cell *, int); +extern Node *linkum(Node *, Node *), *celltonode(Cell *, int); extern Node *rectonode(void), *exptostat(Node *); extern Node *makearr(Node *); @@ -231,89 +266,124 @@ extern Node *makearr(Node *); #define isexit(n) ((n)->csub == JEXIT) #define isbreak(n) ((n)->csub == JBREAK) #define iscont(n) ((n)->csub == JCONT) -#define isnext(n) ((n)->csub == JNEXT) +#define isnext(n) ((n)->csub == JNEXT || (n)->csub == JNEXTFILE) #define isret(n) ((n)->csub == JRET) +#define isrec(n) ((n)->tval & REC) +#define isfld(n) ((n)->tval & FLD) #define isstr(n) ((n)->tval & STR) #define isnum(n) ((n)->tval & NUM) #define isarr(n) ((n)->tval & ARR) -#define isfunc(n) ((n)->tval & FCN) +#define isfcn(n) ((n)->tval & FCN) #define istrue(n) ((n)->csub == BTRUE) #define istemp(n) ((n)->csub == CTEMP) +#define freeable(p) (((p)->tval & (STR|DONTFREE)) == STR) + +/* structures used by regular expression matching machinery, mostly b.c: */ -#define NCHARS (256+1) +/* 256 handles 8-bit chars; 128 does 7-bit */ +/* watch out in match(), etc. */ +#define NCHARS (256+3) #define NSTATES 32 typedef struct rrow { - int ltype; - int lval; + long ltype; /* long avoids pointer warnings on 64-bit */ + union { + int i; + Node *np; + uschar *up; + } lval; /* because Al stores a pointer in it! 
*/ int *lfollow; } rrow; typedef struct fa { - uchar *restr; + uschar gototab[NSTATES][NCHARS]; + uschar out[NSTATES]; + uschar *restr; + int *posns[NSTATES]; int anchor; int use; - uchar gototab[NSTATES][NCHARS]; - int *posns[NSTATES]; - uchar out[NSTATES]; int initstat; int curstat; int accept; int reset; + /* re is variable: actual size set by calling malloc */ struct rrow re[1]; } fa; +/* lex.c */ +extern int yylex(void); +extern void startreg(void); +extern int input(void); +extern void unput(int); +extern void unputstr(const char *); +extern int yylook(void); +extern int yyback(int *, int); +extern int yyinput(void); + +/* parse.c */ +extern void defn(Cell *, Node *, Node *); +extern int ptoi(void *); +extern Node *itonp(int); +extern int isarg(const char *); + /* b.c */ -extern fa *makedfa(uchar *, int); -extern int nematch(fa *, uchar *); -extern int match(fa *, uchar *); -extern int pmatch(fa *, uchar *); +extern fa *makedfa(const char *, int); +extern int nematch(fa *, const char *); +extern int match(fa *, const char *); +extern int pmatch(fa *, const char *); /* lib.c */ -extern int isclvar(uchar *); -extern int is_number(uchar *); -extern void setclvar(uchar *); -extern int readrec(uchar **, size_t *, FILE *); + +extern void SYNTAX(const char *, ...); +extern void FATAL(const char *, ...) 
__attribute__((__noreturn__)); +extern void WARNING(const char *, ...); +extern void error(void); +extern void nextfile(void); +extern void savefs(void); + +extern int isclvar(const char *); +extern int is_number(const char *); +extern void setclvar(char *); +extern int readrec(char **, size_t *, FILE *); extern void bracecheck(void); +extern void recinit(unsigned int n); extern void syminit(void); -extern void yyerror(char *); +extern void yyerror(const char *); extern void fldbld(void); extern void recbld(void); -extern int getrec(uchar **, size_t *); +extern int getrec(char **, size_t *, int); extern Cell *fieldadr(int); extern void newfld(int); -extern Cell *getfld(int); extern int fldidx(Cell *); -extern double errcheck(double, char *); +extern double errcheck(double, const char *); extern void fpecatch(int); -extern void init_buf(uchar **, size_t *, size_t); -extern void adjust_buf(uchar **, size_t); -extern void r_expand_buf(uchar **, size_t *, size_t); - -extern int donefld; -extern int donerec; -extern uchar *record; -extern size_t record_size; +extern void r_expand_buf(char **, size_t *, size_t); +extern void makefields(int, int); +extern void growfldtab(int n); +extern void setlastfld(int n); /* main.c */ extern int dbg; -extern uchar *cmdname; -extern uchar *lexprog; +extern char *lexprog; extern int compile_time; -extern char radixpoint; +extern char *cursource(void); +extern int pgetc(void); /* tran.c */ extern void syminit(void); -extern void arginit(int, uchar **); -extern void envinit(uchar **); +extern void arginit(int, char **); +extern void envinit(char **); extern void freesymtab(Cell *); -extern void freeelem(Cell *, uchar *); -extern void funnyvar(Cell *, char *); -extern int hash(uchar *, int); +extern void freeelem(Cell *, const char *); +extern void funnyvar(Cell *, const char *); +extern int hash(const char *, int); extern Awkfloat *ARGC; /* run.c */ -extern void run(Node *); +extern void run(Node *); +extern const char *filename(FILE *); 
+extern int adjbuf(char **pb, size_t *sz, size_t min, size_t q, + char **pbp, const char *what); extern int paircnt; extern Node *winner; @@ -336,7 +406,7 @@ extern Cell *substr(Node **, int); extern Cell *sub(Node **, int); extern Cell *gsub(Node **, int); extern Cell *sindex(Node **, int); -extern Cell *a_sprintf(Node **, int); +extern Cell *awksprintf(Node **, int); extern Cell *arith(Node **, int); extern Cell *incrdecr(Node **, int); extern Cell *cat(Node **, int); @@ -344,10 +414,10 @@ extern Cell *pastat(Node **, int); extern Cell *dopa2(Node **, int); extern Cell *matchop(Node **, int); extern Cell *intest(Node **, int); -extern Cell *aprintf(Node **, int); -extern Cell *print(Node **, int); +extern Cell *awkprintf(Node **, int); +extern Cell *printstat(Node **, int); extern Cell *closefile(Node **, int); -extern Cell *delete(Node **, int); +extern Cell *awkdelete(Node **, int); extern Cell *split(Node **, int); extern Cell *assign(Node **, int); extern Cell *condexpr(Node **, int); @@ -361,6 +431,6 @@ extern Cell *bltin(Node **, int); extern Cell *call(Node **, int); extern Cell *arg(Node **, int); extern Cell *getnf(Node **, int); -extern Cell *getaline(Node **, int); +extern Cell *awkgetline(Node **, int); #endif /* AWK_H */ diff --git a/usr/src/cmd/awk/awk.lx.l b/usr/src/cmd/awk/awk.lx.l deleted file mode 100644 index a7e8185832..0000000000 --- a/usr/src/cmd/awk/awk.lx.l +++ /dev/null @@ -1,306 +0,0 @@ -%{ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. 
- * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ -/* All Rights Reserved */ -%} - -%{ -#pragma ident "%Z%%M% %I% %E% SMI" -%} - -%Start A str sc reg comment - -%{ - -#include <sys/types.h> -#include "awk.h" -#include "y.tab.h" - -#undef input /* defeat lex */ -#undef unput - -static void unput(int); -static void unputstr(char *); - -extern YYSTYPE yylval; -extern int infunc; - -off_t lineno = 1; -int bracecnt = 0; -int brackcnt = 0; -int parencnt = 0; -#define DEBUG -#ifdef DEBUG -# define RET(x) {if(dbg)printf("lex %s [%s]\n", tokname(x), yytext); return(x); } -#else -# define RET(x) return(x) -#endif - -/* - * The standards (SUSV2) requires that Record size be atleast LINE_MAX. - * LINE_MAX is a standard variable defined in limits.h. - * Though nawk is not standards compliant, we let RECSIZE - * grow with LINE_MAX instead of the magic number 1024. - */ -#define CBUFLEN (3 * LINE_MAX) - -#define CADD cbuf[clen++] = yytext[0]; \ - if (clen >= CBUFLEN-1) { \ - ERROR "string/reg expr %.10s... 
too long", cbuf SYNTAX; \ - BEGIN A; \ - } - -static uchar cbuf[CBUFLEN]; -static uchar *s; -static int clen, cflag; -%} - -A [a-zA-Z_] -B [a-zA-Z0-9_] -D [0-9] -O [0-7] -H [0-9a-fA-F] -WS [ \t] - -%% - switch (yybgin-yysvec-1) { /* witchcraft */ - case 0: - BEGIN A; - break; - case sc: - BEGIN A; - RET('}'); - } - -<A>\n { lineno++; RET(NL); } -<A>#.* { ; } /* strip comments */ -<A>{WS}+ { ; } -<A>; { RET(';'); } - -<A>"\\"\n { lineno++; } -<A>BEGIN { RET(XBEGIN); } -<A>END { RET(XEND); } -<A>func(tion)? { if (infunc) ERROR "illegal nested function" SYNTAX; RET(FUNC); } -<A>return { if (!infunc) ERROR "return not in function" SYNTAX; RET(RETURN); } -<A>"&&" { RET(AND); } -<A>"||" { RET(BOR); } -<A>"!" { RET(NOT); } -<A>"!=" { yylval.i = NE; RET(NE); } -<A>"~" { yylval.i = MATCH; RET(MATCHOP); } -<A>"!~" { yylval.i = NOTMATCH; RET(MATCHOP); } -<A>"<" { yylval.i = LT; RET(LT); } -<A>"<=" { yylval.i = LE; RET(LE); } -<A>"==" { yylval.i = EQ; RET(EQ); } -<A>">=" { yylval.i = GE; RET(GE); } -<A>">" { yylval.i = GT; RET(GT); } -<A>">>" { yylval.i = APPEND; RET(APPEND); } -<A>"++" { yylval.i = INCR; RET(INCR); } -<A>"--" { yylval.i = DECR; RET(DECR); } -<A>"+=" { yylval.i = ADDEQ; RET(ASGNOP); } -<A>"-=" { yylval.i = SUBEQ; RET(ASGNOP); } -<A>"*=" { yylval.i = MULTEQ; RET(ASGNOP); } -<A>"/=" { yylval.i = DIVEQ; RET(ASGNOP); } -<A>"%=" { yylval.i = MODEQ; RET(ASGNOP); } -<A>"^=" { yylval.i = POWEQ; RET(ASGNOP); } -<A>"**=" { yylval.i = POWEQ; RET(ASGNOP); } -<A>"=" { yylval.i = ASSIGN; RET(ASGNOP); } -<A>"**" { RET(POWER); } -<A>"^" { RET(POWER); } - -<A>"$"{D}+ { yylval.cp = fieldadr(atoi(yytext+1)); RET(FIELD); } -<A>"$NF" { unputstr("(NF)"); return(INDIRECT); } -<A>"$"{A}{B}* { int c, n; - c = input(); unput(c); - if (c == '(' || c == '[' || infunc && (n=isarg(yytext+1)) >= 0) { - unputstr(yytext+1); - return(INDIRECT); - } else { - yylval.cp = setsymtab((uchar *)yytext+1, - (uchar *)"",0.0,STR|NUM,symtab); - RET(IVAR); - } - } -<A>"$" { RET(INDIRECT); } -<A>NF { 
yylval.cp = setsymtab((uchar *)yytext, (uchar *)"", 0.0, NUM, symtab); RET(VARNF); } - -<A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)? { - yylval.cp = setsymtab((uchar *)yytext, tostring((uchar *)yytext), atof(yytext), CON|NUM, symtab); - RET(NUMBER); } - -<A>while { RET(WHILE); } -<A>for { RET(FOR); } -<A>do { RET(DO); } -<A>if { RET(IF); } -<A>else { RET(ELSE); } -<A>next { RET(NEXT); } -<A>exit { RET(EXIT); } -<A>break { RET(BREAK); } -<A>continue { RET(CONTINUE); } -<A>print { yylval.i = PRINT; RET(PRINT); } -<A>printf { yylval.i = PRINTF; RET(PRINTF); } -<A>sprintf { yylval.i = SPRINTF; RET(SPRINTF); } -<A>split { yylval.i = SPLIT; RET(SPLIT); } -<A>substr { RET(SUBSTR); } -<A>sub { yylval.i = SUB; RET(SUB); } -<A>gsub { yylval.i = GSUB; RET(GSUB); } -<A>index { RET(INDEX); } -<A>match { RET(MATCHFCN); } -<A>in { RET(IN); } -<A>getline { RET(GETLINE); } -<A>close { RET(CLOSE); } -<A>delete { RET(DELETE); } -<A>length { yylval.i = FLENGTH; RET(BLTIN); } -<A>log { yylval.i = FLOG; RET(BLTIN); } -<A>int { yylval.i = FINT; RET(BLTIN); } -<A>exp { yylval.i = FEXP; RET(BLTIN); } -<A>sqrt { yylval.i = FSQRT; RET(BLTIN); } -<A>sin { yylval.i = FSIN; RET(BLTIN); } -<A>cos { yylval.i = FCOS; RET(BLTIN); } -<A>atan2 { yylval.i = FATAN; RET(BLTIN); } -<A>system { yylval.i = FSYSTEM; RET(BLTIN); } -<A>rand { yylval.i = FRAND; RET(BLTIN); } -<A>srand { yylval.i = FSRAND; RET(BLTIN); } -<A>toupper { yylval.i = FTOUPPER; RET(BLTIN); } -<A>tolower { yylval.i = FTOLOWER; RET(BLTIN); } - -<A>{A}{B}* { int n, c; - c = input(); unput(c); /* look for '(' */ - if (c != '(' && infunc && (n=isarg(yytext)) >= 0) { - yylval.i = n; - RET(ARG); - } else { - yylval.cp = setsymtab((uchar *)yytext, - (uchar *)"",0.0,STR|NUM,symtab); - if (c == '(') { - RET(CALL); - } else { - RET(VAR); - } - } - } -<A>\" { BEGIN str; clen = 0; } - -<A>"}" { if (--bracecnt < 0) ERROR "extra }" SYNTAX; BEGIN sc; RET(';'); } -<A>"]" { if (--brackcnt < 0) ERROR "extra ]" SYNTAX; RET(']'); } -<A>")" { if 
(--parencnt < 0) ERROR "extra )" SYNTAX; RET(')'); } - -<A>. { if (yytext[0] == '{') bracecnt++; - else if (yytext[0] == '[') brackcnt++; - else if (yytext[0] == '(') parencnt++; - RET(yylval.i = yytext[0]); /* everything else */ } - -<reg>\\. { cbuf[clen++] = '\\'; cbuf[clen++] = yytext[1]; } -<reg>\n { ERROR "newline in regular expression %.10s...", cbuf SYNTAX; lineno++; BEGIN A; } -<reg>"/" { BEGIN A; - cbuf[clen] = 0; - yylval.s = tostring(cbuf); - unput('/'); - RET(REGEXPR); } -<reg>. { CADD; } - -<str>\" { BEGIN A; - cbuf[clen] = 0; s = tostring(cbuf); - cbuf[clen] = ' '; cbuf[++clen] = 0; - yylval.cp = setsymtab(cbuf, s, 0.0, CON|STR, symtab); - RET(STRING); } -<str>\n { ERROR "newline in string %.10s...", cbuf SYNTAX; lineno++; BEGIN A; } -<str>"\\\"" { cbuf[clen++] = '"'; } -<str>"\\"n { cbuf[clen++] = '\n'; } -<str>"\\"t { cbuf[clen++] = '\t'; } -<str>"\\"f { cbuf[clen++] = '\f'; } -<str>"\\"r { cbuf[clen++] = '\r'; } -<str>"\\"b { cbuf[clen++] = '\b'; } -<str>"\\"v { cbuf[clen++] = '\v'; } /* these ANSIisms may not be known by */ -<str>"\\"a { cbuf[clen++] = '\007'; } /* your compiler. hence 007 for bell */ -<str>"\\\\" { cbuf[clen++] = '\\'; } -<str>"\\"({O}{O}{O}|{O}{O}|{O}) { int n; - sscanf(yytext+1, "%o", &n); cbuf[clen++] = n; } -<str>"\\"x({H}+) { int n; /* ANSI permits any number! */ - sscanf(yytext+2, "%x", &n); cbuf[clen++] = n; } -<str>"\\". { cbuf[clen++] = yytext[1]; } -<str>. { CADD; } - -%% - -void -startreg() -{ - BEGIN reg; - clen = 0; -} - -/* input() and unput() are transcriptions of the standard lex - macros for input and output with additions for error message - printing. God help us all if someone changes how lex works. -*/ - -uchar ebuf[300]; -uchar *ep = ebuf; - -int -input(void) -{ - register int c; - extern uchar *lexprog; - - if (yysptr > yysbuf) - c = U(*--yysptr); - else if (lexprog != NULL) /* awk '...' */ - c = *lexprog++; - else /* awk -f ... 
*/ - c = pgetc(); - if (c == '\n') - yylineno++; - else if (c == EOF) - c = 0; - if (ep >= ebuf + sizeof ebuf) - ep = ebuf; - return *ep++ = c; -} - -static void -unput(int c) -{ - yytchar = c; - if (yytchar == '\n') - yylineno--; - *yysptr++ = yytchar; - if (--ep < ebuf) - ep = ebuf + sizeof(ebuf) - 1; -} - - -static void -unputstr(char *s) -{ - int i; - - for (i = strlen(s)-1; i >= 0; i--) - unput(s[i]); -} diff --git a/usr/src/cmd/awk/b.c b/usr/src/cmd/awk/b.c index 9caee4e9d3..adca0cb633 100644 --- a/usr/src/cmd/awk/b.c +++ b/usr/src/cmd/awk/b.c @@ -1,4 +1,28 @@ /* + * Copyright (C) Lucent Technologies 1997 + * All Rights Reserved + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby + * granted, provided that the above copyright notice appear in all + * copies and that both that the copyright notice and this + * permission notice and warranty disclaimer appear in supporting + * documentation, and that the name Lucent Technologies or any of + * its entities not be used in advertising or publicity pertaining + * to distribution of the software without specific, written prior + * permission. + * + * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. + * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER + * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, + * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF + * THIS SOFTWARE. + */ + +/* * CDDL HEADER START * * The contents of this file are subject to the terms of the @@ -28,6 +52,8 @@ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ +/* lasciate ogne speranza, voi ch'intrate. 
*/ + #define DEBUG #include "awk.h" @@ -37,74 +63,86 @@ /* NCHARS is 2**n */ #define MAXLIN (3 * LINE_MAX) -#define type(v) (v)->nobj +#define type(v) (v)->nobj /* badly overloaded here */ +#define info(v) (v)->ntype /* badly overloaded here */ #define left(v) (v)->narg[0] #define right(v) (v)->narg[1] #define parent(v) (v)->nnext #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL: +#define ELEAF case EMPTYRE: /* empty string in regexp */ #define UNARY case STAR: case PLUS: case QUEST: /* * encoding in tree Nodes: - * leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL): + * leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE): * left is index, right contains value or pointer to value * unary (STAR, PLUS, QUEST): left is child, right is null * binary (CAT, OR): left and right are children * parent contains pointer to parent */ -int setvec[MAXLIN]; -int tmpset[MAXLIN]; -Node *point[MAXLIN]; +int *setvec; +int *tmpset; +int maxsetvec = 0; int rtok; /* next token in current re */ int rlxval; -uchar *rlxstr; -uchar *prestr; /* current position in current re */ -uchar *lastre; /* origin of last re */ +static uschar *rlxstr; +static uschar *prestr; /* current position in current re */ +static uschar *lastre; /* origin of last re */ static int setcnt; static int poscnt; -uchar *patbeg; +char *patbeg; int patlen; #define NFA 20 /* cache this many dynamic fa's */ fa *fatab[NFA]; int nfatab = 0; /* entries in fatab */ -static fa *mkdfa(uchar *, int); +static fa *mkdfa(const char *, int); static int makeinit(fa *, int); static void penter(Node *); static void freetr(Node *); -static void overflo(char *); +static void overflo(const char *); +static void growvec(const char *); static void cfoll(fa *, Node *); static void follow(Node *); -static Node *reparse(uchar *); +static Node *reparse(const char *); static int relex(void); static void freefa(fa *); static int cgoto(fa *, int, int); fa * -makedfa(uchar *s, int anchor) /* returns dfa for reg expr s */ +makedfa(const 
char *s, int anchor) /* returns dfa for reg expr s */ { int i, use, nuse; fa *pfa; + static int now = 1; + + if (setvec == NULL) { /* first time through any RE */ + maxsetvec = MAXLIN; + setvec = (int *)malloc(maxsetvec * sizeof (int)); + tmpset = (int *)malloc(maxsetvec * sizeof (int)); + if (setvec == NULL || tmpset == NULL) + overflo("out of space initializing makedfa"); + } if (compile_time) /* a constant for sure */ return (mkdfa(s, anchor)); for (i = 0; i < nfatab; i++) { /* is it there already? */ if (fatab[i]->anchor == anchor && - strcmp((char *)fatab[i]->restr, (char *)s) == 0) { - fatab[i]->use++; + strcmp((const char *)fatab[i]->restr, s) == 0) { + fatab[i]->use = now++; return (fatab[i]); } } pfa = mkdfa(s, anchor); if (nfatab < NFA) { /* room for another */ fatab[nfatab] = pfa; - fatab[nfatab]->use = 1; + fatab[nfatab]->use = now++; nfatab++; return (pfa); } @@ -117,13 +155,16 @@ makedfa(uchar *s, int anchor) /* returns dfa for reg expr s */ } freefa(fatab[nuse]); fatab[nuse] = pfa; - pfa->use = 1; + pfa->use = now++; return (pfa); } +/* + * does the real work of making a dfa + * anchor = 1 for anchored matches, else 0 + */ fa * -mkdfa(uchar *s, int anchor) /* does the real work of making a dfa */ - /* anchor = 1 for anchored matches, else 0 */ +mkdfa(const char *s, int anchor) { Node *p, *p1; fa *f; @@ -137,7 +178,7 @@ mkdfa(uchar *s, int anchor) /* does the real work of making a dfa */ poscnt = 0; penter(p1); /* enter parent pointers and leaf indices */ if ((f = (fa *)calloc(1, sizeof (fa) + poscnt * sizeof (rrow))) == NULL) - overflo("no room for fa"); + overflo("out of space for fa"); /* penter has computed number of positions in re */ f->accept = poscnt-1; cfoll(f, p1); /* set up follow sets */ @@ -151,14 +192,14 @@ mkdfa(uchar *s, int anchor) /* does the real work of making a dfa */ *f->posns[1] = 0; f->initstat = makeinit(f, anchor); f->anchor = anchor; - f->restr = tostring(s); + f->restr = (uschar *)tostring(s); return (f); } static int 
makeinit(fa *f, int anchor) { - register int i, k; + int i, k; f->curstat = 2; f->out[2] = 0; @@ -192,9 +233,10 @@ void penter(Node *p) /* set up parent pointers and leaf indices */ { switch (type(p)) { + ELEAF LEAF - left(p) = (Node *) poscnt; - point[poscnt++] = p; + info(p) = poscnt; + poscnt++; break; UNARY penter(left(p)); @@ -207,8 +249,8 @@ penter(Node *p) /* set up parent pointers and leaf indices */ parent(left(p)) = p; parent(right(p)) = p; break; - default: - ERROR "unknown type %d in penter", type(p) FATAL; + default: /* can't happen */ + FATAL("can't happen: unknown type %d in penter", type(p)); break; } } @@ -217,6 +259,7 @@ static void freetr(Node *p) /* free parse tree */ { switch (type(p)) { + ELEAF LEAF xfree(p); break; @@ -230,92 +273,168 @@ freetr(Node *p) /* free parse tree */ freetr(right(p)); xfree(p); break; - default: - ERROR "unknown type %d in freetr", type(p) FATAL; + default: /* can't happen */ + FATAL("can't happen: unknown type %d in freetr", type(p)); break; } } -uchar * -cclenter(uchar *p) +static void +growvec(const char *msg) +{ + maxsetvec *= 4; + setvec = (int *)realloc(setvec, maxsetvec * sizeof (int)); + tmpset = (int *)realloc(tmpset, maxsetvec * sizeof (int)); + if (setvec == NULL || tmpset == NULL) + overflo(msg); +} + +/* + * in the parsing of regular expressions, metacharacters like . have + * to be seen literally; \056 is not a metacharacter. + */ + +/* + * find and eval hex string at pp, return new p; only pick up one 8-bit + * byte (2 chars). 
+ */ +int +hexstr(uschar **pp) +{ + uschar *p; + int n = 0; + int i; + + for (i = 0, p = (uschar *)*pp; i < 2 && isxdigit(*p); i++, p++) { + if (isdigit(*p)) + n = 16 * n + *p - '0'; + else if (*p >= 'a' && *p <= 'f') + n = 16 * n + *p - 'a' + 10; + else if (*p >= 'A' && *p <= 'F') + n = 16 * n + *p - 'A' + 10; + } + *pp = (uschar *)p; + return (n); +} + +#define isoctdigit(c) ((c) >= '0' && (c) <= '7') + +/* pick up next thing after a \\ and increment *pp */ +int +quoted(uschar **pp) { - register int i, c; - uchar *op, *chars, *ret; - size_t bsize; + uschar *p = *pp; + int c; + + if ((c = *p++) == 't') + c = '\t'; + else if (c == 'n') + c = '\n'; + else if (c == 'f') + c = '\f'; + else if (c == 'r') + c = '\r'; + else if (c == 'b') + c = '\b'; + else if (c == '\\') + c = '\\'; + else if (c == 'x') { /* hexadecimal goo follows */ + c = hexstr(&p); /* this adds a null if number is invalid */ + } else if (isoctdigit(c)) { /* \d \dd \ddd */ + int n = c - '0'; + if (isoctdigit(*p)) { + n = 8 * n + *p++ - '0'; + if (isoctdigit(*p)) + n = 8 * n + *p++ - '0'; + } + c = n; + } /* else */ + /* c = c; */ + *pp = p; + return (c); +} + +char * +cclenter(const char *argp) /* add a character class */ +{ + int i, c, c2; + uschar *p = (uschar *)argp; + uschar *op, *bp; + static uschar *buf = NULL; + static size_t bufsz = 100; - init_buf(&chars, &bsize, LINE_INCR); op = p; - i = 0; - while ((c = *p++) != 0) { + if (buf == NULL && (buf = (uschar *)malloc(bufsz)) == NULL) + FATAL("out of space for character class [%.10s...] 
1", p); + bp = buf; + for (i = 0; (c = *p++) != 0; ) { if (c == '\\') { - if ((c = *p++) == 't') - c = '\t'; - else if (c == 'n') - c = '\n'; - else if (c == 'f') - c = '\f'; - else if (c == 'r') - c = '\r'; - else if (c == 'b') - c = '\b'; - else if (c == '\\') - c = '\\'; - else if (isdigit(c)) { - int n = c - '0'; - if (isdigit(*p)) { - n = 8 * n + *p++ - '0'; - if (isdigit(*p)) - n = 8 * n + *p++ - '0'; - } - c = n; - } /* else */ - /* c = c; */ - } else if (c == '-' && i > 0 && chars[i-1] != 0) { + c = quoted(&p); + } else if (c == '-' && i > 0 && bp[-1] != 0) { if (*p != 0) { - c = chars[i-1]; - while ((uchar)c < *p) { /* fails if *p is \\ */ - expand_buf(&chars, &bsize, i); - chars[i++] = ++c; + c = bp[-1]; + c2 = *p++; + if (c2 == '\\') + c2 = quoted(&p); + if (c > c2) { /* empty; ignore */ + bp--; + i--; + continue; + } + while (c < c2) { + if (!adjbuf((char **)&buf, &bufsz, + bp-buf+2, 100, (char **)&bp, + "cclenter1")) { + FATAL( + "out of space for character class [%.10s...] 2", p); + } + *bp++ = ++c; + i++; } - p++; continue; } } - expand_buf(&chars, &bsize, i); - chars[i++] = c; + if (!adjbuf((char **)&buf, &bufsz, bp-buf+2, 100, (char **)&bp, + "cclenter2")) + FATAL( + "out of space for character class [%.10s...] 
3", p); + *bp++ = c; + i++; } - chars[i++] = '\0'; - dprintf(("cclenter: in = |%s|, out = |%s|\n", op, chars)); + *bp = '\0'; + dprintf(("cclenter: in = |%s|, out = |%s|\n", op, buf)); xfree(op); - ret = tostring(chars); - free(chars); - return (ret); + return ((char *)tostring((char *)buf)); } static void -overflo(char *s) +overflo(const char *s) { - ERROR "regular expression too big: %s", gettext((char *)s) FATAL; + FATAL("regular expression too big: %.30s...", gettext((char *)s)); } /* enter follow set of each leaf of vertex v into lfollow[leaf] */ static void cfoll(fa *f, Node *v) { - register int i; - register int *p; + int i; + int *p; switch (type(v)) { + ELEAF LEAF - f->re[(int)left(v)].ltype = type(v); - f->re[(int)left(v)].lval = (int)right(v); + f->re[info(v)].ltype = type(v); + f->re[info(v)].lval.np = right(v); + while (f->accept >= maxsetvec) { /* guessing here! */ + growvec("out of space in cfoll()"); + } for (i = 0; i <= f->accept; i++) setvec[i] = 0; setcnt = 0; follow(v); /* computes setvec and setcnt */ if ((p = (int *)calloc(1, (setcnt+1) * sizeof (int))) == NULL) - overflo("follow set overflow"); - f->re[(int)left(v)].lfollow = p; + overflo("out of space building follow set"); + f->re[info(v)].lfollow = p; *p = setcnt; for (i = f->accept; i >= 0; i--) { if (setvec[i] == 1) @@ -330,8 +449,8 @@ cfoll(fa *f, Node *v) cfoll(f, left(v)); cfoll(f, right(v)); break; - default: - ERROR "unknown type %d in cfoll", type(v) FATAL; + default: /* can't happen */ + FATAL("can't happen: unknown type %d in cfoll", type(v)); } } @@ -342,15 +461,25 @@ cfoll(fa *f, Node *v) static int first(Node *p) { - register int b; + int b, lp; switch (type(p)) { + ELEAF LEAF - if (setvec[(int)left(p)] != 1) { - setvec[(int)left(p)] = 1; + lp = info(p); /* look for high-water mark of subscripts */ + while (setcnt >= maxsetvec || lp >= maxsetvec) { + /* guessing here! 
*/ + growvec("out of space in first()"); + } + if (type(p) == EMPTYRE) { + setvec[lp] = 0; + return (0); + } + if (setvec[lp] != 1) { + setvec[lp] = 1; setcnt++; } - if (type(p) == CCL && (*(uchar *)right(p)) == '\0') + if (type(p) == CCL && (*(char *)right(p)) == '\0') return (0); /* empty CCL */ else return (1); @@ -372,8 +501,7 @@ first(Node *p) return (0); return (1); } - ERROR "unknown type %d in first", type(p) FATAL; - return (-1); + FATAL("can't happen: unknown type %d in first", type(p)); } /* collects leaves that can follow v into setvec */ @@ -407,14 +535,16 @@ follow(Node *v) follow(p); return; default: - ERROR "unknown type %d in follow", type(p) FATAL; + FATAL("unknown type %d in follow", type(p)); break; } } static int -member(uchar c, uchar *s) /* is c in s? */ +member(int c, const char *sarg) /* is c in s? */ { + uschar *s = (uschar *)sarg; + while (*s) if (c == *s++) return (1); @@ -423,9 +553,10 @@ member(uchar c, uchar *s) /* is c in s? */ int -match(fa *f, uchar *p) +match(fa *f, const char *p0) /* shortest match ? */ { - register int s, ns; + int s, ns; + uschar *p = (uschar *)p0; s = f->reset ? 
makeinit(f, 0) : f->initstat; if (f->out[s]) @@ -442,10 +573,11 @@ match(fa *f, uchar *p) } int -pmatch(fa *f, uchar *p) +pmatch(fa *f, const char *p0) /* longest match, for sub */ { - register int s, ns; - register uchar *q; + int s, ns; + uschar *p = (uschar *)p0; + uschar *q; int i, k; if (f->reset) { @@ -453,7 +585,7 @@ pmatch(fa *f, uchar *p) } else { s = f->initstat; } - patbeg = p; + patbeg = (char *)p; patlen = -1; do { q = p; @@ -466,16 +598,17 @@ pmatch(fa *f, uchar *p) s = cgoto(f, s, *q); if (s == 1) { /* no transition */ if (patlen >= 0) { - patbeg = p; + patbeg = (char *)p; return (1); - } else + } else { goto nextin; /* no match */ + } } } while (*q++ != 0); if (f->out[s]) patlen = q - p - 1; /* don't count $ */ if (patlen >= 0) { - patbeg = p; + patbeg = (char *)p; return (1); } nextin: @@ -485,7 +618,7 @@ pmatch(fa *f, uchar *p) xfree(f->posns[i]); k = *f->posns[0]; if ((f->posns[2] = - (int *)calloc(1, (k + 1) * sizeof (int))) == NULL) { + (int *)calloc(k + 1, sizeof (int))) == NULL) { overflo("out of space in pmatch"); } for (i = 0; i <= k; i++) @@ -500,10 +633,11 @@ pmatch(fa *f, uchar *p) } int -nematch(fa *f, uchar *p) +nematch(fa *f, const char *p0) /* non-empty match, for sub */ { - register int s, ns; - register uchar *q; + int s, ns; + uschar *p = (uschar *)p0; + uschar *q; int i, k; if (f->reset) { @@ -523,7 +657,7 @@ nematch(fa *f, uchar *p) s = cgoto(f, s, *q); if (s == 1) { /* no transition */ if (patlen > 0) { - patbeg = p; + patbeg = (char *)p; return (1); } else goto nnextin; /* no nonempty match */ @@ -532,7 +666,7 @@ nematch(fa *f, uchar *p) if (f->out[s]) patlen = q - p - 1; /* don't count $ */ if (patlen > 0) { - patbeg = p; + patbeg = (char *)p; return (1); } nnextin: @@ -542,7 +676,7 @@ nematch(fa *f, uchar *p) xfree(f->posns[i]); k = *f->posns[0]; if ((f->posns[2] = - (int *)calloc(1, (k + 1) * sizeof (int))) == NULL) { + (int *)calloc(k + 1, sizeof (int))) == NULL) { overflo("out of state space"); } for (i = 0; i <= k; i++) 
@@ -560,31 +694,31 @@ nematch(fa *f, uchar *p) static Node *regexp(void), *primary(void), *concat(Node *); static Node *alt(Node *), *unary(Node *); +/* parses regular expression pointed to by p */ +/* uses relex() to scan regular expression */ static Node * -reparse(uchar *p) +reparse(const char *p) { - /* parses regular expression pointed to by p */ - /* uses relex() to scan regular expression */ Node *np; dprintf(("reparse <%s>\n", p)); - lastre = prestr = p; /* prestr points to string to be parsed */ + + /* prestr points to string to be parsed */ + lastre = prestr = (uschar *)p; rtok = relex(); - if (rtok == '\0') - ERROR "empty regular expression" FATAL; - np = regexp(); + /* GNU compatibility: an empty regexp matches anything */ if (rtok == '\0') { - return (np); - } else { - ERROR "syntax error in regular expression %s at %s", - lastre, prestr FATAL; + return (op2(EMPTYRE, NIL, NIL)); } - /*NOTREACHED*/ - return (NULL); + np = regexp(); + if (rtok != '\0') + FATAL("syntax error in regular expression %s at %s", + lastre, prestr); + return (np); } static Node * -regexp(void) +regexp(void) /* top-level parse of reg expr */ { return (alt(concat(primary()))); } @@ -596,28 +730,31 @@ primary(void) switch (rtok) { case CHAR: - np = op2(CHAR, NIL, (Node *)rlxval); + np = op2(CHAR, NIL, itonp(rlxval)); rtok = relex(); return (unary(np)); case ALL: rtok = relex(); return (unary(op2(ALL, NIL, NIL))); + case EMPTYRE: + rtok = relex(); + return (unary(op2(ALL, NIL, NIL))); case DOT: rtok = relex(); return (unary(op2(DOT, NIL, NIL))); case CCL: /*LINTED align*/ - np = op2(CCL, NIL, (Node *)cclenter(rlxstr)); + np = op2(CCL, NIL, (Node *)cclenter((char *)rlxstr)); rtok = relex(); return (unary(np)); case NCCL: /*LINTED align*/ - np = op2(NCCL, NIL, (Node *)cclenter(rlxstr)); + np = op2(NCCL, NIL, (Node *)cclenter((char *)rlxstr)); rtok = relex(); return (unary(np)); case '^': rtok = relex(); - return (unary(op2(CHAR, NIL, (Node *)HAT))); + return (unary(op2(CHAR, NIL, 
itonp(HAT)))); case '$': rtok = relex(); return (unary(op2(CHAR, NIL, NIL))); @@ -627,20 +764,20 @@ primary(void) rtok = relex(); return (unary(op2(CCL, NIL, /*LINTED align*/ - (Node *)tostring((uchar *)"")))); + (Node *)tostring("")))); } np = regexp(); if (rtok == ')') { rtok = relex(); return (unary(np)); } else { - ERROR "syntax error in regular expression %s at %s", - lastre, prestr FATAL; + FATAL("syntax error in regular expression %s at %s", + lastre, prestr); } /* FALLTHROUGH */ default: - ERROR "illegal primary in regular expression %s at %s", - lastre, prestr FATAL; + FATAL("illegal primary in regular expression %s at %s", + lastre, prestr); } /*NOTREACHED*/ return (NULL); @@ -650,7 +787,14 @@ static Node * concat(Node *np) { switch (rtok) { - case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(': + case EMPTYRE: + case CHAR: + case DOT: + case ALL: + case CCL: + case NCCL: + case '$': + case '(': return (concat(op2(CAT, np, primary()))); default: return (np); @@ -685,12 +829,48 @@ unary(Node *np) } } +/* + * Character class definitions conformant to the POSIX locale as + * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source + * and operating character sets are both ASCII (ISO646) or supersets + * thereof. + * + * Note that to avoid overflowing the temporary buffer used in + * relex(), the expanded character class (prior to range expansion) + * must be less than twice the size of their full name. 
+ */ + +struct charclass { + const char *cc_name; + int cc_namelen; + int (*cc_func)(int); +} charclasses[] = { + { "alnum", 5, isalnum }, + { "alpha", 5, isalpha }, + { "blank", 5, isblank }, + { "cntrl", 5, iscntrl }, + { "digit", 5, isdigit }, + { "graph", 5, isgraph }, + { "lower", 5, islower }, + { "print", 5, isprint }, + { "punct", 5, ispunct }, + { "space", 5, isspace }, + { "upper", 5, isupper }, + { "xdigit", 6, isxdigit }, + { NULL, 0, NULL }, +}; + + static int relex(void) /* lexical analyzer for reparse */ { - register int c; - uchar *cbuf; - int clen, cflag; + int c, n; + int cflag; + static uschar *buf = 0; + static size_t bufsz = 100; + uschar *bp; + struct charclass *cc; + int i; switch (c = *prestr++) { case '|': return OR; @@ -705,64 +885,82 @@ relex(void) /* lexical analyzer for reparse */ case ')': return (c); case '\\': - if ((c = *prestr++) == 't') - c = '\t'; - else if (c == 'n') - c = '\n'; - else if (c == 'f') - c = '\f'; - else if (c == 'r') - c = '\r'; - else if (c == 'b') - c = '\b'; - else if (c == '\\') - c = '\\'; - else if (isdigit(c)) { - int n = c - '0'; - if (isdigit(*prestr)) { - n = 8 * n + *prestr++ - '0'; - if (isdigit(*prestr)) - n = 8 * n + *prestr++ - '0'; - } - c = n; - } /* else it's now in c */ - rlxval = c; + rlxval = quoted(&prestr); return (CHAR); default: rlxval = c; return (CHAR); case '[': - clen = 0; + if (buf == NULL && (buf = (uschar *)malloc(bufsz)) == NULL) + FATAL("out of space in reg expr %.10s..", lastre); + bp = buf; if (*prestr == '^') { cflag = 1; prestr++; } else cflag = 0; - init_buf(&cbuf, NULL, strlen((char *)prestr) * 2 + 1); + n = 2 * strlen((const char *)prestr) + 1; + if (!adjbuf((char **)&buf, &bufsz, n, n, (char **)&bp, + "relex1")) + FATAL("out of space for reg expr %.10s...", lastre); for (;;) { if ((c = *prestr++) == '\\') { - cbuf[clen++] = '\\'; + *bp++ = '\\'; if ((c = *prestr++) == '\0') { - ERROR - "nonterminated character class %s", lastre FATAL; + FATAL("nonterminated character class 
" + "%.20s...", lastre); + } + *bp++ = c; + } else if (c == '[' && *prestr == ':') { + /* + * Handle POSIX character class names. + * Dag-Erling Smorgrav, des@ofug.org + */ + for (cc = charclasses; cc->cc_name; cc++) + if (strncmp((const char *)prestr + 1, + (const char *)cc->cc_name, + cc->cc_namelen) == 0) + break; + + if (cc->cc_name == NULL || + prestr[1 + cc->cc_namelen] != ':' || + prestr[2 + cc->cc_namelen] != ']') { + *bp++ = c; + continue; } - cbuf[clen++] = c; + + prestr += cc->cc_namelen + 3; + /* + * BUG: We begin at 1, instead of 0, since we + * would otherwise prematurely terminate the + * string for classes like [[:cntrl:]]. This + * means that we can't match the NUL character, + * not without first adapting the entire + * program to track each string's length. + */ + for (i = 1; i < NCHARS; i++) { + (void) adjbuf((char **)&buf, &bufsz, + bp - buf + 1, 100, (char **)&bp, + "relex2"); + if (cc->cc_func(i)) { + *bp++ = i; + n++; + } + } + } else if (c == '\0') { + FATAL("nonterminated character class %.20s", + lastre); + } else if (bp == buf) { /* 1st char is special */ + *bp++ = c; } else if (c == ']') { - cbuf[clen] = 0; - rlxstr = tostring(cbuf); - free(cbuf); + *bp++ = '\0'; + rlxstr = (uschar *)tostring((char *)buf); if (cflag == 0) return (CCL); else return (NCCL); - } else if (c == '\n') { - ERROR "newline in character class %s...", - lastre FATAL; - } else if (c == '\0') { - ERROR "nonterminated character class %s", - lastre FATAL; } else - cbuf[clen++] = c; + *bp++ = c; } /*NOTREACHED*/ } @@ -772,9 +970,13 @@ relex(void) /* lexical analyzer for reparse */ static int cgoto(fa *f, int s, int c) { - register int i, j, k; - register int *p, *q; + int i, j, k; + int *p, *q; + assert(c == HAT || c < NCHARS); + while (f->accept >= maxsetvec) { /* guessing here! 
*/ + growvec("out of space in cgoto()"); + } for (i = 0; i <= f->accept; i++) setvec[i] = 0; setcnt = 0; @@ -782,16 +984,20 @@ cgoto(fa *f, int s, int c) p = f->posns[s]; for (i = 1; i <= *p; i++) { if ((k = f->re[p[i]].ltype) != FINAL) { - if (k == CHAR && c == f->re[p[i]].lval || - k == DOT && c != 0 && c != HAT || - k == ALL && c != 0 || - k == CCL && - member(c, (uchar *)f->re[p[i]].lval) || - k == NCCL && - !member(c, (uchar *)f->re[p[i]].lval) && - c != 0 && c != HAT) { + if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np)) || + (k == DOT && c != 0 && c != HAT) || + (k == ALL && c != 0) || + (k == EMPTYRE && c != 0) || + (k == CCL && + member(c, (char *)f->re[p[i]].lval.up)) || + (k == NCCL && + !member(c, (char *)f->re[p[i]].lval.up) && + c != 0 && c != HAT)) { q = f->re[p[i]].lfollow; for (j = 1; j <= *q; j++) { + if (q[j] >= maxsetvec) { + growvec("cgoto overflow"); + } if (setvec[q[j]] == 0) { setcnt++; setvec[q[j]] = 1; @@ -847,17 +1053,19 @@ cgoto(fa *f, int s, int c) } static void -freefa(fa *f) +freefa(fa *f) /* free a finite automaton */ { - - register int i; + int i; if (f == NULL) return; for (i = 0; i <= f->curstat; i++) xfree(f->posns[i]); - for (i = 0; i <= f->accept; i++) + for (i = 0; i <= f->accept; i++) { xfree(f->re[i].lfollow); + if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL) + xfree((f->re[i].lval.np)); + } xfree(f->restr); xfree(f); } diff --git a/usr/src/cmd/awk/lex.c b/usr/src/cmd/awk/lex.c new file mode 100644 index 0000000000..ad0575f3cb --- /dev/null +++ b/usr/src/cmd/awk/lex.c @@ -0,0 +1,637 @@ +/* + * Copyright (C) Lucent Technologies 1997 + * All Rights Reserved + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby + * granted, provided that the above copyright notice appear in all + * copies and that both that the copyright notice and this + * permission notice and warranty disclaimer appear in supporting + * documentation, and that the name 
Lucent Technologies or any of + * its entities not be used in advertising or publicity pertaining + * to distribution of the software without specific, written prior + * permission. + * + * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. + * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER + * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, + * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF + * THIS SOFTWARE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include "awk.h" +#include "y.tab.h" + +extern YYSTYPE yylval; +extern int infunc; + +off_t lineno = 1; +int bracecnt = 0; +int brackcnt = 0; +int parencnt = 0; + +typedef struct Keyword { + const char *word; + int sub; + int type; +} Keyword; + +Keyword keywords[] = { /* keep sorted: binary searched */ + { "BEGIN", XBEGIN, XBEGIN }, + { "END", XEND, XEND }, + { "NF", VARNF, VARNF }, + { "atan2", FATAN, BLTIN }, + { "break", BREAK, BREAK }, + { "close", CLOSE, CLOSE }, + { "continue", CONTINUE, CONTINUE }, + { "cos", FCOS, BLTIN }, + { "delete", DELETE, DELETE }, + { "do", DO, DO }, + { "else", ELSE, ELSE }, + { "exit", EXIT, EXIT }, + { "exp", FEXP, BLTIN }, + { "fflush", FFLUSH, BLTIN }, + { "for", FOR, FOR }, + { "func", FUNC, FUNC }, + { "function", FUNC, FUNC }, + { "getline", GETLINE, GETLINE }, + { "gsub", GSUB, GSUB }, + { "if", IF, IF }, + { "in", IN, IN }, + { "index", INDEX, INDEX }, + { "int", FINT, BLTIN }, + { "length", FLENGTH, BLTIN }, + { "log", FLOG, BLTIN }, + { "match", MATCHFCN, MATCHFCN }, + { "next", NEXT, NEXT }, + { "nextfile", NEXTFILE, NEXTFILE }, + { "print", PRINT, PRINT }, + { "printf", PRINTF, PRINTF }, + { "rand", FRAND, BLTIN }, + { "return", RETURN, RETURN }, + { "sin", FSIN, BLTIN 
/*
 * gettok - read the next raw token from the lexical input into *pbuf.
 *
 * pbuf/psz describe a growable buffer owned by the caller; both are updated
 * on return if the buffer had to be enlarged.  The buffer must already hold
 * at least two bytes, because buf[0] and buf[1] are written unconditionally.
 *
 * Returns:
 *   0     at end of input;
 *   c     for any character that is not alphanumeric, '.' or '_'
 *         (buf then holds just that character);
 *   'a'   when buf holds a name ([A-Za-z_][A-Za-z0-9_]*);
 *   '0'   when buf holds the text of a number accepted by strtod();
 *   buf[0] when the text started like a number but strtod() accepted none
 *         of it (in practice a lone '.'); the remainder is pushed back.
 */
int
gettok(char **pbuf, size_t *psz)	/* get next input token */
{
	int c, retc;
	char *buf = *pbuf;
	size_t sz = *psz;
	char *bp = buf;

	c = input();
	if (c == 0)
		return (0);
	buf[0] = c;
	buf[1] = 0;
	if (!isalnum(c) && c != '.' && c != '_')
		return (c);

	*bp++ = c;
	if (isalpha(c) || c == '_') {	/* it's a varname */
		for (; (c = input()) != 0; ) {
			/*
			 * Keep room for this character AND the terminating
			 * NUL.  The loop can end at end-of-input without
			 * another pass through this check, so testing only
			 * "bp-buf >= sz" let the final "*bp = 0" land one
			 * byte past the buffer.
			 */
			if ((size_t)(bp - buf) + 1 >= sz &&
			    !adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
				FATAL("out of space for name %.10s...", buf);
			if (isalnum(c) || c == '_')
				*bp++ = c;
			else {
				*bp = 0;
				unput(c);
				break;
			}
		}
		*bp = 0;
		retc = 'a';	/* alphanumeric */
	} else {	/* maybe it's a number, but could be . */
		char *rem;

		/* read input until can't be a number */
		for (; (c = input()) != 0; ) {
			/* same reservation rule as for names above */
			if ((size_t)(bp - buf) + 1 >= sz &&
			    !adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
				FATAL("out of space for number %.10s...", buf);
			if (isdigit(c) || c == 'e' || c == 'E' ||
			    c == '.' || c == '+' || c == '-')
				*bp++ = c;
			else {
				unput(c);
				break;
			}
		}
		*bp = 0;
		(void) strtod(buf, &rem);	/* parse the number */
		if (rem == buf) {	/* it wasn't a valid number at all */
			/*
			 * Push the tail back BEFORE truncating: with the
			 * truncation first, rem+1 already pointed at the new
			 * NUL and the rest of the text was silently dropped.
			 */
			unputstr(rem+1);	/* put rest back for later */
			buf[1] = 0;	/* return one character as token */
			retc = buf[0];	/* character is its own type */
		} else {	/* some prefix was a number */
			unputstr(rem);	/* put rest back for later */
			rem[0] = 0;	/* truncate buf after number part */
			retc = '0';	/* type is number */
		}
	}
	*pbuf = buf;
	*psz = sz;
	return (retc);
}
*/ + RET(NUMBER); + } + + yylval.i = c; + switch (c) { + case '\n': /* {EOL} */ + lineno++; + RET(NL); + case '\r': /* assume \n is coming */ + case ' ': /* {WS}+ */ + case '\t': + break; + case '#': /* #.* strip comments */ + while ((c = input()) != '\n' && c != 0) + ; + unput(c); + break; + case ';': + RET(';'); + case '\\': + if (peek() == '\n') { + (void) input(); + lineno++; + } else if (peek() == '\r') { + (void) input(); + (void) input(); /* BUG: check for \n */ + lineno++; + } else { + RET(c); + } + break; + case '&': + if (peek() == '&') { + (void) input(); + RET(AND); + } else + RET('&'); + case '|': + if (peek() == '|') { + (void) input(); + RET(BOR); + } else + RET('|'); + case '!': + if (peek() == '=') { + (void) input(); + yylval.i = NE; + RET(NE); + } else if (peek() == '~') { + (void) input(); + yylval.i = NOTMATCH; + RET(MATCHOP); + } else + RET(NOT); + case '~': + yylval.i = MATCH; + RET(MATCHOP); + case '<': + if (peek() == '=') { + (void) input(); + yylval.i = LE; + RET(LE); + } else { + yylval.i = LT; + RET(LT); + } + case '=': + if (peek() == '=') { + (void) input(); + yylval.i = EQ; + RET(EQ); + } else { + yylval.i = ASSIGN; + RET(ASGNOP); + } + case '>': + if (peek() == '=') { + (void) input(); + yylval.i = GE; + RET(GE); + } else if (peek() == '>') { + (void) input(); + yylval.i = APPEND; + RET(APPEND); + } else { + yylval.i = GT; + RET(GT); + } + case '+': + if (peek() == '+') { + (void) input(); + yylval.i = INCR; + RET(INCR); + } else if (peek() == '=') { + (void) input(); + yylval.i = ADDEQ; + RET(ASGNOP); + } else + RET('+'); + case '-': + if (peek() == '-') { + (void) input(); + yylval.i = DECR; + RET(DECR); + } else if (peek() == '=') { + (void) input(); + yylval.i = SUBEQ; + RET(ASGNOP); + } else + RET('-'); + case '*': + if (peek() == '=') { /* *= */ + (void) input(); + yylval.i = MULTEQ; + RET(ASGNOP); + } else if (peek() == '*') { /* ** or **= */ + (void) input(); /* eat 2nd * */ + if (peek() == '=') { + (void) input(); + 
yylval.i = POWEQ; + RET(ASGNOP); + } else { + RET(POWER); + } + } else + RET('*'); + case '/': + RET('/'); + case '%': + if (peek() == '=') { + (void) input(); + yylval.i = MODEQ; + RET(ASGNOP); + } else + RET('%'); + case '^': + if (peek() == '=') { + (void) input(); + yylval.i = POWEQ; + RET(ASGNOP); + } else + RET(POWER); + + case '$': + /* BUG: awkward, if not wrong */ + c = gettok(&buf, &bufsize); + if (isalpha(c)) { + if (strcmp(buf, "NF") == 0) { + /* very special */ + unputstr("(NF)"); + RET(INDIRECT); + } + c = peek(); + if (c == '(' || c == '[' || + (infunc && isarg(buf) >= 0)) { + unputstr(buf); + RET(INDIRECT); + } + yylval.cp = setsymtab( + buf, "", 0.0, STR|NUM, symtab); + RET(IVAR); + } else if (c == 0) { /* */ + SYNTAX("unexpected end of input after $"); + RET(';'); + } else { + unputstr(buf); + RET(INDIRECT); + } + + case '}': + if (--bracecnt < 0) + SYNTAX("extra }"); + sc = 1; + RET(';'); + case ']': + if (--brackcnt < 0) + SYNTAX("extra ]"); + RET(']'); + case ')': + if (--parencnt < 0) + SYNTAX("extra )"); + RET(')'); + case '{': + bracecnt++; + RET('{'); + case '[': + brackcnt++; + RET('['); + case '(': + parencnt++; + RET('('); + + case '"': + /* BUG: should be like tran.c ? 
*/ + return (string()); + + default: + RET(c); + } + } +} + +int +string(void) +{ + int c, n; + char *s, *bp; + static char *buf = NULL; + static size_t bufsz = 500; + + if (buf == NULL && (buf = (char *)malloc(bufsz)) == NULL) + FATAL("out of space for strings"); + for (bp = buf; (c = input()) != '"'; ) { + if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string")) + FATAL("out of space for string %.10s...", buf); + switch (c) { + case '\n': + case '\r': + case 0: + *bp = '\0'; + SYNTAX("non-terminated string %.10s...", buf); + if (c == 0) /* hopeless */ + FATAL("giving up"); + lineno++; + break; + case '\\': + c = input(); + switch (c) { + case '"': *bp++ = '"'; break; + case 'n': *bp++ = '\n'; break; + case 't': *bp++ = '\t'; break; + case 'f': *bp++ = '\f'; break; + case 'r': *bp++ = '\r'; break; + case 'b': *bp++ = '\b'; break; + case 'v': *bp++ = '\v'; break; + case 'a': *bp++ = '\007'; break; + case '\\': *bp++ = '\\'; break; + + case '0': case '1': case '2': /* octal: \d \dd \ddd */ + case '3': case '4': case '5': case '6': case '7': + n = c - '0'; + if ((c = peek()) >= '0' && c < '8') { + n = 8 * n + input() - '0'; + if ((c = peek()) >= '0' && c < '8') + n = 8 * n + input() - '0'; + } + *bp++ = n; + break; + + case 'x': { /* hex \x0-9a-fA-F + */ + char xbuf[100], *px; + px = xbuf; + while ((c = input()) != 0 && px-xbuf < 100-2) { + if (isdigit(c) || + (c >= 'a' && c <= 'f') || + (c >= 'A' && c <= 'F')) + *px++ = c; + else + break; + } + *px = 0; + unput(c); + (void) sscanf(xbuf, "%x", (unsigned int *)&n); + *bp++ = n; + break; + } + + default: + *bp++ = c; + break; + } + break; + default: + *bp++ = c; + break; + } + } + *bp = 0; + s = tostring(buf); + *bp++ = ' '; *bp++ = 0; + yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab); + RET(STRING); +} + + +int +binsearch(char *w, Keyword *kp, int n) +{ + int cond, low, mid, high; + + low = 0; + high = n - 1; + while (low <= high) { + mid = (low + high) / 2; + if ((cond = strcmp(w, kp[mid].word)) < 0) + 
high = mid - 1; + else if (cond > 0) + low = mid + 1; + else + return (mid); + } + return (-1); +} + +int +word(char *w) +{ + Keyword *kp; + int c, n; + + n = binsearch(w, keywords, sizeof (keywords) / sizeof (keywords[0])); + if (n != -1) { /* found in table */ + kp = keywords + n; + yylval.i = kp->sub; + switch (kp->type) { /* special handling */ + case BLTIN: + if (kp->sub == FSYSTEM && safe) + SYNTAX("system is unsafe"); + RET(kp->type); + case FUNC: + if (infunc) + SYNTAX("illegal nested function"); + RET(kp->type); + case RETURN: + if (!infunc) + SYNTAX("return not in function"); + RET(kp->type); + case VARNF: + yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab); + RET(VARNF); + default: + RET(kp->type); + } + } + c = peek(); /* look for '(' */ + if (c != '(' && infunc && (n = isarg(w)) >= 0) { + yylval.i = n; + RET(ARG); + } else { + yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab); + if (c == '(') { + RET(CALL); + } else { + RET(VAR); + } + } +} + +void +startreg(void) /* next call to yylex will return a regular expression */ +{ + reg = 1; +} + +int +regexpr(void) +{ + int c; + static char *buf = NULL; + static size_t bufsz = 500; + char *bp; + + if (buf == NULL && (buf = (char *)malloc(bufsz)) == NULL) + FATAL("out of space for rex expr"); + bp = buf; + for (; (c = input()) != '/' && c != 0; ) { + if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr")) + FATAL("out of space for reg expr %.10s...", buf); + if (c == '\n') { + *bp = '\0'; + SYNTAX("newline in regular expression %.10s...", buf); + unput('\n'); + break; + } else if (c == '\\') { + *bp++ = '\\'; + *bp++ = input(); + } else { + *bp++ = c; + } + } + *bp = 0; + if (c == 0) + SYNTAX("non-terminated regular expression %.10s...", buf); + yylval.s = tostring(buf); + unput('/'); + RET(REGEXPR); +} + +/* low-level lexical stuff, sort of inherited from lex */ + +char ebuf[300]; +char *ep = ebuf; +char yysbuf[100]; /* pushback buffer */ +char *yysptr = yysbuf; +FILE *yyin = NULL; + +int 
+input(void) /* get next lexical input character */ +{ + int c; + extern char *lexprog; + + if (yysptr > yysbuf) + c = (uschar)*--yysptr; + else if (lexprog != NULL) { /* awk '...' */ + if ((c = (uschar)*lexprog) != 0) + lexprog++; + } else /* awk -f ... */ + c = pgetc(); + if (c == EOF) + c = 0; + if (ep >= ebuf + sizeof (ebuf)) + ep = ebuf; + *ep = c; + if (c != 0) { + ep++; + } + return (c); +} + +void +unput(int c) /* put lexical character back on input */ +{ + if (yysptr >= yysbuf + sizeof (yysbuf)) + FATAL("pushed back too much: %.20s...", yysbuf); + *yysptr++ = c; + if (--ep < ebuf) + ep = ebuf + sizeof (ebuf) - 1; +} + +void +unputstr(const char *s) /* put a string back on input */ +{ + int i; + + for (i = strlen(s)-1; i >= 0; i--) + unput(s[i]); +} diff --git a/usr/src/cmd/awk/lib.c b/usr/src/cmd/awk/lib.c index ded064c6c3..fedd5d5137 100644 --- a/usr/src/cmd/awk/lib.c +++ b/usr/src/cmd/awk/lib.c @@ -1,4 +1,28 @@ /* + * Copyright (C) Lucent Technologies 1997 + * All Rights Reserved + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby + * granted, provided that the above copyright notice appear in all + * copies and that both that the copyright notice and this + * permission notice and warranty disclaimer appear in supporting + * documentation, and that the name Lucent Technologies or any of + * its entities not be used in advertising or publicity pertaining + * to distribution of the software without specific, written prior + * permission. + * + * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 
+ * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER + * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, + * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF + * THIS SOFTWARE. + */ + +/* * CDDL HEADER START * * The contents of this file are subject to the terms of the @@ -27,63 +51,131 @@ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ -#pragma ident "%Z%%M% %I% %E% SMI" +/* Copyright (c) Lucent Technologies 1997 */ +/* All Rights Reserved */ +#include <stdio.h> +#include <string.h> +#include <ctype.h> #include <errno.h> +#include <stdlib.h> +#include <stdarg.h> #include "awk.h" #include "y.tab.h" -uchar *record; -size_t record_size; - -int donefld; /* 1 = implies rec broken into fields */ -int donerec; /* 1 = record is valid (no flds have changed) */ +static FILE *infile = NULL; +static char *file = ""; +char *record; +size_t recsize = RECSIZE; +static char *fields; +static size_t fieldssize = RECSIZE; +static char *rtbuf; +static size_t rtbufsize = RECSIZE; -static struct fldtab_chunk { - struct fldtab_chunk *next; - Cell fields[FLD_INCR]; -} *fldtab_head, *fldtab_tail; +Cell **fldtab; /* pointers to Cells */ +char inputFS[100] = " "; -static size_t fldtab_maxidx; +#define MAXFLD 2 +int nfields = MAXFLD; /* last allocated slot for $i */ -static FILE *infile = NULL; -static uchar *file = (uchar*) ""; -static uchar *fields; -static size_t fields_size = LINE_INCR; +int donefld; /* 1 = implies rec broken into fields */ +int donerec; /* 1 = record is valid (no flds have changed) */ -static int maxfld = 0; /* last used field */ +static int lastfld = 0; /* last used field */ static int argno = 1; /* current input argument number */ -static uchar *getargv(int); +static Cell dollar0 = { OCELL, CFLD, NULL, "", 0.0, REC|STR|DONTFREE }; +static Cell dollar1 = { OCELL, 
CFLD, NULL, "", 0.0, FLD|STR|DONTFREE }; + +static char *getargv(int); static void cleanfld(int, int); -static int refldbld(uchar *, uchar *); +static int refldbld(const char *, const char *); static void bcheck2(int, int, int); static void eprint(void); static void bclass(int); +void +recinit(unsigned int n) +{ + if ((record = (char *)malloc(n)) == NULL || + (fields = (char *)malloc(n+2)) == NULL || + (fldtab = (Cell **)malloc((nfields+1) * sizeof (Cell *))) == NULL || + (fldtab[0] = (Cell *)malloc(sizeof (Cell))) == NULL) + FATAL("out of space for $0 and fields"); + *fldtab[0] = dollar0; + fldtab[0]->sval = record; + fldtab[0]->nval = tostring("0"); + makefields(1, nfields); +} + +void +makefields(int n1, int n2) /* create $n1..$n2 inclusive */ +{ + char temp[50]; + int i; + + for (i = n1; i <= n2; i++) { + fldtab[i] = (Cell *)malloc(sizeof (Cell)); + if (fldtab[i] == NULL) + FATAL("out of space in makefields %d", i); + *fldtab[i] = dollar1; + (void) sprintf(temp, "%d", i); + fldtab[i]->nval = tostring(temp); + } +} + static void initgetrec(void) { int i; - uchar *p; + char *p; for (i = 1; i < *ARGC; i++) { - if (!isclvar(p = getargv(i))) /* find 1st real filename */ + p = getargv(i); /* find 1st real filename */ + if (p == NULL || *p == '\0') { /* deleted or zapped */ + argno++; + continue; + } + if (!isclvar(p)) { + (void) setsval(lookup("FILENAME", symtab), p); return; + } setclvar(p); /* a commandline assignment before filename */ argno++; } infile = stdin; /* no filenames, so use stdin */ - /* *FILENAME = file = (uchar*) "-"; */ } +/* + * POSIX specifies that fields are supposed to be evaluated as if they were + * split using the value of FS at the time that the record's value ($0) was + * read. + * + * Since field-splitting is done lazily, we save the current value of FS + * whenever a new record is read in (implicitly or via getline), or when + * a new value is assigned to $0. 
+ */ +void +savefs(void) +{ + if (strlen(getsval(fsloc)) >= sizeof (inputFS)) + FATAL("field separator %.10s... is too long", *FS); + (void) strcpy(inputFS, *FS); +} + +static int firsttime = 1; + +/* + * get next input record + * note: cares whether buf == record + */ int -getrec(uchar **bufp, size_t *bufsizep) +getrec(char **pbuf, size_t *pbufsize, int isrecord) { int c; - static int firsttime = 1; - uchar_t *buf, *nbuf; - size_t len; + char *buf = *pbuf; + uschar saveb0; + size_t bufsize = *pbufsize, savebufsize = bufsize; if (firsttime) { firsttime = 0; @@ -91,17 +183,24 @@ getrec(uchar **bufp, size_t *bufsizep) } dprintf(("RS=<%s>, FS=<%s>, ARGC=%f, FILENAME=%s\n", *RS, *FS, *ARGC, *FILENAME)); - donefld = 0; - donerec = 1; + if (isrecord) { + donefld = 0; + donerec = 1; + savefs(); + } + saveb0 = buf[0]; + buf[0] = '\0'; while (argno < *ARGC || infile == stdin) { dprintf(("argno=%d, file=|%s|\n", argno, file)); if (infile == NULL) { /* have to open a new file */ file = getargv(argno); - if (*file == '\0') { /* it's been zapped */ + if (file == NULL || *file == '\0') { + /* deleted or zapped */ argno++; continue; } - if (isclvar(file)) { /* a var=value arg */ + if (isclvar(file)) { + /* a var=value arg */ setclvar(file); argno++; continue; @@ -110,31 +209,28 @@ getrec(uchar **bufp, size_t *bufsizep) dprintf(("opening file %s\n", file)); if (*file == '-' && *(file+1) == '\0') infile = stdin; - else if ((infile = fopen((char *)file, "r")) == NULL) - ERROR "can't open file %s", file FATAL; + else if ((infile = fopen(file, "rF")) == NULL) + FATAL("can't open file %s", file); (void) setfval(fnrloc, 0.0); } - c = readrec(&nbuf, &len, infile); - expand_buf(bufp, bufsizep, len); - buf = *bufp; - (void) memcpy(buf, nbuf, len); - buf[len] = '\0'; - free(nbuf); + c = readrec(&buf, &bufsize, infile); if (c != 0 || buf[0] != '\0') { /* normal record */ - if (bufp == &record) { - if (!(recloc->tval & DONTFREE)) + if (isrecord) { + if (freeable(recloc)) xfree(recloc->sval); 
- recloc->sval = record; + recloc->sval = buf; /* buf == record */ recloc->tval = REC | STR | DONTFREE; if (is_number(recloc->sval)) { recloc->fval = - atof((const char *)recloc->sval); + atof(recloc->sval); recloc->tval |= NUM; } } (void) setfval(nrloc, nrloc->fval+1); (void) setfval(fnrloc, fnrloc->fval+1); + *pbuf = buf; + *pbufsize = bufsize; return (1); } /* EOF arrived on this file; set up next */ @@ -143,19 +239,39 @@ getrec(uchar **bufp, size_t *bufsizep) infile = NULL; argno++; } + buf[0] = saveb0; + *pbuf = buf; + *pbufsize = savebufsize; return (0); /* true end of file */ } +void +nextfile(void) +{ + if (infile != NULL && infile != stdin) + (void) fclose(infile); + infile = NULL; + argno++; +} + +/* + * read one record into buf + */ int -readrec(uchar **bufp, size_t *sizep, FILE *inf) /* read one record into buf */ +readrec(char **pbuf, size_t *pbufsize, FILE *inf) { int sep, c; - uchar *buf; - int count; - size_t bufsize; + char *rr, *rt, *buf = *pbuf; + size_t bufsize = *pbufsize; + char *rs = getsval(rsloc); + + if (rtbuf == NULL && (rtbuf = malloc(rtbufsize)) == NULL) + FATAL("out of memory in readrec"); - init_buf(&buf, &bufsize, LINE_INCR); - if ((sep = **RS) == 0) { + rr = buf; + rt = rtbuf; + + if ((sep = *rs) == '\0') { sep = '\n'; /* skip leading \n's */ while ((c = getc(inf)) == '\n' && c != EOF) @@ -163,47 +279,90 @@ readrec(uchar **bufp, size_t *sizep, FILE *inf) /* read one record into buf */ if (c != EOF) (void) ungetc(c, inf); } - count = 0; - for (;;) { - while ((c = getc(inf)) != sep && c != EOF) { - expand_buf(&buf, &bufsize, count); - buf[count++] = c; + while ((c = getc(inf)) != EOF) { + if (c != sep) { + if (rr-buf+1 > bufsize) { + (void) adjbuf(&buf, &bufsize, + 1+rr-buf, recsize, &rr, "readrec1"); + } + *rr++ = c; + continue; + } + + /* + * Ensure enough space for either a single separator + * character, or at least two '\n' chars (when RS is + * the empty string). 
+ */ + (void) adjbuf(&rtbuf, &rtbufsize, + 2+rt-rtbuf, recsize, &rt, "readrec2"); + + if (*rs == sep) { + *rt++ = sep; + break; } - if (**RS == sep || c == EOF) + + if ((c = getc(inf)) == '\n') { /* 2 in a row */ + *rt++ = '\n'; + *rt++ = '\n'; + while ((c = getc(inf)) == '\n' && c != EOF) { + /* Read any further \n's and add them to RT. */ + (void) adjbuf(&rtbuf, &rtbufsize, + 1+rt-rtbuf, recsize, &rt, "readrec3"); + *rt++ = '\n'; + } + if (c != EOF) + (void) ungetc(c, inf); break; - if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */ + } + + if (c == EOF) { + *rt++ = '\n'; break; - expand_buf(&buf, &bufsize, count + 1); - buf[count++] = '\n'; - buf[count++] = c; + } + + (void) adjbuf(&buf, &bufsize, + 2+rr-buf, recsize, &rr, "readrec4"); + *rr++ = '\n'; + *rr++ = c; } - buf[count] = '\0'; + (void) adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec5"); + (void) adjbuf(&rtbuf, &rtbufsize, 1+rt-rtbuf, recsize, &rt, "readrec6"); + *rr = '\0'; + *rt = '\0'; dprintf(("readrec saw <%s>, returns %d\n", - buf, c == EOF && count == 0 ? 0 : 1)); - *bufp = buf; - *sizep = count; - return (c == EOF && count == 0 ? 0 : 1); + buf, c == EOF && rr == buf ? 
0 : 1)); + *pbuf = buf; + *pbufsize = bufsize; + if (c == EOF && rr == buf) { + return (0); + } else { + (void) setsval(rtloc, rtbuf); + return (1); + } } /* get ARGV[n] */ -static uchar * +static char * getargv(int n) { Cell *x; - uchar *s, temp[11]; + char *s, temp[50]; extern Array *ARGVtab; - (void) sprintf((char *)temp, "%d", n); - x = setsymtab(temp, (uchar *)"", 0.0, STR, ARGVtab); + (void) sprintf(temp, "%d", n); + if (lookup(temp, ARGVtab) == NULL) + return (NULL); + x = setsymtab(temp, "", 0.0, STR, ARGVtab); s = getsval(x); dprintf(("getargv(%d) returns |%s|\n", n, s)); return (s); } void -setclvar(uchar *s) /* set var=value from s */ +setclvar(char *s) /* set var=value from s */ { - uchar *p; + char *p; Cell *q; for (p = s; *p != '='; p++) @@ -213,7 +372,7 @@ setclvar(uchar *s) /* set var=value from s */ q = setsymtab(s, p, 0.0, STR, symtab); (void) setsval(q, p); if (is_number(q->sval)) { - q->fval = atof((const char *)q->sval); + q->fval = atof(q->sval); q->tval |= NUM; } dprintf(("command line set %s to |%s|\n", s, p)); @@ -221,236 +380,232 @@ setclvar(uchar *s) /* set var=value from s */ } void -fldbld(void) +fldbld(void) /* create fields from current record */ { - uchar *r, *fr, sep; + /* this relies on having fields[] the same length as $0 */ + /* the fields are all stored in this one array with \0's */ + /* possibly with a final trailing \0 not associated with any field */ + char *r, *fr, sep; Cell *p; - int i; - size_t len; + int i, j, n; if (donefld) return; - if (!(recloc->tval & STR)) - (void) getsval(recloc); - r = recloc->sval; /* was record! */ - - /* make sure fields is always allocated */ - adjust_buf(&fields, fields_size); - - /* - * make sure fields has enough size. We don't expand the buffer - * in the middle of the loop, since p->sval has already pointed - * the address in the fields. 
- */ - len = strlen((char *)r) + 1; - expand_buf(&fields, &fields_size, len); + if (!isstr(fldtab[0])) + (void) getsval(fldtab[0]); + r = fldtab[0]->sval; + n = strlen(r); + if (n > fieldssize) { + xfree(fields); + /* possibly 2 final \0s */ + if ((fields = (char *)malloc(n + 2)) == NULL) + FATAL("out of space for fields in fldbld %d", n); + fieldssize = n; + } fr = fields; i = 0; /* number of fields accumulated here */ - if (strlen((char *)*FS) > 1) { /* it's a regular expression */ - i = refldbld(r, *FS); - } else if ((sep = **FS) == ' ') { + if (strlen(inputFS) > 1) { /* it's a regular expression */ + i = refldbld(r, inputFS); + } else if ((sep = *inputFS) == ' ') { /* default whitespace */ for (i = 0; ; ) { while (*r == ' ' || *r == '\t' || *r == '\n') r++; - if (*r == 0) + if (*r == '\0') break; i++; - p = getfld(i); - if (!(p->tval & DONTFREE)) - xfree(p->sval); - p->sval = fr; - p->tval = FLD | STR | DONTFREE; + if (i > nfields) + growfldtab(i); + if (freeable(fldtab[i])) + xfree(fldtab[i]->sval); + fldtab[i]->sval = fr; + fldtab[i]->tval = FLD | STR | DONTFREE; do *fr++ = *r++; while (*r != ' ' && *r != '\t' && *r != '\n' && *r != '\0') ; - *fr++ = 0; + *fr++ = '\0'; + } + *fr = '\0'; + } else if ((sep = *inputFS) == '\0') { + /* new: FS="" => 1 char/field */ + for (i = 0; *r != '\0'; r++) { + char buf[2]; + i++; + if (i > nfields) + growfldtab(i); + if (freeable(fldtab[i])) + xfree(fldtab[i]->sval); + buf[0] = *r; + buf[1] = '\0'; + fldtab[i]->sval = tostring(buf); + fldtab[i]->tval = FLD | STR; } - *fr = 0; - } else if (*r != 0) { /* if 0, it's a null field */ + *fr = '\0'; + } else if (*r != '\0') { /* if 0, it's a null field */ + /* + * subtlecase : if length(FS) == 1 && length(RS > 0) + * \n is NOT a field separator (cf awk book 61,84). + * this variable is tested in the inner while loop. 
+ */ + int rtest = '\n'; /* normal case */ + if (strlen(*RS) > 0) + rtest = '\0'; for (;;) { i++; - p = getfld(i); - if (!(p->tval & DONTFREE)) - xfree(p->sval); - p->sval = fr; - p->tval = FLD | STR | DONTFREE; - /* \n always a separator */ - while (*r != sep && *r != '\n' && *r != '\0') + if (i > nfields) + growfldtab(i); + if (freeable(fldtab[i])) + xfree(fldtab[i]->sval); + fldtab[i]->sval = fr; + fldtab[i]->tval = FLD | STR | DONTFREE; + /* \n is always a separator */ + while (*r != sep && *r != rtest && *r != '\0') *fr++ = *r++; - *fr++ = 0; - if (*r++ == 0) + *fr++ = '\0'; + if (*r++ == '\0') break; } - *fr = 0; + *fr = '\0'; } + if (i > nfields) + FATAL("record `%.30s...' has too many fields; can't happen", r); /* clean out junk from previous record */ - cleanfld(i, maxfld); - maxfld = i; + cleanfld(i+1, lastfld); + lastfld = i; donefld = 1; - for (i = 1; i <= maxfld; i++) { - p = getfld(i); + for (j = 1; j <= lastfld; j++) { + p = fldtab[j]; if (is_number(p->sval)) { - p->fval = atof((const char *)p->sval); + p->fval = atof(p->sval); p->tval |= NUM; } } - - (void) setfval(nfloc, (Awkfloat) maxfld); + (void) setfval(nfloc, (Awkfloat)lastfld); + donerec = 1; /* restore */ if (dbg) { - for (i = 0; i <= maxfld; i++) { - p = getfld(i); - (void) printf("field %d: |%s|\n", i, p->sval); + for (j = 0; j <= lastfld; j++) { + p = fldtab[j]; + (void) printf("field %d (%s): |%s|\n", + j, p->nval, p->sval); } } } +/* clean out fields n1 .. 
n2 inclusive; nvals remain intact */ static void -cleanfld(int n1, int n2) /* clean out fields n1..n2 inclusive */ +cleanfld(int n1, int n2) { - static uchar *nullstat = (uchar *) ""; Cell *p; - int i; + int i; - for (i = n2; i > n1; i--) { - p = getfld(i); - if (!(p->tval & DONTFREE)) + for (i = n1; i <= n2; i++) { + p = fldtab[i]; + if (freeable(p)) xfree(p->sval); + p->sval = ""; p->tval = FLD | STR | DONTFREE; - p->sval = nullstat; } } void -newfld(int n) /* add field n (after end) */ +newfld(int n) /* add field n after end of existing lastfld */ { - if (n < 0) - ERROR "accessing invalid field", record FATAL; - (void) getfld(n); - cleanfld(maxfld, n); - maxfld = n; - (void) setfval(nfloc, (Awkfloat) n); + if (n > nfields) + growfldtab(n); + cleanfld(lastfld+1, n); + lastfld = n; + (void) setfval(nfloc, (Awkfloat)n); } -/* - * allocate field table. We don't reallocate the table since there - * might be somewhere recording the address of the table. - */ -static void -morefld(void) +void +setlastfld(int n) /* set lastfld cleaning fldtab cells if necessary */ { - int i; - struct fldtab_chunk *fldcp; - Cell *newfld; - - if ((fldcp = calloc(sizeof (struct fldtab_chunk), 1)) == NULL) - ERROR "out of space in morefld" FATAL; - - newfld = &fldcp->fields[0]; - for (i = 0; i < FLD_INCR; i++) { - newfld[i].ctype = OCELL; - newfld[i].csub = CFLD; - newfld[i].nval = NULL; - newfld[i].sval = (uchar *)""; - newfld[i].fval = 0.0; - newfld[i].tval = FLD|STR|DONTFREE; - newfld[i].cnext = NULL; - } - /* - * link this field chunk - */ - if (fldtab_head == NULL) - fldtab_head = fldcp; + if (n < 0) + FATAL("cannot set NF to a negative value"); + if (n > nfields) + growfldtab(n); + + if (lastfld < n) + cleanfld(lastfld+1, n); else - fldtab_tail->next = fldcp; - fldtab_tail = fldcp; - fldcp->next = NULL; + cleanfld(n+1, lastfld); - fldtab_maxidx += FLD_INCR; + lastfld = n; } Cell * -getfld(int idx) +fieldadr(int n) /* get nth field */ { - struct fldtab_chunk *fldcp; - int cbase; - - if 
(idx < 0) - ERROR "trying to access field %d", idx FATAL; - while (idx >= fldtab_maxidx) - morefld(); - cbase = 0; - for (fldcp = fldtab_head; fldcp != NULL; fldcp = fldcp->next) { - if (idx < (cbase + FLD_INCR)) - return (&fldcp->fields[idx - cbase]); - cbase += FLD_INCR; - } - /* should never happen */ - ERROR "trying to access invalid field %d", idx FATAL; - return (NULL); + if (n < 0) + FATAL("trying to access out of range field %d", n); + if (n > nfields) /* fields after NF are empty */ + growfldtab(n); /* but does not increase NF */ + return (fldtab[n]); } -int -fldidx(Cell *vp) +void +growfldtab(int n) /* make new fields up to at least $n */ { - struct fldtab_chunk *fldcp; - Cell *tbl; - int cbase; - - cbase = 0; - for (fldcp = fldtab_head; fldcp != NULL; fldcp = fldcp->next) { - tbl = &fldcp->fields[0]; - if (vp >= tbl && vp < (tbl + FLD_INCR)) - return (cbase + (vp - tbl)); - cbase += FLD_INCR; - } - /* should never happen */ - ERROR "trying to access unknown field" FATAL; - return (0); + int nf = 2 * nfields; + size_t s; + + if (n > nf) + nf = n; + s = (nf+1) * (sizeof (Cell *)); /* freebsd: how much do we need? 
*/ + if (s / sizeof (Cell *) - 1 == nf) /* didn't overflow */ + fldtab = (Cell **)realloc(fldtab, s); + else /* overflow sizeof int */ + xfree(fldtab); /* make it null */ + if (fldtab == NULL) + FATAL("out of space creating %d fields", nf); + makefields(nfields+1, nf); + nfields = nf; } +/* build fields from reg expr in FS */ static int -refldbld(uchar *rec, uchar *fs) /* build fields from reg expr in FS */ +refldbld(const char *rec, const char *fs) { - uchar *fr; - int i, tempstat; + /* this relies on having fields[] the same length as $0 */ + /* the fields are all stored in this one array with \0's */ + char *fr; + int i, tempstat, n; fa *pfa; - Cell *p; - size_t len; - /* make sure fields is allocated */ - adjust_buf(&fields, fields_size); + n = strlen(rec); + if (n > fieldssize) { + xfree(fields); + if ((fields = (char *)malloc(n+1)) == NULL) + FATAL("out of space for fields in refldbld %d", n); + fieldssize = n; + } fr = fields; *fr = '\0'; if (*rec == '\0') return (0); - - len = strlen((char *)rec) + 1; - expand_buf(&fields, &fields_size, len); - fr = fields; - pfa = makedfa(fs, 1); dprintf(("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs)); tempstat = pfa->initstat; for (i = 1; ; i++) { - p = getfld(i); - if (!(p->tval & DONTFREE)) - xfree(p->sval); - p->tval = FLD | STR | DONTFREE; - p->sval = fr; + if (i > nfields) + growfldtab(i); + if (freeable(fldtab[i])) + xfree(fldtab[i]->sval); + fldtab[i]->tval = FLD | STR | DONTFREE; + fldtab[i]->sval = fr; dprintf(("refldbld: i=%d\n", i)); if (nematch(pfa, rec)) { - pfa->initstat = 2; + pfa->initstat = 2; /* horrible coupling to b.c */ dprintf(("match %s (%d chars)\n", patbeg, patlen)); - (void) strncpy((char *)fr, (char *)rec, patbeg-rec); + (void) strncpy(fr, rec, patbeg-rec); fr += patbeg - rec + 1; *(fr-1) = '\0'; rec = patbeg + patlen; } else { dprintf(("no match %s\n", rec)); - (void) strcpy((char *)fr, (char *)rec); + (void) strcpy(fr, rec); pfa->initstat = tempstat; break; } @@ -459,71 +614,74 @@ 
refldbld(uchar *rec, uchar *fs) /* build fields from reg expr in FS */ } void -recbld(void) +recbld(void) /* create $0 from $1..$NF if necessary */ { int i; - uchar *p; + char *p; size_t cnt, len, olen; + char *sep = getsval(ofsloc); if (donerec == 1) return; cnt = 0; - olen = strlen((char *)*OFS); + olen = strlen(sep); for (i = 1; i <= *NF; i++) { - p = getsval(getfld(i)); - len = strlen((char *)p); - expand_buf(&record, &record_size, cnt + len + olen); + p = getsval(fldtab[i]); + len = strlen(p); + expand_buf(&record, &recsize, cnt + len + olen); (void) memcpy(&record[cnt], p, len); cnt += len; if (i < *NF) { - (void) memcpy(&record[cnt], *OFS, olen); + (void) memcpy(&record[cnt], sep, olen); cnt += olen; } } record[cnt] = '\0'; - dprintf(("in recbld FS=%o, recloc=%p\n", **FS, (void *)recloc)); - if (!(recloc->tval & DONTFREE)) + dprintf(("in recbld inputFS=%s, recloc=%p\n", inputFS, (void *)recloc)); + if (freeable(recloc)) xfree(recloc->sval); recloc->tval = REC | STR | DONTFREE; recloc->sval = record; - dprintf(("in recbld FS=%o, recloc=%p\n", **FS, (void *)recloc)); + dprintf(("in recbld inputFS=%s, recloc=%p\n", inputFS, (void *)recloc)); dprintf(("recbld = |%s|\n", record)); donerec = 1; } -Cell * -fieldadr(int n) +int errorflag = 0; + +void +yyerror(const char *s) { - if (n < 0) - ERROR "trying to access field %d", n FATAL; - return (getfld(n)); + SYNTAX("%s", s); } -int errorflag = 0; -char errbuf[200]; - void -yyerror(char *s) +SYNTAX(const char *fmt, ...) 
{ - extern uchar *cmdname, *curfname; + extern char *cmdname, *curfname; static int been_here = 0; + va_list varg; if (been_here++ > 2) return; - (void) fprintf(stderr, "%s: %s", cmdname, s); - (void) fprintf(stderr, gettext(" at source line %lld"), lineno); + (void) fprintf(stderr, "%s: ", cmdname); + va_start(varg, fmt); + (void) vfprintf(stderr, fmt, varg); + va_end(varg); + (void) fprintf(stderr, " at source line %lld", lineno); if (curfname != NULL) - (void) fprintf(stderr, gettext(" in function %s"), curfname); + (void) fprintf(stderr, " in function %s", curfname); + if (compile_time == 1 && cursource() != NULL) + (void) fprintf(stderr, " source file %s", cursource()); (void) fprintf(stderr, "\n"); errorflag = 2; eprint(); } -/*ARGSUSED*/ void -fpecatch(int sig) +fpecatch(int n) { - ERROR "floating point exception" FATAL; + FATAL("floating point exception %d", n); } extern int bracecnt, brackcnt, parencnt; @@ -558,47 +716,74 @@ bcheck2(int n, int c1, int c2) } void -error(int f, char *s) +FATAL(const char *fmt, ...) { - extern Node *curnode; - extern uchar *cmdname; + extern char *cmdname; + va_list varg; + + (void) fflush(stdout); + (void) fprintf(stderr, "%s: ", cmdname); + va_start(varg, fmt); + (void) vfprintf(stderr, fmt, varg); + va_end(varg); + error(); + if (dbg > 1) /* core dump if serious debugging on */ + abort(); + exit(2); +} + +void +WARNING(const char *fmt, ...) 
+{ + extern char *cmdname; + va_list varg; (void) fflush(stdout); (void) fprintf(stderr, "%s: ", cmdname); - (void) fprintf(stderr, "%s", s); + va_start(varg, fmt); + (void) vfprintf(stderr, fmt, varg); + va_end(varg); + error(); +} + +void +error(void) +{ + extern Node *curnode; + (void) fprintf(stderr, "\n"); if (compile_time != 2 && NR && *NR > 0) { (void) fprintf(stderr, gettext(" input record number %g"), *FNR); - if (strcmp((char *)*FILENAME, "-") != 0) + if (strcmp(*FILENAME, "-") != 0) (void) fprintf(stderr, gettext(", file %s"), *FILENAME); (void) fprintf(stderr, "\n"); } if (compile_time != 2 && curnode) - (void) fprintf(stderr, gettext(" source line number %lld\n"), + (void) fprintf(stderr, gettext(" source line number %lld"), curnode->lineno); else if (compile_time != 2 && lineno) { (void) fprintf(stderr, - gettext(" source line number %lld\n"), lineno); + gettext(" source line number %lld"), lineno); } + if (compile_time == 1 && cursource() != NULL) + (void) fprintf(stderr, gettext(" source file %s"), cursource()); + (void) fprintf(stderr, "\n"); eprint(); - if (f) { - if (dbg) - abort(); - exit(2); - } } static void eprint(void) /* try to print context around error */ { - uchar *p, *q; + char *p, *q; int c; static int been_here = 0; - extern uchar ebuf[300], *ep; + extern char ebuf[], *ep; if (compile_time == 2 || compile_time == 0 || been_here++ > 0) return; + if (ebuf == ep) + return; p = ep - 1; if (p > ebuf && *p == '\n') p--; @@ -640,30 +825,22 @@ bclass(int c) } double -errcheck(double x, char *s) +errcheck(double x, const char *s) { - extern int errno; - if (errno == EDOM) { errno = 0; - ERROR "%s argument out of domain", s WARNING; + WARNING("%s argument out of domain", s); x = 1; } else if (errno == ERANGE) { errno = 0; - ERROR "%s result out of range", s WARNING; + WARNING("%s result out of range", s); x = 1; } return (x); } -void -PUTS(uchar *s) -{ - dprintf(("%s\n", s)); -} - int -isclvar(uchar *s) /* is s of form var=something? 
*/ +isclvar(const char *s) /* is s of form var=something ? */ { if (s != NULL) { @@ -686,88 +863,28 @@ isclvar(uchar *s) /* is s of form var=something? */ return (0); } -#define MAXEXPON 38 /* maximum exponent for fp number */ - +#include <math.h> int -is_number(uchar *s) +is_number(const char *s) { - int d1, d2; - int point; - uchar *es; - extern char radixpoint; - - d1 = d2 = point = 0; - while (*s == ' ' || *s == '\t' || *s == '\n') - s++; - if (*s == '\0') - return (0); /* empty stuff isn't number */ - if (*s == '+' || *s == '-') - s++; - if (!isdigit(*s) && *s != radixpoint) - return (0); - if (isdigit(*s)) { - do { - d1++; - s++; - } while (isdigit(*s)); - } - if (d1 >= MAXEXPON) - return (0); /* too many digits to convert */ - if (*s == radixpoint) { - point++; - s++; - } - if (isdigit(*s)) { - d2++; - do { - s++; - } while (isdigit(*s)); - } - if (!(d1 || point && d2)) + double r; + char *ep; + errno = 0; + r = strtod(s, &ep); + if (ep == s || r == HUGE_VAL || errno == ERANGE) return (0); - if (*s == 'e' || *s == 'E') { - s++; - if (*s == '+' || *s == '-') - s++; - if (!isdigit(*s)) - return (0); - es = s; - do { - s++; - } while (isdigit(*s)); - if (s - es > 2) { - return (0); - } else if (s - es == 2 && - (int)(10 * (*es-'0') + *(es+1)-'0') >= MAXEXPON) { - return (0); - } - } - while (*s == ' ' || *s == '\t' || *s == '\n') - s++; - if (*s == '\0') + while (*ep == ' ' || *ep == '\t' || *ep == '\n') + ep++; + if (*ep == '\0') return (1); else return (0); } void -init_buf(uchar **optr, size_t *sizep, size_t amt) +r_expand_buf(char **optr, size_t *sizep, size_t req) { - uchar *nptr = NULL; - - if ((nptr = malloc(amt)) == NULL) - ERROR "out of space in init_buf" FATAL; - /* initial buffer should have NULL terminated */ - *nptr = '\0'; - if (sizep != NULL) - *sizep = amt; - *optr = nptr; -} - -void -r_expand_buf(uchar **optr, size_t *sizep, size_t req) -{ - uchar *nptr; + char *nptr; size_t amt, size = *sizep; if (size != 0 && req < (size - 1)) @@ -776,20 
+893,10 @@ r_expand_buf(uchar **optr, size_t *sizep, size_t req) amt = (amt / LINE_INCR + 1) * LINE_INCR; if ((nptr = realloc(*optr, size + amt)) == NULL) - ERROR "out of space in expand_buf" FATAL; + FATAL("out of space in expand_buf"); /* initial buffer should have NULL terminated */ if (size == 0) *nptr = '\0'; *sizep += amt; *optr = nptr; } - -void -adjust_buf(uchar **optr, size_t size) -{ - uchar *nptr; - - if ((nptr = realloc(*optr, size)) == NULL) - ERROR "out of space in adjust_buf" FATAL; - *optr = nptr; -} diff --git a/usr/src/cmd/awk/main.c b/usr/src/cmd/awk/main.c index b0c9d5ae98..ff004daf65 100644 --- a/usr/src/cmd/awk/main.c +++ b/usr/src/cmd/awk/main.c @@ -1,4 +1,28 @@ /* + * Copyright (C) Lucent Technologies 1997 + * All Rights Reserved + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby + * granted, provided that the above copyright notice appear in all + * copies and that both that the copyright notice and this + * permission notice and warranty disclaimer appear in supporting + * documentation, and that the name Lucent Technologies or any of + * its entities not be used in advertising or publicity pertaining + * to distribution of the software without specific, written prior + * permission. + * + * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. + * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER + * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, + * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF + * THIS SOFTWARE. 
+ */ + +/* * CDDL HEADER START * * The contents of this file are subject to the terms of the @@ -39,24 +63,25 @@ #include "awk.h" #include "y.tab.h" -char *version = "version Oct 11, 1989"; +char *version = "version Aug 27, 2018"; int dbg = 0; -uchar *cmdname; /* gets argv[0] for error messages */ -uchar *lexprog; /* points to program argument if it exists */ +Awkfloat srand_seed = 1; +char *cmdname; /* gets argv[0] for error messages */ +char *lexprog; /* points to program argument if it exists */ int compile_time = 2; /* for error printing: */ /* 2 = cmdline, 1 = compile, 0 = running */ -char radixpoint = '.'; -static uchar **pfile = NULL; /* program filenames from -f's */ +static char **pfile = NULL; /* program filenames from -f's */ static int npfile = 0; /* number of filenames */ static int curpfile = 0; /* current filename */ +int safe = 0; /* 1 => "safe" mode */ + int main(int argc, char *argv[], char *envp[]) { - uchar *fs = NULL; - char *nl_radix; + const char *fs = NULL; /* * At this point, numbers are still scanned as in * the POSIX locale. 
@@ -68,7 +93,7 @@ main(int argc, char *argv[], char *envp[]) #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */ #endif (void) textdomain(TEXT_DOMAIN); - cmdname = (uchar *)argv[0]; + cmdname = argv[0]; if (argc == 1) { (void) fprintf(stderr, gettext( "Usage: %s [-f programfile | 'program'] [-Ffieldsep] " @@ -76,9 +101,19 @@ main(int argc, char *argv[], char *envp[]) exit(1); } (void) signal(SIGFPE, fpecatch); + + srand_seed = 1; + srand((unsigned int)srand_seed); + yyin = NULL; - syminit(); + symtab = makesymtab(NSYMTAB/NSYMTAB); while (argc > 1 && argv[1][0] == '-' && argv[1][1] != '\0') { + if (strcmp(argv[1], "-version") == 0 || + strcmp(argv[1], "--version") == 0) { + (void) printf("awk %s\n", version); + exit(0); + break; + } if (strcmp(argv[1], "--") == 0) { /* explicit end of args */ argc--; @@ -86,41 +121,66 @@ main(int argc, char *argv[], char *envp[]) break; } switch (argv[1][1]) { + case 's': + if (strcmp(argv[1], "-safe") == 0) + safe = 1; + break; case 'f': /* next argument is program filename */ - argc--; - argv++; - if (argc <= 1) - ERROR "no program filename" FATAL; - pfile = realloc(pfile, sizeof (uchar *) * (npfile + 1)); - if (pfile == NULL) - ERROR "out of space in main" FATAL; - pfile[npfile++] = (uchar *)argv[1]; + if (argv[1][2] != 0) { /* arg is -fsomething */ + pfile = realloc(pfile, + sizeof (char *) * (npfile + 1)); + if (pfile == NULL) + FATAL("out of space in main"); + pfile[npfile++] = &argv[1][2]; + } else { /* arg is -f something */ + argc--; argv++; + if (argc <= 1) + FATAL("no program filename"); + pfile = realloc(pfile, + sizeof (char *) * (npfile + 1)); + if (pfile == NULL) + FATAL("out of space in main"); + pfile[npfile++] = argv[1]; + } break; case 'F': /* set field separator */ if (argv[1][2] != 0) { /* arg is -Fsomething */ /* wart: t=>\t */ if (argv[1][2] == 't' && argv[1][3] == 0) - fs = (uchar *) "\t"; + fs = "\t"; else if (argv[1][2] != 0) - fs = (uchar *)&argv[1][2]; + fs = &argv[1][2]; } else { /* arg is 
-F something */ argc--; argv++; if (argc > 1) { /* wart: t=>\t */ if (argv[1][0] == 't' && argv[1][1] == 0) - fs = (uchar *) "\t"; + fs = "\t"; else if (argv[1][0] != 0) - fs = (uchar *)&argv[1][0]; + fs = &argv[1][0]; } } if (fs == NULL || *fs == '\0') - ERROR "field separator FS is empty" WARNING; + WARNING("field separator FS is empty"); break; case 'v': /* -v a=1 to be done NOW. one -v for each */ - if (argv[1][2] == '\0' && --argc > 1 && - isclvar((uchar *)(++argv)[1])) - setclvar((uchar *)argv[1]); + if (argv[1][2] != 0) { /* arg is -vsomething */ + if (isclvar(&argv[1][2])) + setclvar(&argv[1][2]); + else + FATAL("invalid -v option argument: %s", + &argv[1][2]); + } else { /* arg is -v something */ + argc--; argv++; + if (argc <= 1) + FATAL("no variable name"); + if (isclvar(argv[1])) + setclvar(argv[1]); + else + FATAL("invalid -v option argument: %s", + argv[1]); + } break; case 'd': dbg = atoi(&argv[1][2]); @@ -129,7 +189,7 @@ main(int argc, char *argv[], char *envp[]) (void) printf("awk %s\n", version); break; default: - ERROR "unknown option %s ignored", argv[1] WARNING; + WARNING("unknown option %s ignored", argv[1]); break; } argc--; @@ -140,18 +200,21 @@ main(int argc, char *argv[], char *envp[]) if (argc <= 1) { if (dbg) exit(0); - ERROR "no program given" FATAL; + FATAL("no program given"); } dprintf(("program = |%s|\n", argv[1])); - lexprog = (uchar *)argv[1]; + lexprog = argv[1]; argc--; argv++; } + recinit(recsize); + syminit(); compile_time = 1; - argv[0] = (char *)cmdname; /* put prog name at front of arglist */ + argv[0] = cmdname; /* put prog name at front of arglist */ dprintf(("argc=%d, argv[0]=%s\n", argc, argv[0])); - arginit(argc, (uchar **)argv); - envinit((uchar **)envp); + arginit(argc, argv); + if (!safe) + envinit(envp); (void) yyparse(); if (fs) *FS = qstring(fs, '\0'); @@ -160,9 +223,6 @@ main(int argc, char *argv[], char *envp[]) * done parsing, so now activate the LC_NUMERIC */ (void) setlocale(LC_ALL, ""); - nl_radix = 
nl_langinfo(RADIXCHAR); - if (nl_radix) - radixpoint = *nl_radix; if (errorflag == 0) { compile_time = 0; @@ -173,7 +233,7 @@ main(int argc, char *argv[], char *envp[]) } int -pgetc(void) /* get program character */ +pgetc(void) /* get 1 character from awk program */ { int c; @@ -181,17 +241,27 @@ pgetc(void) /* get program character */ if (yyin == NULL) { if (curpfile >= npfile) return (EOF); - yyin = (strcmp((char *)pfile[curpfile], "-") == 0) ? - stdin : fopen((char *)pfile[curpfile], "r"); + yyin = (strcmp(pfile[curpfile], "-") == 0) ? + stdin : fopen(pfile[curpfile], "rF"); if (yyin == NULL) { - ERROR "can't open file %s", - pfile[curpfile] FATAL; + FATAL("can't open file %s", pfile[curpfile]); } + lineno = 1; } if ((c = getc(yyin)) != EOF) return (c); - (void) fclose(yyin); + if (yyin != stdin) + (void) fclose(yyin); yyin = NULL; curpfile++; } } + +char * +cursource(void) /* current source file name */ +{ + if (curpfile < npfile) + return (pfile[curpfile]); + else + return (NULL); +} diff --git a/usr/src/cmd/awk/maketab.c b/usr/src/cmd/awk/maketab.c index 5c7d8601ea..9c625aabcc 100644 --- a/usr/src/cmd/awk/maketab.c +++ b/usr/src/cmd/awk/maketab.c @@ -1,4 +1,28 @@ /* + * Copyright (C) Lucent Technologies 1997 + * All Rights Reserved + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby + * granted, provided that the above copyright notice appear in all + * copies and that both that the copyright notice and this + * permission notice and warranty disclaimer appear in supporting + * documentation, and that the name Lucent Technologies or any of + * its entities not be used in advertising or publicity pertaining + * to distribution of the software without specific, written prior + * permission. + * + * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 
+ * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER + * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, + * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF + * THIS SOFTWARE. + */ + +/* * CDDL HEADER START * * The contents of this file are subject to the terms of the @@ -26,6 +50,12 @@ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ +/* + * this program makes the table to link function names + * and type indices that is used by execute() in run.c. + * it finds the indices in ytab.h, produced by yacc. + */ + #include <stdio.h> #include <string.h> #include <stdlib.h> @@ -35,8 +65,8 @@ struct xx { int token; - char *name; - char *pname; + const char *name; + const char *pname; } proc[] = { { PROGRAM, "program", NULL }, { BOR, "boolop", " || " }, @@ -54,13 +84,14 @@ struct xx { { SUB, "sub", "sub" }, { GSUB, "gsub", "gsub" }, { INDEX, "sindex", "sindex" }, - { SPRINTF, "a_sprintf", "sprintf " }, + { SPRINTF, "awksprintf", "sprintf " }, { ADD, "arith", " + " }, { MINUS, "arith", " - " }, { MULT, "arith", " * " }, { DIVIDE, "arith", " / " }, { MOD, "arith", " % " }, { UMINUS, "arith", " -" }, + { UPLUS, "arith", " +" }, { POWER, "arith", " **" }, { PREINCR, "incrdecr", "++" }, { POSTINCR, "incrdecr", "++" }, @@ -73,10 +104,10 @@ struct xx { { NOTMATCH, "matchop", " !~ " }, { MATCHFCN, "matchop", "matchop" }, { INTEST, "intest", "intest" }, - { PRINTF, "aprintf", "printf" }, - { PRINT, "print", "print" }, + { PRINTF, "awkprintf", "printf" }, + { PRINT, "printstat", "print" }, { CLOSE, "closefile", "closefile" }, - { DELETE, "delete", "delete" }, + { DELETE, "awkdelete", "awkdelete" }, { SPLIT, "split", "split" }, { ASSIGN, "assign", " = " }, { ADDEQ, "assign", " += " }, @@ -92,6 +123,7 @@ struct xx { { DO, "dostat", "do" }, { IN, "instat", "instat" }, { NEXT, 
"jump", "next" }, + { NEXTFILE, "jump", "nextfile" }, { EXIT, "jump", "exit" }, { BREAK, "jump", "break" }, { CONTINUE, "jump", "continue" }, @@ -100,23 +132,24 @@ struct xx { { CALL, "call", "call" }, { ARG, "arg", "arg" }, { VARNF, "getnf", "NF" }, - { GETLINE, "getaline", "getline" }, + { GETLINE, "awkgetline", "getline" }, { 0, "", "" }, }; -#define SIZE LASTTOKEN - FIRSTTOKEN + 1 -char *table[SIZE]; +#define SIZE (LASTTOKEN - FIRSTTOKEN + 1) +const char *table[SIZE]; char *names[SIZE]; int -main() +main(int argc, char *argv[]) { - struct xx *p; + const struct xx *p; int i, n, tok; char c; FILE *fp; - char buf[100], name[100], def[100]; + char buf[200], name[200], def[200]; + printf("#include <stdio.h>\n"); printf("#include \"awk.h\"\n"); printf("#include \"y.tab.h\"\n\n"); @@ -124,28 +157,29 @@ main() fprintf(stderr, gettext("maketab can't open y.tab.h!\n")); exit(1); } - printf("static uchar *printname[%d] = {\n", SIZE); + printf("static char *printname[%d] = {\n", SIZE); i = 0; while (fgets(buf, sizeof (buf), fp) != NULL) { n = sscanf(buf, "%1c %s %s %d", &c, def, name, &tok); - /* not a valid #define? 
*/ - if (c != '#' || n != 4 && strcmp(def, "define") != 0) + if (c != '#' || (n != 4 && strcmp(def, "define") != 0)) { + /* not a valid #define */ continue; + } if (tok < FIRSTTOKEN || tok > LASTTOKEN) { fprintf(stderr, gettext("maketab funny token %d %s\n"), tok, buf); exit(1); } - names[tok-FIRSTTOKEN] = malloc(strlen(name)+1); + names[tok-FIRSTTOKEN] = (char *)malloc(strlen(name)+1); strcpy(names[tok-FIRSTTOKEN], name); - printf("\t(uchar *) \"%s\",\t/* %d */\n", name, tok); + printf("\t(char *) \"%s\",\t/* %d */\n", name, tok); i++; } printf("};\n\n"); for (p = proc; p->token != 0; p++) table[p->token-FIRSTTOKEN] = p->name; - printf("\nCell *(*proctab[%d])() = {\n", SIZE); + printf("\nCell *(*proctab[%d])(Node **, int) = {\n", SIZE); for (i = 0; i < SIZE; i++) if (table[i] == 0) printf("\tnullproc,\t/* %s */\n", names[i]); @@ -153,14 +187,14 @@ main() printf("\t%s,\t/* %s */\n", table[i], names[i]); printf("};\n\n"); - printf("uchar *\ntokname(int n)\n"); /* print a tokname() function */ + printf("char *\ntokname(int n)\n"); /* print a tokname() function */ printf("{\n"); printf(" static char buf[100];\n\n"); printf(" if (n < FIRSTTOKEN || n > LASTTOKEN) {\n"); printf(" (void) sprintf(buf, \"token %%d\", n);\n"); - printf(" return ((uchar *)buf);\n"); + printf(" return (buf);\n"); printf(" }\n"); - printf(" return printname[n-257];\n"); + printf(" return printname[n-FIRSTTOKEN];\n"); printf("}\n"); - exit(0); + return (0); } diff --git a/usr/src/cmd/awk/parse.c b/usr/src/cmd/awk/parse.c index 909977f10f..2afcf1e78f 100644 --- a/usr/src/cmd/awk/parse.c +++ b/usr/src/cmd/awk/parse.c @@ -1,4 +1,28 @@ /* + * Copyright (C) Lucent Technologies 1997 + * All Rights Reserved + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby + * granted, provided that the above copyright notice appear in all + * copies and that both that the copyright notice and this + * permission notice and warranty 
disclaimer appear in supporting + * documentation, and that the name Lucent Technologies or any of + * its entities not be used in advertising or publicity pertaining + * to distribution of the software without specific, written prior + * permission. + * + * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. + * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER + * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, + * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF + * THIS SOFTWARE. + */ + +/* * CDDL HEADER START * * The contents of this file are subject to the terms of the @@ -28,8 +52,6 @@ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ -#pragma ident "%Z%%M% %I% %E% SMI" - #define DEBUG #include "awk.h" #include "y.tab.h" @@ -37,11 +59,11 @@ Node * nodealloc(int n) { - register Node *x; + Node *x; x = (Node *)malloc(sizeof (Node) + (n - 1) * sizeof (Node *)); if (x == NULL) - ERROR "out of space in nodealloc" FATAL; + FATAL("out of space in nodealloc"); x->nnext = NULL; x->lineno = lineno; return (x); @@ -57,7 +79,7 @@ exptostat(Node *a) Node * node1(int a, Node *b) { - register Node *x; + Node *x; x = nodealloc(1); x->nobj = a; @@ -68,7 +90,7 @@ node1(int a, Node *b) Node * node2(int a, Node *b, Node *c) { - register Node *x; + Node *x; x = nodealloc(2); x->nobj = a; @@ -80,7 +102,7 @@ node2(int a, Node *b, Node *c) Node * node3(int a, Node *b, Node *c, Node *d) { - register Node *x; + Node *x; x = nodealloc(3); x->nobj = a; @@ -93,7 +115,8 @@ node3(int a, Node *b, Node *c, Node *d) Node * node4(int a, Node *b, Node *c, Node *d, Node *e) { - register Node *x; + Node *x; + x = nodealloc(4); x->nobj = a; x->narg[0] = b; @@ -104,89 +127,89 @@ node4(int a, Node *b, Node *c, Node *d, 
Node *e) } Node * -stat3(int a, Node *b, Node *c, Node *d) +stat1(int a, Node *b) { - register Node *x; + Node *x; - x = node3(a, b, c, d); + x = node1(a, b); x->ntype = NSTAT; return (x); } Node * -op2(int a, Node *b, Node *c) +stat2(int a, Node *b, Node *c) { - register Node *x; + Node *x; x = node2(a, b, c); - x->ntype = NEXPR; + x->ntype = NSTAT; return (x); } Node * -op1(int a, Node *b) +stat3(int a, Node *b, Node *c, Node *d) { - register Node *x; + Node *x; - x = node1(a, b); - x->ntype = NEXPR; + x = node3(a, b, c, d); + x->ntype = NSTAT; return (x); } Node * -stat1(int a, Node *b) +stat4(int a, Node *b, Node *c, Node *d, Node *e) { - register Node *x; + Node *x; - x = node1(a, b); + x = node4(a, b, c, d, e); x->ntype = NSTAT; return (x); } Node * -op3(int a, Node *b, Node *c, Node *d) +op1(int a, Node *b) { - register Node *x; + Node *x; - x = node3(a, b, c, d); + x = node1(a, b); x->ntype = NEXPR; return (x); } Node * -op4(int a, Node *b, Node *c, Node *d, Node *e) +op2(int a, Node *b, Node *c) { - register Node *x; + Node *x; - x = node4(a, b, c, d, e); + x = node2(a, b, c); x->ntype = NEXPR; return (x); } Node * -stat2(int a, Node *b, Node *c) +op3(int a, Node *b, Node *c, Node *d) { - register Node *x; + Node *x; - x = node2(a, b, c); - x->ntype = NSTAT; + x = node3(a, b, c, d); + x->ntype = NEXPR; return (x); } Node * -stat4(int a, Node *b, Node *c, Node *d, Node *e) +op4(int a, Node *b, Node *c, Node *d, Node *e) { - register Node *x; + Node *x; x = node4(a, b, c, d, e); - x->ntype = NSTAT; + x->ntype = NEXPR; return (x); } Node * -valtonode(Cell *a, int b) +celltonode(Cell *a, int b) { - register Node *x; + Node *x; a->ctype = OCELL; a->csub = b; @@ -196,10 +219,10 @@ valtonode(Cell *a, int b) } Node * -rectonode(void) +rectonode(void) /* make $0 into a Node */ { - /* return valtonode(lookup("$0", symtab), CFLD); */ - return (valtonode(recloc, CFLD)); + extern Cell *literal0; + return (op1(INDIRECT, celltonode(literal0, CUNK))); } Node * @@ -209,23 
+232,26 @@ makearr(Node *p) if (isvalue(p)) { cp = (Cell *)(p->narg[0]); - if (isfunc(cp)) - ERROR "%s is a function, not an array", cp->nval SYNTAX; + if (isfcn(cp)) + SYNTAX("%s is a function, not an array", cp->nval); else if (!isarr(cp)) { xfree(cp->sval); - cp->sval = (uchar *)makesymtab(NSYMTAB); + cp->sval = (char *)makesymtab(NSYMTAB); cp->tval = ARR; } } return (p); } +int paircnt; /* number of them in use */ +int *pairstack; /* state of each pat,pat */ + Node * -pa2stat(Node *a, Node *b, Node *c) +pa2stat(Node *a, Node *b, Node *c) /* pat, pat {...} */ { - register Node *x; + Node *x; - x = node4(PASTAT2, a, b, c, (Node *)paircnt); + x = node4(PASTAT2, a, b, c, itonp(paircnt)); paircnt++; x->ntype = NSTAT; return (x); @@ -234,7 +260,7 @@ pa2stat(Node *a, Node *b, Node *c) Node * linkum(Node *a, Node *b) { - register Node *c; + Node *c; if (errorflag) /* don't link things that are wrong */ return (a); @@ -248,38 +274,55 @@ linkum(Node *a, Node *b) return (a); } +/* turn on FCN bit in definition, */ +/* body of function, arglist */ void -defn(Cell *v, Node *vl, Node *st) /* turn on FCN bit in definition */ +defn(Cell *v, Node *vl, Node *st) { Node *p; int n; if (isarr(v)) { - ERROR "`%s' is an array name and a function name", - v->nval SYNTAX; + SYNTAX("`%s' is an array name and a function name", v->nval); + return; + } + if (isarg(v->nval) != -1) { + SYNTAX("`%s' is both function name and argument name", v->nval); return; } + v->tval = FCN; - v->sval = (uchar *)st; + v->sval = (char *)st; n = 0; /* count arguments */ - for (p = vl; p; p = p->nnext) + for (p = vl; p != NULL; p = p->nnext) n++; v->fval = n; dprintf(("defining func %s (%d args)\n", v->nval, n)); } +/* is s in argument list for current function? */ +/* return -1 if not, otherwise arg # */ int -isarg(uchar *s) /* is s in argument list for current function? 
*/ +isarg(const char *s) { extern Node *arglist; Node *p = arglist; int n; - for (n = 0; p != 0; p = p->nnext, n++) { - if (strcmp((char *)((Cell *)(p->narg[0]))->nval, - (char *)s) == 0) { + for (n = 0; p != NULL; p = p->nnext, n++) + if (strcmp(((Cell *)(p->narg[0]))->nval, s) == 0) return (n); - } - } return (-1); } + +int +ptoi(void *p) /* convert pointer to integer */ +{ + return ((int)(long)p); /* swearing that p fits, of course */ +} + +Node * +itonp(int i) /* and vice versa */ +{ + return ((Node *)(long)i); +} diff --git a/usr/src/cmd/awk/run.c b/usr/src/cmd/awk/run.c index 3cc8341d84..5226d43ed2 100644 --- a/usr/src/cmd/awk/run.c +++ b/usr/src/cmd/awk/run.c @@ -1,4 +1,28 @@ /* + * Copyright (C) Lucent Technologies 1997 + * All Rights Reserved + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby + * granted, provided that the above copyright notice appear in all + * copies and that both that the copyright notice and this + * permission notice and warranty disclaimer appear in supporting + * documentation, and that the name Lucent Technologies or any of + * its entities not be used in advertising or publicity pertaining + * to distribution of the software without specific, written prior + * permission. + * + * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. + * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER + * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, + * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF + * THIS SOFTWARE. 
+ */ + +/* * CDDL HEADER START * * The contents of this file are subject to the terms of the @@ -26,178 +50,221 @@ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ -#define tempfree(x, s) if (istemp(x)) tfree(x, s) - -#define execute(p) r_execute(p) - #define DEBUG -#include "awk.h" -#include <math.h> -#include "y.tab.h" #include <stdio.h> #include <ctype.h> #include <setjmp.h> +#include <math.h> #include <time.h> +#include <sys/wait.h> +#include "awk.h" +#include "y.tab.h" -#ifndef FOPEN_MAX -#define FOPEN_MAX 15 /* max number of open files, from ANSI std. */ -#endif - +#define tempfree(x) if (istemp(x)) tfree(x) static jmp_buf env; +extern Awkfloat srand_seed; -static Cell *r_execute(Node *); -static Cell *gettemp(char *), *copycell(Cell *); -static FILE *openfile(int, uchar *), *redirect(int, Node *); - -int paircnt; -Node *winner = NULL; +static Cell *execute(Node *); +static Cell *gettemp(void), *copycell(Cell *); +static FILE *openfile(int, const char *), *redirect(int, Node *); -static Cell *tmps; +Node *winner = NULL; /* root of parse tree */ +static Cell *tmps; /* free temporary cells for execution */ -static Cell truecell = { OBOOL, BTRUE, 0, 0, 1.0, NUM }; -Cell *true = &truecell; -static Cell falsecell = { OBOOL, BFALSE, 0, 0, 0.0, NUM }; -Cell *false = &falsecell; -static Cell breakcell = { OJUMP, JBREAK, 0, 0, 0.0, NUM }; +static Cell truecell = { OBOOL, BTRUE, NULL, NULL, 1.0, NUM, NULL }; +Cell *True = &truecell; +static Cell falsecell = { OBOOL, BFALSE, NULL, NULL, 0.0, NUM, NULL }; +Cell *False = &falsecell; +static Cell breakcell = { OJUMP, JBREAK, NULL, NULL, 0.0, NUM, NULL }; Cell *jbreak = &breakcell; -static Cell contcell = { OJUMP, JCONT, 0, 0, 0.0, NUM }; +static Cell contcell = { OJUMP, JCONT, NULL, NULL, 0.0, NUM, NULL }; Cell *jcont = &contcell; -static Cell nextcell = { OJUMP, JNEXT, 0, 0, 0.0, NUM }; +static Cell nextcell = { OJUMP, JNEXT, NULL, NULL, 0.0, NUM, NULL }; Cell *jnext = &nextcell; -static 
Cell exitcell = { OJUMP, JEXIT, 0, 0, 0.0, NUM }; +static Cell nextfilecell = { OJUMP, JNEXTFILE, NULL, NULL, 0.0, + NUM, NULL }; +Cell *jnextfile = &nextfilecell; +static Cell exitcell = { OJUMP, JEXIT, NULL, NULL, 0.0, NUM, NULL }; Cell *jexit = &exitcell; -static Cell retcell = { OJUMP, JRET, 0, 0, 0.0, NUM }; +static Cell retcell = { OJUMP, JRET, NULL, NULL, 0.0, NUM, NULL }; Cell *jret = &retcell; -static Cell tempcell = { OCELL, CTEMP, 0, 0, 0.0, NUM }; +static Cell tempcell = { OCELL, CTEMP, NULL, "", 0.0, + NUM|STR|DONTFREE, NULL }; Node *curnode = NULL; /* the node being executed, for debugging */ -static void tfree(Cell *, char *); +static void tfree(Cell *); static void closeall(void); static double ipow(double, int); +static void backsub(char **pb_ptr, char **sptr_ptr); + + +/* + * buffer memory management + * + * pbuf: address of pointer to buffer being managed + * psiz: address of buffer size variable + * minlen: minimum length of buffer needed + * quantum: buffer size quantum + * pbptr: address of movable pointer into buffer, or 0 if none + * whatrtn: name of the calling routine if failure should cause fatal error + * + * return 0 for realloc failure, !=0 for success + */ +int +adjbuf(char **pbuf, size_t *psiz, size_t minlen, size_t quantum, char **pbptr, + const char *whatrtn) +{ + if (minlen > *psiz) { + char *tbuf; + int rminlen = quantum ? minlen % quantum : 0; + int boff = pbptr ? 
*pbptr - *pbuf : 0; + /* round up to next multiple of quantum */ + if (rminlen) + minlen += quantum - rminlen; + tbuf = (char *)realloc(*pbuf, minlen); + dprintf(("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, + *psiz, minlen, (void *)*pbuf, (void *)tbuf)); + if (tbuf == NULL) { + if (whatrtn) + FATAL("out of memory in %s", whatrtn); + return (0); + } + *pbuf = tbuf; + *psiz = minlen; + if (pbptr) + *pbptr = tbuf + boff; + } + return (1); +} void -run(Node *a) +run(Node *a) /* execution of parse tree starts here */ { + extern void stdinit(void); + + stdinit(); (void) execute(a); closeall(); } static Cell * -r_execute(Node *u) +execute(Node *u) /* execute a node of the parse tree */ { - register Cell *(*proc)(); - register Cell *x; - register Node *a; + Cell *(*proc)(Node **, int); + Cell *x; + Node *a; if (u == NULL) - return (true); + return (True); for (a = u; ; a = a->nnext) { curnode = a; if (isvalue(a)) { x = (Cell *) (a->narg[0]); - if ((x->tval & FLD) && !donefld) + if (isfld(x) && !donefld) fldbld(); - else if ((x->tval & REC) && !donerec) + else if (isrec(x) && !donerec) recbld(); return (x); } /* probably a Cell* but too risky to print */ if (notlegal(a->nobj)) - ERROR "illegal statement" FATAL; + FATAL("illegal statement"); proc = proctab[a->nobj-FIRSTTOKEN]; x = (*proc)(a->narg, a->nobj); - if ((x->tval & FLD) && !donefld) + if (isfld(x) && !donefld) fldbld(); - else if ((x->tval & REC) && !donerec) + else if (isrec(x) && !donerec) recbld(); if (isexpr(a)) return (x); /* a statement, goto next statement */ if (isjump(x)) return (x); - if (a->nnext == (Node *)NULL) + if (a->nnext == NULL) return (x); - tempfree(x, "execute"); + tempfree(x); } } +/* execute an awk program */ +/* a[0] = BEGIN, a[1] = body, a[2] = END */ /*ARGSUSED*/ Cell * program(Node **a, int n) { - register Cell *x; + Cell *x; if (setjmp(env) != 0) goto ex; if (a[0]) { /* BEGIN */ x = execute(a[0]); if (isexit(x)) - return (true); + return (True); if (isjump(x)) { - ERROR "illegal 
break, continue or next from BEGIN" - FATAL; + FATAL("illegal break, continue, next or nextfile " + "from BEGIN"); } - tempfree(x, ""); + tempfree(x); } -loop: if (a[1] || a[2]) - while (getrec(&record, &record_size) > 0) { + while (getrec(&record, &recsize, 1) > 0) { x = execute(a[1]); if (isexit(x)) break; - tempfree(x, ""); + tempfree(x); } ex: - if (setjmp(env) != 0) + if (setjmp(env) != 0) /* handles exit within END */ goto ex1; if (a[2]) { /* END */ x = execute(a[2]); - if (iscont(x)) /* read some more */ - goto loop; - if (isbreak(x) || isnext(x)) - ERROR "illegal break or next from END" FATAL; - tempfree(x, ""); + if (isbreak(x) || isnext(x) || iscont(x)) + FATAL("illegal break, continue, next or nextfile " + "from END"); + tempfree(x); } ex1: - return (true); + return (True); } -struct Frame { +struct Frame { /* stack frame for awk function calls */ int nargs; /* number of arguments in this call */ Cell *fcncell; /* pointer to Cell for function */ Cell **args; /* pointer to array of arguments after execute */ Cell *retval; /* return value */ }; -#define NARGS 30 +#define NARGS 50 /* max args in a call */ -struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */ +struct Frame *frame = NULL; /* base of stack frames; dynamically alloc'd */ int nframe = 0; /* number of frames allocated */ struct Frame *fp = NULL; /* frame pointer. bottom level unused */ /*ARGSUSED*/ Cell * -call(Node **a, int n) +call(Node **a, int n) /* function call. 
very kludgy and fragile */ { static Cell newcopycell = - { OCELL, CCOPY, 0, (uchar *) "", 0.0, NUM|STR|DONTFREE }; - int i, ncall, ndef, freed = 0; + { OCELL, CCOPY, 0, "", 0.0, NUM|STR|DONTFREE, NULL }; + int i, ncall, ndef; + /* handles potential double freeing when fcn & param share a tempcell */ + int freed = 0; Node *x; - Cell *args[NARGS], *oargs[NARGS], *y, *z, *fcn; - uchar *s; + Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */ + Cell *y, *z, *fcn; + char *s; fcn = execute(a[0]); /* the function itself */ s = fcn->nval; - if (!isfunc(fcn)) - ERROR "calling undefined function %s", s FATAL; + if (!isfcn(fcn)) + FATAL("calling undefined function %s", s); if (frame == NULL) { fp = frame = (struct Frame *)calloc(nframe += 100, sizeof (struct Frame)); if (frame == NULL) { - ERROR "out of space for stack frames calling %s", - s FATAL; + FATAL("out of space for stack frames calling %s", s); } } for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ @@ -206,12 +273,12 @@ call(Node **a, int n) dprintf(("calling %s, %d args (%d in defn), fp=%d\n", s, ncall, ndef, fp-frame)); if (ncall > ndef) { - ERROR "function %s called with %d args, uses only %d", - s, ncall, ndef WARNING; + WARNING("function %s called with %d args, uses only %d", + s, ncall, ndef); } if (ncall + ndef > NARGS) { - ERROR "function %s has %d arguments, limit %d", - s, ncall+ndef, NARGS FATAL; + FATAL("function %s has %d arguments, limit %d", + s, ncall+ndef, NARGS); } for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */ @@ -219,20 +286,20 @@ call(Node **a, int n) y = execute(x); oargs[i] = y; dprintf(("args[%d]: %s %f <%s>, t=%o\n", - i, y->nval, y->fval, - isarr(y) ? "(array)" : (char *)y->sval, y->tval)); - if (isfunc(y)) { - ERROR "can't use function %s as argument in %s", - y->nval, s FATAL; + i, NN(y->nval), y->fval, + isarr(y) ? 
"(array)" : NN(y->sval), y->tval)); + if (isfcn(y)) { + FATAL("can't use function %s as argument in %s", + y->nval, s); } if (isarr(y)) args[i] = y; /* arrays by ref */ else args[i] = copycell(y); - tempfree(y, "callargs"); + tempfree(y); } - for (; i < ndef; i++) { /* add null args for ones not provided */ - args[i] = gettemp("nullargs"); + for (; i < ndef; i++) { /* add null args for ones not provided */ + args[i] = gettemp(); *args[i] = newcopycell; } fp++; /* now ok to up frame */ @@ -241,13 +308,13 @@ call(Node **a, int n) frame = (struct Frame *) realloc(frame, (nframe += 100) * sizeof (struct Frame)); if (frame == NULL) - ERROR "out of space for stack frames in %s", s FATAL; + FATAL("out of space for stack frames in %s", s); fp = frame + dfp; } fp->fcncell = fcn; fp->args = args; fp->nargs = ndef; /* number defined with (excess are locals) */ - fp->retval = gettemp("retval"); + fp->retval = gettemp(); dprintf(("start exec of %s, fp=%d\n", s, fp-frame)); /*LINTED align*/ @@ -261,24 +328,29 @@ call(Node **a, int n) if (i >= ncall) { freesymtab(t); t->csub = CTEMP; + tempfree(t); } else { oargs[i]->tval = t->tval; oargs[i]->tval &= ~(STR|NUM|DONTFREE); oargs[i]->sval = t->sval; - tempfree(t, "oargsarr"); + tempfree(t); } } - } else { + } else if (t != y) { /* kludge to prevent freeing twice */ t->csub = CTEMP; - tempfree(t, "fp->args"); - if (t == y) freed = 1; + tempfree(t); + } else if (t == y && t->csub == CCOPY) { + t->csub = CTEMP; + tempfree(t); + freed = 1; } } - tempfree(fcn, "call.fcn"); + tempfree(fcn); if (isexit(y) || isnext(y)) return (y); - if (!freed) - tempfree(y, "fcn ret"); /* this can free twice! */ + if (freed == 0) { + tempfree(y); /* don't free twice! 
*/ + } z = fp->retval; /* return value */ dprintf(("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval)); @@ -291,42 +363,47 @@ copycell(Cell *x) /* make a copy of a cell in a temp */ { Cell *y; - y = gettemp("copycell"); + /* copy is not constant or field */ + + y = gettemp(); + y->tval = x->tval & ~(CON|FLD|REC); y->csub = CCOPY; /* prevents freeing until call is over */ - y->nval = x->nval; - y->sval = x->sval ? tostring(x->sval) : NULL; + y->nval = x->nval; /* BUG? */ + if (isstr(x)) { + y->sval = tostring(x->sval); + y->tval &= ~DONTFREE; + } else + y->tval |= DONTFREE; y->fval = x->fval; - /* copy is not constant or field is DONTFREE right? */ - y->tval = x->tval & ~(CON|FLD|REC|DONTFREE); return (y); } /*ARGSUSED*/ Cell * -arg(Node **a, int nnn) +arg(Node **a, int nnn) /* nth argument of a function */ { int n; - n = (int)a[0]; /* argument number, counting from 0 */ + n = ptoi(a[0]); /* argument number, counting from 0 */ dprintf(("arg(%d), fp->nargs=%d\n", n, fp->nargs)); if (n+1 > fp->nargs) { - ERROR "argument #%d of function %s was not supplied", - n+1, fp->fcncell->nval FATAL; + FATAL("argument #%d of function %s was not supplied", + n+1, fp->fcncell->nval); } return (fp->args[n]); } Cell * -jump(Node **a, int n) +jump(Node **a, int n) /* break, continue, next, nextfile, return */ { - register Cell *y; + Cell *y; switch (n) { case EXIT: if (a[0] != NULL) { y = execute(a[0]); errorflag = (int)getfval(y); - tempfree(y, ""); + tempfree(y); } longjmp(env, 1); /*NOTREACHED*/ @@ -341,77 +418,85 @@ jump(Node **a, int n) (void) setsval(fp->retval, getsval(y)); else if (y->tval & NUM) (void) setfval(fp->retval, getfval(y)); - tempfree(y, ""); + else /* can't happen */ + FATAL("bad type variable %d", y->tval); + tempfree(y); } return (jret); case NEXT: return (jnext); + case NEXTFILE: + nextfile(); + return (jnextfile); case BREAK: return (jbreak); case CONTINUE: return (jcont); default: /* can't happen */ - ERROR "illegal jump type %d", n FATAL; + 
FATAL("illegal jump type %d", n); } /*NOTREACHED*/ return (NULL); } Cell * -getaline(Node **a, int n) +awkgetline(Node **a, int n) /* get next line from specific input */ { /* a[0] is variable, a[1] is operator, a[2] is filename */ - register Cell *r, *x; - uchar *buf; + Cell *r, *x; FILE *fp; - size_t len; + char *buf; + size_t bufsize = recsize; + int mode; + + if ((buf = (char *)malloc(bufsize)) == NULL) + FATAL("out of memory in getline"); (void) fflush(stdout); /* in case someone is waiting for a prompt */ - r = gettemp(""); + r = gettemp(); if (a[1] != NULL) { /* getline < file */ x = execute(a[2]); /* filename */ - if ((int)a[1] == '|') /* input pipe */ - a[1] = (Node *)LE; /* arbitrary flag */ - fp = openfile((int)a[1], getsval(x)); - tempfree(x, ""); - buf = NULL; + mode = ptoi(a[1]); + if (mode == '|') /* input pipe */ + mode = LE; /* arbitrary flag */ + fp = openfile(mode, getsval(x)); + tempfree(x); if (fp == NULL) n = -1; else - n = readrec(&buf, &len, fp); - if (n > 0) { - if (a[0] != NULL) { /* getline var <file */ - (void) setsval(execute(a[0]), buf); - } else { /* getline <file */ - if (!(recloc->tval & DONTFREE)) - xfree(recloc->sval); - expand_buf(&record, &record_size, len); - (void) memcpy(record, buf, len); - record[len] = '\0'; - recloc->sval = record; - recloc->tval = REC | STR | DONTFREE; - donerec = 1; donefld = 0; + n = readrec(&buf, &bufsize, fp); + /*LINTED if*/ + if (n <= 0) { + ; + } else if (a[0] != NULL) { /* getline var <file */ + x = execute(a[0]); + (void) setsval(x, buf); + tempfree(x); + } else { /* getline <file */ + (void) setsval(recloc, buf); + if (is_number(recloc->sval)) { + recloc->fval = atof(recloc->sval); + recloc->tval |= NUM; } } - if (buf != NULL) - free(buf); } else { /* bare getline; use current input */ if (a[0] == NULL) /* getline */ - n = getrec(&record, &record_size); + n = getrec(&record, &recsize, 1); else { /* getline var */ - init_buf(&buf, &len, LINE_INCR); - n = getrec(&buf, &len); - (void) 
setsval(execute(a[0]), buf); - free(buf); + n = getrec(&buf, &bufsize, 0); + x = execute(a[0]); + (void) setsval(x, buf); + tempfree(x); } } (void) setfval(r, (Awkfloat)n); + free(buf); return (r); } /*ARGSUSED*/ Cell * -getnf(Node **a, int n) +getnf(Node **a, int n) /* get NF */ { if (donefld == 0) fldbld(); @@ -420,208 +505,241 @@ getnf(Node **a, int n) /*ARGSUSED*/ Cell * -array(Node **a, int n) +array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ { - register Cell *x, *y, *z; - register uchar *s; - register Node *np; - uchar *buf; - size_t bsize, tlen, len, slen; + Cell *x, *y, *z; + char *s; + Node *np; + char *buf; + size_t bufsz = recsize; + size_t tlen = 0, len, nsub; + + if ((buf = (char *)malloc(bufsz)) == NULL) + FATAL("out of memory in array"); x = execute(a[0]); /* Cell* for symbol table */ - init_buf(&buf, &bsize, LINE_INCR); buf[0] = '\0'; - tlen = 0; - slen = strlen((char *)*SUBSEP); - for (np = a[1]; np; np = np->nnext) { + for (np = a[1]; np != NULL; np = np->nnext) { y = execute(np); /* subscript */ s = getsval(y); - len = strlen((char *)s); - expand_buf(&buf, &bsize, tlen + len + slen); + len = strlen(s); + nsub = strlen(getsval(subseploc)); + (void) adjbuf(&buf, &bufsz, tlen + len + nsub + 1, + recsize, 0, "array"); (void) memcpy(&buf[tlen], s, len); tlen += len; if (np->nnext) { - (void) memcpy(&buf[tlen], *SUBSEP, slen); - tlen += slen; + (void) memcpy(&buf[tlen], *SUBSEP, nsub); + tlen += nsub; } buf[tlen] = '\0'; - tempfree(y, ""); + tempfree(y); } if (!isarr(x)) { - dprintf(("making %s into an array\n", x->nval)); + dprintf(("making %s into an array\n", NN(x->nval))); if (freeable(x)) xfree(x->sval); x->tval &= ~(STR|NUM|DONTFREE); x->tval |= ARR; - x->sval = (uchar *) makesymtab(NSYMTAB); + x->sval = (char *)makesymtab(NSYMTAB); } /*LINTED align*/ - z = setsymtab(buf, (uchar *)"", 0.0, STR|NUM, (Array *)x->sval); + z = setsymtab(buf, "", 0.0, STR|NUM, (Array *)x->sval); z->ctype = OCELL; z->csub = CVAR; - tempfree(x, 
""); + tempfree(x); free(buf); return (z); } /*ARGSUSED*/ Cell * -delete(Node **a, int n) +awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ { Cell *x, *y; Node *np; - uchar *buf, *s; - size_t bsize, tlen, slen, len; + char *s; + size_t nsub; + size_t tlen = 0, len; x = execute(a[0]); /* Cell* for symbol table */ - if (!isarr(x)) - return (true); - init_buf(&buf, &bsize, LINE_INCR); - buf[0] = '\0'; - tlen = 0; - slen = strlen((char *)*SUBSEP); - for (np = a[1]; np; np = np->nnext) { - y = execute(np); /* subscript */ - s = getsval(y); - len = strlen((char *)s); - expand_buf(&buf, &bsize, tlen + len + slen); - (void) memcpy(&buf[tlen], s, len); - tlen += len; - if (np->nnext) { - (void) memcpy(&buf[tlen], *SUBSEP, slen); - tlen += slen; + if (x == symtabloc) { + FATAL("cannot delete SYMTAB or its elements"); + } + if (!isarr(x)) { + dprintf(("making %s into an array\n", x->nval)); + if (freeable(x)) + xfree(x->sval); + x->tval &= ~(STR|NUM|DONTFREE); + x->tval |= ARR; + x->sval = (char *)makesymtab(NSYMTAB); + } + if (a[1] == NULL) { /* delete the elements, not the table */ + freesymtab(x); + x->tval &= ~STR; + x->tval |= ARR; + x->sval = (char *)makesymtab(NSYMTAB); + } else { + size_t bufsz = recsize; + char *buf; + if ((buf = (char *)malloc(bufsz)) == NULL) + FATAL("out of memory in awkdelete"); + buf[0] = '\0'; + for (np = a[1]; np != NULL; np = np->nnext) { + y = execute(np); /* subscript */ + s = getsval(y); + len = strlen(s); + nsub = strlen(getsval(subseploc)); + (void) adjbuf(&buf, &bufsz, tlen + len + nsub + 1, + recsize, 0, "awkdelete"); + (void) memcpy(&buf[tlen], s, len); + tlen += len; + if (np->nnext) { + (void) memcpy(&buf[tlen], *SUBSEP, nsub); + tlen += nsub; + } + buf[tlen] = '\0'; + tempfree(y); } - buf[tlen] = '\0'; - tempfree(y, ""); + freeelem(x, buf); + free(buf); } - freeelem(x, buf); - tempfree(x, ""); - free(buf); - return (true); + tempfree(x); + return (True); } /*ARGSUSED*/ Cell * -intest(Node **a, int n) 
+intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ { - register Cell *x, *ap, *k; + Cell *x, *ap, *k; Node *p; - uchar *buf; - uchar *s; - size_t bsize, tlen, slen, len; + char *buf; + char *s; + size_t bufsz = recsize; + size_t nsub; + size_t tlen = 0, len; ap = execute(a[1]); /* array name */ - if (!isarr(ap)) - ERROR "%s is not an array", ap->nval FATAL; - init_buf(&buf, &bsize, LINE_INCR); - buf[0] = 0; - tlen = 0; - slen = strlen((char *)*SUBSEP); - for (p = a[0]; p; p = p->nnext) { + if (!isarr(ap)) { + dprintf(("making %s into an array\n", ap->nval)); + if (freeable(ap)) + xfree(ap->sval); + ap->tval &= ~(STR|NUM|DONTFREE); + ap->tval |= ARR; + ap->sval = (char *)makesymtab(NSYMTAB); + } + if ((buf = (char *)malloc(bufsz)) == NULL) { + FATAL("out of memory in intest"); + } + buf[0] = '\0'; + for (p = a[0]; p != NULL; p = p->nnext) { x = execute(p); /* expr */ s = getsval(x); - len = strlen((char *)s); - expand_buf(&buf, &bsize, tlen + len + slen); + len = strlen(s); + nsub = strlen(getsval(subseploc)); + (void) adjbuf(&buf, &bufsz, tlen + len + nsub + 1, + recsize, 0, "intest"); (void) memcpy(&buf[tlen], s, len); tlen += len; - tempfree(x, ""); + tempfree(x); if (p->nnext) { - (void) memcpy(&buf[tlen], *SUBSEP, slen); - tlen += slen; + (void) memcpy(&buf[tlen], *SUBSEP, nsub); + tlen += nsub; } buf[tlen] = '\0'; } /*LINTED align*/ k = lookup(buf, (Array *)ap->sval); - tempfree(ap, ""); + tempfree(ap); free(buf); if (k == NULL) - return (false); + return (False); else - return (true); + return (True); } Cell * -matchop(Node **a, int n) +matchop(Node **a, int n) /* ~ and match() */ { - register Cell *x, *y; - register uchar *s, *t; - register int i; + Cell *x, *y; + char *s, *t; + int i; fa *pfa; - int (*mf)() = match, mode = 0; + int (*mf)(fa *, const char *) = match, mode = 0; if (n == MATCHFCN) { mf = pmatch; mode = 1; } - x = execute(a[1]); + x = execute(a[1]); /* a[1] = target text */ s = getsval(x); - if (a[0] == 0) - i = (*mf)(a[2], s); 
+ if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */ + i = (*mf)((fa *)a[2], s); else { - y = execute(a[2]); + y = execute(a[2]); /* a[2] = regular expr */ t = getsval(y); pfa = makedfa(t, mode); i = (*mf)(pfa, s); - tempfree(y, ""); + tempfree(y); } - tempfree(x, ""); + tempfree(x); if (n == MATCHFCN) { int start = patbeg - s + 1; if (patlen < 0) start = 0; (void) setfval(rstartloc, (Awkfloat)start); (void) setfval(rlengthloc, (Awkfloat)patlen); - x = gettemp(""); + x = gettemp(); x->tval = NUM; x->fval = start; return (x); - } else if (n == MATCH && i == 1 || n == NOTMATCH && i == 0) - return (true); + } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0)) + return (True); else - return (false); + return (False); } Cell * -boolop(Node **a, int n) +boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */ { - register Cell *x, *y; - register int i; + Cell *x, *y; + int i; x = execute(a[0]); i = istrue(x); - tempfree(x, ""); + tempfree(x); switch (n) { case BOR: if (i) - return (true); + return (True); y = execute(a[1]); i = istrue(y); - tempfree(y, ""); - return (i ? true : false); + tempfree(y); + return (i ? True : False); case AND: if (!i) - return (false); + return (False); y = execute(a[1]); i = istrue(y); - tempfree(y, ""); - return (i ? true : false); + tempfree(y); + return (i ? True : False); case NOT: - return (i ? false : true); + return (i ? False : True); default: /* can't happen */ - ERROR "unknown boolean operator %d", n FATAL; + FATAL("unknown boolean operator %d", n); } /*NOTREACHED*/ return (NULL); } Cell * -relop(Node **a, int n) +relop(Node **a, int n) /* a[0] < a[1], etc. */ { - register int i; - register Cell *x, *y; + int i; + Cell *x, *y; Awkfloat j; x = execute(a[0]); @@ -630,102 +748,108 @@ relop(Node **a, int n) j = x->fval - y->fval; i = j < 0 ? -1: (j > 0 ? 
1: 0); } else { - i = strcmp((char *)getsval(x), (char *)getsval(y)); + i = strcmp(getsval(x), getsval(y)); } - tempfree(x, ""); - tempfree(y, ""); + tempfree(x); + tempfree(y); switch (n) { - case LT: return (i < 0 ? true : false); - case LE: return (i <= 0 ? true : false); - case NE: return (i != 0 ? true : false); - case EQ: return (i == 0 ? true : false); - case GE: return (i >= 0 ? true : false); - case GT: return (i > 0 ? true : false); + case LT: return (i < 0 ? True : False); + case LE: return (i <= 0 ? True : False); + case NE: return (i != 0 ? True : False); + case EQ: return (i == 0 ? True : False); + case GE: return (i >= 0 ? True : False); + case GT: return (i > 0 ? True : False); default: /* can't happen */ - ERROR "unknown relational operator %d", n FATAL; + FATAL("unknown relational operator %d", n); } /*NOTREACHED*/ - return (false); + return (False); } static void -tfree(Cell *a, char *s) +tfree(Cell *a) /* free a tempcell */ { - if (dbg > 1) { - (void) printf("## tfree %.8s %06lo %s\n", - s, (ulong_t)a, a->sval ? 
a->sval : (uchar *)""); - } - if (freeable(a)) + if (freeable(a)) { + dprintf(("freeing %s %s %o\n", + NN(a->nval), NN(a->sval), a->tval)); xfree(a->sval); + } if (a == tmps) - ERROR "tempcell list is curdled" FATAL; + FATAL("tempcell list is curdled"); a->cnext = tmps; tmps = a; } static Cell * -gettemp(char *s) +gettemp(void) /* get a tempcell */ { int i; - register Cell *x; + Cell *x; if (!tmps) { tmps = (Cell *)calloc(100, sizeof (Cell)); if (!tmps) - ERROR "no space for temporaries" FATAL; + FATAL("out of space for temporaries"); for (i = 1; i < 100; i++) tmps[i-1].cnext = &tmps[i]; - tmps[i-1].cnext = 0; + tmps[i-1].cnext = NULL; } x = tmps; tmps = x->cnext; *x = tempcell; - if (dbg > 1) - (void) printf("## gtemp %.8s %06lo\n", s, (ulong_t)x); + dprintf(("gtemp %.8s %06lo\n", NN(x->nval), (ulong_t)x)); return (x); } /*ARGSUSED*/ Cell * -indirect(Node **a, int n) +indirect(Node **a, int n) /* $( a[0] ) */ { - register Cell *x; - register int m; - register uchar *s; + Awkfloat val; + Cell *x; + int m; + char *s; x = execute(a[0]); - m = (int)getfval(x); + + /* freebsd: defend against super large field numbers */ + val = getfval(x); + if ((Awkfloat)INT_MAX < val) + FATAL("trying to access out of range field %s", x->nval); + m = (int)val; if (m == 0 && !is_number(s = getsval(x))) /* suspicion! */ - ERROR "illegal field $(%s)", s FATAL; - tempfree(x, ""); + FATAL("illegal field $(%s), name \"%s\"", s, x->nval); + /* BUG: can x->nval ever be null??? */ + tempfree(x); x = fieldadr(m); - x->ctype = OCELL; + x->ctype = OCELL; /* BUG? why are these needed? 
*/ x->csub = CFLD; return (x); } /*ARGSUSED*/ Cell * -substr(Node **a, int nnn) +substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ { - register int k, m, n; - register uchar *s; + int k, m, n; + char *s; int temp; - register Cell *x, *y, *z; + Cell *x, *y, *z = NULL; x = execute(a[0]); y = execute(a[1]); - if (a[2] != 0) + if (a[2] != NULL) z = execute(a[2]); s = getsval(x); - k = strlen((char *)s) + 1; + k = strlen(s) + 1; if (k <= 1) { - tempfree(x, ""); - tempfree(y, ""); - if (a[2] != 0) - tempfree(z, ""); - x = gettemp(""); - (void) setsval(x, (uchar *)""); + tempfree(x); + tempfree(y); + if (a[2] != NULL) { + tempfree(z); + } + x = gettemp(); + (void) setsval(x, ""); return (x); } m = (int)getfval(y); @@ -733,10 +857,10 @@ substr(Node **a, int nnn) m = 1; else if (m > k) m = k; - tempfree(y, ""); - if (a[2] != 0) { + tempfree(y); + if (a[2] != NULL) { n = (int)getfval(z); - tempfree(z, ""); + tempfree(z); } else n = k - 1; if (n < 0) @@ -744,21 +868,21 @@ substr(Node **a, int nnn) else if (n > k - m) n = k - m; dprintf(("substr: m=%d, n=%d, s=%s\n", m, n, s)); - y = gettemp(""); + y = gettemp(); temp = s[n + m - 1]; /* with thanks to John Linderman */ s[n + m - 1] = '\0'; (void) setsval(y, s + m - 1); s[n + m - 1] = temp; - tempfree(x, ""); + tempfree(x); return (y); } /*ARGSUSED*/ Cell * -sindex(Node **a, int nnn) +sindex(Node **a, int nnn) /* index(a[0], a[1]) */ { - register Cell *x, *y, *z; - register uchar *s1, *s2, *p1, *p2, *q; + Cell *x, *y, *z; + char *s1, *s2, *p1, *p2, *q; Awkfloat v = 0.0; x = execute(a[0]); @@ -766,7 +890,7 @@ sindex(Node **a, int nnn) y = execute(a[1]); s2 = getsval(y); - z = gettemp(""); + z = gettemp(); for (p1 = s1; *p1 != '\0'; p1++) { for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++) ; @@ -775,26 +899,32 @@ sindex(Node **a, int nnn) break; } } - tempfree(x, ""); - tempfree(y, ""); + tempfree(x); + tempfree(y); (void) setfval(z, v); return (z); } -void -format(uchar **bufp, uchar *s, Node *a) +#define 
MAXNUMSIZE 50 + +/* printf-like conversions */ +int +format(char **pbuf, int *pbufsize, const char *s, Node *a) { - uchar *fmt; - register uchar *os; - register Cell *x; - int flag = 0, len; - uchar_t *buf; - size_t bufsize, fmtsize, cnt, tcnt, ret; - - init_buf(&buf, &bufsize, LINE_INCR); - init_buf(&fmt, &fmtsize, LINE_INCR); + char *fmt; + const char *os; + Cell *x; + int flag = 0, n, len; + int fmtwd; /* format width */ + char *buf = *pbuf; + size_t bufsize = *pbufsize; + size_t fmtsz = recsize; + size_t cnt, tcnt, ret; + os = s; cnt = 0; + if ((fmt = (char *)malloc(fmtsz)) == NULL) + FATAL("out of memory in format()"); while (*s) { if (*s != '%') { expand_buf(&buf, &bufsize, cnt); @@ -807,58 +937,77 @@ format(uchar **bufp, uchar *s, Node *a) s += 2; continue; } + /* + * have to be real careful in case this is a huge number, + * eg, "%100000d". + */ + fmtwd = atoi(s+1); + if (fmtwd < 0) + fmtwd = -fmtwd; for (tcnt = 0; ; s++) { - expand_buf(&fmt, &fmtsize, tcnt); + expand_buf(&fmt, &fmtsz, tcnt); fmt[tcnt++] = *s; if (*s == '\0') break; - if (isalpha(*s) && *s != 'l' && *s != 'h' && *s != 'L') + if (isalpha((uschar)*s) && + *s != 'l' && *s != 'h' && *s != 'L') break; /* the ansi panoply */ + if (*s == '$') { + FATAL("'$' not permitted in awk formats"); + } if (*s == '*') { if (a == NULL) { - ERROR - "not enough args in printf(%s) or sprintf(%s)", os, os FATAL; + FATAL("not enough args in printf(%s) " + "or sprintf(%s)", os, os); } x = execute(a); a = a->nnext; tcnt--; - expand_buf(&fmt, &fmtsize, tcnt + 12); - ret = sprintf((char *)&fmt[tcnt], "%d", - (int)getfval(x)); + expand_buf(&fmt, &fmtsz, tcnt + 12); + fmtwd = (int)getfval(x); + ret = sprintf(&fmt[tcnt], "%d", fmtwd); + if (fmtwd < 0) + fmtwd = -fmtwd; tcnt += ret; - tempfree(x, ""); + tempfree(x); } } fmt[tcnt] = '\0'; + if (fmtwd < 0) + fmtwd = -fmtwd; switch (*s) { + case 'a': case 'A': + flag = *s; + break; case 'f': case 'e': case 'g': case 'E': case 'G': - flag = 1; + flag = 'f'; break; case 'd': 
case 'i': - flag = 2; + flag = 'd'; if (*(s-1) == 'l') break; fmt[tcnt - 1] = 'l'; - expand_buf(&fmt, &fmtsize, tcnt); + expand_buf(&fmt, &fmtsz, tcnt); fmt[tcnt++] = 'd'; fmt[tcnt] = '\0'; break; case 'o': case 'x': case 'X': case 'u': - flag = *(s-1) == 'l' ? 2 : 3; + flag = *(s-1) == 'l' ? 'd' : 'u'; break; case 's': - flag = 4; + flag = 's'; break; case 'c': - flag = 5; + flag = 'c'; break; default: - flag = 0; + WARNING("weird printf conversion %s", fmt); + flag = '?'; break; } - if (flag == 0) { - len = strlen((char *)fmt); + if (flag == '?') { + len = strlen(fmt); expand_buf(&buf, &bufsize, cnt + len); (void) memcpy(&buf[cnt], fmt, len); cnt += len; @@ -866,79 +1015,100 @@ format(uchar **bufp, uchar *s, Node *a) continue; } if (a == NULL) { - ERROR - "not enough args in printf(%s) or sprintf(%s)", os, os FATAL; + FATAL("not enough args in printf(%s) " + "or sprintf(%s)", os, os); } x = execute(a); a = a->nnext; - for (;;) { - /* make sure we have at least 1 byte space */ - expand_buf(&buf, &bufsize, cnt + 1); - len = bufsize - cnt; - switch (flag) { - case 1: - /*LINTED*/ - ret = snprintf((char *)&buf[cnt], len, - (char *)fmt, getfval(x)); - break; - case 2: - /*LINTED*/ - ret = snprintf((char *)&buf[cnt], len, - (char *)fmt, (long)getfval(x)); - break; - case 3: + n = MAXNUMSIZE; + if (fmtwd > n) + n = fmtwd; +retry: + /* make sure we have at least 1 byte space */ + (void) adjbuf(&buf, &bufsize, 1 + n + cnt, + recsize, NULL, "format5"); + len = bufsize - cnt; + switch (flag) { + case 'a': + case 'A': + case 'f': + /*LINTED*/ + ret = snprintf(&buf[cnt], len, + fmt, getfval(x)); + break; + case 'd': + /*LINTED*/ + ret = snprintf(&buf[cnt], len, + fmt, (long)getfval(x)); + break; + case 'u': + /*LINTED*/ + ret = snprintf(&buf[cnt], len, + fmt, (int)getfval(x)); + break; + case 's': + /*LINTED*/ + ret = snprintf(&buf[cnt], len, + fmt, getsval(x)); + break; + case 'c': + if (!isnum(x)) { /*LINTED*/ - ret = snprintf((char *)&buf[cnt], len, - (char *)fmt, 
(int)getfval(x)); + ret = snprintf(&buf[cnt], len, + fmt, getsval(x)[0]); break; - case 4: + } + if (getfval(x)) { /*LINTED*/ - ret = snprintf((char *)&buf[cnt], len, - (char *)fmt, getsval(x)); - break; - case 5: - if (isnum(x)) { - /*LINTED*/ - ret = snprintf((char *)&buf[cnt], len, - (char *)fmt, (int)getfval(x)); - } else { - /*LINTED*/ - ret = snprintf((char *)&buf[cnt], len, - (char *)fmt, getsval(x)[0]); - } - break; - default: - ret = 0; + ret = snprintf(&buf[cnt], len, + fmt, (int)getfval(x)); + } else { + /* explicit null byte */ + buf[cnt] = '\0'; + /* next output will start here */ + buf[cnt + 1] = '\0'; + ret = 1; } - if (ret < len) - break; - expand_buf(&buf, &bufsize, cnt + ret); + break; + default: + FATAL("can't happen: " + "bad conversion %c in format()", flag); + } + if (ret >= len) { + (void) adjbuf(&buf, &bufsize, cnt + ret + 1, + recsize, NULL, "format6"); + goto retry; } - tempfree(x, ""); + tempfree(x); cnt += ret; s++; } buf[cnt] = '\0'; - for (; a; a = a->nnext) /* evaluate any remaining args */ - (void) execute(a); - *bufp = tostring(buf); - free(buf); free(fmt); + for (; a != NULL; a = a->nnext) /* evaluate any remaining args */ + (void) execute(a); + *pbuf = buf; + *pbufsize = bufsize; + return (cnt); } /*ARGSUSED*/ Cell * -a_sprintf(Node **a, int n) +awksprintf(Node **a, int n) /* sprintf(a[0]) */ { - register Cell *x; - register Node *y; - uchar *buf; + Cell *x; + Node *y; + char *buf; + int bufsz = 3 * recsize; + if ((buf = (char *)malloc(bufsz)) == NULL) + FATAL("out of memory in awksprintf"); y = a[0]->nnext; x = execute(a[0]); - format(&buf, getsval(x), y); - tempfree(x, ""); - x = gettemp(""); + if (format(&buf, &bufsz, getsval(x), y) == -1) + FATAL("sprintf string %.30s... too long. 
can't happen.", buf); + tempfree(x); + x = gettemp(); x->sval = buf; x->tval = STR; return (x); @@ -946,44 +1116,55 @@ a_sprintf(Node **a, int n) /*ARGSUSED*/ Cell * -aprintf(Node **a, int n) +awkprintf(Node **a, int n) /* printf */ { + /* a[0] is list of args, starting with format string */ + /* a[1] is redirection operator, a[2] is redirection file */ FILE *fp; - register Cell *x; - register Node *y; - uchar *buf; + Cell *x; + Node *y; + char *buf; + int len; + int bufsz = 3 * recsize; + if ((buf = (char *)malloc(bufsz)) == NULL) + FATAL("out of memory in awkprintf"); y = a[0]->nnext; x = execute(a[0]); - format(&buf, getsval(x), y); - tempfree(x, ""); - if (a[1] == NULL) - (void) fputs((char *)buf, stdout); - else { - fp = redirect((int)a[1], a[2]); - (void) fputs((char *)buf, fp); + if ((len = format(&buf, &bufsz, getsval(x), y)) == -1) + FATAL("printf string %.30s... too long. can't happen.", buf); + tempfree(x); + if (a[1] == NULL) { + (void) fwrite(buf, len, 1, stdout); + if (ferror(stdout)) + FATAL("write error on stdout"); + } else { + fp = redirect(ptoi(a[1]), a[2]); + (void) fwrite(buf, len, 1, fp); (void) fflush(fp); + if (ferror(fp)) + FATAL("write error on %s", filename(fp)); } free(buf); - return (true); + return (True); } Cell * -arith(Node **a, int n) +arith(Node **a, int n) /* a[0] + a[1], etc. 
also -a[0] */ { - Awkfloat i, j; + Awkfloat i, j = 0; double v; - register Cell *x, *y, *z; + Cell *x, *y, *z; x = execute(a[0]); i = getfval(x); - tempfree(x, ""); - if (n != UMINUS) { + tempfree(x); + if (n != UMINUS && n != UPLUS) { y = execute(a[1]); j = getfval(y); - tempfree(y, ""); + tempfree(y); } - z = gettemp(""); + z = gettemp(); switch (n) { case ADD: i += j; @@ -996,18 +1177,20 @@ arith(Node **a, int n) break; case DIVIDE: if (j == 0) - ERROR "division by zero" FATAL; + FATAL("division by zero"); i /= j; break; case MOD: if (j == 0) - ERROR "division by zero in mod" FATAL; + FATAL("division by zero in mod"); (void) modf(i/j, &v); i = i - j * v; break; case UMINUS: i = -i; break; + case UPLUS: /* handled by getfval(), above */ + break; case POWER: if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ i = ipow(i, (int)j); @@ -1015,14 +1198,14 @@ arith(Node **a, int n) i = errcheck(pow(i, j), "pow"); break; default: /* can't happen */ - ERROR "illegal arithmetic operator %d", n FATAL; + FATAL("illegal arithmetic operator %d", n); } (void) setfval(z, i); return (z); } static double -ipow(double x, int n) +ipow(double x, int n) /* x**n. ought to be done by pow, but isn't always */ { double v; @@ -1036,10 +1219,10 @@ ipow(double x, int n) } Cell * -incrdecr(Node **a, int n) +incrdecr(Node **a, int n) /* a[0]++, etc. */ { - register Cell *x, *z; - register int k; + Cell *x, *z; + int k; Awkfloat xf; x = execute(a[0]); @@ -1049,34 +1232,42 @@ incrdecr(Node **a, int n) (void) setfval(x, xf + k); return (x); } - z = gettemp(""); + z = gettemp(); (void) setfval(z, xf); (void) setfval(x, xf + k); - tempfree(x, ""); + tempfree(x); return (z); } +/* a[0] = a[1], a[0] += a[1], etc. */ +/* this is subtle; don't muck with it. */ Cell * assign(Node **a, int n) { - register Cell *x, *y; + Cell *x, *y; Awkfloat xf, yf; double v; y = execute(a[1]); x = execute(a[0]); /* order reversed from before... 
*/ if (n == ASSIGN) { /* ordinary assignment */ - if ((y->tval & (STR|NUM)) == (STR|NUM)) { + /*LINTED if*/ + if (x == y && !(x->tval & (FLD|REC)) && x != nfloc) { + /* + * If this is a self-assignment, we leave things alone, + * unless it's a field or NF. + */ + } else if ((y->tval & (STR|NUM)) == (STR|NUM)) { (void) setsval(x, getsval(y)); x->fval = getfval(y); x->tval |= NUM; - } else if (y->tval & STR) + } else if (isstr(y)) (void) setsval(x, getsval(y)); - else if (y->tval & NUM) + else if (isnum(y)) (void) setfval(x, getfval(y)); else funnyvar(y, "read value of"); - tempfree(y, ""); + tempfree(y); return (x); } xf = getfval(x); @@ -1093,12 +1284,12 @@ assign(Node **a, int n) break; case DIVEQ: if (yf == 0) - ERROR "division by zero in /=" FATAL; + FATAL("division by zero in /="); xf /= yf; break; case MODEQ: if (yf == 0) - ERROR "division by zero in %%=" FATAL; + FATAL("division by zero in %%="); (void) modf(xf/yf, &v); xf = xf - yf * v; break; @@ -1109,55 +1300,55 @@ assign(Node **a, int n) xf = errcheck(pow(xf, yf), "pow"); break; default: - ERROR "illegal assignment operator %d", n FATAL; + FATAL("illegal assignment operator %d", n); break; } - tempfree(y, ""); + tempfree(y); (void) setfval(x, xf); return (x); } /*ARGSUSED*/ Cell * -cat(Node **a, int q) +cat(Node **a, int q) /* a[0] cat a[1] */ { - register Cell *x, *y, *z; - register int n1, n2; - register uchar *s; + Cell *x, *y, *z; + int n1, n2; + char *s = NULL; + size_t ssz = 0; x = execute(a[0]); + n1 = strlen(getsval(x)); + (void) adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1"); + (void) strncpy(s, x->sval, ssz); + y = execute(a[1]); - (void) getsval(x); - (void) getsval(y); - n1 = strlen((char *)x->sval); - n2 = strlen((char *)y->sval); - s = (uchar *)malloc(n1 + n2 + 1); - if (s == NULL) { - ERROR "out of space concatenating %.15s and %.15s", - x->sval, y->sval FATAL; - } - (void) strcpy((char *)s, (char *)x->sval); - (void) strcpy((char *)s + n1, (char *)y->sval); - tempfree(y, ""); - z = 
gettemp(""); + n2 = strlen(getsval(y)); + (void) adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2"); + (void) strncpy(s + n1, y->sval, ssz - n1); + + tempfree(x); + tempfree(y); + + z = gettemp(); z->sval = s; z->tval = STR; - tempfree(x, ""); + return (z); } /*ARGSUSED*/ Cell * -pastat(Node **a, int n) +pastat(Node **a, int n) /* a[0] { a[1] } */ { - register Cell *x; + Cell *x; - if (a[0] == 0) + if (a[0] == NULL) x = execute(a[1]); else { x = execute(a[0]); if (istrue(x)) { - tempfree(x, ""); + tempfree(x); x = execute(a[1]); } } @@ -1166,73 +1357,83 @@ pastat(Node **a, int n) /*ARGSUSED*/ Cell * -dopa2(Node **a, int n) +dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */ { Cell *x; int pair; - static int *pairstack = NULL; if (!pairstack) { /* first time */ dprintf(("paircnt: %d\n", paircnt)); - pairstack = (int *)malloc(sizeof (int) * paircnt); - if (!pairstack) - ERROR "out of space in dopa2" FATAL; - (void) memset(pairstack, 0, sizeof (int) * paircnt); + pairstack = (int *)calloc(paircnt, sizeof (int)); + if (pairstack == NULL) + FATAL("out of space in dopa2"); } - pair = (int)a[3]; + pair = ptoi(a[3]); if (pairstack[pair] == 0) { x = execute(a[0]); if (istrue(x)) pairstack[pair] = 1; - tempfree(x, ""); + tempfree(x); } if (pairstack[pair] == 1) { x = execute(a[1]); if (istrue(x)) pairstack[pair] = 0; - tempfree(x, ""); + tempfree(x); x = execute(a[2]); return (x); } - return (false); + return (False); } /*ARGSUSED*/ Cell * -split(Node **a, int nnn) +split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ { - Cell *x, *y, *ap; - register uchar *s; - register int sep; - uchar *t, temp, num[11], *fs; - int n, tempstat; + Cell *x = NULL, *y, *ap; + char *s, *origs; + char *fs, *origfs = NULL; + int sep; + char *t, temp, num[50]; + int n, tempstat, arg3type; y = execute(a[0]); /* source string */ - s = getsval(y); - if (a[2] == 0) /* fs string */ - fs = *FS; - else if ((int)a[3] == STRING) { /* split(str,arr,"string") */ + origs = s = 
tostring(getsval(y)); + arg3type = ptoi(a[3]); + if (a[2] == NULL) /* fs string */ + fs = getsval(fsloc); + else if (arg3type == STRING) { /* split(str,arr,"string") */ x = execute(a[2]); - fs = getsval(x); - } else if ((int)a[3] == REGEXPR) - fs = (uchar *)"(regexpr)"; /* split(str,arr,/regexpr/) */ + origfs = fs = tostring(getsval(x)); + tempfree(x); + } else if (arg3type == REGEXPR) + fs = "(regexpr)"; /* split(str,arr,/regexpr/) */ else - ERROR "illegal type of split()" FATAL; + FATAL("illegal type of split"); sep = *fs; ap = execute(a[1]); /* array name */ freesymtab(ap); - dprintf(("split: s=|%s|, a=%s, sep=|%s|\n", s, ap->nval, fs)); + dprintf(("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs)); ap->tval &= ~STR; ap->tval |= ARR; - ap->sval = (uchar *)makesymtab(NSYMTAB); + ap->sval = (char *)makesymtab(NSYMTAB); n = 0; - if (*s != '\0' && strlen((char *)fs) > 1 || (int)a[3] == REGEXPR) { + if (arg3type == REGEXPR && strlen((char *)((fa*)a[2])->restr) == 0) { + /* + * split(s, a, //); have to arrange things such that it looks + * like an empty separator. 
+ */ + arg3type = 0; + fs = ""; + sep = 0; + } + if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */ fa *pfa; - if ((int)a[3] == REGEXPR) { /* it's ready already */ + if (arg3type == REGEXPR) { /* it's ready already */ pfa = (fa *)a[2]; } else { pfa = makedfa(fs, 1); @@ -1242,12 +1443,12 @@ split(Node **a, int nnn) pfa->initstat = 2; do { n++; - (void) sprintf((char *)num, "%d", n); + (void) sprintf(num, "%d", n); temp = *patbeg; *patbeg = '\0'; if (is_number(s)) { (void) setsymtab(num, s, - atof((char *)s), + atof(s), /*LINTED align*/ STR|NUM, (Array *)ap->sval); } else { @@ -1259,19 +1460,22 @@ split(Node **a, int nnn) s = patbeg + patlen; if (*(patbeg+patlen-1) == 0 || *s == 0) { n++; - (void) sprintf((char *)num, "%d", n); - (void) setsymtab(num, (uchar *)"", 0.0, + (void) sprintf(num, "%d", n); + (void) setsymtab(num, "", 0.0, /*LINTED align*/ STR, (Array *)ap->sval); pfa->initstat = tempstat; goto spdone; } } while (nematch(pfa, s)); + /* bwk: has to be here to reset */ + /* cf gsub and refldbld */ + pfa->initstat = tempstat; } n++; - (void) sprintf((char *)num, "%d", n); + (void) sprintf(num, "%d", n); if (is_number(s)) { - (void) setsymtab(num, s, atof((char *)s), + (void) setsymtab(num, s, atof(s), /*LINTED align*/ STR|NUM, (Array *)ap->sval); } else { @@ -1284,7 +1488,7 @@ spdone: for (n = 0; ; ) { while (*s == ' ' || *s == '\t' || *s == '\n') s++; - if (*s == 0) + if (*s == '\0') break; n++; t = s; @@ -1295,9 +1499,9 @@ spdone: ; temp = *s; *s = '\0'; - (void) sprintf((char *)num, "%d", n); + (void) sprintf(num, "%d", n); if (is_number(t)) { - (void) setsymtab(num, t, atof((char *)t), + (void) setsymtab(num, t, atof(t), /*LINTED align*/ STR|NUM, (Array *)ap->sval); } else { @@ -1306,10 +1510,27 @@ spdone: STR, (Array *)ap->sval); } *s = temp; - if (*s != 0) + if (*s != '\0') s++; } - } else if (*s != 0) { + } else if (sep == '\0') { /* split(s, a, "") => 1 char/elem */ + for (n = 0; *s != 0; s++) { + char buf[2]; + n++; + (void) 
sprintf(num, "%d", n); + buf[0] = *s; + buf[1] = '\0'; + if (isdigit((uschar)buf[0])) { + (void) setsymtab(num, buf, atof(buf), + /*LINTED align*/ + STR|NUM, (Array *)ap->sval); + } else { + (void) setsymtab(num, buf, 0.0, + /*LINTED align*/ + STR, (Array *)ap->sval); + } + } + } else if (*s != '\0') { for (;;) { n++; t = s; @@ -1317,9 +1538,9 @@ spdone: s++; temp = *s; *s = '\0'; - (void) sprintf((char *)num, "%d", n); + (void) sprintf(num, "%d", n); if (is_number(t)) { - (void) setsymtab(num, t, atof((char *)t), + (void) setsymtab(num, t, atof(t), /*LINTED align*/ STR|NUM, (Array *)ap->sval); } else { @@ -1328,15 +1549,15 @@ spdone: STR, (Array *)ap->sval); } *s = temp; - if (*s++ == 0) + if (*s++ == '\0') break; } } - tempfree(ap, ""); - tempfree(y, ""); - if (a[2] != 0 && (int)a[3] == STRING) - tempfree(x, ""); - x = gettemp(""); + tempfree(ap); + tempfree(y); + free(origs); + free(origfs); + x = gettemp(); x->tval = NUM; x->fval = n; return (x); @@ -1344,16 +1565,16 @@ spdone: /*ARGSUSED*/ Cell * -condexpr(Node **a, int n) +condexpr(Node **a, int n) /* a[0] ? 
a[1] : a[2] */ { - register Cell *x; + Cell *x; x = execute(a[0]); if (istrue(x)) { - tempfree(x, ""); + tempfree(x); x = execute(a[1]); } else { - tempfree(x, ""); + tempfree(x); x = execute(a[2]); } return (x); @@ -1361,16 +1582,16 @@ condexpr(Node **a, int n) /*ARGSUSED*/ Cell * -ifstat(Node **a, int n) +ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */ { - register Cell *x; + Cell *x; x = execute(a[0]); if (istrue(x)) { - tempfree(x, ""); + tempfree(x); x = execute(a[1]); - } else if (a[2] != 0) { - tempfree(x, ""); + } else if (a[2] != NULL) { + tempfree(x); x = execute(a[2]); } return (x); @@ -1378,123 +1599,139 @@ ifstat(Node **a, int n) /*ARGSUSED*/ Cell * -whilestat(Node **a, int n) +whilestat(Node **a, int n) /* while (a[0]) a[1] */ { - register Cell *x; + Cell *x; for (;;) { x = execute(a[0]); if (!istrue(x)) return (x); - tempfree(x, ""); + tempfree(x); x = execute(a[1]); if (isbreak(x)) { - x = true; + x = True; return (x); } if (isnext(x) || isexit(x) || isret(x)) return (x); - tempfree(x, ""); + tempfree(x); } } /*ARGSUSED*/ Cell * -dostat(Node **a, int n) +dostat(Node **a, int n) /* do a[0]; while(a[1]) */ { - register Cell *x; + Cell *x; for (;;) { x = execute(a[0]); if (isbreak(x)) - return (true); + return (True); if (isnext(x) || isexit(x) || isret(x)) return (x); - tempfree(x, ""); + tempfree(x); x = execute(a[1]); if (!istrue(x)) return (x); - tempfree(x, ""); + tempfree(x); } } /*ARGSUSED*/ Cell * -forstat(Node **a, int n) +forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */ { - register Cell *x; + Cell *x; x = execute(a[0]); - tempfree(x, ""); + tempfree(x); for (;;) { - if (a[1] != 0) { + if (a[1] != NULL) { x = execute(a[1]); if (!istrue(x)) return (x); else - tempfree(x, ""); + tempfree(x); } x = execute(a[3]); if (isbreak(x)) /* turn off break */ - return (true); + return (True); if (isnext(x) || isexit(x) || isret(x)) return (x); - tempfree(x, ""); + tempfree(x); x = execute(a[2]); - tempfree(x, ""); + tempfree(x); } } 
/*ARGSUSED*/ Cell * -instat(Node **a, int n) +instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ { - register Cell *x, *vp, *arrayp, *cp, *ncp; + Cell *x, *vp, *arrayp, *cp, *ncp; Array *tp; int i; vp = execute(a[0]); arrayp = execute(a[1]); - if (!isarr(arrayp)) - ERROR "%s is not an array", arrayp->nval FATAL; + if (!isarr(arrayp)) { + dprintf(("making %s into an array\n", arrayp->nval)); + if (freeable(arrayp)) + xfree(arrayp->sval); + arrayp->tval &= ~(STR|NUM|DONTFREE); + arrayp->tval |= ARR; + arrayp->sval = (char *)makesymtab(NSYMTAB); + } /*LINTED align*/ tp = (Array *)arrayp->sval; - tempfree(arrayp, ""); + tempfree(arrayp); for (i = 0; i < tp->size; i++) { /* this routine knows too much */ for (cp = tp->tab[i]; cp != NULL; cp = ncp) { (void) setsval(vp, cp->nval); ncp = cp->cnext; x = execute(a[2]); if (isbreak(x)) { - tempfree(vp, ""); - return (true); + tempfree(vp); + return (True); } if (isnext(x) || isexit(x) || isret(x)) { - tempfree(vp, ""); + tempfree(vp); return (x); } - tempfree(x, ""); + tempfree(x); } } - return (true); + return (True); } /*ARGSUSED*/ Cell * -bltin(Node **a, int n) +bltin(Node **a, int n) /* builtin functions. 
a[0] is type, a[1] is arg list */ { - register Cell *x, *y; + Cell *x, *y; Awkfloat u; - register int t; - uchar *p, *buf; + int t; + Awkfloat tmp; + char *p, *buf; Node *nextarg; + FILE *fp; + void flush_all(void); + int status = 0; - t = (int)a[0]; + t = ptoi(a[0]); x = execute(a[1]); nextarg = a[1]->nnext; switch (t) { case FLENGTH: - u = (Awkfloat)strlen((char *)getsval(x)); break; + if (isarr(x)) { + /* LINTED align */ + u = ((Array *)x->sval)->nelem; + } else { + u = strlen(getsval(x)); + } + break; case FLOG: u = errcheck(log(getfval(x)), "log"); break; case FINT: @@ -1508,60 +1745,82 @@ bltin(Node **a, int n) case FCOS: u = cos(getfval(x)); break; case FATAN: - if (nextarg == 0) { - ERROR "atan2 requires two arguments; returning 1.0" - WARNING; + if (nextarg == NULL) { + WARNING("atan2 requires two arguments; returning 1.0"); u = 1.0; } else { y = execute(a[1]->nnext); u = atan2(getfval(x), getfval(y)); - tempfree(y, ""); + tempfree(y); nextarg = nextarg->nnext; } break; case FSYSTEM: /* in case something is buffered already */ (void) fflush(stdout); - /* 256 is unix-dep */ - u = (Awkfloat)system((char *)getsval(x)) / 256; + status = system(getsval(x)); + u = status; + if (status != -1) { + if (WIFEXITED(status)) { + u = WEXITSTATUS(status); + } else if (WIFSIGNALED(status)) { + u = WTERMSIG(status) + 256; + if (WCOREDUMP(status)) + u += 256; + } else /* something else?!? 
*/ + u = 0; + } break; case FRAND: - u = (Awkfloat)(rand() % 32767) / 32767.0; + /* in principle, rand() returns something in 0..RAND_MAX */ + u = (Awkfloat) (rand() % RAND_MAX) / RAND_MAX; break; case FSRAND: - if (x->tval & REC) /* no argument provided */ + if (isrec(x)) /* no argument provided */ u = time((time_t *)0); else u = getfval(x); - srand((int)u); u = (int)u; + tmp = u; + srand((unsigned int) u); + u = srand_seed; + srand_seed = tmp; break; case FTOUPPER: case FTOLOWER: buf = tostring(getsval(x)); if (t == FTOUPPER) { for (p = buf; *p; p++) - if (islower(*p)) - *p = toupper(*p); + if (islower((uschar)*p)) + *p = toupper((uschar)*p); } else { for (p = buf; *p; p++) - if (isupper(*p)) - *p = tolower(*p); + if (isupper((uschar)*p)) + *p = tolower((uschar)*p); } - tempfree(x, ""); - x = gettemp(""); + tempfree(x); + x = gettemp(); (void) setsval(x, buf); free(buf); return (x); + case FFLUSH: + if (isrec(x) || strlen(getsval(x)) == 0) { + flush_all(); /* fflush() or fflush("") -> all */ + u = 0; + } else if ((fp = openfile(FFLUSH, getsval(x))) == NULL) + u = EOF; + else + u = fflush(fp); + break; default: /* can't happen */ - ERROR "illegal function type %d", t FATAL; + FATAL("illegal function type %d", t); break; } - tempfree(x, ""); - x = gettemp(""); + tempfree(x); + x = gettemp(); (void) setfval(x, u); - if (nextarg != 0) { - ERROR "warning: function has too many arguments" WARNING; - for (; nextarg; nextarg = nextarg->nnext) + if (nextarg != NULL) { + WARNING("warning: function has too many arguments"); + for (; nextarg != NULL; nextarg = nextarg->nnext) (void) execute(nextarg); } return (x); @@ -1569,28 +1828,30 @@ bltin(Node **a, int n) /*ARGSUSED*/ Cell * -print(Node **a, int n) +printstat(Node **a, int n) /* print a[0] */ { - register Node *x; - register Cell *y; + Node *x; + Cell *y; FILE *fp; - if (a[1] == 0) + if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */ fp = stdout; else - fp = redirect((int)a[1], a[2]); + fp = 
redirect(ptoi(a[1]), a[2]); for (x = a[0]; x != NULL; x = x->nnext) { y = execute(x); - (void) fputs((char *)getsval(y), fp); - tempfree(y, ""); + (void) fputs(getpssval(y), fp); + tempfree(y); if (x->nnext == NULL) - (void) fputs((char *)*ORS, fp); + (void) fputs(getsval(orsloc), fp); else - (void) fputs((char *)*OFS, fp); + (void) fputs(getsval(ofsloc), fp); } - if (a[1] != 0) + if (a[1] != NULL) (void) fflush(fp); - return (true); + if (ferror(fp)) + FATAL("write error on %s", filename(fp)); + return (True); } /*ARGSUSED*/ @@ -1600,67 +1861,100 @@ nullproc(Node **a, int n) return (0); } -struct { - FILE *fp; - uchar *fname; - int mode; /* '|', 'a', 'w' */ -} files[FOPEN_MAX]; static FILE * -redirect(int a, Node *b) +redirect(int a, Node *b) /* set up all i/o redirections */ { FILE *fp; Cell *x; - uchar *fname; + char *fname; x = execute(b); fname = getsval(x); fp = openfile(a, fname); if (fp == NULL) - ERROR "can't open file %s", fname FATAL; - tempfree(x, ""); + FATAL("can't open file %s", fname); + tempfree(x); return (fp); } +struct files { + FILE *fp; + const char *fname; + int mode; /* '|', 'a', 'w' => LE/LT, GT */ +} *files; + +int nfiles; + +void +stdinit(void) /* in case stdin, etc., are not constants */ +{ + nfiles = FOPEN_MAX; + files = calloc(nfiles, sizeof (*files)); + if (files == NULL) + FATAL("can't allocate file memory for %u files", nfiles); + files[0].fp = stdin; + files[0].fname = "/dev/stdin"; + files[0].mode = LT; + files[1].fp = stdout; + files[1].fname = "/dev/stdout"; + files[1].mode = GT; + files[2].fp = stderr; + files[2].fname = "/dev/stderr"; + files[2].mode = GT; +} + static FILE * -openfile(int a, uchar *s) +openfile(int a, const char *s) { - register int i, m; - register FILE *fp; + int i, m; + FILE *fp = NULL; if (*s == '\0') - ERROR "null file name in print or getline" FATAL; - for (i = 0; i < FOPEN_MAX; i++) { - if (files[i].fname && - strcmp((char *)s, (char *)files[i].fname) == 0) { + FATAL("null file name in print or 
getline"); + for (i = 0; i < nfiles; i++) { + if (files[i].fname && strcmp(s, files[i].fname) == 0) { if (a == files[i].mode || - a == APPEND && files[i].mode == GT) { + (a == APPEND && files[i].mode == GT)) { return (files[i].fp); } + if (a == FFLUSH) + return (files[i].fp); } } - for (i = 0; i < FOPEN_MAX; i++) { + if (a == FFLUSH) /* didn't find it, so don't create it! */ + return (NULL); + + for (i = 0; i < nfiles; i++) { if (files[i].fp == 0) break; } - if (i >= FOPEN_MAX) - ERROR "%s makes too many open files", s FATAL; + if (i >= nfiles) { + struct files *nf; + int nnf = nfiles + FOPEN_MAX; + nf = realloc(files, nnf * sizeof (*nf)); + if (nf == NULL) + FATAL("cannot grow files for %s and %d files", s, nnf); + (void) memset(&nf[nfiles], 0, FOPEN_MAX * sizeof (*nf)); + nfiles = nnf; + files = nf; + } (void) fflush(stdout); /* force a semblance of order */ m = a; if (a == GT) { - fp = fopen((char *)s, "w"); + fp = fopen(s, "wF"); } else if (a == APPEND) { - fp = fopen((char *)s, "a"); + fp = fopen(s, "aF"); m = GT; /* so can mix > and >> */ } else if (a == '|') { /* output pipe */ - fp = popen((char *)s, "w"); + fp = popen(s, "wF"); } else if (a == LE) { /* input pipe */ - fp = popen((char *)s, "r"); + fp = popen(s, "rF"); } else if (a == LT) { /* getline <file */ - fp = strcmp((char *)s, "-") == 0 ? - stdin : fopen((char *)s, "r"); /* "-" is stdin */ + fp = strcmp(s, "-") == 0 ? 
+ stdin : fopen(s, "rF"); /* "-" is stdin */ } else /* can't happen */ - ERROR "illegal redirection" FATAL; + FATAL("illegal redirection %d", a); if (fp != NULL) { files[i].fname = tostring(s); files[i].fp = fp; @@ -1669,38 +1963,52 @@ openfile(int a, uchar *s) return (fp); } +const char * +filename(FILE *fp) +{ + int i; + + for (i = 0; i < nfiles; i++) + if (fp == files[i].fp) + return (files[i].fname); + return ("???"); +} + /*ARGSUSED*/ Cell * closefile(Node **a, int n) { - register Cell *x; + Cell *x; int i, stat; x = execute(a[0]); (void) getsval(x); - for (i = 0; i < FOPEN_MAX; i++) { - if (files[i].fname && - strcmp((char *)x->sval, (char *)files[i].fname) == 0) { + stat = -1; + for (i = 0; i < nfiles; i++) { + if (files[i].fname && strcmp(x->sval, files[i].fname) == 0) { if (ferror(files[i].fp)) { - ERROR "i/o error occurred on %s", - files[i].fname WARNING; + WARNING("i/o error occurred on %s", + files[i].fname); } if (files[i].mode == '|' || files[i].mode == LE) stat = pclose(files[i].fp); else stat = fclose(files[i].fp); if (stat == EOF) { - ERROR "i/o error occurred closing %s", - files[i].fname WARNING; + WARNING("i/o error occurred closing %s", + files[i].fname); } - xfree(files[i].fname); + if (i > 2) /* don't do /dev/std... 
*/ + xfree(files[i].fname); /* watch out for ref thru this */ files[i].fname = NULL; files[i].fp = NULL; } } - tempfree(x, "close"); - return (true); + tempfree(x); + x = gettemp(); + (void) setfval(x, (Awkfloat) stat); + return (x); } static void @@ -1708,197 +2016,257 @@ closeall(void) { int i, stat; - for (i = 0; i < FOPEN_MAX; i++) { + for (i = 0; i < nfiles; i++) { if (files[i].fp) { if (ferror(files[i].fp)) { - ERROR "i/o error occurred on %s", - files[i].fname WARNING; + WARNING("i/o error occurred on %s", + files[i].fname); } if (files[i].mode == '|' || files[i].mode == LE) stat = pclose(files[i].fp); else stat = fclose(files[i].fp); if (stat == EOF) { - ERROR "i/o error occurred while closing %s", - files[i].fname WARNING; + WARNING("i/o error occurred while closing %s", + files[i].fname); } } } } +void +flush_all(void) +{ + int i; + + for (i = 0; i < nfiles; i++) + if (files[i].fp) + (void) fflush(files[i].fp); +} + /*ARGSUSED*/ Cell * -sub(Node **a, int nnn) +sub(Node **a, int nnn) /* substitute command */ { - register uchar *sptr; - register Cell *x, *y, *result; - uchar *buf, *t; + char *sptr, *pb, *q; + Cell *x, *y, *result; + char *t, *buf; fa *pfa; - size_t bsize, cnt, len; + size_t bufsz = recsize; + if ((buf = (char *)malloc(bufsz)) == NULL) + FATAL("out of memory in sub"); x = execute(a[3]); /* target string */ t = getsval(x); - if (a[0] == 0) + if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ pfa = (fa *)a[1]; /* regular expression */ else { y = execute(a[1]); pfa = makedfa(getsval(y), 1); - tempfree(y, ""); + tempfree(y); } y = execute(a[2]); /* replacement string */ - result = false; + result = False; if (pmatch(pfa, t)) { - init_buf(&buf, &bsize, LINE_INCR); - cnt = 0; sptr = t; - len = patbeg - sptr; - if (len > 0) { - expand_buf(&buf, &bsize, cnt + len); - (void) memcpy(buf, sptr, len); - cnt += len; - } + (void) adjbuf(&buf, &bufsz, + 1 + patbeg - sptr, recsize, 0, "sub"); + pb = buf; + while (sptr < patbeg) + *pb++ = *sptr++; 
sptr = getsval(y); - while (*sptr != 0) { - expand_buf(&buf, &bsize, cnt); - if (*sptr == '\\' && - (*(sptr+1) == '&' || *(sptr+1) == '\\')) { - sptr++; /* skip \, */ - buf[cnt++] = *sptr++; /* add & or \ */ + while (*sptr != '\0') { + (void) adjbuf(&buf, &bufsz, 5 + pb - buf, + recsize, &pb, "sub"); + if (*sptr == '\\') { + backsub(&pb, &sptr); } else if (*sptr == '&') { - expand_buf(&buf, &bsize, cnt + patlen); sptr++; - (void) memcpy(&buf[cnt], patbeg, patlen); - cnt += patlen; + (void) adjbuf(&buf, &bufsz, + 1 + patlen + pb - buf, recsize, &pb, "sub"); + for (q = patbeg; q < patbeg+patlen; ) + *pb++ = *q++; } else { - buf[cnt++] = *sptr++; + *pb++ = *sptr++; } } + *pb = '\0'; + if (pb > buf + bufsz) + FATAL("sub result1 %.30s too big; can't happen", buf); sptr = patbeg + patlen; if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) { - len = strlen((char *)sptr); - expand_buf(&buf, &bsize, cnt + len); - (void) memcpy(&buf[cnt], sptr, len); - cnt += len; + (void) adjbuf(&buf, &bufsz, + 1 + strlen(sptr) + pb - buf, 0, &pb, "sub"); + while ((*pb++ = *sptr++) != '\0') + ; } - buf[cnt] = '\0'; - (void) setsval(x, buf); - free(buf); - result = true; + if (pb > buf + bufsz) + FATAL("sub result2 %.30s too big; can't happen", buf); + (void) setsval(x, buf); /* BUG: should be able to avoid copy */ + result = True; } - tempfree(x, ""); - tempfree(y, ""); + tempfree(x); + tempfree(y); + free(buf); return (result); } /*ARGSUSED*/ Cell * -gsub(Node **a, int nnn) +gsub(Node **a, int nnn) /* global substitute */ { - register Cell *x, *y; - register uchar *rptr, *sptr, *t; - uchar *buf; - register fa *pfa; + Cell *x, *y; + char *rptr, *sptr, *t, *pb, *q; + char *buf; + fa *pfa; int mflag, tempstat, num; - size_t bsize, cnt, len; + size_t bufsz = recsize; + if ((buf = (char *)malloc(bufsz)) == NULL) + FATAL("out of memory in gsub"); mflag = 0; /* if mflag == 0, can replace empty string */ num = 0; x = execute(a[3]); /* target string */ t = getsval(x); - if (a[0] == 0) - pfa = 
(fa *) a[1]; /* regular expression */ + if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ + pfa = (fa *)a[1]; /* regular expression */ else { y = execute(a[1]); pfa = makedfa(getsval(y), 1); - tempfree(y, ""); + tempfree(y); } y = execute(a[2]); /* replacement string */ if (pmatch(pfa, t)) { tempstat = pfa->initstat; pfa->initstat = 2; - init_buf(&buf, &bsize, LINE_INCR); + pb = buf; rptr = getsval(y); - cnt = 0; do { - if (patlen == 0 && *patbeg != 0) { + if (patlen == 0 && *patbeg != '\0') { /* matched empty string */ if (mflag == 0) { /* can replace empty */ num++; sptr = rptr; - while (*sptr != 0) { - expand_buf(&buf, &bsize, cnt); - if (*sptr == '\\' && - (*(sptr+1) == '&' || - *(sptr+1) == '\\')) { - sptr++; - buf[cnt++] = *sptr++; + while (*sptr != '\0') { + (void) adjbuf(&buf, &bufsz, + 5 + pb - buf, recsize, + &pb, "gsub"); + if (*sptr == '\\') { + backsub(&pb, &sptr); } else if (*sptr == '&') { - expand_buf(&buf, - &bsize, - cnt + patlen); sptr++; - (void) memcpy(&buf[cnt], - patbeg, patlen); - cnt += patlen; + (void) adjbuf(&buf, + &bufsz, + 1+patlen+pb-buf, + recsize, + &pb, "gsub"); + for ( + q = patbeg; + q < patbeg+patlen; + *pb++ = *q++) + ; } else { - buf[cnt++] = *sptr++; + *pb++ = *sptr++; } } } - if (*t == 0) /* at end */ + if (*t == '\0') /* at end */ goto done; - expand_buf(&buf, &bsize, cnt); - buf[cnt++] = *t++; + (void) adjbuf(&buf, &bufsz, + 2 + pb - buf, recsize, &pb, "gsub"); + *pb++ = *t++; + /* BUG: not sure of this test */ + if (pb > buf + bufsz) + FATAL("gsub result0 %.30s too big; " + "can't happen", buf); mflag = 0; } else { /* matched nonempty string */ num++; sptr = t; - len = patbeg - sptr; - if (len > 0) { - expand_buf(&buf, &bsize, cnt + len); - (void) memcpy(&buf[cnt], sptr, len); - cnt += len; - } + (void) adjbuf(&buf, &bufsz, + 1 + (patbeg - sptr) + pb - buf, + recsize, &pb, "gsub"); + while (sptr < patbeg) + *pb++ = *sptr++; sptr = rptr; - while (*sptr != 0) { - expand_buf(&buf, &bsize, cnt); - if (*sptr == 
'\\' && - (*(sptr+1) == '&' || - *(sptr+1) == '\\')) { - sptr++; - buf[cnt++] = *sptr++; + while (*sptr != '\0') { + (void) adjbuf(&buf, &bufsz, + 5 + pb - buf, recsize, &pb, "gsub"); + if (*sptr == '\\') { + backsub(&pb, &sptr); } else if (*sptr == '&') { - expand_buf(&buf, &bsize, - cnt + patlen); sptr++; - (void) memcpy(&buf[cnt], - patbeg, patlen); - cnt += patlen; + (void) adjbuf(&buf, &bufsz, + 1 + patlen + pb - buf, + recsize, &pb, "gsub"); + for ( + q = patbeg; + q < patbeg+patlen; + *pb++ = *q++) + ; } else { - buf[cnt++] = *sptr++; + *pb++ = *sptr++; } } t = patbeg + patlen; - if ((*(t-1) == 0) || (*t == 0)) + if (patlen == 0 || *(t-1) == '\0' || *t == '\0') goto done; + if (pb > buf + bufsz) + FATAL("gsub result1 %.30s too big; " + "can't happen", buf); mflag = 1; } } while (pmatch(pfa, t)); sptr = t; - len = strlen((char *)sptr); - expand_buf(&buf, &bsize, len + cnt); - (void) memcpy(&buf[cnt], sptr, len); - cnt += len; + (void) adjbuf(&buf, &bufsz, + 1 + strlen(sptr) + pb - buf, 0, &pb, "gsub"); + while ((*pb++ = *sptr++) != '\0') + ; done: - buf[cnt] = '\0'; + if (pb < buf + bufsz) + *pb = '\0'; + else if (*(pb-1) != '\0') + FATAL("gsub result2 %.30s truncated; " + "can't happen", buf); + /* BUG: should be able to avoid copy + free */ (void) setsval(x, buf); - free(buf); pfa->initstat = tempstat; } - tempfree(x, ""); - tempfree(y, ""); - x = gettemp(""); + tempfree(x); + tempfree(y); + x = gettemp(); x->tval = NUM; x->fval = num; + free(buf); return (x); } + +/* + * handle \\& variations; sptr[0] == '\\' + */ +static void +backsub(char **pb_ptr, char **sptr_ptr) +{ + char *pb = *pb_ptr, *sptr = *sptr_ptr; + + if (sptr[1] == '\\') { + if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */ + *pb++ = '\\'; + *pb++ = '&'; + sptr += 4; + } else if (sptr[2] == '&') { /* \\& -> \ + matched */ + *pb++ = '\\'; + sptr += 2; + } else { /* \\x -> \\x */ + *pb++ = *sptr++; + *pb++ = *sptr++; + } + } else if (sptr[1] == '&') { /* literal & */ + sptr++; + *pb++ = 
*sptr++; + } else /* literal \ */ + *pb++ = *sptr++; + + *pb_ptr = pb; + *sptr_ptr = sptr; +} diff --git a/usr/src/cmd/awk/tran.c b/usr/src/cmd/awk/tran.c index e8e42d780e..ba9a685d93 100644 --- a/usr/src/cmd/awk/tran.c +++ b/usr/src/cmd/awk/tran.c @@ -1,4 +1,28 @@ /* + * Copyright (C) Lucent Technologies 1997 + * All Rights Reserved + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby + * granted, provided that the above copyright notice appear in all + * copies and that both that the copyright notice and this + * permission notice and warranty disclaimer appear in supporting + * documentation, and that the name Lucent Technologies or any of + * its entities not be used in advertising or publicity pertaining + * to distribution of the software without specific, written prior + * permission. + * + * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. + * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER + * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, + * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF + * THIS SOFTWARE. 
+ */ + +/* * CDDL HEADER START * * The contents of this file are subject to the terms of the @@ -27,13 +51,12 @@ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ -#pragma ident "%Z%%M% %I% %E% SMI" - #define DEBUG #include <stdio.h> -#include <stdlib.h> +#include <math.h> #include <ctype.h> #include <string.h> +#include <stdlib.h> #include "awk.h" #include "y.tab.h" @@ -42,107 +65,114 @@ Array *symtab; /* main symbol table */ -uchar **FS; /* initial field sep */ -uchar **RS; /* initial record sep */ -uchar **OFS; /* output field sep */ -uchar **ORS; /* output record sep */ -uchar **OFMT; /* output format for numbers */ +char **FS; /* initial field sep */ +char **RS; /* initial record sep */ +char **OFS; /* output field sep */ +char **ORS; /* output record sep */ +char **OFMT; /* output format for numbers */ +char **CONVFMT; /* format for conversions in getsval */ Awkfloat *NF; /* number of fields in current record */ Awkfloat *NR; /* number of current record */ Awkfloat *FNR; /* number of current record in current file */ -uchar **FILENAME; /* current filename argument */ +char **FILENAME; /* current filename argument */ Awkfloat *ARGC; /* number of arguments from command line */ -uchar **SUBSEP; /* subscript separator for a[i,j,k]; default \034 */ +char **SUBSEP; /* subscript separator for a[i,j,k]; default \034 */ Awkfloat *RSTART; /* start of re matched with ~; origin 1 (!) */ Awkfloat *RLENGTH; /* length of same */ Cell *recloc; /* location of record */ +Cell *fsloc; /* FS */ Cell *nrloc; /* NR */ Cell *nfloc; /* NF */ Cell *fnrloc; /* FNR */ +Cell *ofsloc; /* OFS */ +Cell *orsloc; /* ORS */ +Cell *rsloc; /* RS */ +Cell *rtloc; /* RT */ Array *ARGVtab; /* symbol table containing ARGV[...] */ Array *ENVtab; /* symbol table containing ENVIRON[...] 
*/ Cell *rstartloc; /* RSTART */ Cell *rlengthloc; /* RLENGTH */ +Cell *subseploc; /* SUBSEP */ Cell *symtabloc; /* SYMTAB */ -Cell *nullloc; +Cell *nullloc; /* a guaranteed empty cell */ Node *nullnode; /* zero&null, converted into a node for comparisons */ +Cell *literal0; static void rehash(Array *); -void -syminit(void) +static void +setfree(Cell *vp) { - init_buf(&record, &record_size, LINE_INCR); + if (&vp->sval == FS || &vp->sval == RS || + &vp->sval == OFS || &vp->sval == ORS || + &vp->sval == OFMT || &vp->sval == CONVFMT || + &vp->sval == FILENAME || &vp->sval == SUBSEP) + vp->tval |= DONTFREE; + else + vp->tval &= ~DONTFREE; +} +void +syminit(void) /* initialize symbol table with builtin vars */ +{ /* initialize $0 */ - recloc = getfld(0); - recloc->nval = (uchar *)"$0"; + recloc = fieldadr(0); + recloc->nval = "$0"; recloc->sval = record; recloc->tval = REC|STR|DONTFREE; - symtab = makesymtab(NSYMTAB); - (void) setsymtab((uchar *)"0", (uchar *)"0", 0.0, - NUM|STR|CON|DONTFREE, symtab); + literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab); /* this is used for if(x)... 
tests: */ - nullloc = setsymtab((uchar *)"$zero&null", (uchar *)"", 0.0, + nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab); - nullnode = valtonode(nullloc, CCON); - FS = &setsymtab((uchar *)"FS", (uchar *)" ", 0.0, - STR|DONTFREE, symtab)->sval; - RS = &setsymtab((uchar *)"RS", (uchar *)"\n", 0.0, - STR|DONTFREE, symtab)->sval; - OFS = &setsymtab((uchar *)"OFS", (uchar *)" ", 0.0, - STR|DONTFREE, symtab)->sval; - ORS = &setsymtab((uchar *)"ORS", (uchar *)"\n", 0.0, + nullnode = celltonode(nullloc, CCON); + + fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab); + FS = &fsloc->sval; + rsloc = setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab); + RS = &rsloc->sval; + rtloc = setsymtab("RT", "", 0.0, STR|DONTFREE, symtab); + ofsloc = setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab); + OFS = &ofsloc->sval; + orsloc = setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab); + ORS = &orsloc->sval; + OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval; + CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval; - OFMT = &setsymtab((uchar *)"OFMT", (uchar *)"%.6g", 0.0, - STR|DONTFREE, symtab)->sval; - FILENAME = &setsymtab((uchar *)"FILENAME", (uchar *)"-", 0.0, - STR|DONTFREE, symtab)->sval; - nfloc = setsymtab((uchar *)"NF", (uchar *)"", 0.0, NUM, symtab); + FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval; + nfloc = setsymtab("NF", "", 0.0, NUM, symtab); NF = &nfloc->fval; - nrloc = setsymtab((uchar *)"NR", (uchar *)"", 0.0, NUM, symtab); + nrloc = setsymtab("NR", "", 0.0, NUM, symtab); NR = &nrloc->fval; - fnrloc = setsymtab((uchar *)"FNR", (uchar *)"", 0.0, NUM, symtab); + fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab); FNR = &fnrloc->fval; - SUBSEP = &setsymtab((uchar *)"SUBSEP", (uchar *)"\034", 0.0, - STR|DONTFREE, symtab)->sval; - rstartloc = setsymtab((uchar *)"RSTART", (uchar *)"", 0.0, - NUM, symtab); + subseploc = setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab); + SUBSEP = 
&subseploc->sval; + rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab); RSTART = &rstartloc->fval; - rlengthloc = setsymtab((uchar *)"RLENGTH", (uchar *)"", 0.0, - NUM, symtab); + rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab); RLENGTH = &rlengthloc->fval; - symtabloc = setsymtab((uchar *)"SYMTAB", (uchar *)"", 0.0, ARR, symtab); - symtabloc->sval = (uchar *)symtab; + symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab); + symtabloc->sval = (char *)symtab; } void -arginit(int ac, uchar *av[]) +arginit(int ac, char **av) /* set up ARGV and ARGC */ { Cell *cp; int i; - uchar temp[11]; - - /* first make FILENAME first real argument */ - for (i = 1; i < ac; i++) { - if (!isclvar(av[i])) { - (void) setsval(lookup((uchar *)"FILENAME", symtab), - av[i]); - break; - } - } - ARGC = &setsymtab((uchar *)"ARGC", (uchar *)"", (Awkfloat)ac, - NUM, symtab)->fval; - cp = setsymtab((uchar *)"ARGV", (uchar *)"", 0.0, ARR, symtab); + char temp[50]; + + ARGC = &setsymtab("ARGC", "", (Awkfloat)ac, NUM, symtab)->fval; + cp = setsymtab("ARGV", "", 0.0, ARR, symtab); ARGVtab = makesymtab(NSYMTAB); /* could be (int) ARGC as well */ - cp->sval = (uchar *) ARGVtab; + cp->sval = (char *)ARGVtab; for (i = 0; i < ac; i++) { - (void) sprintf((char *)temp, "%d", i); + (void) sprintf(temp, "%d", i); if (is_number(*av)) { - (void) setsymtab(temp, *av, atof((const char *)*av), + (void) setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab); } else { (void) setsymtab(temp, *av, 0.0, STR, ARGVtab); @@ -152,20 +182,22 @@ arginit(int ac, uchar *av[]) } void -envinit(uchar *envp[]) +envinit(char **envp) /* set up ENVIRON variable */ { Cell *cp; - uchar *p; + char *p; - cp = setsymtab((uchar *)"ENVIRON", (uchar *)"", 0.0, ARR, symtab); + cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab); ENVtab = makesymtab(NSYMTAB); - cp->sval = (uchar *) ENVtab; + cp->sval = (char *)ENVtab; for (; *envp; envp++) { - if ((p = (uchar *)strchr((char *)*envp, '=')) == NULL) + if ((p = strchr(*envp, '=')) == NULL) + 
continue; + if (p == *envp) /* no left hand side name in env string */ continue; *p++ = 0; /* split into two strings at = */ if (is_number(p)) { - (void) setsymtab(*envp, p, atof((const char *)p), + (void) setsymtab(*envp, p, atof(p), STR|NUM, ENVtab); } else { (void) setsymtab(*envp, p, 0.0, STR, ENVtab); @@ -176,7 +208,7 @@ envinit(uchar *envp[]) } Array * -makesymtab(int n) +makesymtab(int n) /* make a new symbol table */ { Array *ap; Cell **tp; @@ -184,7 +216,7 @@ makesymtab(int n) ap = (Array *)malloc(sizeof (Array)); tp = (Cell **)calloc(n, sizeof (Cell *)); if (ap == NULL || tp == NULL) - ERROR "out of space in makesymtab" FATAL; + FATAL("out of space in makesymtab"); ap->nelem = 0; ap->size = n; ap->tab = tp; @@ -192,9 +224,9 @@ makesymtab(int n) } void -freesymtab(Cell *ap) /* free symbol table */ +freesymtab(Cell *ap) /* free a symbol table */ { - Cell *cp, *next; + Cell *cp, *temp; Array *tp; int i; @@ -205,20 +237,26 @@ freesymtab(Cell *ap) /* free symbol table */ if (tp == NULL) return; for (i = 0; i < tp->size; i++) { - for (cp = tp->tab[i]; cp != NULL; cp = next) { - next = cp->cnext; + for (cp = tp->tab[i]; cp != NULL; cp = temp) { xfree(cp->nval); if (freeable(cp)) xfree(cp->sval); + temp = cp->cnext; /* avoids freeing then using */ free(cp); + tp->nelem--; } + tp->tab[i] = 0; + } + if (tp->nelem != 0) { + WARNING("can't happen: inconsistent element count freeing %s", + ap->nval); } free(tp->tab); free(tp); } void -freeelem(Cell *ap, uchar *s) /* free elem s from ap (i.e., ap["s"] */ +freeelem(Cell *ap, const char *s) /* free elem s from ap (i.e., ap["s"] */ { Array *tp; Cell *p, *prev = NULL; @@ -228,7 +266,7 @@ freeelem(Cell *ap, uchar *s) /* free elem s from ap (i.e., ap["s"] */ tp = (Array *)ap->sval; h = hash(s, tp->size); for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext) - if (strcmp((char *)s, (char *)p->nval) == 0) { + if (strcmp(s, p->nval) == 0) { if (prev == NULL) /* 1st one */ tp->tab[h] = p->cnext; else /* middle somewhere */ @@ 
-243,41 +281,40 @@ freeelem(Cell *ap, uchar *s) /* free elem s from ap (i.e., ap["s"] */ } Cell * -setsymtab(uchar *n, uchar *s, Awkfloat f, unsigned int t, Array *tp) +setsymtab(const char *n, const char *s, Awkfloat f, unsigned int t, Array *tp) { - register int h; - register Cell *p; + int h; + Cell *p; if (n != NULL && (p = lookup(n, tp)) != NULL) { - dprintf(("setsymtab found %p: n=%s", (void *)p, p->nval)); - dprintf((" s=\"%s\" f=%g t=%p\n", - p->sval, p->fval, (void *)p->tval)); + dprintf(("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n", + (void *)p, NN(p->nval), NN(p->sval), p->fval, p->tval)); return (p); } p = (Cell *)malloc(sizeof (Cell)); if (p == NULL) - ERROR "symbol table overflow at %s", n FATAL; + FATAL("out of space for symbol table at %s", n); p->nval = tostring(n); - p->sval = s ? tostring(s) : tostring((uchar *)""); + p->sval = s ? tostring(s) : tostring(""); p->fval = f; p->tval = t; - p->csub = 0; - + p->csub = CUNK; + p->ctype = OCELL; tp->nelem++; if (tp->nelem > FULLTAB * tp->size) rehash(tp); h = hash(n, tp->size); p->cnext = tp->tab[h]; tp->tab[h] = p; - dprintf(("setsymtab set %p: n=%s", (void *)p, p->nval)); - dprintf((" s=\"%s\" f=%g t=%p\n", p->sval, p->fval, (void *)p->tval)); + dprintf(("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n", + (void *)p, p->nval, p->sval, p->fval, p->tval)); return (p); } int -hash(uchar *s, int n) /* form hash value for string s */ +hash(const char *s, int n) /* form hash value for string s */ { - register unsigned hashval; + unsigned int hashval; for (hashval = 0; *s != '\0'; s++) hashval = (*s + 31 * hashval); @@ -292,10 +329,10 @@ rehash(Array *tp) /* rehash items in small table into big one */ nsz = GROWTAB * tp->size; np = (Cell **)calloc(nsz, sizeof (Cell *)); - if (np == NULL) - ERROR "out of space in rehash" FATAL; + if (np == NULL) /* can't do it, but can keep running. */ + return; /* someone else will run out later. 
*/ for (i = 0; i < tp->size; i++) { - for (cp = tp->tab[i]; cp; cp = op) { + for (cp = tp->tab[i]; cp != NULL; cp = op) { op = cp->cnext; nh = hash(cp->nval, nsz); cp->cnext = np[nh]; @@ -308,177 +345,278 @@ rehash(Array *tp) /* rehash items in small table into big one */ } Cell * -lookup(uchar *s, Array *tp) /* look for s in tp */ +lookup(const char *s, Array *tp) /* look for s in tp */ { - register Cell *p; + Cell *p; int h; h = hash(s, tp->size); for (p = tp->tab[h]; p != NULL; p = p->cnext) { - if (strcmp((char *)s, (char *)p->nval) == 0) + if (strcmp(s, p->nval) == 0) return (p); /* found it */ } return (NULL); /* not found */ } Awkfloat -setfval(Cell *vp, Awkfloat f) +setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */ { - int i; + int fldno; + f += 0.0; /* normalise negative zero to positive zero */ if ((vp->tval & (NUM | STR)) == 0) funnyvar(vp, "assign to"); - if (vp->tval & FLD) { + if (isfld(vp)) { donerec = 0; /* mark $0 invalid */ - i = fldidx(vp); - if (i > *NF) - newfld(i); - dprintf(("setting field %d to %g\n", i, f)); - } else if (vp->tval & REC) { + fldno = atoi(vp->nval); + if (fldno > *NF) + newfld(fldno); + dprintf(("setting field %d to %g\n", fldno, f)); + } else if (&vp->fval == NF) { + donerec = 0; /* mark $0 invalid */ + setlastfld((int)f); + dprintf(("setting NF to %g\n", f)); + } else if (isrec(vp)) { donefld = 0; /* mark $1... invalid */ donerec = 1; + savefs(); + } else if (vp == ofsloc) { + if (donerec == 0) + recbld(); } - vp->tval &= ~STR; /* mark string invalid */ + if (freeable(vp)) + xfree(vp->sval); /* free any previous string */ + vp->tval &= ~(STR|CONVC|CONVO); /* mark string invalid */ + vp->fmt = NULL; vp->tval |= NUM; /* mark number ok */ - dprintf(("setfval %p: %s = %g, t=%p\n", (void *)vp, - vp->nval ? vp->nval : (unsigned char *)"NULL", - f, (void *)vp->tval)); + if (f == -0) /* who would have thought this possible? 
*/ + f = 0; + dprintf(("setfval %p: %s = %g, t=%o\n", (void *)vp, + NN(vp->nval), f, vp->tval)); return (vp->fval = f); } void -funnyvar(Cell *vp, char *rw) +funnyvar(Cell *vp, const char *rw) { - if (vp->tval & ARR) - ERROR "can't %s %s; it's an array name.", rw, vp->nval FATAL; - if (vp->tval & FCN) - ERROR "can't %s %s; it's a function.", rw, vp->nval FATAL; - ERROR "funny variable %o: n=%s s=\"%s\" f=%g t=%o", - vp, vp->nval, vp->sval, vp->fval, vp->tval CONT; + if (isarr(vp)) + FATAL("can't %s %s; it's an array name.", rw, vp->nval); + if (isfcn(vp)) + FATAL("can't %s %s; it's a function.", rw, vp->nval); + WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o", + vp, vp->nval, vp->sval, vp->fval, vp->tval); } -uchar * -setsval(Cell *vp, uchar *s) +char * +setsval(Cell *vp, const char *s) /* set string val of a Cell */ { - int i; + char *t; + int fldno; + Awkfloat f; + dprintf(("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n", + (void *)vp, NN(vp->nval), s, vp->tval, donerec, donefld)); if ((vp->tval & (NUM | STR)) == 0) funnyvar(vp, "assign to"); - if (vp->tval & FLD) { + if (isfld(vp)) { donerec = 0; /* mark $0 invalid */ - i = fldidx(vp); - if (i > *NF) - newfld(i); - dprintf(("setting field %d to %s\n", i, s)); - } else if (vp->tval & REC) { + fldno = atoi(vp->nval); + if (fldno > *NF) + newfld(fldno); + dprintf(("setting field %d to %s (%p)\n", fldno, s, (void *)s)); + } else if (isrec(vp)) { donefld = 0; /* mark $1... invalid */ donerec = 1; + savefs(); + } else if (vp == ofsloc) { + if (donerec == 0) + recbld(); } - vp->tval &= ~NUM; - vp->tval |= STR; + t = s ? tostring(s) : tostring(""); /* in case it's self-assign */ if (freeable(vp)) xfree(vp->sval); - vp->tval &= ~DONTFREE; - dprintf(("setsval %p: %s = \"%s\", t=%p\n", - (void *)vp, - vp->nval ? (char *)vp->nval : "", - s, - (void *)(vp->tval ? 
(char *)vp->tval : ""))); - return (vp->sval = tostring(s)); + vp->tval &= ~(NUM|CONVC|CONVO); + vp->tval |= STR; + vp->fmt = NULL; + setfree(vp); + dprintf(("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n", + (void *)vp, NN(vp->nval), t, (void *)t, + vp->tval, donerec, donefld)); + vp->sval = t; + if (&vp->fval == NF) { + donerec = 0; /* mark $0 invalid */ + f = getfval(vp); + setlastfld((int)f); + dprintf(("setting NF to %g\n", f)); + } + + return (vp->sval); } Awkfloat -r_getfval(Cell *vp) +getfval(Cell *vp) /* get float val of a Cell */ { if ((vp->tval & (NUM | STR)) == 0) funnyvar(vp, "read value of"); - if ((vp->tval & FLD) && donefld == 0) + if (isfld(vp) && donefld == 0) fldbld(); - else if ((vp->tval & REC) && donerec == 0) + else if (isrec(vp) && donerec == 0) recbld(); if (!isnum(vp)) { /* not a number */ - vp->fval = atof((const char *)vp->sval); /* best guess */ + vp->fval = atof(vp->sval); /* best guess */ if (is_number(vp->sval) && !(vp->tval&CON)) vp->tval |= NUM; /* make NUM only sparingly */ } - dprintf(("getfval %p: %s = %g, t=%p\n", - (void *)vp, vp->nval, vp->fval, (void *)vp->tval)); + dprintf(("getfval %p: %s = %g, t=%o\n", + (void *)vp, NN(vp->nval), vp->fval, vp->tval)); return (vp->fval); } -uchar * -r_getsval(Cell *vp) +static char * +get_str_val(Cell *vp, char **fmt) /* get string val of a Cell */ { - uchar s[256]; + char s[256]; + double dtemp; if ((vp->tval & (NUM | STR)) == 0) funnyvar(vp, "read value of"); - if ((vp->tval & FLD) && donefld == 0) + if (isfld(vp) && donefld == 0) fldbld(); - else if ((vp->tval & REC) && donerec == 0) + else if (isrec(vp) && donerec == 0) recbld(); - if ((vp->tval & STR) == 0) { - if (!(vp->tval&DONTFREE)) - xfree(vp->sval); - if ((long long)vp->fval == vp->fval) { - (void) snprintf((char *)s, sizeof (s), - "%.20g", vp->fval); + + /* + * ADR: This is complicated and more fragile than is desirable. + * Retrieving a string value for a number associates the string + * value with the scalar. 
Previously, the string value was + * sticky, meaning if converted via OFMT that became the value + * (even though POSIX wants it to be via CONVFMT). Or if CONVFMT + * changed after a string value was retrieved, the original value + * was maintained and used. Also not per POSIX. + * + * We work around this design by adding two additional flags, + * CONVC and CONVO, indicating how the string value was + * obtained (via CONVFMT or OFMT) and _also_ maintaining a copy + * of the pointer to the xFMT format string used for the + * conversion. This pointer is only read, **never** dereferenced. + * The next time we do a conversion, if it's coming from the same + * xFMT as last time, and the pointer value is different, we + * know that the xFMT format string changed, and we need to + * redo the conversion. If it's the same, we don't have to. + * + * There are also several cases where we don't do a conversion, + * such as for a field (see the checks below). + */ + + /* Don't duplicate the code for actually updating the value */ +#define update_str_val(vp) \ + { \ + if (freeable(vp)) \ + xfree(vp->sval); \ + if (modf(vp->fval, &dtemp) == 0) /* it's integral */ \ + (void) snprintf(s, sizeof (s), "%.30g", vp->fval); \ + else \ + (void) snprintf(s, sizeof (s), *fmt, vp->fval); \ + vp->sval = tostring(s); \ + vp->tval &= ~DONTFREE; \ + vp->tval |= STR; \ + } + + if (isstr(vp) == 0) { + /*LINTED*/ + update_str_val(vp); + if (fmt == OFMT) { + vp->tval &= ~CONVC; + vp->tval |= CONVO; } else { - /*LINTED*/ - (void) snprintf((char *)s, sizeof (s), - (char *)*OFMT, vp->fval); + /* CONVFMT */ + vp->tval &= ~CONVO; + vp->tval |= CONVC; + } + vp->fmt = *fmt; + } else if ((vp->tval & DONTFREE) != 0 || !isnum(vp) || isfld(vp)) { + goto done; + } else if (isstr(vp)) { + if (fmt == OFMT) { + if ((vp->tval & CONVC) != 0 || + ((vp->tval & CONVO) != 0 && vp->fmt != *fmt)) { + /*LINTED*/ + update_str_val(vp); + vp->tval &= ~CONVC; + vp->tval |= CONVO; + vp->fmt = *fmt; + } + } else { + /* CONVFMT 
*/ + if ((vp->tval & CONVO) != 0 || + ((vp->tval & CONVC) != 0 && vp->fmt != *fmt)) { + /*LINTED*/ + update_str_val(vp); + vp->tval &= ~CONVO; + vp->tval |= CONVC; + vp->fmt = *fmt; + } } - vp->sval = tostring(s); - vp->tval &= ~DONTFREE; - vp->tval |= STR; } - dprintf(("getsval %p: %s = \"%s\", t=%p\n", - (void *)vp, - vp->nval ? (char *)vp->nval : "", - vp->sval ? (char *)vp->sval : "", - (void *)vp->tval)); +done: + dprintf(("getsval %p: %s = \"%s (%p)\", t=%o\n", + (void *)vp, NN(vp->nval), vp->sval, (void *)vp->sval, vp->tval)); return (vp->sval); } -uchar * -tostring(uchar *s) +char * +getsval(Cell *vp) /* get string val of a Cell */ { - register uchar *p; + return (get_str_val(vp, CONVFMT)); +} - p = (uchar *)malloc(strlen((char *)s)+1); +char * +getpssval(Cell *vp) /* get string val of a Cell for print */ +{ + return (get_str_val(vp, OFMT)); +} + + +char * +tostring(const char *s) /* make a copy of string s */ +{ + char *p = strdup(s); if (p == NULL) - ERROR "out of space in tostring on %s", s FATAL; - (void) strcpy((char *)p, (char *)s); + FATAL("out of space in tostring on %s", s); return (p); } -uchar * -qstring(uchar *s, int delim) /* collect string up to delim */ +char * +qstring(const char *is, int delim) /* collect string up to next delim */ { - uchar *cbuf, *ret; + const char *os = is; int c, n; - size_t cbufsz, cnt; - - init_buf(&cbuf, &cbufsz, LINE_INCR); + uschar *s = (uschar *)is; + uschar *buf, *bp; - for (cnt = 0; (c = *s) != delim; s++) { + if ((buf = (uschar *)malloc(strlen(is)+3)) == NULL) + FATAL("out of space in qstring(%s)", s); + for (bp = buf; (c = *s) != delim; s++) { if (c == '\n') { - ERROR "newline in string %.10s...", cbuf SYNTAX; - } else if (c != '\\') { - expand_buf(&cbuf, &cbufsz, cnt); - cbuf[cnt++] = c; - } else { /* \something */ - expand_buf(&cbuf, &cbufsz, cnt); - switch (c = *++s) { - case '\\': cbuf[cnt++] = '\\'; break; - case 'n': cbuf[cnt++] = '\n'; break; - case 't': cbuf[cnt++] = '\t'; break; - case 'b': 
cbuf[cnt++] = '\b'; break; - case 'f': cbuf[cnt++] = '\f'; break; - case 'r': cbuf[cnt++] = '\r'; break; + SYNTAX("newline in string %.20s...", os); + } else if (c != '\\') + *bp++ = c; + else { /* \something */ + c = *++s; + if (c == 0) { /* \ at end */ + *bp++ = '\\'; + break; /* for loop */ + } + switch (c) { + case '\\': *bp++ = '\\'; break; + case 'n': *bp++ = '\n'; break; + case 't': *bp++ = '\t'; break; + case 'b': *bp++ = '\b'; break; + case 'f': *bp++ = '\f'; break; + case 'r': *bp++ = '\r'; break; default: if (!isdigit(c)) { - cbuf[cnt++] = c; + *bp++ = c; break; } n = c - '0'; @@ -487,13 +625,11 @@ qstring(uchar *s, int delim) /* collect string up to delim */ if (isdigit(s[1])) n = 8 * n + *++s - '0'; } - cbuf[cnt++] = n; + *bp++ = n; break; } } } - cbuf[cnt] = '\0'; - ret = tostring(cbuf); - free(cbuf); - return (ret); + *bp++ = 0; + return ((char *)buf); } |