diff options
| author | Cody Peter Mello <cody.mello@joyent.com> | 2018-10-16 21:41:38 +0000 |
|---|---|---|
| committer | Cody Peter Mello <cody.mello@joyent.com> | 2019-06-13 18:47:50 +0000 |
| commit | 084c5c48f7680535f554dd406a99bf0ea8329823 (patch) | |
| tree | 2ccc88d69a6a75d4ce6129d12793c8c537d62b12 | |
| parent | b4acf023dd2b2008c601d706c1ba8f9684bca593 (diff) | |
| download | illumos-joyent-084c5c48f7680535f554dd406a99bf0ea8329823.tar.gz | |
OS-7315 Update nawk(1) field splitting behaviour to match POSIX definition
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Jason King <jbk@joyent.com>
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | usr/src/cmd/awk/awk.h | 1 |
| -rw-r--r-- | usr/src/cmd/awk/lib.c | 24 |
| -rw-r--r-- | usr/src/cmd/awk/tran.c | 2 |
| -rw-r--r-- | usr/src/test/util-tests/tests/awk/bugs-fixed/fs-overflow.ok | 2 |
| -rwxr-xr-x | usr/src/test/util-tests/tests/awk/tests/T.split | 75 |
5 files changed, 91 insertions, 13 deletions
diff --git a/usr/src/cmd/awk/awk.h b/usr/src/cmd/awk/awk.h index b1db39fadb..01495d108d 100644 --- a/usr/src/cmd/awk/awk.h +++ b/usr/src/cmd/awk/awk.h @@ -338,6 +338,7 @@ extern void FATAL(const char *, ...) __attribute__((__noreturn__)); extern void WARNING(const char *, ...); extern void error(void); extern void nextfile(void); +extern void savefs(void); extern int isclvar(const char *); extern int is_number(const char *); diff --git a/usr/src/cmd/awk/lib.c b/usr/src/cmd/awk/lib.c index ae60fde3f1..bec53b6e32 100644 --- a/usr/src/cmd/awk/lib.c +++ b/usr/src/cmd/awk/lib.c @@ -144,6 +144,23 @@ initgetrec(void) infile = stdin; /* no filenames, so use stdin */ } +/* + * POSIX specifies that fields are supposed to be evaluated as if they were + * split using the value of FS at the time that the record's value ($0) was + * read. + * + * Since field-splitting is done lazily, we save the current value of FS + * whenever a new record is read in (implicitly or via getline), or when + * a new value is assigned to $0. + */ +void +savefs(void) +{ + if (strlen(getsval(fsloc)) >= sizeof (inputFS)) + FATAL("field separator %.10s... is too long", *FS); + (void) strcpy(inputFS, *FS); +} + static int firsttime = 1; /* @@ -167,6 +184,7 @@ getrec(char **pbuf, size_t *pbufsize, int isrecord) if (isrecord) { donefld = 0; donerec = 1; + savefs(); } saveb0 = buf[0]; buf[0] = '\0'; @@ -242,9 +260,6 @@ readrec(char **pbuf, size_t *pbufsize, FILE *inf) /* read one record into buf */ size_t bufsize = *pbufsize; char *rs = getsval(rsloc); - if (strlen(getsval(fsloc)) >= sizeof (inputFS)) - FATAL("field separator %.10s... is too long", *FS); - (void) strcpy(inputFS, *FS); /* for subsequent field splitting */ if ((sep = *rs) == 0) { sep = '\n'; /* skip leading \n's */ @@ -342,9 +357,6 @@ fldbld(void) /* create fields from current record */ fr = fields; i = 0; /* number of fields accumulated here */ - if (strlen(getsval(fsloc)) >= sizeof (inputFS)) - FATAL("field separator %.10s... 
is too long", *FS); - (void) strcpy(inputFS, *FS); if (strlen(inputFS) > 1) { /* it's a regular expression */ i = refldbld(r, inputFS); } else if ((sep = *inputFS) == ' ') { /* default whitespace */ diff --git a/usr/src/cmd/awk/tran.c b/usr/src/cmd/awk/tran.c index 012b90acf4..417bede66d 100644 --- a/usr/src/cmd/awk/tran.c +++ b/usr/src/cmd/awk/tran.c @@ -377,6 +377,7 @@ setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */ } else if (isrec(vp)) { donefld = 0; /* mark $1... invalid */ donerec = 1; + savefs(); } else if (vp == ofsloc) { if (donerec == 0) recbld(); @@ -424,6 +425,7 @@ setsval(Cell *vp, const char *s) /* set string val of a Cell */ } else if (isrec(vp)) { donefld = 0; /* mark $1... invalid */ donerec = 1; + savefs(); } else if (vp == ofsloc) { if (donerec == 0) recbld(); diff --git a/usr/src/test/util-tests/tests/awk/bugs-fixed/fs-overflow.ok b/usr/src/test/util-tests/tests/awk/bugs-fixed/fs-overflow.ok index 3a9b6cfad5..6c1b534bcb 100644 --- a/usr/src/test/util-tests/tests/awk/bugs-fixed/fs-overflow.ok +++ b/usr/src/test/util-tests/tests/awk/bugs-fixed/fs-overflow.ok @@ -1,3 +1,3 @@ $AWK: field separator cccccccccc... is too long - source line number 12 + source line number 11 EXIT CODE: 2 diff --git a/usr/src/test/util-tests/tests/awk/tests/T.split b/usr/src/test/util-tests/tests/awk/tests/T.split index 045256e6a9..5444561993 100755 --- a/usr/src/test/util-tests/tests/awk/tests/T.split +++ b/usr/src/test/util-tests/tests/awk/tests/T.split @@ -18,14 +18,31 @@ fail() { echo T.split: misc tests of field splitting and split command -echo a:bc:def > $TEMP0 -echo a > $TEMP1 -$AWK '{ FS = ":"; print $1 }' $TEMP0 > $TEMP2 +$AWK 'BEGIN { + # Assign string to $0, then change FS. + FS = ":"; + $0="a:bc:def"; + FS = "-"; + print FS, $1, NF; + + # Assign number to $0, then change FS. 
+ FS = "2"; + $0=1212121; + FS="3"; + print FS, $1, NF; +}' > $TEMP1 +echo '- a 3 +3 1 4' > $TEMP2 diff $TEMP1 $TEMP2 || fail 'BAD: T.split 0.1' -echo a:bc:def > $TEMP0 -echo 3 > $TEMP1 -$AWK '{ FS = ":"; print NF }' $TEMP0 > $TEMP2 +$AWK 'BEGIN { + # FS changes after getline. + FS = ":"; + "echo a:bc:def" | getline; + FS = "-"; + print FS, $1, NF; +}' > $TEMP1 +echo '- a 3' > $TEMP2 diff $TEMP1 $TEMP2 || fail 'BAD: T.split 0.2' echo ' @@ -45,6 +62,52 @@ echo '0 4' > $TEMP2 diff $TEMP1 $TEMP2 || fail 'BAD: T.split 0.3' +# getline var shouldn't impact fields. + +echo 'f b a' > $TEMP0 +$AWK '{ + FS = ":"; + getline a < "/etc/passwd"; + print $1; +}' $TEMP0 > $TEMP1 +echo 'f' > $TEMP2 +diff $TEMP1 $TEMP2 || fail 'BAD: T.split 0.4' + +echo 'a b c d +foo +e f g h i +bar' > $TEMP0 +$AWK '{ + FS=":"; + getline v; + print $2, NF; + FS=" "; +}' $TEMP0 > $TEMP1 +echo 'b 4 +f 5' > $TEMP2 +diff $TEMP1 $TEMP2 || fail 'BAD: T.split 0.5' + +echo 'a.b.c=d.e.f +g.h.i=j.k.l +m.n.o=p.q.r' > $TEMP0 +echo 'b +h +n' > $TEMP1 +$AWK 'BEGIN { FS="=" } { FS="."; $0=$1; print $2; FS="="; }' $TEMP0 > $TEMP2 +diff $TEMP1 $TEMP2 || fail 'BAD: T.split (record assignment 1)' + +echo 'a.b.c=d.e.f +g.h.i=j.k.l +m.n.o=p.q.r' > $TEMP0 +echo 'd.e.f +b +j.k.l +h +p.q.r +n' > $TEMP1 +$AWK 'BEGIN { FS="=" } { print $2; FS="."; $0=$1; print $2; FS="="; }' $TEMP0 > $TEMP2 +diff $TEMP1 $TEMP2 || fail 'BAD: T.split (record assignment 2)' + echo 'abc de f |
