summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Cody Peter Mello <cody.mello@joyent.com> 2018-10-16 21:41:38 +0000
committer: Cody Peter Mello <cody.mello@joyent.com> 2019-06-13 18:47:50 +0000
commit: 084c5c48f7680535f554dd406a99bf0ea8329823 (patch)
tree: 2ccc88d69a6a75d4ce6129d12793c8c537d62b12
parent: b4acf023dd2b2008c601d706c1ba8f9684bca593 (diff)
download: illumos-joyent-084c5c48f7680535f554dd406a99bf0ea8329823.tar.gz
OS-7315 Update nawk(1) field splitting behaviour to match POSIX definition
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Jason King <jbk@joyent.com>
-rw-r--r-- usr/src/cmd/awk/awk.h 1
-rw-r--r-- usr/src/cmd/awk/lib.c 24
-rw-r--r-- usr/src/cmd/awk/tran.c 2
-rw-r--r-- usr/src/test/util-tests/tests/awk/bugs-fixed/fs-overflow.ok 2
-rwxr-xr-x usr/src/test/util-tests/tests/awk/tests/T.split 75
5 files changed, 91 insertions, 13 deletions
diff --git a/usr/src/cmd/awk/awk.h b/usr/src/cmd/awk/awk.h
index b1db39fadb..01495d108d 100644
--- a/usr/src/cmd/awk/awk.h
+++ b/usr/src/cmd/awk/awk.h
@@ -338,6 +338,7 @@ extern void FATAL(const char *, ...) __attribute__((__noreturn__));
extern void WARNING(const char *, ...);
extern void error(void);
extern void nextfile(void);
+extern void savefs(void);
extern int isclvar(const char *);
extern int is_number(const char *);
diff --git a/usr/src/cmd/awk/lib.c b/usr/src/cmd/awk/lib.c
index ae60fde3f1..bec53b6e32 100644
--- a/usr/src/cmd/awk/lib.c
+++ b/usr/src/cmd/awk/lib.c
@@ -144,6 +144,23 @@ initgetrec(void)
infile = stdin; /* no filenames, so use stdin */
}
+/*
+ * POSIX specifies that fields are supposed to be evaluated as if they were
+ * split using the value of FS at the time that the record's value ($0) was
+ * read.
+ *
+ * Since field-splitting is done lazily, we save the current value of FS
+ * whenever a new record is read in (implicitly or via getline), or when
+ * a new value is assigned to $0.
+ */
+void
+savefs(void)
+{
+ if (strlen(getsval(fsloc)) >= sizeof (inputFS))
+ FATAL("field separator %.10s... is too long", *FS);
+ (void) strcpy(inputFS, *FS);
+}
+
static int firsttime = 1;
/*
@@ -167,6 +184,7 @@ getrec(char **pbuf, size_t *pbufsize, int isrecord)
if (isrecord) {
donefld = 0;
donerec = 1;
+ savefs();
}
saveb0 = buf[0];
buf[0] = '\0';
@@ -242,9 +260,6 @@ readrec(char **pbuf, size_t *pbufsize, FILE *inf) /* read one record into buf */
size_t bufsize = *pbufsize;
char *rs = getsval(rsloc);
- if (strlen(getsval(fsloc)) >= sizeof (inputFS))
- FATAL("field separator %.10s... is too long", *FS);
- (void) strcpy(inputFS, *FS); /* for subsequent field splitting */
if ((sep = *rs) == 0) {
sep = '\n';
/* skip leading \n's */
@@ -342,9 +357,6 @@ fldbld(void) /* create fields from current record */
fr = fields;
i = 0; /* number of fields accumulated here */
- if (strlen(getsval(fsloc)) >= sizeof (inputFS))
- FATAL("field separator %.10s... is too long", *FS);
- (void) strcpy(inputFS, *FS);
if (strlen(inputFS) > 1) { /* it's a regular expression */
i = refldbld(r, inputFS);
} else if ((sep = *inputFS) == ' ') { /* default whitespace */
diff --git a/usr/src/cmd/awk/tran.c b/usr/src/cmd/awk/tran.c
index 012b90acf4..417bede66d 100644
--- a/usr/src/cmd/awk/tran.c
+++ b/usr/src/cmd/awk/tran.c
@@ -377,6 +377,7 @@ setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
} else if (isrec(vp)) {
donefld = 0; /* mark $1... invalid */
donerec = 1;
+ savefs();
} else if (vp == ofsloc) {
if (donerec == 0)
recbld();
@@ -424,6 +425,7 @@ setsval(Cell *vp, const char *s) /* set string val of a Cell */
} else if (isrec(vp)) {
donefld = 0; /* mark $1... invalid */
donerec = 1;
+ savefs();
} else if (vp == ofsloc) {
if (donerec == 0)
recbld();
diff --git a/usr/src/test/util-tests/tests/awk/bugs-fixed/fs-overflow.ok b/usr/src/test/util-tests/tests/awk/bugs-fixed/fs-overflow.ok
index 3a9b6cfad5..6c1b534bcb 100644
--- a/usr/src/test/util-tests/tests/awk/bugs-fixed/fs-overflow.ok
+++ b/usr/src/test/util-tests/tests/awk/bugs-fixed/fs-overflow.ok
@@ -1,3 +1,3 @@
$AWK: field separator cccccccccc... is too long
- source line number 12
+ source line number 11
EXIT CODE: 2
diff --git a/usr/src/test/util-tests/tests/awk/tests/T.split b/usr/src/test/util-tests/tests/awk/tests/T.split
index 045256e6a9..5444561993 100755
--- a/usr/src/test/util-tests/tests/awk/tests/T.split
+++ b/usr/src/test/util-tests/tests/awk/tests/T.split
@@ -18,14 +18,31 @@ fail() {
echo T.split: misc tests of field splitting and split command
-echo a:bc:def > $TEMP0
-echo a > $TEMP1
-$AWK '{ FS = ":"; print $1 }' $TEMP0 > $TEMP2
+$AWK 'BEGIN {
+ # Assign string to $0, then change FS.
+ FS = ":";
+ $0="a:bc:def";
+ FS = "-";
+ print FS, $1, NF;
+
+ # Assign number to $0, then change FS.
+ FS = "2";
+ $0=1212121;
+ FS="3";
+ print FS, $1, NF;
+}' > $TEMP1
+echo '- a 3
+3 1 4' > $TEMP2
diff $TEMP1 $TEMP2 || fail 'BAD: T.split 0.1'
-echo a:bc:def > $TEMP0
-echo 3 > $TEMP1
-$AWK '{ FS = ":"; print NF }' $TEMP0 > $TEMP2
+$AWK 'BEGIN {
+ # FS changes after getline.
+ FS = ":";
+ "echo a:bc:def" | getline;
+ FS = "-";
+ print FS, $1, NF;
+}' > $TEMP1
+echo '- a 3' > $TEMP2
diff $TEMP1 $TEMP2 || fail 'BAD: T.split 0.2'
echo '
@@ -45,6 +62,52 @@ echo '0
4' > $TEMP2
diff $TEMP1 $TEMP2 || fail 'BAD: T.split 0.3'
+# getline var shouldn't impact fields.
+
+echo 'f b a' > $TEMP0
+$AWK '{
+ FS = ":";
+ getline a < "/etc/passwd";
+ print $1;
+}' $TEMP0 > $TEMP1
+echo 'f' > $TEMP2
+diff $TEMP1 $TEMP2 || fail 'BAD: T.split 0.4'
+
+echo 'a b c d
+foo
+e f g h i
+bar' > $TEMP0
+$AWK '{
+ FS=":";
+ getline v;
+ print $2, NF;
+ FS=" ";
+}' $TEMP0 > $TEMP1
+echo 'b 4
+f 5' > $TEMP2
+diff $TEMP1 $TEMP2 || fail 'BAD: T.split 0.5'
+
+echo 'a.b.c=d.e.f
+g.h.i=j.k.l
+m.n.o=p.q.r' > $TEMP0
+echo 'b
+h
+n' > $TEMP1
+$AWK 'BEGIN { FS="=" } { FS="."; $0=$1; print $2; FS="="; }' $TEMP0 > $TEMP2
+diff $TEMP1 $TEMP2 || fail 'BAD: T.split (record assignment 1)'
+
+echo 'a.b.c=d.e.f
+g.h.i=j.k.l
+m.n.o=p.q.r' > $TEMP0
+echo 'd.e.f
+b
+j.k.l
+h
+p.q.r
+n' > $TEMP1
+$AWK 'BEGIN { FS="=" } { print $2; FS="."; $0=$1; print $2; FS="="; }' $TEMP0 > $TEMP2
+diff $TEMP1 $TEMP2 || fail 'BAD: T.split (record assignment 2)'
+
echo 'abc
de
f