diff options
author | Cody Peter Mello <cody.mello@joyent.com> | 2018-10-23 18:01:14 +0000 |
---|---|---|
committer | Cody Peter Mello <cody.mello@joyent.com> | 2019-06-13 18:47:50 +0000 |
commit | 2b2695541d621ef1caa51056d6407f0acd012ed6 (patch) | |
tree | 182c3b6dc707f76c929fb252567b6fb90f46ac3d /usr | |
parent | 084c5c48f7680535f554dd406a99bf0ea8329823 (diff) | |
download | illumos-joyent-2b2695541d621ef1caa51056d6407f0acd012ed6.tar.gz |
OS-7316 Want support for RT (record terminator) variable in nawk(1)
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Jason King <jbk@joyent.com>
Diffstat (limited to 'usr')
-rw-r--r-- | usr/src/cmd/awk/awk.h | 1 | ||||
-rw-r--r-- | usr/src/cmd/awk/lib.c | 73 | ||||
-rw-r--r-- | usr/src/cmd/awk/tran.c | 2 | ||||
-rw-r--r-- | usr/src/man/man1/nawk.1 | 14 | ||||
-rw-r--r-- | usr/src/pkg/manifests/system-test-utiltest.mf | 1 | ||||
-rwxr-xr-x | usr/src/test/util-tests/tests/awk/tests/T.rt | 99 |
6 files changed, 176 insertions, 14 deletions
diff --git a/usr/src/cmd/awk/awk.h b/usr/src/cmd/awk/awk.h index 01495d108d..dfbed45e9d 100644 --- a/usr/src/cmd/awk/awk.h +++ b/usr/src/cmd/awk/awk.h @@ -149,6 +149,7 @@ extern Cell *nfloc; /* NF */ extern Cell *ofsloc; /* OFS */ extern Cell *orsloc; /* ORS */ extern Cell *rsloc; /* RS */ +extern Cell *rtloc; /* RT */ extern Cell *rstartloc; /* RSTART */ extern Cell *rlengthloc; /* RLENGTH */ extern Cell *subseploc; /* SUBSEP */ diff --git a/usr/src/cmd/awk/lib.c b/usr/src/cmd/awk/lib.c index bec53b6e32..bde265e273 100644 --- a/usr/src/cmd/awk/lib.c +++ b/usr/src/cmd/awk/lib.c @@ -69,6 +69,8 @@ char *record; size_t recsize = RECSIZE; static char *fields; static size_t fieldssize = RECSIZE; +static char *rtbuf; +static size_t rtbufsize = RECSIZE; Cell **fldtab; /* pointers to Cells */ char inputFS[100] = " "; @@ -256,11 +258,17 @@ int readrec(char **pbuf, size_t *pbufsize, FILE *inf) /* read one record into buf */ { int sep, c; - char *rr, *buf = *pbuf; + char *rr, *rt, *buf = *pbuf; size_t bufsize = *pbufsize; char *rs = getsval(rsloc); - if ((sep = *rs) == 0) { + if (rtbuf == NULL && (rtbuf = malloc(rtbufsize)) == NULL) + FATAL("out of memory in readrec"); + + rr = buf; + rt = rtbuf; + + if ((sep = *rs) == '\0') { sep = '\n'; /* skip leading \n's */ while ((c = getc(inf)) == '\n' && c != EOF) @@ -268,30 +276,67 @@ readrec(char **pbuf, size_t *pbufsize, FILE *inf) /* read one record into buf */ if (c != EOF) (void) ungetc(c, inf); } - for (rr = buf; ; ) { - while ((c = getc(inf)) != sep && c != EOF) { - if (rr-buf+1 > bufsize) - if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 1")) - FATAL("input record `%.30s...' too long", buf); + while ((c = getc(inf)) != EOF) { + if (c != sep) { + if (rr-buf+1 > bufsize) { + (void) adjbuf(&buf, &bufsize, + 1+rr-buf, recsize, &rr, "readrec1"); + } *rr++ = c; + continue; } - if (*rs == sep || c == EOF) + + /* + * Ensure enough space for either a single separator + * character, or at least two '\n' chars (when RS is + * the empty string). + */ + (void) adjbuf(&rtbuf, &rtbufsize, + 2+rt-rtbuf, recsize, &rt, "readrec2"); + + if (*rs == sep) { + *rt++ = sep; break; - if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */ + } + + if ((c = getc(inf)) == '\n') { /* 2 in a row */ + *rt++ = '\n'; + *rt++ = '\n'; + while ((c = getc(inf)) == '\n' && c != EOF) { + /* Read any further \n's and add them to RT. */ + (void) adjbuf(&rtbuf, &rtbufsize, + 1+rt-rtbuf, recsize, &rt, "readrec3"); + *rt++ = '\n'; + } + if (c != EOF) + (void) ungetc(c, inf); break; - if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr, "readrec 2")) - FATAL("input record `%.30s...' too long", buf); + } + + if (c == EOF) { + *rt++ = '\n'; + break; + } + + (void) adjbuf(&buf, &bufsize, + 2+rr-buf, recsize, &rr, "readrec4"); *rr++ = '\n'; *rr++ = c; } - if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3")) - FATAL("input record `%.30s...' too long", buf); + (void) adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec5"); + (void) adjbuf(&rtbuf, &rtbufsize, 1+rt-rtbuf, recsize, &rt, "readrec6"); *rr = '\0'; + *rt = '\0'; dprintf(("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1)); *pbuf = buf; *pbufsize = bufsize; - return (c == EOF && rr == buf ? 0 : 1); + if (c == EOF && rr == buf) { + return (0); + } else { + (void) setsval(rtloc, rtbuf); + return (1); + } } /* get ARGV[n] */ diff --git a/usr/src/cmd/awk/tran.c b/usr/src/cmd/awk/tran.c index 417bede66d..ba9a685d93 100644 --- a/usr/src/cmd/awk/tran.c +++ b/usr/src/cmd/awk/tran.c @@ -88,6 +88,7 @@ Cell *fnrloc; /* FNR */ Cell *ofsloc; /* OFS */ Cell *orsloc; /* ORS */ Cell *rsloc; /* RS */ +Cell *rtloc; /* RT */ Array *ARGVtab; /* symbol table containing ARGV[...] */ Array *ENVtab; /* symbol table containing ENVIRON[...] */ Cell *rstartloc; /* RSTART */ @@ -132,6 +133,7 @@ syminit(void) /* initialize symbol table with builtin vars */ FS = &fsloc->sval; rsloc = setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab); RS = &rsloc->sval; + rtloc = setsymtab("RT", "", 0.0, STR|DONTFREE, symtab); ofsloc = setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab); OFS = &ofsloc->sval; orsloc = setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab); diff --git a/usr/src/man/man1/nawk.1 b/usr/src/man/man1/nawk.1 index 7734155f19..5fec573a52 100644 --- a/usr/src/man/man1/nawk.1 +++ b/usr/src/man/man1/nawk.1 @@ -711,6 +711,20 @@ The subscript separator string for multi-dimensional arrays. The default value is \fB\e034\fR\&. .RE +.SS "/usr/bin/nawk" +.LP +The following variable is supported for \fB/usr/bin/nawk\fR only: +.sp +.ne 2 +.na +\fB\fBRT\fR\fR +.ad +.RS 12n +The record terminator for the most recent record read. For most records this +will be the same value as \fBRS\fR. At the end of a file with no trailing +separator value, though, this will be set to the empty string (\fB""\fR). +.RE + .SS "Regular Expressions" .LP The \fBnawk\fR utility makes use of the extended regular expression notation diff --git a/usr/src/pkg/manifests/system-test-utiltest.mf b/usr/src/pkg/manifests/system-test-utiltest.mf index b056feb9cd..fbfb38267b 100644 --- a/usr/src/pkg/manifests/system-test-utiltest.mf +++ b/usr/src/pkg/manifests/system-test-utiltest.mf @@ -1066,6 +1066,7 @@ file path=opt/util-tests/tests/awk/tests/T.overflow mode=0555 file path=opt/util-tests/tests/awk/tests/T.re mode=0555 file path=opt/util-tests/tests/awk/tests/T.recache mode=0555 file path=opt/util-tests/tests/awk/tests/T.redir mode=0555 +file path=opt/util-tests/tests/awk/tests/T.rt mode=0555 file path=opt/util-tests/tests/awk/tests/T.split mode=0555 file path=opt/util-tests/tests/awk/tests/T.sub mode=0555 file path=opt/util-tests/tests/awk/tests/T.system mode=0555 diff --git a/usr/src/test/util-tests/tests/awk/tests/T.rt b/usr/src/test/util-tests/tests/awk/tests/T.rt new file mode 100755 index 0000000000..585f3ca3e1 --- /dev/null +++ b/usr/src/test/util-tests/tests/awk/tests/T.rt @@ -0,0 +1,99 @@ +#!/bin/bash + +if [[ -z "$AWK" || -z "$WORKDIR" ]]; then + printf '$AWK and $WORKDIR must be set\n' >&2 + exit 1 +fi + +TEMP0=$WORKDIR/test.temp.0 +TEMP1=$WORKDIR/test.temp.1 +TEMP2=$WORKDIR/test.temp.2 + +RESULT=0 + +fail() { + echo "$1" >&2 + RESULT=1 +} + +echo T.rt: tests for the RT variable + +$AWK 'BEGIN { print (RT == "" ? "true" : "false"); }' > $TEMP1 +printf 'true\n' > $TEMP2 +diff $TEMP1 $TEMP2 || fail 'BAD: T.rt RT is set to "" in BEGIN' + +printf 'a\n' > $TEMP0 +$AWK '{ print (RT == "\n" ? "true" : "false"); }' $TEMP0 > $TEMP1 +printf 'true\n' > $TEMP2 +diff $TEMP1 $TEMP2 || fail 'BAD: T.rt RT is set to "\n"' + +printf 'a' > $TEMP0 +$AWK '{ print (RT == "" ? "true" : "false"); }' $TEMP0 > $TEMP1 +printf 'true\n' > $TEMP2 +diff $TEMP1 $TEMP2 || fail 'BAD: T.rt RT is set to ""' + +$AWK 'BEGIN { "echo hello" | getline; print (RT == "\n" ? "true" : "false"); }' > $TEMP1 +printf 'true\n' > $TEMP2 +diff $TEMP1 $TEMP2 || fail 'BAD: T.rt RT is set to "\n" (getline)' + +$AWK 'BEGIN { "printf a" | getline; print (RT == "" ? "true" : "false"); }' > $TEMP1 +printf 'true\n' > $TEMP2 +diff $TEMP1 $TEMP2 || fail 'BAD: T.rt RT is set to "" (getline)' + +$AWK 'BEGIN { "echo hello" | getline v; print (RT == "\n" ? "true" : "false"); }' > $TEMP1 +printf 'true\n' > $TEMP2 +diff $TEMP1 $TEMP2 || fail 'BAD: T.rt RT is set to "\n" (getline var)' + +$AWK 'BEGIN { + RT = "foo"; + getline < "/etc/passwd"; + print (RT == "\n" ? "true" : "false"); +}' > $TEMP1 +printf 'true\n' > $TEMP2 +diff $TEMP1 $TEMP2 || fail 'BAD: T.rt RT is set to "\n" (getline < file)' + +$AWK 'BEGIN { + RT = "foo"; + getline v < "/etc/passwd"; + print (RT == "\n" ? "true" : "false"); +}' > $TEMP1 +printf 'true\n' > $TEMP2 +diff $TEMP1 $TEMP2 || fail 'BAD: T.rt RT is set to "\n" (getline var < file)' + +# Single newline at end +printf '\n\n\n\n\na\n\na b\na b c d\nq r s t u\n\n\n\n\nv w x y z\n' > $TEMP0 +$AWK 'BEGIN { RS = ""; } { print NF, length(RT); }' $TEMP0 > $TEMP1 +printf '1 2\n11 5\n5 1\n' > $TEMP2 +diff $TEMP1 $TEMP2 || fail 'BAD: T.rt empty RS variable (1 newline at end)' + +# Two newlines at end +printf '\n\n\n\n\na\n\na b\na b c d\nq r s t u\n\n\n\n\nv w x y z\n\n' > $TEMP0 +$AWK 'BEGIN { RS = ""; } { print NF, length(RT); }' $TEMP0 > $TEMP1 +printf '1 2\n11 5\n5 2\n' > $TEMP2 +diff $TEMP1 $TEMP2 || fail 'BAD: T.rt empty RS variable (2 newlines at end)' + +# Multiple newlines at end +printf 'a\n\na b\na b c d\nq r s t u\n\n\n\n\nv w x y z\n\n\n\n' > $TEMP0 +$AWK 'BEGIN { RS = ""; } { print NF, length(RT); }' $TEMP0 > $TEMP1 +printf '1 2\n11 5\n5 4\n' > $TEMP2 +diff $TEMP1 $TEMP2 || fail 'BAD: T.rt empty RS variable (many newlines at end)' + +# No newlines at end +printf 'a\n\na b\na b c d\nq r s t u\n\n\n\n\nv w x y z' > $TEMP0 +$AWK 'BEGIN { RS = ""; } { print NF, length(RT); }' $TEMP0 > $TEMP1 +printf '1 2\n11 5\n5 0\n' > $TEMP2 +diff $TEMP1 $TEMP2 || fail 'BAD: T.rt empty RS variable (no ending newline)' + +# Non-newline separators + +printf 'a\036' > $TEMP0 +$AWK 'BEGIN { RS="\036" } { print (RT == "\036" ? "true" : "false"), length(RS); }' $TEMP0 > $TEMP1 +printf 'true 1\n' > $TEMP2 +diff $TEMP1 $TEMP2 || fail 'BAD: T.rt RT is set to "\036" (RS="\036")' + +printf 'a' > $TEMP0 +$AWK 'BEGIN { RS="\036" } { print (RT == "" ? "true" : "false"), length(RS); }' $TEMP0 > $TEMP1 +printf 'true 1\n' > $TEMP2 +diff $TEMP1 $TEMP2 || fail 'BAD: T.rt RT is set to "" (RS="\036")' + +exit $RESULT |