summary | refs | log | tree | commit | diff
path: root/usr
diff options
context:
space:
mode:
author: Cody Peter Mello <cody.mello@joyent.com> 2018-10-23 18:01:14 +0000
committer: Cody Peter Mello <cody.mello@joyent.com> 2019-06-13 18:47:50 +0000
commit: 2b2695541d621ef1caa51056d6407f0acd012ed6 (patch)
tree: 182c3b6dc707f76c929fb252567b6fb90f46ac3d /usr
parent: 084c5c48f7680535f554dd406a99bf0ea8329823 (diff)
download: illumos-joyent-2b2695541d621ef1caa51056d6407f0acd012ed6.tar.gz
OS-7316 Want support for RT (record terminator) variable in nawk(1)
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Jason King <jbk@joyent.com>
Diffstat (limited to 'usr')
-rw-r--r-- usr/src/cmd/awk/awk.h | 1
-rw-r--r-- usr/src/cmd/awk/lib.c | 73
-rw-r--r-- usr/src/cmd/awk/tran.c | 2
-rw-r--r-- usr/src/man/man1/nawk.1 | 14
-rw-r--r-- usr/src/pkg/manifests/system-test-utiltest.mf | 1
-rwxr-xr-x usr/src/test/util-tests/tests/awk/tests/T.rt | 99
6 files changed, 176 insertions, 14 deletions
diff --git a/usr/src/cmd/awk/awk.h b/usr/src/cmd/awk/awk.h
index 01495d108d..dfbed45e9d 100644
--- a/usr/src/cmd/awk/awk.h
+++ b/usr/src/cmd/awk/awk.h
@@ -149,6 +149,7 @@ extern Cell *nfloc; /* NF */
extern Cell *ofsloc; /* OFS */
extern Cell *orsloc; /* ORS */
extern Cell *rsloc; /* RS */
+extern Cell *rtloc; /* RT */
extern Cell *rstartloc; /* RSTART */
extern Cell *rlengthloc; /* RLENGTH */
extern Cell *subseploc; /* SUBSEP */
diff --git a/usr/src/cmd/awk/lib.c b/usr/src/cmd/awk/lib.c
index bec53b6e32..bde265e273 100644
--- a/usr/src/cmd/awk/lib.c
+++ b/usr/src/cmd/awk/lib.c
@@ -69,6 +69,8 @@ char *record;
size_t recsize = RECSIZE;
static char *fields;
static size_t fieldssize = RECSIZE;
+static char *rtbuf;
+static size_t rtbufsize = RECSIZE;
Cell **fldtab; /* pointers to Cells */
char inputFS[100] = " ";
@@ -256,11 +258,17 @@ int
readrec(char **pbuf, size_t *pbufsize, FILE *inf) /* read one record into buf */
{
int sep, c;
- char *rr, *buf = *pbuf;
+ char *rr, *rt, *buf = *pbuf;
size_t bufsize = *pbufsize;
char *rs = getsval(rsloc);
- if ((sep = *rs) == 0) {
+ if (rtbuf == NULL && (rtbuf = malloc(rtbufsize)) == NULL)
+ FATAL("out of memory in readrec");
+
+ rr = buf;
+ rt = rtbuf;
+
+ if ((sep = *rs) == '\0') {
sep = '\n';
/* skip leading \n's */
while ((c = getc(inf)) == '\n' && c != EOF)
@@ -268,30 +276,67 @@ readrec(char **pbuf, size_t *pbufsize, FILE *inf) /* read one record into buf */
if (c != EOF)
(void) ungetc(c, inf);
}
- for (rr = buf; ; ) {
- while ((c = getc(inf)) != sep && c != EOF) {
- if (rr-buf+1 > bufsize)
- if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 1"))
- FATAL("input record `%.30s...' too long", buf);
+ while ((c = getc(inf)) != EOF) {
+ if (c != sep) {
+ if (rr-buf+1 > bufsize) {
+ (void) adjbuf(&buf, &bufsize,
+ 1+rr-buf, recsize, &rr, "readrec1");
+ }
*rr++ = c;
+ continue;
}
- if (*rs == sep || c == EOF)
+
+ /*
+ * Ensure enough space for either a single separator
+ * character, or at least two '\n' chars (when RS is
+ * the empty string).
+ */
+ (void) adjbuf(&rtbuf, &rtbufsize,
+ 2+rt-rtbuf, recsize, &rt, "readrec2");
+
+ if (*rs == sep) {
+ *rt++ = sep;
break;
- if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
+ }
+
+ if ((c = getc(inf)) == '\n') { /* 2 in a row */
+ *rt++ = '\n';
+ *rt++ = '\n';
+ while ((c = getc(inf)) == '\n' && c != EOF) {
+ /* Read any further \n's and add them to RT. */
+ (void) adjbuf(&rtbuf, &rtbufsize,
+ 1+rt-rtbuf, recsize, &rt, "readrec3");
+ *rt++ = '\n';
+ }
+ if (c != EOF)
+ (void) ungetc(c, inf);
break;
- if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr, "readrec 2"))
- FATAL("input record `%.30s...' too long", buf);
+ }
+
+ if (c == EOF) {
+ *rt++ = '\n';
+ break;
+ }
+
+ (void) adjbuf(&buf, &bufsize,
+ 2+rr-buf, recsize, &rr, "readrec4");
*rr++ = '\n';
*rr++ = c;
}
- if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
- FATAL("input record `%.30s...' too long", buf);
+ (void) adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec5");
+ (void) adjbuf(&rtbuf, &rtbufsize, 1+rt-rtbuf, recsize, &rt, "readrec6");
*rr = '\0';
+ *rt = '\0';
dprintf(("readrec saw <%s>, returns %d\n",
buf, c == EOF && rr == buf ? 0 : 1));
*pbuf = buf;
*pbufsize = bufsize;
- return (c == EOF && rr == buf ? 0 : 1);
+ if (c == EOF && rr == buf) {
+ return (0);
+ } else {
+ (void) setsval(rtloc, rtbuf);
+ return (1);
+ }
}
/* get ARGV[n] */
diff --git a/usr/src/cmd/awk/tran.c b/usr/src/cmd/awk/tran.c
index 417bede66d..ba9a685d93 100644
--- a/usr/src/cmd/awk/tran.c
+++ b/usr/src/cmd/awk/tran.c
@@ -88,6 +88,7 @@ Cell *fnrloc; /* FNR */
Cell *ofsloc; /* OFS */
Cell *orsloc; /* ORS */
Cell *rsloc; /* RS */
+Cell *rtloc; /* RT */
Array *ARGVtab; /* symbol table containing ARGV[...] */
Array *ENVtab; /* symbol table containing ENVIRON[...] */
Cell *rstartloc; /* RSTART */
@@ -132,6 +133,7 @@ syminit(void) /* initialize symbol table with builtin vars */
FS = &fsloc->sval;
rsloc = setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab);
RS = &rsloc->sval;
+ rtloc = setsymtab("RT", "", 0.0, STR|DONTFREE, symtab);
ofsloc = setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab);
OFS = &ofsloc->sval;
orsloc = setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab);
diff --git a/usr/src/man/man1/nawk.1 b/usr/src/man/man1/nawk.1
index 7734155f19..5fec573a52 100644
--- a/usr/src/man/man1/nawk.1
+++ b/usr/src/man/man1/nawk.1
@@ -711,6 +711,20 @@ The subscript separator string for multi-dimensional arrays. The default value
is \fB\e034\fR\&.
.RE
+.SS "/usr/bin/nawk"
+.LP
+The following variable is supported for \fB/usr/bin/nawk\fR only:
+.sp
+.ne 2
+.na
+\fB\fBRT\fR\fR
+.ad
+.RS 12n
+The record terminator for the most recent record read. For most records this
+will be the same value as \fBRS\fR. At the end of a file with no trailing
+separator value, though, this will be set to the empty string (\fB""\fR).
+.RE
+
.SS "Regular Expressions"
.LP
The \fBnawk\fR utility makes use of the extended regular expression notation
diff --git a/usr/src/pkg/manifests/system-test-utiltest.mf b/usr/src/pkg/manifests/system-test-utiltest.mf
index b056feb9cd..fbfb38267b 100644
--- a/usr/src/pkg/manifests/system-test-utiltest.mf
+++ b/usr/src/pkg/manifests/system-test-utiltest.mf
@@ -1066,6 +1066,7 @@ file path=opt/util-tests/tests/awk/tests/T.overflow mode=0555
file path=opt/util-tests/tests/awk/tests/T.re mode=0555
file path=opt/util-tests/tests/awk/tests/T.recache mode=0555
file path=opt/util-tests/tests/awk/tests/T.redir mode=0555
+file path=opt/util-tests/tests/awk/tests/T.rt mode=0555
file path=opt/util-tests/tests/awk/tests/T.split mode=0555
file path=opt/util-tests/tests/awk/tests/T.sub mode=0555
file path=opt/util-tests/tests/awk/tests/T.system mode=0555
diff --git a/usr/src/test/util-tests/tests/awk/tests/T.rt b/usr/src/test/util-tests/tests/awk/tests/T.rt
new file mode 100755
index 0000000000..585f3ca3e1
--- /dev/null
+++ b/usr/src/test/util-tests/tests/awk/tests/T.rt
@@ -0,0 +1,99 @@
+#!/bin/bash
+
+if [[ -z "$AWK" || -z "$WORKDIR" ]]; then
+ printf '$AWK and $WORKDIR must be set\n' >&2
+ exit 1
+fi
+
+TEMP0=$WORKDIR/test.temp.0
+TEMP1=$WORKDIR/test.temp.1
+TEMP2=$WORKDIR/test.temp.2
+
+RESULT=0
+
+fail() {
+ echo "$1" >&2
+ RESULT=1
+}
+
+echo T.rt: tests for the RT variable
+
+$AWK 'BEGIN { print (RT == "" ? "true" : "false"); }' > $TEMP1
+printf 'true\n' > $TEMP2
+diff $TEMP1 $TEMP2 || fail 'BAD: T.rt RT is set to "" in BEGIN'
+
+printf 'a\n' > $TEMP0
+$AWK '{ print (RT == "\n" ? "true" : "false"); }' $TEMP0 > $TEMP1
+printf 'true\n' > $TEMP2
+diff $TEMP1 $TEMP2 || fail 'BAD: T.rt RT is set to "\n"'
+
+printf 'a' > $TEMP0
+$AWK '{ print (RT == "" ? "true" : "false"); }' $TEMP0 > $TEMP1
+printf 'true\n' > $TEMP2
+diff $TEMP1 $TEMP2 || fail 'BAD: T.rt RT is set to ""'
+
+$AWK 'BEGIN { "echo hello" | getline; print (RT == "\n" ? "true" : "false"); }' > $TEMP1
+printf 'true\n' > $TEMP2
+diff $TEMP1 $TEMP2 || fail 'BAD: T.rt RT is set to "\n" (getline)'
+
+$AWK 'BEGIN { "printf a" | getline; print (RT == "" ? "true" : "false"); }' > $TEMP1
+printf 'true\n' > $TEMP2
+diff $TEMP1 $TEMP2 || fail 'BAD: T.rt RT is set to "" (getline)'
+
+$AWK 'BEGIN { "echo hello" | getline v; print (RT == "\n" ? "true" : "false"); }' > $TEMP1
+printf 'true\n' > $TEMP2
+diff $TEMP1 $TEMP2 || fail 'BAD: T.rt RT is set to "\n" (getline var)'
+
+$AWK 'BEGIN {
+ RT = "foo";
+ getline < "/etc/passwd";
+ print (RT == "\n" ? "true" : "false");
+}' > $TEMP1
+printf 'true\n' > $TEMP2
+diff $TEMP1 $TEMP2 || fail 'BAD: T.rt RT is set to "\n" (getline < file)'
+
+$AWK 'BEGIN {
+ RT = "foo";
+ getline v < "/etc/passwd";
+ print (RT == "\n" ? "true" : "false");
+}' > $TEMP1
+printf 'true\n' > $TEMP2
+diff $TEMP1 $TEMP2 || fail 'BAD: T.rt RT is set to "\n" (getline var < file)'
+
+# Single newline at end
+printf '\n\n\n\n\na\n\na b\na b c d\nq r s t u\n\n\n\n\nv w x y z\n' > $TEMP0
+$AWK 'BEGIN { RS = ""; } { print NF, length(RT); }' $TEMP0 > $TEMP1
+printf '1 2\n11 5\n5 1\n' > $TEMP2
+diff $TEMP1 $TEMP2 || fail 'BAD: T.rt empty RS variable (1 newline at end)'
+
+# Two newlines at end
+printf '\n\n\n\n\na\n\na b\na b c d\nq r s t u\n\n\n\n\nv w x y z\n\n' > $TEMP0
+$AWK 'BEGIN { RS = ""; } { print NF, length(RT); }' $TEMP0 > $TEMP1
+printf '1 2\n11 5\n5 2\n' > $TEMP2
+diff $TEMP1 $TEMP2 || fail 'BAD: T.rt empty RS variable (2 newlines at end)'
+
+# Multiple newlines at end
+printf 'a\n\na b\na b c d\nq r s t u\n\n\n\n\nv w x y z\n\n\n\n' > $TEMP0
+$AWK 'BEGIN { RS = ""; } { print NF, length(RT); }' $TEMP0 > $TEMP1
+printf '1 2\n11 5\n5 4\n' > $TEMP2
+diff $TEMP1 $TEMP2 || fail 'BAD: T.rt empty RS variable (many newlines at end)'
+
+# No newlines at end
+printf 'a\n\na b\na b c d\nq r s t u\n\n\n\n\nv w x y z' > $TEMP0
+$AWK 'BEGIN { RS = ""; } { print NF, length(RT); }' $TEMP0 > $TEMP1
+printf '1 2\n11 5\n5 0\n' > $TEMP2
+diff $TEMP1 $TEMP2 || fail 'BAD: T.rt empty RS variable (no ending newline)'
+
+# Non-newline separators
+
+printf 'a\036' > $TEMP0
+$AWK 'BEGIN { RS="\036" } { print (RT == "\036" ? "true" : "false"), length(RS); }' $TEMP0 > $TEMP1
+printf 'true 1\n' > $TEMP2
+diff $TEMP1 $TEMP2 || fail 'BAD: T.rt RT is set to "\036" (RS="\036")'
+
+printf 'a' > $TEMP0
+$AWK 'BEGIN { RS="\036" } { print (RT == "" ? "true" : "false"), length(RS); }' $TEMP0 > $TEMP1
+printf 'true 1\n' > $TEMP2
+diff $TEMP1 $TEMP2 || fail 'BAD: T.rt RT is set to "" (RS="\036")'
+
+exit $RESULT