summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
author Garrett D'Amore <garrett@nexenta.com> 2010-12-15 17:12:35 -0800
committer Garrett D'Amore <garrett@nexenta.com> 2010-12-15 17:12:35 -0800
commit 84441f85b19f6b8080883f30109e58e43c893709 (patch)
tree 2cda098e30c31b5014e9ef70863a6aa23bd5a042 /usr/src
parent 87a4464e3e8a802f4f55fba5dff7ac7d4f66d309 (diff)
download illumos-joyent-84441f85b19f6b8080883f30109e58e43c893709.tar.gz
508 od doesn't set text domain
509 need an open source sed(1) replacement
516 libc regex misses \< and \> word delimiters
Reviewed by: gwr@nexenta.com
Reviewed by: roland.mainz@nexenta.com
Approved by: gwr@nexenta.com
Diffstat (limited to 'usr/src')
-rw-r--r-- usr/src/Makefile.lint 1
-rw-r--r-- usr/src/cmd/Makefile 1
-rw-r--r-- usr/src/cmd/mdb/tools/scripts/tigen.sh 3
-rw-r--r-- usr/src/cmd/od/od.c 6
-rw-r--r-- usr/src/cmd/sed/Makefile 53
-rw-r--r-- usr/src/cmd/sed/POSIX 204
-rw-r--r-- usr/src/cmd/sed/THIRDPARTYLICENSE 31
-rw-r--r-- usr/src/cmd/sed/THIRDPARTYLICENSE.descrip 1
-rw-r--r-- usr/src/cmd/sed/compile.c 934
-rw-r--r-- usr/src/cmd/sed/defs.h 150
-rw-r--r-- usr/src/cmd/sed/extern.h 67
-rw-r--r-- usr/src/cmd/sed/main.c 520
-rw-r--r-- usr/src/cmd/sed/misc.c 82
-rw-r--r-- usr/src/cmd/sed/process.c 767
-rw-r--r-- usr/src/cmd/sed/sed.1 636
-rw-r--r-- usr/src/cmd/sed/sed.txt 391
-rw-r--r-- usr/src/head/regex.h 1
-rw-r--r-- usr/src/lib/libc/port/locale/engine.c 7
-rw-r--r-- usr/src/lib/libc/port/locale/regcomp.c 18
-rw-r--r-- usr/src/pkg/manifests/SUNWcs.mf 2
-rw-r--r-- usr/src/pkg/manifests/system-xopen-xcu4.mf 2
21 files changed, 3868 insertions, 9 deletions
diff --git a/usr/src/Makefile.lint b/usr/src/Makefile.lint
index 6ccb30c9e4..879ce1d1f5 100644
--- a/usr/src/Makefile.lint
+++ b/usr/src/Makefile.lint
@@ -269,6 +269,7 @@ COMMON_SUBDIRS = \
cmd/savecore \
cmd/sbdadm \
cmd/sdpadm \
+ cmd/sed \
cmd/setpgrp \
cmd/smbios \
cmd/sgs \
diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile
index 8d94808b35..839fe278c0 100644
--- a/usr/src/cmd/Makefile
+++ b/usr/src/cmd/Makefile
@@ -357,6 +357,7 @@ COMMON_SUBDIRS= \
scsi \
sdiff \
sdpadm \
+ sed \
sendmail \
setfacl \
setmnt \
diff --git a/usr/src/cmd/mdb/tools/scripts/tigen.sh b/usr/src/cmd/mdb/tools/scripts/tigen.sh
index 2f3ff0e997..58c97d8d8a 100644
--- a/usr/src/cmd/mdb/tools/scripts/tigen.sh
+++ b/usr/src/cmd/mdb/tools/scripts/tigen.sh
@@ -24,7 +24,6 @@
# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-#ident "%Z%%M% %I% %E% SMI"
#
#
@@ -122,7 +121,7 @@ for term in $termlist; do
$verbose && echo "loading terminfo for $term ... \c" >& 2
echo "static const termio_attr_t ${cterm}_attrs[] = {"
- sed -n '/termio_attrs\[\] = /,/^\}/p' $termio_c | \
+ sed -n '/termio_attrs\[\] = /,/^}/p' $termio_c | \
sed -n \ 's/{ "\([a-z0-9]*\)", \([A-Z_]*\),.*/\1 \2/p' | \
while read attr type; do
diff --git a/usr/src/cmd/od/od.c b/usr/src/cmd/od/od.c
index dab2c167cd..30e140ba96 100644
--- a/usr/src/cmd/od/od.c
+++ b/usr/src/cmd/od/od.c
@@ -39,6 +39,11 @@
#define _(x) gettext(x)
+
+#ifndef TEXT_DOMAIN
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+
/* address format */
static char *afmt = "%07llo";
static char *cfmt = " ";
@@ -551,6 +556,7 @@ main(int argc, char **argv)
input = stdin;
(void) setlocale(LC_ALL, "");
+ (void) textdomain(TEXT_DOMAIN);
while ((c = getopt(argc, argv, "A:bCcdDfFj:N:oOsSxXvt:")) != EOF) {
switch (c) {
diff --git a/usr/src/cmd/sed/Makefile b/usr/src/cmd/sed/Makefile
new file mode 100644
index 0000000000..a2feb2ab24
--- /dev/null
+++ b/usr/src/cmd/sed/Makefile
@@ -0,0 +1,53 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2010 Nexenta Systems, Inc. All rights reserved.
+#
+
+
+PROG= sed
+XPG4PROG= sed
+OBJS= main.o compile.o misc.o process.o
+SRCS= $(OBJS:%.o=%.c)
+POFILES= $(OBJS:%.o=%.po)
+
+include ../Makefile.cmd
+
+CPPFLAGS += -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
+
+CFLAGS += $(CCVERBOSE)
+
+.KEEP_STATE:
+
+
+all: $(PROG)
+
+$(PROG): $(OBJS)
+ $(LINK.c) $(OBJS) -o $@ $(LDLIBS)
+ $(POST_PROCESS)
+
+$(POFILE): $(POFILES)
+ $(RM) $@
+ $(CAT) $(POFILES) > $@
+
+$(ROOTXPG4PROG):
+ $(RM) $@
+ $(LN) -s ../../bin/$(PROG) $@
+
+install: all $(DIRS) $(ROOTPROG) $(ROOTXPG4PROG)
+
+clean:
+ $(RM) $(OBJS) $(POFILES)
+
+lint: lint_SRCS
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/sed/POSIX b/usr/src/cmd/sed/POSIX
new file mode 100644
index 0000000000..239acf8e48
--- /dev/null
+++ b/usr/src/cmd/sed/POSIX
@@ -0,0 +1,204 @@
+# @(#)POSIX 8.1 (Berkeley) 6/6/93
+# $FreeBSD$
+
+Comments on the IEEE P1003.2 Draft 12
+ Part 2: Shell and Utilities
+ Section 4.55: sed - Stream editor
+
+Diomidis Spinellis <dds@doc.ic.ac.uk>
+Keith Bostic <bostic@cs.berkeley.edu>
+
+In the following paragraphs, "wrong" usually means "inconsistent with
+historic practice", as most of the following comments refer to
+undocumented inconsistencies between the historical versions of sed and
+the POSIX 1003.2 standard. All the comments are notes taken while
+implementing a POSIX-compatible version of sed, and should not be
+interpreted as official opinions or criticism towards the POSIX committee.
+All uses of "POSIX" refer to section 4.55, Draft 12 of POSIX 1003.2.
+
+ 1. 32V and BSD derived implementations of sed strip the text
+ arguments of the a, c and i commands of their initial blanks,
+ i.e.
+
+ #!/bin/sed -f
+ a\
+ foo\
+ \ indent\
+ bar
+
+ produces:
+
+ foo
+ indent
+ bar
+
+ POSIX does not specify this behavior as the System V versions of
+ sed do not do this stripping. The argument against stripping is
+ that it is difficult to write sed scripts that have leading blanks
+ if they are stripped. The argument for stripping is that it is
+ difficult to write readable sed scripts unless indentation is allowed
+ and ignored, and leading whitespace is obtainable by entering a
+ backslash in front of it. This implementation follows the BSD
+ historic practice.
+
+ 2. Historical versions of sed required that the w flag be the last
+ flag to an s command as it takes an additional argument. This
+ is obvious, but not specified in POSIX.
+
+ 3. Historical versions of sed required that whitespace follow a w
+ flag to an s command. This is not specified in POSIX. This
+ implementation permits whitespace but does not require it.
+
+ 4. Historical versions of sed permitted any number of whitespace
+ characters to follow the w command. This is not specified in
+ POSIX. This implementation permits whitespace but does not
+ require it.
+
+ 5. The rule for the l command differs from historic practice. Table
+ 2-15 includes the various ANSI C escape sequences, including \\
+ for backslash. Some historical versions of sed displayed two
+ digit octal numbers, too, not three as specified by POSIX. POSIX
+ is a cleanup, and is followed by this implementation.
+
+ 6. The POSIX specification for ! does not specify that for a single
+ command the command must not contain an address specification
+ whereas the command list can contain address specifications. The
+ specification for ! implies that "3!/hello/p" works, and it never
+ has, historically. Note,
+
+ 3!{
+ /hello/p
+ }
+
+ does work.
+
+ 7. POSIX does not specify what happens with consecutive ! commands
+ (e.g. /foo/!!!p). Historic implementations allow any number of
+ !'s without changing the behaviour. (It seems logical that each
+ one might reverse the behaviour.) This implementation follows
+ historic practice.
+
+ 8. Historic versions of sed permitted commands to be separated
+ by semi-colons, e.g. "sed -ne '1p;2p;3q'" printed the first
+ three lines of a file. This is not specified by POSIX.
+ Note, the ; command separator is not allowed for the commands
+ a, c, i, w, r, :, b, t, # and at the end of a w flag in the s
+ command. This implementation follows historic practice and
+ implements the ; separator.
+
+ 9. Historic versions of sed terminated the script if EOF was reached
+ during the execution of the 'n' command, i.e.:
+
+ sed -e '
+ n
+ i\
+ hello
+ ' </dev/null
+
+ did not produce any output. POSIX does not specify this behavior.
+ This implementation follows historic practice.
+
+10. Deleted.
+
+11. Historical implementations do not output the change text of a c
+ command in the case of an address range whose first line number
+ is greater than the second (e.g. 3,1). POSIX requires that the
+ text be output. Since the historic behavior doesn't seem to have
+ any particular purpose, this implementation follows the POSIX
+ behavior.
+
+12. POSIX does not specify whether address ranges are checked and
+ reset if a command is not executed due to a jump. The following
+ program will behave in different ways depending on whether the
+ 'c' command is triggered at the third line, i.e. will the text
+ be output even though line 3 of the input will never logically
+ encounter that command.
+
+ 2,4b
+ 1,3c\
+ text
+
+ Historic implementations did not output the text in the above
+ example. Therefore it was believed that a range whose second
+ address was never matched extended to the end of the input.
+ However, the current practice adopted by this implementation,
+ as well as by those from GNU and SUN, is as follows: The text
+ from the 'c' command still isn't output because the second address
+ isn't actually matched; but the range is reset after all if its
+ second address is a line number. In the above example, only the
+ first line of the input will be deleted.
+
+13. Historical implementations allow an output suppressing #n at the
+ beginning of -e arguments as well as in a script file. POSIX
+ does not specify this. This implementation follows historical
+ practice.
+
+14. POSIX does not explicitly specify how sed behaves if no script is
+ specified. Since the sed Synopsis permits this form of the command,
+ and the language in the Description section states that the input
+ is output, it seems reasonable that it behave like the cat(1)
+ command. Historic sed implementations behave differently for "ls |
+ sed", where they produce no output, and "ls | sed -e#", where they
+ behave like cat. This implementation behaves like cat in both cases.
+
+15. The POSIX requirement to open all w files at the beginning makes
+ sed behave nonintuitively when the w commands are preceded by
+ addresses or are within conditional blocks. This implementation
+ follows historic practice and POSIX, by default, and provides the
+ -a option which opens the files only when they are needed.
+
+16. POSIX does not specify how escape sequences other than \n and \D
+ (where D is the delimiter character) are to be treated. This is
+ reasonable, however, it also doesn't state that the backslash is
+ to be discarded from the output regardless. A strict reading of
+ POSIX would be that "echo xyz | sed s/./\a" would display "\ayz".
+ As historic sed implementations always discarded the backslash,
+ this implementation does as well.
+
+17. POSIX specifies that an address can be "empty". This implies
+ that constructs like ",d" or "1,d" and ",5d" are allowed. This
+ is not true for historic implementations or this implementation
+ of sed.
+
+18. The b t and : commands are documented in POSIX to ignore leading
+ white space, but no mention is made of trailing white space.
+ Historic implementations of sed assigned different locations to
+ the labels "x" and "x ". This is not useful, and leads to subtle
+ programming errors, but it is historic practice and changing it
+ could theoretically break working scripts. This implementation
+ follows historic practice.
+
+19. Although POSIX specifies that reading from files that do not exist
+ from within the script must not terminate the script, it does not
+ specify what happens if a write command fails. Historic practice
+ is to fail immediately if the file cannot be opened or written.
+ This implementation follows historic practice.
+
+20. Historic practice is that the \n construct can be used for either
+ string1 or string2 of the y command. This is not specified by
+ POSIX. This implementation follows historic practice.
+
+21. Deleted.
+
+22. Historic implementations of sed ignore the RE delimiter characters
+ within character classes. This is not specified in POSIX. This
+ implementation follows historic practice.
+
+23. Historic implementations handle empty RE's in a special way: the
+ empty RE is interpreted as if it were the last RE encountered,
+ whether in an address or elsewhere. POSIX does not document this
+ behavior. For example the command:
+
+ sed -e /abc/s//XXX/
+
+ substitutes XXX for the pattern abc. The semantics of "the last
+ RE" can be defined in two different ways:
+
+ 1. The last RE encountered when compiling (lexical/static scope).
+ 2. The last RE encountered while running (dynamic scope).
+
+ While many historical implementations fail on programs depending
+ on scope differences, the SunOS version exhibited dynamic scope
+ behaviour. This implementation does dynamic scoping, as this seems
+ the most useful and in order to remain consistent with historical
+ practice.
diff --git a/usr/src/cmd/sed/THIRDPARTYLICENSE b/usr/src/cmd/sed/THIRDPARTYLICENSE
new file mode 100644
index 0000000000..1ff25a60f8
--- /dev/null
+++ b/usr/src/cmd/sed/THIRDPARTYLICENSE
@@ -0,0 +1,31 @@
+Copyright 2010 Nexenta Systems, Inc. All rights reserved.
+Copyright (c) 1992 Diomidis Spinellis.
+Copyright (c) 1992, 1993, 1994
+ The Regents of the University of California. All rights reserved.
+
+This code is derived from software contributed to Berkeley by
+Diomidis Spinellis of Imperial College, University of London.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+4. Neither the name of the University nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
diff --git a/usr/src/cmd/sed/THIRDPARTYLICENSE.descrip b/usr/src/cmd/sed/THIRDPARTYLICENSE.descrip
new file mode 100644
index 0000000000..0966e485b2
--- /dev/null
+++ b/usr/src/cmd/sed/THIRDPARTYLICENSE.descrip
@@ -0,0 +1 @@
+SED - STREAM EDITOR
diff --git a/usr/src/cmd/sed/compile.c b/usr/src/cmd/sed/compile.c
new file mode 100644
index 0000000000..8eded89167
--- /dev/null
+++ b/usr/src/cmd/sed/compile.c
@@ -0,0 +1,934 @@
+/*
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 1992 Diomidis Spinellis.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Diomidis Spinellis of Imperial College, University of London.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+#include <libintl.h>
+#include <note.h>
+
+#include "defs.h"
+#include "extern.h"
+
+#define LHSZ 128 /* number of slots in labels[] */
+#define LHMASK (LHSZ - 1) /* all-ones mask, since LHSZ is a power of two */
+static struct labhash {
+ struct labhash *lh_next; /* presumably the next entry sharing a labels[] slot */
+ uint_t lh_hash; /* NOTE(review): presumably the label's hash; set outside this chunk */
+ struct s_command *lh_cmd; /* command this entry refers to */
+ int lh_ref; /* NOTE(review): looks like a "was referenced" marker; confirm in uselabel() */
+} *labels[LHSZ]; /* table of LHSZ entry pointers, file-local */
+
+static char *compile_addr(char *, struct s_addr *);
+static char *compile_ccl(char **, char *);
+static char *compile_delimited(char *, char *, int);
+static char *compile_flags(char *, struct s_subst *);
+static regex_t *compile_re(char *, int);
+static char *compile_subst(char *, struct s_subst *);
+static char *compile_text(void);
+static char *compile_tr(char *, struct s_tr **);
+static struct s_command
+ **compile_stream(struct s_command **);
+static char *duptoeol(char *, const char *);
+static void enterlabel(struct s_command *);
+static struct s_command
+ *findlabel(char *);
+static void fixuplabel(struct s_command *, struct s_command *);
+static void uselabel(void);
+
+/*
+ * Command specification. This is used to drive the command parser.
+ *
+ * compile_stream() looks a command letter up by its code, rejects the
+ * command when more addresses were parsed than naddr allows, and then
+ * switches on args to decide how the rest of the line is parsed.
+ */
+struct s_format {
+ char code; /* Command code */
+ int naddr; /* Number of address args */
+ enum e_args args; /* Argument type */
+};
+
+static struct s_format cmd_fmts[] = { /* one row per command: code, max addresses, argument type */
+ {'{', 2, GROUP},
+ {'}', 0, ENDGROUP},
+ {'a', 1, TEXT},
+ {'b', 2, BRANCH},
+ {'c', 2, TEXT},
+ {'d', 2, EMPTY},
+ {'D', 2, EMPTY},
+ {'g', 2, EMPTY},
+ {'G', 2, EMPTY},
+ {'h', 2, EMPTY},
+ {'H', 2, EMPTY},
+ {'i', 1, TEXT},
+ {'l', 2, EMPTY},
+ {'n', 2, EMPTY},
+ {'N', 2, EMPTY},
+ {'p', 2, EMPTY},
+ {'P', 2, EMPTY},
+ {'q', 1, EMPTY},
+ {'r', 1, RFILE},
+ {'s', 2, SUBST},
+ {'t', 2, BRANCH},
+ {'w', 2, WFILE},
+ {'x', 2, EMPTY},
+ {'y', 2, TR},
+ {'!', 2, NONSEL},
+ {':', 0, LABEL},
+ {'#', 0, COMMENT},
+ {'=', 1, EMPTY},
+ {'\0', 0, COMMENT}, /* sentinel: the lookup loop in compile_stream() stops at code '\0' */
+};
+
+/* The compiled program. */
+struct s_command *prog;
+
+/*
+ * Compile the program into prog.
+ * Initialise appends.
+ *
+ * compile_stream() returns the link slot that is still open at the end of
+ * the script; storing NULL through it terminates the command list. The
+ * labels are then resolved with fixuplabel() and uselabel(). Finally the
+ * two work arrays are sized from counters gathered while compiling:
+ * appends gets appendnum entries (or is NULL when there are none) and
+ * match gets maxnsub + 1 regmatch_t slots. Exits through err() if either
+ * allocation fails.
+ */
+void
+compile(void)
+{
+ *compile_stream(&prog) = NULL; /* terminate the list at the last open link */
+ fixuplabel(prog, NULL);
+ uselabel();
+ if (appendnum == 0)
+ appends = NULL;
+ else if ((appends = malloc(sizeof (struct s_appends) * appendnum)) ==
+ NULL)
+ err(1, "malloc");
+ if ((match = malloc((maxnsub + 1) * sizeof (regmatch_t))) == NULL) /* maxnsub is raised by compile_re() */
+ err(1, "malloc");
+}
+
+#define EATSPACE() do { \
+ if (p) \
+ while (*p && isspace((unsigned char)*p)) \
+ p++; \
+ _NOTE(CONSTCOND) \
+} while (0) /* advances the caller's local `p' past leading isspace() characters; does nothing when p is NULL */
+
+static struct s_command **
+compile_stream(struct s_command **link)
+{ /*
+ * Parse script lines obtained from cu_fgets() into a chain of
+ * struct s_command. Each new command is stored through *link, and link
+ * then moves to that command's next field (or to u.c for the body of a
+ * '{' group). Open groups are kept on a stack threaded through their
+ * next fields until the matching '}' is seen.
+ *
+ * Returns the link slot still open at end of input so that the caller
+ * can terminate the list. Syntax errors go to fatal(); allocation and
+ * open() failures go to err().
+ */
+ char *p;
+ static char lbuf[_POSIX2_LINE_MAX + 1]; /* To save stack */
+ struct s_command *cmd, *cmd2, *stack;
+ struct s_format *fp;
+ char re[_POSIX2_LINE_MAX + 1];
+ int naddr; /* Number of addresses */
+
+ stack = 0;
+ for (;;) {
+ if ((p = cu_fgets(lbuf, sizeof (lbuf), NULL)) == NULL) {
+ if (stack != 0)
+ fatal(_("unexpected EOF (pending }'s)"));
+ return (link);
+ }
+
+semicolon: EATSPACE();
+ if (p) {
+ if (*p == '#' || *p == '\0')
+ continue; /* comment or nothing left: fetch the next line */
+ else if (*p == ';') {
+ p++;
+ goto semicolon;
+ }
+ }
+ if ((*link = cmd = malloc(sizeof (struct s_command))) == NULL)
+ err(1, "malloc");
+ link = &cmd->next;
+ cmd->startline = cmd->nonsel = 0;
+ /* First parse the addresses */
+ naddr = 0;
+
+/* Valid characters to start an address */
+#define addrchar(c) (strchr("0123456789/\\$", (c)))
+ if (addrchar(*p)) { /* *p is not '\0' here (filtered above), so strchr cannot match the terminator */
+ naddr++;
+ if ((cmd->a1 = malloc(sizeof (struct s_addr))) == NULL)
+ err(1, "malloc");
+ p = compile_addr(p, cmd->a1);
+ EATSPACE(); /* EXTENSION */
+ if (*p == ',') {
+ p++;
+ EATSPACE(); /* EXTENSION */
+ naddr++;
+ if ((cmd->a2 = malloc(sizeof (struct s_addr)))
+ == NULL)
+ err(1, "malloc");
+ p = compile_addr(p, cmd->a2);
+ EATSPACE();
+ } else
+ cmd->a2 = 0;
+ } else
+ cmd->a1 = cmd->a2 = 0;
+
+nonsel: /* Now parse the command */
+ if (!*p)
+ fatal(_("command expected"));
+ cmd->code = *p;
+ for (fp = cmd_fmts; fp->code; fp++) /* linear search; the table ends with code '\0' */
+ if (fp->code == *p)
+ break;
+ if (!fp->code)
+ fatal(_("invalid command code %c"), *p);
+ if (naddr > fp->naddr)
+ fatal(_("command %c expects up to %d address(es), "
+ "found %d"), *p, fp->naddr, naddr);
+ switch (fp->args) {
+ case NONSEL: /* ! */
+ p++;
+ EATSPACE();
+ cmd->nonsel = 1;
+ goto nonsel; /* re-parse: the real command follows the '!' */
+ case GROUP: /* { */
+ p++;
+ EATSPACE();
+ cmd->next = stack; /* push: next doubles as the stack link until '}' */
+ stack = cmd;
+ link = &cmd->u.c; /* following commands become the group body */
+ if (*p)
+ goto semicolon;
+ break;
+ case ENDGROUP:
+ /*
+ * Short-circuit command processing, since end of
+ * group is really just a noop.
+ */
+ cmd->nonsel = 1;
+ if (stack == 0)
+ fatal(_("unexpected }"));
+ cmd2 = stack; /* pop the innermost open group */
+ stack = cmd2->next;
+ cmd2->next = cmd; /* the group's successor is this '}' command */
+ /*FALLTHROUGH*/
+ case EMPTY: /* d D g G h H l n N p P q x = \0 */
+ p++;
+ EATSPACE();
+ if (*p == ';') {
+ p++;
+ link = &cmd->next;
+ goto semicolon;
+ }
+ if (*p)
+ fatal(_("extra characters at the end of %c "
+ "command"), cmd->code);
+ break;
+ case TEXT: /* a c i */
+ p++;
+ EATSPACE();
+ if (*p != '\\')
+ fatal(_("command %c expects \\ "
+ "followed by text"), cmd->code);
+ p++;
+ EATSPACE();
+ if (*p)
+ fatal(_("extra characters after \\ "
+ "at the end of %c command"),
+ cmd->code);
+ cmd->t = compile_text();
+ break;
+ case COMMENT: /* \0 # */
+ break;
+ case WFILE: /* w */
+ p++;
+ EATSPACE();
+ if (*p == '\0')
+ fatal(_("filename expected"));
+ cmd->t = duptoeol(p, "w command");
+ if (aflag) /* with aflag set the file is not opened here; fd stays -1 */
+ cmd->u.fd = -1;
+ else if ((cmd->u.fd = open(p,
+ O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 0666)) == -1)
+ err(1, "%s", p);
+ break;
+ case RFILE: /* r */
+ p++;
+ EATSPACE();
+ if (*p == '\0')
+ fatal(_("filename expected"));
+ else
+ cmd->t = duptoeol(p, "read command");
+ break;
+ case BRANCH: /* b t */
+ p++;
+ EATSPACE();
+ if (*p == '\0')
+ cmd->t = NULL; /* no label text was given */
+ else
+ cmd->t = duptoeol(p, "branch");
+ break;
+ case LABEL: /* : */
+ p++;
+ EATSPACE();
+ cmd->t = duptoeol(p, "label"); /* NOTE(review): the empty-label check below runs only after this copy */
+ if (strlen(p) == 0)
+ fatal(_("empty label"));
+ enterlabel(cmd);
+ break;
+ case SUBST: /* s */
+ p++;
+ if (*p == '\0' || *p == '\\')
+ fatal(_("substitute pattern can not "
+ "be delimited by newline or backslash"));
+ if ((cmd->u.s = calloc(1, sizeof (struct s_subst))) ==
+ NULL)
+ err(1, "malloc");
+ p = compile_delimited(p, re, 0);
+ if (p == NULL)
+ fatal(_("unterminated substitute pattern"));
+
+ /* Compile RE with no case sensitivity temporarily */
+ if (*re == '\0')
+ cmd->u.s->re = NULL;
+ else
+ cmd->u.s->re = compile_re(re, 0); /* compile_subst() checks back-references against this RE's re_nsub */
+ --p; /* step back onto the delimiter, which compile_subst() expects */
+ p = compile_subst(p, cmd->u.s);
+ p = compile_flags(p, cmd->u.s);
+
+ /* Recompile RE with case sens. from "I" flag if any */
+ if (*re == '\0')
+ cmd->u.s->re = NULL;
+ else
+ cmd->u.s->re = compile_re(re, cmd->u.s->icase); /* NOTE(review): the temporary regex_t above is overwritten without regfree()/free() */
+ EATSPACE();
+ if (*p == ';') {
+ p++;
+ link = &cmd->next;
+ goto semicolon;
+ }
+ break;
+ case TR: /* y */
+ p++;
+ p = compile_tr(p, &cmd->u.y);
+ EATSPACE();
+ if (*p == ';') {
+ p++;
+ link = &cmd->next;
+ goto semicolon;
+ }
+ if (*p)
+ fatal(_("extra text at the end of a "
+ "transform command"));
+ break;
+ }
+ }
+}
+
+/*
+ * Get a delimited string. P points to the delimiter of the string; d points
+ * to a buffer area. Newline and delimiter escapes are processed; other
+ * escapes are ignored.
+ *
+ * Returns a pointer to the first character after the final delimiter or NULL
+ * in the case of a non-terminated string. The character array d is filled
+ * with the processed string.
+ *
+ * A bracket expression is copied verbatim by compile_ccl() unless '[' is
+ * itself the delimiter. An escaped '[' keeps its backslash. A doubled
+ * backslash is reduced to one when is_tr is non-zero and copied as two
+ * otherwise. The delimiter check comes before the "\n" check, so with 'n'
+ * as the delimiter an escaped n yields a literal n.
+ *
+ * No size is passed for d and nothing bounds the writes; the callers in
+ * this file pass arrays of _POSIX2_LINE_MAX + 1 bytes. Calls fatal() when
+ * the delimiter is a backslash or a newline, or when brackets are
+ * unbalanced.
+ */
+static char *
+compile_delimited(char *p, char *d, int is_tr)
+{
+ char c;
+
+ c = *p++; /* c is the delimiter; p now points at the string body */
+ if (c == '\0')
+ return (NULL);
+ else if (c == '\\')
+ fatal(_("\\ can not be used as a string delimiter"));
+ else if (c == '\n')
+ fatal(_("newline can not be used as a string delimiter"));
+ while (*p) {
+ if (*p == '[' && *p != c) {
+ if ((d = compile_ccl(&p, d)) == NULL)
+ fatal(_("unbalanced brackets ([])"));
+ continue;
+ } else if (*p == '\\' && p[1] == '[') {
+ *d++ = *p++; /* keep the backslash; the '[' is copied at the bottom */
+ } else if (*p == '\\' && p[1] == c)
+ p++; /* drop the backslash; the delimiter is copied at the bottom */
+ else if (*p == '\\' && p[1] == 'n') {
+ *d++ = '\n';
+ p += 2;
+ continue;
+ } else if (*p == '\\' && p[1] == '\\') {
+ if (is_tr)
+ p++;
+ else
+ *d++ = *p++;
+ } else if (*p == c) {
+ *d = '\0';
+ return (p + 1);
+ }
+ *d++ = *p++;
+ }
+ return (NULL); /* ran off the end without seeing the closing delimiter */
+}
+
+
+/*
+ * compile_ccl: expand a POSIX character class
+ *
+ * On entry the string at sp starts with the '[' that opens a bracket
+ * expression. The expression is copied unchanged into t, including an
+ * optional leading '^', a ']' that comes first (taken literally), and any
+ * nested "[.", "[:" or "[=" element up to its matching closer.
+ *
+ * On success the string pointer behind sp is moved past the closing ']'
+ * and the return value points just past the copied text in t. If the
+ * input ends before the expression is closed, NULL is returned and the
+ * pointer behind sp is left untouched. Writes to t are not bounded.
+ */
+static char *
+compile_ccl(char **sp, char *t)
+{
+ int c, d;
+ char *s = *sp;
+
+ *t++ = *s++; /* the opening '[' */
+ if (*s == '^')
+ *t++ = *s++;
+ if (*s == ']')
+ *t++ = *s++;
+ for (; *s && (*t = *s) != ']'; s++, t++)
+ if (*s == '[' &&
+ ((d = *(s+1)) == '.' || d == ':' || d == '=')) {
+ *++t = *++s, t++, s++; /* copy the '.', ':' or '=' that follows '[' */
+ for (c = *s; (*t = *s) != ']' || c != d; s++, t++) /* ends at a ']' directly preceded by d */
+ if ((c = *s) == '\0')
+ return (NULL);
+ }
+ return ((*s == ']') ? *sp = ++s, ++t : NULL);
+}
+
+/*
+ * Compiles the regular expression in RE and returns a pointer to the compiled
+ * regular expression.
+ * The regcomp flags are the global rflags, with REG_ICASE added when
+ * case_insensitive is non-zero.
+ *
+ * The regex_t is allocated with malloc and handed to the caller. maxnsub
+ * is raised when this expression has more subexpressions than any seen so
+ * far. Exits through err() if malloc fails and through fatal() if regcomp
+ * rejects the expression.
+ */
+static regex_t *
+compile_re(char *re, int case_insensitive)
+{
+ regex_t *rep;
+ int eval, flags;
+
+
+ flags = rflags;
+ if (case_insensitive)
+ flags |= REG_ICASE;
+ if ((rep = malloc(sizeof (regex_t))) == NULL)
+ err(1, "malloc");
+ if ((eval = regcomp(rep, re, flags)) != 0)
+ fatal(_("RE error: %s"), strregerror(eval, rep));
+ if (maxnsub < rep->re_nsub)
+ maxnsub = rep->re_nsub;
+ return (rep);
+}
+
+/*
+ * Compile the replacement string of an s command and store a saved copy
+ * of it in s->new.
+ *
+ * p points at the delimiter that precedes the replacement. The return
+ * value points just past the closing delimiter; when the replacement
+ * spans several script lines that pointer is inside this function's
+ * static line buffer. NULL is returned only when the delimiter is NUL.
+ *
+ * In the saved text a backslash is kept in front of the digits 1-9, '&'
+ * and backslash; for any other escaped character the backslash is
+ * dropped. A back-reference larger than s->re->re_nsub is fatal when
+ * s->re is set, and the largest one seen is recorded in s->maxbref.
+ * s->linenum is set from the global linenum. An unescaped newline, or
+ * running out of input before the closing delimiter, is fatal.
+ */
+static char *
+compile_subst(char *p, struct s_subst *s)
+{
+ static char lbuf[_POSIX2_LINE_MAX + 1];
+ int asize;
+ uintptr_t size;
+ uchar_t ref;
+ char c, *text, *op, *sp;
+ int more = 1, sawesc = 0; /* NOTE(review): `more' is updated by cu_fgets(); its exact meaning is defined outside this chunk */
+
+ c = *p++; /* Terminator character */
+ if (c == '\0')
+ return (NULL);
+
+ s->maxbref = 0;
+ s->linenum = linenum;
+ asize = 2 * _POSIX2_LINE_MAX + 1;
+ if ((text = malloc(asize)) == NULL)
+ err(1, "malloc");
+ size = 0;
+ do {
+ op = sp = text + size; /* op marks where this line's output starts */
+ for (; *p; p++) {
+ if (*p == '\\' || sawesc) {
+ /*
+ * If this is a continuation from the last
+ * buffer, we won't have a character to
+ * skip over.
+ */
+ if (sawesc)
+ sawesc = 0;
+ else
+ p++;
+
+ if (*p == '\0') {
+ /*
+ * This escaped character is continued
+ * in the next part of the line. Note
+ * this fact, then cause the loop to
+ * exit w/ normal EOL case and reenter
+ * above with the new buffer.
+ */
+ sawesc = 1;
+ p--;
+ continue;
+ } else if (strchr("123456789", *p) != NULL) {
+ *sp++ = '\\';
+ ref = *p - '0';
+ if (s->re != NULL &&
+ ref > s->re->re_nsub)
+ fatal(_("not defined in "
+ "the RE: \\%c"), *p);
+ if (s->maxbref < ref)
+ s->maxbref = ref;
+ } else if (*p == '&' || *p == '\\')
+ *sp++ = '\\';
+ } else if (*p == c) {
+ if (*++p == '\0' && more) { /* delimiter ended the buffer: try to load the next line */
+ if (cu_fgets(lbuf, sizeof (lbuf),
+ &more))
+ p = lbuf;
+ }
+ *sp++ = '\0';
+ size += (uintptr_t)sp - (uintptr_t)op;
+ if ((s->new = realloc(text, size)) == NULL) /* shrink to the bytes actually used */
+ err(1, "realloc");
+ return (p);
+ } else if (*p == '\n') {
+ fatal(_("unescaped newline inside "
+ "substitute pattern"));
+ /* NOTREACHED */
+ }
+ *sp++ = *p; /* copy the character, escaped or not */
+ }
+ size += (uintptr_t)sp - (uintptr_t)op;
+ if (asize - size < _POSIX2_LINE_MAX + 1) { /* keep room for one more full line */
+ asize *= 2;
+ if ((text = realloc(text, asize)) == NULL)
+ err(1, "realloc");
+ }
+ } while (cu_fgets(p = lbuf, sizeof (lbuf), &more));
+ fatal(_("unterminated substitute in regular expression"));
+ return (NULL);
+}
+
+/*
+ * Compile the flags of the s command
+ *
+ * p points just past the replacement text. The fields n, p, wfile, wfd
+ * and icase of s are first reset to their defaults and then updated from
+ * the flags found: 'g' sets n to 0, a decimal number sets n to that
+ * value, 'p' sets p, 'I' sets icase, and 'w' takes the rest of the line
+ * as a file name. Only one of 'g' or a number is accepted.
+ *
+ * Returns a pointer to the NUL, newline or ';' that ended the flags (not
+ * consumed), or to the end of the file name after 'w'. Unknown flags, a
+ * repeated g/number, numeric overflow and a missing file name are fatal.
+ */
+static char *
+compile_flags(char *p, struct s_subst *s)
+{
+ int gn; /* True if we have seen g or n */
+ unsigned long nval;
+ char wfile[_POSIX2_LINE_MAX + 1], *q;
+
+ s->n = 1; /* Default */
+ s->p = 0;
+ s->wfile = NULL;
+ s->wfd = -1;
+ s->icase = 0;
+ gn = 0;
+ for (;;) {
+ EATSPACE(); /* EXTENSION */
+ switch (*p) {
+ case 'g':
+ if (gn)
+ fatal(_("more than one number or "
+ "'g' in substitute flags"));
+ gn = 1;
+ s->n = 0;
+ break;
+ case '\0':
+ case '\n':
+ case ';':
+ return (p);
+ case 'p':
+ s->p = 1;
+ break;
+ case 'I':
+ s->icase = 1;
+ break;
+ case '1': case '2': case '3':
+ case '4': case '5': case '6':
+ case '7': case '8': case '9':
+ if (gn)
+ fatal(_("more than one number or "
+ "'g' in substitute flags"));
+ gn = 1;
+ errno = 0;
+ nval = strtol(p, &p, 10); /* NOTE(review): strtol()'s long result is stored in an unsigned long */
+ if (errno == ERANGE || nval > INT_MAX)
+ fatal(_("overflow in the 'N' substitute flag"));
+ s->n = nval;
+ p--; /* compensate for the p++ at the bottom of the loop */
+ break;
+ case 'w':
+ p++;
+#ifdef HISTORIC_PRACTICE
+ if (*p != ' ') {
+ fatal(_("space missing before w wfile"));
+ return (p);
+ }
+#endif
+ EATSPACE();
+ q = wfile;
+ while (*p) { /* NOTE(review): no bound on q; assumes the line fits in wfile[] - confirm against cu_fgets() */
+ if (*p == '\n')
+ break;
+ *q++ = *p++;
+ }
+ *q = '\0';
+ if (q == wfile)
+ fatal(_("no wfile specified"));
+ s->wfile = strdup(wfile); /* NOTE(review): result is not checked for NULL */
+ if (!aflag && (s->wfd = open(wfile,
+ O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 0666)) == -1)
+ err(1, "%s", wfile);
+ return (p);
+ default:
+ fatal(_("bad flag in substitute command: '%c'"), *p);
+ break;
+ }
+ p++;
+ }
+}
+
+/*
+ * Compile a translation set of strings into a lookup table.
+ *
+ * p points at the delimiter that opens the source string of a y command.
+ * A struct s_tr is allocated and stored through py. Both strings are
+ * extracted with compile_delimited() in transform mode and must contain
+ * the same number of characters as counted by mbsrtowcs().
+ *
+ * Returns a pointer past the target string and any white space after it.
+ * A bad delimiter, an unterminated string or a length mismatch is fatal;
+ * allocation and conversion failures exit through err() or errx().
+ */
+static char *
+compile_tr(char *p, struct s_tr **py)
+{
+ struct s_tr *y;
+ int i;
+ const char *op, *np;
+ char old[_POSIX2_LINE_MAX + 1];
+ char new[_POSIX2_LINE_MAX + 1];
+ size_t oclen, oldlen, nclen, newlen;
+ mbstate_t mbs1, mbs2;
+
+ if ((*py = y = malloc(sizeof (*y))) == NULL)
+ err(1, NULL);
+ y->multis = NULL;
+ y->nmultis = 0;
+
+ if (*p == '\0' || *p == '\\')
+ fatal(_("transform pattern can not be delimited by "
+ "newline or backslash"));
+ p = compile_delimited(p, old, 1);
+ if (p == NULL)
+ fatal(_("unterminated transform source string"));
+ p = compile_delimited(p - 1, new, 1); /* p - 1: the delimiter that closed the source also opens the target */
+ if (p == NULL)
+ fatal(_("unterminated transform target string"));
+ EATSPACE();
+ op = old;
+ oldlen = mbsrtowcs(NULL, &op, 0, NULL); /* count characters only; nothing is stored */
+ if (oldlen == (size_t)-1)
+ err(1, NULL);
+ np = new;
+ newlen = mbsrtowcs(NULL, &np, 0, NULL);
+ if (newlen == (size_t)-1)
+ err(1, NULL);
+ if (newlen != oldlen)
+ fatal(_("transform strings are not the same length"));
+ if (MB_CUR_MAX == 1) {
+ /*
+ * The single-byte encoding case is easy: generate a
+ * lookup table.
+ */
+ for (i = 0; i <= UCHAR_MAX; i++)
+ y->bytetab[i] = (char)i; /* start from the identity mapping */
+ for (; *op; op++, np++)
+ y->bytetab[(uchar_t)*op] = *np;
+ } else {
+ /*
+ * Multi-byte encoding case: generate a lookup table as
+ * above, but only for single-byte characters. The first
+ * bytes of multi-byte characters have their lookup table
+ * entries set to 0, which causes do_tr() to search through
+ * an auxiliary vector of multi-byte mappings.
+ */
+ (void) memset(&mbs1, 0, sizeof (mbs1));
+ (void) memset(&mbs2, 0, sizeof (mbs2));
+ for (i = 0; i <= UCHAR_MAX; i++)
+ y->bytetab[i] = (btowc(i) != WEOF) ? (uchar_t)i : 0;
+ while (*op != '\0') {
+ oclen = mbrlen(op, MB_LEN_MAX, &mbs1);
+ if (oclen == (size_t)-1 || oclen == (size_t)-2)
+ errx(1, "%s", strerror(EILSEQ));
+ nclen = mbrlen(np, MB_LEN_MAX, &mbs2);
+ if (nclen == (size_t)-1 || nclen == (size_t)-2)
+ errx(1, "%s", strerror(EILSEQ));
+ if (oclen == 1 && nclen == 1)
+ y->bytetab[(uchar_t)*op] = *np;
+ else {
+ y->bytetab[(uchar_t)*op] = 0;
+ y->multis = realloc(y->multis,
+ (y->nmultis + 1) * sizeof (*y->multis)); /* grows by one entry per mapping */
+ if (y->multis == NULL)
+ err(1, NULL);
+ i = y->nmultis++;
+ y->multis[i].fromlen = oclen;
+ (void) memcpy(y->multis[i].from, op, oclen);
+ y->multis[i].tolen = nclen;
+ (void) memcpy(y->multis[i].to, np, nclen);
+ }
+ op += oclen;
+ np += nclen;
+ }
+ }
+ return (p);
+}
+
+/*
+ * Collect the text argument of an a, c or i command.
+ *
+ * Script lines are pulled from cu_fgets() and appended to a growing heap
+ * buffer. On every line a backslash followed by another character is
+ * dropped and the following character is kept; when that character is a
+ * newline the text continues on the next script line. Returns the
+ * NUL-terminated text in a buffer trimmed to its exact size; the caller
+ * owns it.
+ */
+static char *
+compile_text(void)
+{
+	char line[_POSIX2_LINE_MAX + 1];
+	char *p, *buf, *out, *shrunk;
+	uintptr_t used = 0;
+	uintptr_t cap = 2 * _POSIX2_LINE_MAX + 1;
+	int continued;
+
+	buf = malloc(cap);
+	if (buf == NULL)
+		err(1, "malloc");
+
+	while (cu_fgets(line, sizeof (line), NULL)) {
+		out = buf + used;
+		p = line;
+		EATSPACE();
+
+		continued = 0;
+		while (*p != '\0') {
+			if (*p == '\\' && p[1] != '\0') {
+				/* Drop the backslash, keep what follows. */
+				p++;
+				if (*p == '\n')
+					continued = 1;
+			}
+			*out++ = *p++;
+		}
+		used = (uintptr_t)out - (uintptr_t)buf;
+
+		if (!continued)
+			break;
+
+		/* Always leave room for one more full script line. */
+		if (cap - used < _POSIX2_LINE_MAX + 1) {
+			cap *= 2;
+			buf = realloc(buf, cap);
+			if (buf == NULL)
+				err(1, "realloc");
+		}
+	}
+
+	buf[used] = '\0';
+	shrunk = realloc(buf, used + 1);
+	if (shrunk == NULL)
+		err(1, "realloc");
+	return (shrunk);
+}
+
+/*
+ * Parse one address at p and describe it in *a.
+ *
+ * Recognised forms: a context address (/re/ or \cREc, optionally followed
+ * by I for case-insensitive matching), $ for the last line, +N for a
+ * relative line number and N for an absolute line number. Returns a
+ * pointer to the first character after the address; anything else is
+ * reported through fatal().
+ */
+static char *
+compile_addr(char *p, struct s_addr *a)
+{
+	char regex[_POSIX2_LINE_MAX + 1];
+	char *stop;
+	int nocase = 0;
+
+	a->type = 0;
+
+	if (*p == '\\' || *p == '/') {
+		/* Context address; a leading backslash is skipped. */
+		if (*p == '\\')
+			++p;
+		p = compile_delimited(p, regex, 0);
+		if (p == NULL)
+			fatal(_("unterminated regular expression"));
+
+		/* Check for case insensitive regexp flag */
+		if (*p == 'I') {
+			nocase = 1;
+			p++;
+		}
+		/* An empty RE is stored as NULL. */
+		a->u.r = (*regex == '\0') ? NULL : compile_re(regex, nocase);
+		a->type = AT_RE;
+		return (p);
+	}
+
+	if (*p == '$') {
+		/* Last line */
+		a->type = AT_LAST;
+		return (p + 1);
+	}
+
+	if (*p == '+') {
+		/* Relative line number: step over the sign. */
+		a->type = AT_RELLINE;
+		p++;
+	} else if (*p >= '0' && *p <= '9') {
+		/* Line number */
+		a->type = AT_LINE;
+	} else {
+		fatal(_("expected context address"));
+		return (NULL);
+	}
+
+	a->u.l = strtol(p, &stop, 10);
+	return (stop);
+}
+
+/*
+ * duptoeol --
+ *	Return a heap copy of s up to, but not including, the first \n or
+ *	\0. The terminating character in s itself is overwritten with \0.
+ *	A warning naming ctype is issued when the last copied character is
+ *	whitespace.
+ */
+static char *
+duptoeol(char *s, const char *ctype)
+{
+	char *begin = s;
+	char *copy;
+	size_t nbytes;
+	int trailing_ws = 0;
+
+	/* Only the whitespace status of the final character survives. */
+	while (*s != '\0' && *s != '\n') {
+		trailing_ws = isspace((unsigned char)*s);
+		s++;
+	}
+	*s = '\0';
+
+	if (trailing_ws)
+		warnx(_("%lu: %s: whitespace after %s"), linenum, fname, ctype);
+
+	/* Count the terminating NUL as well. */
+	nbytes = (uintptr_t)s - (uintptr_t)begin + 1;
+	copy = malloc(nbytes);
+	if (copy == NULL)
+		err(1, "malloc");
+	return (memmove(copy, begin, nbytes));
+}
+
+/*
+ * Walk the commands from cp up to (not including) end. Each a or r
+ * command bumps appendnum; each b or t command has its label text
+ * replaced by the command it names (NULL when there is no label) and the
+ * label text freed; each { recurses over its interior commands. Labels
+ * owned by : commands are left alone.
+ *
+ * TODO: Remove } nodes
+ */
+static void
+fixuplabel(struct s_command *cp, struct s_command *end)
+{
+
+	while (cp != end) {
+		const char code = cp->code;
+
+		if (code == 'a' || code == 'r') {
+			appendnum++;
+		} else if (code == 'b' || code == 't') {
+			/* Resolve branch target. */
+			if (cp->t == NULL) {
+				cp->u.c = NULL;
+			} else {
+				cp->u.c = findlabel(cp->t);
+				if (cp->u.c == NULL)
+					fatal(_("undefined label '%s'"), cp->t);
+				free(cp->t);
+			}
+		} else if (code == '{') {
+			/* Do interior commands. */
+			fixuplabel(cp->u.c, cp->next);
+		}
+		cp = cp->next;
+	}
+}
+
+/*
+ * Record the label carried by cp (cp->t) in the label hash table so that
+ * findlabel() can resolve it later. A label that is already present is a
+ * fatal error.
+ */
+static void
+enterlabel(struct s_command *cp)
+{
+	struct labhash **bucket, *ent;
+	const uchar_t *cur;
+	uint_t hash = 0;
+
+	/* Multiply-by-33 string hash over the label text. */
+	for (cur = (uchar_t *)cp->t; *cur != 0; cur++)
+		hash = hash * 33 + *cur;
+
+	bucket = &labels[hash & LHMASK];
+	ent = *bucket;
+	while (ent != NULL) {
+		if (ent->lh_hash == hash && strcmp(cp->t, ent->lh_cmd->t) == 0)
+			fatal(_("duplicate label '%s'"), cp->t);
+		ent = ent->lh_next;
+	}
+
+	ent = malloc(sizeof (*ent));
+	if (ent == NULL)
+		err(1, "malloc");
+	ent->lh_cmd = cp;
+	ent->lh_hash = hash;
+	ent->lh_ref = 0;
+
+	/* Push the new entry on the front of its bucket chain. */
+	ent->lh_next = *bucket;
+	*bucket = ent;
+}
+
+/*
+ * Look up the label called name in the label hash table. On a hit the
+ * entry is marked as referenced and the command that defines the label is
+ * returned. Return NULL if not found.
+ */
+static struct s_command *
+findlabel(char *name)
+{
+	struct labhash *ent;
+	const uchar_t *cur;
+	uint_t hash = 0;
+
+	/* Same multiply-by-33 hash that enterlabel() stores. */
+	for (cur = (uchar_t *)name; *cur != 0; cur++)
+		hash = hash * 33 + *cur;
+
+	ent = labels[hash & LHMASK];
+	while (ent != NULL) {
+		if (ent->lh_hash == hash && strcmp(name, ent->lh_cmd->t) == 0) {
+			ent->lh_ref = 1;
+			return (ent->lh_cmd);
+		}
+		ent = ent->lh_next;
+	}
+	return (NULL);
+}
+
+/*
+ * Walk every bucket of the label hash table, warn about each label that
+ * was never referenced, and free the table entries along the way.
+ */
+static void
+uselabel(void)
+{
+	struct labhash *ent, *following;
+	int bucket;
+
+	for (bucket = 0; bucket < LHSZ; bucket++) {
+		ent = labels[bucket];
+		while (ent != NULL) {
+			/* Fetch the link before the entry is freed. */
+			following = ent->lh_next;
+			if (!ent->lh_ref)
+				warnx(_("%lu: %s: unused label '%s'"),
+				    linenum, fname, ent->lh_cmd->t);
+			free(ent);
+			ent = following;
+		}
+	}
+}
diff --git a/usr/src/cmd/sed/defs.h b/usr/src/cmd/sed/defs.h
new file mode 100644
index 0000000000..d22bca5913
--- /dev/null
+++ b/usr/src/cmd/sed/defs.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 1992 Diomidis Spinellis.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Diomidis Spinellis of Imperial College, University of London.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef DEFS_H
+#define DEFS_H
+
+/*
+ * Types of address specifications.
+ * The values start at 1; compile_addr() uses 0 for "no type chosen yet".
+ */
+enum e_atype {
+ AT_RE = 1, /* Lines that match an RE */
+ AT_LINE, /* Specific line */
+ AT_RELLINE, /* Relative line */
+ AT_LAST /* Last line */
+};
+
+/*
+ * Format of an address.
+ * compile_addr() fills u.l for AT_LINE and AT_RELLINE and u.r for AT_RE
+ * (NULL when the RE text was empty); AT_LAST sets neither member.
+ */
+struct s_addr {
+ enum e_atype type; /* Address type */
+ union {
+ ulong_t l; /* Line number */
+ regex_t *r; /* Regular expression */
+ } u;
+};
+
+/*
+ * Substitution command (s): the compiled pattern, its flags and the
+ * replacement text.
+ */
+struct s_subst {
+ int n; /* Occurrence to subst. */
+ int p; /* True if p flag */
+ int icase; /* True if I flag */
+ char *wfile; /* NULL if no wfile */
+ int wfd; /* Cached file descriptor */
+ regex_t *re; /* Regular expression */
+ unsigned int maxbref; /* Largest backreference. */
+ ulong_t linenum; /* Line number. */
+ char *new; /* Replacement text */
+};
+
+/*
+ * Translate command (y), as built by compile_tr().
+ */
+struct s_tr {
+ /* Per-byte mapping; in multi-byte locales a 0 entry means "not a */
+ /* single-byte mapping, consult multis" (see compile_tr()). */
+ unsigned char bytetab[256];
+ /* Mappings in which the source or the target is multi-byte. */
+ struct trmulti {
+ size_t fromlen; /* Bytes used in from[] */
+ char from[MB_LEN_MAX];
+ size_t tolen; /* Bytes used in to[] */
+ char to[MB_LEN_MAX];
+ } *multis;
+ int nmultis; /* Number of entries in multis */
+};
+
+/*
+ * An internally compiled command.
+ * Initially, label references are stored in t; on a second pass
+ * (fixuplabel()) they are resolved to command pointers in u.c.
+ */
+struct s_command {
+ struct s_command *next; /* Pointer to next command */
+ struct s_addr *a1, *a2; /* Start and end address */
+ ulong_t startline; /* Start line number or zero */
+ char *t; /* Text for : a c i r w */
+ union {
+ struct s_command *c; /* Command(s) for b t { */
+ struct s_subst *s; /* Substitute command */
+ struct s_tr *y; /* Replace command array */
+ int fd; /* File descriptor for w */
+ } u;
+ char code; /* Command code */
+ uint_t nonsel:1; /* True if ! */
+};
+
+/*
+ * Types of command arguments recognised by the parser.
+ * The comment on each value lists the command characters that take that
+ * kind of argument.
+ */
+enum e_args {
+ EMPTY, /* d D g G h H l n N p P q x = \0 */
+ TEXT, /* a c i */
+ NONSEL, /* ! */
+ GROUP, /* { */
+ ENDGROUP, /* } */
+ COMMENT, /* # */
+ BRANCH, /* b t */
+ LABEL, /* : */
+ RFILE, /* r */
+ WFILE, /* w */
+ SUBST, /* s */
+ TR /* y */
+};
+
+/*
+ * Structure containing things to append before a line is read
+ */
+struct s_appends {
+ enum {AP_STRING, AP_FILE} type; /* How s is to be interpreted */
+ char *s; /* presumably the text (AP_STRING) or a file name (AP_FILE) -- confirm in process.c */
+ size_t len; /* presumably the length of s -- confirm in process.c */
+};
+
+/*
+ * How new data is combined with the current contents of a SPACE; passed
+ * to cspace() and mf_fgets().
+ */
+enum e_spflag {
+ APPEND, /* Append to the contents. */
+ REPLACE /* Replace the contents. */
+};
+
+/*
+ * Structure for a space (process, hold, otherwise).
+ * mf_fgets() sets len to 0 when it reaches the end of all input.
+ */
+typedef struct {
+ char *space; /* Current space pointer. */
+ size_t len; /* Current length. */
+ int deleted; /* If deleted. */
+ char *back; /* Backing memory. */
+ size_t blen; /* Backing memory length. */
+} SPACE;
+
+#endif /* DEFS_H */
diff --git a/usr/src/cmd/sed/extern.h b/usr/src/cmd/sed/extern.h
new file mode 100644
index 0000000000..10d1ab60eb
--- /dev/null
+++ b/usr/src/cmd/sed/extern.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 1992 Diomidis Spinellis.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Diomidis Spinellis of Imperial College, University of London.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef EXTERN_H
+#define EXTERN_H
+
+/* State shared between the sed source files. */
+extern struct s_command *prog;
+extern struct s_appends *appends;
+extern regmatch_t *match;
+extern size_t maxnsub;
+extern ulong_t linenum; /* current line number (defined in main.c) */
+extern int appendnum; /* incremented by fixuplabel() for each a or r command */
+extern int aflag, eflag, nflag; /* set by the -a, -e and -n options */
+extern const char *fname, *outfname; /* current input and output names */
+extern FILE *infile, *outfile; /* current input and output streams */
+extern int rflags; /* regex flags to use */
+
+void cfclose(struct s_command *, struct s_command *);
+void compile(void);
+void cspace(SPACE *, const char *, size_t, enum e_spflag);
+/* Like fgets(), but reads across the chain of script compilation units. */
+char *cu_fgets(char *, int, int *);
+/* Reads the next input line into a SPACE; returns 0 at end of all input. */
+int mf_fgets(SPACE *, enum e_spflag);
+/* Returns non-zero when the line just read is the last line of input. */
+int lastline(void);
+void process(void);
+void resetstate(void);
+/* Returns the regerror() text for an error code in a reused static buffer. */
+char *strregerror(int, regex_t *);
+/*PRINTFLIKE1*/
+void fatal(const char *, ...); /* output includes file and line # */
+
+/*
+ * Message translation wrapper: _() passes its argument through gettext(),
+ * except under lint, where it expands to the bare string.
+ */
+#ifdef lint
+#define _(s) s
+#else
+#define _(s) gettext(s)
+#endif
+
+#endif /* EXTERN_H */
diff --git a/usr/src/cmd/sed/main.c b/usr/src/cmd/sed/main.c
new file mode 100644
index 0000000000..b1ca9a2648
--- /dev/null
+++ b/usr/src/cmd/sed/main.c
@@ -0,0 +1,520 @@
+/*
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 1992 Diomidis Spinellis.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Diomidis Spinellis of Imperial College, University of London.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <limits.h>
+#include <locale.h>
+#include <regex.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <libintl.h>
+
+#include "defs.h"
+#include "extern.h"
+
+/*
+ * Linked list of units (strings and files) to be compiled.
+ * add_compunit() appends to it; cu_fgets() consumes it from the head.
+ */
+struct s_compunit {
+ struct s_compunit *next;
+ enum e_cut {CU_FILE, CU_STRING} type;
+ char *s; /* Pointer to string or fname */
+};
+
+/*
+ * Linked list pointer to compilation units and pointer to current
+ * next pointer.
+ */
+static struct s_compunit *script, **cu_nextp = &script;
+
+/*
+ * Linked list of files to be processed.
+ * A NULL fname stands for standard input (see mf_fgets()).
+ */
+struct s_flist {
+ char *fname;
+ struct s_flist *next;
+};
+
+/*
+ * Linked list pointer to files and pointer to current
+ * next pointer.
+ */
+static struct s_flist *files, **fl_nextp = &files;
+
+FILE *infile; /* Current input file */
+FILE *outfile; /* Current output file */
+
+int aflag, eflag, nflag; /* Set by the -a, -e and -n options */
+int rflags = 0; /* regcomp flags; REG_EXTENDED with -E or -r */
+static int rval; /* Exit status */
+
+static int ispan; /* Whether inplace editing spans across files */
+
+/*
+ * Current file and line number; line numbers restart across compilation
+ * units, but span across input files. The latter is optional if editing
+ * in place.
+ */
+const char *fname; /* File name. */
+const char *outfname; /* Output file name */
+static char oldfname[PATH_MAX]; /* Old file name (for in-place editing) */
+static char tmpfname[PATH_MAX]; /* Temporary file name (for in-place editing) */
+static const char *inplace; /* Inplace edit file extension. */
+ulong_t linenum;
+
+static void add_compunit(enum e_cut, char *);
+static void add_file(char *);
+static void usage(void);
+static char *getln(FILE *, size_t *);
+
+
+/*
+ * sed entry point: parse the options, compile the script, queue the
+ * input files and run the script over them.
+ *
+ * When neither -e nor -f is given, the first operand is the script. With
+ * no file operands, standard input is processed. Returns rval, which
+ * mf_fgets() sets to 1 when an input file cannot be opened.
+ */
+int
+main(int argc, char *argv[])
+{
+	int c, fflag;
+	char *temp_arg;
+
+	(void) setlocale(LC_ALL, "");
+
+#ifndef TEXT_DOMAIN
+#define TEXT_DOMAIN "SYS_TEST"
+#endif
+	(void) textdomain(TEXT_DOMAIN);
+
+	fflag = 0;
+	inplace = NULL;
+
+	while ((c = getopt(argc, argv, "EI:ae:f:i:lnr")) != -1)
+		switch (c) {
+		case 'r':		/* Gnu sed compat */
+		case 'E':
+			rflags = REG_EXTENDED;
+			break;
+		case 'I':
+			inplace = optarg;
+			ispan = 1;	/* span across input files */
+			break;
+		case 'a':
+			aflag = 1;
+			break;
+		case 'e':
+			eflag = 1;
+			/*
+			 * asprintf() reports failure with -1. An empty
+			 * script (-e '') is valid and yields the one-byte
+			 * string "\n", so the length must not be used as
+			 * the failure test.
+			 */
+			if (asprintf(&temp_arg, "%s\n", optarg) == -1)
+				err(1, "asprintf");
+			add_compunit(CU_STRING, temp_arg);
+			break;
+		case 'f':
+			fflag = 1;
+			add_compunit(CU_FILE, optarg);
+			break;
+		case 'i':
+			inplace = optarg;
+			ispan = 0;	/* don't span across input files */
+			break;
+		case 'l':
+			/* On SunOS, setlinebuf "returns no useful value" */
+			(void) setlinebuf(stdout);
+			break;
+		case 'n':
+			nflag = 1;
+			break;
+		default:
+		case '?':
+			usage();
+		}
+	argc -= optind;
+	argv += optind;
+
+	/* First usage case; script is the first arg */
+	if (!eflag && !fflag && *argv) {
+		add_compunit(CU_STRING, *argv);
+		argv++;
+	}
+
+	compile();
+
+	/* Continue with first and start second usage */
+	if (*argv)
+		for (; *argv; argv++)
+			add_file(*argv);
+	else
+		add_file(NULL);
+	process();
+	cfclose(prog, NULL);
+	if (fclose(stdout))
+		err(1, "stdout");
+	return (rval);
+}
+
+/*
+ * Print the command synopsis on stderr and exit with status 1.
+ */
+static void
+usage(void)
+{
+	(void) fputs(_("usage: sed script [-Ealn] [-i extension] [file ...]\n"
+	    " sed [-Ealn] [-i extension] [-e script] ... "
+	    "[-f script_file] ... [file ...]"),
+	    stderr);
+	/*
+	 * The translated text has no trailing newline; emit it separately
+	 * so the message id stays unchanged.
+	 */
+	(void) fputc('\n', stderr);
+	exit(1);
+}
+
+/*
+ * Like fgets, but go through the chain of compilation units chaining them
+ * together. Empty strings and files are ignored.
+ *
+ * buf and n are the destination buffer and its size. When more is not
+ * NULL, *more is set to 1 if a string unit had to be cut because buf was
+ * full, to !feof() for a line read from a file, and to 0 otherwise.
+ * Returns buf, or NULL once every compilation unit has been consumed.
+ *
+ * The function is a small state machine whose state lives in static
+ * variables, so it remembers its position between calls. linenum is
+ * reset at the start of each unit, fname is set to the unit's name, and a
+ * unit whose first line starts with "#n" turns on nflag.
+ */
+char *
+cu_fgets(char *buf, int n, int *more)
+{
+ static enum {ST_EOF, ST_FILE, ST_STRING} state = ST_EOF;
+ static FILE *f; /* Current open file */
+ static char *s; /* Current pointer inside string */
+ static char string_ident[30];
+ char *p;
+
+again:
+ switch (state) {
+ case ST_EOF:
+ /* Between units: open the next one, or report the end. */
+ if (script == NULL) {
+ if (more != NULL)
+ *more = 0;
+ return (NULL);
+ }
+ linenum = 0;
+ switch (script->type) {
+ case CU_FILE:
+ if ((f = fopen(script->s, "r")) == NULL)
+ err(1, "%s", script->s);
+ fname = script->s;
+ state = ST_FILE;
+ goto again;
+ case CU_STRING:
+ /*
+ * Use the quoted script text as the unit's name; when
+ * it does not fit, the tail is overwritten with ` ..."'.
+ */
+ if (((size_t)snprintf(string_ident,
+ sizeof (string_ident), "\"%s\"", script->s)) >=
+ sizeof (string_ident) - 1)
+ (void) strcpy(string_ident +
+ sizeof (string_ident) - 6, " ...\"");
+ fname = string_ident;
+ s = script->s;
+ state = ST_STRING;
+ goto again;
+ }
+ /*NOTREACHED*/
+
+ case ST_FILE:
+ if ((p = fgets(buf, n, f)) != NULL) {
+ linenum++;
+ if (linenum == 1 && buf[0] == '#' && buf[1] == 'n')
+ nflag = 1;
+ if (more != NULL)
+ *more = !feof(f);
+ return (p);
+ }
+ /* This file is exhausted: advance to the next unit. */
+ script = script->next;
+ (void) fclose(f);
+ state = ST_EOF;
+ goto again;
+ case ST_STRING:
+ if (linenum == 0 && s[0] == '#' && s[1] == 'n')
+ nflag = 1;
+ p = buf;
+ for (;;) {
+ /* Buffer full: return what fits and flag that more follows. */
+ if (n-- <= 1) {
+ *p = '\0';
+ linenum++;
+ if (more != NULL)
+ *more = 1;
+ return (buf);
+ }
+ switch (*s) {
+ case '\0':
+ state = ST_EOF;
+ if (s == script->s) {
+ /* Nothing was consumed from this string: skip it. */
+ script = script->next;
+ goto again;
+ } else {
+ /* Return the final line, which has no newline. */
+ script = script->next;
+ *p = '\0';
+ linenum++;
+ if (more != NULL)
+ *more = 0;
+ return (buf);
+ }
+ case '\n':
+ *p++ = '\n';
+ *p = '\0';
+ s++;
+ linenum++;
+ if (more != NULL)
+ *more = 0;
+ return (buf);
+ default:
+ *p++ = *s++;
+ }
+ }
+ }
+ /* NOTREACHED */
+ return (NULL);
+}
+
+/*
+ * Like fgets, but go through the list of files chaining them together.
+ *
+ * The next input line, without its trailing newline, is stored in sp
+ * through cspace() using spflag, and linenum is incremented. Returns 1
+ * when a line was read and 0 at the end of all input, in which case
+ * sp->len is set to 0.
+ *
+ * Switching files also drives in-place editing (-i / -I): output goes to
+ * a temporary file next to the input; when the input is exhausted the
+ * original is hard-linked to its backup name (if an extension was given)
+ * and the temporary file is renamed over it. An input file that cannot
+ * be opened is reported, sets rval to 1 and is skipped.
+ */
+int
+mf_fgets(SPACE *sp, enum e_spflag spflag)
+{
+	struct stat sb;
+	size_t len;
+	char *p;
+	int c;
+	static int firstfile;
+
+	if (infile == NULL) {
+		/* stdin? */
+		if (files->fname == NULL) {
+			if (inplace != NULL)
+				errx(1,
+				    _("-I or -i may not be used with stdin"));
+			infile = stdin;
+			fname = "stdin";
+			outfile = stdout;
+			outfname = "stdout";
+		}
+		firstfile = 1;
+	}
+
+	for (;;) {
+		/* Peek one character to find out whether data remains. */
+		if (infile != NULL && (c = getc(infile)) != EOF) {
+			(void) ungetc(c, infile);
+			break;
+		}
+		/* If we are here then either eof or no files are open yet */
+		if (infile == stdin) {
+			sp->len = 0;
+			return (0);
+		}
+		if (infile != NULL) {
+			(void) fclose(infile);
+			/* Keep the original under its backup name. */
+			if (*oldfname != '\0') {
+				if (link(fname, oldfname) != 0) {
+					warn("link()");
+					(void) unlink(tmpfname);
+					exit(1);
+				}
+				*oldfname = '\0';
+			}
+			/* Move the edited copy into place. */
+			if (*tmpfname != '\0') {
+				if (outfile != NULL && outfile != stdout)
+					if (fclose(outfile) != 0) {
+						warn("fclose()");
+						(void) unlink(tmpfname);
+						exit(1);
+					}
+				outfile = NULL;
+				if (rename(tmpfname, fname) != 0) {
+					/* this should not happen really! */
+					warn("rename()");
+					(void) unlink(tmpfname);
+					exit(1);
+				}
+				*tmpfname = '\0';
+			}
+			outfname = NULL;
+		}
+		/* The first call must not step past the head of the list. */
+		if (firstfile == 0)
+			files = files->next;
+		else
+			firstfile = 0;
+		if (files == NULL) {
+			sp->len = 0;
+			return (0);
+		}
+		fname = files->fname;
+		if (inplace != NULL) {
+			/* dirname() and basename() may modify their input. */
+			char bn[PATH_MAX];
+			char dn[PATH_MAX];
+
+			(void) strlcpy(bn, fname, sizeof (bn));
+			(void) strlcpy(dn, fname, sizeof (dn));
+			if (lstat(fname, &sb) != 0)
+				err(1, "%s", fname);
+			/*
+			 * S_IFREG is not a single-bit flag: the symlink and
+			 * socket type codes contain its bit, so the file
+			 * type has to be compared with S_ISREG().
+			 */
+			if (!S_ISREG(sb.st_mode))
+				fatal(_("in-place editing only "
+				    "works for regular files"));
+			if (*inplace != '\0') {
+				(void) strlcpy(oldfname, fname,
+				    sizeof (oldfname));
+				len = strlcat(oldfname, inplace,
+				    sizeof (oldfname));
+				/* strlcat() truncated if len >= size. */
+				if (len >= sizeof (oldfname))
+					fatal(_("name too long"));
+			}
+			len = snprintf(tmpfname, sizeof (tmpfname),
+			    "%s/.!%ld!%s", dirname(dn), (long)getpid(),
+			    basename(bn));
+			if (len >= sizeof (tmpfname))
+				fatal(_("name too long"));
+			(void) unlink(tmpfname);
+			/* Report the file that actually failed to open. */
+			if ((outfile = fopen(tmpfname, "w")) == NULL)
+				err(1, "%s", tmpfname);
+			/* Give the copy the owner and mode of the original. */
+			if (fchown(fileno(outfile), sb.st_uid, sb.st_gid) != 0)
+				warn("fchown()");
+			if (fchmod(fileno(outfile), sb.st_mode & 07777) != 0)
+				warn("fchmod()");
+			outfname = tmpfname;
+			if (!ispan) {
+				linenum = 0;
+				resetstate();
+			}
+		} else {
+			outfile = stdout;
+			outfname = "stdout";
+		}
+		if ((infile = fopen(fname, "r")) == NULL) {
+			warn("%s", fname);
+			rval = 1;
+			continue;
+		}
+	}
+	/*
+	 * We are here only when infile is open and we still have something
+	 * to read from it.
+	 *
+	 * Use getln() so that we can handle essentially infinite input data.
+	 * The line is copied into the space by cspace() rather than used in
+	 * place, because getln() reuses its buffer on the next call.
+	 */
+	p = getln(infile, &len);
+	if (ferror(infile))
+		errx(1, "%s: %s", fname, strerror(errno ? errno : EIO));
+	if (len != 0 && p[len - 1] == '\n')
+		len--;
+	cspace(sp, p, len, spflag);
+
+	linenum++;
+
+	return (1);
+}
+
+/*
+ * Append a compilation unit of the given type to the tail of the script
+ * list. s is stored as given, not copied.
+ */
+static void
+add_compunit(enum e_cut type, char *s)
+{
+	struct s_compunit *unit = malloc(sizeof (*unit));
+
+	if (unit == NULL)
+		err(1, "malloc");
+	unit->next = NULL;
+	unit->s = s;
+	unit->type = type;
+
+	/* Hook the unit in at the tail and advance the tail pointer. */
+	*cu_nextp = unit;
+	cu_nextp = &unit->next;
+}
+
+/*
+ * Append an input file name to the tail of the file list. s is stored
+ * as given, not copied; main() passes NULL to request standard input.
+ */
+static void
+add_file(char *s)
+{
+	struct s_flist *entry = malloc(sizeof (*entry));
+
+	if (entry == NULL)
+		err(1, "malloc");
+	entry->fname = s;
+	entry->next = NULL;
+
+	/* Hook the entry in at the tail and advance the tail pointer. */
+	*fl_nextp = entry;
+	fl_nextp = &entry->next;
+}
+
+/*
+ * Report whether the line just read is the last line of input.
+ *
+ * If more files are queued and line numbering spans across them (no
+ * in-place editing, or -I), the answer is always 0. Otherwise the current
+ * input is peeked: 1 when it is at end of file, 0 when data remains.
+ */
+int
+lastline(void)
+{
+	int peeked;
+	int more_files = (files->next != NULL);
+
+	if (more_files && (inplace == NULL || ispan))
+		return (0);
+
+	peeked = getc(infile);
+	if (peeked != EOF) {
+		/* Put the character back; it belongs to the next line. */
+		(void) ungetc(peeked, infile);
+		return (0);
+	}
+	return (1);
+}
+
+/*
+ * Read one line of arbitrary length from in.
+ *
+ * Returns a pointer to a buffer that is reused (and possibly moved) by
+ * the next call; *lenp receives the number of bytes read, including the
+ * trailing newline when there is one. The buffer is also NUL-terminated.
+ * At end of file or on a read error *lenp covers whatever was read
+ * before it (possibly 0); the caller tells the two apart with ferror().
+ *
+ * Bytes are fetched with getc() rather than fgets() + strlen(): a NUL
+ * byte in the input made strlen() under-count, which dropped data and,
+ * for a line starting with NUL, read buffer[-1].
+ */
+static char *
+getln(FILE *in, size_t *lenp)
+{
+	static char *buffer = NULL;
+	static size_t sz = 0;
+
+	size_t len = 0;
+	int c;
+
+	for (;;) {
+		/* Keep room for the next byte plus the terminating NUL. */
+		if (sz <= (len + 1)) {
+			char *nb;
+
+			if ((nb = realloc(buffer, sz + LINE_MAX)) == NULL)
+				err(1, "realloc");
+			buffer = nb;
+			sz += LINE_MAX;
+		}
+
+		if ((c = getc(in)) == EOF) {
+			/* END OF FILE */
+			break;
+		}
+
+		buffer[len++] = (char)c;
+
+		if (c == '\n') {
+			/* got the new line */
+			break;
+		}
+	}
+
+	buffer[len] = '\0';
+	*lenp = len;
+	return (buffer);
+}
diff --git a/usr/src/cmd/sed/misc.c b/usr/src/cmd/sed/misc.c
new file mode 100644
index 0000000000..fed23012d5
--- /dev/null
+++ b/usr/src/cmd/sed/misc.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 1992 Diomidis Spinellis.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Diomidis Spinellis of Imperial College, University of London.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+
+#include <err.h>
+#include <limits.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+
+#include "defs.h"
+#include "extern.h"
+
+/*
+ * Return the regerror() message for errcode / preg.
+ *
+ * regerror() is called twice: once with an empty buffer to learn the
+ * size the message needs, then again to fill a buffer of that size. The
+ * buffer is kept in a static pointer and released on the next call, so
+ * the result is only valid until then and must not be freed by the
+ * caller.
+ */
+char *
+strregerror(int errcode, regex_t *preg)
+{
+	static char *msg;
+	size_t need;
+
+	/* Drop the message handed out by the previous call, if any. */
+	if (msg != NULL)
+		free(msg);
+
+	need = regerror(errcode, preg, NULL, 0);
+	msg = malloc(need);
+	if (msg == NULL)
+		err(1, "malloc");
+	(void) regerror(errcode, preg, msg, need);
+	return (msg);
+}
+
+/*
+ * Report a fatal error and exit with status 1. The printf-style message
+ * is written to stderr, prefixed with the current file name and line
+ * number and followed by a newline.
+ */
+void
+fatal(const char *fmt, ...)
+{
+	va_list args;
+
+	/* Location prefix first, then the caller's message. */
+	(void) fprintf(stderr, "%s: %lu: ", fname, linenum);
+	va_start(args, fmt);
+	(void) vfprintf(stderr, fmt, args);
+	va_end(args);
+	(void) fputs("\n", stderr);
+
+	exit(1);
+}
diff --git a/usr/src/cmd/sed/process.c b/usr/src/cmd/sed/process.c
new file mode 100644
index 0000000000..f5c8cf0b25
--- /dev/null
+++ b/usr/src/cmd/sed/process.c
@@ -0,0 +1,767 @@
+/*
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 1992 Diomidis Spinellis.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Diomidis Spinellis of Imperial College, University of London.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <wchar.h>
+#include <wctype.h>
+#include <termio.h>
+#include <libintl.h>
+#include <note.h>
+
+#include "defs.h"
+#include "extern.h"
+
+/*
+ * Working buffers used by this file: HS and PS are the hold and pattern
+ * spaces (see the hs/ps shorthands below), SS is the scratch space in which
+ * substitute() builds its result, and YS is the scratch space used by
+ * do_tr().
+ */
+static SPACE HS, PS, SS, YS;
+/* Shorthands for the pattern space (p*) and hold space (h*) fields. */
+#define pd PS.deleted
+#define ps PS.space
+#define psl PS.len
+#define hs HS.space
+#define hsl HS.len
+
+static int applies(struct s_command *);
+static void do_tr(struct s_tr *);
+static void flush_appends(void);
+static void lputs(char *, size_t);
+static int regexec_e(regex_t *, const char *, int, int, size_t);
+static void regsub(SPACE *, char *, char *);
+static int substitute(struct s_command *);
+
+struct s_appends *appends; /* Array of pointers to strings to append. */
+static int appendx; /* Index into appends array. */
+int appendnum; /* Size of appends array. */
+
+static int lastaddr; /* Set by applies if last address of a range. */
+static int sdone; /* If any substitutes since last line input. */
+ /*
+ * State shared with regexec_e(): defpreg is the most recent non-NULL
+ * RE passed to it and is reused when it is called with a NULL RE;
+ * regexec() is asked for maxnsub + 1 slots of the match array.
+ */
+static regex_t *defpreg;
+size_t maxnsub;
+regmatch_t *match;
+
+/* Write the pattern space followed by a newline to outfile. */
+#define OUT() do { \
+ (void) fwrite(ps, 1, psl, outfile); \
+ (void) fputc('\n', outfile); \
+ _NOTE(CONSTCOND) \
+} while (0)
+
+/*
+ * process --
+ * Main execution loop.  Each iteration of the outer loop reads one line
+ * into the pattern space with mf_fgets() and walks the command list that
+ * starts at prog, executing every command whose addresses select the
+ * current state (see applies()).  When the list is exhausted, or a command
+ * jumps to "new", the pattern space is written out unless -n (nflag) is
+ * set or the pattern space has been marked deleted (pd), and any queued
+ * 'a'/'r' output is flushed.
+ *
+ * Labels: "top" restarts the command list without reading input (used by
+ * 'D'), "redirect" continues execution at a cp that was just changed by a
+ * block or branch, and "new" ends the current cycle.
+ */
+void
+process(void)
+{
+ struct s_command *cp;
+ SPACE tspace;
+ size_t oldpsl = 0;
+ char *p;
+
+ p = NULL;
+
+ for (linenum = 0; mf_fgets(&PS, REPLACE); /* NOP */) {
+ pd = 0;
+top:
+ cp = prog;
+redirect:
+ while (cp != NULL) {
+ if (!applies(cp)) {
+ cp = cp->next;
+ continue;
+ }
+ switch (cp->code) {
+ case '{':
+ /* Selected block: continue with its first command. */
+ cp = cp->u.c;
+ goto redirect;
+ case 'a':
+ /*
+ * Queue the text; it is written by flush_appends().
+ * The queue is doubled in size when it is full.
+ */
+ if (appendx >= appendnum)
+ if ((appends = realloc(appends,
+ sizeof (struct s_appends) *
+ (appendnum *= 2))) == NULL)
+ err(1, "realloc");
+ appends[appendx].type = AP_STRING;
+ appends[appendx].s = cp->t;
+ appends[appendx].len = strlen(cp->t);
+ appendx++;
+ break;
+ case 'b':
+ cp = cp->u.c;
+ goto redirect;
+ case 'c':
+ /*
+ * Delete the pattern space.  The text is printed for a
+ * command without a second address, at the last address
+ * of a range, or on the last line of input.
+ */
+ pd = 1;
+ psl = 0;
+ if (cp->a2 == NULL || lastaddr || lastline())
+ (void) fprintf(outfile, "%s", cp->t);
+ break;
+ case 'd':
+ pd = 1;
+ goto new;
+ case 'D':
+ /*
+ * With no newline in the pattern space this acts like
+ * 'd'.  Otherwise drop everything through the first
+ * newline and rerun the script from the top without
+ * reading new input.
+ */
+ if (pd)
+ goto new;
+ if (psl == 0 ||
+ (p = memchr(ps, '\n', psl)) == NULL) {
+ pd = 1;
+ goto new;
+ } else {
+ psl -=
+ (uintptr_t)(p + 1) - (uintptr_t)ps;
+ (void) memmove(ps, p + 1, psl);
+ goto top;
+ }
+ case 'g':
+ cspace(&PS, hs, hsl, REPLACE);
+ break;
+ case 'G':
+ cspace(&PS, "\n", 1, APPEND);
+ cspace(&PS, hs, hsl, APPEND);
+ break;
+ case 'h':
+ cspace(&HS, ps, psl, REPLACE);
+ break;
+ case 'H':
+ cspace(&HS, "\n", 1, APPEND);
+ cspace(&HS, ps, psl, APPEND);
+ break;
+ case 'i':
+ /* Unlike 'a', the text is written immediately. */
+ (void) fprintf(outfile, "%s", cp->t);
+ break;
+ case 'l':
+ lputs(ps, psl);
+ break;
+ case 'n':
+ /*
+ * Finish this line's output, then replace the pattern
+ * space with the next line; exit if there is none.
+ */
+ if (!nflag && !pd)
+ OUT();
+ flush_appends();
+ if (!mf_fgets(&PS, REPLACE))
+ exit(0);
+ pd = 0;
+ break;
+ case 'N':
+ /*
+ * Append a newline and the next input line.  If there
+ * is no next line the program exits right here, i.e.
+ * without writing the pattern space.
+ */
+ flush_appends();
+ cspace(&PS, "\n", 1, APPEND);
+ if (!mf_fgets(&PS, APPEND))
+ exit(0);
+ break;
+ case 'p':
+ if (pd)
+ break;
+ OUT();
+ break;
+ case 'P':
+ /*
+ * Print up to the first newline by temporarily
+ * shortening the pattern space length for OUT().
+ */
+ if (pd)
+ break;
+ if ((p = memchr(ps, '\n', psl)) != NULL) {
+ oldpsl = psl;
+ psl = (uintptr_t)p - (uintptr_t)ps;
+ }
+ OUT();
+ if (p != NULL)
+ psl = oldpsl;
+ break;
+ case 'q':
+ if (!nflag && !pd)
+ OUT();
+ flush_appends();
+ exit(0);
+ /*NOTREACHED*/
+ case 'r':
+ /* Queue the file name; flush_appends() copies the file. */
+ if (appendx >= appendnum)
+ if ((appends = realloc(appends,
+ sizeof (struct s_appends) *
+ (appendnum *= 2))) == NULL)
+ err(1, "realloc");
+ appends[appendx].type = AP_FILE;
+ appends[appendx].s = cp->t;
+ appends[appendx].len = strlen(cp->t);
+ appendx++;
+ break;
+ case 's':
+ sdone |= substitute(cp);
+ break;
+ case 't':
+ /* Branch only if a substitution was made; clears the flag. */
+ if (sdone) {
+ sdone = 0;
+ cp = cp->u.c;
+ goto redirect;
+ }
+ break;
+ case 'w':
+ if (pd)
+ break;
+ /* The file is opened on first use if it is not open yet. */
+ if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
+ O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 0666))
+ == -1)
+ err(1, "%s", cp->t);
+ if (write(cp->u.fd, ps, psl) != (ssize_t)psl ||
+ write(cp->u.fd, "\n", 1) != 1)
+ err(1, "%s", cp->t);
+ break;
+ case 'x':
+ /*
+ * If the hold space is null, make it empty
+ * but not null. Otherwise the pattern space
+ * will become null after the swap, which is
+ * an abnormal condition.
+ */
+ if (hs == NULL)
+ cspace(&HS, "", 0, REPLACE);
+ tspace = PS;
+ PS = HS;
+ HS = tspace;
+ break;
+ case 'y':
+ if (pd || psl == 0)
+ break;
+ do_tr(cp->u.y);
+ break;
+ case ':':
+ case '}':
+ break;
+ case '=':
+ /* Last case of the switch, so no break is needed. */
+ (void) fprintf(outfile, "%lu\n", linenum);
+ }
+ cp = cp->next;
+ } /* for all cp */
+
+new: if (!nflag && !pd)
+ OUT();
+ flush_appends();
+ } /* for all lines */
+}
+
+/*
+ * TRUE if the address passed matches the current program state
+ * (lastline, linenumber, ps).  An address that is neither AT_RE nor
+ * AT_LINE is treated as the last-line address, so this macro must not be
+ * used for a relative (AT_RELLINE) address; applies() handles those itself.
+ */
+#define	MATCH(a)							\
+	((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) :	\
+	(a)->type == AT_LINE ? linenum == (a)->u.l : lastline())
+
+/*
+ * Return TRUE if the command applies to the current line.  Sets the start
+ * line for process ranges.  Interprets the non-select (``!'') flag.
+ *
+ * For a two-address command, cp->startline is non-zero while the range is
+ * open and holds the line number at which it started.  lastaddr is set
+ * when the current line is the final line of the range.
+ */
+static int
+applies(struct s_command *cp)
+{
+	int r;
+
+	lastaddr = 0;
+	if (cp->a1 == NULL && cp->a2 == NULL) {
+		/* No address: every line is selected. */
+		r = 1;
+	} else if (cp->a2 == NULL) {
+		/* Single address. */
+		r = MATCH(cp->a1);
+	} else if (cp->startline > 0) {
+		/* A range is open: decide whether it continues or ends. */
+		if (cp->a2->type == AT_RELLINE) {
+			/*
+			 * Relative end address ("addr,+N").  Only the
+			 * distance from the start line matters here; using
+			 * MATCH() would wrongly select the last input line.
+			 */
+			if (linenum - cp->startline < cp->a2->u.l) {
+				r = 1;
+			} else {
+				/*
+				 * Exactly N lines after the start this is the
+				 * last line of the range.  Beyond that, the
+				 * end was skipped, so just close the range.
+				 */
+				r = (linenum - cp->startline == cp->a2->u.l);
+				lastaddr = r;
+				cp->startline = 0;
+			}
+		} else if (MATCH(cp->a2)) {
+			cp->startline = 0;
+			lastaddr = 1;
+			r = 1;
+		} else if (cp->a2->type == AT_LINE &&
+		    linenum > cp->a2->u.l) {
+			/*
+			 * We missed the 2nd address due to a branch,
+			 * so just close the range and return false.
+			 */
+			cp->startline = 0;
+			r = 0;
+		} else {
+			r = 1;
+		}
+	} else if (MATCH(cp->a1)) {
+		/*
+		 * If the second address is a number less than or
+		 * equal to the line number first selected, only
+		 * one line shall be selected.
+		 *	-- POSIX 1003.2
+		 * Likewise if the relative second line address is zero.
+		 */
+		if ((cp->a2->type == AT_LINE &&
+		    linenum >= cp->a2->u.l) ||
+		    (cp->a2->type == AT_RELLINE && cp->a2->u.l == 0))
+			lastaddr = 1;
+		else
+			cp->startline = linenum;
+		r = 1;
+	} else {
+		r = 0;
+	}
+	return (cp->nonsel ? !r : r);
+}
+
+/*
+ * resetstate --
+ *	Put the sed processor back into its initial state: no address range
+ *	is open and the hold space is empty.
+ */
+void
+resetstate(void)
+{
+	struct s_command *cmd;
+
+	/*
+	 * Walk the program, stepping into a block at each '{' command, and
+	 * clear the in-range marker of every two-address command visited.
+	 */
+	cmd = prog;
+	while (cmd != NULL) {
+		if (cmd->a2)
+			cmd->startline = 0;
+		cmd = (cmd->code == '{') ? cmd->u.c : cmd->next;
+	}
+
+	/* Empty the hold space. */
+	cspace(&HS, "", 0, REPLACE);
+}
+
+/*
+ * substitute --
+ * Do substitutions in the pattern space. Currently, we build a
+ * copy of the new pattern space in the substitute space structure
+ * and then swap them.
+ *
+ * Returns 0 if the RE (or the requested Nth occurrence of it) does not
+ * match, in which case the pattern space is left untouched, and 1 after
+ * a replacement has been made.  cp->u.s->n selects the mode: 0 replaces
+ * every match, 1 the first match, and any other value the Nth match.
+ */
+static int
+substitute(struct s_command *cp)
+{
+ SPACE tspace;
+ regex_t *re;
+ regoff_t re_off, slen;
+ int lastempty, n;
+ char *s;
+
+ s = ps;
+ re = cp->u.s->re;
+ /*
+ * An empty RE reuses the last RE that was executed (defpreg), so the
+ * backreferences used by the replacement can only be checked against
+ * that RE here, at run time.
+ */
+ if (re == NULL) {
+ if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
+ linenum = cp->u.s->linenum;
+ fatal(_("\\%u not defined in the RE"),
+ cp->u.s->maxbref);
+ }
+ }
+ if (!regexec_e(re, s, 0, 0, psl))
+ return (0);
+
+ SS.len = 0; /* Clean substitute space. */
+ slen = psl;
+ n = cp->u.s->n;
+ lastempty = 1;
+
+ switch (n) {
+ case 0: /* Global */
+ /*
+ * s and slen track the not yet processed tail of the pattern
+ * space; match offsets are relative to s.  An empty match that
+ * directly follows a non-empty match is not replaced.
+ */
+ do {
+ if (lastempty || match[0].rm_so != match[0].rm_eo) {
+ /* Locate start of replaced string. */
+ re_off = match[0].rm_so;
+ /* Copy leading retained string. */
+ cspace(&SS, s, re_off, APPEND);
+ /* Add in regular expression. */
+ regsub(&SS, s, cp->u.s->new);
+ }
+
+ /* Move past this match. */
+ if (match[0].rm_so != match[0].rm_eo) {
+ s += match[0].rm_eo;
+ slen -= match[0].rm_eo;
+ lastempty = 0;
+ } else {
+ /*
+ * Empty match: copy one character through (if
+ * there is one) and step over it so that the
+ * search makes progress.
+ */
+ if (match[0].rm_so < slen)
+ cspace(&SS, s + match[0].rm_so, 1,
+ APPEND);
+ s += match[0].rm_so + 1;
+ slen -= match[0].rm_so + 1;
+ lastempty = 1;
+ }
+ } while (slen >= 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
+ /* Copy trailing retained string. */
+ if (slen > 0)
+ cspace(&SS, s, slen, APPEND);
+ break;
+ default: /* Nth occurrence */
+ /*
+ * Skip the first n - 1 matches; an empty match is stepped over
+ * as if it were one character long.
+ */
+ while (--n) {
+ if (match[0].rm_eo == match[0].rm_so)
+ match[0].rm_eo = match[0].rm_so + 1;
+ s += match[0].rm_eo;
+ slen -= match[0].rm_eo;
+ if (slen < 0)
+ return (0);
+ if (!regexec_e(re, s, REG_NOTBOL, 0, slen))
+ return (0);
+ }
+ /* FALLTHROUGH */
+ case 1: /* 1st occurrence */
+ /* Locate start of replaced string. */
+ re_off = match[0].rm_so + ((uintptr_t)s - (uintptr_t)ps);
+ /* Copy leading retained string. */
+ cspace(&SS, ps, re_off, APPEND);
+ /* Add in regular expression. */
+ regsub(&SS, s, cp->u.s->new);
+ /* Copy trailing retained string. */
+ s += match[0].rm_eo;
+ slen -= match[0].rm_eo;
+ cspace(&SS, s, slen, APPEND);
+ break;
+ }
+
+ /*
+ * Swap the substitute space and the pattern space, and make sure
+ * that any leftover pointers into stdio memory get lost.
+ */
+ tspace = PS;
+ PS = SS;
+ SS = tspace;
+ SS.space = SS.back;
+
+ /* Handle the 'p' flag. */
+ if (cp->u.s->p)
+ OUT();
+
+ /* Handle the 'w' flag. */
+ if (cp->u.s->wfile && !pd) {
+ /* The file is opened on first use if it is not open yet. */
+ if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
+ O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 0666)) == -1)
+ err(1, "%s", cp->u.s->wfile);
+ if (write(cp->u.s->wfd, ps, psl) != (ssize_t)psl ||
+ write(cp->u.s->wfd, "\n", 1) != 1)
+ err(1, "%s", cp->u.s->wfile);
+ }
+ return (1);
+}
+
+/*
+ * do_tr --
+ *	Apply the 'y' (transliterate) command described by y to the pattern
+ *	space.  With a single-byte encoding each byte is mapped in place
+ *	through y->bytetab.  With a multi-byte encoding the result is built in
+ *	the translation space YS, which is then swapped with the pattern
+ *	space.
+ */
+static void
+do_tr(struct s_tr *y)
+{
+	SPACE swap;
+	char mapped, *cur;
+	size_t step, remain;
+	int idx;
+
+	if (MB_CUR_MAX == 1) {
+		/* One byte per character: translate where it stands. */
+		for (remain = 0; remain < psl; remain++)
+			ps[remain] = y->bytetab[(uchar_t)ps[remain]];
+		return;
+	}
+
+	/* Start from an empty translation space. */
+	YS.len = 0;
+	cur = ps;
+	remain = psl;
+	while (remain > 0) {
+		mapped = y->bytetab[(uchar_t)*cur];
+		if (mapped != '\0') {
+			/* The byte table has a direct mapping for this byte. */
+			cspace(&YS, &mapped, 1, APPEND);
+			step = 1;
+		} else {
+			/* Look for a multi-byte source sequence starting here. */
+			idx = 0;
+			while (idx < y->nmultis &&
+			    (remain < y->multis[idx].fromlen ||
+			    memcmp(cur, y->multis[idx].from,
+			    y->multis[idx].fromlen) != 0))
+				idx++;
+			if (idx < y->nmultis) {
+				cspace(&YS, y->multis[idx].to,
+				    y->multis[idx].tolen, APPEND);
+				step = y->multis[idx].fromlen;
+			} else {
+				/* Nothing applies: copy the byte unchanged. */
+				cspace(&YS, cur, 1, APPEND);
+				step = 1;
+			}
+		}
+		cur += step;
+		remain -= step;
+	}
+
+	/* Make the translated text the new pattern space. */
+	swap = PS;
+	PS = YS;
+	YS = swap;
+	YS.space = YS.back;
+}
+
+/*
+ * flush_appends --
+ *	Write out everything queued by the 'a' and 'r' commands, then empty
+ *	the queue.  This is always called before a line is read, which is why
+ *	it also clears the "substitution done" (sdone) flag.  Exits via errx()
+ *	if outfile is in an error state afterwards.
+ */
+static void
+flush_appends(void)
+{
+	FILE *fp;
+	int nread, idx;
+	char chunk[8 * 1024];
+
+	for (idx = 0; idx < appendx; idx++) {
+		if (appends[idx].type == AP_STRING) {
+			(void) fwrite(appends[idx].s, sizeof (char),
+			    appends[idx].len, outfile);
+		} else if (appends[idx].type == AP_FILE) {
+			/*
+			 * The file is opened afresh every time rather than
+			 * cached: a missing file is not an error, so another
+			 * program could be talking to the script through the
+			 * filesystem.  Bizarre, but possible, and caching
+			 * would hardly be a performance win anyway.
+			 */
+			fp = fopen(appends[idx].s, "r");
+			if (fp == NULL)
+				continue;
+			while ((nread = fread(chunk, sizeof (char),
+			    sizeof (chunk), fp)) != 0)
+				(void) fwrite(chunk, sizeof (char), nread,
+				    outfile);
+			(void) fclose(fp);
+		}
+	}
+
+	if (ferror(outfile))
+		errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO));
+	appendx = sdone = 0;
+}
+
+/*
+ * lputs --
+ * Write the len bytes at s to outfile in the visually unambiguous form
+ * of the 'l' command: printable characters as they are, the characters
+ * listed in escapes[] as two-character backslash sequences, and every
+ * other byte as a backslash followed by three octal digits.  Output is
+ * folded with a backslash-newline before it reaches the line width, and
+ * a '$' marks the end of each line.  Exits via errx() if outfile is in
+ * an error state afterwards.
+ */
+static void
+lputs(char *s, size_t len)
+{
+ static const char escapes[] = "\\\a\b\f\r\t\v";
+ int c, col, width;
+ const char *p;
+ struct winsize win;
+ static int termwidth = -1;
+ size_t clen, i;
+ wchar_t wc;
+ mbstate_t mbs;
+
+ /*
+ * Line width: 60 when not writing to stdout; otherwise, determined
+ * once, from $COLUMNS, else from the terminal's window size, else 60.
+ */
+ if (outfile != stdout)
+ termwidth = 60;
+ if (termwidth == -1) {
+ if (((p = getenv("COLUMNS")) != NULL) && (*p != '\0'))
+ termwidth = atoi(p);
+ else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
+ win.ws_col > 0)
+ termwidth = win.ws_col;
+ else
+ termwidth = 60;
+ }
+ if (termwidth <= 0)
+ termwidth = 1;
+
+ (void) memset(&mbs, 0, sizeof (mbs));
+ col = 0;
+ while (len != 0) {
+ clen = mbrtowc(&wc, s, len, &mbs);
+ /* mbrtowc() returns 0 for a NUL; it still occupies one byte. */
+ if (clen == 0)
+ clen = 1;
+ /*
+ * Invalid or incomplete sequence: treat the byte as a character
+ * of its own and start over with a clean conversion state.
+ */
+ if (clen == (size_t)-1 || clen == (size_t)-2) {
+ wc = (unsigned char)*s;
+ clen = 1;
+ (void) memset(&mbs, 0, sizeof (mbs));
+ }
+ if (wc == '\n') {
+ /* Embedded newline: end the output line with '$'. */
+ if (col + 1 >= termwidth)
+ (void) fprintf(outfile, "\\\n");
+ (void) fputc('$', outfile);
+ (void) fputc('\n', outfile);
+ col = 0;
+ } else if (iswprint(wc)) {
+ width = wcwidth(wc);
+ if (col + width >= termwidth) {
+ (void) fprintf(outfile, "\\\n");
+ col = 0;
+ }
+ (void) fwrite(s, 1, clen, outfile);
+ col += width;
+ } else if (wc != L'\0' && (c = wctob(wc)) != EOF &&
+ (p = strchr(escapes, c)) != NULL) {
+ if (col + 2 >= termwidth) {
+ (void) fprintf(outfile, "\\\n");
+ col = 0;
+ }
+ /* The position in escapes[] selects the letter to print. */
+ (void) fprintf(outfile, "\\%c",
+ "\\abfrtv"[(uintptr_t)p - (uintptr_t)escapes]);
+ col += 2;
+ } else {
+ /* Four columns ("\ooo") for each byte of the character. */
+ if (col + 4 * clen >= (unsigned)termwidth) {
+ (void) fprintf(outfile, "\\\n");
+ col = 0;
+ }
+ for (i = 0; i < clen; i++)
+ (void) fprintf(outfile, "\\%03o",
+ (int)(unsigned char)s[i]);
+ col += 4 * clen;
+ }
+ s += clen;
+ len -= clen;
+ }
+ if (col + 1 >= termwidth)
+ (void) fprintf(outfile, "\\\n");
+ (void) fputc('$', outfile);
+ (void) fputc('\n', outfile);
+ if (ferror(outfile))
+ errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO));
+}
+
+/*
+ * regexec_e --
+ *	regexec() wrapper with error handling.  A NULL preg means "reuse the
+ *	last RE that was executed"; it is a fatal error if there is none yet.
+ *	The first slen bytes of string are searched (REG_STARTEND).  When
+ *	nomatch is non-zero no match offsets are requested.  Returns 1 on a
+ *	match and 0 on no match; any other regexec() result is fatal.
+ */
+static int
+regexec_e(regex_t *preg, const char *string, int eflags, int nomatch,
+    size_t slen)
+{
+	int rc;
+	size_t nslots;
+
+	/* Remember the RE, or fall back to the remembered one. */
+	if (preg != NULL)
+		defpreg = preg;
+	else if (defpreg == NULL)
+		fatal(_("first RE may not be empty"));
+
+	/* With REG_STARTEND, match[0] delimits the text to be searched. */
+	match[0].rm_so = 0;
+	match[0].rm_eo = slen;
+
+	nslots = nomatch ? 0 : maxnsub + 1;
+	rc = regexec(defpreg, string, nslots, match, eflags | REG_STARTEND);
+	if (rc == 0)
+		return (1);
+	if (rc == REG_NOMATCH)
+		return (0);
+
+	fatal(_("RE error: %s"), strregerror(rc, defpreg));
+	return (0);
+}
+
+/*
+ * regsub - perform substitutions after a regexp match
+ * Based on a routine by Henry Spencer
+ *
+ * Appends the expansion of the replacement text src to the space sp.
+ * An unescaped '&' expands to the whole match (match[0]) and a backslash
+ * followed by a digit to that numbered submatch; "\\" and "\&" yield a
+ * literal backslash or ampersand; every other character is copied as is.
+ * The offsets in the global match array are taken relative to string.
+ * The result is NUL terminated, but the NUL is not counted in sp->len.
+ */
+static void
+regsub(SPACE *sp, char *string, char *src)
+{
+ int len, no;
+ char c, *dst;
+
+/*
+ * Make room for reqlen more bytes in sp.  The buffer may be moved by
+ * realloc(), so the local write pointer dst is recomputed when it grows.
+ */
+#define NEEDSP(reqlen) \
+ /* XXX What is the +1 for? */ \
+ if (sp->len + (reqlen) + 1 >= sp->blen) { \
+ sp->blen += (reqlen) + 1024; \
+ if ((sp->back = realloc(sp->back, sp->blen)) == NULL) \
+ err(1, "realloc"); \
+ sp->space = sp->back; \
+ dst = sp->space + sp->len; \
+ }
+
+ dst = sp->space + sp->len;
+ while ((c = *src++) != '\0') {
+ /* no: submatch number to insert, or -1 for a plain character. */
+ if (c == '&')
+ no = 0;
+ else if (c == '\\' && isdigit((unsigned char)*src))
+ no = *src++ - '0';
+ else
+ no = -1;
+ if (no < 0) { /* Ordinary character. */
+ if (c == '\\' && (*src == '\\' || *src == '&'))
+ c = *src++;
+ NEEDSP(1);
+ *dst++ = c;
+ ++sp->len;
+ } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
+ /* Submatches with an offset of -1 are skipped. */
+ len = match[no].rm_eo - match[no].rm_so;
+ NEEDSP(len);
+ (void) memmove(dst, string + match[no].rm_so, len);
+ dst += len;
+ sp->len += len;
+ }
+ }
+ NEEDSP(1);
+ *dst = '\0';
+}
+
+/*
+ * cspace --
+ *	Copy len bytes from p into the space sp, growing its buffer when
+ *	necessary.  With spflag REPLACE the bytes replace the current
+ *	contents, otherwise they are added after them.  The contents are
+ *	always NUL terminated; the terminator is not counted in sp->len.
+ *	Exits via err() if the buffer cannot be grown.
+ */
+void
+cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag)
+{
+	size_t need;
+
+	/*
+	 * The size needed is computed from the current length, before a
+	 * REPLACE resets it.  Growth adds 1024 spare bytes so that repeated
+	 * small appends do not reallocate each time.
+	 */
+	need = sp->len + len + 1;
+	if (need > sp->blen) {
+		sp->blen = need + 1024;
+		sp->back = realloc(sp->back, sp->blen);
+		sp->space = sp->back;
+		if (sp->back == NULL)
+			err(1, "realloc");
+	}
+
+	if (spflag == REPLACE)
+		sp->len = 0;
+
+	(void) memmove(sp->space + sp->len, p, len);
+	sp->len += len;
+	sp->space[sp->len] = '\0';
+}
+
+/*
+ * cfclose --
+ *	Close the cached output files of the commands from cp up to, but not
+ *	including, end: the 'w' flag file of every 's' command and the file of
+ *	every 'w' command.  Blocks are handled by recursing into them.  Each
+ *	descriptor is reset to -1; a failing close() is fatal.
+ */
+void
+cfclose(struct s_command *cp, struct s_command *end)
+{
+
+	while (cp != end) {
+		if (cp->code == 's') {
+			if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
+				err(1, "%s", cp->u.s->wfile);
+			cp->u.s->wfd = -1;
+		} else if (cp->code == 'w') {
+			if (cp->u.fd != -1 && close(cp->u.fd))
+				err(1, "%s", cp->t);
+			cp->u.fd = -1;
+		} else if (cp->code == '{') {
+			/* The block's commands run up to the one after it. */
+			cfclose(cp->u.c, cp->next);
+		}
+		cp = cp->next;
+	}
+}
diff --git a/usr/src/cmd/sed/sed.1 b/usr/src/cmd/sed/sed.1
new file mode 100644
index 0000000000..0744630b57
--- /dev/null
+++ b/usr/src/cmd/sed/sed.1
@@ -0,0 +1,636 @@
+.\" Copyright (c) 1992, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" This code is derived from software contributed to Berkeley by
+.\" the Institute of Electrical and Electronics Engineers, Inc.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)sed.1 8.2 (Berkeley) 12/30/93
+.\" $FreeBSD$
+.\"
+.Dd May 24, 2009
+.Dt SED 1
+.Os
+.Sh NAME
+.Nm sed
+.Nd stream editor
+.Sh SYNOPSIS
+.Nm
+.Op Fl Ealnr
+.Ar command
+.Op Ar
+.Nm
+.Op Fl Ealnr
+.Op Fl e Ar command
+.Op Fl f Ar command_file
+.Op Fl I Ar extension
+.Op Fl i Ar extension
+.Op Ar
+.Sh DESCRIPTION
+The
+.Nm
+utility reads the specified files, or the standard input if no files
+are specified, modifying the input as specified by a list of commands.
+The input is then written to the standard output.
+.Pp
+A single command may be specified as the first argument to
+.Nm .
+Multiple commands may be specified by using the
+.Fl e
+or
+.Fl f
+options.
+All commands are applied to the input in the order they are specified
+regardless of their origin.
+.Pp
+The following options are available:
+.Bl -tag -width indent
+.It Fl E
+Interpret regular expressions as extended (modern) regular expressions
+rather than basic regular expressions (BRE's).
+The
+.Xr re_format 7
+manual page fully describes both formats.
+.It Fl a
+The files listed as parameters for the
+.Dq w
+functions are created (or truncated) before any processing begins,
+by default.
+The
+.Fl a
+option causes
+.Nm
+to delay opening each file until a command containing the related
+.Dq w
+function is applied to a line of input.
+.It Fl e Ar command
+Append the editing commands specified by the
+.Ar command
+argument
+to the list of commands.
+.It Fl f Ar command_file
+Append the editing commands found in the file
+.Ar command_file
+to the list of commands.
+The editing commands should each be listed on a separate line.
+.It Fl I Ar extension
+Edit files in-place, saving backups with the specified
+.Ar extension .
+If a zero-length
+.Ar extension
+is given, no backup will be saved.
+It is not recommended to give a zero-length
+.Ar extension
+when in-place editing files, as you risk corruption or partial content
+in situations where disk space is exhausted, etc.
+.Pp
+Note that in-place editing with
+.Fl I
+still takes place in a single continuous line address space covering
+all files, although each file preserves its individuality instead of
+forming one output stream.
+The line counter is never reset between files, address ranges can span
+file boundaries, and the
+.Dq $
+address matches only the last line of the last file.
+(See
+.Sx "Sed Addresses" . )
+That can lead to unexpected results in many cases of in-place editing,
+where using
+.Fl i
+is desired.
+.It Fl i Ar extension
+Edit files in-place similarly to
+.Fl I ,
+but treat each file independently from other files.
+In particular, line numbers in each file start at 1,
+the
+.Dq $
+address matches the last line of the current file,
+and address ranges are limited to the current file.
+(See
+.Sx "Sed Addresses" . )
+The net result is as though each file were edited by a separate
+.Nm
+instance.
+.It Fl l
+Make output line buffered.
+.It Fl n
+By default, each line of input is echoed to the standard output after
+all of the commands have been applied to it.
+The
+.Fl n
+option suppresses this behavior.
+.It Fl r
+Same as
+.Fl E
+for compatibility with GNU sed.
+.El
+.Pp
+The form of a
+.Nm
+command is as follows:
+.Pp
+.Dl [address[,address]]function[arguments]
+.Pp
+Whitespace may be inserted before the first address and the function
+portions of the command.
+.Pp
+Normally,
+.Nm
+cyclically copies a line of input, not including its terminating newline
+character, into a
+.Em "pattern space" ,
+(unless there is something left after a
+.Dq D
+function),
+applies all of the commands with addresses that select that pattern space,
+copies the pattern space to the standard output, appending a newline, and
+deletes the pattern space.
+.Pp
+Some of the functions use a
+.Em "hold space"
+to save all or part of the pattern space for subsequent retrieval.
+.Sh "Sed Addresses"
+An address is not required, but if specified must have one of the
+following formats:
+.Bl -bullet -offset indent
+.It
+a number that counts
+input lines
+cumulatively across input files (or in each file independently
+if a
+.Fl i
+option is in effect);
+.It
+a dollar
+.Pq Dq $
+character that addresses the last line of input (or the last line
+of the current file if a
+.Fl i
+option was specified);
+.It
+a context address
+that consists of a regular expression preceded and followed by a
+delimiter. The closing delimiter can also optionally be followed by the
+.Dq I
+character, to indicate that the regular expression is to be matched
+in a case-insensitive way.
+.El
+.Pp
+A command line with no addresses selects every pattern space.
+.Pp
+A command line with one address selects all of the pattern spaces
+that match the address.
+.Pp
+A command line with two addresses selects an inclusive range.
+This
+range starts with the first pattern space that matches the first
+address.
+The end of the range is the next following pattern space
+that matches the second address.
+If the second address is a number
+less than or equal to the line number first selected, only that
+line is selected.
+The number in the second address may be prefixed with a
+.Pq Dq \&+
+to specify the number of lines to match after the first pattern.
+In the case when the second address is a context
+address,
+.Nm
+does not re-match the second address against the
+pattern space that matched the first address.
+Starting at the
+first line following the selected range,
+.Nm
+starts looking again for the first address.
+.Pp
+Editing commands can be applied to non-selected pattern spaces by use
+of the exclamation character
+.Pq Dq \&!
+function.
+.Sh "Sed Regular Expressions"
+The regular expressions used in
+.Nm ,
+by default, are basic regular expressions (BREs, see
+.Xr re_format 7
+for more information), but extended (modern) regular expressions can be used
+instead if the
+.Fl E
+flag is given.
+In addition,
+.Nm
+has the following two additions to regular expressions:
+.Pp
+.Bl -enum -compact
+.It
+In a context address, any character other than a backslash
+.Pq Dq \e
+or newline character may be used to delimit the regular expression.
+The opening delimiter needs to be preceded by a backslash
+unless it is a slash.
+For example, the context address
+.Li \exabcx
+is equivalent to
+.Li /abc/ .
+Also, putting a backslash character before the delimiting character
+within the regular expression causes the character to be treated literally.
+For example, in the context address
+.Li \exabc\exdefx ,
+the RE delimiter is an
+.Dq x
+and the second
+.Dq x
+stands for itself, so that the regular expression is
+.Dq abcxdef .
+.Pp
+.It
+The escape sequence \en matches a newline character embedded in the
+pattern space.
+You cannot, however, use a literal newline character in an address or
+in the substitute command.
+.El
+.Pp
+One special feature of
+.Nm
+regular expressions is that they can default to the last regular
+expression used.
+If a regular expression is empty, i.e., just the delimiter characters
+are specified, the last regular expression encountered is used instead.
+The last regular expression is defined as the last regular expression
+used as part of an address or substitute command, and at run-time, not
+compile-time.
+For example, the command
+.Dq /abc/s//XXX/
+will substitute
+.Dq XXX
+for the pattern
+.Dq abc .
+.Sh "Sed Functions"
+In the following list of commands, the maximum number of permissible
+addresses for each command is indicated by [0addr], [1addr], or [2addr],
+representing zero, one, or two addresses.
+.Pp
+The argument
+.Em text
+consists of one or more lines.
+To embed a newline in the text, precede it with a backslash.
+Other backslashes in text are deleted and the following character
+taken literally.
+.Pp
+The
+.Dq r
+and
+.Dq w
+functions take an optional file parameter, which should be separated
+from the function letter by white space.
+Each file given as an argument to
+.Nm
+is created (or its contents truncated) before any input processing begins.
+.Pp
+The
+.Dq b ,
+.Dq r ,
+.Dq s ,
+.Dq t ,
+.Dq w ,
+.Dq y ,
+.Dq \&! ,
+and
+.Dq \&:
+functions all accept additional arguments.
+The following synopses indicate which arguments have to be separated from
+the function letters by white space characters.
+.Pp
+Two of the functions take a function-list.
+This is a list of
+.Nm
+functions separated by newlines, as follows:
+.Bd -literal -offset indent
+{ function
+ function
+ ...
+ function
+}
+.Ed
+.Pp
+The
+.Dq {
+can be preceded by white space and can be followed by white space.
+The function can be preceded by white space.
+The terminating
+.Dq }
+must be preceded by a newline or optional white space.
+.Pp
+.Bl -tag -width "XXXXXX" -compact
+.It [2addr] function-list
+Execute function-list only when the pattern space is selected.
+.Pp
+.It [1addr]a\e
+.It text
+Write
+.Em text
+to standard output immediately before each attempt to read a line of input,
+whether by executing the
+.Dq N
+function or by beginning a new cycle.
+.Pp
+.It [2addr]b[label]
+Branch to the
+.Dq \&:
+function with the specified label.
+If the label is not specified, branch to the end of the script.
+.Pp
+.It [2addr]c\e
+.It text
+Delete the pattern space.
+With 0 or 1 address or at the end of a 2-address range,
+.Em text
+is written to the standard output.
+.Pp
+.It [2addr]d
+Delete the pattern space and start the next cycle.
+.Pp
+.It [2addr]D
+Delete the initial segment of the pattern space through the first
+newline character and start the next cycle.
+.Pp
+.It [2addr]g
+Replace the contents of the pattern space with the contents of the
+hold space.
+.Pp
+.It [2addr]G
+Append a newline character followed by the contents of the hold space
+to the pattern space.
+.Pp
+.It [2addr]h
+Replace the contents of the hold space with the contents of the
+pattern space.
+.Pp
+.It [2addr]H
+Append a newline character followed by the contents of the pattern space
+to the hold space.
+.Pp
+.It [1addr]i\e
+.It text
+Write
+.Em text
+to the standard output.
+.Pp
+.It [2addr]l
+(The letter ell.)
+Write the pattern space to the standard output in a visually unambiguous
+form.
+This form is as follows:
+.Pp
+.Bl -tag -width "carriage-returnXX" -offset indent -compact
+.It backslash
+\e\e
+.It alert
+\ea
+.It backspace
+\eb
+.It form-feed
+\ef
+.It carriage-return
+\er
+.It tab
+\et
+.It vertical tab
+\ev
+.El
+.Pp
+Nonprintable characters are written as three-digit octal numbers (with a
+preceding backslash) for each byte in the character (most significant byte
+first).
+Long lines are folded, with the point of folding indicated by displaying
+a backslash followed by a newline.
+The end of each line is marked with a
+.Dq $ .
+.Pp
+.It [2addr]n
+Write the pattern space to the standard output if the default output has
+not been suppressed, and replace the pattern space with the next line of
+input.
+.Pp
+.It [2addr]N
+Append the next line of input to the pattern space, using an embedded
+newline character to separate the appended material from the original
+contents.
+Note that the current line number changes.
+.Pp
+.It [2addr]p
+Write the pattern space to standard output.
+.Pp
+.It [2addr]P
+Write the pattern space, up to the first newline character, to the
+standard output.
+.Pp
+.It [1addr]q
+Branch to the end of the script and quit without starting a new cycle.
+.Pp
+.It [1addr]r file
+Copy the contents of
+.Em file
+to the standard output immediately before the next attempt to read a
+line of input.
+If
+.Em file
+cannot be read for any reason, it is silently ignored and no error
+condition is set.
+.Pp
+.It [2addr]s/regular expression/replacement/flags
+Substitute the replacement string for the first instance of the regular
+expression in the pattern space.
+Any character other than backslash or newline can be used instead of
+a slash to delimit the RE and the replacement.
+Within the RE and the replacement, the RE delimiter itself can be used as
+a literal character if it is preceded by a backslash.
+.Pp
+An ampersand
+.Pq Dq &
+appearing in the replacement is replaced by the string matching the RE.
+The special meaning of
+.Dq &
+in this context can be suppressed by preceding it by a backslash.
+The string
+.Dq \e# ,
+where
+.Dq #
+is a digit, is replaced by the text matched
+by the corresponding backreference expression (see
+.Xr re_format 7 ) .
+.Pp
+A line can be split by substituting a newline character into it.
+To specify a newline character in the replacement string, precede it with
+a backslash.
+.Pp
+The value of
+.Em flags
+in the substitute function is zero or more of the following:
+.Bl -tag -width "XXXXXX" -offset indent
+.It Ar N
+Make the substitution only for the
+.Ar N Ns 'th
+occurrence of the regular expression in the pattern space.
+.It g
+Make the substitution for all non-overlapping matches of the
+regular expression, not just the first one.
+.It p
+Write the pattern space to standard output if a replacement was made.
+If the replacement string is identical to that which it replaces, it
+is still considered to have been a replacement.
+.It w Em file
+Append the pattern space to
+.Em file
+if a replacement was made.
+If the replacement string is identical to that which it replaces, it
+is still considered to have been a replacement.
+.It I
+Match the regular expression in a case-insensitive way.
+.El
+.Pp
+.It [2addr]t [label]
+Branch to the
+.Dq \&:
+function bearing the label if any substitutions have been made since the
+most recent reading of an input line or execution of a
+.Dq t
+function.
+If no label is specified, branch to the end of the script.
+.Pp
+.It [2addr]w Em file
+Append the pattern space to the
+.Em file .
+.Pp
+.It [2addr]x
+Swap the contents of the pattern and hold spaces.
+.Pp
+.It [2addr]y/string1/string2/
+Replace all occurrences of characters in
+.Em string1
+in the pattern space with the corresponding characters from
+.Em string2 .
+Any character other than a backslash or newline can be used instead of
+a slash to delimit the strings.
+Within
+.Em string1
+and
+.Em string2 ,
+a backslash followed by any character other than a newline is that literal
+character, and a backslash followed by an ``n'' is replaced by a newline
+character.
+.Pp
+.It [2addr]!function
+.It [2addr]!function-list
+Apply the function or function-list only to the lines that are
+.Em not
+selected by the address(es).
+.Pp
+.It [0addr]:label
+This function does nothing; it bears a label to which the
+.Dq b
+and
+.Dq t
+commands may branch.
+.Pp
+.It [1addr]=
+Write the line number to the standard output followed by a newline
+character.
+.Pp
+.It [0addr]
+Empty lines are ignored.
+.Pp
+.It [0addr]#
+The
+.Dq #
+and the remainder of the line are ignored (treated as a comment), with
+the single exception that if the first two characters in the file are
+.Dq #n ,
+the default output is suppressed.
+This is the same as specifying the
+.Fl n
+option on the command line.
+.El
+.Sh ENVIRONMENT
+The
+.Ev COLUMNS , LANG , LC_ALL , LC_CTYPE
+and
+.Ev LC_COLLATE
+environment variables affect the execution of
+.Nm
+as described in
+.Xr environ 7 .
+.Sh EXIT STATUS
+.Ex -std
+.Sh SEE ALSO
+.Xr awk 1 ,
+.Xr ed 1 ,
+.Xr grep 1 ,
+.Xr regex 3 ,
+.Xr re_format 7
+.Sh STANDARDS
+The
+.Nm
+utility is expected to be a superset of the
+.St -p1003.2
+specification.
+.Pp
+The
+.Fl E , I , a
+and
+.Fl i
+options, the prefixing
+.Dq \&+
+in the second member of an address range,
+as well as the
+.Dq I
+flag to the address regular expression and substitution command are
+non-standard
+.Fx
+extensions and may not be available on other operating systems.
+.Sh HISTORY
+A
+.Nm
+command, written by
+.An L. E. McMahon ,
+appeared in
+.At v7 .
+.Sh AUTHORS
+.An "Diomidis D. Spinellis" Aq dds@FreeBSD.org
+.Sh BUGS
+Multibyte characters containing a byte with value 0x5C
+.Tn ( ASCII
+.Ql \e )
+may be incorrectly treated as line continuation characters in arguments to the
+.Dq a ,
+.Dq c
+and
+.Dq i
+commands.
+Multibyte characters cannot be used as delimiters with the
+.Dq s
+and
+.Dq y
+commands.
diff --git a/usr/src/cmd/sed/sed.txt b/usr/src/cmd/sed/sed.txt
new file mode 100644
index 0000000000..0845895cae
--- /dev/null
+++ b/usr/src/cmd/sed/sed.txt
@@ -0,0 +1,391 @@
+SED(1) BSD General Commands Manual SED(1)
+
+NAME
+ sed -- stream editor
+
+SYNOPSIS
+ sed [-Ealnr] command [file ...]
+ sed [-Ealnr] [-e command] [-f command_file] [-I extension] [-i extension]
+ [file ...]
+
+DESCRIPTION
+ The sed utility reads the specified files, or the standard input if no
+ files are specified, modifying the input as specified by a list of com-
+ mands. The input is then written to the standard output.
+
+ A single command may be specified as the first argument to sed. Multiple
+ commands may be specified by using the -e or -f options. All commands
+ are applied to the input in the order they are specified regardless of
+ their origin.
+
+ The following options are available:
+
+ -E Interpret regular expressions as extended (modern) regular
+ expressions rather than basic regular expressions (BRE's). The
+ re_format(7) manual page fully describes both formats.
+
+ -a The files listed as parameters for the ``w'' functions are cre-
+ ated (or truncated) before any processing begins, by default.
+ The -a option causes sed to delay opening each file until a com-
+ mand containing the related ``w'' function is applied to a line
+ of input.
+
+ -e command
+ Append the editing commands specified by the command argument to
+ the list of commands.
+
+ -f command_file
+ Append the editing commands found in the file command_file to the
+ list of commands. The editing commands should each be listed on
+ a separate line.
+
+ -I extension
+ Edit files in-place, saving backups with the specified extension.
+ If a zero-length extension is given, no backup will be saved. It
+ is not recommended to give a zero-length extension when in-place
+ editing files, as you risk corruption or partial content in situ-
+ ations where disk space is exhausted, etc.
+
+ Note that in-place editing with -I still takes place in a single
+ continuous line address space covering all files, although each
+ file preserves its individuality instead of forming one output
+ stream. The line counter is never reset between files, address
+ ranges can span file boundaries, and the ``$'' address matches
+ only the last line of the last file. (See Sed Addresses.) That
+ can lead to unexpected results in many cases of in-place editing,
+ where using -i is desired.
+
+ -i extension
+ Edit files in-place similarly to -I, but treat each file indepen-
+ dently from other files. In particular, line numbers in each
+ file start at 1, the ``$'' address matches the last line of the
+ current file, and address ranges are limited to the current file.
+ (See Sed Addresses.) The net result is as though each file were
+ edited by a separate sed instance.
+
+ -l Make output line buffered.
+
+ -n By default, each line of input is echoed to the standard output
+ after all of the commands have been applied to it. The -n option
+ suppresses this behavior.
+
+ -r Same as -E for compatibility with GNU sed.
+
+ The form of a sed command is as follows:
+
+ [address[,address]]function[arguments]
+
+ Whitespace may be inserted before the first address and the function por-
+ tions of the command.
+
+ Normally, sed cyclically copies a line of input, not including its termi-
+ nating newline character, into a pattern space, (unless there is some-
+ thing left after a ``D'' function), applies all of the commands with
+ addresses that select that pattern space, copies the pattern space to the
+ standard output, appending a newline, and deletes the pattern space.
+
+ Some of the functions use a hold space to save all or part of the pattern
+ space for subsequent retrieval.
+
+Sed Addresses
+ An address is not required, but if specified must have one of the follow-
+ ing formats:
+
+ o a number that counts input lines cumulatively across input
+ files (or in each file independently if a -i option is in
+ effect);
+
+ o a dollar (``$'') character that addresses the last line of
+ input (or the last line of the current file if a -i option was
+ specified);
+
+ o a context address that consists of a regular expression pre-
+ ceded and followed by a delimiter. The closing delimiter can
+ also optionally be followed by the ``I'' character, to indicate
+ that the regular expression is to be matched in a case-insensi-
+ tive way.
+
+ A command line with no addresses selects every pattern space.
+
+ A command line with one address selects all of the pattern spaces that
+ match the address.
+
+ A command line with two addresses selects an inclusive range. This range
+ starts with the first pattern space that matches the first address. The
+ end of the range is the next following pattern space that matches the
+ second address. If the second address is a number less than or equal to
+ the line number first selected, only that line is selected. The number
+ in the second address may be prefixed with a (``+'') to specify the num-
+ ber of lines to match after the first pattern. In the case when the sec-
+ ond address is a context address, sed does not re-match the second
+ address against the pattern space that matched the first address. Start-
+ ing at the first line following the selected range, sed starts looking
+ again for the first address.
+
+ Editing commands can be applied to non-selected pattern spaces by use of
+ the exclamation character (``!'') function.
+
+Sed Regular Expressions
+ The regular expressions used in sed, by default, are basic regular
+ expressions (BREs, see re_format(7) for more information), but extended
+ (modern) regular expressions can be used instead if the -E flag is given.
+ In addition, sed has the following two additions to regular expressions:
+
+ 1. In a context address, any character other than a backslash (``\'')
+ or newline character may be used to delimit the regular expression.
+ The opening delimiter needs to be preceded by a backslash unless it
+ is a slash. For example, the context address \xabcx is equivalent
+ to /abc/. Also, putting a backslash character before the delimiting
+ character within the regular expression causes the character to be
+ treated literally. For example, in the context address \xabc\xdefx,
+ the RE delimiter is an ``x'' and the second ``x'' stands for itself,
+ so that the regular expression is ``abcxdef''.
+
+ 2. The escape sequence \n matches a newline character embedded in the
+ pattern space. You cannot, however, use a literal newline character
+ in an address or in the substitute command.
+
+ One special feature of sed regular expressions is that they can default
+ to the last regular expression used. If a regular expression is empty,
+ i.e., just the delimiter characters are specified, the last regular
+ expression encountered is used instead. The last regular expression is
+ defined as the last regular expression used as part of an address or sub-
+ stitute command, and at run-time, not compile-time. For example, the
+ command ``/abc/s//XXX/'' will substitute ``XXX'' for the pattern ``abc''.
+
+Sed Functions
+ In the following list of commands, the maximum number of permissible
+ addresses for each command is indicated by [0addr], [1addr], or [2addr],
+ representing zero, one, or two addresses.
+
+ The argument text consists of one or more lines. To embed a newline in
+ the text, precede it with a backslash. Other backslashes in text are
+ deleted and the following character taken literally.
+
+ The ``r'' and ``w'' functions take an optional file parameter, which
+ should be separated from the function letter by white space. Each file
+ given as an argument to sed is created (or its contents truncated) before
+ any input processing begins.
+
+ The ``b'', ``r'', ``s'', ``t'', ``w'', ``y'', ``!'', and ``:'' functions
+ all accept additional arguments. The following synopses indicate which
+ arguments have to be separated from the function letters by white space
+ characters.
+
+ Two of the functions take a function-list. This is a list of sed func-
+ tions separated by newlines, as follows:
+
+ { function
+ function
+ ...
+ function
+ }
+
+ The ``{'' can be preceded by white space and can be followed by white
+ space. The function can be preceded by white space. The terminating
+ ``}'' must be preceded by a newline or optional white space.
+
+ [2addr] function-list
+ Execute function-list only when the pattern space is selected.
+
+ [1addr]a\
+ text Write text to standard output immediately before each attempt to
+ read a line of input, whether by executing the ``N'' function or
+ by beginning a new cycle.
+
+ [2addr]b[label]
+ Branch to the ``:'' function with the specified label. If the
+ label is not specified, branch to the end of the script.
+
+ [2addr]c\
+ text Delete the pattern space. With 0 or 1 address or at the end of a
+ 2-address range, text is written to the standard output.
+
+ [2addr]d
+ Delete the pattern space and start the next cycle.
+
+ [2addr]D
+ Delete the initial segment of the pattern space through the first
+ newline character and start the next cycle.
+
+ [2addr]g
+ Replace the contents of the pattern space with the contents of
+ the hold space.
+
+ [2addr]G
+ Append a newline character followed by the contents of the hold
+ space to the pattern space.
+
+ [2addr]h
+ Replace the contents of the hold space with the contents of the
+ pattern space.
+
+ [2addr]H
+ Append a newline character followed by the contents of the pat-
+ tern space to the hold space.
+
+ [1addr]i\
+ text Write text to the standard output.
+
+ [2addr]l
+ (The letter ell.) Write the pattern space to the standard output
+ in a visually unambiguous form. This form is as follows:
+
+ backslash \\
+ alert \a
+ form-feed \f
+ carriage-return \r
+ tab \t
+ vertical tab \v
+
+ Nonprintable characters are written as three-digit octal numbers
+ (with a preceding backslash) for each byte in the character (most
+ significant byte first). Long lines are folded, with the point
+ of folding indicated by displaying a backslash followed by a new-
+ line. The end of each line is marked with a ``$''.
+
+ [2addr]n
+ Write the pattern space to the standard output if the default
+ output has not been suppressed, and replace the pattern space
+ with the next line of input.
+
+ [2addr]N
+ Append the next line of input to the pattern space, using an
+ embedded newline character to separate the appended material from
+ the original contents. Note that the current line number
+ changes.
+
+ [2addr]p
+ Write the pattern space to standard output.
+
+ [2addr]P
+ Write the pattern space, up to the first newline character, to
+ the standard output.
+
+ [1addr]q
+ Branch to the end of the script and quit without starting a new
+ cycle.
+
+ [1addr]r file
+ Copy the contents of file to the standard output immediately
+ before the next attempt to read a line of input. If file cannot
+ be read for any reason, it is silently ignored and no error con-
+ dition is set.
+
+ [2addr]s/regular expression/replacement/flags
+ Substitute the replacement string for the first instance of the
+ regular expression in the pattern space. Any character other
+ than backslash or newline can be used instead of a slash to
+ delimit the RE and the replacement. Within the RE and the
+ replacement, the RE delimiter itself can be used as a literal
+ character if it is preceded by a backslash.
+
+ An ampersand (``&'') appearing in the replacement is replaced by
+ the string matching the RE. The special meaning of ``&'' in this
+ context can be suppressed by preceding it by a backslash. The
+ string ``\#'', where ``#'' is a digit, is replaced by the text
+ matched by the corresponding backreference expression (see
+ re_format(7)).
+
+ A line can be split by substituting a newline character into it.
+ To specify a newline character in the replacement string, precede
+ it with a backslash.
+
+ The value of flags in the substitute function is zero or more of
+ the following:
+
+ N Make the substitution only for the N'th occurrence
+ of the regular expression in the pattern space.
+
+ g Make the substitution for all non-overlapping
+ matches of the regular expression, not just the
+ first one.
+
+ p Write the pattern space to standard output if a
+ replacement was made. If the replacement string is
+ identical to that which it replaces, it is still
+ considered to have been a replacement.
+
+ w file Append the pattern space to file if a replacement
+ was made. If the replacement string is identical
+ to that which it replaces, it is still considered
+ to have been a replacement.
+
+ I Match the regular expression in a case-insensitive
+ way.
+
+ [2addr]t [label]
+ Branch to the ``:'' function bearing the label if any substitu-
+ tions have been made since the most recent reading of an input
+ line or execution of a ``t'' function. If no label is specified,
+ branch to the end of the script.
+
+ [2addr]w file
+ Append the pattern space to the file.
+
+ [2addr]x
+ Swap the contents of the pattern and hold spaces.
+
+ [2addr]y/string1/string2/
+ Replace all occurrences of characters in string1 in the pattern
+ space with the corresponding characters from string2. Any char-
+ acter other than a backslash or newline can be used instead of a
+ slash to delimit the strings. Within string1 and string2, a
+ backslash followed by any character other than a newline is that
+ literal character, and a backslash followed by an ``n'' is
+ replaced by a newline character.
+
+ [2addr]!function
+ [2addr]!function-list
+ Apply the function or function-list only to the lines that are
+ not selected by the address(es).
+
+ [0addr]:label
+ This function does nothing; it bears a label to which the ``b''
+ and ``t'' commands may branch.
+
+ [1addr]=
+ Write the line number to the standard output followed by a new-
+ line character.
+
+ [0addr]
+ Empty lines are ignored.
+
+ [0addr]#
+ The ``#'' and the remainder of the line are ignored (treated as a
+ comment), with the single exception that if the first two charac-
+ ters in the file are ``#n'', the default output is suppressed.
+ This is the same as specifying the -n option on the command line.
+
+ENVIRONMENT
+ The COLUMNS, LANG, LC_ALL, LC_CTYPE and LC_COLLATE environment variables
+ affect the execution of sed as described in environ(7).
+
+EXIT STATUS
+ The sed utility exits 0 on success, and >0 if an error occurs.
+
+SEE ALSO
+ awk(1), ed(1), grep(1), regex(3), re_format(7)
+
+STANDARDS
+ The sed utility is expected to be a superset of the IEEE Std 1003.2
+ (``POSIX.2'') specification.
+
+ The -E, -I, -a and -i options, the prefixing ``+'' in the second member
+ of an address range, as well as the ``I'' flag to the address regular
+ expression and substitution command are non-standard FreeBSD extensions
+ and may not be available on other operating systems.
+
+HISTORY
+ A sed command, written by L. E. McMahon, appeared in Version 7 AT&T UNIX.
+
+AUTHORS
+ Diomidis D. Spinellis <dds@FreeBSD.org>
+
+BUGS
+ Multibyte characters containing a byte with value 0x5C (ASCII `\') may be
+ incorrectly treated as line continuation characters in arguments to the
+ ``a'', ``c'' and ``i'' commands. Multibyte characters cannot be used as
+ delimiters with the ``s'' and ``y'' commands.
+
+BSD May 24, 2009 BSD
diff --git a/usr/src/head/regex.h b/usr/src/head/regex.h
index d664b67e71..7fc31e25e1 100644
--- a/usr/src/head/regex.h
+++ b/usr/src/head/regex.h
@@ -88,6 +88,7 @@ typedef ssize_t regoff_t;
#define REG_DUMP 0x2000
#define REG_PEND 0x4000
#define REG_NOSPEC 0x8000
+#define REG_STARTEND 0x10000
/* internal flags */
#define REG_MUST 0x100 /* check for regmust substring */
diff --git a/usr/src/lib/libc/port/locale/engine.c b/usr/src/lib/libc/port/locale/engine.c
index 80f2fc89dc..b189ba6bc5 100644
--- a/usr/src/lib/libc/port/locale/engine.c
+++ b/usr/src/lib/libc/port/locale/engine.c
@@ -172,7 +172,7 @@ matcher(struct re_guts *g,
/* simplify the situation where possible */
if (g->cflags&REG_NOSUB)
nmatch = 0;
-#ifdef REG_STARTEND
+
if (eflags&REG_STARTEND) {
start = string + pmatch[0].rm_so;
stop = string + pmatch[0].rm_eo;
@@ -180,10 +180,7 @@ matcher(struct re_guts *g,
start = string;
stop = start + strlen(start);
}
-#else
- start = string;
- stop = start + strlen(start);
-#endif
+
if (stop < start)
return (REG_EFATAL);
diff --git a/usr/src/lib/libc/port/locale/regcomp.c b/usr/src/lib/libc/port/locale/regcomp.c
index b43fbfd599..33ca5511e4 100644
--- a/usr/src/lib/libc/port/locale/regcomp.c
+++ b/usr/src/lib/libc/port/locale/regcomp.c
@@ -392,7 +392,17 @@ p_ere_exp(struct parse *p)
case '\\':
(void) REQUIRE(MORE(), REG_EESCAPE);
wc = WGETNEXT();
- ordinary(p, wc);
+ switch (wc) {
+ case '<':
+ EMIT(OBOW, 0);
+ break;
+ case '>':
+ EMIT(OEOW, 0);
+ break;
+ default:
+ ordinary(p, wc);
+ break;
+ }
break;
case '{': /* okay as ordinary except if digit follows */
(void) REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
@@ -545,6 +555,12 @@ p_simp_re(struct parse *p,
case '[':
p_bracket(p);
break;
+ case BACKSL|'<':
+ EMIT(OBOW, 0);
+ break;
+ case BACKSL|'>':
+ EMIT(OEOW, 0);
+ break;
case BACKSL|'{':
SETERROR(REG_BADRPT);
break;
diff --git a/usr/src/pkg/manifests/SUNWcs.mf b/usr/src/pkg/manifests/SUNWcs.mf
index 4b1ca1a77f..e83186f9a6 100644
--- a/usr/src/pkg/manifests/SUNWcs.mf
+++ b/usr/src/pkg/manifests/SUNWcs.mf
@@ -2560,6 +2560,8 @@ license usr/src/cmd/mt/THIRDPARTYLICENSE \
license=usr/src/cmd/mt/THIRDPARTYLICENSE
license usr/src/cmd/script/THIRDPARTYLICENSE \
license=usr/src/cmd/script/THIRDPARTYLICENSE
+license usr/src/cmd/sed/THIRDPARTYLICENSE \
+ license=usr/src/cmd/sed/THIRDPARTYLICENSE
license usr/src/cmd/stat/vmstat/THIRDPARTYLICENSE \
license=usr/src/cmd/stat/vmstat/THIRDPARTYLICENSE
license usr/src/cmd/tail/THIRDPARTYLICENSE \
diff --git a/usr/src/pkg/manifests/system-xopen-xcu4.mf b/usr/src/pkg/manifests/system-xopen-xcu4.mf
index f853c370cb..fa942c9901 100644
--- a/usr/src/pkg/manifests/system-xopen-xcu4.mf
+++ b/usr/src/pkg/manifests/system-xopen-xcu4.mf
@@ -65,7 +65,6 @@ file path=usr/xpg4/bin/nm mode=0555
file path=usr/xpg4/bin/nohup mode=0555
file path=usr/xpg4/bin/pr mode=0555
file path=usr/xpg4/bin/rm mode=0555
-file path=usr/xpg4/bin/sed mode=0555
file path=usr/xpg4/bin/sort mode=0555
file path=usr/xpg4/bin/stty mode=0555
file path=usr/xpg4/bin/who mode=0555
@@ -99,6 +98,7 @@ link path=usr/xpg4/bin/kill target=../../bin/alias
link path=usr/xpg4/bin/od target=../../bin/od
link path=usr/xpg4/bin/pfsh target=../../bin/pfexec
link path=usr/xpg4/bin/read target=../../bin/alias
+link path=usr/xpg4/bin/sed target=../../bin/sed
link path=usr/xpg4/bin/sh target=../../bin/ksh93
link path=usr/xpg4/bin/tail target=../../bin/tail
link path=usr/xpg4/bin/test target=../../bin/alias