summaryrefslogtreecommitdiff
path: root/usr/src/cmd/grep/grep.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/cmd/grep/grep.c')
-rw-r--r--usr/src/cmd/grep/grep.c408
1 files changed, 174 insertions, 234 deletions
diff --git a/usr/src/cmd/grep/grep.c b/usr/src/cmd/grep/grep.c
index 8848d84724..9c738feb1b 100644
--- a/usr/src/cmd/grep/grep.c
+++ b/usr/src/cmd/grep/grep.c
@@ -78,7 +78,6 @@ static int bmgtab[M_CSETSIZE]; /* BMG delta1 table */
typedef struct _PATTERN {
char *pattern; /* original pattern */
- wchar_t *wpattern; /* wide, lowercased pattern */
struct _PATTERN *next;
regex_t re; /* compiled pattern */
} PATTERN;
@@ -109,15 +108,15 @@ static uchar_t Fflag; /* Fgrep or -F flag */
static uchar_t Rflag; /* Like rflag, but follow symlinks */
static uchar_t outfn; /* Put out file name */
static uchar_t conflag; /* show context of matches */
+static uchar_t oflag; /* Print only matching output */
static char *cmdname;
static char *stdin_label; /* Optional lable for stdin */
-static int use_wchar, use_bmg, mblocale;
+static int use_bmg, mblocale;
-static size_t outbuflen, prntbuflen, conbuflen;
+static size_t prntbuflen, conbuflen;
static unsigned long conalen, conblen, conmatches;
static char *prntbuf, *conbuf;
-static wchar_t *outline;
static void addfile(const char *fn);
static void addpattern(char *s);
@@ -184,6 +183,7 @@ main(int argc, char **argv)
} else {
if (*ap == 'f' || *ap == 'F') {
fgrep++;
+ regflags |= REG_NOSPEC;
}
}
@@ -219,7 +219,7 @@ main(int argc, char **argv)
}
}
- while ((c = getopt_long(argc, argv, "+vwchHilLnrbse:f:qxEFIRA:B:C:",
+ while ((c = getopt_long(argc, argv, "+vwchHilLnrbse:f:qxEFIRA:B:C:o",
grep_options, NULL)) != EOF) {
unsigned long tval;
switch (c) {
@@ -322,6 +322,7 @@ main(int argc, char **argv)
case 'F': /* POSIX: strings, not RE's */
Fflag++;
+ regflags |= REG_NOSPEC;
break;
case 'R': /* Solaris: like rflag, but follow symlinks */
@@ -384,6 +385,10 @@ main(int argc, char **argv)
stdin_label = optarg;
break;
+ case 'o':
+ oflag++;
+ break;
+
default:
usage();
}
@@ -431,19 +436,45 @@ main(int argc, char **argv)
usage();
/*
- * -l or -L overrides -H like in GNU grep
+ * -l or -L overrides -H like in GNU grep. It also overrides -o.
*/
- if (lflag || Lflag)
+ if (lflag || Lflag) {
Hflag = 0;
+ oflag = 0;
+ }
/*
* -c, -l and -q flags are mutually exclusive
* We have -c override -l like in Solaris.
* -q overrides -l & -c programmatically in grep() function.
+ * -c overrides -o in GNU grep, we honor that.
*/
- if (cflag && (lflag || Lflag)) {
+ if (cflag) {
lflag = 0;
Lflag = 0;
+ oflag = 0;
+ }
+
+ /*
+ * If -o is set then we ignore all context related options, like other
+ * greps.
+ */
+ if (oflag) {
+ conflag = 0;
+ }
+
+ /*
+ * These flags are a semantic mess with no clear answers as to their
+ * behvaior. Based on some experimentation GNU grep will exit zero if a
+ * non-match is present, but never print anything. BSD grep seems to
+ * exit 1 and not print anything, even if there would have been a match.
+ * Also, you probably don't want to ask about what happens with grep -x
+ * -o -v, some implementations seem to just ignore -v.
+ */
+ if (oflag && !nvflag) {
+ (void) fprintf(stderr, gettext("%s: the combination of -v and "
+ "-o is not supported currently\n"), argv[0]);
+ exit(2);
}
argv += optind - 1;
@@ -477,14 +508,6 @@ main(int argc, char **argv)
}
/*
- * If -x flag is not specified or -i flag is specified
- * with fgrep in a multibyte locale, need to use
- * the wide character APIs. Otherwise, byte-oriented
- * process will be done.
- */
- use_wchar = Fflag && mblocale && (!xflag || iflag);
-
- /*
* Compile Patterns and also decide if BMG can be used
*/
fixpatterns();
@@ -740,15 +763,36 @@ static void
fixpatterns(void)
{
PATTERN *pp;
- int rv, fix_pattern, npatterns;
+ int rv, fix_pattern;
+
+ /*
+ * Decide if we are able to run the Boyer-Moore-Gosper algorithm.
+ * Use the Boyer-Moore-Gosper algorithm if:
+ * - fgrep (Fflag)
+ * - singlebyte locale (!mblocale)
+ * - no ignoring case (!iflag)
+ * - no printing line numbers (!nflag)
+ * - no negating the output (nvflag)
+ * - only one pattern (patterns != NULL && patterns->next ==
+ * NULL)
+ * - non zero length pattern (strlen(patterns->pattern) != 0)
+ * - no context required (conflag == 0)
+ * - no exact matches (!oflag)
+ */
+ use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag && !oflag &&
+ (patterns != NULL && patterns->next == NULL) &&
+ (strlen(patterns->pattern) != 0) && conflag == 0;
+
+ if (use_bmg) {
+ return;
+ }
/*
* Fix the specified pattern if -x is specified.
*/
fix_pattern = !Fflag && xflag;
- for (npatterns = 0, pp = patterns; pp != NULL; pp = pp->next) {
- npatterns++;
+ for (pp = patterns; pp != NULL; pp = pp->next) {
if (fix_pattern) {
char *cp, *cq;
size_t plen, nplen;
@@ -771,61 +815,9 @@ fixpatterns(void)
pp->pattern = cp;
}
- if (Fflag) {
- if (use_wchar) {
- /*
- * Fflag && mblocale && iflag
- * Fflag && mblocale && !xflag
- */
- size_t n;
- n = strlen(pp->pattern) + 1;
- if ((pp->wpattern =
- malloc(sizeof (wchar_t) * n)) == NULL) {
- (void) fprintf(stderr,
- gettext("%s: out of memory\n"),
- cmdname);
- exit(2);
- }
- if (mbstowcs(pp->wpattern, pp->pattern, n) ==
- (size_t)-1) {
- (void) fprintf(stderr,
- gettext("%s: failed to convert "
- "\"%s\" to wide-characters\n"),
- cmdname, pp->pattern);
- exit(2);
- }
- if (iflag) {
- wchar_t *wp;
- for (wp = pp->wpattern; *wp != L'\0';
- wp++) {
- *wp = towlower((wint_t)*wp);
- }
- }
- free(pp->pattern);
- } else {
- /*
- * Fflag && mblocale && !iflag
- * Fflag && !mblocale && iflag
- * Fflag && !mblocale && !iflag
- */
- if (iflag) {
- unsigned char *cp;
- for (cp = (unsigned char *)pp->pattern;
- *cp != '\0'; cp++) {
- *cp = tolower(*cp);
- }
- }
- }
- /*
- * fgrep: No regular expressions.
- */
- continue;
- }
-
/*
- * For non-fgrep, compile the regular expression,
- * give an informative error message, and exit if
- * it didn't compile.
+ * Compile the regular expression, give an informative error
+ * message, and exit if it didn't compile.
*/
if ((rv = regcomp(&pp->re, pp->pattern, regflags)) != 0) {
(void) regerror(rv, &pp->re, errstr, sizeof (errstr));
@@ -836,25 +828,6 @@ fixpatterns(void)
}
free(pp->pattern);
}
-
- /*
- * Decide if we are able to run the Boyer-Moore-Gosper algorithm.
- * Use the Boyer-Moore-Gosper algorithm if:
- * - fgrep (Fflag)
- * - singlebyte locale (!mblocale)
- * - no ignoring case (!iflag)
- * - no printing line numbers (!nflag)
- * - no negating the output (nvflag)
- * - only one pattern (npatterns == 1)
- * - non zero length pattern (strlen(patterns->pattern) != 0)
- * - no context required (conflag == 0)
- *
- * It's guaranteed patterns->pattern is still alive
- * when Fflag && !mblocale.
- */
- use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag &&
- (npatterns == 1) && (strlen(patterns->pattern) != 0) &&
- conflag == 0;
}
/*
@@ -887,36 +860,6 @@ rfind_nl(const char *ptr, size_t len)
}
/*
- * Duplicate the specified string converting each character
- * into a lower case.
- */
-static char *
-istrdup(const char *s1)
-{
- static size_t ibuflen = 0;
- static char *ibuf = NULL;
- size_t slen;
- char *p;
-
- slen = strlen(s1);
- if (slen >= ibuflen) {
- /* ibuf does not fit to s1 */
- ibuflen = slen + 1;
- ibuf = realloc(ibuf, ibuflen);
- if (ibuf == NULL) {
- (void) fprintf(stderr,
- gettext("%s: out of memory\n"), cmdname);
- exit(2);
- }
- }
- p = ibuf;
- do {
- *p++ = tolower(*s1);
- } while (*s1++ != '\0');
- return (ibuf);
-}
-
-/*
* Do grep on a single file.
* Return true in any lines matched.
*
@@ -947,6 +890,7 @@ grep(int fd, const char *fn)
int conaprnt = 0, conbprnt = 0, lastmatch = 0;
boolean_t nearmatch; /* w/in N+1 of last match */
boolean_t havematch = B_FALSE; /* have a match in context */
+ boolean_t sameline = B_FALSE; /* Are we still on the same line? */
size_t prntlen;
if (patterns == NULL)
@@ -958,16 +902,6 @@ grep(int fd, const char *fn)
bmgcomp(pp->pattern, strlen(pp->pattern));
}
- if (use_wchar && outline == NULL) {
- outbuflen = BUFSIZE + 1;
- outline = malloc(sizeof (wchar_t) * outbuflen);
- if (outline == NULL) {
- (void) fprintf(stderr, gettext("%s: out of memory\n"),
- cmdname);
- exit(2);
- }
- }
-
if (prntbuf == NULL) {
prntbuflen = BUFSIZE;
if ((prntbuf = malloc(prntbuflen + 1)) == NULL) {
@@ -996,6 +930,8 @@ grep(int fd, const char *fn)
long count;
off_t offset = 0;
char separate;
+ char *startmatch = NULL; /* -o, start of match */
+ char *postmatch = NULL; /* -o, character after match */
boolean_t last_ctx = B_FALSE, eof = B_FALSE;
if (data_len == 0) {
@@ -1167,7 +1103,18 @@ L_start_process:
}
goto L_next_line;
}
- lineno++;
+
+ /*
+ * When using -o, we might actually loop around while still on
+ * the same line. In such a case, we need to make sure we don't
+ * increment the line number.
+ */
+ if (!sameline) {
+ lineno++;
+ } else {
+ sameline = B_FALSE;
+ }
+
/*
* Line starts from ptr and ends at ptrend.
* line_len will be the length of the line.
@@ -1179,108 +1126,59 @@ L_start_process:
* From now, the process will be performed based
* on the line from ptr to ptrend.
*/
- if (use_wchar) {
- size_t len;
-
- if (line_len >= outbuflen) {
- outbuflen = line_len + 1;
- outline = realloc(outline,
- sizeof (wchar_t) * outbuflen);
- if (outline == NULL) {
- (void) fprintf(stderr,
- gettext("%s: out of memory\n"),
- cmdname);
- exit(2);
- }
- }
+ for (pp = patterns; pp; pp = pp->next) {
+ int rv;
+ regmatch_t rm;
+ size_t nmatch = 0;
- len = mbstowcs(outline, ptr, line_len);
- if (len == (size_t)-1) {
- (void) fprintf(stderr, gettext(
- "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
- cmdname, fn, lineno);
- /* never match a line with invalid sequence */
- goto L_skip_line;
- }
- outline[len] = L'\0';
+ /*
+ * The current implementation of regexec has a higher
+ * cost when you ask for match information. As a result,
+ * we only ask for a match when we know that we need it
+ * specifically. This is always needed for -o because we
+ * rely on it to tell us what we matched. For fgrep -x
+ * we need it so we can determine whether we matched the
+ * entire line.
+ */
+ if (oflag || (Fflag && xflag))
+ nmatch = 1;
- if (iflag) {
- wchar_t *cp;
- for (cp = outline; *cp != '\0'; cp++) {
- *cp = towlower((wint_t)*cp);
+ rv = regexec(&pp->re, ptr, nmatch, &rm, 0);
+ if (rv == REG_OK) {
+ /*
+ * fgrep in this form cannot insert the
+ * metacharacters to verify whether or not we
+ * were the entire line. As a result, we check
+ * the pattern length against the line length.
+ */
+ if (Fflag && xflag &&
+ line_len != rm.rm_eo - rm.rm_so) {
+ continue;
}
- }
- if (xflag) {
- for (pp = patterns; pp; pp = pp->next) {
- if (outline[0] == pp->wpattern[0] &&
- wcscmp(outline,
- pp->wpattern) == 0) {
- /* matched */
- break;
- }
- }
- } else {
- for (pp = patterns; pp; pp = pp->next) {
- if (wcswcs(outline, pp->wpattern)
- != NULL) {
- /* matched */
- break;
- }
- }
- }
- } else if (Fflag) {
- /* fgrep in byte-oriented handling */
- char *fptr;
- if (iflag) {
- fptr = istrdup(ptr);
- } else {
- fptr = ptr;
- }
- if (xflag) {
- /* fgrep -x */
- for (pp = patterns; pp; pp = pp->next) {
- if (fptr[0] == pp->pattern[0] &&
- strcmp(fptr, pp->pattern) == 0) {
- /* matched */
- break;
- }
- }
- } else {
- for (pp = patterns; pp; pp = pp->next) {
- if (strstr(fptr, pp->pattern) != NULL) {
- /* matched */
- break;
- }
+ /* matched */
+ if (oflag) {
+ startmatch = ptr + rm.rm_so;
+ postmatch = ptr + rm.rm_eo;
}
+ break;
}
- } else {
- /* grep or egrep */
- for (pp = patterns; pp; pp = pp->next) {
- int rv;
-
- rv = regexec(&pp->re, ptr, 0, NULL, 0);
- if (rv == REG_OK) {
- /* matched */
- break;
- }
- switch (rv) {
- case REG_NOMATCH:
- break;
- case REG_ECHAR:
- (void) fprintf(stderr, gettext(
+ switch (rv) {
+ case REG_NOMATCH:
+ break;
+ case REG_ECHAR:
+ (void) fprintf(stderr, gettext(
"%s: input file \"%s\": line %lld: invalid multibyte character\n"),
- cmdname, fn, lineno);
- break;
- default:
- (void) regerror(rv, &pp->re, errstr,
- sizeof (errstr));
- (void) fprintf(stderr, gettext(
+ cmdname, fn, lineno);
+ break;
+ default:
+ (void) regerror(rv, &pp->re, errstr,
+ sizeof (errstr));
+ (void) fprintf(stderr, gettext(
"%s: input file \"%s\": line %lld: %s\n"),
- cmdname, fn, lineno, errstr);
- exit(2);
- }
+ cmdname, fn, lineno, errstr);
+ exit(2);
}
}
@@ -1406,8 +1304,21 @@ L_next_line:
*/
if (!last_ctx && nvflag == (pp != NULL)) {
matches++;
- if (!nextend)
- matchptr = (conflag != 0) ? conptrend : ptrend;
+ if (!nextend) {
+ if (conflag != 0) {
+ matchptr = conptrend;
+ } else if (oflag) {
+ matchptr = postmatch - 1;
+ } else {
+ matchptr = ptrend;
+ }
+ }
+ }
+
+ if (pp != NULL && oflag && postmatch == NULL) {
+ (void) fprintf(stderr, gettext("%s: internal error, "
+ "-o set, but failed to find postmatch\n"), cmdname);
+ abort();
}
/*
@@ -1434,10 +1345,17 @@ L_next_line:
(void) fwrite("--\n", 1, 3, stdout);
} else if (conflag == 0 && nvflag == (pp != NULL)) {
*ptrend = '\n';
+ if (oflag) {
+ prntptr = startmatch;
+ } else {
+ prntptr = ptr;
+ }
prntlen = line_len + 1;
- prntptr = ptr;
linenum = lineno;
blkoffset = line_offset;
+ if (oflag) {
+ blkoffset += startmatch - ptr;
+ }
} else if (eof) {
/* No match and no more data */
goto out;
@@ -1446,10 +1364,13 @@ L_next_line:
goto L_skip_line;
}
- prntptrend = prntptr - 1;
- while ((prntptrend = find_nl(prntptrend + 1,
+ if (oflag) {
+ prntptrend = postmatch - 1;
+ } else {
+ prntptrend = prntptr - 1;
+ }
+ while (oflag || (prntptrend = find_nl(prntptrend + 1,
prntlen)) != NULL) {
-
/*
* GNU grep uses '-' for context lines and ':' for
* matching lines, so replicate that here.
@@ -1501,10 +1422,23 @@ L_next_line:
}
(void) fwrite(prntptr, 1,
prntptrend - prntptr + 1, stdout);
+
+ if (oflag) {
+ (void) fputc('\n', stdout);
+ }
}
if (ferror(stdout)) {
return (0);
}
+
+ /*
+ * With -o we'll only ever take this loop once. Manually
+ * break out.
+ */
+ if (oflag) {
+ goto L_skip_line;
+ }
+
linenum++;
prntlen -= prntptrend - prntptr + 1;
blkoffset += prntptrend - prntptr + 1;
@@ -1545,9 +1479,15 @@ L_skip_line:
if (!newlinep)
break;
+ if (oflag && postmatch != NULL) {
+ line_len = postmatch - 1 - ptr;
+ ptr = postmatch;
+ sameline = B_TRUE;
+ } else {
+ ptr = ptrend + 1;
+ }
data_len -= line_len + 1;
line_offset += line_len + 1;
- ptr = ptrend + 1;
}
out:
@@ -1585,7 +1525,7 @@ usage(void)
(void) fprintf(stderr, gettext("usage: %5s"), cmdname);
if (!egrep && !fgrep)
(void) fprintf(stderr, gettext(" [-E|-F]"));
- (void) fprintf(stderr, gettext(" [-bchHilLnqrRsvx] [-A num] [-B num] "
+ (void) fprintf(stderr, gettext(" [-bchHilLnoqrRsvx] [-A num] [-B num] "
"[-C num|-num]\n [--label=name] [-e pattern_list]... "
"[-f pattern_file]...\n [pattern_list] [file]...\n"));
exit(2);