diff options
Diffstat (limited to 'usr/src/cmd/grep/grep.c')
-rw-r--r-- | usr/src/cmd/grep/grep.c | 408 |
1 files changed, 174 insertions, 234 deletions
diff --git a/usr/src/cmd/grep/grep.c b/usr/src/cmd/grep/grep.c index 8848d84724..9c738feb1b 100644 --- a/usr/src/cmd/grep/grep.c +++ b/usr/src/cmd/grep/grep.c @@ -78,7 +78,6 @@ static int bmgtab[M_CSETSIZE]; /* BMG delta1 table */ typedef struct _PATTERN { char *pattern; /* original pattern */ - wchar_t *wpattern; /* wide, lowercased pattern */ struct _PATTERN *next; regex_t re; /* compiled pattern */ } PATTERN; @@ -109,15 +108,15 @@ static uchar_t Fflag; /* Fgrep or -F flag */ static uchar_t Rflag; /* Like rflag, but follow symlinks */ static uchar_t outfn; /* Put out file name */ static uchar_t conflag; /* show context of matches */ +static uchar_t oflag; /* Print only matching output */ static char *cmdname; static char *stdin_label; /* Optional lable for stdin */ -static int use_wchar, use_bmg, mblocale; +static int use_bmg, mblocale; -static size_t outbuflen, prntbuflen, conbuflen; +static size_t prntbuflen, conbuflen; static unsigned long conalen, conblen, conmatches; static char *prntbuf, *conbuf; -static wchar_t *outline; static void addfile(const char *fn); static void addpattern(char *s); @@ -184,6 +183,7 @@ main(int argc, char **argv) } else { if (*ap == 'f' || *ap == 'F') { fgrep++; + regflags |= REG_NOSPEC; } } @@ -219,7 +219,7 @@ main(int argc, char **argv) } } - while ((c = getopt_long(argc, argv, "+vwchHilLnrbse:f:qxEFIRA:B:C:", + while ((c = getopt_long(argc, argv, "+vwchHilLnrbse:f:qxEFIRA:B:C:o", grep_options, NULL)) != EOF) { unsigned long tval; switch (c) { @@ -322,6 +322,7 @@ main(int argc, char **argv) case 'F': /* POSIX: strings, not RE's */ Fflag++; + regflags |= REG_NOSPEC; break; case 'R': /* Solaris: like rflag, but follow symlinks */ @@ -384,6 +385,10 @@ main(int argc, char **argv) stdin_label = optarg; break; + case 'o': + oflag++; + break; + default: usage(); } @@ -431,19 +436,45 @@ main(int argc, char **argv) usage(); /* - * -l or -L overrides -H like in GNU grep + * -l or -L overrides -H like in GNU grep. It also overrides -o. */ - if (lflag || Lflag) + if (lflag || Lflag) { Hflag = 0; + oflag = 0; + } /* * -c, -l and -q flags are mutually exclusive * We have -c override -l like in Solaris. * -q overrides -l & -c programmatically in grep() function. + * -c overrides -o in GNU grep, we honor that. */ - if (cflag && (lflag || Lflag)) { + if (cflag) { lflag = 0; Lflag = 0; + oflag = 0; + } + + /* + * If -o is set then we ignore all context related options, like other + * greps. + */ + if (oflag) { + conflag = 0; + } + + /* + * These flags are a semantic mess with no clear answers as to their + * behvaior. Based on some experimentation GNU grep will exit zero if a + * non-match is present, but never print anything. BSD grep seems to + * exit 1 and not print anything, even if there would have been a match. + * Also, you probably don't want to ask about what happens with grep -x + * -o -v, some implementations seem to just ignore -v. + */ + if (oflag && !nvflag) { + (void) fprintf(stderr, gettext("%s: the combination of -v and " + "-o is not supported currently\n"), argv[0]); + exit(2); } argv += optind - 1; @@ -477,14 +508,6 @@ main(int argc, char **argv) } /* - * If -x flag is not specified or -i flag is specified - * with fgrep in a multibyte locale, need to use - * the wide character APIs. Otherwise, byte-oriented - * process will be done. - */ - use_wchar = Fflag && mblocale && (!xflag || iflag); - - /* * Compile Patterns and also decide if BMG can be used */ fixpatterns(); @@ -740,15 +763,36 @@ static void fixpatterns(void) { PATTERN *pp; - int rv, fix_pattern, npatterns; + int rv, fix_pattern; + + /* + * Decide if we are able to run the Boyer-Moore-Gosper algorithm. + * Use the Boyer-Moore-Gosper algorithm if: + * - fgrep (Fflag) + * - singlebyte locale (!mblocale) + * - no ignoring case (!iflag) + * - no printing line numbers (!nflag) + * - no negating the output (nvflag) + * - only one pattern (patterns != NULL && patterns->next == + * NULL) + * - non zero length pattern (strlen(patterns->pattern) != 0) + * - no context required (conflag == 0) + * - no exact matches (!oflag) + */ + use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag && !oflag && + (patterns != NULL && patterns->next == NULL) && + (strlen(patterns->pattern) != 0) && conflag == 0; + + if (use_bmg) { + return; + } /* * Fix the specified pattern if -x is specified. */ fix_pattern = !Fflag && xflag; - for (npatterns = 0, pp = patterns; pp != NULL; pp = pp->next) { - npatterns++; + for (pp = patterns; pp != NULL; pp = pp->next) { if (fix_pattern) { char *cp, *cq; size_t plen, nplen; @@ -771,61 +815,9 @@ fixpatterns(void) pp->pattern = cp; } - if (Fflag) { - if (use_wchar) { - /* - * Fflag && mblocale && iflag - * Fflag && mblocale && !xflag - */ - size_t n; - n = strlen(pp->pattern) + 1; - if ((pp->wpattern = - malloc(sizeof (wchar_t) * n)) == NULL) { - (void) fprintf(stderr, - gettext("%s: out of memory\n"), - cmdname); - exit(2); - } - if (mbstowcs(pp->wpattern, pp->pattern, n) == - (size_t)-1) { - (void) fprintf(stderr, - gettext("%s: failed to convert " - "\"%s\" to wide-characters\n"), - cmdname, pp->pattern); - exit(2); - } - if (iflag) { - wchar_t *wp; - for (wp = pp->wpattern; *wp != L'\0'; - wp++) { - *wp = towlower((wint_t)*wp); - } - } - free(pp->pattern); - } else { - /* - * Fflag && mblocale && !iflag - * Fflag && !mblocale && iflag - * Fflag && !mblocale && !iflag - */ - if (iflag) { - unsigned char *cp; - for (cp = (unsigned char *)pp->pattern; - *cp != '\0'; cp++) { - *cp = tolower(*cp); - } - } - } - /* - * fgrep: No regular expressions. - */ - continue; - } - /* - * For non-fgrep, compile the regular expression, - * give an informative error message, and exit if - * it didn't compile. + * Compile the regular expression, give an informative error + * message, and exit if it didn't compile. */ if ((rv = regcomp(&pp->re, pp->pattern, regflags)) != 0) { (void) regerror(rv, &pp->re, errstr, sizeof (errstr)); @@ -836,25 +828,6 @@ fixpatterns(void) } free(pp->pattern); } - - /* - * Decide if we are able to run the Boyer-Moore-Gosper algorithm. - * Use the Boyer-Moore-Gosper algorithm if: - * - fgrep (Fflag) - * - singlebyte locale (!mblocale) - * - no ignoring case (!iflag) - * - no printing line numbers (!nflag) - * - no negating the output (nvflag) - * - only one pattern (npatterns == 1) - * - non zero length pattern (strlen(patterns->pattern) != 0) - * - no context required (conflag == 0) - * - * It's guaranteed patterns->pattern is still alive - * when Fflag && !mblocale. - */ - use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag && - (npatterns == 1) && (strlen(patterns->pattern) != 0) && - conflag == 0; } /* @@ -887,36 +860,6 @@ rfind_nl(const char *ptr, size_t len) } /* - * Duplicate the specified string converting each character - * into a lower case. - */ -static char * -istrdup(const char *s1) -{ - static size_t ibuflen = 0; - static char *ibuf = NULL; - size_t slen; - char *p; - - slen = strlen(s1); - if (slen >= ibuflen) { - /* ibuf does not fit to s1 */ - ibuflen = slen + 1; - ibuf = realloc(ibuf, ibuflen); - if (ibuf == NULL) { - (void) fprintf(stderr, - gettext("%s: out of memory\n"), cmdname); - exit(2); - } - } - p = ibuf; - do { - *p++ = tolower(*s1); - } while (*s1++ != '\0'); - return (ibuf); -} - -/* * Do grep on a single file. * Return true in any lines matched. * @@ -947,6 +890,7 @@ grep(int fd, const char *fn) int conaprnt = 0, conbprnt = 0, lastmatch = 0; boolean_t nearmatch; /* w/in N+1 of last match */ boolean_t havematch = B_FALSE; /* have a match in context */ + boolean_t sameline = B_FALSE; /* Are we still on the same line? */ size_t prntlen; if (patterns == NULL) @@ -958,16 +902,6 @@ grep(int fd, const char *fn) bmgcomp(pp->pattern, strlen(pp->pattern)); } - if (use_wchar && outline == NULL) { - outbuflen = BUFSIZE + 1; - outline = malloc(sizeof (wchar_t) * outbuflen); - if (outline == NULL) { - (void) fprintf(stderr, gettext("%s: out of memory\n"), - cmdname); - exit(2); - } - } - if (prntbuf == NULL) { prntbuflen = BUFSIZE; if ((prntbuf = malloc(prntbuflen + 1)) == NULL) { @@ -996,6 +930,8 @@ grep(int fd, const char *fn) long count; off_t offset = 0; char separate; + char *startmatch = NULL; /* -o, start of match */ + char *postmatch = NULL; /* -o, character after match */ boolean_t last_ctx = B_FALSE, eof = B_FALSE; if (data_len == 0) { @@ -1167,7 +1103,18 @@ L_start_process: } goto L_next_line; } - lineno++; + + /* + * When using -o, we might actually loop around while still on + * the same line. In such a case, we need to make sure we don't + * increment the line number. + */ + if (!sameline) { + lineno++; + } else { + sameline = B_FALSE; + } + /* * Line starts from ptr and ends at ptrend. * line_len will be the length of the line. @@ -1179,108 +1126,59 @@ L_start_process: * From now, the process will be performed based * on the line from ptr to ptrend. */ - if (use_wchar) { - size_t len; - - if (line_len >= outbuflen) { - outbuflen = line_len + 1; - outline = realloc(outline, - sizeof (wchar_t) * outbuflen); - if (outline == NULL) { - (void) fprintf(stderr, - gettext("%s: out of memory\n"), - cmdname); - exit(2); - } - } + for (pp = patterns; pp; pp = pp->next) { + int rv; + regmatch_t rm; + size_t nmatch = 0; - len = mbstowcs(outline, ptr, line_len); - if (len == (size_t)-1) { - (void) fprintf(stderr, gettext( - "%s: input file \"%s\": line %lld: invalid multibyte character\n"), - cmdname, fn, lineno); - /* never match a line with invalid sequence */ - goto L_skip_line; - } - outline[len] = L'\0'; + /* + * The current implementation of regexec has a higher + * cost when you ask for match information. As a result, + * we only ask for a match when we know that we need it + * specifically. This is always needed for -o because we + * rely on it to tell us what we matched. For fgrep -x + * we need it so we can determine whether we matched the + * entire line. + */ + if (oflag || (Fflag && xflag)) + nmatch = 1; - if (iflag) { - wchar_t *cp; - for (cp = outline; *cp != '\0'; cp++) { - *cp = towlower((wint_t)*cp); + rv = regexec(&pp->re, ptr, nmatch, &rm, 0); + if (rv == REG_OK) { + /* + * fgrep in this form cannot insert the + * metacharacters to verify whether or not we + * were the entire line. As a result, we check + * the pattern length against the line length. + */ + if (Fflag && xflag && + line_len != rm.rm_eo - rm.rm_so) { + continue; } - } - if (xflag) { - for (pp = patterns; pp; pp = pp->next) { - if (outline[0] == pp->wpattern[0] && - wcscmp(outline, - pp->wpattern) == 0) { - /* matched */ - break; - } - } - } else { - for (pp = patterns; pp; pp = pp->next) { - if (wcswcs(outline, pp->wpattern) - != NULL) { - /* matched */ - break; - } - } - } - } else if (Fflag) { - /* fgrep in byte-oriented handling */ - char *fptr; - if (iflag) { - fptr = istrdup(ptr); - } else { - fptr = ptr; - } - if (xflag) { - /* fgrep -x */ - for (pp = patterns; pp; pp = pp->next) { - if (fptr[0] == pp->pattern[0] && - strcmp(fptr, pp->pattern) == 0) { - /* matched */ - break; - } - } - } else { - for (pp = patterns; pp; pp = pp->next) { - if (strstr(fptr, pp->pattern) != NULL) { - /* matched */ - break; - } + /* matched */ + if (oflag) { + startmatch = ptr + rm.rm_so; + postmatch = ptr + rm.rm_eo; } + break; } - } else { - /* grep or egrep */ - for (pp = patterns; pp; pp = pp->next) { - int rv; - - rv = regexec(&pp->re, ptr, 0, NULL, 0); - if (rv == REG_OK) { - /* matched */ - break; - } - switch (rv) { - case REG_NOMATCH: - break; - case REG_ECHAR: - (void) fprintf(stderr, gettext( + switch (rv) { + case REG_NOMATCH: + break; + case REG_ECHAR: + (void) fprintf(stderr, gettext( "%s: input file \"%s\": line %lld: invalid multibyte character\n"), - cmdname, fn, lineno); - break; - default: - (void) regerror(rv, &pp->re, errstr, - sizeof (errstr)); - (void) fprintf(stderr, gettext( + cmdname, fn, lineno); + break; + default: + (void) regerror(rv, &pp->re, errstr, + sizeof (errstr)); + (void) fprintf(stderr, gettext( "%s: input file \"%s\": line %lld: %s\n"), - cmdname, fn, lineno, errstr); - exit(2); - } + cmdname, fn, lineno, errstr); + exit(2); } } @@ -1406,8 +1304,21 @@ L_next_line: */ if (!last_ctx && nvflag == (pp != NULL)) { matches++; - if (!nextend) - matchptr = (conflag != 0) ? conptrend : ptrend; + if (!nextend) { + if (conflag != 0) { + matchptr = conptrend; + } else if (oflag) { + matchptr = postmatch - 1; + } else { + matchptr = ptrend; + } + } + } + + if (pp != NULL && oflag && postmatch == NULL) { + (void) fprintf(stderr, gettext("%s: internal error, " + "-o set, but failed to find postmatch\n"), cmdname); + abort(); } /* @@ -1434,10 +1345,17 @@ L_next_line: (void) fwrite("--\n", 1, 3, stdout); } else if (conflag == 0 && nvflag == (pp != NULL)) { *ptrend = '\n'; + if (oflag) { + prntptr = startmatch; + } else { + prntptr = ptr; + } prntlen = line_len + 1; - prntptr = ptr; linenum = lineno; blkoffset = line_offset; + if (oflag) { + blkoffset += startmatch - ptr; + } } else if (eof) { /* No match and no more data */ goto out; @@ -1446,10 +1364,13 @@ L_next_line: goto L_skip_line; } - prntptrend = prntptr - 1; - while ((prntptrend = find_nl(prntptrend + 1, + if (oflag) { + prntptrend = postmatch - 1; + } else { + prntptrend = prntptr - 1; + } + while (oflag || (prntptrend = find_nl(prntptrend + 1, prntlen)) != NULL) { - /* * GNU grep uses '-' for context lines and ':' for * matching lines, so replicate that here. @@ -1501,10 +1422,23 @@ L_next_line: } (void) fwrite(prntptr, 1, prntptrend - prntptr + 1, stdout); + + if (oflag) { + (void) fputc('\n', stdout); + } } if (ferror(stdout)) { return (0); } + + /* + * With -o we'll only ever take this loop once. Manually + * break out. + */ + if (oflag) { + goto L_skip_line; + } + linenum++; prntlen -= prntptrend - prntptr + 1; blkoffset += prntptrend - prntptr + 1; @@ -1545,9 +1479,15 @@ L_skip_line: if (!newlinep) break; + if (oflag && postmatch != NULL) { + line_len = postmatch - 1 - ptr; + ptr = postmatch; + sameline = B_TRUE; + } else { + ptr = ptrend + 1; + } data_len -= line_len + 1; line_offset += line_len + 1; - ptr = ptrend + 1; } out: @@ -1585,7 +1525,7 @@ usage(void) (void) fprintf(stderr, gettext("usage: %5s"), cmdname); if (!egrep && !fgrep) (void) fprintf(stderr, gettext(" [-E|-F]")); - (void) fprintf(stderr, gettext(" [-bchHilLnqrRsvx] [-A num] [-B num] " + (void) fprintf(stderr, gettext(" [-bchHilLnoqrRsvx] [-A num] [-B num] " "[-C num|-num]\n [--label=name] [-e pattern_list]... " "[-f pattern_file]...\n [pattern_list] [file]...\n")); exit(2); |