diff options
author | Robert Mustacchi <rm@fingolfin.org> | 2020-08-12 00:11:20 -0700 |
---|---|---|
committer | Robert Mustacchi <rm@fingolfin.org> | 2020-08-25 16:00:10 -0700 |
commit | dbe930bf51e0d7458b24d30e9f25756c5da54ddf (patch) | |
tree | b55b0f650b5f1373de1aeaee41e77809d4ca45c1 | |
parent | 81dd18d87c3c2e198d2178dfd1f30d62a81b69ea (diff) | |
download | illumos-joyent-dbe930bf51e0d7458b24d30e9f25756c5da54ddf.tar.gz |
13040 grep can leverage bmg in more cases
Reviewed by: Andy Fiddaman <andy@omniosce.org>
Reviewed by: Toomas Soome <tsoome@me.com>
Approved by: Gordon Ross <gordon.w.ross@gmail.com>
-rw-r--r-- | usr/src/cmd/grep/grep.c | 55 |
1 files changed, 51 insertions, 4 deletions
diff --git a/usr/src/cmd/grep/grep.c b/usr/src/cmd/grep/grep.c index 9c738feb1b..6becff5a58 100644 --- a/usr/src/cmd/grep/grep.c +++ b/usr/src/cmd/grep/grep.c @@ -756,6 +756,51 @@ addpattern(char *s) } /* + * Check if a given grep pattern that is being used with egrep or grep can be + * considered 'simple'. That is, there are no characters that would be treated + * differently from fgrep. In this particular case, we're a little bit + * conservative and look for characters that are: + * + * o 7-bit ASCII + * o Letters + * o Numbers + * o Meta-characters not used in BREs/EREs: !, @, #, /, -, _, <, >, = + * + * This can certainly be made more complex and less restrictive with additional + * testing. + */ +static boolean_t +simple_pattern(const char *str) +{ + for (; *str != '\0'; str++) { + if (!isascii(*str)) { + return (B_FALSE); + } + + if (isalnum(*str)) { + continue; + } + + switch (*str) { + case '!': + case '@': + case '#': + case '/': + case '-': + case '_': + case '<': + case '>': + case '=': + continue; + default: + return (B_FALSE); + } + } + + return (B_TRUE); +} + +/* * Fix patterns. * Must do after all arguments read, in case later -i option. */ @@ -768,7 +813,7 @@ fixpatterns(void) /* * Decide if we are able to run the Boyer-Moore-Gosper algorithm. 
* Use the Boyer-Moore-Gosper algorithm if: - * - fgrep (Fflag) + * - fgrep or non-BRE/ERE (Fflag || simple_pattern()) * - singlebyte locale (!mblocale) * - no ignoring case (!iflag) * - no printing line numbers (!nflag) @@ -778,10 +823,12 @@ fixpatterns(void) * - non zero length pattern (strlen(patterns->pattern) != 0) * - no context required (conflag == 0) * - no exact matches (!oflag) + * - no word matches (!wflag) */ - use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag && !oflag && - (patterns != NULL && patterns->next == NULL) && - (strlen(patterns->pattern) != 0) && conflag == 0; + use_bmg = !mblocale && !iflag && !nflag && nvflag && !oflag && + (patterns != NULL && patterns->next == NULL) && !wflag && + (strlen(patterns->pattern) != 0) && conflag == 0 && + (Fflag || simple_pattern(patterns->pattern)); if (use_bmg) { return; |