diff options
author | Yuri Pankov <yuri.pankov@nexenta.com> | 2017-08-08 10:57:15 +0300 |
---|---|---|
committer | Robert Mustacchi <rm@joyent.com> | 2017-08-14 17:44:45 +0000 |
commit | 79d022da827bda94f470706ea9a9a8d6dbab9d07 (patch) | |
tree | bef9ebc2c456b47136bf3db0030f986b5c5b04c4 /usr/src | |
parent | 02e2c3ed584d6e282e2f2f9ed1659ef2ba09b924 (diff) | |
download | illumos-gate-79d022da827bda94f470706ea9a9a8d6dbab9d07.tar.gz |
8568 fnmatch, glob: fix exponential CPU use with repeated '*' operators
Reviewed by: Igor Kozhukhov <igor@dilos.org>
Reviewed by: Andrew Stormont <andyjstormont@gmail.com>
Reviewed by: Toomas Soome <tsoome@me.com>
Approved by: Robert Mustacchi <rm@joyent.com>
Diffstat (limited to 'usr/src')
-rw-r--r-- | usr/src/lib/libc/port/locale/fnmatch.c | 90 | ||||
-rw-r--r-- | usr/src/lib/libc/port/regex/glob.c | 119 |
2 files changed, 117 insertions, 92 deletions
diff --git a/usr/src/lib/libc/port/locale/fnmatch.c b/usr/src/lib/libc/port/locale/fnmatch.c index ebea076036..cf7b1c2372 100644 --- a/usr/src/lib/libc/port/locale/fnmatch.c +++ b/usr/src/lib/libc/port/locale/fnmatch.c @@ -32,8 +32,7 @@ /* * Copyright 2013 Garrett D'Amore <garrett@damore.org> - * Copyright 2010 Nexenta Systems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright 2017 Nexenta Systems, Inc. */ /* @@ -74,9 +73,7 @@ static int fnmatch1(const char *, const char *, const char *, int, mbstate_t, mbstate_t, locale_t); int -fnmatch(pattern, string, flags) - const char *pattern, *string; - int flags; +fnmatch(const char *pattern, const char *string, int flags) { locale_t loc = uselocale(NULL); static const mbstate_t initial = { 0 }; @@ -89,11 +86,14 @@ static int fnmatch1(const char *pattern, const char *string, const char *stringstart, int flags, mbstate_t patmbs, mbstate_t strmbs, locale_t loc) { + const char *bt_pattern, *bt_string; + mbstate_t bt_patmbs, bt_strmbs; char *newp; char c; wchar_t pc, sc; size_t pclen, sclen; + bt_pattern = bt_string = NULL; for (;;) { pclen = mbrtowc_l(&pc, pattern, MB_LEN_MAX, &patmbs, loc); if (pclen == (size_t)-1 || pclen == (size_t)-2) @@ -111,16 +111,18 @@ fnmatch1(const char *pattern, const char *string, const char *stringstart, * Removed FNM_LEADING_DIR, as it is not present * on Solaris. */ - return (sc == EOS ? 0 : FNM_NOMATCH); + if (sc == EOS) + return (0); + goto backtrack; case '?': if (sc == EOS) return (FNM_NOMATCH); if (sc == '/' && (flags & FNM_PATHNAME)) - return (FNM_NOMATCH); + goto backtrack; if (sc == '.' && (flags & FNM_PERIOD) && (string == stringstart || ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) - return (FNM_NOMATCH); + goto backtrack; string += sclen; break; case '*': @@ -132,7 +134,7 @@ fnmatch1(const char *pattern, const char *string, const char *stringstart, if (sc == '.' && (flags & FNM_PERIOD) && (string == stringstart || ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) - return (FNM_NOMATCH); + goto backtrack; /* Optimize for pattern with * at end or before /. */ if (c == EOS) @@ -147,34 +149,24 @@ fnmatch1(const char *pattern, const char *string, const char *stringstart, break; } - /* General case, use recursion. */ - while (sc != EOS) { - if (!fnmatch1(pattern, string, stringstart, - flags, patmbs, strmbs, loc)) - return (0); - sclen = mbrtowc_l(&sc, string, MB_LEN_MAX, - &strmbs, loc); - if (sclen == (size_t)-1 || - sclen == (size_t)-2) { - sc = (unsigned char)*string; - sclen = 1; - (void) memset(&strmbs, 0, - sizeof (strmbs)); - } - if (sc == '/' && flags & FNM_PATHNAME) - break; - string += sclen; - } - return (FNM_NOMATCH); + /* + * First try the shortest match for the '*' that + * could work. We can forget any earlier '*' since + * there is no way having it match more characters + * can help us, given that we are already here. + */ + bt_pattern = pattern, bt_patmbs = patmbs; + bt_string = string, bt_strmbs = strmbs; + break; case '[': if (sc == EOS) return (FNM_NOMATCH); if (sc == '/' && (flags & FNM_PATHNAME)) - return (FNM_NOMATCH); + goto backtrack; if (sc == '.' && (flags & FNM_PERIOD) && (string == stringstart || ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) - return (FNM_NOMATCH); + goto backtrack; switch (rangematch(pattern, sc, flags, &newp, &patmbs, loc)) { @@ -184,7 +176,7 @@ fnmatch1(const char *pattern, const char *string, const char *stringstart, pattern = newp; break; case RANGE_NOMATCH: - return (FNM_NOMATCH); + goto backtrack; } string += sclen; break; @@ -201,15 +193,39 @@ fnmatch1(const char *pattern, const char *string, const char *stringstart, /* FALLTHROUGH */ default: norm: + string += sclen; if (pc == sc) - string += sclen; - + break; else if ((flags & FNM_IGNORECASE) && (towlower_l(pc, loc) == towlower_l(sc, loc))) - string += sclen; - else - return (FNM_NOMATCH); - + break; + else { + backtrack: + /* + * If we have a mismatch (other than hitting + * the end of the string), go back to the last + * '*' seen and have it match one additional + * character. + */ + if (bt_pattern == NULL) + return (FNM_NOMATCH); + sclen = mbrtowc_l(&sc, bt_string, MB_LEN_MAX, + &bt_strmbs, loc); + if (sclen == (size_t)-1 || + sclen == (size_t)-2) { + sc = (unsigned char)*bt_string; + sclen = 1; + (void) memset(&bt_strmbs, 0, + sizeof (bt_strmbs)); + } + if (sc == EOS) + return (FNM_NOMATCH); + if (sc == '/' && flags & FNM_PATHNAME) + return (FNM_NOMATCH); + bt_string += sclen; + pattern = bt_pattern, patmbs = bt_patmbs; + string = bt_string, strmbs = bt_strmbs; + } break; } } diff --git a/usr/src/lib/libc/port/regex/glob.c b/usr/src/lib/libc/port/regex/glob.c index 642d4f67d2..b6495c660c 100644 --- a/usr/src/lib/libc/port/regex/glob.c +++ b/usr/src/lib/libc/port/regex/glob.c @@ -1,8 +1,4 @@ /* - * Copyright (c) 2013 Gary Mills - */ -/* $OpenBSD: glob.c,v 1.39 2012/01/20 07:09:42 tedu Exp $ */ -/* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * @@ -35,6 +31,10 @@ */ /* + * Copyright (c) 2013 Gary Mills + */ + +/* * glob(3) -- a superset of the one defined in POSIX 1003.2. * * The [!...] convention to negate a range is supported (SysV, Posix, ksh). @@ -70,6 +70,7 @@ #include <glob.h> #include <limits.h> #include <pwd.h> +#include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -161,9 +162,6 @@ typedef struct wcat { #define GLOB_LIMIT_STAT 2048 #define GLOB_LIMIT_READDIR 16384 -/* Limit of recursion during matching attempts. */ -#define GLOB_LIMIT_RECUR 64 - struct glob_lim { size_t glim_malloc; size_t glim_stat; @@ -200,7 +198,7 @@ static int globexp1(const wcat_t *, glob_t *, struct glob_lim *, int (*)(const char *, int)); static int globexp2(const wcat_t *, const wcat_t *, glob_t *, struct glob_lim *, int (*)(const char *, int)); -static int match(wcat_t *, wcat_t *, wcat_t *, int); +static int match(wcat_t *, wcat_t *, wcat_t *); /* * Extended glob() function, selected by #pragma redefine_extname @@ -214,12 +212,9 @@ _glob_ext(const char *pattern, int flags, int (*errfunc)(const char *, int), int n; size_t patlen; wchar_t c; - wcat_t *bufnext, *bufend, patbuf[MAXPATHLEN]; + wcat_t *bufnext, *bufend, patbuf[PATH_MAX]; struct glob_lim limit = { 0, 0, 0 }; - if ((patlen = strnlen(pattern, PATH_MAX)) == PATH_MAX) - return (GLOB_NOMATCH); - patnext = pattern; if (!(flags & GLOB_APPEND)) { pglob->gl_pathc = 0; @@ -233,12 +228,15 @@ _glob_ext(const char *pattern, int flags, int (*errfunc)(const char *, int), pglob->gl_flags = flags & ~GLOB_MAGCHAR; pglob->gl_matchc = 0; + if ((patlen = strnlen(pattern, PATH_MAX)) == PATH_MAX) + return (GLOB_NOMATCH); + if (pglob->gl_offs >= INT_MAX || pglob->gl_pathc >= INT_MAX || pglob->gl_pathc >= INT_MAX - pglob->gl_offs - 1) return (GLOB_NOSPACE); bufnext = patbuf; - bufend = bufnext + MAXPATHLEN - 1; + bufend = bufnext + PATH_MAX - 1; patlen += 1; if (flags & GLOB_NOESCAPE) { while (bufnext < bufend) { @@ -326,7 +324,7 @@ globexp2(const wcat_t *ptr, const wcat_t *pattern, glob_t *pglob, int i, rv; wcat_t *lm, *ls; const wcat_t *pe, *pm, *pl; - wcat_t patbuf[MAXPATHLEN]; + wcat_t patbuf[PATH_MAX]; /* copy part up to the brace */ for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++) @@ -561,9 +559,9 @@ glob0(const wcat_t *pattern, glob_t *pglob, struct glob_lim *limitp, int err, oldpathc; wchar_t c; int a; - wcat_t *bufnext, patbuf[MAXPATHLEN]; + wcat_t *bufnext, patbuf[PATH_MAX]; - qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob); + qpatnext = globtilde(pattern, patbuf, PATH_MAX, pglob); oldpathc = pglob->gl_pathc; bufnext = patbuf; @@ -676,7 +674,7 @@ glob0(const wcat_t *pattern, glob_t *pglob, struct glob_lim *limitp, bufnext->w_at = 0; bufnext->w_wc = EOS; - if ((err = glob1(patbuf, patbuf+MAXPATHLEN-1, pglob, limitp, errfunc)) + if ((err = glob1(patbuf, patbuf+PATH_MAX-1, pglob, limitp, errfunc)) != 0) return (err); @@ -743,13 +741,13 @@ static int glob1(wcat_t *pattern, wcat_t *pattern_last, glob_t *pglob, struct glob_lim *limitp, int (*errfunc)(const char *, int)) { - wcat_t pathbuf[MAXPATHLEN]; + wcat_t pathbuf[PATH_MAX]; /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */ if (pattern->w_wc == EOS) return (0); - return (glob2(pathbuf, pathbuf+MAXPATHLEN-1, - pathbuf, pathbuf+MAXPATHLEN-1, + return (glob2(pathbuf, pathbuf+PATH_MAX-1, + pathbuf, pathbuf+PATH_MAX-1, pattern, pattern_last, pglob, limitp, errfunc)); } @@ -844,7 +842,7 @@ glob3(wcat_t *pathbuf, wcat_t *pathbuf_last, wcat_t *pathend, struct dirent *dp; DIR *dirp; int err; - char buf[MAXPATHLEN]; + char buf[PATH_MAX]; /* * The readdirfunc declaration can't be prototyped, because it is @@ -930,7 +928,7 @@ glob3(wcat_t *pathbuf, wcat_t *pathbuf_last, wcat_t *pathend, break; } - if (!match(pathend, pattern, restpattern, GLOB_LIMIT_RECUR)) { + if (!match(pathend, pattern, restpattern)) { pathend->w_at = 0; pathend->w_wc = EOS; continue; @@ -1000,25 +998,22 @@ globextend(const wcat_t *path, glob_t *pglob, struct glob_lim *limitp, pglob->gl_statv && pglob->gl_statv[i]) free(pglob->gl_statv[i]); } - if (pglob->gl_pathv) { - free(pglob->gl_pathv); - pglob->gl_pathv = NULL; - } - if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0 && - pglob->gl_statv) { + free(pglob->gl_pathv); + pglob->gl_pathv = NULL; + if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0) { free(pglob->gl_statv); pglob->gl_statv = NULL; } return (GLOB_NOSPACE); } limitp->glim_malloc += allocn * sizeof (*pathv); - pathv = realloc(pglob->gl_pathv, allocn * sizeof (*pathv)); + pathv = reallocarray(pglob->gl_pathv, allocn, sizeof (*pathv)); if (pathv == NULL) goto nospace; if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0) { limitp->glim_malloc += allocn * sizeof (*statv); - statv = realloc(pglob->gl_statv, - allocn * sizeof (*statv)); + statv = reallocarray(pglob->gl_statv, allocn, + sizeof (*statv)); if (statv == NULL) goto nospace; } @@ -1086,17 +1081,24 @@ globextend(const wcat_t *path, glob_t *pglob, struct glob_lim *limitp, /* * pattern matching function for filenames. Each occurrence of the * - * pattern causes a recursion level. + * pattern causes an iteration. + * + * Note, this function differs from the original as per the discussion + * here: https://research.swtch.com/glob + * + * Basically we removed the recursion and made it use the algorithm + * from Russ Cox to not go quadratic on cases like a file called + * ("a" x 100) . "x" matched against a pattern like "a*a*a*a*a*a*a*y". */ static int -match(wcat_t *name, wcat_t *pat, wcat_t *patend, int recur) +match(wcat_t *name, wcat_t *pat, wcat_t *patend) { int ok, negate_range; wcat_t c, k; + wcat_t *nextp = NULL; + wcat_t *nextn = NULL; - if (recur-- == 0) - return (1); - +loop: while (pat < patend) { c = *pat++; switch (c.w_wc) { @@ -1112,31 +1114,31 @@ match(wcat_t *name, wcat_t *pat, wcat_t *patend, int recur) pat++; /* eat consecutive '*' */ if (pat == patend) return (1); - do { - if (match(name, pat, patend, recur)) - return (1); - } while ((name++)->w_wc != EOS); - return (0); + if (name->w_wc == EOS) + return (0); + nextn = name + 1; + nextp = pat - 1; + break; case M_ONE: if (c.w_at != M_QUOTE) { k = *name++; if (k.w_at != c.w_at || k.w_wc != c.w_wc) - return (0); + goto fail; break; } if ((name++)->w_wc == EOS) - return (0); + goto fail; break; case M_SET: if (c.w_at != M_QUOTE) { k = *name++; if (k.w_at != c.w_at || k.w_wc != c.w_wc) - return (0); + goto fail; break; } ok = 0; if ((k = *name++).w_wc == EOS) - return (0); + goto fail; if ((negate_range = (pat->w_at == M_QUOTE && pat->w_wc == M_NOT)) != 0) ++pat; @@ -1161,16 +1163,25 @@ match(wcat_t *name, wcat_t *pat, wcat_t *patend, int recur) ok = 1; } if (ok == negate_range) - return (0); + goto fail; break; default: k = *name++; if (k.w_at != c.w_at || k.w_wc != c.w_wc) - return (0); + goto fail; break; } } - return (name->w_wc == EOS); + if (name->w_wc == EOS) + return (1); + +fail: + if (nextn) { + pat = nextp; + name = nextn; + goto loop; + } + return (0); } /* @@ -1186,16 +1197,14 @@ _globfree_ext(glob_t *pglob) if (pglob->gl_pathv != NULL) { pp = pglob->gl_pathv + pglob->gl_offs; for (i = pglob->gl_pathc; i--; ++pp) - if (*pp) - free(*pp); + free(*pp); free(pglob->gl_pathv); pglob->gl_pathv = NULL; } if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0 && pglob->gl_statv != NULL) { for (i = 0; i < pglob->gl_pathc; i++) { - if (pglob->gl_statv[i] != NULL) - free(pglob->gl_statv[i]); + free(pglob->gl_statv[i]); } free(pglob->gl_statv); pglob->gl_statv = NULL; @@ -1205,7 +1214,7 @@ _globfree_ext(glob_t *pglob) static DIR * g_opendir(wcat_t *str, glob_t *pglob) { - char buf[MAXPATHLEN]; + char buf[PATH_MAX]; if (str->w_wc == EOS) (void) strlcpy(buf, ".", sizeof (buf)); @@ -1223,7 +1232,7 @@ g_opendir(wcat_t *str, glob_t *pglob) static int g_lstat(wcat_t *fn, struct stat *sb, glob_t *pglob) { - char buf[MAXPATHLEN]; + char buf[PATH_MAX]; if (g_Ctoc(fn, buf, sizeof (buf))) return (-1); @@ -1235,7 +1244,7 @@ g_lstat(wcat_t *fn, struct stat *sb, glob_t *pglob) static int g_stat(wcat_t *fn, struct stat *sb, glob_t *pglob) { - char buf[MAXPATHLEN]; + char buf[PATH_MAX]; if (g_Ctoc(fn, buf, sizeof (buf))) return (-1); |