summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
authorYuri Pankov <yuri.pankov@nexenta.com>2017-08-08 10:57:15 +0300
committerRobert Mustacchi <rm@joyent.com>2017-08-14 17:44:45 +0000
commit79d022da827bda94f470706ea9a9a8d6dbab9d07 (patch)
treebef9ebc2c456b47136bf3db0030f986b5c5b04c4 /usr/src
parent02e2c3ed584d6e282e2f2f9ed1659ef2ba09b924 (diff)
downloadillumos-gate-79d022da827bda94f470706ea9a9a8d6dbab9d07.tar.gz
8568 fnmatch, glob: fix exponential CPU use with repeated '*' operators
Reviewed by: Igor Kozhukhov <igor@dilos.org> Reviewed by: Andrew Stormont <andyjstormont@gmail.com> Reviewed by: Toomas Soome <tsoome@me.com> Approved by: Robert Mustacchi <rm@joyent.com>
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/lib/libc/port/locale/fnmatch.c90
-rw-r--r--usr/src/lib/libc/port/regex/glob.c119
2 files changed, 117 insertions, 92 deletions
diff --git a/usr/src/lib/libc/port/locale/fnmatch.c b/usr/src/lib/libc/port/locale/fnmatch.c
index ebea076036..cf7b1c2372 100644
--- a/usr/src/lib/libc/port/locale/fnmatch.c
+++ b/usr/src/lib/libc/port/locale/fnmatch.c
@@ -32,8 +32,7 @@
/*
* Copyright 2013 Garrett D'Amore <garrett@damore.org>
- * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright 2017 Nexenta Systems, Inc.
*/
/*
@@ -74,9 +73,7 @@ static int fnmatch1(const char *, const char *, const char *, int, mbstate_t,
mbstate_t, locale_t);
int
-fnmatch(pattern, string, flags)
- const char *pattern, *string;
- int flags;
+fnmatch(const char *pattern, const char *string, int flags)
{
locale_t loc = uselocale(NULL);
static const mbstate_t initial = { 0 };
@@ -89,11 +86,14 @@ static int
fnmatch1(const char *pattern, const char *string, const char *stringstart,
int flags, mbstate_t patmbs, mbstate_t strmbs, locale_t loc)
{
+ const char *bt_pattern, *bt_string;
+ mbstate_t bt_patmbs, bt_strmbs;
char *newp;
char c;
wchar_t pc, sc;
size_t pclen, sclen;
+ bt_pattern = bt_string = NULL;
for (;;) {
pclen = mbrtowc_l(&pc, pattern, MB_LEN_MAX, &patmbs, loc);
if (pclen == (size_t)-1 || pclen == (size_t)-2)
@@ -111,16 +111,18 @@ fnmatch1(const char *pattern, const char *string, const char *stringstart,
* Removed FNM_LEADING_DIR, as it is not present
* on Solaris.
*/
- return (sc == EOS ? 0 : FNM_NOMATCH);
+ if (sc == EOS)
+ return (0);
+ goto backtrack;
case '?':
if (sc == EOS)
return (FNM_NOMATCH);
if (sc == '/' && (flags & FNM_PATHNAME))
- return (FNM_NOMATCH);
+ goto backtrack;
if (sc == '.' && (flags & FNM_PERIOD) &&
(string == stringstart ||
((flags & FNM_PATHNAME) && *(string - 1) == '/')))
- return (FNM_NOMATCH);
+ goto backtrack;
string += sclen;
break;
case '*':
@@ -132,7 +134,7 @@ fnmatch1(const char *pattern, const char *string, const char *stringstart,
if (sc == '.' && (flags & FNM_PERIOD) &&
(string == stringstart ||
((flags & FNM_PATHNAME) && *(string - 1) == '/')))
- return (FNM_NOMATCH);
+ goto backtrack;
/* Optimize for pattern with * at end or before /. */
if (c == EOS)
@@ -147,34 +149,24 @@ fnmatch1(const char *pattern, const char *string, const char *stringstart,
break;
}
- /* General case, use recursion. */
- while (sc != EOS) {
- if (!fnmatch1(pattern, string, stringstart,
- flags, patmbs, strmbs, loc))
- return (0);
- sclen = mbrtowc_l(&sc, string, MB_LEN_MAX,
- &strmbs, loc);
- if (sclen == (size_t)-1 ||
- sclen == (size_t)-2) {
- sc = (unsigned char)*string;
- sclen = 1;
- (void) memset(&strmbs, 0,
- sizeof (strmbs));
- }
- if (sc == '/' && flags & FNM_PATHNAME)
- break;
- string += sclen;
- }
- return (FNM_NOMATCH);
+ /*
+ * First try the shortest match for the '*' that
+ * could work. We can forget any earlier '*' since
+ * there is no way having it match more characters
+ * can help us, given that we are already here.
+ */
+ bt_pattern = pattern, bt_patmbs = patmbs;
+ bt_string = string, bt_strmbs = strmbs;
+ break;
case '[':
if (sc == EOS)
return (FNM_NOMATCH);
if (sc == '/' && (flags & FNM_PATHNAME))
- return (FNM_NOMATCH);
+ goto backtrack;
if (sc == '.' && (flags & FNM_PERIOD) &&
(string == stringstart ||
((flags & FNM_PATHNAME) && *(string - 1) == '/')))
- return (FNM_NOMATCH);
+ goto backtrack;
switch (rangematch(pattern, sc, flags, &newp,
&patmbs, loc)) {
@@ -184,7 +176,7 @@ fnmatch1(const char *pattern, const char *string, const char *stringstart,
pattern = newp;
break;
case RANGE_NOMATCH:
- return (FNM_NOMATCH);
+ goto backtrack;
}
string += sclen;
break;
@@ -201,15 +193,39 @@ fnmatch1(const char *pattern, const char *string, const char *stringstart,
/* FALLTHROUGH */
default:
norm:
+ string += sclen;
if (pc == sc)
- string += sclen;
-
+ break;
else if ((flags & FNM_IGNORECASE) &&
(towlower_l(pc, loc) == towlower_l(sc, loc)))
- string += sclen;
- else
- return (FNM_NOMATCH);
-
+ break;
+ else {
+ backtrack:
+ /*
+ * If we have a mismatch (other than hitting
+ * the end of the string), go back to the last
+ * '*' seen and have it match one additional
+ * character.
+ */
+ if (bt_pattern == NULL)
+ return (FNM_NOMATCH);
+ sclen = mbrtowc_l(&sc, bt_string, MB_LEN_MAX,
+ &bt_strmbs, loc);
+ if (sclen == (size_t)-1 ||
+ sclen == (size_t)-2) {
+ sc = (unsigned char)*bt_string;
+ sclen = 1;
+ (void) memset(&bt_strmbs, 0,
+ sizeof (bt_strmbs));
+ }
+ if (sc == EOS)
+ return (FNM_NOMATCH);
+ if (sc == '/' && flags & FNM_PATHNAME)
+ return (FNM_NOMATCH);
+ bt_string += sclen;
+ pattern = bt_pattern, patmbs = bt_patmbs;
+ string = bt_string, strmbs = bt_strmbs;
+ }
break;
}
}
diff --git a/usr/src/lib/libc/port/regex/glob.c b/usr/src/lib/libc/port/regex/glob.c
index 642d4f67d2..b6495c660c 100644
--- a/usr/src/lib/libc/port/regex/glob.c
+++ b/usr/src/lib/libc/port/regex/glob.c
@@ -1,8 +1,4 @@
/*
- * Copyright (c) 2013 Gary Mills
- */
-/* $OpenBSD: glob.c,v 1.39 2012/01/20 07:09:42 tedu Exp $ */
-/*
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
*
@@ -35,6 +31,10 @@
*/
/*
+ * Copyright (c) 2013 Gary Mills
+ */
+
+/*
* glob(3) -- a superset of the one defined in POSIX 1003.2.
*
* The [!...] convention to negate a range is supported (SysV, Posix, ksh).
@@ -70,6 +70,7 @@
#include <glob.h>
#include <limits.h>
#include <pwd.h>
+#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -161,9 +162,6 @@ typedef struct wcat {
#define GLOB_LIMIT_STAT 2048
#define GLOB_LIMIT_READDIR 16384
-/* Limit of recursion during matching attempts. */
-#define GLOB_LIMIT_RECUR 64
-
struct glob_lim {
size_t glim_malloc;
size_t glim_stat;
@@ -200,7 +198,7 @@ static int globexp1(const wcat_t *, glob_t *, struct glob_lim *,
int (*)(const char *, int));
static int globexp2(const wcat_t *, const wcat_t *, glob_t *,
struct glob_lim *, int (*)(const char *, int));
-static int match(wcat_t *, wcat_t *, wcat_t *, int);
+static int match(wcat_t *, wcat_t *, wcat_t *);
/*
* Extended glob() function, selected by #pragma redefine_extname
@@ -214,12 +212,9 @@ _glob_ext(const char *pattern, int flags, int (*errfunc)(const char *, int),
int n;
size_t patlen;
wchar_t c;
- wcat_t *bufnext, *bufend, patbuf[MAXPATHLEN];
+ wcat_t *bufnext, *bufend, patbuf[PATH_MAX];
struct glob_lim limit = { 0, 0, 0 };
- if ((patlen = strnlen(pattern, PATH_MAX)) == PATH_MAX)
- return (GLOB_NOMATCH);
-
patnext = pattern;
if (!(flags & GLOB_APPEND)) {
pglob->gl_pathc = 0;
@@ -233,12 +228,15 @@ _glob_ext(const char *pattern, int flags, int (*errfunc)(const char *, int),
pglob->gl_flags = flags & ~GLOB_MAGCHAR;
pglob->gl_matchc = 0;
+ if ((patlen = strnlen(pattern, PATH_MAX)) == PATH_MAX)
+ return (GLOB_NOMATCH);
+
if (pglob->gl_offs >= INT_MAX || pglob->gl_pathc >= INT_MAX ||
pglob->gl_pathc >= INT_MAX - pglob->gl_offs - 1)
return (GLOB_NOSPACE);
bufnext = patbuf;
- bufend = bufnext + MAXPATHLEN - 1;
+ bufend = bufnext + PATH_MAX - 1;
patlen += 1;
if (flags & GLOB_NOESCAPE) {
while (bufnext < bufend) {
@@ -326,7 +324,7 @@ globexp2(const wcat_t *ptr, const wcat_t *pattern, glob_t *pglob,
int i, rv;
wcat_t *lm, *ls;
const wcat_t *pe, *pm, *pl;
- wcat_t patbuf[MAXPATHLEN];
+ wcat_t patbuf[PATH_MAX];
/* copy part up to the brace */
for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++)
@@ -561,9 +559,9 @@ glob0(const wcat_t *pattern, glob_t *pglob, struct glob_lim *limitp,
int err, oldpathc;
wchar_t c;
int a;
- wcat_t *bufnext, patbuf[MAXPATHLEN];
+ wcat_t *bufnext, patbuf[PATH_MAX];
- qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob);
+ qpatnext = globtilde(pattern, patbuf, PATH_MAX, pglob);
oldpathc = pglob->gl_pathc;
bufnext = patbuf;
@@ -676,7 +674,7 @@ glob0(const wcat_t *pattern, glob_t *pglob, struct glob_lim *limitp,
bufnext->w_at = 0;
bufnext->w_wc = EOS;
- if ((err = glob1(patbuf, patbuf+MAXPATHLEN-1, pglob, limitp, errfunc))
+ if ((err = glob1(patbuf, patbuf+PATH_MAX-1, pglob, limitp, errfunc))
!= 0)
return (err);
@@ -743,13 +741,13 @@ static int
glob1(wcat_t *pattern, wcat_t *pattern_last, glob_t *pglob,
struct glob_lim *limitp, int (*errfunc)(const char *, int))
{
- wcat_t pathbuf[MAXPATHLEN];
+ wcat_t pathbuf[PATH_MAX];
/* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
if (pattern->w_wc == EOS)
return (0);
- return (glob2(pathbuf, pathbuf+MAXPATHLEN-1,
- pathbuf, pathbuf+MAXPATHLEN-1,
+ return (glob2(pathbuf, pathbuf+PATH_MAX-1,
+ pathbuf, pathbuf+PATH_MAX-1,
pattern, pattern_last, pglob, limitp, errfunc));
}
@@ -844,7 +842,7 @@ glob3(wcat_t *pathbuf, wcat_t *pathbuf_last, wcat_t *pathend,
struct dirent *dp;
DIR *dirp;
int err;
- char buf[MAXPATHLEN];
+ char buf[PATH_MAX];
/*
* The readdirfunc declaration can't be prototyped, because it is
@@ -930,7 +928,7 @@ glob3(wcat_t *pathbuf, wcat_t *pathbuf_last, wcat_t *pathend,
break;
}
- if (!match(pathend, pattern, restpattern, GLOB_LIMIT_RECUR)) {
+ if (!match(pathend, pattern, restpattern)) {
pathend->w_at = 0;
pathend->w_wc = EOS;
continue;
@@ -1000,25 +998,22 @@ globextend(const wcat_t *path, glob_t *pglob, struct glob_lim *limitp,
pglob->gl_statv && pglob->gl_statv[i])
free(pglob->gl_statv[i]);
}
- if (pglob->gl_pathv) {
- free(pglob->gl_pathv);
- pglob->gl_pathv = NULL;
- }
- if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0 &&
- pglob->gl_statv) {
+ free(pglob->gl_pathv);
+ pglob->gl_pathv = NULL;
+ if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0) {
free(pglob->gl_statv);
pglob->gl_statv = NULL;
}
return (GLOB_NOSPACE);
}
limitp->glim_malloc += allocn * sizeof (*pathv);
- pathv = realloc(pglob->gl_pathv, allocn * sizeof (*pathv));
+ pathv = reallocarray(pglob->gl_pathv, allocn, sizeof (*pathv));
if (pathv == NULL)
goto nospace;
if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0) {
limitp->glim_malloc += allocn * sizeof (*statv);
- statv = realloc(pglob->gl_statv,
- allocn * sizeof (*statv));
+ statv = reallocarray(pglob->gl_statv, allocn,
+ sizeof (*statv));
if (statv == NULL)
goto nospace;
}
@@ -1086,17 +1081,24 @@ globextend(const wcat_t *path, glob_t *pglob, struct glob_lim *limitp,
/*
* pattern matching function for filenames. Each occurrence of the *
- * pattern causes a recursion level.
+ * pattern causes an iteration.
+ *
+ * Note, this function differs from the original as per the discussion
+ * here: https://research.swtch.com/glob
+ *
+ * Basically we removed the recursion and made it use the algorithm
+ * from Russ Cox to not go quadratic on cases like a file called
+ * ("a" x 100) . "x" matched against a pattern like "a*a*a*a*a*a*a*y".
*/
static int
-match(wcat_t *name, wcat_t *pat, wcat_t *patend, int recur)
+match(wcat_t *name, wcat_t *pat, wcat_t *patend)
{
int ok, negate_range;
wcat_t c, k;
+ wcat_t *nextp = NULL;
+ wcat_t *nextn = NULL;
- if (recur-- == 0)
- return (1);
-
+loop:
while (pat < patend) {
c = *pat++;
switch (c.w_wc) {
@@ -1112,31 +1114,31 @@ match(wcat_t *name, wcat_t *pat, wcat_t *patend, int recur)
pat++; /* eat consecutive '*' */
if (pat == patend)
return (1);
- do {
- if (match(name, pat, patend, recur))
- return (1);
- } while ((name++)->w_wc != EOS);
- return (0);
+ if (name->w_wc == EOS)
+ return (0);
+ nextn = name + 1;
+ nextp = pat - 1;
+ break;
case M_ONE:
if (c.w_at != M_QUOTE) {
k = *name++;
if (k.w_at != c.w_at || k.w_wc != c.w_wc)
- return (0);
+ goto fail;
break;
}
if ((name++)->w_wc == EOS)
- return (0);
+ goto fail;
break;
case M_SET:
if (c.w_at != M_QUOTE) {
k = *name++;
if (k.w_at != c.w_at || k.w_wc != c.w_wc)
- return (0);
+ goto fail;
break;
}
ok = 0;
if ((k = *name++).w_wc == EOS)
- return (0);
+ goto fail;
if ((negate_range = (pat->w_at == M_QUOTE &&
pat->w_wc == M_NOT)) != 0)
++pat;
@@ -1161,16 +1163,25 @@ match(wcat_t *name, wcat_t *pat, wcat_t *patend, int recur)
ok = 1;
}
if (ok == negate_range)
- return (0);
+ goto fail;
break;
default:
k = *name++;
if (k.w_at != c.w_at || k.w_wc != c.w_wc)
- return (0);
+ goto fail;
break;
}
}
- return (name->w_wc == EOS);
+ if (name->w_wc == EOS)
+ return (1);
+
+fail:
+ if (nextn) {
+ pat = nextp;
+ name = nextn;
+ goto loop;
+ }
+ return (0);
}
/*
@@ -1186,16 +1197,14 @@ _globfree_ext(glob_t *pglob)
if (pglob->gl_pathv != NULL) {
pp = pglob->gl_pathv + pglob->gl_offs;
for (i = pglob->gl_pathc; i--; ++pp)
- if (*pp)
- free(*pp);
+ free(*pp);
free(pglob->gl_pathv);
pglob->gl_pathv = NULL;
}
if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0 &&
pglob->gl_statv != NULL) {
for (i = 0; i < pglob->gl_pathc; i++) {
- if (pglob->gl_statv[i] != NULL)
- free(pglob->gl_statv[i]);
+ free(pglob->gl_statv[i]);
}
free(pglob->gl_statv);
pglob->gl_statv = NULL;
@@ -1205,7 +1214,7 @@ _globfree_ext(glob_t *pglob)
static DIR *
g_opendir(wcat_t *str, glob_t *pglob)
{
- char buf[MAXPATHLEN];
+ char buf[PATH_MAX];
if (str->w_wc == EOS)
(void) strlcpy(buf, ".", sizeof (buf));
@@ -1223,7 +1232,7 @@ g_opendir(wcat_t *str, glob_t *pglob)
static int
g_lstat(wcat_t *fn, struct stat *sb, glob_t *pglob)
{
- char buf[MAXPATHLEN];
+ char buf[PATH_MAX];
if (g_Ctoc(fn, buf, sizeof (buf)))
return (-1);
@@ -1235,7 +1244,7 @@ g_lstat(wcat_t *fn, struct stat *sb, glob_t *pglob)
static int
g_stat(wcat_t *fn, struct stat *sb, glob_t *pglob)
{
- char buf[MAXPATHLEN];
+ char buf[PATH_MAX];
if (g_Ctoc(fn, buf, sizeof (buf)))
return (-1);