summaryrefslogtreecommitdiff
path: root/usr/src/lib/libc/port/regex/engine.c
diff options
context:
space:
mode:
author: Yuri Pankov <yuri.pankov@nexenta.com>, 2019-10-19 03:04:42 +0300
committer: Dan McDonald <danmcd@joyent.com>, 2019-10-22 11:10:25 -0400
commit: 695dd8d1c21542efb8ca2e82c6eb63007a6a5212 (patch)
tree: b234edce08ae727f2e84c63a7cd8211864e11dd1 /usr/src/lib/libc/port/regex/engine.c
parent: 1603eda21695ca85bfde0e5c75a27d94ac4ce4ff (diff)
download: illumos-joyent-695dd8d1c21542efb8ca2e82c6eb63007a6a5212.tar.gz
11741 regexec: fix processing multibyte strings
Reviewed by: Toomas Soome <tsoome@me.com>
Approved by: Dan McDonald <danmcd@joyent.com>
Diffstat (limited to 'usr/src/lib/libc/port/regex/engine.c')
-rw-r--r-- usr/src/lib/libc/port/regex/engine.c | 48
1 files changed, 45 insertions, 3 deletions
diff --git a/usr/src/lib/libc/port/regex/engine.c b/usr/src/lib/libc/port/regex/engine.c
index 2e04a9085b..f9d4f5f587 100644
--- a/usr/src/lib/libc/port/regex/engine.c
+++ b/usr/src/lib/libc/port/regex/engine.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2019 Nexenta by DDN, Inc. All rights reserved.
* Copyright 2012 Milan Jurik. All rights reserved.
* Copyright (c) 2016 by Delphix. All rights reserved.
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
@@ -44,6 +44,7 @@
*/
#ifdef SNAMES
+#define stepback sstepback
#define matcher smatcher
#define walk swalk
#define dissect sdissect
@@ -54,6 +55,7 @@
#define match smat
#endif
#ifdef LNAMES
+#define stepback lstepback
#define matcher lmatcher
#define walk lwalk
#define dissect ldissect
@@ -64,6 +66,7 @@
#define match lmat
#endif
#ifdef MNAMES
+#define stepback mstepback
#define matcher mmatcher
#define walk mwalk
#define dissect mdissect
@@ -142,6 +145,39 @@ static const char *pchar(int ch);
#endif
/*
+ * Given a multibyte string pointed to by start, step back nchar characters
+ * from the current position pointed to by cur.
+ *
+ * Returns a pointer to the position nchar characters before cur, or NULL
+ * when that position cannot be reached: the walk would run past start, or
+ * none of the 1..MB_CUR_MAX byte runs ending at the current position
+ * converts with mbrtowc().  When MB_CUR_MAX is 1, NULL is also returned if
+ * the result would be exactly start.
+ *
+ * The string is only read.  A NULL return is not an error for the caller
+ * in matcher(), which simply keeps its existing start pointer.
+ */
+static const char *
+stepback(const char *start, const char *cur, int nchar)
+{
+ const char *ret;
+ int wc, mbc;
+ mbstate_t mbs;
+ size_t clen;
+
+ if (MB_CUR_MAX == 1) /* one byte per character: plain pointer arithmetic */
+ return ((cur - nchar) > start ? cur - nchar : NULL);
+
+ ret = cur;
+ for (wc = nchar; wc > 0; wc--) { /* one pass per character to step over */
+ for (mbc = 1; mbc <= MB_CUR_MAX; mbc++) { /* try the mbc bytes before ret */
+ if ((ret - mbc) < start) /* candidate would begin before the string */
+ return (NULL);
+ memset(&mbs, 0, sizeof (mbs)); /* fresh conversion state per attempt */
+ clen = mbrtowc(NULL, ret - mbc, mbc, &mbs);
+ if (clen != (size_t)-1 && clen != (size_t)-2) /* not invalid, not incomplete */
+ break;
+ }
+ if (mbc > MB_CUR_MAX) /* inner loop ran out: no length converted */
+ return (NULL);
+ ret -= mbc; /* move back over the bytes just accepted */
+ }
+
+ return (ret);
+}
+
+/*
* matcher - the actual matching engine
*/
static int /* 0 success, REG_NOMATCH failure */
@@ -243,8 +279,13 @@ matcher(struct re_guts *g, const char *string, size_t nmatch,
ZAPSTATE(&m->mbs);
/* Adjust start according to moffset, to speed things up */
- if (dp != NULL && g->moffset > -1)
- start = ((dp - g->moffset) < start) ? start : dp - g->moffset;
+ if (dp != NULL && g->moffset > -1) {
+ const char *nstart;
+
+ nstart = stepback(start, dp, g->moffset);
+ if (nstart != NULL)
+ start = nstart;
+ }
SP("mloop", m->st, *start);
@@ -1037,6 +1078,7 @@ pchar(int ch)
#endif
#endif
+#undef stepback
#undef matcher
#undef walk
#undef dissect