diff options
author | Yuri Pankov <yuri.pankov@nexenta.com> | 2019-10-19 03:04:42 +0300 |
---|---|---|
committer | Dan McDonald <danmcd@joyent.com> | 2019-10-22 11:10:25 -0400 |
commit | 695dd8d1c21542efb8ca2e82c6eb63007a6a5212 (patch) | |
tree | b234edce08ae727f2e84c63a7cd8211864e11dd1 /usr/src/lib/libc/port/regex/engine.c | |
parent | 1603eda21695ca85bfde0e5c75a27d94ac4ce4ff (diff) | |
download | illumos-joyent-695dd8d1c21542efb8ca2e82c6eb63007a6a5212.tar.gz |
11741 regexec: fix processing multibyte strings
Reviewed by: Toomas Soome <tsoome@me.com>
Approved by: Dan McDonald <danmcd@joyent.com>
Diffstat (limited to 'usr/src/lib/libc/port/regex/engine.c')
-rw-r--r-- | usr/src/lib/libc/port/regex/engine.c | 48 |
1 files changed, 45 insertions, 3 deletions
```diff
diff --git a/usr/src/lib/libc/port/regex/engine.c b/usr/src/lib/libc/port/regex/engine.c
index 2e04a9085b..f9d4f5f587 100644
--- a/usr/src/lib/libc/port/regex/engine.c
+++ b/usr/src/lib/libc/port/regex/engine.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2019 Nexenta by DDN, Inc. All rights reserved.
  * Copyright 2012 Milan Jurik. All rights reserved.
  * Copyright (c) 2016 by Delphix. All rights reserved.
  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
@@ -44,6 +44,7 @@
  */
 
 #ifdef SNAMES
+#define stepback sstepback
 #define matcher smatcher
 #define walk swalk
 #define dissect sdissect
@@ -54,6 +55,7 @@
 #define match smat
 #endif
 #ifdef LNAMES
+#define stepback lstepback
 #define matcher lmatcher
 #define walk lwalk
 #define dissect ldissect
@@ -64,6 +66,7 @@
 #define match lmat
 #endif
 #ifdef MNAMES
+#define stepback mstepback
 #define matcher mmatcher
 #define walk mwalk
 #define dissect mdissect
@@ -142,6 +145,39 @@ static const char *pchar(int ch);
 #endif
 
 /*
+ * Given a multibyte string pointed to by start, step back nchar characters
+ * from current position pointed to by cur.
+ */
+static const char *
+stepback(const char *start, const char *cur, int nchar)
+{
+	const char *ret;
+	int wc, mbc;
+	mbstate_t mbs;
+	size_t clen;
+
+	if (MB_CUR_MAX == 1)
+		return ((cur - nchar) > start ? cur - nchar : NULL);
+
+	ret = cur;
+	for (wc = nchar; wc > 0; wc--) {
+		for (mbc = 1; mbc <= MB_CUR_MAX; mbc++) {
+			if ((ret - mbc) < start)
+				return (NULL);
+			memset(&mbs, 0, sizeof (mbs));
+			clen = mbrtowc(NULL, ret - mbc, mbc, &mbs);
+			if (clen != (size_t)-1 && clen != (size_t)-2)
+				break;
+		}
+		if (mbc > MB_CUR_MAX)
+			return (NULL);
+		ret -= mbc;
+	}
+
+	return (ret);
+}
+
+/*
  * matcher - the actual matching engine
  */
 static int /* 0 success, REG_NOMATCH failure */
@@ -243,8 +279,13 @@ matcher(struct re_guts *g, const char *string, size_t nmatch,
 	ZAPSTATE(&m->mbs);
 
 	/* Adjust start according to moffset, to speed things up */
-	if (dp != NULL && g->moffset > -1)
-		start = ((dp - g->moffset) < start) ? start : dp - g->moffset;
+	if (dp != NULL && g->moffset > -1) {
+		const char *nstart;
+
+		nstart = stepback(start, dp, g->moffset);
+		if (nstart != NULL)
+			start = nstart;
+	}
 
 	SP("mloop", m->st, *start);
 
@@ -1037,6 +1078,7 @@ pchar(int ch)
 #endif
 #endif
 
+#undef stepback
 #undef matcher
 #undef walk
 #undef dissect
```