diff options
author | Igor Pashev <pashev.igor@gmail.com> | 2012-06-24 22:28:35 +0000 |
---|---|---|
committer | Igor Pashev <pashev.igor@gmail.com> | 2012-06-24 22:28:35 +0000 |
commit | 3950ffe2a485479f6561c27364d3d7df5a21d124 (patch) | |
tree | 468c6e14449d1b1e279222ec32f676b0311917d2 /src/lib/libast/regex/regrexec.c | |
download | ksh-upstream.tar.gz |
Imported Upstream version 93u+upstream
Diffstat (limited to 'src/lib/libast/regex/regrexec.c')
-rw-r--r-- | src/lib/libast/regex/regrexec.c | 145 |
1 files changed, 145 insertions, 0 deletions
diff --git a/src/lib/libast/regex/regrexec.c b/src/lib/libast/regex/regrexec.c new file mode 100644 index 0000000..6cb1272 --- /dev/null +++ b/src/lib/libast/regex/regrexec.c @@ -0,0 +1,145 @@ +/*********************************************************************** +* * +* This software is part of the ast package * +* Copyright (c) 1985-2011 AT&T Intellectual Property * +* and is licensed under the * +* Eclipse Public License, Version 1.0 * +* by AT&T Intellectual Property * +* * +* A copy of the License is available at * +* http://www.eclipse.org/org/documents/epl-v10.html * +* (with md5 checksum b35adb5213ca9657e911e9befb180842) * +* * +* Information and Software Systems Research * +* AT&T Research * +* Florham Park NJ * +* * +* Glenn Fowler <gsf@research.att.com> * +* David Korn <dgk@research.att.com> * +* Phong Vo <kpv@research.att.com> * +* * +***********************************************************************/ +#pragma prototyped + +/* + * posix regex record executor + * multiple record sized-buffer interface + */ + +#include "reglib.h" + +/* + * call regnexec() on records selected by Boyer-Moore + */ + +int +regrexec(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags, int sep, void* handle, regrecord_t record) +{ + register unsigned char* buf = (unsigned char*)s; + register unsigned char* beg; + register unsigned char* l; + register unsigned char* r; + register unsigned char* x; + register size_t* skip; + register size_t* fail; + register Bm_mask_t** mask; + register size_t index; + register int n; + unsigned char* end; + size_t mid; + int complete; + int exactlen; + int leftlen; + int rightlen; + int inv; + Bm_mask_t m; + Env_t* env; + Rex_t* e; + + if (!s || !p || !(env = p->env) || (e = env->rex)->type != REX_BM) + return REG_BADPAT; + inv = (flags & REG_INVERT) != 0; + buf = beg = (unsigned char*)s; + end = buf + len; + mid = (len < e->re.bm.right) ? 0 : (len - e->re.bm.right); + skip = e->re.bm.skip; + fail = e->re.bm.fail; + mask = e->re.bm.mask; + complete = e->re.bm.complete && !nmatch; + exactlen = e->re.bm.size; + leftlen = e->re.bm.left + exactlen; + rightlen = exactlen + e->re.bm.right; + index = leftlen++; + for (;;) + { + while ((index += skip[buf[index]]) < mid); + if (index < HIT) + goto impossible; + index -= HIT; + m = mask[n = exactlen - 1][buf[index]]; + do + { + if (!n--) + goto possible; + } while (m &= mask[n][buf[--index]]); + if ((index += fail[n + 1]) < len) + continue; + impossible: + if (inv) + { + l = r = buf + len; + goto invert; + } + n = 0; + goto done; + possible: + r = (l = buf + index) + exactlen; + while (l > beg) + if (*--l == sep) + { + l++; + break; + } + if ((r - l) < leftlen) + goto spanned; + while (r < end && *r != sep) + r++; + if ((r - (buf + index)) < rightlen) + goto spanned; + if (complete || (env->rex = ((r - l) > 128) ? e : e->next) && !(n = regnexec(p, (char*)l, r - l, nmatch, match, flags))) + { + if (inv) + { + invert: + x = beg; + while (beg < l) + { + while (x < l && *x != sep) + x++; + if (n = (*record)(handle, (char*)beg, x - beg)) + goto done; + beg = ++x; + } + } + else if (n = (*record)(handle, (char*)l, r - l)) + goto done; + if ((index = (r - buf) + leftlen) >= len) + { + n = (inv && (++r - buf) < len) ? (*record)(handle, (char*)r, (buf + len) - r): 0; + goto done; + } + beg = r + 1; + } + else if (n != REG_NOMATCH) + goto done; + else + { + spanned: + if ((index += exactlen) >= mid) + goto impossible; + } + } + done: + env->rex = e; + return n; +} |