summaryrefslogtreecommitdiff
path: root/usr/src/lib/libast/common/regex/regnexec.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/lib/libast/common/regex/regnexec.c')
-rw-r--r--usr/src/lib/libast/common/regex/regnexec.c2047
1 files changed, 2047 insertions, 0 deletions
diff --git a/usr/src/lib/libast/common/regex/regnexec.c b/usr/src/lib/libast/common/regex/regnexec.c
new file mode 100644
index 0000000000..3406e33a0a
--- /dev/null
+++ b/usr/src/lib/libast/common/regex/regnexec.c
@@ -0,0 +1,2047 @@
+/***********************************************************************
+* *
+* This software is part of the ast package *
+* Copyright (c) 1985-2007 AT&T Knowledge Ventures *
+* and is licensed under the *
+* Common Public License, Version 1.0 *
+* by AT&T Knowledge Ventures *
+* *
+* A copy of the License is available at *
+* http://www.opensource.org/licenses/cpl1.0.txt *
+* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
+* *
+* Information and Software Systems Research *
+* AT&T Research *
+* Florham Park NJ *
+* *
+* Glenn Fowler <gsf@research.att.com> *
+* David Korn <dgk@research.att.com> *
+* Phong Vo <kpv@research.att.com> *
+* *
+***********************************************************************/
+#pragma prototyped
+
+/*
+ * posix regex executor
+ * single sized-record interface
+ */
+
+#include "reglib.h"
+
+#if _AST_REGEX_DEBUG
+
+#define DEBUG_TEST(f,y,n) ((debug&(debug_flag=f))?(y):(n))
+#define DEBUG_CODE(f,y,n) do if(debug&(f)){y}else{n} while(0)
+#define DEBUG_INIT() do { char* t; if (!debug) { debug = 0x80000000; if (t = getenv("_AST_regex_exec_debug")) debug |= strtoul(t, NiL, 0); } } while (0)
+
+static unsigned long debug;
+static unsigned long debug_flag;
+
+static const char* rexnames[] =
+{
+ "REX_NULL",
+ "REX_ALT",
+ "REX_ALT_CATCH",
+ "REX_BACK",
+ "REX_BEG",
+ "REX_BEG_STR",
+ "REX_BM",
+ "REX_CAT",
+ "REX_CLASS",
+ "REX_COLL_CLASS",
+ "REX_CONJ",
+ "REX_CONJ_LEFT",
+ "REX_CONJ_RIGHT",
+ "REX_DONE",
+ "REX_DOT",
+ "REX_END",
+ "REX_END_STR",
+ "REX_EXEC",
+ "REX_FIN_STR",
+ "REX_GROUP",
+ "REX_GROUP_CATCH",
+ "REX_GROUP_AHEAD",
+ "REX_GROUP_AHEAD_CATCH",
+ "REX_GROUP_AHEAD_NOT",
+ "REX_GROUP_BEHIND",
+ "REX_GROUP_BEHIND_CATCH",
+ "REX_GROUP_BEHIND_NOT",
+ "REX_GROUP_BEHIND_NOT_CATCH",
+ "REX_GROUP_COND",
+ "REX_GROUP_COND_CATCH",
+ "REX_GROUP_CUT",
+ "REX_GROUP_CUT_CATCH",
+ "REX_KMP",
+ "REX_NEG",
+ "REX_NEG_CATCH",
+ "REX_NEST",
+ "REX_ONECHAR",
+ "REX_REP",
+ "REX_REP_CATCH",
+ "REX_STRING",
+ "REX_TRIE",
+ "REX_WBEG",
+ "REX_WEND",
+ "REX_WORD",
+ "REX_WORD_NOT"
+};
+
+static const char* rexname(Rex_t* rex)
+{
+ if (!rex)
+ return "NIL";
+ if (rex->type >= elementsof(rexnames))
+ return "ERROR";
+ return rexnames[rex->type];
+}
+
+#else
+
+#define DEBUG_INIT()
+#define DEBUG_TEST(f,y,n) (n)
+#define DEBUG_CODE(f,y,n) do {n} while(0)
+
+#endif
+
+#define BEG_ALT 1 /* beginning of an alt */
+#define BEG_ONE 2 /* beginning of one iteration of a rep */
+#define BEG_REP 3 /* beginning of a repetition */
+#define BEG_SUB 4 /* beginning of a subexpression */
+#define END_ANY 5 /* end of any of above */
+
+/*
+ * returns from parse()
+ */
+
+#define NONE 0 /* no parse found */
+#define GOOD 1 /* some parse was found */
+#define CUT 2 /* no match and no backtrack */
+#define BEST 3 /* an unbeatable parse was found */
+#define BAD 4 /* error ocurred */
+
+/*
+ * REG_SHELL_DOT test
+ */
+
+#define LEADING(e,r,s) (*(s)==(e)->leading&&((s)==(e)->beg||*((s)-1)==(r)->explicit))
+
+/*
+ * Pos_t is for comparing parses. An entry is made in the
+ * array at the beginning and at the end of each Group_t,
+ * each iteration in a Group_t, and each Binary_t.
+ */
+
+typedef struct
+{
+ unsigned char* p; /* where in string */
+ size_t length; /* length in string */
+ short serial; /* preorder subpattern number */
+ short be; /* which end of pair */
+} Pos_t;
+
+/* ===== begin library support ===== */
+
+#define vector(t,v,i) (((i)<(v)->max)?(t*)((v)->vec+(i)*(v)->siz):(t*)vecseek(&(v),i))
+
+static Vector_t*
+vecopen(int inc, int siz)
+{
+ Vector_t* v;
+ Stk_t* sp;
+
+ if (inc <= 0)
+ inc = 16;
+ if (!(sp = stkopen(STK_SMALL|STK_NULL)))
+ return 0;
+ if (!(v = (Vector_t*)stkseek(sp, sizeof(Vector_t) + inc * siz)))
+ {
+ stkclose(sp);
+ return 0;
+ }
+ v->stk = sp;
+ v->vec = (char*)v + sizeof(Vector_t);
+ v->max = v->inc = inc;
+ v->siz = siz;
+ v->cur = 0;
+ return v;
+}
+
+static void*
+vecseek(Vector_t** p, int index)
+{
+ Vector_t* v = *p;
+
+ if (index >= v->max)
+ {
+ while ((v->max += v->inc) <= index);
+ if (!(v = (Vector_t*)stkseek(v->stk, sizeof(Vector_t) + v->max * v->siz)))
+ return 0;
+ *p = v;
+ v->vec = (char*)v + sizeof(Vector_t);
+ }
+ return v->vec + index * v->siz;
+}
+
+static void
+vecclose(Vector_t* v)
+{
+ if (v)
+ stkclose(v->stk);
+}
+
+typedef struct
+{
+ Stk_pos_t pos;
+ char data[1];
+} Stk_frame_t;
+
+#define stknew(s,p) ((p)->offset=stktell(s),(p)->base=stkfreeze(s,0))
+#define stkold(s,p) stkset(s,(p)->base,(p)->offset)
+
+#define stkframe(s) (*((Stk_frame_t**)stktop(s)-1))
+#define stkdata(s,t) ((t*)stkframe(s)->data)
+#define stkpop(s) stkold(s,&(stkframe(s)->pos))
+
+static void*
+stkpush(Stk_t* sp, size_t size)
+{
+ Stk_frame_t* f;
+ Stk_pos_t p;
+
+ stknew(sp, &p);
+ size = sizeof(Stk_frame_t) + sizeof(size_t) + size - 1;
+ if (!(f = (Stk_frame_t*)stkalloc(sp, sizeof(Stk_frame_t) + sizeof(Stk_frame_t*) + size - 1)))
+ return 0;
+ f->pos = p;
+ stkframe(sp) = f;
+ return f->data;
+}
+
+/* ===== end library support ===== */
+
+/*
+ * Match_frame_t is for saving and restoring match records
+ * around alternate attempts, so that fossils will not be
+ * left in the match array. These are the only entries in
+ * the match array that are not otherwise guaranteed to
+ * have current data in them when they get used.
+ */
+
+typedef struct
+{
+ size_t size;
+ regmatch_t* match;
+ regmatch_t save[1];
+} Match_frame_t;
+
+#define matchframe stkdata(stkstd,Match_frame_t)
+#define matchpush(e,x) ((x)->re.group.number?_matchpush(e,x):0)
+#define matchcopy(e,x) ((x)->re.group.number?memcpy(matchframe->match,matchframe->save,matchframe->size):(void*)0)
+#define matchpop(e,x) ((x)->re.group.number?memcpy(matchframe->match,matchframe->save,matchframe->size),stkpop(stkstd):(void*)0)
+
+#define pospop(e) (--(e)->pos->cur)
+
+/*
+ * allocate a frame and push a match onto the stack
+ */
+
+static int
+_matchpush(Env_t* env, Rex_t* rex)
+{
+ Match_frame_t* f;
+ regmatch_t* m;
+ regmatch_t* e;
+ regmatch_t* s;
+ int num;
+
+ if (rex->re.group.number <= 0 || (num = rex->re.group.last - rex->re.group.number + 1) <= 0)
+ num = 0;
+ if (!(f = (Match_frame_t*)stkpush(stkstd, sizeof(Match_frame_t) + (num - 1) * sizeof(regmatch_t))))
+ {
+ env->error = REG_ESPACE;
+ return 1;
+ }
+ f->size = num * sizeof(regmatch_t);
+ f->match = m = env->match + rex->re.group.number;
+ e = m + num;
+ s = f->save;
+ while (m < e)
+ {
+ *s++ = *m;
+ *m++ = state.nomatch;
+ }
+ return 0;
+}
+
+/*
+ * allocate a frame and push a pos onto the stack
+ */
+
+static int
+pospush(Env_t* env, Rex_t* rex, unsigned char* p, int be)
+{
+ Pos_t* pos;
+
+ if (!(pos = vector(Pos_t, env->pos, env->pos->cur)))
+ {
+ env->error = REG_ESPACE;
+ return 1;
+ }
+ pos->serial = rex->serial;
+ pos->p = p;
+ pos->be = be;
+ env->pos->cur++;
+ return 0;
+}
+
+/*
+ * two matches are known to have the same length
+ * os is start of old pos array, ns is start of new,
+ * oend and nend are end+1 pointers to ends of arrays.
+ * oe and ne are ends (not end+1) of subarrays.
+ * returns 1 if new is better, -1 if old, else 0.
+ */
+
+#if _AST_REGEX_DEBUG
+
+static void
+showmatch(regmatch_t* p)
+{
+ sfputc(sfstdout, '(');
+ if (p->rm_so < 0)
+ sfputc(sfstdout, '?');
+ else
+ sfprintf(sfstdout, "%d", p->rm_so);
+ sfputc(sfstdout, ',');
+ if (p->rm_eo < 0)
+ sfputc(sfstdout, '?');
+ else
+ sfprintf(sfstdout, "%d", p->rm_eo);
+ sfputc(sfstdout, ')');
+}
+
+static int
+better(Env_t* env, Pos_t* os, Pos_t* ns, Pos_t* oend, Pos_t* nend, int level)
+#define better _better
+{
+ int i;
+
+ DEBUG_CODE(0x0040,{sfprintf(sfstdout, "AHA better old ");for (i = 0; i <= env->nsub; i++)showmatch(&env->best[i]);sfprintf(sfstdout, "\n new ");for (i = 0; i <= env->nsub; i++)showmatch(&env->match[i]);sfprintf(sfstdout, "\n");},{0;});
+ i = better(env, os, ns, oend, nend, 0);
+ DEBUG_TEST(0x0040,(sfprintf(sfstdout, " %s\n", i <= 0 ? "OLD" : "NEW")),(0));
+ return i;
+}
+
+#endif
+
+static int
+better(Env_t* env, Pos_t* os, Pos_t* ns, Pos_t* oend, Pos_t* nend, int level)
+{
+ Pos_t* oe;
+ Pos_t* ne;
+ int k;
+ int n;
+
+ if (env->error)
+ return -1;
+ for (;;)
+ {
+ DEBUG_CODE(0x0080,{sfprintf(sfstdout, " %-*.*sold ", (level + 3) * 4, (level + 3) * 4, "");for (oe = os; oe < oend; oe++)sfprintf(sfstdout, "<%d,%d,%d>", oe->p - env->beg, oe->serial, oe->be);sfprintf(sfstdout, "\n %-*.*snew ", (level + 3) * 4, (level + 3) * 4, "");for (oe = ns; oe < nend; oe++)sfprintf(sfstdout, "<%d,%d,%d>", oe->p - env->beg, oe->serial, oe->be);sfprintf(sfstdout, "\n");},{0;});
+ if (ns >= nend)
+ return DEBUG_TEST(0x8000,(os < oend),(0));
+ if (os >= oend)
+ return DEBUG_TEST(0x8000,(-1),(1));
+ n = os->serial;
+ if (ns->serial > n)
+ return -1;
+ if (n > ns->serial)
+ {
+ env->error = REG_PANIC;
+ return -1;
+ }
+ if (ns->p > os->p)
+ return 1;
+ if (os->p > ns->p)
+ return -1;
+ oe = os;
+ k = 0;
+ for (;;)
+ if ((++oe)->serial == n)
+ {
+ if (oe->be != END_ANY)
+ k++;
+ else if (k-- <= 0)
+ break;
+ }
+ ne = ns;
+ k = 0;
+ for (;;)
+ if ((++ne)->serial == n)
+ {
+ if (ne->be != END_ANY)
+ k++;
+ else if (k-- <= 0)
+ break;
+ }
+ if (ne->p > oe->p)
+ return 1;
+ if (oe->p > ne->p)
+ return -1;
+ if (k = better(env, os + 1, ns + 1, oe, ne, level + 1))
+ return k;
+ os = oe + 1;
+ ns = ne + 1;
+ }
+}
+
+#undef better
+
+#define follow(e,r,c,s) ((r)->next?parse(e,(r)->next,c,s):(c)?parse(e,c,0,s):BEST)
+
+static int parse(Env_t*, Rex_t*, Rex_t*, unsigned char*);
+
+static int
+parserep(Env_t* env, Rex_t* rex, Rex_t* cont, unsigned char* s, int n)
+{
+ int i;
+ int r = NONE;
+ Rex_t catcher;
+
+ DEBUG_TEST(0x0010,(sfprintf(sfstdout, "AHA#%04d 0x%04x parserep %s %d %d %d `%-.*s'\n", __LINE__, debug_flag, rexname(rex->re.group.expr.rex), rex->lo, n, rex->hi, env->end - s, s)),(0));
+ if ((rex->flags & REG_MINIMAL) && n >= rex->lo && n < rex->hi)
+ {
+ if (env->stack && pospush(env, rex, s, END_ANY))
+ return BAD;
+ i = follow(env, rex, cont, s);
+ if (env->stack)
+ pospop(env);
+ switch (i)
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ case GOOD:
+ return BEST;
+ }
+ }
+ if (n < rex->hi)
+ {
+ catcher.type = REX_REP_CATCH;
+ catcher.serial = rex->serial;
+ catcher.re.rep_catch.ref = rex;
+ catcher.re.rep_catch.cont = cont;
+ catcher.re.rep_catch.beg = s;
+ catcher.re.rep_catch.n = n + 1;
+ catcher.next = rex->next;
+ if (n == 0)
+ rex->re.rep_catch.beg = s;
+ if (env->stack)
+ {
+DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x PUSH (%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rex->re.group.number, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
+ if (matchpush(env, rex))
+ return BAD;
+ if (pospush(env, rex, s, BEG_ONE))
+ return BAD;
+DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x PUSH (%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rex->re.group.number, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
+ }
+ r = parse(env, rex->re.group.expr.rex, &catcher, s);
+ DEBUG_TEST(0x0010,(sfprintf(sfstdout, "AHA#%04d 0x%04x parserep parse %d `%-.*s'\n", __LINE__, debug_flag, r, env->end - s, s)),(0));
+ if (env->stack)
+ {
+DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x POP (%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rex->re.group.number, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
+ pospop(env);
+ matchpop(env, rex);
+DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x POP (%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rex->re.group.number, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
+ }
+ switch (r)
+ {
+ case BAD:
+ return BAD;
+ case BEST:
+ return BEST;
+ case CUT:
+ r = NONE;
+ break;
+ case GOOD:
+ if (rex->flags & REG_MINIMAL)
+ return BEST;
+ r = GOOD;
+ break;
+ }
+ }
+ if (n < rex->lo)
+ return r;
+ if (!(rex->flags & REG_MINIMAL) || n >= rex->hi)
+ {
+ if (env->stack && pospush(env, rex, s, END_ANY))
+ return BAD;
+ i = follow(env, rex, cont, s);
+ if (env->stack)
+ pospop(env);
+ switch (i)
+ {
+ case BAD:
+ r = BAD;
+ break;
+ case CUT:
+ r = CUT;
+ break;
+ case BEST:
+ r = BEST;
+ break;
+ case GOOD:
+ r = (rex->flags & REG_MINIMAL) ? BEST : GOOD;
+ break;
+ }
+ }
+ return r;
+}
+
+static int
+parsetrie(Env_t* env, Trie_node_t* x, Rex_t* rex, Rex_t* cont, unsigned char* s)
+{
+ unsigned char* p;
+ int r;
+
+ if (p = rex->map)
+ {
+ for (;;)
+ {
+ if (s >= env->end)
+ return NONE;
+ while (x->c != p[*s])
+ if (!(x = x->sib))
+ return NONE;
+ if (x->end)
+ break;
+ x = x->son;
+ s++;
+ }
+ }
+ else
+ {
+ for (;;)
+ {
+ if (s >= env->end)
+ return NONE;
+ while (x->c != *s)
+ if (!(x = x->sib))
+ return NONE;
+ if (x->end)
+ break;
+ x = x->son;
+ s++;
+ }
+ }
+ s++;
+ if (rex->flags & REG_MINIMAL)
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ case GOOD:
+ return BEST;
+ }
+ if (x->son)
+ switch (parsetrie(env, x->son, rex, cont, s))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ return BEST;
+ case GOOD:
+ if (rex->flags & REG_MINIMAL)
+ return BEST;
+ r = GOOD;
+ break;
+ default:
+ r = NONE;
+ break;
+ }
+ else
+ r = NONE;
+ if (!(rex->flags & REG_MINIMAL))
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ return BEST;
+ case GOOD:
+ return GOOD;
+ }
+ return r;
+}
+
+static int
+collelt(register Celt_t* ce, char* key, int c, int x)
+{
+ Ckey_t elt;
+
+ mbxfrm(elt, key, COLL_KEY_MAX);
+ for (;; ce++)
+ {
+ switch (ce->typ)
+ {
+ case COLL_call:
+ if (!x && (*ce->fun)(c))
+ return 1;
+ continue;
+ case COLL_char:
+ if (!strcmp((char*)ce->beg, (char*)elt))
+ return 1;
+ continue;
+ case COLL_range:
+ if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max)
+ return 1;
+ continue;
+ case COLL_range_lc:
+ if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max && (iswlower(c) || !iswupper(c)))
+ return 1;
+ continue;
+ case COLL_range_uc:
+ if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max && (iswupper(c) || !iswlower(c)))
+ return 1;
+ continue;
+ }
+ break;
+ }
+ return 0;
+}
+
+static int
+collic(register Celt_t* ce, char* key, register char* nxt, int c, int x)
+{
+ if (!x)
+ {
+ if (collelt(ce, key, c, x))
+ return 1;
+ if (iswlower(c))
+ c = towupper(c);
+ else if (iswupper(c))
+ c = towlower(c);
+ else
+ return 0;
+ x = wctomb(key, c);
+ key[x] = 0;
+ return collelt(ce, key, c, 0);
+ }
+ while (*nxt)
+ {
+ if (collic(ce, key, nxt + 1, c, x))
+ return 1;
+ if (islower(*nxt))
+ *nxt = toupper(*nxt);
+ else if (isupper(*nxt))
+ *nxt = tolower(*nxt);
+ else
+ return 0;
+ nxt++;
+ }
+ return collelt(ce, key, c, x);
+}
+
+static int
+collmatch(Rex_t* rex, unsigned char* s, unsigned char* e, unsigned char** p)
+{
+ unsigned char* t;
+ wchar_t c;
+ int w;
+ int r;
+ int x;
+ int ic;
+ Ckey_t key;
+ Ckey_t elt;
+
+ ic = (rex->flags & REG_ICASE);
+ if ((w = MBSIZE(s)) > 1)
+ {
+ memcpy((char*)key, (char*)s, w);
+ key[w] = 0;
+ t = s;
+ c = mbchar(t);
+#if !_lib_wctype
+ c &= 0xff;
+#endif
+ x = 0;
+ }
+ else
+ {
+ c = s[0];
+ if (ic && isupper(c))
+ c = tolower(c);
+ key[0] = c;
+ key[1] = 0;
+ if (isalpha(c))
+ {
+ x = e - s;
+ if (x > COLL_KEY_MAX)
+ x = COLL_KEY_MAX;
+ while (w < x)
+ {
+ c = s[w];
+ if (!isalpha(c))
+ break;
+ r = mbxfrm(elt, key, COLL_KEY_MAX);
+ if (ic && isupper(c))
+ c = tolower(c);
+ key[w] = c;
+ key[w + 1] = 0;
+ if (mbxfrm(elt, key, COLL_KEY_MAX) != r)
+ break;
+ w++;
+ }
+ }
+ key[w] = 0;
+ c = key[0];
+ x = w - 1;
+ }
+ r = 1;
+ for (;;)
+ {
+ if (ic ? collic(rex->re.collate.elements, (char*)key, (char*)key, c, x) : collelt(rex->re.collate.elements, (char*)key, c, x))
+ break;
+ if (!x)
+ {
+ r = 0;
+ break;
+ }
+ w = x--;
+ key[w] = 0;
+ }
+ *p = s + w;
+ return rex->re.collate.invert ? !r : r;
+}
+
+static unsigned char*
+nestmatch(register unsigned char* s, register unsigned char* e, const unsigned short* type, register int co)
+{
+ register int c;
+ register int cc;
+ unsigned int n;
+ int oc;
+
+ if (type[co] & (REX_NEST_literal|REX_NEST_quote))
+ {
+ n = (type[co] & REX_NEST_literal) ? REX_NEST_terminator : (REX_NEST_escape|REX_NEST_terminator);
+ while (s < e)
+ {
+ c = *s++;
+ if (c == co)
+ return s;
+ else if (type[c] & n)
+ {
+ if (s >= e || (type[c] & REX_NEST_terminator))
+ break;
+ s++;
+ }
+ }
+ }
+ else
+ {
+ cc = type[co] >> REX_NEST_SHIFT;
+ oc = type[co] & (REX_NEST_open|REX_NEST_close);
+ n = 1;
+ while (s < e)
+ {
+ c = *s++;
+ switch (type[c] & (REX_NEST_escape|REX_NEST_open|REX_NEST_close|REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator))
+ {
+ case REX_NEST_delimiter:
+ case REX_NEST_terminator:
+ return oc ? 0 : s;
+ case REX_NEST_separator:
+ if (!oc)
+ return s;
+ break;
+ case REX_NEST_escape:
+ if (s >= e)
+ return 0;
+ s++;
+ break;
+ case REX_NEST_open|REX_NEST_close:
+ if (c == cc)
+ {
+ if (!--n)
+ return s;
+ }
+ /*FALLTHROUGH*/
+ case REX_NEST_open:
+ if (c == co)
+ {
+ if (!++n)
+ return 0;
+ }
+ else if (!(s = nestmatch(s, e, type, c)))
+ return 0;
+ break;
+ case REX_NEST_close:
+ if (c != cc)
+ return 0;
+ if (!--n)
+ return s;
+ break;
+ }
+ }
+ return (oc || !(type[UCHAR_MAX+1] & REX_NEST_terminator)) ? 0 : s;
+ }
+ return 0;
+}
+
+static int
+parse(Env_t* env, Rex_t* rex, Rex_t* cont, unsigned char* s)
+{
+ int c;
+ int d;
+ int i;
+ int m;
+ int n;
+ int r;
+ int* f;
+ unsigned char* p;
+ unsigned char* t;
+ unsigned char* b;
+ unsigned char* e;
+ char* u;
+ regmatch_t* o;
+ Trie_node_t* x;
+ Rex_t* q;
+ Rex_t catcher;
+ Rex_t next;
+
+ for (;;)
+ {
+DEBUG_TEST(0x0008,(sfprintf(sfstdout, "AHA#%04d 0x%04x parse %s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), env->end - s, s)),(0));
+ switch (rex->type)
+ {
+ case REX_ALT:
+ if (env->stack)
+ {
+ if (matchpush(env, rex))
+ return BAD;
+ if (pospush(env, rex, s, BEG_ALT))
+ return BAD;
+ catcher.type = REX_ALT_CATCH;
+ catcher.serial = rex->serial;
+ catcher.re.alt_catch.cont = cont;
+ catcher.next = rex->next;
+ r = parse(env, rex->re.group.expr.binary.left, &catcher, s);
+ if (r < BEST || (rex->flags & REG_MINIMAL))
+ {
+ matchcopy(env, rex);
+ ((Pos_t*)env->pos->vec + env->pos->cur - 1)->serial = catcher.serial = rex->re.group.expr.binary.serial;
+ n = parse(env, rex->re.group.expr.binary.right, &catcher, s);
+ if (n != NONE)
+ r = n;
+ }
+ pospop(env);
+ matchpop(env, rex);
+ }
+ else
+ {
+ if ((r = parse(env, rex->re.group.expr.binary.left, cont, s)) == NONE)
+ r = parse(env, rex->re.group.expr.binary.right, cont, s);
+ if (r == GOOD)
+ r = BEST;
+ }
+ return r;
+ case REX_ALT_CATCH:
+ if (pospush(env, rex, s, END_ANY))
+ return BAD;
+ r = follow(env, rex, rex->re.alt_catch.cont, s);
+ pospop(env);
+ return r;
+ case REX_BACK:
+ o = &env->match[rex->lo];
+ if (o->rm_so < 0)
+ return NONE;
+ i = o->rm_eo - o->rm_so;
+ e = s + i;
+ if (e > env->end)
+ return NONE;
+ t = env->beg + o->rm_so;
+ if (!(p = rex->map))
+ {
+ while (s < e)
+ if (*s++ != *t++)
+ return NONE;
+ }
+ else if (!mbwide())
+ {
+ while (s < e)
+ if (p[*s++] != p[*t++])
+ return NONE;
+ }
+ else
+ {
+ while (s < e)
+ {
+ c = mbchar(s);
+ d = mbchar(t);
+ if (towupper(c) != towupper(d))
+ return NONE;
+ }
+ }
+ break;
+ case REX_BEG:
+ if ((!(rex->flags & REG_NEWLINE) || s <= env->beg || *(s - 1) != '\n') && ((env->flags & REG_NOTBOL) || s != env->beg))
+ return NONE;
+ break;
+ case REX_CLASS:
+ if (LEADING(env, rex, s))
+ return NONE;
+ n = rex->hi;
+ if (n > env->end - s)
+ n = env->end - s;
+ m = rex->lo;
+ if (m > n)
+ return NONE;
+ r = NONE;
+ if (!(rex->flags & REG_MINIMAL))
+ {
+ for (i = 0; i < n; i++)
+ if (!settst(rex->re.charclass, s[i]))
+ {
+ n = i;
+ break;
+ }
+ for (s += n; n-- >= m; s--)
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ return BEST;
+ case GOOD:
+ r = GOOD;
+ break;
+ }
+ }
+ else
+ {
+ for (e = s + m; s < e; s++)
+ if (!settst(rex->re.charclass, *s))
+ return r;
+ e += n - m;
+ for (;;)
+ {
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ case GOOD:
+ return BEST;
+ }
+ if (s >= e || !settst(rex->re.charclass, *s))
+ break;
+ s++;
+ }
+ }
+ return r;
+ case REX_COLL_CLASS:
+ if (LEADING(env, rex, s))
+ return NONE;
+ n = rex->hi;
+ if (n > env->end - s)
+ n = env->end - s;
+ m = rex->lo;
+ if (m > n)
+ return NONE;
+ r = NONE;
+ e = env->end;
+ if (!(rex->flags & REG_MINIMAL))
+ {
+ if (!(b = (unsigned char*)stkpush(stkstd, n)))
+ {
+ env->error = REG_ESPACE;
+ return BAD;
+ }
+ for (i = 0; s < e && i < n && collmatch(rex, s, e, &t); i++)
+ {
+ b[i] = t - s;
+ s = t;
+ }
+ for (; i-- >= rex->lo; s -= b[i])
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ stkpop(stkstd);
+ return BAD;
+ case CUT:
+ stkpop(stkstd);
+ return CUT;
+ case BEST:
+ stkpop(stkstd);
+ return BEST;
+ case GOOD:
+ r = GOOD;
+ break;
+ }
+ stkpop(stkstd);
+ }
+ else
+ {
+ for (i = 0; i < m && s < e; i++, s = t)
+ if (!collmatch(rex, s, e, &t))
+ return r;
+ while (i++ <= n)
+ {
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ case GOOD:
+ return BEST;
+ }
+ if (s >= e || !collmatch(rex, s, e, &s))
+ break;
+ }
+ }
+ return r;
+ case REX_CONJ:
+ next.type = REX_CONJ_RIGHT;
+ next.re.conj_right.cont = cont;
+ next.next = rex->next;
+ catcher.type = REX_CONJ_LEFT;
+ catcher.re.conj_left.right = rex->re.group.expr.binary.right;
+ catcher.re.conj_left.cont = &next;
+ catcher.re.conj_left.beg = s;
+ catcher.next = 0;
+ return parse(env, rex->re.group.expr.binary.left, &catcher, s);
+ case REX_CONJ_LEFT:
+ rex->re.conj_left.cont->re.conj_right.end = s;
+ cont = rex->re.conj_left.cont;
+ s = rex->re.conj_left.beg;
+ rex = rex->re.conj_left.right;
+ continue;
+ case REX_CONJ_RIGHT:
+ if (rex->re.conj_right.end != s)
+ return NONE;
+ cont = rex->re.conj_right.cont;
+ break;
+ case REX_DONE:
+ if (!env->stack)
+ return BEST;
+ n = s - env->beg;
+ r = env->nsub;
+ DEBUG_TEST(0x0100,(sfprintf(sfstdout,"AHA#%04d 0x%04x %s (%d,%d)(%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rexname(rex), env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->best[3].rm_so, env->best[3].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
+ if ((i = env->best[0].rm_eo) >= 0)
+ {
+ if (rex->flags & REG_MINIMAL)
+ {
+ if (n > i)
+ return GOOD;
+ }
+ else
+ {
+ if (n < i)
+ return GOOD;
+ }
+ if (n == i && better(env,
+ (Pos_t*)env->bestpos->vec,
+ (Pos_t*)env->pos->vec,
+ (Pos_t*)env->bestpos->vec+env->bestpos->cur,
+ (Pos_t*)env->pos->vec+env->pos->cur,
+ 0) <= 0)
+ return GOOD;
+ }
+ env->best[0].rm_eo = n;
+ memcpy(&env->best[1], &env->match[1], r * sizeof(regmatch_t));
+ n = env->pos->cur;
+ if (!vector(Pos_t, env->bestpos, n))
+ {
+ env->error = REG_ESPACE;
+ return BAD;
+ }
+ env->bestpos->cur = n;
+ memcpy(env->bestpos->vec, env->pos->vec, n * sizeof(Pos_t));
+ DEBUG_TEST(0x0100,(sfprintf(sfstdout,"AHA#%04d 0x%04x %s (%d,%d)(%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rexname(rex), env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->best[3].rm_so, env->best[3].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
+ return GOOD;
+ case REX_DOT:
+ if (LEADING(env, rex, s))
+ return NONE;
+ n = rex->hi;
+ if (n > env->end - s)
+ n = env->end - s;
+ m = rex->lo;
+ if (m > n)
+ return NONE;
+ if ((c = rex->explicit) >= 0 && !mbwide())
+ for (i = 0; i < n; i++)
+ if (s[i] == c)
+ {
+ n = i;
+ break;
+ }
+ r = NONE;
+ if (!(rex->flags & REG_MINIMAL))
+ {
+ if (!mbwide())
+ {
+ for (s += n; n-- >= m; s--)
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ return BEST;
+ case GOOD:
+ r = GOOD;
+ break;
+ }
+ }
+ else
+ {
+ if (!(b = (unsigned char*)stkpush(stkstd, n)))
+ {
+ env->error = REG_ESPACE;
+ return BAD;
+ }
+ e = env->end;
+ for (i = 0; s < e && i < n && *s != c; i++)
+ s += b[i] = MBSIZE(s);
+ for (; i-- >= m; s -= b[i])
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ stkpop(stkstd);
+ return BAD;
+ case CUT:
+ stkpop(stkstd);
+ return CUT;
+ case BEST:
+ stkpop(stkstd);
+ return BEST;
+ case GOOD:
+ r = GOOD;
+ break;
+ }
+ stkpop(stkstd);
+ }
+ }
+ else
+ {
+ if (!mbwide())
+ {
+ e = s + n;
+ for (s += m; s <= e; s++)
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ case GOOD:
+ return BEST;
+ }
+ }
+ else
+ {
+ e = env->end;
+ for (i = 0; s < e && i < m && *s != c; i++)
+ s += MBSIZE(s);
+ if (i >= m)
+ for (; s <= e && i <= n; s += MBSIZE(s), i++)
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ case GOOD:
+ return BEST;
+ }
+ }
+ }
+ return r;
+ case REX_END:
+ if ((!(rex->flags & REG_NEWLINE) || *s != '\n') && ((env->flags & REG_NOTEOL) || s < env->end))
+ return NONE;
+ break;
+ case REX_GROUP:
+DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), env->end - s, s)),(0));
+ if (env->stack)
+ {
+ if (rex->re.group.number)
+ env->match[rex->re.group.number].rm_so = s - env->beg;
+ if (pospush(env, rex, s, BEG_SUB))
+ return BAD;
+ catcher.re.group_catch.eo = rex->re.group.number ? &env->match[rex->re.group.number].rm_eo : (regoff_t*)0;
+ }
+ catcher.type = REX_GROUP_CATCH;
+ catcher.serial = rex->serial;
+ catcher.re.group_catch.cont = cont;
+ catcher.next = rex->next;
+ r = parse(env, rex->re.group.expr.rex, &catcher, s);
+ if (env->stack)
+ {
+ pospop(env);
+ if (rex->re.group.number)
+ env->match[rex->re.group.number].rm_so = -1;
+ }
+ return r;
+ case REX_GROUP_CATCH:
+DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s=>%s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rexname(rex->re.group_catch.cont), env->end - s, s)),(0));
+ if (env->stack)
+ {
+ if (rex->re.group_catch.eo)
+ *rex->re.group_catch.eo = s - env->beg;
+ if (pospush(env, rex, s, END_ANY))
+ return BAD;
+ }
+ r = follow(env, rex, rex->re.group_catch.cont, s);
+ if (env->stack)
+ {
+ pospop(env);
+ if (rex->re.group_catch.eo)
+ *rex->re.group_catch.eo = -1;
+ }
+ return r;
+ case REX_GROUP_AHEAD:
+ catcher.type = REX_GROUP_AHEAD_CATCH;
+ catcher.flags = rex->flags;
+ catcher.serial = rex->serial;
+ catcher.re.rep_catch.beg = s;
+ catcher.re.rep_catch.cont = cont;
+ catcher.next = rex->next;
+ return parse(env, rex->re.group.expr.rex, &catcher, s);
+ case REX_GROUP_AHEAD_CATCH:
+ return follow(env, rex, rex->re.rep_catch.cont, rex->re.rep_catch.beg);
+ case REX_GROUP_AHEAD_NOT:
+ r = parse(env, rex->re.group.expr.rex, NiL, s);
+ if (r == NONE)
+ r = follow(env, rex, cont, s);
+ else if (r != BAD)
+ r = NONE;
+ return r;
+ case REX_GROUP_BEHIND:
+ if ((s - env->beg) < rex->re.group.size)
+ return NONE;
+ catcher.type = REX_GROUP_BEHIND_CATCH;
+ catcher.flags = rex->flags;
+ catcher.serial = rex->serial;
+ catcher.re.behind_catch.beg = s;
+ catcher.re.behind_catch.end = e = env->end;
+ catcher.re.behind_catch.cont = cont;
+ catcher.next = rex->next;
+ for (t = s - rex->re.group.size; t >= env->beg; t--)
+ {
+ env->end = s;
+ r = parse(env, rex->re.group.expr.rex, &catcher, t);
+ env->end = e;
+ if (r != NONE)
+ return r;
+ }
+ return NONE;
+ case REX_GROUP_BEHIND_CATCH:
+ if (s != rex->re.behind_catch.beg)
+ return NONE;
+ env->end = rex->re.behind_catch.end;
+ return follow(env, rex, rex->re.behind_catch.cont, rex->re.behind_catch.beg);
+ case REX_GROUP_BEHIND_NOT:
+ if ((s - env->beg) < rex->re.group.size)
+ r = NONE;
+ else
+ {
+ catcher.type = REX_GROUP_BEHIND_NOT_CATCH;
+ catcher.re.neg_catch.beg = s;
+ catcher.next = 0;
+ e = env->end;
+ env->end = s;
+ for (t = s - rex->re.group.size; t >= env->beg; t--)
+ {
+ r = parse(env, rex->re.group.expr.rex, &catcher, t);
+ if (r != NONE)
+ break;
+ }
+ env->end = e;
+ }
+ if (r == NONE)
+ r = follow(env, rex, cont, s);
+ else if (r != BAD)
+ r = NONE;
+ return r;
+ case REX_GROUP_BEHIND_NOT_CATCH:
+ return s == rex->re.neg_catch.beg ? GOOD : NONE;
+ case REX_GROUP_COND:
+ if (q = rex->re.group.expr.binary.right)
+ {
+ catcher.re.cond_catch.next[0] = q->re.group.expr.binary.right;
+ catcher.re.cond_catch.next[1] = q->re.group.expr.binary.left;
+ }
+ else
+ catcher.re.cond_catch.next[0] = catcher.re.cond_catch.next[1] = 0;
+ if (q = rex->re.group.expr.binary.left)
+ {
+ catcher.type = REX_GROUP_COND_CATCH;
+ catcher.flags = rex->flags;
+ catcher.serial = rex->serial;
+ catcher.re.cond_catch.yes = 0;
+ catcher.re.cond_catch.beg = s;
+ catcher.re.cond_catch.cont = cont;
+ catcher.next = rex->next;
+ r = parse(env, q, &catcher, s);
+ if (r == BAD || catcher.re.cond_catch.yes)
+ return r;
+ }
+ else if (!rex->re.group.size || rex->re.group.size > 0 && env->match[rex->re.group.size].rm_so >= 0)
+ r = GOOD;
+ else
+ r = NONE;
+ if (q = catcher.re.cond_catch.next[r != NONE])
+ {
+ catcher.type = REX_CAT;
+ catcher.flags = q->flags;
+ catcher.serial = q->serial;
+ catcher.re.group_catch.cont = cont;
+ catcher.next = rex->next;
+ return parse(env, q, &catcher, s);
+ }
+ return follow(env, rex, cont, s);
+ case REX_GROUP_COND_CATCH:
+ rex->re.cond_catch.yes = 1;
+ catcher.type = REX_CAT;
+ catcher.flags = rex->flags;
+ catcher.serial = rex->serial;
+ catcher.re.group_catch.cont = rex->re.cond_catch.cont;
+ catcher.next = rex->next;
+ return parse(env, rex->re.cond_catch.next[1], &catcher, rex->re.cond_catch.beg);
+ case REX_CAT:
+ return follow(env, rex, rex->re.group_catch.cont, s);
+ case REX_GROUP_CUT:
+ catcher.type = REX_GROUP_CUT_CATCH;
+ catcher.flags = rex->flags;
+ catcher.serial = rex->serial;
+ catcher.re.group_catch.cont = cont;
+ catcher.next = rex->next;
+ return parse(env, rex->re.group.expr.rex, &catcher, s);
+ case REX_GROUP_CUT_CATCH:
+ switch (r = follow(env, rex, rex->re.group_catch.cont, s))
+ {
+ case GOOD:
+ r = BEST;
+ break;
+ case NONE:
+ r = CUT;
+ break;
+ }
+ return r;
+ case REX_KMP:
+ f = rex->re.string.fail;
+ b = rex->re.string.base;
+ n = rex->re.string.size;
+ t = s;
+ e = env->end;
+ if (p = rex->map)
+ {
+ while (t + n <= e)
+ {
+ for (i = -1; t < e; t++)
+ {
+ while (i >= 0 && b[i+1] != p[*t])
+ i = f[i];
+ if (b[i+1] == p[*t])
+ i++;
+ if (i + 1 == n)
+ {
+ t++;
+ if (env->stack)
+ env->best[0].rm_so = t - s - n;
+ switch (follow(env, rex, cont, t))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ case GOOD:
+ return BEST;
+ }
+ t -= n - 1;
+ break;
+ }
+ }
+ }
+ }
+ else
+ {
+ while (t + n <= e)
+ {
+ for (i = -1; t < e; t++)
+ {
+ while (i >= 0 && b[i+1] != *t)
+ i = f[i];
+ if (b[i+1] == *t)
+ i++;
+ if (i + 1 == n)
+ {
+ t++;
+ if (env->stack)
+ env->best[0].rm_so = t - s - n;
+ switch (follow(env, rex, cont, t))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ case GOOD:
+ return BEST;
+ }
+ t -= n - 1;
+ break;
+ }
+ }
+ }
+ }
+ return NONE;
+ case REX_NEG:
+ if (LEADING(env, rex, s))
+ return NONE;
+ i = env->end - s;
+ n = ((i + 7) >> 3) + 1;
+ catcher.type = REX_NEG_CATCH;
+ catcher.re.neg_catch.beg = s;
+ if (!(p = (unsigned char*)stkpush(stkstd, n)))
+ return BAD;
+ memset(catcher.re.neg_catch.index = p, 0, n);
+ catcher.next = rex->next;
+ if (parse(env, rex->re.group.expr.rex, &catcher, s) == BAD)
+ r = BAD;
+ else
+ {
+ r = NONE;
+ for (; i >= 0; i--)
+ if (!bittst(p, i))
+ {
+ switch (follow(env, rex, cont, s + i))
+ {
+ case BAD:
+ r = BAD;
+ break;
+ case BEST:
+ r = BEST;
+ break;
+ case CUT:
+ r = CUT;
+ break;
+ case GOOD:
+ r = GOOD;
+ /*FALLTHROUGH*/
+ default:
+ continue;
+ }
+ break;
+ }
+ }
+ stkpop(stkstd);
+ return r;
+ case REX_NEG_CATCH:
+ bitset(rex->re.neg_catch.index, s - rex->re.neg_catch.beg);
+ return NONE;
+ case REX_NEST:
+ do
+ {
+ if ((c = *s++) == rex->re.nest.primary)
+ {
+ if (s >= env->end || !(s = nestmatch(s, env->end, rex->re.nest.type, c)))
+ return NONE;
+ break;
+ }
+ if (rex->re.nest.primary >= 0)
+ return NONE;
+ if (rex->re.nest.type[c] & (REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator))
+ break;
+ if (!(s = nestmatch(s, env->end, rex->re.nest.type, c)))
+ return NONE;
+ } while (s < env->end && !(rex->re.nest.type[*(s-1)] & (REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator)));
+ break;
+ case REX_NULL:
+ break;
+ case REX_ONECHAR:
+ n = rex->hi;
+ if (n > env->end - s)
+ n = env->end - s;
+ m = rex->lo;
+ if (m > n)
+ return NONE;
+ r = NONE;
+ c = rex->re.onechar;
+ if (!(rex->flags & REG_MINIMAL))
+ {
+ if (!mbwide())
+ {
+ if (p = rex->map)
+ {
+ for (i = 0; i < n; i++, s++)
+ if (p[*s] != c)
+ break;
+ }
+ else
+ {
+ for (i = 0; i < n; i++, s++)
+ if (*s != c)
+ break;
+ }
+ for (; i-- >= m; s--)
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ return BAD;
+ case BEST:
+ return BEST;
+ case CUT:
+ return CUT;
+ case GOOD:
+ r = GOOD;
+ break;
+ }
+ }
+ else
+ {
+ if (!(b = (unsigned char*)stkpush(stkstd, n)))
+ {
+ env->error = REG_ESPACE;
+ return BAD;
+ }
+ e = env->end;
+ if (!(rex->flags & REG_ICASE))
+ {
+ for (i = 0; s < e && i < n; i++, s = t)
+ {
+ t = s;
+ if (mbchar(t) != c)
+ break;
+ b[i] = t - s;
+ }
+ }
+ else
+ {
+ for (i = 0; s < e && i < n; i++, s = t)
+ {
+ t = s;
+ if (towupper(mbchar(t)) != c)
+ break;
+ b[i] = t - s;
+ }
+ }
+ for (; i-- >= m; s -= b[i])
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ stkpop(stkstd);
+ return BAD;
+ case BEST:
+ stkpop(stkstd);
+ return BEST;
+ case CUT:
+ stkpop(stkstd);
+ return CUT;
+ case GOOD:
+ r = GOOD;
+ break;
+ }
+ stkpop(stkstd);
+ }
+ }
+ else
+ {
+ if (!mbwide())
+ {
+ e = s + m;
+ if (p = rex->map)
+ {
+ for (; s < e; s++)
+ if (p[*s] != c)
+ return r;
+ e += n - m;
+ for (;;)
+ {
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ case GOOD:
+ return BEST;
+ }
+ if (s >= e || p[*s++] != c)
+ break;
+ }
+ }
+ else
+ {
+ for (; s < e; s++)
+ if (*s != c)
+ return r;
+ e += n - m;
+ for (;;)
+ {
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ case GOOD:
+ return BEST;
+ }
+ if (s >= e || *s++ != c)
+ break;
+ }
+ }
+ }
+ else
+ {
+ if (!(rex->flags & REG_ICASE))
+ {
+ for (i = 0; i < m && s < e; i++, s = t)
+ {
+ t = s;
+ if (mbchar(t) != c)
+ return r;
+ }
+ while (i++ <= n)
+ {
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ case GOOD:
+ return BEST;
+ }
+ if (s >= e || mbchar(s) != c)
+ break;
+ }
+ }
+ else
+ {
+ for (i = 0; i < m && s < e; i++, s = t)
+ {
+ t = s;
+ if (towupper(mbchar(t)) != c)
+ return r;
+ }
+ while (i++ <= n)
+ {
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ case GOOD:
+ return BEST;
+ }
+ if (s >= e || towupper(mbchar(s)) != c)
+ break;
+ }
+ }
+ }
+ }
+ return r;
+ case REX_REP:
+ if (env->stack && pospush(env, rex, s, BEG_REP))
+ return BAD;
+ r = parserep(env, rex, cont, s, 0);
+ if (env->stack)
+ pospop(env);
+ return r;
+ case REX_REP_CATCH:
+ DEBUG_TEST(0x0020,(sfprintf(sfstdout, "AHA#%04d 0x%04x %s n %d len %d s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rex->re.rep_catch.n, s - rex->re.rep_catch.beg, env->end - s, s)),(0));
+ if (env->stack && pospush(env, rex, s, END_ANY))
+ return BAD;
+ if (s == rex->re.rep_catch.beg && rex->re.rep_catch.n > rex->re.rep_catch.ref->lo)
+ {
+ /*
+ * optional empty iteration
+ */
+
+DEBUG_TEST(0x0002,(sfprintf(sfstdout, "AHA#%04d %p re.group.back=%d re.group.expr.rex=%s\n", __LINE__, rex->re.rep_catch.ref->re.group.expr.rex, rex->re.rep_catch.ref->re.group.expr.rex->re.group.back, rexname(rex->re.rep_catch.ref->re.group.expr.rex))),(0));
+ if (!env->stack || s != rex->re.rep_catch.ref->re.rep_catch.beg && !rex->re.rep_catch.ref->re.group.expr.rex->re.group.back)
+ r = NONE;
+ else if (pospush(env, rex, s, END_ANY))
+ r = BAD;
+ else
+ {
+ r = follow(env, rex, rex->re.rep_catch.cont, s);
+ pospop(env);
+ }
+ }
+ else
+ r = parserep(env, rex->re.rep_catch.ref, rex->re.rep_catch.cont, s, rex->re.rep_catch.n);
+ if (env->stack)
+ pospop(env);
+ return r;
+ case REX_STRING:
+DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s \"%-.*s\" `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rex->re.string.size, rex->re.string.base, env->end - s, s)),(0));
+ if (rex->re.string.size > (env->end - s))
+ return NONE;
+ t = rex->re.string.base;
+ e = t + rex->re.string.size;
+ if (!(p = rex->map))
+ {
+ while (t < e)
+ if (*s++ != *t++)
+ return NONE;
+ }
+ else if (!mbwide())
+ {
+ while (t < e)
+ if (p[*s++] != *t++)
+ return NONE;
+ }
+ else
+ {
+ while (t < e)
+ {
+ c = mbchar(s);
+ d = mbchar(t);
+ if (towupper(c) != d)
+ return NONE;
+ }
+ }
+ break;
+ case REX_TRIE:
+ if (((s + rex->re.trie.min) > env->end) || !(x = rex->re.trie.root[rex->map ? rex->map[*s] : *s]))
+ return NONE;
+ return parsetrie(env, x, rex, cont, s);
+ case REX_EXEC:
+ u = 0;
+ r = (*env->disc->re_execf)(env->regex, rex->re.exec.data, rex->re.exec.text, rex->re.exec.size, (const char*)s, env->end - s, &u, env->disc);
+ e = (unsigned char*)u;
+ if (e >= s && e <= env->end)
+ s = e;
+ switch (r)
+ {
+ case 0:
+ break;
+ case REG_NOMATCH:
+ return NONE;
+ default:
+ env->error = r;
+ return BAD;
+ }
+ break;
+ case REX_WBEG:
+ if (!isword(*s) || s > env->beg && isword(*(s - 1)))
+ return NONE;
+ break;
+ case REX_WEND:
+ if (isword(*s) || s > env->beg && !isword(*(s - 1)))
+ return NONE;
+ break;
+ case REX_WORD:
+ if (s > env->beg && isword(*(s - 1)) == isword(*s))
+ return NONE;
+ break;
+ case REX_WORD_NOT:
+ if (s == env->beg || isword(*(s - 1)) != isword(*s))
+ return NONE;
+ break;
+ case REX_BEG_STR:
+ if (s != env->beg)
+ return NONE;
+ break;
+ case REX_END_STR:
+ for (t = s; t < env->end && *t == '\n'; t++);
+ if (t < env->end)
+ return NONE;
+ break;
+ case REX_FIN_STR:
+ if (s < env->end)
+ return NONE;
+ break;
+ }
+ if (!(rex = rex->next))
+ {
+ if (!(rex = cont))
+ break;
+ cont = 0;
+ }
+ }
+ return GOOD;
+}
+
+#if _AST_REGEX_DEBUG
+
+static void
+listnode(Rex_t* e, int level)
+{
+ int i;
+
+ if (e)
+ {
+ do
+ {
+ for (i = 0; i < level; i++)
+ sfprintf(sfstderr, " ");
+ sfprintf(sfstderr, "%s\n", rexname(e));
+ switch (e->type)
+ {
+ case REX_ALT:
+ case REX_CONJ:
+ listnode(e->re.group.expr.binary.left, level + 1);
+ listnode(e->re.group.expr.binary.right, level + 1);
+ break;
+ case REX_GROUP:
+ case REX_GROUP_AHEAD:
+ case REX_GROUP_AHEAD_NOT:
+ case REX_GROUP_BEHIND:
+ case REX_GROUP_BEHIND_NOT:
+ case REX_GROUP_CUT:
+ case REX_NEG:
+ case REX_REP:
+ listnode(e->re.group.expr.rex, level + 1);
+ break;
+ }
+ } while (e = e->next);
+ }
+}
+
+static int
+list(Env_t* env, Rex_t* rex)
+{
+ sfprintf(sfstderr, "AHA regex hard=%d stack=%p\n", env->hard, env->stack);
+ if (rex)
+ listnode(rex, 1);
+ return 0;
+}
+
+#endif
+
+/*
+ * returning REG_BADPAT or REG_ESPACE is not explicitly
+ * countenanced by the standard
+ */
+
+int
+regnexec(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags)
+{
+ register int n;
+ register int i;
+ int j;
+ int k;
+ int m;
+ int advance;
+ Env_t* env;
+ Rex_t* e;
+
+ DEBUG_INIT();
+ DEBUG_TEST(0x0001,(sfprintf(sfstdout, "AHA#%04d 0x%04x regnexec %d 0x%08x `%-.*s'\n", __LINE__, debug_flag, nmatch, flags, len, s)),(0));
+ if (!p || !(env = p->env))
+ return REG_BADPAT;
+ if (!s)
+ return fatal(env->disc, REG_BADPAT, NiL);
+ if (len < env->min)
+ {
+ DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, len, env->min)),(0));
+ return REG_NOMATCH;
+ }
+ env->regex = p;
+ env->beg = (unsigned char*)s;
+ env->end = env->beg + len;
+ stknew(stkstd, &env->stk);
+ env->flags &= ~REG_EXEC;
+ env->flags |= (flags & REG_EXEC);
+ advance = 0;
+ if (env->stack = env->hard || !(env->flags & REG_NOSUB) && nmatch)
+ {
+ n = env->nsub;
+ if (!(env->match = (regmatch_t*)stkpush(stkstd, 2 * (n + 1) * sizeof(regmatch_t))) ||
+ !env->pos && !(env->pos = vecopen(16, sizeof(Pos_t))) ||
+ !env->bestpos && !(env->bestpos = vecopen(16, sizeof(Pos_t))))
+ {
+ k = REG_ESPACE;
+ goto done;
+ }
+ env->pos->cur = env->bestpos->cur = 0;
+ env->best = &env->match[n + 1];
+ env->best[0].rm_so = 0;
+ env->best[0].rm_eo = -1;
+ for (i = 0; i <= n; i++)
+ env->match[i] = state.nomatch;
+ if (flags & REG_ADVANCE)
+ advance = 1;
+ }
+ DEBUG_TEST(0x1000,(list(env,env->rex)),(0));
+ k = REG_NOMATCH;
+ if ((e = env->rex)->type == REX_BM)
+ {
+ DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REX_BM\n", __LINE__)),(0));
+ if (len < e->re.bm.right)
+ {
+ DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, len, e->re.bm.right)),(0));
+ goto done;
+ }
+ else if (!(flags & REG_LEFT))
+ {
+ register unsigned char* buf = (unsigned char*)s;
+ register size_t index = e->re.bm.left + e->re.bm.size;
+ register size_t mid = len - e->re.bm.right;
+ register size_t* skip = e->re.bm.skip;
+ register size_t* fail = e->re.bm.fail;
+ register Bm_mask_t** mask = e->re.bm.mask;
+ Bm_mask_t m;
+ size_t x;
+
+ DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REX_BM len=%d right=%d left=%d size=%d %d %d\n", __LINE__, len, e->re.bm.right, e->re.bm.left, e->re.bm.size, index, mid)),(0));
+ for (;;)
+ {
+ while ((index += skip[buf[index]]) < mid);
+ if (index < HIT)
+ {
+ DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, index, HIT)),(0));
+ goto done;
+ }
+ index -= HIT;
+ m = mask[n = e->re.bm.size - 1][buf[index]];
+ do
+ {
+ if (!n--)
+ {
+ if (e->re.bm.back < 0)
+ goto possible;
+ if (advance)
+ {
+ i = index - e->re.bm.back;
+ s += i;
+ if (env->stack)
+ env->best[0].rm_so += i;
+ goto possible;
+ }
+ x = index;
+ if (index < e->re.bm.back)
+ index = 0;
+ else
+ index -= e->re.bm.back;
+ while (index <= x)
+ {
+ if ((i = parse(env, e->next, &env->done, buf + index)) != NONE)
+ {
+ if (env->stack)
+ env->best[0].rm_so = index;
+ n = env->nsub;
+ goto hit;
+ }
+ index++;
+ }
+ index += e->re.bm.size;
+ break;
+ }
+ } while (m &= mask[n][buf[--index]]);
+ if ((index += fail[n + 1]) >= len)
+ goto done;
+ }
+ }
+ possible:
+ n = env->nsub;
+ e = e->next;
+ }
+ j = env->once || (flags & REG_LEFT);
+ DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d parse once=%d\n", __LINE__, j)),(0));
+ while ((i = parse(env, e, &env->done, (unsigned char*)s)) == NONE || advance && !env->best[0].rm_eo && !(advance = 0))
+ {
+ if (j)
+ goto done;
+ i = MBSIZE(s);
+ s += i;
+ if ((unsigned char*)s > env->end - env->min)
+ goto done;
+ if (env->stack)
+ env->best[0].rm_so += i;
+ }
+ if ((flags & REG_LEFT) && env->stack && env->best[0].rm_so)
+ goto done;
+ hit:
+ if (k = env->error)
+ goto done;
+ if (i == CUT)
+ {
+ k = env->error = REG_NOMATCH;
+ goto done;
+ }
+ if (!(env->flags & REG_NOSUB))
+ {
+ k = (env->flags & (REG_SHELL|REG_AUGMENTED)) == (REG_SHELL|REG_AUGMENTED);
+ for (i = j = m = 0; j < nmatch; i++)
+ if (!i || !k || (i & 1))
+ {
+ if (i > n)
+ match[j] = state.nomatch;
+ else
+ match[m = j] = env->best[i];
+ j++;
+ }
+ if (k)
+ {
+ while (m > 0 && match[m].rm_so == -1 && match[m].rm_eo == -1)
+ m--;
+ ((regex_t*)p)->re_nsub = m;
+ }
+ }
+ k = 0;
+ done:
+ stkold(stkstd, &env->stk);
+ env->stk.base = 0;
+ if (k > REG_NOMATCH)
+ fatal(p->env->disc, k, NiL);
+ return k;
+}
+
+void
+regfree(regex_t* p)
+{
+ Env_t* env;
+
+ if (p && (env = p->env))
+ {
+#if _REG_subcomp
+ if (env->sub)
+ {
+ regsubfree(p);
+ p->re_sub = 0;
+ }
+#endif
+ p->env = 0;
+ if (--env->refs <= 0 && !(env->disc->re_flags & REG_NOFREE))
+ {
+ drop(env->disc, env->rex);
+ if (env->pos)
+ vecclose(env->pos);
+ if (env->bestpos)
+ vecclose(env->bestpos);
+ if (env->stk.base)
+ stkold(stkstd, &env->stk);
+ alloc(env->disc, env, 0);
+ }
+ }
+}