summaryrefslogtreecommitdiff
path: root/src/lib/libast/regex
diff options
context:
space:
mode:
authorIgor Pashev <pashev.igor@gmail.com>2012-06-24 22:28:35 +0000
committerIgor Pashev <pashev.igor@gmail.com>2012-06-24 22:28:35 +0000
commit3950ffe2a485479f6561c27364d3d7df5a21d124 (patch)
tree468c6e14449d1b1e279222ec32f676b0311917d2 /src/lib/libast/regex
downloadksh-upstream.tar.gz
Imported Upstream version 93u+upstream
Diffstat (limited to 'src/lib/libast/regex')
-rw-r--r--src/lib/libast/regex/regalloc.c36
-rw-r--r--src/lib/libast/regex/regcache.c198
-rw-r--r--src/lib/libast/regex/regclass.c298
-rw-r--r--src/lib/libast/regex/regcoll.c120
-rw-r--r--src/lib/libast/regex/regcomp.c3544
-rw-r--r--src/lib/libast/regex/regdecomp.c448
-rw-r--r--src/lib/libast/regex/regerror.c95
-rw-r--r--src/lib/libast/regex/regexec.c54
-rw-r--r--src/lib/libast/regex/regfatal.c49
-rw-r--r--src/lib/libast/regex/reginit.c412
-rw-r--r--src/lib/libast/regex/reglib.h582
-rw-r--r--src/lib/libast/regex/regnexec.c2045
-rw-r--r--src/lib/libast/regex/regrecord.c34
-rw-r--r--src/lib/libast/regex/regrexec.c145
-rw-r--r--src/lib/libast/regex/regstat.c53
-rw-r--r--src/lib/libast/regex/regsub.c269
-rw-r--r--src/lib/libast/regex/regsubcomp.c377
-rw-r--r--src/lib/libast/regex/regsubexec.c196
18 files changed, 8955 insertions, 0 deletions
diff --git a/src/lib/libast/regex/regalloc.c b/src/lib/libast/regex/regalloc.c
new file mode 100644
index 0000000..03807a4
--- /dev/null
+++ b/src/lib/libast/regex/regalloc.c
@@ -0,0 +1,36 @@
+/***********************************************************************
+* *
+* This software is part of the ast package *
+* Copyright (c) 1985-2011 AT&T Intellectual Property *
+* and is licensed under the *
+* Eclipse Public License, Version 1.0 *
+* by AT&T Intellectual Property *
+* *
+* A copy of the License is available at *
+* http://www.eclipse.org/org/documents/epl-v10.html *
+* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
+* *
+* Information and Software Systems Research *
+* AT&T Research *
+* Florham Park NJ *
+* *
+* Glenn Fowler <gsf@research.att.com> *
+* David Korn <dgk@research.att.com> *
+* Phong Vo <kpv@research.att.com> *
+* *
+***********************************************************************/
+#pragma prototyped
+
+/*
+ * posix regex alloc control
+ */
+
+#include "reglib.h"
+
+void
+regalloc(void* handle, void*(*resize)(void*,void*,size_t), regflags_t flags)
+{
+ state.disc.re_flags = flags;
+ state.disc.re_resizef = resize;
+ state.disc.re_resizehandle = handle;
+}
diff --git a/src/lib/libast/regex/regcache.c b/src/lib/libast/regex/regcache.c
new file mode 100644
index 0000000..a45f0e3
--- /dev/null
+++ b/src/lib/libast/regex/regcache.c
@@ -0,0 +1,198 @@
+/***********************************************************************
+* *
+* This software is part of the ast package *
+* Copyright (c) 1985-2011 AT&T Intellectual Property *
+* and is licensed under the *
+* Eclipse Public License, Version 1.0 *
+* by AT&T Intellectual Property *
+* *
+* A copy of the License is available at *
+* http://www.eclipse.org/org/documents/epl-v10.html *
+* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
+* *
+* Information and Software Systems Research *
+* AT&T Research *
+* Florham Park NJ *
+* *
+* Glenn Fowler <gsf@research.att.com> *
+* David Korn <dgk@research.att.com> *
+* Phong Vo <kpv@research.att.com> *
+* *
+***********************************************************************/
+#pragma prototyped
+
+/*
+ * regcomp() regex_t cache
+ * at&t research
+ */
+
+#include <ast.h>
+#include <regex.h>
+
+#define CACHE 8 /* default # cached re's */
+#define ROUND 64 /* pattern buffer size round */
+
+typedef unsigned long Key_t;
+
+typedef struct Cache_s
+{
+ char* pattern;
+ regex_t re;
+ unsigned long serial;
+ regflags_t reflags;
+ int keep;
+ int size;
+} Cache_t;
+
+typedef struct State_s
+{
+ unsigned int size;
+ unsigned long serial;
+ char* locale;
+ Cache_t** cache;
+} State_t;
+
+static State_t matchstate;
+
+/*
+ * flush the cache
+ */
+
+static void
+flushcache(void)
+{
+ register int i;
+
+ for (i = matchstate.size; i--;)
+ if (matchstate.cache[i] && matchstate.cache[i]->keep)
+ {
+ matchstate.cache[i]->keep = 0;
+ regfree(&matchstate.cache[i]->re);
+ }
+}
+
+/*
+ * return regcomp() compiled re for pattern and reflags
+ */
+
+regex_t*
+regcache(const char* pattern, regflags_t reflags, int* status)
+{
+ register Cache_t* cp;
+ register int i;
+ char* s;
+ int empty;
+ int unused;
+ int old;
+ Key_t key;
+
+ /*
+ * 0 pattern flushes the cache and reflags>0 extends cache
+ */
+
+ if (!pattern)
+ {
+ flushcache();
+ i = 0;
+ if (reflags > matchstate.size)
+ {
+ if (matchstate.cache = newof(matchstate.cache, Cache_t*, reflags, 0))
+ matchstate.size = reflags;
+ else
+ {
+ matchstate.size = 0;
+ i = 1;
+ }
+ }
+ if (status)
+ *status = i;
+ return 0;
+ }
+ if (!matchstate.cache)
+ {
+ if (!(matchstate.cache = newof(0, Cache_t*, CACHE, 0)))
+ return 0;
+ matchstate.size = CACHE;
+ }
+
+ /*
+ * flush the cache if the locale changed
+ * the ast setlocale() intercept maintains
+ * persistent setlocale() return values
+ */
+
+ if ((s = setlocale(LC_CTYPE, NiL)) != matchstate.locale)
+ {
+ matchstate.locale = s;
+ flushcache();
+ }
+
+ /*
+ * check if the pattern is in the cache
+ */
+
+ for (i = 0; i < sizeof(key) && pattern[i]; i++)
+ ((char*)&key)[i] = pattern[i];
+ for (; i < sizeof(key); i++)
+ ((char*)&key)[i] = 0;
+ empty = unused = -1;
+ old = 0;
+ for (i = matchstate.size; i--;)
+ if (!matchstate.cache[i])
+ empty = i;
+ else if (!matchstate.cache[i]->keep)
+ unused = i;
+ else if (*(Key_t*)matchstate.cache[i]->pattern == key && !strcmp(matchstate.cache[i]->pattern, pattern) && matchstate.cache[i]->reflags == reflags)
+ break;
+ else if (!matchstate.cache[old] || matchstate.cache[old]->serial > matchstate.cache[i]->serial)
+ old = i;
+ if (i < 0)
+ {
+ if (unused < 0)
+ {
+ if (empty < 0)
+ unused = old;
+ else
+ unused = empty;
+ }
+ if (!(cp = matchstate.cache[unused]) && !(cp = matchstate.cache[unused] = newof(0, Cache_t, 1, 0)))
+ {
+ if (status)
+ *status = REG_ESPACE;
+ return 0;
+ }
+ if (cp->keep)
+ {
+ cp->keep = 0;
+ regfree(&cp->re);
+ }
+ if ((i = strlen(pattern) + 1) > cp->size)
+ {
+ cp->size = roundof(i, ROUND);
+ if (!(cp->pattern = newof(cp->pattern, char, cp->size, 0)))
+ {
+ if (status)
+ *status = REG_ESPACE;
+ return 0;
+ }
+ }
+ strcpy(cp->pattern, pattern);
+ while (++i < sizeof(Key_t))
+ cp->pattern[i] = 0;
+ pattern = (const char*)cp->pattern;
+ if (i = regcomp(&cp->re, pattern, reflags))
+ {
+ if (status)
+ *status = i;
+ return 0;
+ }
+ cp->keep = 1;
+ cp->reflags = reflags;
+ }
+ else
+ cp = matchstate.cache[i];
+ cp->serial = ++matchstate.serial;
+ if (status)
+ *status = 0;
+ return &cp->re;
+}
diff --git a/src/lib/libast/regex/regclass.c b/src/lib/libast/regex/regclass.c
new file mode 100644
index 0000000..87ada74
--- /dev/null
+++ b/src/lib/libast/regex/regclass.c
@@ -0,0 +1,298 @@
+/***********************************************************************
+* *
+* This software is part of the ast package *
+* Copyright (c) 1985-2011 AT&T Intellectual Property *
+* and is licensed under the *
+* Eclipse Public License, Version 1.0 *
+* by AT&T Intellectual Property *
+* *
+* A copy of the License is available at *
+* http://www.eclipse.org/org/documents/epl-v10.html *
+* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
+* *
+* Information and Software Systems Research *
+* AT&T Research *
+* Florham Park NJ *
+* *
+* Glenn Fowler <gsf@research.att.com> *
+* David Korn <dgk@research.att.com> *
+* Phong Vo <kpv@research.att.com> *
+* *
+***********************************************************************/
+#pragma prototyped
+/*
+ * RE character class support
+ */
+
+#include "reglib.h"
+
+struct Ctype_s; typedef struct Ctype_s Ctype_t;
+
+struct Ctype_s
+{
+ const char* name;
+ size_t size;
+ regclass_t ctype;
+ Ctype_t* next;
+#if _lib_wctype
+ wctype_t wtype;
+#endif
+};
+
+static Ctype_t* ctypes;
+
+/*
+ * this stuff gets around posix failure to define isblank,
+ * and the fact that ctype functions are macros
+ * and any local extensions that may not even have functions or macros
+ */
+
+#if _need_iswblank
+
+int
+_reg_iswblank(wint_t wc)
+{
+ static int initialized;
+ static wctype_t wt;
+
+ if (!initialized)
+ {
+ initialized = 1;
+ wt = wctype("blank");
+ }
+ return iswctype(wc, wt);
+}
+
+#endif
+
+static int Isalnum(int c) { return iswalnum(c); }
+static int Isalpha(int c) { return iswalpha(c); }
+static int Isblank(int c) { return iswblank(c); }
+static int Iscntrl(int c) { return iswcntrl(c); }
+static int Isdigit(int c) { return iswdigit(c); }
+static int Notdigit(int c) { return !iswdigit(c); }
+static int Isgraph(int c) { return iswgraph(c); }
+static int Islower(int c) { return iswlower(c); }
+static int Isprint(int c) { return iswprint(c); }
+static int Ispunct(int c) { return iswpunct(c); }
+static int Isspace(int c) { return iswspace(c); }
+static int Notspace(int c) { return !iswspace(c); }
+static int Isupper(int c) { return iswupper(c); }
+static int Isword(int c) { return iswalnum(c) || c == '_'; }
+static int Notword(int c) { return !iswalnum(c) && c != '_'; }
+static int Isxdigit(int c) { return iswxdigit(c);}
+
+#if _lib_wctype
+
+static int Is_wc_1(int);
+static int Is_wc_2(int);
+static int Is_wc_3(int);
+static int Is_wc_4(int);
+static int Is_wc_5(int);
+static int Is_wc_6(int);
+static int Is_wc_7(int);
+static int Is_wc_8(int);
+static int Is_wc_9(int);
+static int Is_wc_10(int);
+static int Is_wc_11(int);
+static int Is_wc_12(int);
+static int Is_wc_13(int);
+static int Is_wc_14(int);
+static int Is_wc_15(int);
+static int Is_wc_16(int);
+
+#endif
+
+#define SZ(s) s,(sizeof(s)-1)
+
+static Ctype_t ctype[] =
+{
+ { SZ("alnum"), Isalnum },
+ { SZ("alpha"), Isalpha },
+ { SZ("blank"), Isblank },
+ { SZ("cntrl"), Iscntrl },
+ { SZ("digit"), Isdigit },
+ { SZ("graph"), Isgraph },
+ { SZ("lower"), Islower },
+ { SZ("print"), Isprint },
+ { SZ("punct"), Ispunct },
+ { SZ("space"), Isspace },
+ { SZ("upper"), Isupper },
+ { SZ("word"), Isword },
+ { SZ("xdigit"),Isxdigit},
+
+#define CTYPES 13
+
+#if _lib_wctype
+ { 0, 0, Is_wc_1 },
+ { 0, 0, Is_wc_2 },
+ { 0, 0, Is_wc_3 },
+ { 0, 0, Is_wc_4 },
+ { 0, 0, Is_wc_5 },
+ { 0, 0, Is_wc_6 },
+ { 0, 0, Is_wc_7 },
+ { 0, 0, Is_wc_8 },
+ { 0, 0, Is_wc_9 },
+ { 0, 0, Is_wc_10 },
+ { 0, 0, Is_wc_11 },
+ { 0, 0, Is_wc_12 },
+ { 0, 0, Is_wc_13 },
+ { 0, 0, Is_wc_14 },
+ { 0, 0, Is_wc_15 },
+ { 0, 0, Is_wc_16 },
+
+#define WTYPES 16
+
+#else
+
+#define WTYPES 0
+
+#endif
+};
+
+#if _lib_wctype
+
+static int Is_wc_1(int c) { return iswctype(c, ctype[CTYPES+0].wtype); }
+static int Is_wc_2(int c) { return iswctype(c, ctype[CTYPES+1].wtype); }
+static int Is_wc_3(int c) { return iswctype(c, ctype[CTYPES+2].wtype); }
+static int Is_wc_4(int c) { return iswctype(c, ctype[CTYPES+3].wtype); }
+static int Is_wc_5(int c) { return iswctype(c, ctype[CTYPES+4].wtype); }
+static int Is_wc_6(int c) { return iswctype(c, ctype[CTYPES+5].wtype); }
+static int Is_wc_7(int c) { return iswctype(c, ctype[CTYPES+6].wtype); }
+static int Is_wc_8(int c) { return iswctype(c, ctype[CTYPES+7].wtype); }
+static int Is_wc_9(int c) { return iswctype(c, ctype[CTYPES+8].wtype); }
+static int Is_wc_10(int c) { return iswctype(c, ctype[CTYPES+9].wtype); }
+static int Is_wc_11(int c) { return iswctype(c, ctype[CTYPES+10].wtype); }
+static int Is_wc_12(int c) { return iswctype(c, ctype[CTYPES+11].wtype); }
+static int Is_wc_13(int c) { return iswctype(c, ctype[CTYPES+12].wtype); }
+static int Is_wc_14(int c) { return iswctype(c, ctype[CTYPES+13].wtype); }
+static int Is_wc_15(int c) { return iswctype(c, ctype[CTYPES+14].wtype); }
+static int Is_wc_16(int c) { return iswctype(c, ctype[CTYPES+15].wtype); }
+
+#endif
+
+/*
+ * return pointer to ctype function for :class:] in s
+ * s points to the first char after the initial [
+ * dynamic wctype classes are locale-specific
+ * dynamic entry locale is punned in Ctype_t.next
+ * the search does a lazy (one entry at a time) flush on locale mismatch
+ * if e!=0 it points to next char in s
+ * 0 returned on error
+ */
+
+regclass_t
+regclass(const char* s, char** e)
+{
+ register Ctype_t* cp;
+ register int c;
+ register size_t n;
+ register const char* t;
+ Ctype_t* lc;
+ Ctype_t* xp;
+ Ctype_t* zp;
+
+ if (!(c = *s++))
+ return 0;
+ for (t = s; *t && (*t != c || *(t + 1) != ']'); t++);
+ if (*t != c || !(n = t - s))
+ return 0;
+ for (cp = ctypes; cp; cp = cp->next)
+ if (n == cp->size && strneq(s, cp->name, n))
+ goto found;
+ xp = zp = 0;
+ lc = (Ctype_t*)setlocale(LC_CTYPE, NiL);
+ for (cp = ctype; cp < &ctype[elementsof(ctype)]; cp++)
+ {
+#if _lib_wctype
+ if (!zp)
+ {
+ if (!cp->size)
+ zp = cp;
+ else if (!xp && cp->next && cp->next != lc)
+ xp = cp;
+ }
+#endif
+ if (n == cp->size && strneq(s, cp->name, n) && (!cp->next || cp->next == lc))
+ goto found;
+ }
+#if _lib_wctype
+ if (!(cp = zp))
+ {
+ if (!(cp = xp))
+ return 0;
+ cp->size = 0;
+ if (!streq(cp->name, s))
+ {
+ free((char*)cp->name);
+ cp->name = 0;
+ }
+ }
+ if (!cp->name)
+ {
+ if (!(cp->name = (const char*)memdup(s, n + 1)))
+ return 0;
+ *((char*)cp->name + n) = 0;
+ }
+ /* mvs.390 needs the (char*) cast -- barf */
+ if (!(cp->wtype = wctype((char*)cp->name)))
+ {
+ free((char*)cp->name);
+ cp->name = 0;
+ return 0;
+ }
+ cp->size = n;
+ cp->next = lc;
+#endif
+ found:
+ if (e)
+ *e = (char*)t + 2;
+ return cp->ctype;
+}
+
+/*
+ * associate the ctype function fun with name
+ */
+
+int
+regaddclass(const char* name, regclass_t fun)
+{
+ register Ctype_t* cp;
+ register Ctype_t* np;
+ register size_t n;
+
+ n = strlen(name);
+ for (cp = ctypes; cp; cp = cp->next)
+ if (cp->size == n && strneq(name, cp->name, n))
+ {
+ cp->ctype = fun;
+ return 0;
+ }
+ if (!(np = newof(0, Ctype_t, 1, n + 1)))
+ return REG_ESPACE;
+ np->size = n;
+ np->name = strcpy((char*)(np + 1), name);
+ np->ctype = fun;
+ np->next = ctypes;
+ ctypes = np;
+ return 0;
+}
+
+/*
+ * return pointer to ctype function for token
+ */
+
+regclass_t
+classfun(int type)
+{
+ switch (type)
+ {
+ case T_ALNUM: return Isword;
+ case T_ALNUM_NOT: return Notword;
+ case T_DIGIT: return Isdigit;
+ case T_DIGIT_NOT: return Notdigit;
+ case T_SPACE: return Isspace;
+ case T_SPACE_NOT: return Notspace;
+ }
+ return 0;
+}
diff --git a/src/lib/libast/regex/regcoll.c b/src/lib/libast/regex/regcoll.c
new file mode 100644
index 0000000..64dc7a8
--- /dev/null
+++ b/src/lib/libast/regex/regcoll.c
@@ -0,0 +1,120 @@
+/***********************************************************************
+* *
+* This software is part of the ast package *
+* Copyright (c) 1985-2011 AT&T Intellectual Property *
+* and is licensed under the *
+* Eclipse Public License, Version 1.0 *
+* by AT&T Intellectual Property *
+* *
+* A copy of the License is available at *
+* http://www.eclipse.org/org/documents/epl-v10.html *
+* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
+* *
+* Information and Software Systems Research *
+* AT&T Research *
+* Florham Park NJ *
+* *
+* Glenn Fowler <gsf@research.att.com> *
+* David Korn <dgk@research.att.com> *
+* Phong Vo <kpv@research.att.com> *
+* *
+***********************************************************************/
+#pragma prototyped
+/*
+ * regex collation symbol support
+ */
+
+#include "reglib.h"
+
+/*
+ * return the collating symbol delimited by [c c], where c is either '=' or '.'
+ * s points to the first char after the initial [
+ * if e!=0 it is set to point to the next char in s on return
+ *
+ * the collating symbol is converted to multibyte in <buf,size>
+ * the return value is:
+ * -1 syntax error / invalid collating element
+ * >=0 size with 0-terminated mb character (*wc != 0)
+ * or collating element (*wc == 0) in buf
+ */
+
+int
+regcollate(register const char* s, char** e, char* buf, size_t size, wchar_t* wc)
+{
+ register int c;
+ register char* b;
+ register char* x;
+ const char* t;
+ int i;
+ int r;
+ int term;
+ wchar_t w;
+ char xfm[256];
+ char tmp[sizeof(xfm)];
+
+ if (size < 2 || (term = *s) != '.' && term != '=' || !*++s || *s == term && *(s + 1) == ']')
+ goto nope;
+ t = s;
+ w = mbchar(s);
+ if ((r = (s - t)) > 1)
+ {
+ if (*s++ != term || *s++ != ']')
+ goto oops;
+ goto done;
+ }
+ if (*s == term && *(s + 1) == ']')
+ {
+ s += 2;
+ goto done;
+ }
+ b = buf;
+ x = buf + size - 2;
+ s = t;
+ for (;;)
+ {
+ if (!(c = *s++))
+ goto oops;
+ if (c == term)
+ {
+ if (!(c = *s++))
+ goto oops;
+ if (c != term)
+ {
+ if (c != ']')
+ goto oops;
+ break;
+ }
+ }
+ if (b < x)
+ *b++ = c;
+ }
+ r = s - t - 2;
+ w = 0;
+ if (b >= x)
+ goto done;
+ *b = 0;
+ for (i = 0; i < r && i < sizeof(tmp) - 1; i++)
+ tmp[i] = '0';
+ tmp[i] = 0;
+ if (mbxfrm(xfm, buf, sizeof(xfm)) >= mbxfrm(xfm, tmp, sizeof(xfm)))
+ goto nope;
+ t = (const char*)buf;
+ done:
+ if (r <= size)
+ {
+ memcpy(buf, t, r);
+ if (r < size)
+ buf[r] = 0;
+ }
+ if (wc)
+ *wc = w;
+ if (e)
+ *e = (char*)s;
+ return r;
+ oops:
+ s--;
+ nope:
+ if (e)
+ *e = (char*)s;
+ return -1;
+}
diff --git a/src/lib/libast/regex/regcomp.c b/src/lib/libast/regex/regcomp.c
new file mode 100644
index 0000000..416d453
--- /dev/null
+++ b/src/lib/libast/regex/regcomp.c
@@ -0,0 +1,3544 @@
+/***********************************************************************
+* *
+* This software is part of the ast package *
+* Copyright (c) 1985-2011 AT&T Intellectual Property *
+* and is licensed under the *
+* Eclipse Public License, Version 1.0 *
+* by AT&T Intellectual Property *
+* *
+* A copy of the License is available at *
+* http://www.eclipse.org/org/documents/epl-v10.html *
+* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
+* *
+* Information and Software Systems Research *
+* AT&T Research *
+* Florham Park NJ *
+* *
+* Glenn Fowler <gsf@research.att.com> *
+* David Korn <dgk@research.att.com> *
+* Phong Vo <kpv@research.att.com> *
+* *
+***********************************************************************/
+#pragma prototyped
+
+/*
+ * posix regex compiler
+ */
+
+#include "reglib.h"
+
+#if _PACKAGE_ast
+#include "lclib.h"
+#endif
+
+#define serialize re_serialize /* hp.ia64 <unistd.h>! */
+
+#define C_ESC (-1)
+#define C_MB (-2)
+
+#if _AST_REGEX_DEBUG
+
+#define DEBUG_TEST(f,y,n) ((debug&(debug_flag=f))?(y):(n))
+#define DEBUG_CODE(f,y,n) do if(debug&(f)){y}else{n} while(0)
+#define DEBUG_INIT() do { char* t; if (!debug) { debug = 0x80000000; if (t = getenv("_AST_regex_comp_debug")) debug |= strtoul(t, NiL, 0); } } while (0)
+
+static unsigned long debug;
+static unsigned long debug_flag;
+
+#else
+
+#define DEBUG_INIT()
+#define DEBUG_TEST(f,y,n) (n)
+#define DEBUG_CODE(f,y,n) do {n} while(0)
+
+#endif
+
+#if _PACKAGE_ast
+
+typedef struct Cchr_s
+{
+ Dtlink_t lnk;
+ unsigned char nam[2];
+ Ckey_t key;
+} Cchr_t;
+
+#endif
+
+#define eat(p) do{if ((p)->token.push)(p)->token.push=0;else (p)->cursor+=(p)->token.len;}while (0)
+
+/*
+ * determine whether greedy matching will work, i.e. produce
+ * the best match first. such expressions are "easy", and
+ * need no backtracking once a complete match is found.
+ * if an expression has backreferences or alts it's hard
+ * else if it has only one closure it's easy
+ * else if all closures are simple (i.e. one-character) it's easy
+ * else it's hard.
+ */
+
+typedef struct Stats_s
+{
+ unsigned long l; /* min length to left of x */
+ unsigned long k; /* min length to left of y */
+ unsigned long m; /* min length */
+ unsigned long n; /* max length */
+ unsigned short a; /* number of alternations */
+ unsigned short b; /* number of backrefs */
+ unsigned short c; /* number of closures */
+ unsigned short e; /* $ */
+ unsigned short i; /* number of negations */
+ unsigned short p; /* number of named subexpressions */
+ unsigned short s; /* number of simple closures */
+ unsigned short t; /* number of tries */
+ unsigned short u; /* number of unnamed subexpressions */
+ Rex_t* x; /* max length REX_STRING */
+ Rex_t* y; /* max length REX_TRIE */
+} Stats_t;
+
+typedef struct Token_s
+{
+ unsigned long min;
+ unsigned long max;
+ short lex;
+ short len;
+ short esc;
+ short att;
+ short push;
+} Token_t;
+
+typedef struct Cenv_s
+{
+ int delimiter; /* pattern delimiter */
+ int error; /* last error */
+ int explicit; /* explicit match on this char */
+ int mappeddot; /* inverse mapped '.' */
+ int mappednewline; /* inverse mapped '\n' */
+ int mappedslash; /* inverse mapped '/' */
+ regflags_t flags; /* flags arg to regcomp */
+ int type; /* BRE,ERE,ARE,SRE,KRE */
+ unsigned char* cursor; /* curent point in re */
+ unsigned char* pattern; /* the original pattern */
+ unsigned char* literal; /* literal restart pattern */
+ int parno; /* number of last open paren */
+ int parnest; /* paren nest count */
+ int posixkludge; /* to make * nonspecial */
+ int regexp; /* <regexp.h> compatibility */
+ Token_t token; /* token lookahead */
+ Stats_t stats; /* RE statistics */
+ int terminator; /* pattern terminator */
+ Rex_t* paren[2*(BACK_REF_MAX+2)];
+ /* paren[i]!=0 if \i defined */
+ regex_t* regex; /* user handle */
+ regdisc_t* disc; /* user discipline */
+ unsigned char* map; /* external to native ccode map */
+ unsigned char* MAP; /* fold and/or map */
+} Cenv_t;
+
+/*
+ * allocate a new Rex_t node
+ */
+
+#if _BLD_DEBUG
+#define node(a,b,c,d,e) node(a,b,c,d,e, const char* file, unsigned int line)
+#endif
+
+static Rex_t*
+node(Cenv_t* env, int type, int lo, int hi, size_t extra)
+{
+ register Rex_t* e;
+
+ DEBUG_TEST(0x0800,(sfprintf(sfstdout, "%s:%d node(%d,%d,%d,%u)\n", file, line, type, lo, hi, sizeof(Rex_t) + extra)),(0));
+ if (e = (Rex_t*)alloc(env->disc, 0, sizeof(Rex_t) + extra))
+ {
+ memset(e, 0, sizeof(Rex_t) + extra);
+ e->type = type;
+ e->marked = 0;
+ e->lo = lo;
+ e->hi = hi;
+ e->flags = env->flags;
+ e->map = (e->flags & REG_ICASE) ? env->MAP : env->map;
+ e->explicit = env->explicit;
+ if (extra)
+ e->re.data = (char*)e + sizeof(Rex_t);
+ }
+ return e;
+}
+
+#if _BLD_DEBUG
+#undef node
+#define node(a,b,c,d,e) node(a,b,c,d,e,__FILE__,__LINE__)
+#endif
+
+/*
+ * free a Trie_node_t node
+ */
+
+static void
+triedrop(regdisc_t* disc, Trie_node_t* e)
+{
+ if (e)
+ {
+ triedrop(disc, e->son);
+ triedrop(disc, e->sib);
+ alloc(disc, e, 0);
+ }
+}
+
+/*
+ * free a Rex_t node
+ */
+
+void
+drop(regdisc_t* disc, Rex_t* e)
+{
+ int i;
+ Rex_t* x;
+
+ if (e && !(disc->re_flags & REG_NOFREE))
+ do
+ {
+ switch (e->type)
+ {
+ case REX_ALT:
+ case REX_CONJ:
+ drop(disc, e->re.group.expr.binary.left);
+ drop(disc, e->re.group.expr.binary.right);
+ break;
+ case REX_GROUP:
+ case REX_GROUP_AHEAD:
+ case REX_GROUP_AHEAD_NOT:
+ case REX_GROUP_BEHIND:
+ case REX_GROUP_BEHIND_NOT:
+ case REX_GROUP_CUT:
+ case REX_NEG:
+ case REX_REP:
+ drop(disc, e->re.group.expr.rex);
+ break;
+ case REX_TRIE:
+ for (i = 0; i <= UCHAR_MAX; i++)
+ triedrop(disc, e->re.trie.root[i]);
+ break;
+ }
+ x = e->next;
+ alloc(disc, e, 0);
+ } while (e = x);
+}
+
+/*
+ * mark e and descendants minimal
+ */
+
+static void
+mark(register Rex_t* e, int set)
+{
+ if (e && !e->marked)
+ do
+ {
+ e->marked = 1;
+ if (set)
+ e->flags |= REG_MINIMAL;
+ else
+ e->flags &= ~REG_MINIMAL;
+ switch (e->type)
+ {
+ case REX_ALT:
+ case REX_CONJ:
+ case REX_GROUP_COND:
+ if (e->re.group.expr.binary.left)
+ mark(e->re.group.expr.binary.left, set);
+ if (e->re.group.expr.binary.right)
+ mark(e->re.group.expr.binary.right, set);
+ break;
+ case REX_GROUP:
+ case REX_GROUP_AHEAD:
+ case REX_GROUP_AHEAD_NOT:
+ case REX_GROUP_BEHIND:
+ case REX_GROUP_BEHIND_NOT:
+ case REX_GROUP_CUT:
+ case REX_NEG:
+ case REX_REP:
+ case REX_TRIE:
+ mark(e->re.group.expr.rex, set);
+ break;
+ }
+ } while (e = e->next);
+}
+
+/*
+ * assign subexpression numbers by a preorder tree walk
+ */
+
+static int
+serialize(Cenv_t* env, Rex_t* e, int n)
+{
+ do
+ {
+ e->serial = n++;
+ switch (e->type)
+ {
+ case REX_ALT:
+ case REX_GROUP_COND:
+ if (e->re.group.expr.binary.left)
+ n = serialize(env, e->re.group.expr.binary.left, n);
+ e->re.group.expr.binary.serial = n++;
+ if (e->re.group.expr.binary.right)
+ n = serialize(env, e->re.group.expr.binary.right, n);
+ break;
+ case REX_CONJ:
+ n = serialize(env, e->re.group.expr.binary.left, n);
+ n = serialize(env, e->re.group.expr.binary.right, n);
+ break;
+ case REX_GROUP:
+ case REX_GROUP_AHEAD:
+ case REX_GROUP_AHEAD_NOT:
+ case REX_GROUP_BEHIND:
+ case REX_GROUP_BEHIND_NOT:
+ case REX_GROUP_CUT:
+ case REX_NEG:
+ case REX_REP:
+ n = serialize(env, e->re.group.expr.rex, n);
+ break;
+ }
+ } while (e = e->next);
+ return n;
+}
+
+/*
+ * catenate e and f into a sequence, collapsing them if possible
+ */
+
+static Rex_t*
+cat(Cenv_t* env, Rex_t* e, Rex_t* f)
+{
+ Rex_t* g;
+
+ if (!f)
+ {
+ drop(env->disc, e);
+ return 0;
+ }
+ if (e->type == REX_NULL)
+ {
+ drop(env->disc, e);
+ return f;
+ }
+ if (f->type == REX_NULL)
+ {
+ g = f->next;
+ f->next = 0;
+ drop(env->disc, f);
+ f = g;
+ }
+ else if (e->type == REX_DOT && f->type == REX_DOT)
+ {
+ unsigned int m = e->lo + f->lo;
+ unsigned int n = e->hi + f->hi;
+
+ if (m <= RE_DUP_MAX)
+ {
+ if (e->hi > RE_DUP_MAX || f->hi > RE_DUP_MAX)
+ {
+ n = RE_DUP_INF;
+ goto combine;
+ }
+ else if (n <= RE_DUP_MAX)
+ {
+ combine:
+ e->lo = m;
+ e->hi = n;
+ g = f->next;
+ f->next = 0;
+ drop(env->disc, f);
+ f = g;
+ }
+ }
+ }
+ e->next = f;
+ return e;
+}
+
+/*
+ * collect re statistics
+ */
+
+static int
+stats(register Cenv_t* env, register Rex_t* e)
+{
+ register unsigned long n;
+ register unsigned long m;
+ unsigned long cm;
+ unsigned long nm;
+ unsigned long cn;
+ unsigned long nn;
+ unsigned long l;
+ unsigned long k;
+ unsigned long t;
+ Rex_t* q;
+ Rex_t* x;
+ Rex_t* y;
+ unsigned char c;
+ unsigned char b;
+
+ do
+ {
+ switch (e->type)
+ {
+ case REX_ALT:
+ x = env->stats.x;
+ l = env->stats.l;
+ y = env->stats.y;
+ k = env->stats.k;
+ t = env->stats.t;
+ if (++env->stats.a <= 0)
+ return 1;
+ cm = env->stats.m;
+ env->stats.m = 0;
+ cn = env->stats.n;
+ env->stats.n = 0;
+ if (stats(env, e->re.group.expr.binary.left))
+ return 1;
+ m = env->stats.m;
+ env->stats.m = 0;
+ n = env->stats.n;
+ env->stats.n = 0;
+ if (e->re.group.expr.binary.right && stats(env, e->re.group.expr.binary.right))
+ return 1;
+ if (env->stats.m > m)
+ env->stats.m = m;
+ else
+ m = env->stats.m;
+ if ((env->stats.m += cm) < m)
+ return 1;
+ if (env->stats.n < n)
+ env->stats.n = n;
+ else
+ n = env->stats.n;
+ if ((env->stats.n += cn) < n)
+ return 1;
+ env->stats.x = x;
+ env->stats.l = l;
+ env->stats.y = y;
+ env->stats.k = k;
+ env->stats.t = t;
+ break;
+ case REX_BACK:
+ if (++env->stats.b <= 0)
+ return 1;
+ break;
+ case REX_CLASS:
+ case REX_COLL_CLASS:
+ case REX_DOT:
+ case REX_ONECHAR:
+ n = env->stats.m;
+ if ((env->stats.m += e->lo) < n)
+ return 1;
+ if (e->hi != RE_DUP_INF)
+ {
+ n = env->stats.n;
+ if ((env->stats.n += e->hi) < n)
+ return 1;
+ }
+ if (e->lo != e->hi)
+ {
+ if (++env->stats.c <= 0)
+ return 1;
+ if (++env->stats.s <= 0)
+ return 1;
+ }
+ break;
+ case REX_CONJ:
+ cm = env->stats.m;
+ env->stats.m = 0;
+ cn = env->stats.n;
+ env->stats.n = 0;
+ if (stats(env, e->re.group.expr.binary.left))
+ return 1;
+ nm = env->stats.m;
+ env->stats.m = 0;
+ nn = env->stats.n;
+ env->stats.n = 0;
+ if (stats(env, e->re.group.expr.binary.right))
+ return 1;
+ if (env->stats.m < nm)
+ env->stats.m = nm;
+ else
+ nm = env->stats.m;
+ if ((env->stats.m += cm) < nm)
+ return 1;
+ if (env->stats.n < nn)
+ env->stats.n = nn;
+ else
+ nn = env->stats.n;
+ if ((env->stats.n += cn) < nn)
+ return 1;
+ break;
+ case REX_END:
+ env->stats.e = 1;
+ break;
+ case REX_GROUP:
+ if (e->re.group.number && ++env->stats.p <= 0 || !e->re.group.number && ++env->stats.u <= 0)
+ return 1;
+ if (stats(env, e->re.group.expr.rex))
+ return 1;
+ break;
+ case REX_GROUP_AHEAD:
+ case REX_GROUP_AHEAD_NOT:
+ case REX_GROUP_BEHIND:
+ case REX_GROUP_BEHIND_NOT:
+ m = env->stats.m;
+ n = env->stats.n;
+ x = env->stats.x;
+ y = env->stats.y;
+ if (stats(env, e->re.group.expr.rex))
+ return 1;
+ env->stats.m = m;
+ env->stats.n = n;
+ env->stats.x = x;
+ env->stats.y = y;
+ switch (e->type)
+ {
+ case REX_GROUP_AHEAD:
+ case REX_GROUP_BEHIND:
+ if (++env->stats.u <= 0)
+ return 1;
+ break;
+ }
+ break;
+ case REX_GROUP_COND:
+ if (++env->stats.u <= 0)
+ return 1;
+ m = env->stats.m;
+ n = env->stats.n;
+ x = env->stats.x;
+ y = env->stats.y;
+ if (e->re.group.size > 0 && ++env->stats.b <= 0)
+ return 1;
+ if (e->re.group.expr.binary.left && stats(env, e->re.group.expr.binary.left))
+ return 1;
+ if (q = e->re.group.expr.binary.right)
+ {
+ if (q->re.group.expr.binary.left && stats(env, q->re.group.expr.binary.left))
+ return 1;
+ if (q->re.group.expr.binary.right && stats(env, q->re.group.expr.binary.right))
+ return 1;
+ }
+ env->stats.m = m;
+ env->stats.n = n;
+ env->stats.x = x;
+ env->stats.y = y;
+ break;
+ case REX_GROUP_CUT:
+ if (++env->stats.u <= 0)
+ return 1;
+ m = env->stats.m;
+ n = env->stats.n;
+ x = env->stats.x;
+ y = env->stats.y;
+ if (stats(env, e->re.group.expr.rex))
+ return 1;
+ env->stats.m = m;
+ env->stats.n = n;
+ env->stats.x = x;
+ env->stats.y = y;
+ break;
+ case REX_NEG:
+ env->stats.i++;
+ x = env->stats.x;
+ l = env->stats.l;
+ y = env->stats.y;
+ k = env->stats.k;
+ t = env->stats.t;
+ cm = env->stats.m;
+ env->stats.m = 0;
+ if (stats(env, e->re.group.expr.rex))
+ return 1;
+ env->stats.m = !env->stats.m;
+ if ((env->stats.m += cm) < cm)
+ return 1;
+ env->stats.x = x;
+ env->stats.l = l;
+ env->stats.y = y;
+ env->stats.k = k;
+ env->stats.t = t;
+ break;
+ case REX_REP:
+ x = env->stats.x;
+ l = env->stats.l;
+ y = env->stats.y;
+ k = env->stats.k;
+ t = env->stats.t;
+ if (++env->stats.c <= 0)
+ return 1;
+ b = env->stats.b;
+ c = env->stats.c;
+ cm = env->stats.m;
+ env->stats.m = 0;
+ if (stats(env, e->re.group.expr.rex))
+ return 1;
+ if (env->stats.m == 1 && b == env->stats.b && c == env->stats.c && ++env->stats.s <= 0)
+ return 1;
+ if (e->lo < 1)
+ {
+ env->stats.x = x;
+ env->stats.l = l;
+ env->stats.y = y;
+ env->stats.k = k;
+ env->stats.t = t;
+ env->stats.m = cm;
+ }
+ else
+ {
+ m = env->stats.m;
+ if ((env->stats.m *= e->lo) > 0 && env->stats.m < m)
+ return 1;
+ m = env->stats.m;
+ if ((env->stats.m += cm) < m)
+ return 1;
+ if (env->stats.x != x)
+ env->stats.l = cm;
+ if (env->stats.y != y)
+ env->stats.k = cm;
+ }
+ break;
+ case REX_STRING:
+ if (!e->map)
+ {
+ cm = env->stats.m;
+ if ((env->stats.m += e->re.string.size) < cm)
+ return 1;
+ cn = env->stats.n;
+ if ((env->stats.n += e->re.string.size) < cn)
+ return 1;
+ if (!env->stats.x || env->stats.x->re.string.size < e->re.string.size)
+ {
+ env->stats.x = e;
+ env->stats.l = cm;
+ }
+ }
+ break;
+ case REX_TRIE:
+ if (++env->stats.s <= 0)
+ return 1;
+ cm = env->stats.m;
+ if ((env->stats.m += e->re.trie.min) < cm)
+ return 1;
+ cn = env->stats.n;
+ if ((env->stats.n += e->re.trie.max) < cn)
+ return 1;
+ env->stats.t++;
+ if (!env->stats.y || env->stats.y->re.trie.min < e->re.trie.min)
+ {
+ env->stats.y = e;
+ env->stats.k = cm;
+ }
+ break;
+ }
+ } while (e = e->next);
+ return 0;
+}
+
+static int token(Cenv_t*);
+
+static int
+magic(register Cenv_t* env, register int c, int escaped)
+{
+ register char* sp;
+ register int n;
+ int o = c;
+ int e = env->error;
+ int l = env->token.len;
+ short* mp;
+ char* ep;
+
+ if (mp = state.magic[c])
+ {
+ c = mp[env->type+escaped];
+ if (c >= T_META)
+ {
+ sp = (char*)env->cursor + env->token.len;
+ switch (c)
+ {
+ case T_LEFT:
+ n = 0;
+ ep = sp;
+ while (*sp >= '0' && *sp <= '9')
+ {
+ if (n > (INT_MAX / 10))
+ {
+ env->error = REG_BADBR;
+ goto bad;
+ }
+ n = n * 10 + *sp++ - '0';
+ }
+ if (sp == ep)
+ {
+ if (env->type < SRE || *sp != ',')
+ {
+ env->error = *sp ? REG_BADBR : REG_EBRACE;
+ goto bad;
+ }
+ }
+ else if (n > RE_DUP_MAX)
+ {
+ env->error = REG_BADBR;
+ goto bad;
+ }
+ env->token.min = n;
+ if (*sp == ',')
+ {
+ n = 0;
+ ep = ++sp;
+ while (*sp >= '0' && *sp <= '9')
+ {
+ if (n > (INT_MAX / 10))
+ {
+ env->error = REG_BADBR;
+ goto bad;
+ }
+ n = n * 10 + *sp++ - '0';
+ }
+ if (sp == ep)
+ n = RE_DUP_INF;
+ else if (n < env->token.min)
+ {
+ env->error = REG_BADBR;
+ goto bad;
+ }
+ }
+ env->token.max = n;
+ switch (*sp)
+ {
+ case 0:
+ env->error = REG_EBRACE;
+ goto bad;
+ case '\\':
+ if (!escaped)
+ {
+ env->error = REG_BADBR;
+ goto bad;
+ }
+ sp++;
+ break;
+ default:
+ if (escaped)
+ {
+ env->error = REG_BADBR;
+ goto bad;
+ }
+ break;
+ }
+ switch (*sp++)
+ {
+ case 0:
+ env->error = REG_EBRACE;
+ goto bad;
+ case '}':
+ break;
+ default:
+ env->error = REG_BADBR;
+ goto bad;
+ }
+ env->token.len = sp - (char*)env->cursor;
+ break;
+ case T_RIGHT:
+ env->error = REG_EBRACE;
+ goto bad;
+ case T_OPEN:
+ if (env->type < SRE && *sp == '?')
+ {
+ env->token.len++;
+ env->token.lex = 0;
+ goto group;
+ }
+ break;
+ case T_ESCAPE:
+ c = chresc(sp - 2, &ep);
+ if (ep < sp)
+ goto bad;
+ env->token.len += ep - sp;
+ if (c >= T_META)
+ {
+ env->token.lex = c;
+ c = C_ESC;
+ }
+ return c;
+ case T_BACK+0:
+ case T_BACK+1:
+ case T_BACK+2:
+ case T_BACK+3:
+ case T_BACK+4:
+ case T_BACK+5:
+ case T_BACK+6:
+ case T_BACK+7:
+ n = chresc(sp - 2, &ep);
+ if (ep > sp + 1)
+ {
+ env->token.len += ep - sp;
+ return n;
+ }
+ /*FALLTHROUGH*/
+ case T_BACK+8:
+ case T_BACK+9:
+ if (env->type == SRE || c == T_BACK && !(env->flags & REG_LENIENT))
+ {
+ env->error = REG_BADESC;
+ goto bad;
+ }
+ if ((env->flags & REG_MULTIREF) && isdigit(*sp))
+ {
+ c = (c - T_BACK) * 10 + (*sp - '0');
+ if (c > 0 && c <= env->parno && env->paren[c])
+ c += T_BACK;
+ else
+ c = chresc(sp - 2, &ep);
+ env->token.len++;
+ }
+ if (c == T_BACK)
+ c = 0;
+ break;
+ case T_BAD:
+ if (escaped == 1 && (env->flags & REG_LENIENT) && (c = mp[env->type+escaped+2]) >= T_META)
+ return c;
+ goto bad;
+ }
+ if (env->type >= SRE)
+ {
+ if (c == T_DOT)
+ c = '.';
+ else if (c < T_OPEN)
+ {
+ if (env->type == KRE && *(env->cursor + env->token.len) == '-' && *(env->cursor + env->token.len + 1) == '(')
+ {
+ env->token.len++;
+ env->token.att = 1;
+ }
+ if (env->type == KRE && *(env->cursor + env->token.len) == '(')
+ {
+ env->token.len++;
+ switch (c)
+ {
+ case T_AT:
+ break;
+ case T_PERCENT:
+ env->token.lex = c;
+ goto group;
+ case T_TILDE:
+ env->token.lex = 0;
+ goto group;
+ default:
+ env->token.lex = c;
+ break;
+ }
+ c = T_OPEN;
+ }
+ else if (c == T_STAR)
+ c = T_DOTSTAR;
+ else if (c == T_QUES)
+ c = T_DOT;
+ else
+ {
+ c = o;
+ env->token.len = l;
+ }
+ }
+ else if (c > T_BACK)
+ {
+ c = (c - T_BACK) * 2 - 1;
+ c = (c > env->parno || !env->paren[c]) ? o : T_BACK + c;
+ }
+ else if (env->type == KRE && !env->parnest && (env->flags & REG_SHELL_GROUP))
+ {
+ if (c == T_AND)
+ c = '&';
+ else if (c == T_BAR)
+ c = '|';
+ else if (c == T_OPEN)
+ c = '(';
+ }
+ }
+ }
+ }
+ else if (escaped == 2)
+ {
+ if (env->type >= SRE && !(env->flags & REG_SHELL_ESCAPED) || (env->flags & REG_ESCAPE) && (c == '[' || c == '-' || c == ']' || env->delimiter && c == env->delimiter))
+ /*ok*/;
+ else
+ {
+ env->error = REG_BADESC;
+ goto bad;
+ }
+ }
+ else if (escaped && !(env->flags & REG_LENIENT) && c != ']')
+ {
+ env->error = REG_BADESC;
+ goto bad;
+ }
+ return c;
+ group:
+ sp = (char*)env->cursor + env->token.len;
+ switch (*sp++)
+ {
+ case ')':
+ break;
+ case '#':
+ for (;;)
+ {
+ switch (*sp++)
+ {
+ case 0:
+ env->error = REG_EPAREN;
+ return T_BAD;
+ case ')':
+ break;
+ default:
+ continue;
+ }
+ break;
+ }
+ break;
+ default:
+ return T_GROUP;
+ }
+ env->cursor = (unsigned char*)sp;
+ return token(env);
+ bad:
+ if (escaped == 2)
+ env->error = e;
+ else if (env->flags & REG_LENIENT)
+ return o;
+ else if (escaped == 1 && !env->error)
+ {
+ if (mp || o == ']')
+ return o;
+ env->error = REG_BADESC;
+ }
+ return T_BAD;
+}
+
+static int
+token(register Cenv_t* env)
+{
+ int c;
+ int posixkludge;
+
+ if (env->token.push)
+ return env->token.lex;
+ env->token.att = env->token.esc = 0;
+ if ((env->token.len = MBSIZE(env->cursor)) > 1)
+ return env->token.lex = C_MB;
+ env->token.lex = 0;
+ for (;;)
+ {
+ c = *env->cursor;
+ if (c == 0 || c == env->delimiter || c == env->terminator)
+ return T_END;
+ if (!(env->flags & REG_COMMENT))
+ break;
+ if (c == '#')
+ {
+ do
+ {
+ c = *++env->cursor;
+ if (c == 0 || c == env->delimiter)
+ return T_END;
+ } while (c != '\n');
+ }
+ else if (!isspace(c))
+ break;
+ env->cursor++;
+ }
+ if (c == '\n' && (env->flags & REG_MULTIPLE) && !env->delimiter)
+ {
+ if (env->parnest)
+ {
+ env->error = REG_EPAREN;
+ return T_BAD;
+ }
+ env->parno = 0;
+ env->pattern = env->cursor + 1;
+ return T_BAR;
+ }
+ if (env->flags & REG_LITERAL)
+ return c;
+ if (posixkludge = env->posixkludge)
+ {
+ env->posixkludge = 0;
+ if (c == '*')
+ return c;
+ }
+ if (c == '\\')
+ {
+ if (env->flags & REG_SHELL_ESCAPED)
+ return c;
+ if (!(c = *(env->cursor + 1)) || c == env->terminator)
+ {
+ if (env->flags & REG_LENIENT)
+ {
+ if (c)
+ {
+ env->token.esc = env->token.len;
+ env->token.len += MBSIZE(env->cursor + 1);
+ return c;
+ }
+ return '\\';
+ }
+ env->error = REG_EESCAPE;
+ return T_BAD;
+ }
+ env->token.esc = env->token.len;
+ env->token.len += MBSIZE(env->cursor + 1);
+ if (env->delimiter && c == 'n')
+ return '\n';
+ else if (c == env->delimiter)
+ return magic(env, c, 0);
+ else if (c == '(' && env->type == BRE)
+ env->posixkludge = 1;
+ else if (c == ')' && env->type == BRE && env->parnest <= 0)
+ {
+ env->error = REG_EPAREN;
+ return T_BAD;
+ }
+ else if (isspace(c) && (env->flags & REG_COMMENT))
+ return c;
+ return magic(env, c, 1);
+ }
+ else if (c == '$')
+ {
+ if (env->type == BRE && (*(env->cursor + 1) == 0 || *(env->cursor + 1) == env->delimiter || *(env->cursor + 1) == env->terminator || *(env->cursor + 1) == '\\' && *(env->cursor + 2) == ')') || (env->flags & REG_MULTIPLE) && *(env->cursor + 1) == '\n')
+ return T_DOLL;
+ }
+ else if (c == '^')
+ {
+ if (env->type == BRE && (env->cursor == env->pattern || posixkludge == 1))
+ {
+ env->posixkludge = 2;
+ return T_CFLX;
+ }
+ }
+ else if (c == ')')
+ {
+ if (env->type != BRE && env->parnest <= 0)
+ return c;
+ }
+ else if (c == '/' && env->explicit == env->mappedslash)
+ {
+ while (*(env->cursor + env->token.len) == c)
+ env->token.len++;
+ return T_SLASHPLUS;
+ }
+ return magic(env, c, 0);
+}
+
+static Celt_t*
+col(Celt_t* ce, int ic, unsigned char* bp, int bw, int bc, unsigned char* ep, int ew, int ec)
+{
+ register char* s;
+ register unsigned char* k;
+ register unsigned char* e;
+ register int c;
+ register int cc;
+ int bt;
+ int et;
+ Ckey_t key;
+
+ cc = 0;
+ for (;;)
+ {
+ k = key;
+ if (bw == 1)
+ {
+ c = bc;
+ if (ic)
+ {
+ if (isupper(c))
+ {
+ c = tolower(c);
+ cc = -1;
+ }
+ else if (islower(c))
+ {
+ c = toupper(c);
+ cc = -1;
+ }
+ }
+ *k++ = c;
+ }
+ else if (bw < COLL_KEY_MAX)
+ {
+ s = (char*)bp;
+ if (ic)
+ {
+ c = mbchar(s);
+ if (iswupper(c))
+ {
+ c = towlower(c);
+ cc = 1;
+ }
+ else if (iswlower(c))
+ {
+ c = towupper(c);
+ cc = 1;
+ }
+ }
+ if (cc > 0)
+ {
+ cc = -1;
+ k += mbconv((char*)k, c);
+ }
+ else
+ for (e = k + bw; k < e; *k++ = *s++);
+ }
+ *k = 0;
+ mbxfrm(ce->beg, key, COLL_KEY_MAX);
+ if (ep)
+ {
+ k = key;
+ c = mbchar(k);
+ if (iswupper(c))
+ bt = COLL_range_uc;
+ else if (iswlower(c))
+ bt = COLL_range_lc;
+ else
+ bt = COLL_range;
+ k = key;
+ if (ew == 1)
+ {
+ c = ec;
+ if (ic)
+ {
+ if (isupper(c))
+ {
+ c = tolower(c);
+ cc = -1;
+ }
+ else if (islower(c))
+ {
+ c = toupper(c);
+ cc = -1;
+ }
+ }
+ *k++ = c;
+ }
+ else if (ew < COLL_KEY_MAX)
+ {
+ s = (char*)ep;
+ if (ic)
+ {
+ c = mbchar(s);
+ if (iswupper(c))
+ {
+ c = towlower(c);
+ cc = 1;
+ }
+ else if (iswlower(c))
+ {
+ c = towupper(c);
+ cc = 1;
+ }
+ }
+ if (cc > 0)
+ {
+ cc = -1;
+ k += mbconv((char*)k, c);
+ }
+ else
+ for (e = k + ew; k < e; *k++ = *s++);
+ }
+ *k = 0;
+ mbxfrm(ce->end, key, COLL_KEY_MAX);
+ k = key;
+ c = mbchar(k);
+ if (iswupper(c))
+ et = COLL_range_uc;
+ else if (iswlower(c))
+ et = COLL_range_lc;
+ else
+ et = COLL_range;
+ ce->typ = bt == et ? bt : COLL_range;
+ }
+ else
+ ce->typ = COLL_char;
+ ce++;
+ if (!ic || !cc)
+ break;
+ ic = 0;
+ }
+ return ce;
+}
+
+static Rex_t*
+bra(Cenv_t* env)
+{
+ Rex_t* e;
+ int c;
+ int i;
+ int w;
+ int neg;
+ int last;
+ int inrange;
+ int complicated;
+ int collate;
+ int elements;
+ unsigned char* first;
+ unsigned char* start;
+ unsigned char* begin;
+ unsigned char* s;
+ regclass_t f;
+ unsigned char buf[4 * (COLL_KEY_MAX + 1)];
+#if _PACKAGE_ast
+ int ic;
+ char mbc[COLL_KEY_MAX + 1];
+#endif
+
+ if (!(e = node(env, REX_CLASS, 1, 1, sizeof(Set_t))))
+ return 0;
+ collate = complicated = elements = 0;
+ if (*env->cursor == '^' || env->type >= SRE && *env->cursor == '!')
+ {
+ env->cursor++;
+ neg = 1;
+ }
+ else
+ neg = 0;
+ first = env->cursor;
+ start = first + MBSIZE(first);
+ if (*env->cursor == 0 || *(env->cursor + 1) == 0 || *env->cursor == env->terminator || *(env->cursor + 1) == env->terminator || (env->flags & REG_ESCAPE) && (*env->cursor == env->delimiter || *env->cursor != '\\' && *(env->cursor + 1) == env->delimiter))
+ goto error;
+ begin = env->cursor + MBSIZE(env->cursor);
+
+ /*
+ * inrange: 0=no, 1=possibly, 2=definitely
+ */
+
+ inrange = 0;
+ for (;;)
+ {
+ if (!(c = *env->cursor) || c == env->terminator || c == env->delimiter && (env->flags & REG_ESCAPE))
+ goto error;
+ env->cursor += (w = MBSIZE(env->cursor));
+ if (c == '\\' && ((env->flags & REG_CLASS_ESCAPE) || *env->cursor == env->delimiter && (env->flags & REG_ESCAPE)))
+ {
+ if (*env->cursor)
+ {
+ if (*env->cursor == 'n')
+ {
+ env->cursor++;
+ c = '\n';
+ }
+ else if (env->type < SRE || !(env->flags & REG_SHELL_ESCAPED))
+ {
+ env->token.len = 1;
+ w = magic(env, *env->cursor, 2);
+ if (env->token.len > 1 || w != T_BAD)
+ {
+ if (env->token.len == 1 && (f = classfun(w)))
+ {
+ if (inrange > 1)
+ {
+ if (env->type < SRE && !(env->flags & REG_LENIENT))
+ goto erange;
+ inrange = 0;
+ }
+ env->cursor++;
+ for (c = 0; c <= UCHAR_MAX; c++)
+ if ((*f)(c))
+ setadd(e->re.charclass, c);
+ complicated++;
+ elements++;
+ continue;
+ }
+ if (env->token.len > 1 || w >= 0 && w < T_META)
+ {
+ c = w;
+ if (c > UCHAR_MAX)
+ {
+ if (env->type < SRE && !(env->flags & REG_LENIENT) && !mbwide())
+ goto erange;
+ c = UCHAR_MAX;
+ }
+ env->cursor += env->token.len;
+ }
+ }
+ }
+ }
+ }
+ else if (c == ']')
+ {
+ if (env->cursor == begin)
+ {
+ last = c;
+ inrange = 1;
+ continue;
+ }
+ if (inrange != 0)
+ {
+ setadd(e->re.charclass, last);
+ elements++;
+ if (inrange == 2)
+ {
+ setadd(e->re.charclass, '-');
+ elements++;
+ }
+ }
+ break;
+ }
+ else if (c == '-')
+ {
+ if (!inrange && env->cursor != begin && *env->cursor != ']')
+ {
+ if (env->type < SRE && !(env->flags & REG_LENIENT))
+ goto erange;
+ continue;
+ }
+ else if (inrange == 1)
+ {
+ inrange = 2;
+ complicated++;
+ continue;
+ }
+ }
+ else if (c == '[')
+ {
+ switch (*env->cursor)
+ {
+ case 0:
+ goto error;
+ case ':':
+ if (env->regexp)
+ goto normal;
+ if (inrange == 1)
+ {
+ setadd(e->re.charclass, last);
+ elements++;
+ }
+ if (!(f = regclass((char*)env->cursor, (char**)&env->cursor)))
+ {
+ if (env->cursor == start && (c = *(env->cursor + 1)))
+ {
+ s = start = env->cursor + 1;
+ while (*++s && *s != ':');
+ if (*s == ':' && *(s + 1) == ']' && *(s + 2) == ']')
+ {
+ if ((i = (s - start)) == 1)
+ {
+ switch (c)
+ {
+ case '<':
+ i = REX_WBEG;
+ break;
+ case '>':
+ i = REX_WEND;
+ break;
+ default:
+ i = 0;
+ break;
+ }
+ if (i)
+ {
+ env->cursor = s + 3;
+ drop(env->disc, e);
+ return node(env, i, 0, 0, 0);
+ }
+ }
+ }
+ }
+ env->error = REG_ECTYPE;
+ goto error;
+ }
+ for (c = 0; c <= UCHAR_MAX; c++)
+ if ((*f)(c))
+ setadd(e->re.charclass, c);
+ inrange = 0;
+ complicated++;
+ elements++;
+ continue;
+ case '=':
+ if (env->regexp)
+ goto normal;
+ if (inrange == 2)
+ goto erange;
+ if (inrange == 1)
+ {
+ setadd(e->re.charclass, last);
+ elements++;
+ }
+ if ((c = regcollate((char*)env->cursor, (char**)&env->cursor, (char*)buf, sizeof(buf), NiL)) < 0)
+ goto ecollate;
+ if (c > 1)
+ collate++;
+ else
+ setadd(e->re.charclass, buf[0]);
+ c = buf[0];
+ inrange = 0;
+ complicated++;
+ elements++;
+ continue;
+ case '.':
+ if (env->regexp)
+ goto normal;
+ if ((c = regcollate((char*)env->cursor, (char**)&env->cursor, (char*)buf, sizeof(buf), NiL)) < 0)
+ goto ecollate;
+ if (c > 1)
+ collate++;
+ c = buf[0];
+ complicated++;
+ break;
+ default:
+ normal:
+ if (*env->cursor == env->terminator || *env->cursor == env->delimiter && (env->flags & REG_ESCAPE))
+ goto error;
+ break;
+ }
+ }
+ else if (w > 1)
+ complicated++;
+ if (inrange == 2)
+ {
+ if (last <= c)
+ {
+ for (i = last; i <= c; i++)
+ setadd(e->re.charclass, i);
+ inrange = env->type >= SRE || (env->flags & REG_LENIENT);
+ elements += 2;
+ }
+ else if (env->type >= SRE)
+ {
+ setadd(e->re.charclass, last);
+ setadd(e->re.charclass, c);
+ elements += 2;
+ inrange = 1;
+ }
+ else if (!(env->flags & REG_LENIENT))
+ goto erange;
+ else
+ inrange = 0;
+ }
+ else if (inrange == 1)
+ {
+ setadd(e->re.charclass, last);
+ elements++;
+ }
+ else
+ inrange = 1;
+ last = c;
+ }
+#if _PACKAGE_ast
+ if (complicated && mbcoll())
+ {
+ Dt_t* dt;
+ Cchr_t* cc;
+ Cchr_t* tc;
+ Cchr_t* xc;
+ Celt_t* ce;
+ Cchr_t key;
+ int rw;
+ int rc;
+ wchar_t wc;
+ unsigned char* rp;
+ unsigned char* pp;
+ char* wp;
+ char cb[2][COLL_KEY_MAX+1];
+
+ static Dtdisc_t disc;
+
+ static const char primary[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+
+ if (!(dt = (Dt_t*)LCINFO(AST_LC_COLLATE)->data))
+ {
+ disc.key = offsetof(Cchr_t, key);
+ if ((cc = newof(0, Cchr_t, elementsof(primary), 0)) && (dt = dtopen(&disc, Dtoset)))
+ {
+ for (i = 0; i < elementsof(primary) - 1; i++, cc++)
+ {
+ cc->nam[0] = primary[i];
+ mbxfrm(cc->key, cc->nam, COLL_KEY_MAX);
+ dtinsert(dt, cc);
+ }
+ for (i = 0; i < elementsof(cc->key); i++)
+ cc->key[i] = ~0;
+ dtinsert(dt, cc);
+ LCINFO(AST_LC_COLLATE)->data = (void*)dt;
+ }
+ else
+ {
+ if (cc)
+ free(cc);
+ drop(env->disc, e);
+ return 0;
+ }
+ }
+ if (dt)
+ {
+ drop(env->disc, e);
+ if (ic = env->flags & REG_ICASE)
+ elements *= 2;
+ if (!(e = node(env, REX_COLL_CLASS, 1, 1, (elements + 3) * sizeof(Celt_t))))
+ return 0;
+ ce = (Celt_t*)e->re.data;
+ e->re.collate.invert = neg;
+ e->re.collate.elements = ce;
+ env->cursor = first;
+ inrange = 0;
+ for (;;)
+ {
+ if ((c = *env->cursor) == 0 || c == env->terminator || (env->flags & REG_ESCAPE) && c == env->delimiter)
+ goto error;
+ pp = env->cursor;
+ env->cursor += (w = MBSIZE(env->cursor));
+ if (c == '\\' && ((env->flags & REG_CLASS_ESCAPE) || *env->cursor == env->delimiter && (env->flags & REG_ESCAPE)))
+ {
+ if (*env->cursor)
+ {
+ if (*env->cursor == 'n')
+ {
+ pp = env->cursor++;
+ c = '\n';
+ }
+ else if (env->type < SRE || !(env->flags & REG_SHELL_ESCAPED))
+ {
+ env->token.len = 1;
+ w = magic(env, *env->cursor, 2);
+ if (env->token.len > 1 || w != T_BAD)
+ {
+ if (env->token.len == 1 && (f = classfun(w)))
+ {
+ if (inrange > 1)
+ {
+ if (env->type < SRE && !(env->flags & REG_LENIENT))
+ goto erange;
+ inrange = 0;
+ }
+ env->cursor++;
+ ce->fun = f;
+ ce->typ = COLL_call;
+ ce++;
+ continue;
+ }
+ if (env->token.len > 1 || w >= 0 && w < T_META)
+ {
+ c = w;
+ w = mbconv(mbc, c);
+ pp = (unsigned char*)mbc;
+ env->cursor += env->token.len;
+ }
+ }
+ }
+ }
+ }
+ else if (c == ']')
+ {
+ if (env->cursor == begin)
+ {
+ rp = pp;
+ rw = w;
+ inrange = 1;
+ continue;
+ }
+ if (inrange != 0)
+ {
+ ce = col(ce, ic, rp, rw, rc, NiL, 0, 0);
+ if (inrange == 2)
+ ce = col(ce, ic, NiL, 1, '-', NiL, 0, 0);
+ }
+ break;
+ }
+ else if (c == '-')
+ {
+ if (!inrange && env->cursor != begin && *env->cursor != ']')
+ {
+ if (env->type < SRE && !(env->flags & REG_LENIENT))
+ goto erange;
+ continue;
+ }
+ else if (inrange == 1)
+ {
+ inrange = 2;
+ continue;
+ }
+ }
+ else if (c == '[')
+ {
+ switch (*env->cursor)
+ {
+ case 0:
+ goto error;
+ case ':':
+ if (env->regexp)
+ goto complicated_normal;
+ if (inrange == 1)
+ ce = col(ce, ic, rp, rw, rc, NiL, 0, 0);
+ if (!(f = regclass((char*)env->cursor, (char**)&env->cursor)))
+ {
+ if (env->cursor == start && (c = *(env->cursor + 1)) && *(env->cursor + 2) == ':' && *(env->cursor + 3) == ']' && *(env->cursor + 4) == ']')
+ {
+ switch (c)
+ {
+ case '<':
+ i = REX_WBEG;
+ break;
+ case '>':
+ i = REX_WEND;
+ break;
+ default:
+ i = 0;
+ break;
+ }
+ if (i)
+ {
+ env->cursor += 5;
+ drop(env->disc, e);
+ return node(env, i, 0, 0, 0);
+ }
+ }
+ env->error = REG_ECTYPE;
+ goto error;
+ }
+ ce->fun = f;
+ ce->typ = COLL_call;
+ ce++;
+ inrange = 0;
+ continue;
+ case '=':
+ if (env->regexp)
+ goto complicated_normal;
+ if (inrange == 2)
+ goto erange;
+ if (inrange == 1)
+ ce = col(ce, ic, rp, rw, rc, NiL, 0, 0);
+ pp = (unsigned char*)cb[inrange];
+ rp = env->cursor + 1;
+ if ((rw = regcollate((char*)env->cursor, (char**)&env->cursor, (char*)pp, COLL_KEY_MAX, &wc)) < 0)
+ goto ecollate;
+ c = 0;
+ if (ic)
+ {
+ if (iswupper(wc))
+ {
+ wc = towlower(wc);
+ rw = mbconv((char*)pp, wc);
+ c = 'u';
+ }
+ else if (iswlower(wc))
+ c = 'l';
+ }
+ i = 1;
+ for (;;)
+ {
+ mbxfrm(key.key, (char*)pp, COLL_KEY_MAX);
+ if (!(cc = (Cchr_t*)dtsearch(dt, &key)) && !(cc = (Cchr_t*)dtprev(dt, &key)))
+ {
+ if (i)
+ {
+ c = *pp;
+ goto singleton;
+ }
+ goto ecollate;
+ }
+ xc = (tc = (Cchr_t*)dtprev(dt, cc)) && !strcasecmp((char*)tc->nam, (char*)cc->nam) ? tc : cc;
+ if (c == 'l' || c == 'L' && !(c = 0))
+ ce->typ = COLL_range_lc;
+ else if (c == 'u' || c == 'U' && !(c = 0))
+ ce->typ = COLL_range_uc;
+ else
+ ce->typ = COLL_range;
+ strcpy((char*)ce->beg, (char*)xc->key);
+ if (!(cc = (Cchr_t*)dtnext(dt, cc)))
+ {
+ if (i)
+ {
+ c = *pp;
+ goto singleton;
+ }
+ goto ecollate;
+ }
+ if (!strcasecmp((char*)xc->nam, (char*)cc->nam) && (tc = (Cchr_t*)dtnext(dt, cc)))
+ cc = tc;
+ strcpy((char*)ce->end, (char*)cc->key);
+ ce->max = -1;
+ ce++;
+ if (!c)
+ break;
+ if (c == 'u')
+ {
+ wc = towlower(wc);
+ c = 'L';
+ }
+ else
+ {
+ wc = towupper(wc);
+ c = 'U';
+ }
+ rw = mbconv((char*)pp, wc);
+ i = 0;
+ }
+ inrange = 0;
+ c = *pp;
+ continue;
+ case '.':
+ if (env->regexp)
+ goto complicated_normal;
+ pp = (unsigned char*)cb[inrange];
+ if ((w = regcollate((char*)env->cursor, (char**)&env->cursor, (char*)pp, COLL_KEY_MAX, NiL)) < 0)
+ goto ecollate;
+ c = *pp;
+ break;
+ default:
+ complicated_normal:
+ if (*env->cursor == env->terminator || *env->cursor == env->delimiter && (env->flags & REG_ESCAPE))
+ goto error;
+ break;
+ }
+ }
+ singleton:
+ if (inrange == 2)
+ {
+ ce = col(ce, ic, rp, rw, rc, pp, w, c);
+ if (strcmp((char*)ce->beg, (char*)ce->end) > 0)
+ {
+ if (env->type < SRE && !(env->flags & REG_LENIENT))
+ goto erange;
+ (ce-1)->typ = COLL_char;
+ strcpy((char*)ce->beg, (char*)(ce-1)->end);
+ ce->typ = COLL_char;
+ ce++;
+ }
+ inrange = env->type >= SRE || (env->flags & REG_LENIENT);
+ }
+ else if (inrange == 1)
+ ce = col(ce, ic, rp, rw, rc, NiL, 0, 0);
+ else
+ inrange = 1;
+ rp = pp;
+ rw = w;
+ rc = c;
+ }
+ ce->typ = COLL_end;
+ return e;
+ }
+ }
+#endif
+ if (collate)
+ goto ecollate;
+ if (env->flags & REG_ICASE)
+ for (i = 0; i <= UCHAR_MAX; i++)
+ if (settst(e->re.charclass, i))
+ {
+ if (isupper(i))
+ c = tolower(i);
+ else if (islower(i))
+ c = toupper(i);
+ else
+ continue;
+ setadd(e->re.charclass, c);
+ }
+ if (neg)
+ {
+ for (i = 0; i < elementsof(e->re.charclass->bits); i++)
+ e->re.charclass->bits[i] ^= ~0;
+ if (env->explicit >= 0)
+ setclr(e->re.charclass, env->explicit);
+ }
+ return e;
+ ecollate:
+ env->error = REG_ECOLLATE;
+ goto error;
+ erange:
+ env->error = REG_ERANGE;
+ error:
+ drop(env->disc, e);
+ if (!env->error)
+ env->error = REG_EBRACK;
+ return 0;
+}
+
+static Rex_t*
+ccl(Cenv_t* env, int type)
+{
+ int i;
+ Rex_t* e;
+ Celt_t* ce;
+ regclass_t f;
+
+ if (!(f = classfun(type)))
+ {
+ env->error = REG_BADESC;
+ return 0;
+ }
+ if (!mbcoll())
+ {
+ if (!(e = node(env, REX_CLASS, 1, 1, sizeof(Set_t))))
+ return 0;
+ for (i = 0; i <= UCHAR_MAX; i++)
+ if ((*f)(i))
+ setadd(e->re.charclass, i);
+ if (env->explicit >= 0)
+ setclr(e->re.charclass, env->explicit);
+ }
+ else
+ {
+ if (!(e = node(env, REX_COLL_CLASS, 1, 1, 2 * sizeof(Celt_t))))
+ return 0;
+ ce = (Celt_t*)e->re.data;
+ e->re.collate.invert = 0;
+ e->re.collate.elements = ce;
+ ce->fun = f;
+ ce->typ = COLL_call;
+ ce++;
+ ce->typ = COLL_end;
+ }
+ return e;
+}
+
+static Rex_t*
+rep(Cenv_t* env, Rex_t* e, int number, int last)
+{
+ Rex_t* f;
+ unsigned long m = 0;
+ unsigned long n = RE_DUP_INF;
+ int minimal = -1;
+
+ if (!e)
+ return 0;
+ switch (token(env))
+ {
+ case T_BANG:
+ eat(env);
+ if (!(f = node(env, REX_NEG, m, n, 0)))
+ {
+ drop(env->disc, e);
+ return 0;
+ }
+ f->re.group.expr.rex = e;
+ return f;
+ case T_QUES:
+ eat(env);
+ n = 1;
+ break;
+ case T_STAR:
+ eat(env);
+ break;
+ case T_PLUS:
+ eat(env);
+ m = 1;
+ break;
+ case T_LEFT:
+ eat(env);
+ m = env->token.min;
+ n = env->token.max;
+ break;
+ default:
+ return e;
+ }
+ if (env->token.att)
+ minimal = 1;
+ else if (env->type < SRE)
+ switch (token(env))
+ {
+ case T_QUES:
+ eat(env);
+ minimal = !(env->flags & REG_MINIMAL);
+ break;
+ case T_STAR: /*AST*/
+ eat(env);
+ minimal = !!(env->flags & REG_MINIMAL);
+ break;
+ }
+ switch (e->type)
+ {
+ case REX_DOT:
+ case REX_CLASS:
+ case REX_COLL_CLASS:
+ case REX_ONECHAR:
+ e->lo = m;
+ e->hi = n;
+ if (minimal >= 0)
+ mark(e, minimal);
+ return e;
+#if HUH_2002_08_07
+ case REX_BEG:
+#endif
+ case REX_BEG_STR:
+ case REX_END_STR:
+ case REX_FIN_STR:
+ case REX_WBEG:
+ case REX_WEND:
+ case REX_WORD:
+ case REX_WORD_NOT:
+ env->error = REG_BADRPT;
+ drop(env->disc, e);
+ return 0;
+ }
+ if (m == 1 && n == 1)
+ {
+ if (minimal >= 0)
+ mark(e, minimal);
+ return e;
+ }
+ if (!(f = node(env, REX_REP, m, n, 0)))
+ {
+ drop(env->disc, e);
+ return 0;
+ }
+ f->re.group.expr.rex = e;
+ f->re.group.number = number;
+ f->re.group.last = last;
+ if (minimal >= 0)
+ mark(f, minimal);
+ if (m <= n && n)
+ {
+ for (; e && e->type >= REX_GROUP && e->type <= REX_GROUP_CUT; e = e->re.group.expr.rex);
+ if (e && e->type == REX_NEG)
+ f->type = REX_GROUP;
+ }
+ return f;
+}
+
+static int
+isstring(Rex_t* e)
+{
+ switch (e->type)
+ {
+ case REX_ONECHAR:
+ return e->lo == 1 && e->hi == 1;
+ case REX_STRING:
+ return 1;
+ }
+ return 0;
+}
+
+static Trie_node_t*
+trienode(Cenv_t* env, int c)
+{
+ Trie_node_t* t;
+
+ if (t = (Trie_node_t*)alloc(env->disc, 0, sizeof(Trie_node_t)))
+ {
+ memset(t, 0, sizeof(Trie_node_t));
+ t->c = c;
+ }
+ return t;
+}
+
+static int
+insert(Cenv_t* env, Rex_t* f, Rex_t* g)
+{
+ unsigned char* s;
+ unsigned char* e;
+ Trie_node_t* t;
+ int len;
+ unsigned char tmp[2];
+
+ switch (f->type)
+ {
+ case REX_ONECHAR:
+ *(s = tmp) = f->re.onechar;
+ e = s + 1;
+ break;
+ case REX_STRING:
+ s = f->re.string.base;
+ e = s + f->re.string.size;
+ break;
+ default:
+ return 1;
+ }
+ if (!(t = g->re.trie.root[*s]) && !(t = g->re.trie.root[*s] = trienode(env, *s)))
+ return 1;
+ for (len = 1;;)
+ {
+ if (t->c == *s)
+ {
+ if (++s >= e)
+ break;
+ if (!t->son && !(t->son = trienode(env, *s)))
+ return 1;
+ t = t->son;
+ len++;
+ }
+ else
+ {
+ if (!t->sib && !(t->sib = trienode(env, *s)))
+ return 1;
+ t = t->sib;
+ }
+ }
+ if (g->re.trie.min > len)
+ g->re.trie.min = len;
+ if (g->re.trie.max < len)
+ g->re.trie.max = len;
+ t->end = 1;
+ return 0;
+}
+
+/*
+ * trie() tries to combine nontrivial e and f into a REX_TRIE
+ * unless 0 is returned, e and f are deleted as far as possible
+ * also called by regcomb
+ */
+
+static Rex_t*
+trie(Cenv_t* env, Rex_t* e, Rex_t* f)
+{
+ Rex_t* g;
+
+ if (e->next || f->next || !isstring(e) || e->flags != f->flags)
+ return 0;
+ if (isstring(f))
+ {
+ if (!(g = node(env, REX_TRIE, 0, 0, (UCHAR_MAX + 1) * sizeof(Trie_node_t*))))
+ return 0;
+ g->re.trie.min = INT_MAX;
+ if (insert(env, f, g))
+ goto nospace;
+ drop(env->disc, f);
+ }
+ else if (f->type != REX_TRIE)
+ return 0;
+ else
+ g = f;
+ if (insert(env, e, g))
+ goto nospace;
+ drop(env->disc, e);
+ return g;
+ nospace:
+ if (g != f)
+ drop(env->disc, g);
+ return 0;
+}
+
+static Rex_t* alt(Cenv_t*, int, int);
+
+static int
+chr(register Cenv_t* env, int* escaped)
+{
+ unsigned char* p;
+ int c;
+
+ *escaped = 0;
+ if (!(c = *env->cursor))
+ return -1;
+ env->cursor++;
+ if (c == '\\')
+ {
+ if (env->flags & REG_SHELL_ESCAPED)
+ return c;
+ if (!(c = *(env->cursor + 1)) || c == env->terminator)
+ {
+ if (env->flags & REG_LENIENT)
+ return c ? c : '\\';
+ env->error = REG_EESCAPE;
+ return -1;
+ }
+ p = env->cursor;
+ c = chresc((char*)env->cursor - 1, (char**)&env->cursor);
+ *escaped = env->cursor - p;
+ }
+ return c;
+}
+
+/*
+ * open the perly gates
+ */
+
+static Rex_t*
+grp(Cenv_t* env, int parno)
+{
+ Rex_t* e;
+ Rex_t* f;
+ int c;
+ int i;
+ int n;
+ int x;
+ int esc;
+ int typ;
+ int beg;
+ unsigned char* p;
+
+ beg = env->pattern == env->cursor - env->token.len;
+ if (!(c = env->token.lex) && (c = *env->cursor))
+ env->cursor++;
+ env->token.len = 0;
+ env->parnest++;
+ typ = -1;
+ switch (c)
+ {
+ case '-':
+ case '+':
+ case 'a':
+ case 'g':
+ case 'i':
+ case 'l':
+ case 'm':
+ case 'p':
+ case 'r':
+ case 's':
+ case 'x':
+ case 'A':
+ case 'B':
+ case 'E':
+ case 'F':
+ case 'G':
+ case 'I':
+ case 'K':
+ case 'L':
+ case 'M': /* glob(3) */
+ case 'N': /* glob(3) */
+ case 'O': /* glob(3) */
+ case 'P':
+ case 'R': /* pcre */
+ case 'S':
+ case 'U': /* pcre */
+ case 'X': /* pcre */
+ x = REX_GROUP;
+ i = 1;
+ env->token.push = 1;
+ for (;;)
+ {
+ switch (c)
+ {
+ case ')':
+ if (!(env->flags & REG_LITERAL))
+ {
+ env->error = REG_BADRPT;
+ return 0;
+ }
+ /*FALLTHROUGH*/
+ case 0:
+ case T_CLOSE:
+ x = 0;
+ goto done;
+ case ':':
+ eat(env);
+ if (token(env) == T_CLOSE)
+ x = 0;
+ goto done;
+ case '-':
+ i = 0;
+ break;
+ case '+':
+ i = 1;
+ break;
+ case 'a':
+ if (i)
+ env->flags |= (REG_LEFT|REG_RIGHT);
+ else
+ env->flags &= ~(REG_LEFT|REG_RIGHT);
+ break;
+ case 'g':
+ if (i)
+ env->flags &= ~REG_MINIMAL;
+ else
+ env->flags |= REG_MINIMAL;
+ break;
+ case 'i':
+ if (i)
+ env->flags |= REG_ICASE;
+ else
+ env->flags &= ~REG_ICASE;
+ break;
+ case 'l':
+ if (i)
+ env->flags |= REG_LEFT;
+ else
+ env->flags &= ~REG_LEFT;
+ break;
+ case 'm':
+ if (i)
+ env->flags |= REG_NEWLINE;
+ else
+ env->flags &= ~REG_NEWLINE;
+ env->explicit = (env->flags & (REG_NEWLINE|REG_SPAN)) == REG_NEWLINE ? env->mappednewline : -1;
+ break;
+ case 'p':
+ if (i)
+ env->flags &= ~REG_LENIENT;
+ else
+ env->flags |= REG_LENIENT;
+ break;
+ case 'r':
+ if (i)
+ env->flags |= REG_RIGHT;
+ else
+ env->flags &= ~REG_RIGHT;
+ break;
+ case 's':
+ if (i)
+ env->flags |= REG_SPAN;
+ else
+ env->flags &= ~REG_SPAN;
+ env->explicit = (env->flags & (REG_NEWLINE|REG_SPAN)) == REG_NEWLINE ? env->mappednewline : -1;
+ break;
+ case 'x':
+ if (i)
+ env->flags |= REG_COMMENT;
+ else
+ env->flags &= ~REG_COMMENT;
+ break;
+ case 'X':
+ if (typ >= 0 || env->type == ERE && (env->flags & REG_CLASS_ESCAPE))
+ break; /* PCRE_EXTRA */
+ /*FALLTHROUGH*/
+ case 'A':
+ env->flags &= ~(REG_AUGMENTED|REG_EXTENDED|REG_LITERAL|REG_SHELL|REG_LEFT|REG_RIGHT);
+ env->flags |= REG_AUGMENTED|REG_EXTENDED;
+ typ = ARE;
+ break;
+ case 'B':
+ case 'G':
+ env->flags &= ~(REG_AUGMENTED|REG_EXTENDED|REG_LITERAL|REG_SHELL|REG_LEFT|REG_RIGHT);
+ typ = BRE;
+ break;
+ case 'E':
+ env->flags &= ~(REG_AUGMENTED|REG_EXTENDED|REG_LITERAL|REG_SHELL|REG_LEFT|REG_RIGHT);
+ env->flags |= REG_EXTENDED;
+ typ = ERE;
+ break;
+ case 'F':
+ case 'L':
+ env->flags &= ~(REG_AUGMENTED|REG_EXTENDED|REG_LITERAL|REG_SHELL|REG_LEFT|REG_RIGHT);
+ env->flags |= REG_LITERAL;
+ typ = ERE;
+ break;
+ case 'K':
+ env->flags &= ~(REG_AUGMENTED|REG_EXTENDED|REG_LITERAL|REG_SHELL|REG_LEFT|REG_RIGHT);
+ env->flags |= REG_AUGMENTED|REG_SHELL|REG_LEFT|REG_RIGHT;
+ typ = KRE;
+ break;
+ case 'M':
+ /* used by caller to disable glob(3) GLOB_BRACE */
+ break;
+ case 'N':
+ /* used by caller to disable glob(3) GLOB_NOCHECK */
+ break;
+ case 'O':
+ /* used by caller to disable glob(3) GLOB_STARSTAR */
+ break;
+ case 'P':
+ env->flags &= ~(REG_AUGMENTED|REG_EXTENDED|REG_LITERAL|REG_SHELL|REG_LEFT|REG_RIGHT);
+ env->flags |= REG_EXTENDED|REG_CLASS_ESCAPE;
+ typ = ERE;
+ break;
+ case 'S':
+ env->flags &= ~(REG_AUGMENTED|REG_EXTENDED|REG_LITERAL|REG_SHELL|REG_LEFT|REG_RIGHT);
+ env->flags |= REG_SHELL|REG_LEFT|REG_RIGHT;
+ typ = SRE;
+ break;
+ case 'U': /* PCRE_UNGREEDY */
+ if (typ >= 0 || env->type == ERE && (env->flags & REG_CLASS_ESCAPE))
+ {
+ if (i)
+ env->flags |= REG_MINIMAL;
+ else
+ env->flags &= ~REG_MINIMAL;
+ }
+ break;
+ default:
+ env->error = REG_BADRPT;
+ return 0;
+ }
+ eat(env);
+ c = token(env);
+ }
+ done:
+ break;
+ case ':':
+ x = REX_GROUP;
+ break;
+ case '=':
+ x = REX_GROUP_AHEAD;
+ break;
+ case '!':
+ x = REX_GROUP_AHEAD_NOT;
+ break;
+ case '<':
+ switch (token(env))
+ {
+ case '=':
+ x = REX_GROUP_BEHIND;
+ break;
+ case '!':
+ case T_BANG:
+ x = REX_GROUP_BEHIND_NOT;
+ break;
+ default:
+ env->error = REG_BADRPT;
+ return 0;
+ }
+ eat(env);
+ break;
+ case '>':
+ x = REX_GROUP_CUT;
+ break;
+ case '%':
+ case T_PERCENT:
+ e = node(env, REX_NEST, 0, 0, (UCHAR_MAX + 1) * sizeof(unsigned short));
+ e->re.nest.primary = isalnum(*env->cursor) ? -1 : *env->cursor;
+ n = 1;
+ for (;;)
+ {
+ switch (i = chr(env, &esc))
+ {
+ case -1:
+ case 0:
+ invalid:
+ env->cursor -= esc + 1;
+ env->error = REG_EPAREN;
+ return 0;
+ case 'D':
+ x = REX_NEST_delimiter;
+ /*FALLTHROUGH*/
+ delimiter:
+ if ((i = chr(env, &esc)) < 0)
+ goto invalid;
+ if (e->re.nest.type[i] & ~x)
+ goto invalid;
+ e->re.nest.type[i] = x;
+ continue;
+ case 'E':
+ x = REX_NEST_escape;
+ goto delimiter;
+ case 'L':
+ x = REX_NEST_literal;
+ goto quote;
+ case 'O':
+ switch (i = chr(env, &esc))
+ {
+ case 'T':
+ e->re.nest.type[UCHAR_MAX+1] |= REX_NEST_terminator;
+ break;
+ default:
+ goto invalid;
+ }
+ continue;
+ case 'Q':
+ x = REX_NEST_quote;
+ /*FALLTHROUGH*/
+ quote:
+ if ((i = chr(env, &esc)) < 0)
+ goto invalid;
+ if (e->re.nest.type[i] & ~x)
+ goto invalid;
+ e->re.nest.type[i] = x|REX_NEST_open|REX_NEST_close|(i<<REX_NEST_SHIFT);
+ continue;
+ case 'S':
+ x = REX_NEST_separator;
+ goto delimiter;
+ case 'T':
+ x = REX_NEST_terminator;
+ goto delimiter;
+ case '|':
+ case '&':
+ if (!esc)
+ goto invalid;
+ goto nesting;
+ case '(':
+ if (!esc)
+ n++;
+ goto nesting;
+ case ')':
+ if (!esc && !--n)
+ break;
+ goto nesting;
+ default:
+ nesting:
+ if (isalnum(i) || (e->re.nest.type[i] & (REX_NEST_close|REX_NEST_escape|REX_NEST_literal|REX_NEST_quote|REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator)))
+ goto invalid;
+ e->re.nest.type[i] = REX_NEST_open;
+ if ((x = chr(env, &esc)) < 0 || (e->re.nest.type[x] & (REX_NEST_close|REX_NEST_escape|REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator)))
+ goto invalid;
+ if (!esc)
+ {
+ if (x == ')' && !--n)
+ goto invalid;
+ else if (x == '(')
+ n++;
+ }
+ e->re.nest.type[x] |= REX_NEST_close;
+ e->re.nest.type[i] |= x << REX_NEST_SHIFT;
+ continue;
+ }
+ break;
+ }
+ env->parnest--;
+ if (c == T_PERCENT)
+ for (n = 0; n < 2; n++)
+ {
+ parno = ++env->parno;
+ if (!(f = node(env, REX_GROUP, 0, 0, 0)))
+ {
+ drop(env->disc, e);
+ return 0;
+ }
+ if (parno < elementsof(env->paren))
+ env->paren[parno] = f;
+ f->re.group.back = 0;
+ f->re.group.number = parno;
+ f->re.group.expr.rex = e;
+ e = f;
+ }
+ return e;
+ case '(':
+ c = 0;
+ if (isdigit(*env->cursor))
+ {
+ f = 0;
+ do
+ {
+ if (c > (INT_MAX / 10))
+ {
+ env->error = REG_BADRPT;
+ return 0;
+ }
+ c = c * 10 + (*env->cursor++ - '0');
+ } while (isdigit(*env->cursor));
+ if (*env->cursor++ != ')')
+ {
+ env->error = REG_BADRPT;
+ return 0;
+ }
+ if (c && env->type >= SRE)
+ c = c * 2 - 1;
+ if (!c || c > env->parno || !env->paren[c])
+ {
+ if (!(env->flags & REG_LENIENT))
+ {
+ env->error = REG_ESUBREG;
+ return 0;
+ }
+ if (c)
+ c = -1;
+ }
+ }
+ else
+ {
+ if (env->type < SRE && *env->cursor++ != '?')
+ {
+ env->error = REG_BADRPT;
+ return 0;
+ }
+ if (!(f = grp(env, parno + 1)) && env->error)
+ return 0;
+ }
+ if (!(e = node(env, REX_GROUP_COND, 0, 0, 0)))
+ {
+ drop(env->disc, f);
+ return 0;
+ }
+ e->re.group.size = c;
+ e->re.group.expr.binary.left = f;
+ if (!(e->re.group.expr.binary.right = alt(env, parno, 1)))
+ {
+ drop(env->disc, e);
+ return 0;
+ }
+ if (token(env) != T_CLOSE)
+ {
+ env->error = REG_EPAREN;
+ return 0;
+ }
+ eat(env);
+ env->parnest--;
+ return rep(env, e, parno, parno);
+ case '{':
+ p = env->cursor;
+ n = 1;
+ while (c = *env->cursor)
+ {
+ if (c == '\\' && *(env->cursor + 1))
+ env->cursor++;
+ else if (c == '{')
+ n++;
+ else if (c == '}' && !--n)
+ break;
+ else if (c == env->delimiter || c == env->terminator)
+ break;
+ env->cursor++;
+ }
+ if (c != '}')
+ {
+ env->error = REG_EBRACE;
+ return 0;
+ }
+ if (*++env->cursor != ')')
+ {
+ env->error = REG_EPAREN;
+ return 0;
+ }
+ env->cursor++;
+ env->parnest--;
+ if (env->disc->re_version < REG_VERSION_EXEC)
+ {
+ env->error = REG_BADRPT;
+ return 0;
+ }
+ if (!env->disc->re_execf)
+ return 0;
+ if (!(e = node(env, REX_EXEC, 0, 0, 0)))
+ return 0;
+ e->re.exec.text = (const char*)p;
+ e->re.exec.size = env->cursor - p - 2;
+ if (!env->disc->re_compf)
+ e->re.exec.data = 0;
+ else
+ e->re.exec.data = (*env->disc->re_compf)(env->regex, e->re.exec.text, e->re.exec.size, env->disc);
+ return e;
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ c -= '0';
+ while (isdigit(*env->cursor))
+ {
+ if (c > (INT_MAX / 10))
+ {
+ env->error = REG_ESUBREG;
+ return 0;
+ }
+ c = c * 10 + *env->cursor++ - '0';
+ }
+ if (*env->cursor == ')')
+ {
+ env->cursor++;
+ env->parnest--;
+ env->token.len = 1;
+ if (c > env->parno || !env->paren[c])
+ {
+ env->error = REG_ESUBREG;
+ return 0;
+ }
+ env->paren[c]->re.group.back = 1;
+ return rep(env, node(env, REX_BACK, c, 0, 0), 0, 0);
+ }
+ /*FALLTHROUGH*/
+ default:
+ env->error = REG_BADRPT;
+ return 0;
+ }
+ if (x && !(e = alt(env, parno, 0)))
+ return 0;
+ c = token(env);
+ env->parnest--;
+ if (c != T_CLOSE && (!(env->flags & REG_LITERAL) || c != ')'))
+ {
+ env->error = REG_EPAREN;
+ return 0;
+ }
+ eat(env);
+ if (typ >= 0)
+ {
+ if (beg)
+ env->pattern = env->cursor;
+ env->type = typ;
+ }
+ if (!x)
+ return 0;
+ if (!(f = node(env, x, 0, 0, 0)))
+ {
+ drop(env->disc, e);
+ return 0;
+ }
+ f->re.group.expr.rex = e;
+ if (x == REX_GROUP_BEHIND || x == REX_GROUP_BEHIND_NOT)
+ {
+ if (stats(env, e))
+ {
+ drop(env->disc, f);
+ if (!env->error)
+ env->error = REG_ECOUNT;
+ return 0;
+ }
+ f->re.group.size = env->stats.m;
+ memset(&env->stats, 0, sizeof(env->stats));
+ }
+ switch (x)
+ {
+ case REX_GROUP:
+ case REX_GROUP_CUT:
+ f = rep(env, f, parno, env->parno);
+ break;
+ }
+ return f;
+}
+
+static Rex_t*
+seq(Cenv_t* env)
+{
+ Rex_t* e;
+ Rex_t* f;
+ Token_t tok;
+ int c;
+ int i;
+ int n;
+ int x;
+ int parno;
+ int type;
+ regflags_t flags;
+ unsigned char* s;
+ unsigned char* p;
+ unsigned char* t;
+ unsigned char* u;
+ unsigned char buf[256];
+
+ for (;;)
+ {
+ s = buf;
+ while ((c = token(env)) < T_META && s < &buf[sizeof(buf) - env->token.len])
+ {
+ x = c;
+ p = env->cursor;
+ if (c >= 0)
+ {
+ n = 1;
+ *s++ = (env->flags & REG_ICASE) ? toupper(c) : c;
+ }
+ else if (c == C_ESC || (env->flags & REG_ICASE))
+ {
+ c = (c == C_ESC) ? env->token.lex : mbchar(p);
+ if (env->flags & REG_ICASE)
+ c = towupper(c);
+ if ((&buf[sizeof(buf)] - s) < MB_CUR_MAX)
+ break;
+ if ((n = mbconv((char*)s, c)) < 0)
+ *s++ = c;
+ else if (n)
+ s += n;
+ else
+ {
+ n = 1;
+ *s++ = 0;
+ }
+ }
+ else
+ {
+ n = env->token.len - env->token.esc;
+ for (t = p, u = s + n; s < u; *s++ = *t++);
+ }
+ eat(env);
+ }
+ if (c == T_BAD)
+ return 0;
+ if (s > buf)
+ switch (c)
+ {
+ case T_STAR:
+ case T_PLUS:
+ case T_LEFT:
+ case T_QUES:
+ case T_BANG:
+ if ((s -= n) == buf)
+ e = 0;
+ else
+ {
+ i = s - buf;
+ if (!(e = node(env, REX_STRING, 0, 0, i)))
+ return 0;
+ memcpy((char*)(e->re.string.base = (unsigned char*)e->re.data), (char*)buf, i);
+ e->re.string.size = i;
+ }
+ if (x >= 0)
+ {
+ if (!(f = node(env, REX_ONECHAR, 1, 1, 0)))
+ {
+ drop(env->disc, e);
+ return 0;
+ }
+ f->re.onechar = (env->flags & REG_ICASE) ? toupper(x) : x;
+ }
+ else
+ {
+ if (!(f = node(env, REX_STRING, 0, 0, n)))
+ return 0;
+ memcpy((char*)(f->re.string.base = (unsigned char*)f->re.data), (char*)p, n);
+ f->re.string.size = n;
+ }
+ if (!(f = rep(env, f, 0, 0)) || !(f = cat(env, f, seq(env))))
+ {
+ drop(env->disc, e);
+ return 0;
+ }
+ if (e)
+ f = cat(env, e, f);
+ return f;
+ default:
+ c = s - buf;
+ if (!(e = node(env, REX_STRING, 0, 0, c)))
+ return 0;
+ memcpy((char*)(e->re.string.base = (unsigned char*)e->re.data), (char*)buf, c);
+ e->re.string.size = c;
+ return cat(env, e, seq(env));
+ }
+ else if (c > T_BACK)
+ {
+ eat(env);
+ c -= T_BACK;
+ if (c > env->parno || !env->paren[c])
+ {
+ env->error = REG_ESUBREG;
+ return 0;
+ }
+ env->paren[c]->re.group.back = 1;
+ e = rep(env, node(env, REX_BACK, c, 0, 0), 0, 0);
+ }
+ else
+ switch (c)
+ {
+ case T_AND:
+ case T_CLOSE:
+ case T_BAR:
+ case T_END:
+ return node(env, REX_NULL, 0, 0, 0);
+ case T_DOLL:
+ eat(env);
+ e = rep(env, node(env, REX_END, 0, 0, 0), 0, 0);
+ break;
+ case T_CFLX:
+ eat(env);
+ if ((e = node(env, REX_BEG, 0, 0, 0)) && (env->flags & REG_EXTENDED))
+ e = rep(env, e, 0, 0);
+ break;
+ case T_OPEN:
+ tok = env->token;
+ eat(env);
+ flags = env->flags;
+ type = env->type;
+ if (env->token.att)
+ env->flags |= REG_MINIMAL;
+ env->parnest++;
+ if (env->type == KRE)
+ ++env->parno;
+ parno = ++env->parno;
+ if (!(e = alt(env, parno + 1, 0)))
+ break;
+ if (e->type == REX_NULL && env->type == ERE && !(env->flags & REG_NULL))
+ {
+ drop(env->disc, e);
+ env->error = (*env->cursor == 0 || *env->cursor == env->delimiter || *env->cursor == env->terminator) ? REG_EPAREN : REG_ENULL;
+ return 0;
+ }
+ if (token(env) != T_CLOSE)
+ {
+ drop(env->disc, e);
+ env->error = REG_EPAREN;
+ return 0;
+ }
+ env->parnest--;
+ eat(env);
+ if (!(f = node(env, REX_GROUP, 0, 0, 0)))
+ {
+ drop(env->disc, e);
+ return 0;
+ }
+ if (parno < elementsof(env->paren))
+ env->paren[parno] = f;
+ f->re.group.back = 0;
+ f->re.group.number = parno;
+ f->re.group.expr.rex = e;
+ if (tok.lex)
+ {
+ tok.push = 1;
+ env->token = tok;
+ }
+ if (!(e = rep(env, f, parno, env->parno)))
+ return 0;
+ if (env->type == KRE)
+ {
+ if (!(f = node(env, REX_GROUP, 0, 0, 0)))
+ {
+ drop(env->disc, e);
+ return 0;
+ }
+ if (--parno < elementsof(env->paren))
+ env->paren[parno] = f;
+ f->re.group.back = 0;
+ f->re.group.number = parno;
+ f->re.group.expr.rex = e;
+ e = f;
+ }
+ env->flags = flags;
+ env->type = type;
+ break;
+ case T_GROUP:
+ p = env->cursor;
+ eat(env);
+ flags = env->flags;
+ type = env->type;
+ if (!(e = grp(env, env->parno + 1)))
+ {
+ if (env->error)
+ return 0;
+ if (env->literal == env->pattern && env->literal == p)
+ env->literal = env->cursor;
+ continue;
+ }
+ env->flags = flags;
+ env->type = type;
+ break;
+ case T_BRA:
+ eat(env);
+ if (e = bra(env))
+ e = rep(env, e, 0, 0);
+ break;
+ case T_ALNUM:
+ case T_ALNUM_NOT:
+ case T_DIGIT:
+ case T_DIGIT_NOT:
+ case T_SPACE:
+ case T_SPACE_NOT:
+ eat(env);
+ if (e = ccl(env, c))
+ e = rep(env, e, 0, 0);
+ break;
+ case T_LT:
+ eat(env);
+ e = rep(env, node(env, REX_WBEG, 0, 0, 0), 0, 0);
+ break;
+ case T_GT:
+ eat(env);
+ e = rep(env, node(env, REX_WEND, 0, 0, 0), 0, 0);
+ break;
+ case T_DOT:
+ eat(env);
+ e = rep(env, node(env, REX_DOT, 1, 1, 0), 0, 0);
+ break;
+ case T_DOTSTAR:
+ eat(env);
+ env->token.lex = T_STAR;
+ env->token.push = 1;
+ e = rep(env, node(env, REX_DOT, 1, 1, 0), 0, 0);
+ break;
+ case T_SLASHPLUS:
+ eat(env);
+ env->token.lex = T_PLUS;
+ env->token.push = 1;
+ if (e = node(env, REX_ONECHAR, 1, 1, 0))
+ {
+ e->re.onechar = '/';
+ e = rep(env, e, 0, 0);
+ }
+ break;
+ case T_WORD:
+ eat(env);
+ e = rep(env, node(env, REX_WORD, 0, 0, 0), 0, 0);
+ break;
+ case T_WORD_NOT:
+ eat(env);
+ e = rep(env, node(env, REX_WORD_NOT, 0, 0, 0), 0, 0);
+ break;
+ case T_BEG_STR:
+ eat(env);
+ e = rep(env, node(env, REX_BEG_STR, 0, 0, 0), 0, 0);
+ break;
+ case T_END_STR:
+ eat(env);
+ e = rep(env, node(env, REX_END_STR, 0, 0, 0), 0, 0);
+ break;
+ case T_FIN_STR:
+ eat(env);
+ e = rep(env, node(env, REX_FIN_STR, 0, 0, 0), 0, 0);
+ break;
+ default:
+ env->error = REG_BADRPT;
+ return 0;
+ }
+ if (e && *env->cursor != 0 && *env->cursor != env->delimiter && *env->cursor != env->terminator)
+ e = cat(env, e, seq(env));
+ return e;
+ }
+}
+
+static Rex_t*
+con(Cenv_t* env)
+{
+ Rex_t* e;
+ Rex_t* f;
+ Rex_t* g;
+
+ if (!(e = seq(env)) || !(env->flags & REG_AUGMENTED) || token(env) != T_AND)
+ return e;
+ eat(env);
+ if (!(f = con(env)))
+ {
+ drop(env->disc, e);
+ return 0;
+ }
+ if (!(g = node(env, REX_CONJ, 0, 0, 0)))
+ {
+ drop(env->disc, e);
+ drop(env->disc, f);
+ return 0;
+ }
+ g->re.group.expr.binary.left = e;
+ g->re.group.expr.binary.right = f;
+ return g;
+}
+
+static Rex_t*
+alt(Cenv_t* env, int number, int cond)
+{
+ Rex_t* e;
+ Rex_t* f;
+ Rex_t* g;
+
+ if (!(e = con(env)))
+ return 0;
+ else if (token(env) != T_BAR)
+ {
+ if (!cond)
+ return e;
+ f = 0;
+ if (e->type == REX_NULL)
+ goto bad;
+ }
+ else
+ {
+ eat(env);
+ if (!(f = alt(env, number, 0)))
+ {
+ drop(env->disc, e);
+ return 0;
+ }
+ if ((e->type == REX_NULL || f->type == REX_NULL) && !(env->flags & REG_NULL))
+ goto bad;
+ if (!cond && (g = trie(env, e, f)))
+ return g;
+ }
+ if (!(g = node(env, REX_ALT, 0, 0, 0)))
+ {
+ env->error = REG_ESPACE;
+ goto bad;
+ }
+ g->re.group.number = number;
+ g->re.group.last = env->parno;
+ g->re.group.expr.binary.left = e;
+ g->re.group.expr.binary.right = f;
+ return g;
+ bad:
+ drop(env->disc, e);
+ drop(env->disc, f);
+ if (!env->error)
+ env->error = REG_ENULL;
+ return 0;
+}
+
+/*
+ * add v to REX_BM tables
+ */
+
+static void
+bmstr(Cenv_t* env, register Rex_t* a, unsigned char* v, int n, Bm_mask_t b)
+{
+ int c;
+ int m;
+ size_t z;
+
+ for (m = 0; m < n; m++)
+ {
+ if (!(z = n - m - 1))
+ z = HIT;
+ c = v[m];
+ a->re.bm.mask[m][c] |= b;
+ if (z == HIT || !a->re.bm.skip[c] || a->re.bm.skip[c] > z && a->re.bm.skip[c] < HIT)
+ a->re.bm.skip[c] = z;
+ if (a->flags & REG_ICASE)
+ {
+ if (isupper(c))
+ c = tolower(c);
+ else if (islower(c))
+ c = toupper(c);
+ else
+ continue;
+ a->re.bm.mask[m][c] |= b;
+ if (z == HIT || !a->re.bm.skip[c] || a->re.bm.skip[c] > z && a->re.bm.skip[c] < HIT)
+ a->re.bm.skip[c] = z;
+ }
+ }
+}
+
+/*
+ * set up BM table from trie
+ */
+
+static int
+bmtrie(Cenv_t* env, Rex_t* a, unsigned char* v, Trie_node_t* x, int n, int m, Bm_mask_t b)
+{
+ do
+ {
+ v[m] = x->c;
+ if (m >= (n - 1))
+ {
+ bmstr(env, a, v, n, b);
+ if (!(b <<= 1))
+ {
+ b = 1;
+ a->re.bm.complete = 0;
+ }
+ else if (x->son)
+ a->re.bm.complete = 0;
+ }
+ else if (x->son)
+ b = bmtrie(env, a, v, x->son, n, m + 1, b);
+ } while (x = x->sib);
+ return b;
+}
+
+/*
+ * rewrite the expression tree for some special cases
+ * 1. it is a null expression - illegal
+ * 2. max length fixed string found -- use BM algorithm
+ * 3. it begins with an unanchored string - use KMP algorithm
+ * 0 returned on success
+ */
+
+static int
+special(Cenv_t* env, regex_t* p)
+{
+ Rex_t* a;
+ Rex_t* e;
+ Rex_t* t;
+ Rex_t* x;
+ Rex_t* y;
+ unsigned char* s;
+ int* f;
+ int n;
+ int m;
+ int k;
+
+ DEBUG_INIT();
+ if (e = p->env->rex)
+ {
+ if ((x = env->stats.x) && x->re.string.size < 3)
+ x = 0;
+ if ((t = env->stats.y) && t->re.trie.min < 3)
+ t = 0;
+ if (x && t)
+ {
+ if (x->re.string.size >= t->re.trie.min)
+ t = 0;
+ else
+ x = 0;
+ }
+ if (x || t)
+ {
+ Bm_mask_t** mask;
+ Bm_mask_t* h;
+ unsigned char* v;
+ size_t* q;
+ unsigned long l;
+ int i;
+ int j;
+
+ if (x)
+ {
+ y = x;
+ n = m = x->re.string.size;
+ l = env->stats.l;
+ }
+ else
+ {
+ y = t;
+ n = t->re.trie.min;
+ m = t->re.trie.max;
+ l = env->stats.k;
+ }
+ if (!(q = (size_t*)alloc(env->disc, 0, (n + 1) * sizeof(size_t))))
+ return 1;
+ if (!(a = node(env, REX_BM, 0, 0, n * (sizeof(Bm_mask_t*) + (UCHAR_MAX + 1) * sizeof(Bm_mask_t)) + (UCHAR_MAX + n + 2) * sizeof(size_t))))
+ {
+ alloc(env->disc, q, 0);
+ return 1;
+ }
+ a->flags = y->flags;
+ a->map = y->map;
+ a->re.bm.size = n;
+ a->re.bm.back = (y == e || y == e->re.group.expr.rex) ? (m - n) : -1;
+ a->re.bm.left = l - 1;
+ a->re.bm.right = env->stats.m - l - n;
+ a->re.bm.complete = (env->stats.e || y != e && (e->type != REX_GROUP || y != e->re.group.expr.rex) || e->next || ((a->re.bm.left + a->re.bm.right) >= 0)) ? 0 : n;
+ h = (Bm_mask_t*)&a->re.bm.mask[n];
+ a->re.bm.skip = (size_t*)(h + n * (UCHAR_MAX + 1));
+ a->re.bm.fail = &a->re.bm.skip[UCHAR_MAX + 1];
+ for (m = 0; m <= UCHAR_MAX; m++)
+ a->re.bm.skip[m] = n;
+ a->re.bm.skip[0] = a->re.bm.skip[env->mappednewline] = (y->next && y->next->type == REX_END) ? HIT : (n + a->re.bm.left);
+ for (i = 1; i <= n; i++)
+ a->re.bm.fail[i] = 2 * n - i;
+ mask = a->re.bm.mask;
+ for (m = 0; m < n; m++)
+ {
+ mask[m] = h;
+ h += UCHAR_MAX + 1;
+ }
+ if (x)
+ bmstr(env, a, x->re.string.base, n, 1);
+ else
+ {
+ v = (unsigned char*)q;
+ memset(v, 0, n);
+ m = 1;
+ for (i = 0; i <= UCHAR_MAX; i++)
+ if (t->re.trie.root[i])
+ m = bmtrie(env, a, v, t->re.trie.root[i], n, 0, m);
+ }
+ mask--;
+ memset(q, 0, n * sizeof(*q));
+ for (k = (j = n) + 1; j > 0; j--, k--)
+ {
+ DEBUG_TEST(0x0010,(sfprintf(sfstderr, "BM#0: k=%d j=%d\n", k, j)),(0));
+ for (q[j] = k; k <= n; k = q[k])
+ {
+ for (m = 0; m <= UCHAR_MAX; m++)
+ if (mask[k][m] == mask[j][m])
+ {
+ DEBUG_TEST(0x0010,sfprintf(sfstderr, "CUT1: mask[%d][%c]=mask[%d][%c]\n", k, m, j, m), (0));
+ goto cut;
+ }
+ DEBUG_TEST(0x0010,sfprintf(sfstderr, "BM#2: fail[%d]=%d => %d\n", k, a->re.bm.fail[k], (a->re.bm.fail[k] > n - j) ? (n - j) : a->re.bm.fail[k]), (0));
+ if (a->re.bm.fail[k] > n - j)
+ a->re.bm.fail[k] = n - j;
+ }
+ cut: ;
+ }
+ for (i = 1; i <= n; i++)
+ if (a->re.bm.fail[i] > n + k - i)
+ {
+ DEBUG_TEST(0x0010,sfprintf(sfstderr, "BM#4: fail[%d]=%d => %d\n", i, a->re.bm.fail[i], n + k - i), (0));
+ a->re.bm.fail[i] = n + k - i;
+ }
+#if _AST_REGEX_DEBUG
+ if (DEBUG_TEST(0x0020,(1),(0)))
+ {
+ sfprintf(sfstderr, "STAT: complete=%d n=%d k=%d l=%d r=%d y=%d:%d e=%d:%d\n", a->re.bm.complete, n, k, a->re.bm.left, a->re.bm.right, y->type, y->next ? y->next->type : 0, e->type, e->next ? e->next->type : 0);
+ for (m = 0; m < n; m++)
+ for (i = 1; i <= UCHAR_MAX; i++)
+ if (a->re.bm.mask[m][i])
+ sfprintf(sfstderr, "MASK: [%d]['%c'] = %032..2u\n", m, i, a->re.bm.mask[m][i]);
+ for (i = ' '; i <= UCHAR_MAX; i++)
+ if (a->re.bm.skip[i] >= HIT)
+ sfprintf(sfstderr, "SKIP: ['%c'] = *\n", i);
+ else if (a->re.bm.skip[i] > 0 && a->re.bm.skip[i] < n)
+ sfprintf(sfstderr, "SKIP: ['%c'] = %3d\n", i, a->re.bm.skip[i]);
+ for (j = 31; j >= 0; j--)
+ {
+ sfprintf(sfstderr, " ");
+ next:
+ for (m = 0; m < n; m++)
+ {
+ for (i = 0040; i < 0177; i++)
+ if (a->re.bm.mask[m][i] & (1 << j))
+ {
+ sfprintf(sfstderr, " %c", i);
+ break;
+ }
+ if (i >= 0177)
+ {
+ if (j > 0)
+ {
+ j--;
+ goto next;
+ }
+ sfprintf(sfstderr, " ?");
+ }
+ }
+ sfprintf(sfstderr, "\n");
+ }
+ sfprintf(sfstderr, "FAIL: ");
+ for (m = 1; m <= n; m++)
+ sfprintf(sfstderr, "%3d", a->re.bm.fail[m]);
+ sfprintf(sfstderr, "\n");
+ }
+#endif
+ alloc(env->disc, q, 0);
+ a->next = e;
+ p->env->rex = a;
+ return 0;
+ }
+ switch (e->type)
+ {
+ case REX_BEG:
+ if (env->flags & REG_NEWLINE)
+ return 0;
+ break;
+ case REX_GROUP:
+ if (env->stats.b)
+ return 0;
+ e = e->re.group.expr.rex;
+ if (e->type != REX_DOT)
+ return 0;
+ /*FALLTHROUGH*/
+ case REX_DOT:
+ if (e->lo == 0 && e->hi == RE_DUP_INF)
+ break;
+ return 0;
+ case REX_NULL:
+ if (env->flags & REG_NULL)
+ break;
+ env->error = REG_ENULL;
+ return 1;
+ case REX_STRING:
+ if ((env->flags & (REG_LEFT|REG_LITERAL|REG_RIGHT)) || e->map)
+ return 0;
+ s = e->re.string.base;
+ n = e->re.string.size;
+ if (!(a = node(env, REX_KMP, 0, 0, n * (sizeof(int*) + 1))))
+ return 1;
+ a->flags = e->flags;
+ a->map = e->map;
+ f = a->re.string.fail;
+ memcpy((char*)(a->re.string.base = (unsigned char*)&f[n]), (char*)s, n);
+ s = a->re.string.base;
+ a->re.string.size = n;
+ f[0] = m = -1;
+ for (k = 1; k < n; k++)
+ {
+ while (m >= 0 && s[m+1] != s[k])
+ m = f[m];
+ if (s[m+1] == s[k])
+ m++;
+ f[k] = m;
+ }
+ a->next = e->next;
+ p->env->rex = a;
+ e->next = 0;
+ drop(env->disc, e);
+ break;
+ default:
+ return 0;
+ }
+ }
+ p->env->once = 1;
+ return 0;
+}
+
+int
+regcomp(regex_t* p, const char* pattern, regflags_t flags)
+{
+ Rex_t* e;
+ Rex_t* f;
+ regdisc_t* disc;
+ unsigned char* fold;
+ int i;
+ Cenv_t env;
+
+ if (!p)
+ return REG_BADPAT;
+ if (flags & REG_DISCIPLINE)
+ {
+ flags &= ~REG_DISCIPLINE;
+ disc = p->re_disc;
+ }
+ else
+ disc = &state.disc;
+ if (!disc->re_errorlevel)
+ disc->re_errorlevel = 2;
+ p->env = 0;
+ if (!pattern)
+ return fatal(disc, REG_BADPAT, pattern);
+ if (!state.initialized)
+ {
+ state.initialized = 1;
+ for (i = 0; i < elementsof(state.escape); i++)
+ state.magic[state.escape[i].key] = state.escape[i].val;
+ }
+ if (!(fold = (unsigned char*)LCINFO(AST_LC_CTYPE)->data))
+ {
+ if (!(fold = newof(0, unsigned char, UCHAR_MAX, 1)))
+ return fatal(disc, REG_ESPACE, pattern);
+ for (i = 0; i <= UCHAR_MAX; i++)
+ fold[i] = toupper(i);
+ LCINFO(AST_LC_CTYPE)->data = (void*)fold;
+ }
+ again:
+ if (!(p->env = (Env_t*)alloc(disc, 0, sizeof(Env_t))))
+ return fatal(disc, REG_ESPACE, pattern);
+ memset(p->env, 0, sizeof(*p->env));
+ memset(&env, 0, sizeof(env));
+ env.regex = p;
+ env.flags = flags;
+ env.disc = p->env->disc = disc;
+ if (env.flags & REG_AUGMENTED)
+ env.flags |= REG_EXTENDED;
+ env.mappeddot = '.';
+ env.mappednewline = '\n';
+ env.mappedslash = '/';
+ if (disc->re_version >= REG_VERSION_MAP && disc->re_map)
+ {
+ env.map = disc->re_map;
+ env.MAP = p->env->fold;
+ for (i = 0; i <= UCHAR_MAX; i++)
+ {
+ env.MAP[i] = fold[env.map[i]];
+ if (env.map[i] == '.')
+ env.mappeddot = i;
+ if (env.map[i] == '\n')
+ env.mappednewline = i;
+ if (env.map[i] == '/')
+ env.mappedslash = i;
+ }
+ }
+ else
+ env.MAP = fold;
+ env.type = (env.flags & REG_AUGMENTED) ? ARE : (env.flags & REG_EXTENDED) ? ERE : BRE;
+ env.explicit = -1;
+ if (env.flags & REG_SHELL)
+ {
+ if (env.flags & REG_SHELL_PATH)
+ env.explicit = env.mappedslash;
+ if (!(env.flags & REG_SHELL_ESCAPED))
+ env.flags |= REG_CLASS_ESCAPE;
+ env.flags |= REG_LENIENT|REG_NULL;
+ env.type = env.type == BRE ? SRE : KRE;
+ }
+ else
+ env.flags &= ~(REG_SHELL_DOT|REG_SHELL_ESCAPED|REG_SHELL_GROUP|REG_SHELL_PATH);
+ if ((env.flags & (REG_NEWLINE|REG_SPAN)) == REG_NEWLINE)
+ env.explicit = env.mappednewline;
+ p->env->leading = (env.flags & REG_SHELL_DOT) ? env.mappeddot : -1;
+ env.posixkludge = !(env.flags & (REG_EXTENDED|REG_SHELL));
+ env.regexp = !!(env.flags & REG_REGEXP);
+ env.token.lex = 0;
+ env.token.push = 0;
+ if (env.flags & REG_DELIMITED)
+ {
+ switch (env.delimiter = *pattern++)
+ {
+ case 0:
+ case '\\':
+ case '\n':
+ case '\r':
+ env.error = REG_EDELIM;
+ goto bad;
+ }
+ env.terminator = '\n';
+ }
+ env.literal = env.pattern = env.cursor = (unsigned char*)pattern;
+ if (!(p->env->rex = alt(&env, 1, 0)))
+ goto bad;
+ if (env.parnest)
+ {
+ env.error = REG_EPAREN;
+ goto bad;
+ }
+ if ((env.flags & REG_LEFT) && p->env->rex->type != REX_BEG)
+ {
+ if (p->env->rex->type == REX_ALT)
+ env.flags &= ~REG_FIRST;
+ if (!(e = node(&env, REX_BEG, 0, 0, 0)))
+ {
+ regfree(p);
+ return fatal(disc, REG_ESPACE, pattern);
+ }
+ e->next = p->env->rex;
+ p->env->rex = e;
+ p->env->once = 1;
+ }
+ for (e = p->env->rex; e->next; e = e->next);
+ p->env->done.type = REX_DONE;
+ p->env->done.flags = e->flags;
+ if ((env.flags & REG_RIGHT) && e->type != REX_END)
+ {
+ if (p->env->rex->type == REX_ALT)
+ env.flags &= ~REG_FIRST;
+ if (!(f = node(&env, REX_END, 0, 0, 0)))
+ {
+ regfree(p);
+ return fatal(disc, REG_ESPACE, pattern);
+ }
+ f->flags = e->flags;
+ f->map = e->map;
+ e->next = f;
+ }
+ if (stats(&env, p->env->rex))
+ {
+ if (!env.error)
+ env.error = REG_ECOUNT;
+ goto bad;
+ }
+ if (env.stats.b)
+ p->env->hard = p->env->separate = 1;
+ else if (!(env.flags & REG_FIRST) && (env.stats.a || env.stats.c > 1 && env.stats.c != env.stats.s || env.stats.t && (env.stats.t > 1 || env.stats.a || env.stats.c)))
+ p->env->hard = 1;
+ if (p->env->hard || env.stats.c || env.stats.i)
+ p->env->stats.re_min = p->env->stats.re_max = -1;
+ else
+ {
+ if (!(p->env->stats.re_min = env.stats.m))
+ p->env->stats.re_min = -1;
+ if (!(p->env->stats.re_max = env.stats.n))
+ p->env->stats.re_max = -1;
+ }
+ if (special(&env, p))
+ goto bad;
+ serialize(&env, p->env->rex, 1);
+ p->re_nsub = env.stats.p;
+ if (env.type == KRE)
+ p->re_nsub /= 2;
+ if (env.flags & REG_DELIMITED)
+ {
+ p->re_npat = env.cursor - env.pattern + 1;
+ if (*env.cursor == env.delimiter)
+ p->re_npat++;
+ else if (env.flags & REG_MUSTDELIM)
+ {
+ env.error = REG_EDELIM;
+ goto bad;
+ }
+ else
+ env.flags &= ~REG_DELIMITED;
+ }
+ p->env->explicit = env.explicit;
+ p->env->flags = env.flags & REG_COMP;
+ p->env->min = env.stats.m;
+ p->env->nsub = env.stats.p + env.stats.u;
+ p->env->refs = 1;
+ return 0;
+ bad:
+ regfree(p);
+ if (!env.error)
+ env.error = REG_ESPACE;
+ if (env.type >= SRE && env.error != REG_ESPACE && !(flags & REG_LITERAL))
+ {
+ flags |= REG_LITERAL;
+ pattern = (const char*)env.literal;
+ goto again;
+ }
+ return fatal(disc, env.error, pattern);
+}
+
+/*
+ * regcomp() on sized pattern
+ * the lazy way around adding and checking env.end
+ */
+
+int
+regncomp(regex_t* p, const char* pattern, size_t size, regflags_t flags)
+{
+ char* s;
+ int r;
+
+ if (!(s = malloc(size + 1)))
+ return fatal((flags & REG_DISCIPLINE) ? p->re_disc : &state.disc, REG_ESPACE, pattern);
+ memcpy(s, pattern, size);
+ s[size] = 0;
+ r = regcomp(p, s, flags);
+ free(s);
+ return r;
+}
+
+/*
+ * combine two compiled regular expressions if possible,
+ * replacing first with the combination and freeing second.
+ * return 0 on success.
+ * the only combinations handled are building a Trie
+ * from String|Kmp|Trie and String|Kmp
+ */
+
+int
+regcomb(regex_t* p, regex_t* q)
+{
+ Rex_t* e = p->env->rex;
+ Rex_t* f = q->env->rex;
+ Rex_t* g;
+ Rex_t* h;
+ Cenv_t env;
+
+ if (!e || !f)
+ return fatal(p->env->disc, REG_BADPAT, NiL);
+ if (p->env->separate || q->env->separate)
+ return REG_ESUBREG;
+ memset(&env, 0, sizeof(env));
+ env.disc = p->env->disc;
+ if (e->type == REX_BM)
+ {
+ p->env->rex = e->next;
+ e->next = 0;
+ drop(env.disc, e);
+ e = p->env->rex;
+ }
+ if (f->type == REX_BM)
+ {
+ q->env->rex = f->next;
+ f->next = 0;
+ drop(env.disc, f);
+ f = q->env->rex;
+ }
+ if (e->type == REX_BEG && f->type == REX_BEG)
+ {
+ p->env->flags |= REG_LEFT;
+ p->env->rex = e->next;
+ e->next = 0;
+ drop(env.disc, e);
+ e = p->env->rex;
+ q->env->rex = f->next;
+ f->next = 0;
+ drop(env.disc, f);
+ f = q->env->rex;
+ }
+ for (g = e; g->next; g = g->next);
+ for (h = f; h->next; h = h->next);
+ if (g->next && g->next->type == REX_END && h->next && h->next->type == REX_END)
+ {
+ p->env->flags |= REG_RIGHT;
+ drop(env.disc, g->next);
+ g->next = 0;
+ drop(env.disc, h->next);
+ h->next = 0;
+ }
+ if (!(g = trie(&env, f, e)))
+ return fatal(p->env->disc, REG_BADPAT, NiL);
+ p->env->rex = g;
+ if (!q->env->once)
+ p->env->once = 0;
+ q->env->rex = 0;
+ if (p->env->flags & REG_LEFT)
+ {
+ if (!(e = node(&env, REX_BEG, 0, 0, 0)))
+ {
+ regfree(p);
+ return fatal(p->env->disc, REG_ESPACE, NiL);
+ }
+ e->next = p->env->rex;
+ p->env->rex = e;
+ p->env->once = 1;
+ }
+ if (p->env->flags & REG_RIGHT)
+ {
+ for (f = p->env->rex; f->next; f = f->next);
+ if (f->type != REX_END)
+ {
+ if (!(e = node(&env, REX_END, 0, 0, 0)))
+ {
+ regfree(p);
+ return fatal(p->env->disc, REG_ESPACE, NiL);
+ }
+ f->next = e;
+ }
+ }
+ env.explicit = p->env->explicit;
+ env.flags = p->env->flags;
+ env.disc = p->env->disc;
+ if (stats(&env, p->env->rex))
+ {
+ regfree(p);
+ return fatal(p->env->disc, env.error ? env.error : REG_ECOUNT, NiL);
+ }
+ if (special(&env, p))
+ {
+ regfree(p);
+ return fatal(p->env->disc, env.error ? env.error : REG_ESPACE, NiL);
+ }
+ p->env->min = g->re.trie.min;
+ return 0;
+}
+
+/*
+ * copy a reference of p into q
+ * p and q may then have separate regsubcomp() instantiations
+ */
+
+int
+regdup(regex_t* p, regex_t* q)
+{
+ if (!p || !q)
+ return REG_BADPAT;
+ *q = *p;
+ p->env->refs++;
+ q->re_sub = 0;
+ return 0;
+}
diff --git a/src/lib/libast/regex/regdecomp.c b/src/lib/libast/regex/regdecomp.c
new file mode 100644
index 0000000..a9fe6f3
--- /dev/null
+++ b/src/lib/libast/regex/regdecomp.c
@@ -0,0 +1,448 @@
+/***********************************************************************
+* *
+* This software is part of the ast package *
+* Copyright (c) 1985-2011 AT&T Intellectual Property *
+* and is licensed under the *
+* Eclipse Public License, Version 1.0 *
+* by AT&T Intellectual Property *
+* *
+* A copy of the License is available at *
+* http://www.eclipse.org/org/documents/epl-v10.html *
+* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
+* *
+* Information and Software Systems Research *
+* AT&T Research *
+* Florham Park NJ *
+* *
+* Glenn Fowler <gsf@research.att.com> *
+* David Korn <dgk@research.att.com> *
+* Phong Vo <kpv@research.att.com> *
+* *
+***********************************************************************/
+#pragma prototyped
+
+/*
+ * posix regex decompiler
+ */
+
+#include "reglib.h"
+
+#undef ismeta
+#define ismeta(c,t,e,d) (state.magic[c] && state.magic[c][(t)+(e)] >= T_META || (c) == (d))
+#define meta(f,c,t,e,d) do { if (ismeta(c,t,e,d)) sfputc(f, '\\'); sfputc(f, c); } while (0)
+
+static void
+detrie(Trie_node_t* x, Sfio_t* sp, char* b, char* p, char* e, int delimiter)
+{
+ register Trie_node_t* y;
+ char* o;
+ int k;
+
+ o = p;
+ k = 1;
+ do
+ {
+ if (k)
+ {
+ o = p;
+ if (p < e)
+ *p++ = x->c;
+ }
+ sfputc(sp, x->c);
+ for (y = x->sib; y; y = y->sib)
+ {
+ sfputc(sp, '|');
+ sfputc(sp, '<');
+ sfwrite(sp, b, p - b);
+ sfputc(sp, '>');
+ detrie(y, sp, b, p, e, delimiter);
+ }
+ if (x->end && x->son)
+ {
+ sfputc(sp, '|');
+ sfputc(sp, '{');
+ sfwrite(sp, b, p - b);
+ sfputc(sp, '}');
+ p = o;
+ }
+ } while (x = x->son);
+}
+
+static int
+decomp(register Rex_t* e, Sfio_t* sp, int type, int delimiter, regflags_t flags)
+{
+ Rex_t* q;
+ unsigned char* s;
+ unsigned char* t;
+ int c;
+ int m;
+ int cb;
+ int cd;
+ int cr;
+ int ib;
+ int ie;
+ int nb;
+ int ne;
+ unsigned char ic[2*UCHAR_MAX];
+ unsigned char nc[2*UCHAR_MAX];
+
+ do
+ {
+ switch (e->type)
+ {
+ case REX_ALT:
+ if (decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags))
+ return 1;
+ sfputc(sp, '|');
+ if (e->re.group.expr.binary.right && decomp(e->re.group.expr.binary.right, sp, type, delimiter, flags))
+ return 1;
+ break;
+ case REX_BACK:
+ sfprintf(sp, "\\%d", e->lo);
+ break;
+ case REX_BEG:
+ if (type < SRE)
+ sfputc(sp, '^');
+ break;
+ case REX_END:
+ if (type < SRE)
+ sfputc(sp, '$');
+ break;
+ case REX_WBEG:
+ meta(sp, '<', type, 1, delimiter);
+ break;
+ case REX_WEND:
+ meta(sp, '<', type, 1, delimiter);
+ break;
+ case REX_WORD:
+ sfprintf(sp, "\\w");
+ break;
+ case REX_CLASS:
+ case REX_COLL_CLASS:
+ case REX_ONECHAR:
+ case REX_DOT:
+ case REX_REP:
+ if (type >= SRE)
+ {
+ c = ')';
+ if (e->hi == RE_DUP_INF)
+ {
+ if (!e->lo)
+ sfputc(sp, '*');
+ else if (e->lo == 1)
+ sfputc(sp, '+');
+ else
+ sfprintf(sp, "{%d,}", e->lo);
+ }
+ else if (e->hi != 1)
+ sfprintf(sp, "{%d,%d}", e->lo, e->hi);
+ else if (e->lo == 0)
+ sfputc(sp, '?');
+ else
+ c = 0;
+ }
+ switch (e->type)
+ {
+ case REX_REP:
+ if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
+ return 1;
+ break;
+ case REX_CLASS:
+ sfputc(sp, '[');
+ nb = ne = ib = ie = -2;
+ cb = cd = cr = 0;
+ s = nc;
+ t = ic;
+ for (m = 0; m <= UCHAR_MAX; m++)
+ if (settst(e->re.charclass, m))
+ {
+ if (m == ']')
+ cb = 1;
+ else if (m == '-')
+ cr = 1;
+ else if (m == delimiter)
+ cd = 1;
+ else if (nb < 0)
+ ne = nb = m;
+ else if (ne == (m - 1))
+ ne = m;
+ else
+ {
+ if (ne == nb)
+ *s++ = ne;
+ else
+ {
+ *s++ = nb;
+ *s++ = '-';
+ *s++ = ne;
+ }
+ ne = nb = m;
+ }
+ }
+ else
+ {
+ if (m == ']')
+ cb = -1;
+ else if (m == '-')
+ cr = -1;
+ else if (m == delimiter)
+ cd = -1;
+ else if (ib < 0)
+ ie = ib = m;
+ else if (ie == (m - 1))
+ ie = m;
+ else
+ {
+ if (ie == ib)
+ *t++ = ie;
+ else
+ {
+ *t++ = ib;
+ *t++ = '-';
+ *t++ = ie;
+ }
+ ie = ib = m;
+ }
+ }
+ if (nb >= 0)
+ {
+ *s++ = nb;
+ if (ne != nb)
+ {
+ *s++ = '-';
+ *s++ = ne;
+ }
+ }
+ if (ib >= 0)
+ {
+ *t++ = ib;
+ if (ie != ib)
+ {
+ *t++ = '-';
+ *t++ = ie;
+ }
+ }
+ if ((t - ic + 1) < (s - nc + (nc[0] == '^')))
+ {
+ sfputc(sp, '^');
+ if (cb < 0)
+ sfputc(sp, ']');
+ if (cr < 0)
+ sfputc(sp, '-');
+ if (cd < 0 && delimiter > 0)
+ {
+ if (flags & REG_ESCAPE)
+ sfputc(sp, '\\');
+ sfputc(sp, delimiter);
+ }
+ sfwrite(sp, ic, t - ic);
+ }
+ else
+ {
+ if (cb > 0)
+ sfputc(sp, ']');
+ if (cr > 0)
+ sfputc(sp, '-');
+ if (cd > 0 && delimiter > 0)
+ {
+ if (flags & REG_ESCAPE)
+ sfputc(sp, '\\');
+ sfputc(sp, delimiter);
+ }
+ if (nc[0] == '^')
+ {
+ sfwrite(sp, nc + 1, s - nc - 1);
+ sfputc(sp, '^');
+ }
+ else
+ sfwrite(sp, nc, s - nc);
+ }
+ sfputc(sp, ']');
+ break;
+ case REX_COLL_CLASS:
+ break;
+ case REX_ONECHAR:
+ meta(sp, e->re.onechar, type, 0, delimiter);
+ break;
+ case REX_DOT:
+ sfputc(sp, '.');
+ break;
+ }
+ if (type < SRE)
+ {
+ if (e->hi == RE_DUP_INF)
+ {
+ if (!e->lo)
+ sfputc(sp, '*');
+ else if (e->lo == 1 && ismeta('+', type, 0, delimiter))
+ meta(sp, '+', type, 1, delimiter);
+ else
+ {
+ meta(sp, '{', type, 1, delimiter);
+ sfprintf(sp, "%d,", e->lo);
+ meta(sp, '}', type, 1, delimiter);
+ }
+ }
+ else if (e->hi != 1 || e->lo == 0 && !ismeta('?', type, 0, delimiter))
+ {
+ meta(sp, '{', type, 1, delimiter);
+ sfprintf(sp, "%d,%d", e->lo, e->hi);
+ meta(sp, '}', type, 1, delimiter);
+ }
+ else if (e->lo == 0)
+ meta(sp, '?', type, 1, delimiter);
+ }
+ else if (c)
+ sfputc(sp, c);
+ break;
+ case REX_STRING:
+ case REX_KMP:
+ t = (s = e->re.string.base) + e->re.string.size;
+ while (s < t)
+ {
+ c = *s++;
+ meta(sp, c, type, 0, delimiter);
+ }
+ break;
+ case REX_TRIE:
+ ib = 0;
+ for (c = 0; c <= UCHAR_MAX; c++)
+ if (e->re.trie.root[c])
+ {
+ char pfx[1024];
+
+ if (ib)
+ sfputc(sp, '|');
+ else
+ ib = 1;
+ detrie(e->re.trie.root[c], sp, pfx, pfx, &pfx[sizeof(pfx)], delimiter);
+ }
+ break;
+ case REX_NEG:
+ if (type >= SRE)
+ sfprintf(sp, "!(");
+ if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
+ return 1;
+ if (type >= SRE)
+ sfputc(sp, ')');
+ else
+ sfputc(sp, '!');
+ break;
+ case REX_CONJ:
+ if (decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags))
+ return 1;
+ sfputc(sp, '&');
+ if (decomp(e->re.group.expr.binary.right, sp, type, delimiter, flags))
+ return 1;
+ break;
+ case REX_GROUP:
+ if (type >= SRE)
+ sfputc(sp, '@');
+ meta(sp, '(', type, 1, delimiter);
+ if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
+ return 1;
+ meta(sp, ')', type, 1, delimiter);
+ break;
+ case REX_GROUP_AHEAD:
+ case REX_GROUP_AHEAD_NOT:
+ case REX_GROUP_BEHIND:
+ case REX_GROUP_BEHIND_NOT:
+ meta(sp, '(', type, 1, delimiter);
+ sfputc(sp, '?');
+ if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
+ return 1;
+ meta(sp, ')', type, 1, delimiter);
+ break;
+ case REX_GROUP_COND:
+ meta(sp, '(', type, 1, delimiter);
+ sfputc(sp, '?');
+ if (e->re.group.expr.binary.left && decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags))
+ return 1;
+ if (q = e->re.group.expr.binary.right)
+ {
+ sfputc(sp, ':');
+ if (q->re.group.expr.binary.left && decomp(q->re.group.expr.binary.left, sp, type, delimiter, flags))
+ return 1;
+ sfputc(sp, ':');
+ if (q->re.group.expr.binary.right && decomp(q->re.group.expr.binary.right, sp, type, delimiter, flags))
+ return 1;
+ }
+ meta(sp, ')', type, 1, delimiter);
+ break;
+ case REX_GROUP_CUT:
+ meta(sp, '(', type, 1, delimiter);
+ sfputc(sp, '?');
+ if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
+ return 1;
+ meta(sp, ')', type, 1, delimiter);
+ break;
+ case REX_BM:
+ break;
+ default:
+ sfprintf(sp, "<ERROR:REX_%d>", e->type);
+ break;
+ }
+ } while (e = e->next);
+ return 0;
+}
+
+/*
+ * reconstruct pattern from compiled re p into sp
+ */
+
+size_t
+regdecomp(regex_t* p, regflags_t flags, char* buf, size_t n)
+{
+ Sfio_t* sp;
+ char* s;
+ int type;
+ int delimiter;
+ size_t r;
+
+ if (!(sp = sfstropen()))
+ return 0;
+ if (flags == (regflags_t)~0)
+ flags = p->env->flags;
+ switch (flags & (REG_AUGMENTED|REG_EXTENDED|REG_SHELL))
+ {
+ case 0:
+ type = BRE;
+ break;
+ case REG_AUGMENTED:
+ case REG_AUGMENTED|REG_EXTENDED:
+ type = ARE;
+ break;
+ case REG_EXTENDED:
+ type = ERE;
+ break;
+ case REG_SHELL:
+ type = SRE;
+ break;
+ default:
+ type = KRE;
+ break;
+ }
+ if (flags & REG_DELIMITED)
+ {
+ delimiter = '/';
+ sfputc(sp, delimiter);
+ }
+ else
+ delimiter = -1;
+ if (decomp(p->env->rex, sp, type, delimiter, flags))
+ r = 0;
+ else
+ {
+ if (delimiter > 0)
+ sfputc(sp, delimiter);
+ if ((r = sfstrtell(sp) + 1) <= n)
+ {
+ if (!(s = sfstruse(sp)))
+ r = 0;
+ else
+ memcpy(buf, s, r);
+ }
+ }
+ sfstrclose(sp);
+ return r;
+}
diff --git a/src/lib/libast/regex/regerror.c b/src/lib/libast/regex/regerror.c
new file mode 100644
index 0000000..3d0e40d
--- /dev/null
+++ b/src/lib/libast/regex/regerror.c
@@ -0,0 +1,95 @@
+/***********************************************************************
+* *
+* This software is part of the ast package *
+* Copyright (c) 1985-2011 AT&T Intellectual Property *
+* and is licensed under the *
+* Eclipse Public License, Version 1.0 *
+* by AT&T Intellectual Property *
+* *
+* A copy of the License is available at *
+* http://www.eclipse.org/org/documents/epl-v10.html *
+* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
+* *
+* Information and Software Systems Research *
+* AT&T Research *
+* Florham Park NJ *
+* *
+* Glenn Fowler <gsf@research.att.com> *
+* David Korn <dgk@research.att.com> *
+* Phong Vo <kpv@research.att.com> *
+* *
+***********************************************************************/
+#pragma prototyped
+
+/*
+ * posix regex error message handler
+ */
+
+static const char id[] = "\n@(#)$Id: regex (AT&T Research) 2010-09-22 $\0\n";
+
+#include "reglib.h"
+
+static const char* reg_error[] =
+{
+ /* REG_ENOSYS */ "not supported",
+ /* REG_SUCCESS */ "success",
+ /* REG_NOMATCH */ "no match",
+ /* REG_BADPAT */ "invalid regular expression",
+ /* REG_ECOLLATE */ "invalid collation element",
+ /* REG_ECTYPE */ "invalid character class",
+ /* REG_EESCAPE */ "trailing \\ in pattern",
+ /* REG_ESUBREG */ "invalid \\digit backreference",
+ /* REG_EBRACK */ "[...] imbalance",
+ /* REG_EPAREN */ "\\(...\\) or (...) imbalance",
+ /* REG_EBRACE */ "\\{...\\} or {...} imbalance",
+ /* REG_BADBR */ "invalid {...} digits",
+ /* REG_ERANGE */ "invalid [...] range endpoint",
+ /* REG_ESPACE */ "out of space",
+ /* REG_BADRPT */ "unary op not preceded by re",
+ /* REG_ENULL */ "empty subexpr in pattern",
+ /* REG_ECOUNT */ "re component count overflow",
+ /* REG_BADESC */ "invalid \\char escape",
+ /* REG_VERSIONID*/ &id[10],
+ /* REG_EFLAGS */ "conflicting flags",
+ /* REG_EDELIM */ "invalid or omitted delimiter",
+ /* REG_PANIC */ "unrecoverable internal error",
+};
+
+size_t
+regerror(int code, const regex_t* p, char* buf, size_t size)
+{
+ const char* s;
+
+ NoP(p);
+ if (code++ == REG_VERSIONID)
+ s = (const char*)fmtident(&id[1]);
+ else if (code >= 0 && code < elementsof(reg_error))
+ s = reg_error[code];
+ else
+ s = (const char*)"unknown error";
+ if (size)
+ {
+ strlcpy(buf, s, size);
+ buf[size - 1] = 0;
+ }
+ else
+ buf = (char*)s;
+ return strlen(buf) + 1;
+}
+
+/*
+ * discipline error intercept
+ */
+
+int
+fatal(regdisc_t* disc, int code, const char* pattern)
+{
+ if (disc->re_errorf)
+ {
+ if (pattern)
+ (*disc->re_errorf)(NiL, disc, disc->re_errorlevel, "regular expression: %s: %s", pattern, reg_error[code+1]);
+ else
+ (*disc->re_errorf)(NiL, disc, disc->re_errorlevel, "regular expression: %s", reg_error[code+1]);
+ }
+ return code;
+}
diff --git a/src/lib/libast/regex/regexec.c b/src/lib/libast/regex/regexec.c
new file mode 100644
index 0000000..fb8f428
--- /dev/null
+++ b/src/lib/libast/regex/regexec.c
@@ -0,0 +1,54 @@
+/***********************************************************************
+* *
+* This software is part of the ast package *
+* Copyright (c) 1985-2011 AT&T Intellectual Property *
+* and is licensed under the *
+* Eclipse Public License, Version 1.0 *
+* by AT&T Intellectual Property *
+* *
+* A copy of the License is available at *
+* http://www.eclipse.org/org/documents/epl-v10.html *
+* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
+* *
+* Information and Software Systems Research *
+* AT&T Research *
+* Florham Park NJ *
+* *
+* Glenn Fowler <gsf@research.att.com> *
+* David Korn <dgk@research.att.com> *
+* Phong Vo <kpv@research.att.com> *
+* *
+***********************************************************************/
+#pragma prototyped
+
+/*
+ * posix regex executor
+ * single unsized-string interface
+ */
+
+#include "reglib.h"
+
+/*
+ * standard wrapper for the sized-record interface
+ */
+
+int
+regexec(const regex_t* p, const char* s, size_t nmatch, regmatch_t* match, regflags_t flags)
+{
+ if (flags & REG_STARTEND)
+ {
+ int r;
+ int m = match->rm_so;
+ regmatch_t* e;
+
+ if (!(r = regnexec(p, s + m, match->rm_eo - m, nmatch, match, flags)) && m > 0)
+ for (e = match + nmatch; match < e; match++)
+ if (match->rm_so >= 0)
+ {
+ match->rm_so += m;
+ match->rm_eo += m;
+ }
+ return r;
+ }
+ return regnexec(p, s, s ? strlen(s) : 0, nmatch, match, flags);
+}
diff --git a/src/lib/libast/regex/regfatal.c b/src/lib/libast/regex/regfatal.c
new file mode 100644
index 0000000..09e8689
--- /dev/null
+++ b/src/lib/libast/regex/regfatal.c
@@ -0,0 +1,49 @@
+/***********************************************************************
+* *
+* This software is part of the ast package *
+* Copyright (c) 1985-2011 AT&T Intellectual Property *
+* and is licensed under the *
+* Eclipse Public License, Version 1.0 *
+* by AT&T Intellectual Property *
+* *
+* A copy of the License is available at *
+* http://www.eclipse.org/org/documents/epl-v10.html *
+* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
+* *
+* Information and Software Systems Research *
+* AT&T Research *
+* Florham Park NJ *
+* *
+* Glenn Fowler <gsf@research.att.com> *
+* David Korn <dgk@research.att.com> *
+* Phong Vo <kpv@research.att.com> *
+* *
+***********************************************************************/
+#pragma prototyped
+
+/*
+ * posix regex fatal error interface to error()
+ */
+
+#include "reglib.h"
+
+#include <error.h>
+
+void
+regfatalpat(regex_t* p, int level, int code, const char* pat)
+{
+ char buf[128];
+
+ regerror(code, p, buf, sizeof(buf));
+ regfree(p);
+ if (pat)
+ error(level, "regular expression: %s: %s", pat, buf);
+ else
+ error(level, "regular expression: %s", buf);
+}
+
+void
+regfatal(regex_t* p, int level, int code)
+{
+ regfatalpat(p, level, code, NiL);
+}
diff --git a/src/lib/libast/regex/reginit.c b/src/lib/libast/regex/reginit.c
new file mode 100644
index 0000000..e4719b4
--- /dev/null
+++ b/src/lib/libast/regex/reginit.c
@@ -0,0 +1,412 @@
+/***********************************************************************
+* *
+* This software is part of the ast package *
+* Copyright (c) 1985-2011 AT&T Intellectual Property *
+* and is licensed under the *
+* Eclipse Public License, Version 1.0 *
+* by AT&T Intellectual Property *
+* *
+* A copy of the License is available at *
+* http://www.eclipse.org/org/documents/epl-v10.html *
+* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
+* *
+* Information and Software Systems Research *
+* AT&T Research *
+* Florham Park NJ *
+* *
+* Glenn Fowler <gsf@research.att.com> *
+* David Korn <dgk@research.att.com> *
+* Phong Vo <kpv@research.att.com> *
+* *
+***********************************************************************/
+#pragma prototyped
+
+/*
+ * posix regex state and alloc
+ */
+
+#include "reglib.h"
+
+#if _PACKAGE_ast
+
+#include <ccode.h>
+
+#else
+
+#define CC_bel '\a'
+#define CC_esc '\033'
+#define CC_vt '\v'
+
+#endif
+
+/*
+ * state shared by all threads
+ */
+
+State_t state =
+{
+ { -1, -1 },
+
+ /*
+ * escape code table
+ * the "funny" things get special treatment at ends of BRE
+ *
+ * BRE 0:normal 1:escaped 2:escaped-char-class
+ * ERE 3:normal 4:escaped 5:escaped-char-class
+ * ARE 6:normal 7:escaped 8:escaped-char-class
+ * SRE 9:normal 10:escaped 11:escaped-char-class
+ * KRE 12:normal 13:escaped 14:escaped-char-class
+ */
+
+ '\\',
+ '\\', '\\', '\\',
+ '\\', '\\', '\\',
+ '\\', '\\', '\\',
+ '\\', '\\', '\\',
+ '\\', '\\', '\\',
+ '^', /* funny */
+ '^', '^', '^',
+ T_CFLX, '^', '^',
+ T_CFLX, '^', '^',
+ '^', '^', '^',
+ '^', '^', '^',
+ '.',
+ T_DOT, '.', T_BAD,
+ T_DOT, '.', T_BAD,
+ T_DOT, '.', T_BAD,
+ '.', '.', T_BAD,
+ '.', '.', T_BAD,
+ '$', /* funny */
+ '$', '$', T_BAD,
+ T_DOLL, '$', T_BAD,
+ T_DOLL, '$', T_BAD,
+ '$', '$', T_BAD,
+ '$', '$', T_BAD,
+ '*',
+ T_STAR, '*', T_BAD,
+ T_STAR, '*', T_BAD,
+ T_STAR, '*', T_BAD,
+ T_STAR, '*', '*',
+ T_STAR, '*', '*',
+ '[',
+ T_BRA, '[', '[',
+ T_BRA, '[', '[',
+ T_BRA, '[', '[',
+ T_BRA, '[', '[',
+ T_BRA, '[', '[',
+ '|',
+ '|', T_BAD, T_BAD,
+ T_BAR, '|', T_BAD,
+ T_BAR, '|', T_BAD,
+ '|', '|', T_BAD,
+ T_BAR, '|', T_BAD,
+ '+',
+ '+', T_BAD, T_BAD,
+ T_PLUS, '+', T_BAD,
+ T_PLUS, '+', T_BAD,
+ '+', '+', T_BAD,
+ T_PLUS, '+', T_BAD,
+ '?',
+ '?', T_BAD, T_BAD,
+ T_QUES, '?', T_BAD,
+ T_QUES, '?', T_BAD,
+ T_QUES, '?', '?',
+ T_QUES, '?', '?',
+ '(',
+ '(', T_OPEN, T_BAD,
+ T_OPEN, '(', T_BAD,
+ T_OPEN, '(', T_BAD,
+ '(', '(', '(',
+ T_OPEN, '(', '(',
+ ')',
+ ')', T_CLOSE, T_BAD,
+ T_CLOSE, ')', T_BAD,
+ T_CLOSE, ')', T_BAD,
+ ')', ')', ')',
+ T_CLOSE, ')', ')',
+ '{',
+ '{', T_LEFT, T_BAD,
+ T_LEFT, '{', T_BAD,
+ T_LEFT, '{', T_BAD,
+ '{', '{', '{',
+ T_LEFT, '{', '{',
+ '}',
+ '}', T_RIGHT, T_BAD,
+ '}', T_BAD, T_BAD,
+ '}', T_BAD, T_BAD,
+ '}', '}', '}',
+ '}', '}', '}',
+ '&',
+ '&', T_BAD, T_BAD,
+ '&', T_AND, T_BAD,
+ T_AND, '&', T_BAD,
+ '&', '&', T_BAD,
+ T_AND, '&', T_BAD,
+ '!',
+ '!', T_BAD, T_BAD,
+ '!', T_BANG, T_BAD,
+ T_BANG, '!', T_BAD,
+ '!', '!', T_BAD,
+ T_BANG, '!', T_BAD,
+ '@',
+ '@', T_BAD, T_BAD,
+ '@', T_BAD, T_BAD,
+ '@', T_BAD, T_BAD,
+ '@', '@', T_BAD,
+ T_AT, '@', T_BAD,
+ '~',
+ '~', T_BAD, T_BAD,
+ '~', T_BAD, T_BAD,
+ '~', T_BAD, T_BAD,
+ '~', '~', T_BAD,
+ T_TILDE, '~', T_BAD,
+ '%',
+ '%', T_BAD, T_BAD,
+ '%', T_BAD, T_BAD,
+ '%', T_BAD, T_BAD,
+ '%', '%', T_BAD,
+ T_PERCENT, '%', T_BAD,
+ '<',
+ '<', T_LT, T_BAD,
+ '<', T_LT, T_BAD,
+ T_LT, '<', T_BAD,
+ '<', '<', T_BAD,
+ '<', '<', T_BAD,
+ '>',
+ '>', T_GT, T_BAD,
+ '>', T_GT, T_BAD,
+ T_GT, '>', T_BAD,
+ '>', '>', T_BAD,
+ '>', '>', T_BAD,
+
+ /* backrefs */
+
+ '0',
+ '0', T_BACK+0, T_ESCAPE,
+ '0', T_BACK+0, T_ESCAPE,
+ '0', T_BACK+0, T_ESCAPE,
+ '0', T_BACK+0, T_ESCAPE,
+ '0', T_BACK+0, T_ESCAPE,
+ '1',
+ '1', T_BACK+1, T_ESCAPE,
+ '1', T_BACK+1, T_ESCAPE,
+ '1', T_BACK+1, T_ESCAPE,
+ '1', T_BACK+1, T_ESCAPE,
+ '1', T_BACK+1, T_ESCAPE,
+ '2',
+ '2', T_BACK+2, T_ESCAPE,
+ '2', T_BACK+2, T_ESCAPE,
+ '2', T_BACK+2, T_ESCAPE,
+ '2', T_BACK+2, T_ESCAPE,
+ '2', T_BACK+2, T_ESCAPE,
+ '3',
+ '3', T_BACK+3, T_ESCAPE,
+ '3', T_BACK+3, T_ESCAPE,
+ '3', T_BACK+3, T_ESCAPE,
+ '3', T_BACK+3, T_ESCAPE,
+ '3', T_BACK+3, T_ESCAPE,
+ '4',
+ '4', T_BACK+4, T_ESCAPE,
+ '4', T_BACK+4, T_ESCAPE,
+ '4', T_BACK+4, T_ESCAPE,
+ '4', T_BACK+4, T_ESCAPE,
+ '4', T_BACK+4, T_ESCAPE,
+ '5',
+ '5', T_BACK+5, T_ESCAPE,
+ '5', T_BACK+5, T_ESCAPE,
+ '5', T_BACK+5, T_ESCAPE,
+ '5', T_BACK+5, T_ESCAPE,
+ '5', T_BACK+5, T_ESCAPE,
+ '6',
+ '6', T_BACK+6, T_ESCAPE,
+ '6', T_BACK+6, T_ESCAPE,
+ '6', T_BACK+6, T_ESCAPE,
+ '6', T_BACK+6, T_ESCAPE,
+ '6', T_BACK+6, T_ESCAPE,
+ '7',
+ '7', T_BACK+7, T_ESCAPE,
+ '7', T_BACK+7, T_ESCAPE,
+ '7', T_BACK+7, T_ESCAPE,
+ '7', T_BACK+7, T_ESCAPE,
+ '7', T_BACK+7, T_ESCAPE,
+ '8',
+ '8', T_BACK+8, T_ESCAPE,
+ '8', T_BACK+8, T_ESCAPE,
+ '8', T_BACK+8, T_ESCAPE,
+ '8', '8', T_ESCAPE,
+ '8', T_BACK+8, T_ESCAPE,
+ '9',
+ '9', T_BACK+9, T_ESCAPE,
+ '9', T_BACK+9, T_ESCAPE,
+ '9', T_BACK+9, T_ESCAPE,
+ '9', '9', T_ESCAPE,
+ '9', T_BACK+9, T_ESCAPE,
+
+ /* perl */
+
+ 'A',
+ 'A', T_BEG_STR, T_BAD,
+ 'A', T_BEG_STR, T_BAD,
+ 'A', T_BEG_STR, T_BAD,
+ 'A', T_BEG_STR, T_BAD,
+ 'A', T_BEG_STR, T_BAD,
+ 'b',
+ 'b', T_WORD, '\b',
+ 'b', T_WORD, '\b',
+ 'b', T_WORD, '\b',
+ 'b', T_WORD, '\b',
+ 'b', T_WORD, '\b',
+ 'B',
+ 'B', T_WORD_NOT, T_BAD,
+ 'B', T_WORD_NOT, T_BAD,
+ 'B', T_WORD_NOT, T_BAD,
+ 'B', T_WORD_NOT, T_BAD,
+ 'B', T_WORD_NOT, T_BAD,
+ 'd',
+ 'd', T_DIGIT, T_DIGIT,
+ 'd', T_DIGIT, T_DIGIT,
+ 'd', T_DIGIT, T_DIGIT,
+ 'd', T_DIGIT, T_DIGIT,
+ 'd', T_DIGIT, T_DIGIT,
+ 'D',
+ 'D', T_DIGIT_NOT, T_DIGIT_NOT,
+ 'D', T_DIGIT_NOT, T_DIGIT_NOT,
+ 'D', T_DIGIT_NOT, T_DIGIT_NOT,
+ 'D', T_DIGIT_NOT, T_DIGIT_NOT,
+ 'D', T_DIGIT_NOT, T_DIGIT_NOT,
+ 's',
+ 's', T_SPACE, T_SPACE,
+ 's', T_SPACE, T_SPACE,
+ 's', T_SPACE, T_SPACE,
+ 's', T_SPACE, T_SPACE,
+ 's', T_SPACE, T_SPACE,
+ 'S',
+ 'S', T_SPACE_NOT, T_SPACE_NOT,
+ 'S', T_SPACE_NOT, T_SPACE_NOT,
+ 'S', T_SPACE_NOT, T_SPACE_NOT,
+ 'S', T_SPACE_NOT, T_SPACE_NOT,
+ 'S', T_SPACE_NOT, T_SPACE_NOT,
+ 'w',
+ 'w', T_ALNUM, T_ALNUM,
+ 'w', T_ALNUM, T_ALNUM,
+ 'w', T_ALNUM, T_ALNUM,
+ 'w', T_ALNUM, T_ALNUM,
+ 'w', T_ALNUM, T_ALNUM,
+ 'W',
+ 'W', T_ALNUM_NOT, T_ALNUM_NOT,
+ 'W', T_ALNUM_NOT, T_ALNUM_NOT,
+ 'W', T_ALNUM_NOT, T_ALNUM_NOT,
+ 'W', T_ALNUM_NOT, T_ALNUM_NOT,
+ 'W', T_ALNUM_NOT, T_ALNUM_NOT,
+ 'z',
+ 'z', T_FIN_STR, T_BAD,
+ 'z', T_FIN_STR, T_BAD,
+ 'z', T_FIN_STR, T_BAD,
+ 'z', T_FIN_STR, T_BAD,
+ 'z', T_FIN_STR, T_BAD,
+ 'Z',
+ 'Z', T_END_STR, T_BAD,
+ 'Z', T_END_STR, T_BAD,
+ 'Z', T_END_STR, T_BAD,
+ 'Z', T_END_STR, T_BAD,
+ 'Z', T_END_STR, T_BAD,
+
+ /* C escapes */
+
+ 'a',
+ 'a', CC_bel, CC_bel,
+ 'a', CC_bel, CC_bel,
+ 'a', CC_bel, CC_bel,
+ 'a', CC_bel, CC_bel,
+ 'a', CC_bel, CC_bel,
+ 'c',
+ 'c', T_ESCAPE, T_ESCAPE,
+ 'c', T_ESCAPE, T_ESCAPE,
+ 'c', T_ESCAPE, T_ESCAPE,
+ 'c', T_ESCAPE, T_ESCAPE,
+ 'c', T_ESCAPE, T_ESCAPE,
+ 'C',
+ 'C', T_ESCAPE, T_ESCAPE,
+ 'C', T_ESCAPE, T_ESCAPE,
+ 'C', T_ESCAPE, T_ESCAPE,
+ 'C', T_ESCAPE, T_ESCAPE,
+ 'C', T_ESCAPE, T_ESCAPE,
+ 'e',
+ 'e', CC_esc, CC_esc,
+ 'e', CC_esc, CC_esc,
+ 'e', CC_esc, CC_esc,
+ 'e', CC_esc, CC_esc,
+ 'e', CC_esc, CC_esc,
+ 'E',
+ 'E', CC_esc, CC_esc,
+ 'E', CC_esc, CC_esc,
+ 'E', CC_esc, CC_esc,
+ 'E', CC_esc, CC_esc,
+ 'E', CC_esc, CC_esc,
+ 'f',
+ 'f', '\f', '\f',
+ 'f', '\f', '\f',
+ 'f', '\f', '\f',
+ 'f', '\f', '\f',
+ 'f', '\f', '\f',
+ 'n',
+ 'n', '\n', '\n',
+ 'n', '\n', '\n',
+ 'n', '\n', '\n',
+ 'n', '\n', '\n',
+ 'n', '\n', '\n',
+ 'r',
+ 'r', '\r', '\r',
+ 'r', '\r', '\r',
+ 'r', '\r', '\r',
+ 'r', '\r', '\r',
+ 'r', '\r', '\r',
+ 't',
+ 't', '\t', '\t',
+ 't', '\t', '\t',
+ 't', '\t', '\t',
+ 't', '\t', '\t',
+ 't', '\t', '\t',
+ 'v',
+ 'v', CC_vt, CC_vt,
+ 'v', CC_vt, CC_vt,
+ 'v', CC_vt, CC_vt,
+ 'v', CC_vt, CC_vt,
+ 'v', CC_vt, CC_vt,
+ 'x',
+ 'x', T_ESCAPE, T_ESCAPE,
+ 'x', T_ESCAPE, T_ESCAPE,
+ 'x', T_ESCAPE, T_ESCAPE,
+ 'x', T_ESCAPE, T_ESCAPE,
+ 'x', T_ESCAPE, T_ESCAPE,
+};
+
+/*
+ * all allocation/free done here
+ * interface compatible with vmresize()
+ *
+ * malloc(n) alloc(0,n)
+ * realloc(p,n) alloc(p,n)
+ * free(p) alloc(p,0)
+ */
+
+void*
+alloc(register regdisc_t* disc, void* p, size_t n)
+{
+ if (disc->re_resizef)
+ {
+ if (!n && (disc->re_flags & REG_NOFREE))
+ return 0;
+ return (*disc->re_resizef)(disc->re_resizehandle, p, n);
+ }
+ else if (!n)
+ {
+ if (!(disc->re_flags & REG_NOFREE))
+ free(p);
+ return 0;
+ }
+ else if (p)
+ return realloc(p, n);
+ else
+ return malloc(n);
+}
diff --git a/src/lib/libast/regex/reglib.h b/src/lib/libast/regex/reglib.h
new file mode 100644
index 0000000..6b84e5d
--- /dev/null
+++ b/src/lib/libast/regex/reglib.h
@@ -0,0 +1,582 @@
+/***********************************************************************
+* *
+* This software is part of the ast package *
+* Copyright (c) 1985-2011 AT&T Intellectual Property *
+* and is licensed under the *
+* Eclipse Public License, Version 1.0 *
+* by AT&T Intellectual Property *
+* *
+* A copy of the License is available at *
+* http://www.eclipse.org/org/documents/epl-v10.html *
+* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
+* *
+* Information and Software Systems Research *
+* AT&T Research *
+* Florham Park NJ *
+* *
+* Glenn Fowler <gsf@research.att.com> *
+* David Korn <dgk@research.att.com> *
+* Phong Vo <kpv@research.att.com> *
+* *
+***********************************************************************/
+#pragma prototyped
+
+/*
+ * posix regex implementation
+ *
+ * based on Doug McIlroy's C++ implementation
+ * Knuth-Morris-Pratt adapted from Corman-Leiserson-Rivest
+ * Boyer-Moore from conversations with David Korn, Phong Vo, Andrew Hume
+ */
+
+#ifndef _REGLIB_H
+#define _REGLIB_H
+
+#define REG_VERSION_EXEC 20020509L
+#define REG_VERSION_MAP 20030916L /* regdisc_t.re_map */
+
+#define re_info env
+
+#define alloc _reg_alloc
+#define classfun _reg_classfun
+#define drop _reg_drop
+#define fatal _reg_fatal
+#define state _reg_state
+
+typedef struct regsubop_s
+{
+ int op; /* REG_SUB_LOWER,REG_SUB_UPPER */
+ int off; /* re_rhs or match[] offset */
+ int len; /* re_rhs len or len==0 match[] */
+} regsubop_t;
+
+#define _REG_SUB_PRIVATE_ \
+ char* re_cur; /* re_buf cursor */ \
+ char* re_end; /* re_buf end */ \
+ regsubop_t* re_ops; /* rhs ops */ \
+ char re_rhs[1]; /* substitution rhs */
+
+#include <ast.h>
+#include <cdt.h>
+#include <stk.h>
+
+#include "regex.h"
+
+#include <ctype.h>
+#include <errno.h>
+
+#if _BLD_DEBUG && !defined(_AST_REGEX_DEBUG)
+#define _AST_REGEX_DEBUG 1
+#endif
+
+#define MBSIZE(p) ((ast.tmp_int=mbsize(p))>0?ast.tmp_int:1)
+
+#undef RE_DUP_MAX /* posix puts this in limits.h! */
+#define RE_DUP_MAX (INT_MAX/2-1) /* 2*RE_DUP_MAX won't overflow */
+#define RE_DUP_INF (RE_DUP_MAX+1) /* infinity, for * */
+#define BACK_REF_MAX 9
+
+#define REG_COMP (~REG_EXEC)
+#define REG_EXEC (REG_ADVANCE|REG_INVERT|REG_NOTBOL|REG_NOTEOL|REG_STARTEND)
+
+#define REX_NULL 0 /* null string (internal) */
+#define REX_ALT 1 /* a|b */
+#define REX_ALT_CATCH 2 /* REX_ALT catcher */
+#define REX_BACK 3 /* \1, \2, etc */
+#define REX_BEG 4 /* initial ^ */
+#define REX_BEG_STR 5 /* initial ^ w/ no newline */
+#define REX_BM 6 /* Boyer-Moore */
+#define REX_CAT 7 /* catenation catcher */
+#define REX_CLASS 8 /* [...] */
+#define REX_COLL_CLASS 9 /* collation order [...] */
+#define REX_CONJ 10 /* a&b */
+#define REX_CONJ_LEFT 11 /* REX_CONJ left catcher */
+#define REX_CONJ_RIGHT 12 /* REX_CONJ right catcher */
+#define REX_DONE 13 /* completed match (internal) */
+#define REX_DOT 14 /* . */
+#define REX_END 15 /* final $ */
+#define REX_END_STR 16 /* final $ before tail newline */
+#define REX_EXEC 17 /* call re.re_exec() */
+#define REX_FIN_STR 18 /* final $ w/ no newline */
+#define REX_GROUP 19 /* \(...\) */
+#define REX_GROUP_CATCH 20 /* REX_GROUP catcher */
+#define REX_GROUP_AHEAD 21 /* 0-width lookahead */
+#define REX_GROUP_AHEAD_CATCH 22 /* REX_GROUP_AHEAD catcher */
+#define REX_GROUP_AHEAD_NOT 23 /* inverted 0-width lookahead */
+#define REX_GROUP_BEHIND 24 /* 0-width lookbehind */
+#define REX_GROUP_BEHIND_CATCH 25 /* REX_GROUP_BEHIND catcher */
+#define REX_GROUP_BEHIND_NOT 26 /* inverted 0-width lookbehind */
+#define REX_GROUP_BEHIND_NOT_CATCH 27 /* REX_GROUP_BEHIND_NOT catcher */
+#define REX_GROUP_COND 28 /* conditional group */
+#define REX_GROUP_COND_CATCH 29 /* conditional group catcher */
+#define REX_GROUP_CUT 30 /* don't backtrack over this */
+#define REX_GROUP_CUT_CATCH 31 /* REX_GROUP_CUT catcher */
+#define REX_KMP 32 /* Knuth-Morris-Pratt */
+#define REX_NEG 33 /* negation */
+#define REX_NEG_CATCH 34 /* REX_NEG catcher */
+#define REX_NEST 35 /* nested match */
+#define REX_ONECHAR 36 /* a single-character literal */
+#define REX_REP 37 /* Kleene closure */
+#define REX_REP_CATCH 38 /* REX_REP catcher */
+#define REX_STRING 39 /* some chars */
+#define REX_TRIE 40 /* alternation of strings */
+#define REX_WBEG 41 /* \< */
+#define REX_WEND 42 /* \> */
+#define REX_WORD 43 /* word boundary */
+#define REX_WORD_NOT 44 /* not word boundary */
+
+#define T_META ((int)UCHAR_MAX+1)
+#define T_STAR (T_META+0)
+#define T_PLUS (T_META+1)
+#define T_QUES (T_META+2)
+#define T_BANG (T_META+3)
+#define T_AT (T_META+4)
+#define T_TILDE (T_META+5)
+#define T_PERCENT (T_META+6)
+#define T_LEFT (T_META+7)
+#define T_OPEN (T_META+8)
+#define T_CLOSE (T_OPEN+1)
+#define T_RIGHT (T_OPEN+2)
+#define T_CFLX (T_OPEN+3)
+#define T_DOT (T_OPEN+4)
+#define T_DOTSTAR (T_OPEN+5)
+#define T_END (T_OPEN+6)
+#define T_BAD (T_OPEN+7)
+#define T_DOLL (T_OPEN+8)
+#define T_BRA (T_OPEN+9)
+#define T_BAR (T_OPEN+10)
+#define T_AND (T_OPEN+11)
+#define T_LT (T_OPEN+12)
+#define T_GT (T_OPEN+13)
+#define T_SLASHPLUS (T_OPEN+14)
+#define T_GROUP (T_OPEN+15)
+#define T_WORD (T_OPEN+16)
+#define T_WORD_NOT (T_WORD+1)
+#define T_BEG_STR (T_WORD+2)
+#define T_END_STR (T_WORD+3)
+#define T_FIN_STR (T_WORD+4)
+#define T_ESCAPE (T_WORD+5)
+#define T_ALNUM (T_WORD+6)
+#define T_ALNUM_NOT (T_ALNUM+1)
+#define T_DIGIT (T_ALNUM+2)
+#define T_DIGIT_NOT (T_ALNUM+3)
+#define T_SPACE (T_ALNUM+4)
+#define T_SPACE_NOT (T_ALNUM+5)
+#define T_BACK (T_ALNUM+6)
+
+#define BRE 0
+#define ERE 3
+#define ARE 6
+#define SRE 9
+#define KRE 12
+
+#define HIT SSIZE_MAX
+
+#define bitclr(p,c) ((p)[((c)>>3)&037]&=(~(1<<((c)&07))))
+#define bitset(p,c) ((p)[((c)>>3)&037]|=(1<<((c)&07)))
+#define bittst(p,c) ((p)[((c)>>3)&037]&(1<<((c)&07)))
+
+#define setadd(p,c) bitset((p)->bits,c)
+#define setclr(p,c) bitclr((p)->bits,c)
+#define settst(p,c) bittst((p)->bits,c)
+
+#if _hdr_wchar && _lib_wctype && _lib_iswctype
+
+#include <stdio.h> /* because <wchar.h> includes it and we generate it */
+#include <wchar.h>
+#if _hdr_wctype
+#include <wctype.h>
+#endif
+
+#if !defined(iswblank) && !_lib_iswblank
+#define _need_iswblank 1
+#define iswblank(x) _reg_iswblank(x)
+extern int _reg_iswblank(wint_t);
+#endif
+
+#if !defined(towupper) && !_lib_towupper
+#define towupper(x) toupper(x)
+#endif
+
+#if !defined(towlower) && !_lib_towlower
+#define towlower(x) tolower(x)
+#endif
+
+#else
+
+#undef _lib_wctype
+
+#ifndef iswalnum
+#define iswalnum(x) isalnum(x)
+#endif
+#ifndef iswalpha
+#define iswalpha(x) isalpha(x)
+#endif
+#ifndef iswcntrl
+#define iswcntrl(x) iscntrl(x)
+#endif
+#ifndef iswdigit
+#define iswdigit(x) isdigit(x)
+#endif
+#ifndef iswgraph
+#define iswgraph(x) isgraph(x)
+#endif
+#ifndef iswlower
+#define iswlower(x) islower(x)
+#endif
+#ifndef iswprint
+#define iswprint(x) isprint(x)
+#endif
+#ifndef iswpunct
+#define iswpunct(x) ispunct(x)
+#endif
+#ifndef iswspace
+#define iswspace(x) isspace(x)
+#endif
+#ifndef iswupper
+#define iswupper(x) isupper(x)
+#endif
+#ifndef iswxdigit
+#define iswxdigit(x) isxdigit(x)
+#endif
+
+#ifndef towlower
+#define towlower(x) tolower(x)
+#endif
+#ifndef towupper
+#define towupper(x) toupper(x)
+#endif
+
+#endif
+
+#ifndef iswblank
+#define iswblank(x) ((x)==' '||(x)=='\t')
+#endif
+
+#ifndef iswgraph
+#define iswgraph(x) (iswprint(x)&&!iswblank(x))
+#endif
+
+#define isword(x) (isalnum(x)||(x)=='_')
+
+/*
+ * collation element support
+ */
+
+#define COLL_KEY_MAX 32
+
+#if COLL_KEY_MAX < MB_LEN_MAX
+#undef COLL_KEY_MAX
+#define COLL_KEY_MAX MB_LEN_MAX
+#endif
+
+typedef unsigned char Ckey_t[COLL_KEY_MAX+1];
+
+#define COLL_end 0
+#define COLL_call 1
+#define COLL_char 2
+#define COLL_range 3
+#define COLL_range_lc 4
+#define COLL_range_uc 5
+
+typedef struct Celt_s
+{
+ short typ;
+ short min;
+ short max;
+ regclass_t fun;
+ Ckey_t beg;
+ Ckey_t end;
+} Celt_t;
+
+/*
+ * private stuff hanging off regex_t
+ */
+
+typedef struct Stk_pos_s
+{
+ off_t offset;
+ char* base;
+} Stk_pos_t;
+
+typedef struct Vector_s
+{
+ Stk_t* stk; /* stack pointer */
+ char* vec; /* the data */
+ int inc; /* growth increment */
+ int siz; /* element size */
+ int max; /* max index */
+ int cur; /* current index -- user domain */
+} Vector_t;
+
+/*
+ * Rex_t subtypes
+ */
+
+typedef struct Cond_s
+{
+ unsigned char* beg; /* beginning of next match */
+ struct Rex_s* next[2]; /* 0:no 1:yes next pattern */
+ struct Rex_s* cont; /* right catcher */
+ int yes; /* yes condition hit */
+} Cond_t;
+
+typedef struct Conj_left_s
+{
+ unsigned char* beg; /* beginning of left match */
+ struct Rex_s* right; /* right pattern */
+ struct Rex_s* cont; /* right catcher */
+} Conj_left_t;
+
+typedef struct Conj_right_s
+{
+ unsigned char* end; /* end of left match */
+ struct Rex_s* cont; /* ambient continuation */
+} Conj_right_t;
+
+typedef unsigned int Bm_mask_t;
+
+typedef struct Bm_s
+{
+ Bm_mask_t** mask;
+ size_t* skip;
+ size_t* fail;
+ size_t size;
+ ssize_t back;
+ ssize_t left;
+ ssize_t right;
+ size_t complete;
+} Bm_t;
+
+typedef struct String_s
+{
+ int* fail;
+ unsigned char* base;
+ size_t size;
+} String_t;
+
+typedef struct Set_s
+{
+ unsigned char bits[(UCHAR_MAX+1)/CHAR_BIT];
+} Set_t;
+
+typedef struct Collate_s
+{
+ int invert;
+ Celt_t* elements;
+} Collate_t;
+
+typedef struct Binary_s
+{
+ struct Rex_s* left;
+ struct Rex_s* right;
+ int serial;
+} Binary_t;
+
+typedef struct Group_s
+{
+ int number; /* group number */
+ int last; /* last contained group number */
+ int size; /* lookbehind size */
+ int back; /* backreferenced */
+ regflags_t flags; /* group flags */
+ union
+ {
+ Binary_t binary;
+ struct Rex_s* rex;
+ } expr;
+} Group_t;
+
+typedef struct Exec_s
+{
+ void* data;
+ const char* text;
+ size_t size;
+} Exec_t;
+
+#define REX_NEST_open 0x01
+#define REX_NEST_close 0x02
+#define REX_NEST_escape 0x04
+#define REX_NEST_quote 0x08
+#define REX_NEST_literal 0x10
+#define REX_NEST_delimiter 0x20
+#define REX_NEST_terminator 0x40
+#define REX_NEST_separator 0x80
+
+#define REX_NEST_SHIFT 8
+
+typedef struct Nest_s
+{
+ int primary;
+ unsigned short none; /* for Nest_t.type[-1] */
+ unsigned short type[1];
+} Nest_t;
+
+/*
+ * REX_ALT catcher, solely to get control at the end of an
+ * alternative to keep records for comparing matches.
+ */
+
+typedef struct Alt_catch_s
+{
+ struct Rex_s* cont;
+} Alt_catch_t;
+
+typedef struct Group_catch_s
+{
+ struct Rex_s* cont;
+ regoff_t* eo;
+} Group_catch_t;
+
+typedef struct Behind_catch_s
+{
+ struct Rex_s* cont;
+ unsigned char* beg;
+ unsigned char* end;
+} Behind_catch_t;
+
+/*
+ * REX_NEG catcher determines what string lengths can be matched,
+ * then Neg investigates continuations of other lengths.
+ * This is inefficient. For !POSITIONS expressions, we can do better:
+ * since matches to rex will be enumerated in decreasing order,
+ * we can investigate continuations whenever a length is skipped.
+ */
+
+typedef struct Neg_catch_s
+{
+ unsigned char* beg;
+ unsigned char* index;
+} Neg_catch_t;
+
+/*
+ * REX_REP catcher. One is created on the stack for
+ * each iteration of a complex repetition.
+ */
+
+typedef struct Rep_catch_s
+{
+ struct Rex_s* cont;
+ struct Rex_s* ref;
+ unsigned char* beg;
+ int n;
+} Rep_catch_t;
+
+/*
+ * data structure for an alternation of pure strings
+ * son points to a subtree of all strings with a common
+ * prefix ending in character c. sib links alternate
+ * letters in the same position of a word. end=1 if
+ * some word ends with c. the order of strings is
+ * irrelevant, except long words must be investigated
+ * before short ones.
+ */
+
+typedef struct Trie_node_s
+{
+ unsigned char c;
+ unsigned char end;
+ struct Trie_node_s* son;
+ struct Trie_node_s* sib;
+} Trie_node_t;
+
+typedef struct Trie_s
+{
+ Trie_node_t** root;
+ int min;
+ int max;
+} Trie_t;
+
+/*
+ * Rex_t is a node in a regular expression
+ */
+
+typedef struct Rex_s
+{
+ unsigned char type; /* node type */
+ unsigned char marked; /* already marked */
+ short serial; /* subpattern number */
+ regflags_t flags; /* scoped flags */
+ int explicit; /* scoped explicit match*/
+ struct Rex_s* next; /* remaining parts */
+ int lo; /* lo dup count */
+ int hi; /* hi dup count */
+ unsigned char* map; /* fold and/or ccode map*/
+ union
+ {
+ Alt_catch_t alt_catch; /* alt catcher */
+ Bm_t bm; /* bm */
+ Behind_catch_t behind_catch; /* behind catcher */
+ Set_t* charclass; /* char class */
+ Collate_t collate; /* collation class */
+ Cond_t cond_catch; /* cond catcher */
+ Conj_left_t conj_left; /* conj left catcher */
+ Conj_right_t conj_right; /* conj right catcher */
+ void* data; /* data after Rex_t */
+ Exec_t exec; /* re.re_exec() args */
+ Group_t group; /* a|b or rep */
+ Group_catch_t group_catch; /* group catcher */
+ Neg_catch_t neg_catch; /* neg catcher */
+ Nest_t nest; /* nested match */
+ unsigned char onechar; /* single char */
+ Rep_catch_t rep_catch; /* rep catcher */
+ String_t string; /* string/kmp */
+ Trie_t trie; /* trie */
+ } re;
+} Rex_t;
+
+typedef struct reglib_s /* library private regex_t info */
+{
+ struct Rex_s* rex; /* compiled expression */
+ regdisc_t* disc; /* REG_DISCIPLINE discipline */
+ const regex_t* regex; /* from regexec */
+ unsigned char* beg; /* beginning of string */
+ unsigned char* end; /* end of string */
+ Vector_t* pos; /* posns of certain subpatterns */
+ Vector_t* bestpos; /* ditto for best match */
+ regmatch_t* match; /* subexrs in current match */
+ regmatch_t* best; /* ditto in best match yet */
+ Stk_pos_t stk; /* exec stack pos */
+ size_t min; /* minimum match length */
+ size_t nsub; /* internal re_nsub */
+ regflags_t flags; /* flags from regcomp() */
+ int error; /* last error */
+ int explicit; /* explicit match on this char */
+ int leading; /* leading match on this char */
+ int refs; /* regcomp()+regdup() references*/
+ Rex_t done; /* the last continuation */
+ regstat_t stats; /* for regstat() */
+ unsigned char fold[UCHAR_MAX+1]; /* REG_ICASE map */
+ unsigned char hard; /* hard comp */
+ unsigned char once; /* if 1st parse fails, quit */
+ unsigned char separate; /* cannot combine */
+ unsigned char stack; /* hard comp or exec */
+ unsigned char sub; /* re_sub is valid */
+ unsigned char test; /* debug/test bitmask */
+} Env_t;
+
+typedef struct State_s /* shared state */
+{
+ regmatch_t nomatch;
+ struct
+ {
+ unsigned char key;
+ short val[15];
+ } escape[52];
+ short* magic[UCHAR_MAX+1];
+ regdisc_t disc;
+ int fatal;
+ int initialized;
+ Dt_t* attrs;
+ Dt_t* names;
+ Dtdisc_t dtdisc;
+} State_t;
+
+extern State_t state;
+
+extern void* alloc(regdisc_t*, void*, size_t);
+extern regclass_t classfun(int);
+extern void drop(regdisc_t*, Rex_t*);
+extern int fatal(regdisc_t*, int, const char*);
+
+#endif
diff --git a/src/lib/libast/regex/regnexec.c b/src/lib/libast/regex/regnexec.c
new file mode 100644
index 0000000..0c3a0aa
--- /dev/null
+++ b/src/lib/libast/regex/regnexec.c
@@ -0,0 +1,2045 @@
+/***********************************************************************
+* *
+* This software is part of the ast package *
+* Copyright (c) 1985-2011 AT&T Intellectual Property *
+* and is licensed under the *
+* Eclipse Public License, Version 1.0 *
+* by AT&T Intellectual Property *
+* *
+* A copy of the License is available at *
+* http://www.eclipse.org/org/documents/epl-v10.html *
+* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
+* *
+* Information and Software Systems Research *
+* AT&T Research *
+* Florham Park NJ *
+* *
+* Glenn Fowler <gsf@research.att.com> *
+* David Korn <dgk@research.att.com> *
+* Phong Vo <kpv@research.att.com> *
+* *
+***********************************************************************/
+#pragma prototyped
+
+/*
+ * posix regex executor
+ * single sized-record interface
+ */
+
+#include "reglib.h"
+
+#if _AST_REGEX_DEBUG
+
+#define DEBUG_TEST(f,y,n) ((debug&(debug_flag=f))?(y):(n))
+#define DEBUG_CODE(f,y,n) do if(debug&(f)){y}else{n} while(0)
+#define DEBUG_INIT() do { char* t; if (!debug) { debug = 0x80000000; if (t = getenv("_AST_regex_exec_debug")) debug |= strtoul(t, NiL, 0); } } while (0)
+
+static unsigned long debug;
+static unsigned long debug_flag;
+
+static const char* rexnames[] =
+{
+ "REX_NULL",
+ "REX_ALT",
+ "REX_ALT_CATCH",
+ "REX_BACK",
+ "REX_BEG",
+ "REX_BEG_STR",
+ "REX_BM",
+ "REX_CAT",
+ "REX_CLASS",
+ "REX_COLL_CLASS",
+ "REX_CONJ",
+ "REX_CONJ_LEFT",
+ "REX_CONJ_RIGHT",
+ "REX_DONE",
+ "REX_DOT",
+ "REX_END",
+ "REX_END_STR",
+ "REX_EXEC",
+ "REX_FIN_STR",
+ "REX_GROUP",
+ "REX_GROUP_CATCH",
+ "REX_GROUP_AHEAD",
+ "REX_GROUP_AHEAD_CATCH",
+ "REX_GROUP_AHEAD_NOT",
+ "REX_GROUP_BEHIND",
+ "REX_GROUP_BEHIND_CATCH",
+ "REX_GROUP_BEHIND_NOT",
+ "REX_GROUP_BEHIND_NOT_CATCH",
+ "REX_GROUP_COND",
+ "REX_GROUP_COND_CATCH",
+ "REX_GROUP_CUT",
+ "REX_GROUP_CUT_CATCH",
+ "REX_KMP",
+ "REX_NEG",
+ "REX_NEG_CATCH",
+ "REX_NEST",
+ "REX_ONECHAR",
+ "REX_REP",
+ "REX_REP_CATCH",
+ "REX_STRING",
+ "REX_TRIE",
+ "REX_WBEG",
+ "REX_WEND",
+ "REX_WORD",
+ "REX_WORD_NOT"
+};
+
+static const char* rexname(Rex_t* rex)
+{
+ if (!rex)
+ return "NIL";
+ if (rex->type >= elementsof(rexnames))
+ return "ERROR";
+ return rexnames[rex->type];
+}
+
+#else
+
+#define DEBUG_INIT()
+#define DEBUG_TEST(f,y,n) (n)
+#define DEBUG_CODE(f,y,n) do {n} while(0)
+
+#endif
+
+#define BEG_ALT 1 /* beginning of an alt */
+#define BEG_ONE 2 /* beginning of one iteration of a rep */
+#define BEG_REP 3 /* beginning of a repetition */
+#define BEG_SUB 4 /* beginning of a subexpression */
+#define END_ANY 5 /* end of any of above */
+
+/*
+ * returns from parse()
+ */
+
+#define NONE 0 /* no parse found */
+#define GOOD 1 /* some parse was found */
+#define CUT 2 /* no match and no backtrack */
+#define BEST 3 /* an unbeatable parse was found */
+#define BAD 4 /* error ocurred */
+
+/*
+ * REG_SHELL_DOT test
+ */
+
+#define LEADING(e,r,s) (*(s)==(e)->leading&&((s)==(e)->beg||*((s)-1)==(r)->explicit))
+
+/*
+ * Pos_t is for comparing parses. An entry is made in the
+ * array at the beginning and at the end of each Group_t,
+ * each iteration in a Group_t, and each Binary_t.
+ */
+
+typedef struct
+{
+ unsigned char* p; /* where in string */
+ size_t length; /* length in string */
+ short serial; /* preorder subpattern number */
+ short be; /* which end of pair */
+} Pos_t;
+
+/* ===== begin library support ===== */
+
+#define vector(t,v,i) (((i)<(v)->max)?(t*)((v)->vec+(i)*(v)->siz):(t*)vecseek(&(v),i))
+
+static Vector_t*
+vecopen(int inc, int siz)
+{
+ Vector_t* v;
+ Stk_t* sp;
+
+ if (inc <= 0)
+ inc = 16;
+ if (!(sp = stkopen(STK_SMALL|STK_NULL)))
+ return 0;
+ if (!(v = (Vector_t*)stkseek(sp, sizeof(Vector_t) + inc * siz)))
+ {
+ stkclose(sp);
+ return 0;
+ }
+ v->stk = sp;
+ v->vec = (char*)v + sizeof(Vector_t);
+ v->max = v->inc = inc;
+ v->siz = siz;
+ v->cur = 0;
+ return v;
+}
+
+static void*
+vecseek(Vector_t** p, int index)
+{
+ Vector_t* v = *p;
+
+ if (index >= v->max)
+ {
+ while ((v->max += v->inc) <= index);
+ if (!(v = (Vector_t*)stkseek(v->stk, sizeof(Vector_t) + v->max * v->siz)))
+ return 0;
+ *p = v;
+ v->vec = (char*)v + sizeof(Vector_t);
+ }
+ return v->vec + index * v->siz;
+}
+
+static void
+vecclose(Vector_t* v)
+{
+ if (v)
+ stkclose(v->stk);
+}
+
+typedef struct
+{
+ Stk_pos_t pos;
+ char data[1];
+} Stk_frame_t;
+
+#define stknew(s,p) ((p)->offset=stktell(s),(p)->base=stkfreeze(s,0))
+#define stkold(s,p) stkset(s,(p)->base,(p)->offset)
+
+#define stkframe(s) (*((Stk_frame_t**)stktop(s)-1))
+#define stkdata(s,t) ((t*)stkframe(s)->data)
+#define stkpop(s) stkold(s,&(stkframe(s)->pos))
+
+static void*
+stkpush(Stk_t* sp, size_t size)
+{
+ Stk_frame_t* f;
+ Stk_pos_t p;
+
+ stknew(sp, &p);
+ size = sizeof(Stk_frame_t) + sizeof(size_t) + size - 1;
+ if (!(f = (Stk_frame_t*)stkalloc(sp, sizeof(Stk_frame_t) + sizeof(Stk_frame_t*) + size - 1)))
+ return 0;
+ f->pos = p;
+ stkframe(sp) = f;
+ return f->data;
+}
+
+/* ===== end library support ===== */
+
+/*
+ * Match_frame_t is for saving and restoring match records
+ * around alternate attempts, so that fossils will not be
+ * left in the match array. These are the only entries in
+ * the match array that are not otherwise guaranteed to
+ * have current data in them when they get used.
+ */
+
+typedef struct
+{
+ size_t size;
+ regmatch_t* match;
+ regmatch_t save[1];
+} Match_frame_t;
+
+#define matchpush(e,x) ((x)->re.group.number?_matchpush(e,x):0)
+#define matchcopy(e,x) do if ((x)->re.group.number) { Match_frame_t* fp = (void*)stkframe(stkstd)->data; memcpy(fp->match, fp->save, fp->size); } while (0)
+#define matchpop(e,x) do if ((x)->re.group.number) { Match_frame_t* fp = (void*)stkframe(stkstd)->data; memcpy(fp->match, fp->save, fp->size); stkpop(stkstd); } while (0)
+
+#define pospop(e) (--(e)->pos->cur)
+
+/*
+ * allocate a frame and push a match onto the stack
+ */
+
+static int
+_matchpush(Env_t* env, Rex_t* rex)
+{
+ Match_frame_t* f;
+ regmatch_t* m;
+ regmatch_t* e;
+ regmatch_t* s;
+ int num;
+
+ if (rex->re.group.number <= 0 || (num = rex->re.group.last - rex->re.group.number + 1) <= 0)
+ num = 0;
+ if (!(f = (Match_frame_t*)stkpush(stkstd, sizeof(Match_frame_t) + (num - 1) * sizeof(regmatch_t))))
+ {
+ env->error = REG_ESPACE;
+ return 1;
+ }
+ f->size = num * sizeof(regmatch_t);
+ f->match = m = env->match + rex->re.group.number;
+ e = m + num;
+ s = f->save;
+ while (m < e)
+ {
+ *s++ = *m;
+ *m++ = state.nomatch;
+ }
+ return 0;
+}
+
+/*
+ * allocate a frame and push a pos onto the stack
+ */
+
+static int
+pospush(Env_t* env, Rex_t* rex, unsigned char* p, int be)
+{
+ Pos_t* pos;
+
+ if (!(pos = vector(Pos_t, env->pos, env->pos->cur)))
+ {
+ env->error = REG_ESPACE;
+ return 1;
+ }
+ pos->serial = rex->serial;
+ pos->p = p;
+ pos->be = be;
+ env->pos->cur++;
+ return 0;
+}
+
+/*
+ * two matches are known to have the same length
+ * os is start of old pos array, ns is start of new,
+ * oend and nend are end+1 pointers to ends of arrays.
+ * oe and ne are ends (not end+1) of subarrays.
+ * returns 1 if new is better, -1 if old, else 0.
+ */
+
+static int
+better(Env_t* env, Pos_t* os, Pos_t* ns, Pos_t* oend, Pos_t* nend, int level)
+{
+ Pos_t* oe;
+ Pos_t* ne;
+ int k;
+ int n;
+
+ if (env->error)
+ return -1;
+ for (;;)
+ {
+ DEBUG_CODE(0x0080,{sfprintf(sfstdout, " %-*.*sold ", (level + 3) * 4, (level + 3) * 4, "");for (oe = os; oe < oend; oe++)sfprintf(sfstdout, "<%d,%d,%d>", oe->p - env->beg, oe->serial, oe->be);sfprintf(sfstdout, "\n %-*.*snew ", (level + 3) * 4, (level + 3) * 4, "");for (oe = ns; oe < nend; oe++)sfprintf(sfstdout, "<%d,%d,%d>", oe->p - env->beg, oe->serial, oe->be);sfprintf(sfstdout, "\n");},{0;});
+ if (ns >= nend)
+ return DEBUG_TEST(0x8000,(os < oend),(0));
+ if (os >= oend)
+ return DEBUG_TEST(0x8000,(-1),(1));
+ n = os->serial;
+ if (ns->serial > n)
+ return -1;
+ if (n > ns->serial)
+ {
+ env->error = REG_PANIC;
+ return -1;
+ }
+ if (ns->p > os->p)
+ return 1;
+ if (os->p > ns->p)
+ return -1;
+ oe = os;
+ k = 0;
+ for (;;)
+ if ((++oe)->serial == n)
+ {
+ if (oe->be != END_ANY)
+ k++;
+ else if (k-- <= 0)
+ break;
+ }
+ ne = ns;
+ k = 0;
+ for (;;)
+ if ((++ne)->serial == n)
+ {
+ if (ne->be != END_ANY)
+ k++;
+ else if (k-- <= 0)
+ break;
+ }
+ if (ne->p > oe->p)
+ return 1;
+ if (oe->p > ne->p)
+ return -1;
+ if (k = better(env, os + 1, ns + 1, oe, ne, level + 1))
+ return k;
+ os = oe + 1;
+ ns = ne + 1;
+ }
+}
+
+#if _AST_REGEX_DEBUG
+
+static void
+showmatch(regmatch_t* p)
+{
+ sfputc(sfstdout, '(');
+ if (p->rm_so < 0)
+ sfputc(sfstdout, '?');
+ else
+ sfprintf(sfstdout, "%d", p->rm_so);
+ sfputc(sfstdout, ',');
+ if (p->rm_eo < 0)
+ sfputc(sfstdout, '?');
+ else
+ sfprintf(sfstdout, "%d", p->rm_eo);
+ sfputc(sfstdout, ')');
+}
+
+static int
+_better(Env_t* env, Pos_t* os, Pos_t* ns, Pos_t* oend, Pos_t* nend, int level)
+{
+ int i;
+
+ DEBUG_CODE(0x0040,{sfprintf(sfstdout, "AHA better old ");for (i = 0; i <= env->nsub; i++)showmatch(&env->best[i]);sfprintf(sfstdout, "\n new ");for (i = 0; i <= env->nsub; i++)showmatch(&env->match[i]);sfprintf(sfstdout, "\n");},{0;});
+ i = better(env, os, ns, oend, nend, 0);
+ DEBUG_TEST(0x0040,(sfprintf(sfstdout, " %s\n", i <= 0 ? "OLD" : "NEW")),(0));
+ return i;
+}
+
+#define better _better
+
+#endif
+
+#define follow(e,r,c,s) ((r)->next?parse(e,(r)->next,c,s):(c)?parse(e,c,0,s):BEST)
+
+static int parse(Env_t*, Rex_t*, Rex_t*, unsigned char*);
+
+static int
+parserep(Env_t* env, Rex_t* rex, Rex_t* cont, unsigned char* s, int n)
+{
+ int i;
+ int r = NONE;
+ Rex_t catcher;
+
+ DEBUG_TEST(0x0010,(sfprintf(sfstdout, "AHA#%04d 0x%04x parserep %s %d %d %d %d `%-.*s'\n", __LINE__, debug_flag, rexname(rex->re.group.expr.rex), rex->re.group.number, rex->lo, n, rex->hi, env->end - s, s)),(0));
+ if ((rex->flags & REG_MINIMAL) && n >= rex->lo && n < rex->hi)
+ {
+ if (env->stack && pospush(env, rex, s, END_ANY))
+ return BAD;
+ i = follow(env, rex, cont, s);
+ if (env->stack)
+ pospop(env);
+ switch (i)
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ case GOOD:
+ return BEST;
+ }
+ }
+ if (n < rex->hi)
+ {
+ catcher.type = REX_REP_CATCH;
+ catcher.serial = rex->serial;
+ catcher.re.rep_catch.ref = rex;
+ catcher.re.rep_catch.cont = cont;
+ catcher.re.rep_catch.beg = s;
+ catcher.re.rep_catch.n = n + 1;
+ catcher.next = rex->next;
+ if (n == 0)
+ rex->re.rep_catch.beg = s;
+ if (env->stack)
+ {
+ if (matchpush(env, rex))
+ return BAD;
+ if (pospush(env, rex, s, BEG_ONE))
+ return BAD;
+DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x PUSH %d (%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)(%d,%d)\n", __LINE__, debug_flag, rex->re.group.number, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo, env->match[2].rm_so, env->match[2].rm_eo)),(0));
+ }
+ r = parse(env, rex->re.group.expr.rex, &catcher, s);
+ DEBUG_TEST(0x0010,(sfprintf(sfstdout, "AHA#%04d 0x%04x parserep parse %d %d `%-.*s'\n", __LINE__, debug_flag, rex->re.group.number, r, env->end - s, s)),(0));
+ if (env->stack)
+ {
+ pospop(env);
+ matchpop(env, rex);
+DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x POP %d %d (%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)(%d,%d)\n", __LINE__, debug_flag, rex->re.group.number, r, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo, env->match[2].rm_so, env->match[2].rm_eo)),(0));
+ }
+ switch (r)
+ {
+ case BAD:
+ return BAD;
+ case BEST:
+ return BEST;
+ case CUT:
+ r = NONE;
+ break;
+ case GOOD:
+ if (rex->flags & REG_MINIMAL)
+ return BEST;
+ r = GOOD;
+ break;
+ }
+ }
+ if (n < rex->lo)
+ return r;
+ if (!(rex->flags & REG_MINIMAL) || n >= rex->hi)
+ {
+ if (env->stack && pospush(env, rex, s, END_ANY))
+ return BAD;
+ i = follow(env, rex, cont, s);
+ if (env->stack)
+ pospop(env);
+ switch (i)
+ {
+ case BAD:
+ r = BAD;
+ break;
+ case CUT:
+ r = CUT;
+ break;
+ case BEST:
+ r = BEST;
+ break;
+ case GOOD:
+ r = (rex->flags & REG_MINIMAL) ? BEST : GOOD;
+ break;
+ }
+ }
+ return r;
+}
+
+static int
+parsetrie(Env_t* env, Trie_node_t* x, Rex_t* rex, Rex_t* cont, unsigned char* s)
+{
+ unsigned char* p;
+ int r;
+
+ if (p = rex->map)
+ {
+ for (;;)
+ {
+ if (s >= env->end)
+ return NONE;
+ while (x->c != p[*s])
+ if (!(x = x->sib))
+ return NONE;
+ if (x->end)
+ break;
+ x = x->son;
+ s++;
+ }
+ }
+ else
+ {
+ for (;;)
+ {
+ if (s >= env->end)
+ return NONE;
+ while (x->c != *s)
+ if (!(x = x->sib))
+ return NONE;
+ if (x->end)
+ break;
+ x = x->son;
+ s++;
+ }
+ }
+ s++;
+ if (rex->flags & REG_MINIMAL)
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ case GOOD:
+ return BEST;
+ }
+ if (x->son)
+ switch (parsetrie(env, x->son, rex, cont, s))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ return BEST;
+ case GOOD:
+ if (rex->flags & REG_MINIMAL)
+ return BEST;
+ r = GOOD;
+ break;
+ default:
+ r = NONE;
+ break;
+ }
+ else
+ r = NONE;
+ if (!(rex->flags & REG_MINIMAL))
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ return BEST;
+ case GOOD:
+ return GOOD;
+ }
+ return r;
+}
+
+static int
+collelt(register Celt_t* ce, char* key, int c, int x)
+{
+ Ckey_t elt;
+
+ mbxfrm(elt, key, COLL_KEY_MAX);
+ for (;; ce++)
+ {
+ switch (ce->typ)
+ {
+ case COLL_call:
+ if (!x && (*ce->fun)(c))
+ return 1;
+ continue;
+ case COLL_char:
+ if (!strcmp((char*)ce->beg, (char*)elt))
+ return 1;
+ continue;
+ case COLL_range:
+ if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max)
+ return 1;
+ continue;
+ case COLL_range_lc:
+ if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max && (iswlower(c) || !iswupper(c)))
+ return 1;
+ continue;
+ case COLL_range_uc:
+ if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max && (iswupper(c) || !iswlower(c)))
+ return 1;
+ continue;
+ }
+ break;
+ }
+ return 0;
+}
+
+static int
+collic(register Celt_t* ce, char* key, register char* nxt, int c, int x)
+{
+ if (!x)
+ {
+ if (collelt(ce, key, c, x))
+ return 1;
+ if (iswlower(c))
+ c = towupper(c);
+ else if (iswupper(c))
+ c = towlower(c);
+ else
+ return 0;
+ x = mbconv(key, c);
+ key[x] = 0;
+ return collelt(ce, key, c, 0);
+ }
+ while (*nxt)
+ {
+ if (collic(ce, key, nxt + 1, c, x))
+ return 1;
+ if (islower(*nxt))
+ *nxt = toupper(*nxt);
+ else if (isupper(*nxt))
+ *nxt = tolower(*nxt);
+ else
+ return 0;
+ nxt++;
+ }
+ return collelt(ce, key, c, x);
+}
+
+static int
+collmatch(Rex_t* rex, unsigned char* s, unsigned char* e, unsigned char** p)
+{
+ unsigned char* t;
+ wchar_t c;
+ int w;
+ int r;
+ int x;
+ int ic;
+ Ckey_t key;
+ Ckey_t elt;
+
+ ic = (rex->flags & REG_ICASE);
+ if ((w = MBSIZE(s)) > 1)
+ {
+ memcpy((char*)key, (char*)s, w);
+ key[w] = 0;
+ t = s;
+ c = mbchar(t);
+#if !_lib_wctype
+ c &= 0xff;
+#endif
+ x = 0;
+ }
+ else
+ {
+ c = s[0];
+ if (ic && isupper(c))
+ c = tolower(c);
+ key[0] = c;
+ key[1] = 0;
+ if (isalpha(c))
+ {
+ x = e - s;
+ if (x > COLL_KEY_MAX)
+ x = COLL_KEY_MAX;
+ while (w < x)
+ {
+ c = s[w];
+ if (!isalpha(c))
+ break;
+ r = mbxfrm(elt, key, COLL_KEY_MAX);
+ if (ic && isupper(c))
+ c = tolower(c);
+ key[w] = c;
+ key[w + 1] = 0;
+ if (mbxfrm(elt, key, COLL_KEY_MAX) != r)
+ break;
+ w++;
+ }
+ }
+ key[w] = 0;
+ c = key[0];
+ x = w - 1;
+ }
+ r = 1;
+ for (;;)
+ {
+ if (ic ? collic(rex->re.collate.elements, (char*)key, (char*)key, c, x) : collelt(rex->re.collate.elements, (char*)key, c, x))
+ break;
+ if (!x)
+ {
+ r = 0;
+ break;
+ }
+ w = x--;
+ key[w] = 0;
+ }
+ *p = s + w;
+ return rex->re.collate.invert ? !r : r;
+}
+
+static unsigned char*
+nestmatch(register unsigned char* s, register unsigned char* e, const unsigned short* type, register int co)
+{
+ register int c;
+ register int cc;
+ unsigned int n;
+ int oc;
+
+ if (type[co] & (REX_NEST_literal|REX_NEST_quote))
+ {
+ n = (type[co] & REX_NEST_literal) ? REX_NEST_terminator : (REX_NEST_escape|REX_NEST_terminator);
+ while (s < e)
+ {
+ c = *s++;
+ if (c == co)
+ return s;
+ else if (type[c] & n)
+ {
+ if (s >= e || (type[c] & REX_NEST_terminator))
+ break;
+ s++;
+ }
+ }
+ }
+ else
+ {
+ cc = type[co] >> REX_NEST_SHIFT;
+ oc = type[co] & (REX_NEST_open|REX_NEST_close);
+ n = 1;
+ while (s < e)
+ {
+ c = *s++;
+ switch (type[c] & (REX_NEST_escape|REX_NEST_open|REX_NEST_close|REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator))
+ {
+ case REX_NEST_delimiter:
+ case REX_NEST_terminator:
+ return oc ? 0 : s;
+ case REX_NEST_separator:
+ if (!oc)
+ return s;
+ break;
+ case REX_NEST_escape:
+ if (s >= e)
+ return 0;
+ s++;
+ break;
+ case REX_NEST_open|REX_NEST_close:
+ if (c == cc)
+ {
+ if (!--n)
+ return s;
+ }
+ /*FALLTHROUGH*/
+ case REX_NEST_open:
+ if (c == co)
+ {
+ if (!++n)
+ return 0;
+ }
+ else if (!(s = nestmatch(s, e, type, c)))
+ return 0;
+ break;
+ case REX_NEST_close:
+ if (c != cc)
+ return 0;
+ if (!--n)
+ return s;
+ break;
+ }
+ }
+ return (oc || !(type[UCHAR_MAX+1] & REX_NEST_terminator)) ? 0 : s;
+ }
+ return 0;
+}
+
+static int
+parse(Env_t* env, Rex_t* rex, Rex_t* cont, unsigned char* s)
+{
+ int c;
+ int d;
+ int i;
+ int m;
+ int n;
+ int r;
+ int* f;
+ unsigned char* p;
+ unsigned char* t;
+ unsigned char* b;
+ unsigned char* e;
+ char* u;
+ regmatch_t* o;
+ Trie_node_t* x;
+ Rex_t* q;
+ Rex_t catcher;
+ Rex_t next;
+
+ for (;;)
+ {
+DEBUG_TEST(0x0008,(sfprintf(sfstdout, "AHA#%04d 0x%04x parse %s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), env->end - s, s)),(0));
+ switch (rex->type)
+ {
+ case REX_ALT:
+ if (env->stack)
+ {
+ if (matchpush(env, rex))
+ return BAD;
+ if (pospush(env, rex, s, BEG_ALT))
+ return BAD;
+ catcher.type = REX_ALT_CATCH;
+ catcher.serial = rex->serial;
+ catcher.re.alt_catch.cont = cont;
+ catcher.next = rex->next;
+ r = parse(env, rex->re.group.expr.binary.left, &catcher, s);
+ if (r < BEST || (rex->flags & REG_MINIMAL))
+ {
+ matchcopy(env, rex);
+ ((Pos_t*)env->pos->vec + env->pos->cur - 1)->serial = catcher.serial = rex->re.group.expr.binary.serial;
+ n = parse(env, rex->re.group.expr.binary.right, &catcher, s);
+ if (n != NONE)
+ r = n;
+ }
+ pospop(env);
+ matchpop(env, rex);
+ }
+ else
+ {
+ if ((r = parse(env, rex->re.group.expr.binary.left, cont, s)) == NONE)
+ r = parse(env, rex->re.group.expr.binary.right, cont, s);
+ if (r == GOOD)
+ r = BEST;
+ }
+ return r;
+ case REX_ALT_CATCH:
+ if (pospush(env, rex, s, END_ANY))
+ return BAD;
+ r = follow(env, rex, rex->re.alt_catch.cont, s);
+ pospop(env);
+ return r;
+ case REX_BACK:
+ o = &env->match[rex->lo];
+ if (o->rm_so < 0)
+ return NONE;
+ i = o->rm_eo - o->rm_so;
+ e = s + i;
+ if (e > env->end)
+ return NONE;
+ t = env->beg + o->rm_so;
+ if (!(p = rex->map))
+ {
+ while (s < e)
+ if (*s++ != *t++)
+ return NONE;
+ }
+ else if (!mbwide())
+ {
+ while (s < e)
+ if (p[*s++] != p[*t++])
+ return NONE;
+ }
+ else
+ {
+ while (s < e)
+ {
+ c = mbchar(s);
+ d = mbchar(t);
+ if (towupper(c) != towupper(d))
+ return NONE;
+ }
+ }
+ break;
+ case REX_BEG:
+ if ((!(rex->flags & REG_NEWLINE) || s <= env->beg || *(s - 1) != '\n') && ((env->flags & REG_NOTBOL) || s != env->beg))
+ return NONE;
+ break;
+ case REX_CLASS:
+ if (LEADING(env, rex, s))
+ return NONE;
+ n = rex->hi;
+ if (n > env->end - s)
+ n = env->end - s;
+ m = rex->lo;
+ if (m > n)
+ return NONE;
+ r = NONE;
+ if (!(rex->flags & REG_MINIMAL))
+ {
+ for (i = 0; i < n; i++)
+ if (!settst(rex->re.charclass, s[i]))
+ {
+ n = i;
+ break;
+ }
+ for (s += n; n-- >= m; s--)
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ return BEST;
+ case GOOD:
+ r = GOOD;
+ break;
+ }
+ }
+ else
+ {
+ for (e = s + m; s < e; s++)
+ if (!settst(rex->re.charclass, *s))
+ return r;
+ e += n - m;
+ for (;;)
+ {
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ case GOOD:
+ return BEST;
+ }
+ if (s >= e || !settst(rex->re.charclass, *s))
+ break;
+ s++;
+ }
+ }
+ return r;
+ case REX_COLL_CLASS:
+ if (LEADING(env, rex, s))
+ return NONE;
+ n = rex->hi;
+ if (n > env->end - s)
+ n = env->end - s;
+ m = rex->lo;
+ if (m > n)
+ return NONE;
+ r = NONE;
+ e = env->end;
+ if (!(rex->flags & REG_MINIMAL))
+ {
+ if (!(b = (unsigned char*)stkpush(stkstd, n)))
+ {
+ env->error = REG_ESPACE;
+ return BAD;
+ }
+ for (i = 0; s < e && i < n && collmatch(rex, s, e, &t); i++)
+ {
+ b[i] = t - s;
+ s = t;
+ }
+ for (; i-- >= rex->lo; s -= b[i])
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ stkpop(stkstd);
+ return BAD;
+ case CUT:
+ stkpop(stkstd);
+ return CUT;
+ case BEST:
+ stkpop(stkstd);
+ return BEST;
+ case GOOD:
+ r = GOOD;
+ break;
+ }
+ stkpop(stkstd);
+ }
+ else
+ {
+ for (i = 0; i < m && s < e; i++, s = t)
+ if (!collmatch(rex, s, e, &t))
+ return r;
+ while (i++ <= n)
+ {
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ case GOOD:
+ return BEST;
+ }
+ if (s >= e || !collmatch(rex, s, e, &s))
+ break;
+ }
+ }
+ return r;
+ case REX_CONJ:
+ next.type = REX_CONJ_RIGHT;
+ next.re.conj_right.cont = cont;
+ next.next = rex->next;
+ catcher.type = REX_CONJ_LEFT;
+ catcher.re.conj_left.right = rex->re.group.expr.binary.right;
+ catcher.re.conj_left.cont = &next;
+ catcher.re.conj_left.beg = s;
+ catcher.next = 0;
+ return parse(env, rex->re.group.expr.binary.left, &catcher, s);
+ case REX_CONJ_LEFT:
+ rex->re.conj_left.cont->re.conj_right.end = s;
+ cont = rex->re.conj_left.cont;
+ s = rex->re.conj_left.beg;
+ rex = rex->re.conj_left.right;
+ continue;
+ case REX_CONJ_RIGHT:
+ if (rex->re.conj_right.end != s)
+ return NONE;
+ cont = rex->re.conj_right.cont;
+ break;
+ case REX_DONE:
+ if (!env->stack)
+ return BEST;
+ n = s - env->beg;
+ r = env->nsub;
+ DEBUG_TEST(0x0100,(sfprintf(sfstdout,"AHA#%04d 0x%04x %s (%d,%d)(%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rexname(rex), env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->best[3].rm_so, env->best[3].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
+ if ((i = env->best[0].rm_eo) >= 0)
+ {
+ if (rex->flags & REG_MINIMAL)
+ {
+ if (n > i)
+ return GOOD;
+ }
+ else
+ {
+ if (n < i)
+ return GOOD;
+ }
+ if (n == i && better(env,
+ (Pos_t*)env->bestpos->vec,
+ (Pos_t*)env->pos->vec,
+ (Pos_t*)env->bestpos->vec+env->bestpos->cur,
+ (Pos_t*)env->pos->vec+env->pos->cur,
+ 0) <= 0)
+ return GOOD;
+ }
+ env->best[0].rm_eo = n;
+ memcpy(&env->best[1], &env->match[1], r * sizeof(regmatch_t));
+ n = env->pos->cur;
+ if (!vector(Pos_t, env->bestpos, n))
+ {
+ env->error = REG_ESPACE;
+ return BAD;
+ }
+ env->bestpos->cur = n;
+ memcpy(env->bestpos->vec, env->pos->vec, n * sizeof(Pos_t));
+ DEBUG_TEST(0x0100,(sfprintf(sfstdout,"AHA#%04d 0x%04x %s (%d,%d)(%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rexname(rex), env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->best[3].rm_so, env->best[3].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
+ return GOOD;
+ case REX_DOT:
+ if (LEADING(env, rex, s))
+ return NONE;
+ n = rex->hi;
+ if (n > env->end - s)
+ n = env->end - s;
+ m = rex->lo;
+ if (m > n)
+ return NONE;
+ if ((c = rex->explicit) >= 0 && !mbwide())
+ for (i = 0; i < n; i++)
+ if (s[i] == c)
+ {
+ n = i;
+ break;
+ }
+ r = NONE;
+ if (!(rex->flags & REG_MINIMAL))
+ {
+ if (!mbwide())
+ {
+ for (s += n; n-- >= m; s--)
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ return BEST;
+ case GOOD:
+ r = GOOD;
+ break;
+ }
+ }
+ else
+ {
+ if (!(b = (unsigned char*)stkpush(stkstd, n)))
+ {
+ env->error = REG_ESPACE;
+ return BAD;
+ }
+ e = env->end;
+ for (i = 0; s < e && i < n && *s != c; i++)
+ s += b[i] = MBSIZE(s);
+ for (; i-- >= m; s -= b[i])
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ stkpop(stkstd);
+ return BAD;
+ case CUT:
+ stkpop(stkstd);
+ return CUT;
+ case BEST:
+ stkpop(stkstd);
+ return BEST;
+ case GOOD:
+ r = GOOD;
+ break;
+ }
+ stkpop(stkstd);
+ }
+ }
+ else
+ {
+ if (!mbwide())
+ {
+ e = s + n;
+ for (s += m; s <= e; s++)
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ case GOOD:
+ return BEST;
+ }
+ }
+ else
+ {
+ e = env->end;
+ for (i = 0; s < e && i < m && *s != c; i++)
+ s += MBSIZE(s);
+ if (i >= m)
+ for (; s <= e && i <= n; s += MBSIZE(s), i++)
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ case GOOD:
+ return BEST;
+ }
+ }
+ }
+ return r;
+ case REX_END:
+ if ((!(rex->flags & REG_NEWLINE) || *s != '\n') && ((env->flags & REG_NOTEOL) || s < env->end))
+ return NONE;
+ break;
+ case REX_GROUP:
+DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), env->end - s, s)),(0));
+ if (env->stack)
+ {
+ if (rex->re.group.number)
+ env->match[rex->re.group.number].rm_so = s - env->beg;
+ if (pospush(env, rex, s, BEG_SUB))
+ return BAD;
+ catcher.re.group_catch.eo = rex->re.group.number ? &env->match[rex->re.group.number].rm_eo : (regoff_t*)0;
+ }
+ catcher.type = REX_GROUP_CATCH;
+ catcher.serial = rex->serial;
+ catcher.re.group_catch.cont = cont;
+ catcher.next = rex->next;
+ r = parse(env, rex->re.group.expr.rex, &catcher, s);
+ if (env->stack)
+ {
+ pospop(env);
+ if (rex->re.group.number)
+ env->match[rex->re.group.number].rm_so = -1;
+ }
+ return r;
+ case REX_GROUP_CATCH:
+DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s=>%s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rexname(rex->re.group_catch.cont), env->end - s, s)),(0));
+ if (env->stack)
+ {
+ if (rex->re.group_catch.eo)
+ *rex->re.group_catch.eo = s - env->beg;
+ if (pospush(env, rex, s, END_ANY))
+ return BAD;
+ }
+ r = follow(env, rex, rex->re.group_catch.cont, s);
+ if (env->stack)
+ {
+ pospop(env);
+ if (rex->re.group_catch.eo)
+ *rex->re.group_catch.eo = -1;
+ }
+ return r;
+ case REX_GROUP_AHEAD:
+ catcher.type = REX_GROUP_AHEAD_CATCH;
+ catcher.flags = rex->flags;
+ catcher.serial = rex->serial;
+ catcher.re.rep_catch.beg = s;
+ catcher.re.rep_catch.cont = cont;
+ catcher.next = rex->next;
+ return parse(env, rex->re.group.expr.rex, &catcher, s);
+ case REX_GROUP_AHEAD_CATCH:
+ return follow(env, rex, rex->re.rep_catch.cont, rex->re.rep_catch.beg);
+ case REX_GROUP_AHEAD_NOT:
+ r = parse(env, rex->re.group.expr.rex, NiL, s);
+ if (r == NONE)
+ r = follow(env, rex, cont, s);
+ else if (r != BAD)
+ r = NONE;
+ return r;
+ case REX_GROUP_BEHIND:
+ if ((s - env->beg) < rex->re.group.size)
+ return NONE;
+ catcher.type = REX_GROUP_BEHIND_CATCH;
+ catcher.flags = rex->flags;
+ catcher.serial = rex->serial;
+ catcher.re.behind_catch.beg = s;
+ catcher.re.behind_catch.end = e = env->end;
+ catcher.re.behind_catch.cont = cont;
+ catcher.next = rex->next;
+ for (t = s - rex->re.group.size; t >= env->beg; t--)
+ {
+ env->end = s;
+ r = parse(env, rex->re.group.expr.rex, &catcher, t);
+ env->end = e;
+ if (r != NONE)
+ return r;
+ }
+ return NONE;
+ case REX_GROUP_BEHIND_CATCH:
+ if (s != rex->re.behind_catch.beg)
+ return NONE;
+ env->end = rex->re.behind_catch.end;
+ return follow(env, rex, rex->re.behind_catch.cont, rex->re.behind_catch.beg);
+ case REX_GROUP_BEHIND_NOT:
+ if ((s - env->beg) < rex->re.group.size)
+ r = NONE;
+ else
+ {
+ catcher.type = REX_GROUP_BEHIND_NOT_CATCH;
+ catcher.re.neg_catch.beg = s;
+ catcher.next = 0;
+ e = env->end;
+ env->end = s;
+ for (t = s - rex->re.group.size; t >= env->beg; t--)
+ {
+ r = parse(env, rex->re.group.expr.rex, &catcher, t);
+ if (r != NONE)
+ break;
+ }
+ env->end = e;
+ }
+ if (r == NONE)
+ r = follow(env, rex, cont, s);
+ else if (r != BAD)
+ r = NONE;
+ return r;
+ case REX_GROUP_BEHIND_NOT_CATCH:
+ return s == rex->re.neg_catch.beg ? GOOD : NONE;
+ case REX_GROUP_COND:
+ if (q = rex->re.group.expr.binary.right)
+ {
+ catcher.re.cond_catch.next[0] = q->re.group.expr.binary.right;
+ catcher.re.cond_catch.next[1] = q->re.group.expr.binary.left;
+ }
+ else
+ catcher.re.cond_catch.next[0] = catcher.re.cond_catch.next[1] = 0;
+ if (q = rex->re.group.expr.binary.left)
+ {
+ catcher.type = REX_GROUP_COND_CATCH;
+ catcher.flags = rex->flags;
+ catcher.serial = rex->serial;
+ catcher.re.cond_catch.yes = 0;
+ catcher.re.cond_catch.beg = s;
+ catcher.re.cond_catch.cont = cont;
+ catcher.next = rex->next;
+ r = parse(env, q, &catcher, s);
+ if (r == BAD || catcher.re.cond_catch.yes)
+ return r;
+ }
+ else if (!rex->re.group.size || rex->re.group.size > 0 && env->match[rex->re.group.size].rm_so >= 0)
+ r = GOOD;
+ else
+ r = NONE;
+ if (q = catcher.re.cond_catch.next[r != NONE])
+ {
+ catcher.type = REX_CAT;
+ catcher.flags = q->flags;
+ catcher.serial = q->serial;
+ catcher.re.group_catch.cont = cont;
+ catcher.next = rex->next;
+ return parse(env, q, &catcher, s);
+ }
+ return follow(env, rex, cont, s);
+ case REX_GROUP_COND_CATCH:
+ rex->re.cond_catch.yes = 1;
+ catcher.type = REX_CAT;
+ catcher.flags = rex->flags;
+ catcher.serial = rex->serial;
+ catcher.re.group_catch.cont = rex->re.cond_catch.cont;
+ catcher.next = rex->next;
+ return parse(env, rex->re.cond_catch.next[1], &catcher, rex->re.cond_catch.beg);
+ case REX_CAT:
+ return follow(env, rex, rex->re.group_catch.cont, s);
+ case REX_GROUP_CUT:
+ catcher.type = REX_GROUP_CUT_CATCH;
+ catcher.flags = rex->flags;
+ catcher.serial = rex->serial;
+ catcher.re.group_catch.cont = cont;
+ catcher.next = rex->next;
+ return parse(env, rex->re.group.expr.rex, &catcher, s);
+ case REX_GROUP_CUT_CATCH:
+ switch (r = follow(env, rex, rex->re.group_catch.cont, s))
+ {
+ case GOOD:
+ r = BEST;
+ break;
+ case NONE:
+ r = CUT;
+ break;
+ }
+ return r;
+ case REX_KMP:
+ f = rex->re.string.fail;
+ b = rex->re.string.base;
+ n = rex->re.string.size;
+ t = s;
+ e = env->end;
+ if (p = rex->map)
+ {
+ while (t + n <= e)
+ {
+ for (i = -1; t < e; t++)
+ {
+ while (i >= 0 && b[i+1] != p[*t])
+ i = f[i];
+ if (b[i+1] == p[*t])
+ i++;
+ if (i + 1 == n)
+ {
+ t++;
+ if (env->stack)
+ env->best[0].rm_so = t - s - n;
+ switch (follow(env, rex, cont, t))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ case GOOD:
+ return BEST;
+ }
+ t -= n - 1;
+ break;
+ }
+ }
+ }
+ }
+ else
+ {
+ while (t + n <= e)
+ {
+ for (i = -1; t < e; t++)
+ {
+ while (i >= 0 && b[i+1] != *t)
+ i = f[i];
+ if (b[i+1] == *t)
+ i++;
+ if (i + 1 == n)
+ {
+ t++;
+ if (env->stack)
+ env->best[0].rm_so = t - s - n;
+ switch (follow(env, rex, cont, t))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ case GOOD:
+ return BEST;
+ }
+ t -= n - 1;
+ break;
+ }
+ }
+ }
+ }
+ return NONE;
+ case REX_NEG:
+ if (LEADING(env, rex, s))
+ return NONE;
+ i = env->end - s;
+ n = ((i + 7) >> 3) + 1;
+ catcher.type = REX_NEG_CATCH;
+ catcher.re.neg_catch.beg = s;
+ if (!(p = (unsigned char*)stkpush(stkstd, n)))
+ return BAD;
+ memset(catcher.re.neg_catch.index = p, 0, n);
+ catcher.next = rex->next;
+ if (parse(env, rex->re.group.expr.rex, &catcher, s) == BAD)
+ r = BAD;
+ else
+ {
+ r = NONE;
+ for (; i >= 0; i--)
+ if (!bittst(p, i))
+ {
+ switch (follow(env, rex, cont, s + i))
+ {
+ case BAD:
+ r = BAD;
+ break;
+ case BEST:
+ r = BEST;
+ break;
+ case CUT:
+ r = CUT;
+ break;
+ case GOOD:
+ r = GOOD;
+ /*FALLTHROUGH*/
+ default:
+ continue;
+ }
+ break;
+ }
+ }
+ stkpop(stkstd);
+ return r;
+ case REX_NEG_CATCH:
+ bitset(rex->re.neg_catch.index, s - rex->re.neg_catch.beg);
+ return NONE;
+ case REX_NEST:
+ if (s >= env->end)
+ return NONE;
+ do
+ {
+ if ((c = *s++) == rex->re.nest.primary)
+ {
+ if (s >= env->end || !(s = nestmatch(s, env->end, rex->re.nest.type, c)))
+ return NONE;
+ break;
+ }
+ if (rex->re.nest.primary >= 0)
+ return NONE;
+ if (rex->re.nest.type[c] & (REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator))
+ break;
+ if (!(s = nestmatch(s, env->end, rex->re.nest.type, c)))
+ return NONE;
+ } while (s < env->end && !(rex->re.nest.type[*(s-1)] & (REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator)));
+ break;
+ case REX_NULL:
+ break;
+ case REX_ONECHAR:
+ n = rex->hi;
+ if (n > env->end - s)
+ n = env->end - s;
+ m = rex->lo;
+ if (m > n)
+ return NONE;
+ r = NONE;
+ c = rex->re.onechar;
+ if (!(rex->flags & REG_MINIMAL))
+ {
+ if (!mbwide())
+ {
+ if (p = rex->map)
+ {
+ for (i = 0; i < n; i++, s++)
+ if (p[*s] != c)
+ break;
+ }
+ else
+ {
+ for (i = 0; i < n; i++, s++)
+ if (*s != c)
+ break;
+ }
+ for (; i-- >= m; s--)
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ return BAD;
+ case BEST:
+ return BEST;
+ case CUT:
+ return CUT;
+ case GOOD:
+ r = GOOD;
+ break;
+ }
+ }
+ else
+ {
+ if (!(b = (unsigned char*)stkpush(stkstd, n)))
+ {
+ env->error = REG_ESPACE;
+ return BAD;
+ }
+ e = env->end;
+ if (!(rex->flags & REG_ICASE))
+ {
+ for (i = 0; s < e && i < n; i++, s = t)
+ {
+ t = s;
+ if (mbchar(t) != c)
+ break;
+ b[i] = t - s;
+ }
+ }
+ else
+ {
+ for (i = 0; s < e && i < n; i++, s = t)
+ {
+ t = s;
+ if (towupper(mbchar(t)) != c)
+ break;
+ b[i] = t - s;
+ }
+ }
+ for (; i-- >= m; s -= b[i])
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ stkpop(stkstd);
+ return BAD;
+ case BEST:
+ stkpop(stkstd);
+ return BEST;
+ case CUT:
+ stkpop(stkstd);
+ return CUT;
+ case GOOD:
+ r = GOOD;
+ break;
+ }
+ stkpop(stkstd);
+ }
+ }
+ else
+ {
+ if (!mbwide())
+ {
+ e = s + m;
+ if (p = rex->map)
+ {
+ for (; s < e; s++)
+ if (p[*s] != c)
+ return r;
+ e += n - m;
+ for (;;)
+ {
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ case GOOD:
+ return BEST;
+ }
+ if (s >= e || p[*s++] != c)
+ break;
+ }
+ }
+ else
+ {
+ for (; s < e; s++)
+ if (*s != c)
+ return r;
+ e += n - m;
+ for (;;)
+ {
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ case GOOD:
+ return BEST;
+ }
+ if (s >= e || *s++ != c)
+ break;
+ }
+ }
+ }
+ else
+ {
+ if (!(rex->flags & REG_ICASE))
+ {
+ for (i = 0; i < m && s < e; i++, s = t)
+ {
+ t = s;
+ if (mbchar(t) != c)
+ return r;
+ }
+ while (i++ <= n)
+ {
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ case GOOD:
+ return BEST;
+ }
+ if (s >= e || mbchar(s) != c)
+ break;
+ }
+ }
+ else
+ {
+ for (i = 0; i < m && s < e; i++, s = t)
+ {
+ t = s;
+ if (towupper(mbchar(t)) != c)
+ return r;
+ }
+ while (i++ <= n)
+ {
+ switch (follow(env, rex, cont, s))
+ {
+ case BAD:
+ return BAD;
+ case CUT:
+ return CUT;
+ case BEST:
+ case GOOD:
+ return BEST;
+ }
+ if (s >= e || towupper(mbchar(s)) != c)
+ break;
+ }
+ }
+ }
+ }
+ return r;
+ case REX_REP:
+ if (env->stack && pospush(env, rex, s, BEG_REP))
+ return BAD;
+ r = parserep(env, rex, cont, s, 0);
+ if (env->stack)
+ pospop(env);
+ return r;
+ case REX_REP_CATCH:
+ DEBUG_TEST(0x0020,(sfprintf(sfstdout, "AHA#%04d 0x%04x %s n %d len %d s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rex->re.rep_catch.n, s - rex->re.rep_catch.beg, env->end - s, s)),(0));
+ if (env->stack && pospush(env, rex, s, END_ANY))
+ return BAD;
+ if (s == rex->re.rep_catch.beg && rex->re.rep_catch.n > rex->re.rep_catch.ref->lo)
+ {
+ /*
+ * optional empty iteration
+ */
+
+DEBUG_TEST(0x0002,(sfprintf(sfstdout, "AHA#%04d %p re.group.back=%d re.group.expr.rex=%s\n", __LINE__, rex->re.rep_catch.ref->re.group.expr.rex, rex->re.rep_catch.ref->re.group.expr.rex->re.group.back, rexname(rex->re.rep_catch.ref->re.group.expr.rex))),(0));
+ if (!env->stack || s != rex->re.rep_catch.ref->re.rep_catch.beg && !rex->re.rep_catch.ref->re.group.expr.rex->re.group.back)
+ r = NONE;
+ else if (pospush(env, rex, s, END_ANY))
+ r = BAD;
+ else
+ {
+ r = follow(env, rex, rex->re.rep_catch.cont, s);
+ pospop(env);
+ }
+ }
+ else
+ r = parserep(env, rex->re.rep_catch.ref, rex->re.rep_catch.cont, s, rex->re.rep_catch.n);
+ if (env->stack)
+ pospop(env);
+ return r;
+ case REX_STRING:
+DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s \"%-.*s\" `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rex->re.string.size, rex->re.string.base, env->end - s, s)),(0));
+ if (rex->re.string.size > (env->end - s))
+ return NONE;
+ t = rex->re.string.base;
+ e = t + rex->re.string.size;
+ if (!(p = rex->map))
+ {
+ while (t < e)
+ if (*s++ != *t++)
+ return NONE;
+ }
+ else if (!mbwide())
+ {
+ while (t < e)
+ if (p[*s++] != *t++)
+ return NONE;
+ }
+ else
+ {
+ while (t < e)
+ {
+ c = mbchar(s);
+ d = mbchar(t);
+ if (towupper(c) != d)
+ return NONE;
+ }
+ }
+ break;
+ case REX_TRIE:
+ if (((s + rex->re.trie.min) > env->end) || !(x = rex->re.trie.root[rex->map ? rex->map[*s] : *s]))
+ return NONE;
+ return parsetrie(env, x, rex, cont, s);
+ case REX_EXEC:
+ u = 0;
+ r = (*env->disc->re_execf)(env->regex, rex->re.exec.data, rex->re.exec.text, rex->re.exec.size, (const char*)s, env->end - s, &u, env->disc);
+ e = (unsigned char*)u;
+ if (e >= s && e <= env->end)
+ s = e;
+ switch (r)
+ {
+ case 0:
+ break;
+ case REG_NOMATCH:
+ return NONE;
+ default:
+ env->error = r;
+ return BAD;
+ }
+ break;
+ case REX_WBEG:
+ if (!isword(*s) || s > env->beg && isword(*(s - 1)))
+ return NONE;
+ break;
+ case REX_WEND:
+ if (isword(*s) || s > env->beg && !isword(*(s - 1)))
+ return NONE;
+ break;
+ case REX_WORD:
+ if (s > env->beg && isword(*(s - 1)) == isword(*s))
+ return NONE;
+ break;
+ case REX_WORD_NOT:
+ if (s == env->beg || isword(*(s - 1)) != isword(*s))
+ return NONE;
+ break;
+ case REX_BEG_STR:
+ if (s != env->beg)
+ return NONE;
+ break;
+ case REX_END_STR:
+ for (t = s; t < env->end && *t == '\n'; t++);
+ if (t < env->end)
+ return NONE;
+ break;
+ case REX_FIN_STR:
+ if (s < env->end)
+ return NONE;
+ break;
+ }
+ if (!(rex = rex->next))
+ {
+ if (!(rex = cont))
+ break;
+ cont = 0;
+ }
+ }
+ return GOOD;
+}
+
+#if _AST_REGEX_DEBUG
+
+static void
+listnode(Rex_t* e, int level)
+{
+ int i;
+
+ if (e)
+ {
+ do
+ {
+ for (i = 0; i < level; i++)
+ sfprintf(sfstderr, " ");
+ sfprintf(sfstderr, "%s\n", rexname(e));
+ switch (e->type)
+ {
+ case REX_ALT:
+ case REX_CONJ:
+ listnode(e->re.group.expr.binary.left, level + 1);
+ listnode(e->re.group.expr.binary.right, level + 1);
+ break;
+ case REX_GROUP:
+ case REX_GROUP_AHEAD:
+ case REX_GROUP_AHEAD_NOT:
+ case REX_GROUP_BEHIND:
+ case REX_GROUP_BEHIND_NOT:
+ case REX_GROUP_CUT:
+ case REX_NEG:
+ case REX_REP:
+ listnode(e->re.group.expr.rex, level + 1);
+ break;
+ }
+ } while (e = e->next);
+ }
+}
+
+static int
+list(Env_t* env, Rex_t* rex)
+{
+ sfprintf(sfstderr, "AHA regex hard=%d stack=%p\n", env->hard, env->stack);
+ if (rex)
+ listnode(rex, 1);
+ return 0;
+}
+
+#endif
+
+/*
+ * returning REG_BADPAT or REG_ESPACE is not explicitly
+ * countenanced by the standard
+ */
+
+int
+regnexec(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags)
+{
+ register int n;
+ register int i;
+ int j;
+ int k;
+ int m;
+ int advance;
+ Env_t* env;
+ Rex_t* e;
+
+ DEBUG_INIT();
+ DEBUG_TEST(0x0001,(sfprintf(sfstdout, "AHA#%04d 0x%04x regnexec %d 0x%08x `%-.*s'\n", __LINE__, debug_flag, nmatch, flags, len, s)),(0));
+ if (!p || !(env = p->env))
+ return REG_BADPAT;
+ if (!s)
+ return fatal(env->disc, REG_BADPAT, NiL);
+ if (len < env->min)
+ {
+ DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, len, env->min)),(0));
+ return REG_NOMATCH;
+ }
+ env->regex = p;
+ env->beg = (unsigned char*)s;
+ env->end = env->beg + len;
+ stknew(stkstd, &env->stk);
+ env->flags &= ~REG_EXEC;
+ env->flags |= (flags & REG_EXEC);
+ advance = 0;
+ if (env->stack = env->hard || !(env->flags & REG_NOSUB) && nmatch)
+ {
+ n = env->nsub;
+ if (!(env->match = (regmatch_t*)stkpush(stkstd, 2 * (n + 1) * sizeof(regmatch_t))) ||
+ !env->pos && !(env->pos = vecopen(16, sizeof(Pos_t))) ||
+ !env->bestpos && !(env->bestpos = vecopen(16, sizeof(Pos_t))))
+ {
+ k = REG_ESPACE;
+ goto done;
+ }
+ env->pos->cur = env->bestpos->cur = 0;
+ env->best = &env->match[n + 1];
+ env->best[0].rm_so = 0;
+ env->best[0].rm_eo = -1;
+ for (i = 0; i <= n; i++)
+ env->match[i] = state.nomatch;
+ if (flags & REG_ADVANCE)
+ advance = 1;
+ }
+ DEBUG_TEST(0x1000,(list(env,env->rex)),(0));
+ k = REG_NOMATCH;
+ if ((e = env->rex)->type == REX_BM)
+ {
+ DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REX_BM\n", __LINE__)),(0));
+ if (len < e->re.bm.right)
+ {
+ DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, len, e->re.bm.right)),(0));
+ goto done;
+ }
+ else if (!(flags & REG_LEFT))
+ {
+ register unsigned char* buf = (unsigned char*)s;
+ register size_t index = e->re.bm.left + e->re.bm.size;
+ register size_t mid = len - e->re.bm.right;
+ register size_t* skip = e->re.bm.skip;
+ register size_t* fail = e->re.bm.fail;
+ register Bm_mask_t** mask = e->re.bm.mask;
+ Bm_mask_t m;
+ size_t x;
+
+ DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REX_BM len=%d right=%d left=%d size=%d %d %d\n", __LINE__, len, e->re.bm.right, e->re.bm.left, e->re.bm.size, index, mid)),(0));
+ for (;;)
+ {
+ while ((index += skip[buf[index]]) < mid);
+ if (index < HIT)
+ {
+ DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, index, HIT)),(0));
+ goto done;
+ }
+ index -= HIT;
+ m = mask[n = e->re.bm.size - 1][buf[index]];
+ do
+ {
+ if (!n--)
+ {
+ if (e->re.bm.back < 0)
+ goto possible;
+ if (advance)
+ {
+ i = index - e->re.bm.back;
+ s += i;
+ if (env->stack)
+ env->best[0].rm_so += i;
+ goto possible;
+ }
+ x = index;
+ if (index < e->re.bm.back)
+ index = 0;
+ else
+ index -= e->re.bm.back;
+ while (index <= x)
+ {
+ if ((i = parse(env, e->next, &env->done, buf + index)) != NONE)
+ {
+ if (env->stack)
+ env->best[0].rm_so = index;
+ n = env->nsub;
+ goto hit;
+ }
+ index++;
+ }
+ index += e->re.bm.size;
+ break;
+ }
+ } while (m &= mask[n][buf[--index]]);
+ if ((index += fail[n + 1]) >= len)
+ goto done;
+ }
+ }
+ possible:
+ n = env->nsub;
+ e = e->next;
+ }
+ j = env->once || (flags & REG_LEFT);
+ DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d parse once=%d\n", __LINE__, j)),(0));
+ while ((i = parse(env, e, &env->done, (unsigned char*)s)) == NONE || advance && !env->best[0].rm_eo && !(advance = 0))
+ {
+ if (j)
+ goto done;
+ i = MBSIZE(s);
+ s += i;
+ if ((unsigned char*)s > env->end - env->min)
+ goto done;
+ if (env->stack)
+ env->best[0].rm_so += i;
+ }
+ if ((flags & REG_LEFT) && env->stack && env->best[0].rm_so)
+ goto done;
+ hit:
+ if (k = env->error)
+ goto done;
+ if (i == CUT)
+ {
+ k = env->error = REG_NOMATCH;
+ goto done;
+ }
+ if (!(env->flags & REG_NOSUB))
+ {
+ k = (env->flags & (REG_SHELL|REG_AUGMENTED)) == (REG_SHELL|REG_AUGMENTED);
+ for (i = j = m = 0; j < nmatch; i++)
+ if (!i || !k || (i & 1))
+ {
+ if (i > n)
+ match[j] = state.nomatch;
+ else
+ match[m = j] = env->best[i];
+ j++;
+ }
+ if (k)
+ {
+ while (m > 0 && match[m].rm_so == -1 && match[m].rm_eo == -1)
+ m--;
+ ((regex_t*)p)->re_nsub = m;
+ }
+ }
+ k = 0;
+ done:
+ stkold(stkstd, &env->stk);
+ env->stk.base = 0;
+ if (k > REG_NOMATCH)
+ fatal(p->env->disc, k, NiL);
+ return k;
+}
+
+void
+regfree(regex_t* p)
+{
+ Env_t* env;
+
+ if (p && (env = p->env))
+ {
+#if _REG_subcomp
+ if (env->sub)
+ {
+ regsubfree(p);
+ p->re_sub = 0;
+ }
+#endif
+ p->env = 0;
+ if (--env->refs <= 0 && !(env->disc->re_flags & REG_NOFREE))
+ {
+ drop(env->disc, env->rex);
+ if (env->pos)
+ vecclose(env->pos);
+ if (env->bestpos)
+ vecclose(env->bestpos);
+ if (env->stk.base)
+ stkold(stkstd, &env->stk);
+ alloc(env->disc, env, 0);
+ }
+ }
+}
diff --git a/src/lib/libast/regex/regrecord.c b/src/lib/libast/regex/regrecord.c
new file mode 100644
index 0000000..001d14c
--- /dev/null
+++ b/src/lib/libast/regex/regrecord.c
@@ -0,0 +1,34 @@
+/***********************************************************************
+* *
+* This software is part of the ast package *
+* Copyright (c) 1985-2011 AT&T Intellectual Property *
+* and is licensed under the *
+* Eclipse Public License, Version 1.0 *
+* by AT&T Intellectual Property *
+* *
+* A copy of the License is available at *
+* http://www.eclipse.org/org/documents/epl-v10.html *
+* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
+* *
+* Information and Software Systems Research *
+* AT&T Research *
+* Florham Park NJ *
+* *
+* Glenn Fowler <gsf@research.att.com> *
+* David Korn <dgk@research.att.com> *
+* Phong Vo <kpv@research.att.com> *
+* *
+***********************************************************************/
+#pragma prototyped
+
+/*
+ * return 1 if regrexec() can be used
+ */
+
+#include "reglib.h"
+
+int
+regrecord(const regex_t* p)
+{
+ return p && p->env && p->env->rex->type == REX_BM;
+}
diff --git a/src/lib/libast/regex/regrexec.c b/src/lib/libast/regex/regrexec.c
new file mode 100644
index 0000000..6cb1272
--- /dev/null
+++ b/src/lib/libast/regex/regrexec.c
@@ -0,0 +1,145 @@
+/***********************************************************************
+* *
+* This software is part of the ast package *
+* Copyright (c) 1985-2011 AT&T Intellectual Property *
+* and is licensed under the *
+* Eclipse Public License, Version 1.0 *
+* by AT&T Intellectual Property *
+* *
+* A copy of the License is available at *
+* http://www.eclipse.org/org/documents/epl-v10.html *
+* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
+* *
+* Information and Software Systems Research *
+* AT&T Research *
+* Florham Park NJ *
+* *
+* Glenn Fowler <gsf@research.att.com> *
+* David Korn <dgk@research.att.com> *
+* Phong Vo <kpv@research.att.com> *
+* *
+***********************************************************************/
+#pragma prototyped
+
+/*
+ * posix regex record executor
+ * multiple record sized-buffer interface
+ */
+
+#include "reglib.h"
+
+/*
+ * call regnexec() on records selected by Boyer-Moore
+ */
+
+int
+regrexec(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags, int sep, void* handle, regrecord_t record)
+{
+ register unsigned char* buf = (unsigned char*)s;
+ register unsigned char* beg;
+ register unsigned char* l;
+ register unsigned char* r;
+ register unsigned char* x;
+ register size_t* skip;
+ register size_t* fail;
+ register Bm_mask_t** mask;
+ register size_t index;
+ register int n;
+ unsigned char* end;
+ size_t mid;
+ int complete;
+ int exactlen;
+ int leftlen;
+ int rightlen;
+ int inv;
+ Bm_mask_t m;
+ Env_t* env;
+ Rex_t* e;
+
+ if (!s || !p || !(env = p->env) || (e = env->rex)->type != REX_BM)
+ return REG_BADPAT;
+ inv = (flags & REG_INVERT) != 0;
+ buf = beg = (unsigned char*)s;
+ end = buf + len;
+ mid = (len < e->re.bm.right) ? 0 : (len - e->re.bm.right);
+ skip = e->re.bm.skip;
+ fail = e->re.bm.fail;
+ mask = e->re.bm.mask;
+ complete = e->re.bm.complete && !nmatch;
+ exactlen = e->re.bm.size;
+ leftlen = e->re.bm.left + exactlen;
+ rightlen = exactlen + e->re.bm.right;
+ index = leftlen++;
+ for (;;)
+ {
+ while ((index += skip[buf[index]]) < mid);
+ if (index < HIT)
+ goto impossible;
+ index -= HIT;
+ m = mask[n = exactlen - 1][buf[index]];
+ do
+ {
+ if (!n--)
+ goto possible;
+ } while (m &= mask[n][buf[--index]]);
+ if ((index += fail[n + 1]) < len)
+ continue;
+ impossible:
+ if (inv)
+ {
+ l = r = buf + len;
+ goto invert;
+ }
+ n = 0;
+ goto done;
+ possible:
+ r = (l = buf + index) + exactlen;
+ while (l > beg)
+ if (*--l == sep)
+ {
+ l++;
+ break;
+ }
+ if ((r - l) < leftlen)
+ goto spanned;
+ while (r < end && *r != sep)
+ r++;
+ if ((r - (buf + index)) < rightlen)
+ goto spanned;
+ if (complete || (env->rex = ((r - l) > 128) ? e : e->next) && !(n = regnexec(p, (char*)l, r - l, nmatch, match, flags)))
+ {
+ if (inv)
+ {
+ invert:
+ x = beg;
+ while (beg < l)
+ {
+ while (x < l && *x != sep)
+ x++;
+ if (n = (*record)(handle, (char*)beg, x - beg))
+ goto done;
+ beg = ++x;
+ }
+ }
+ else if (n = (*record)(handle, (char*)l, r - l))
+ goto done;
+ if ((index = (r - buf) + leftlen) >= len)
+ {
+ n = (inv && (++r - buf) < len) ? (*record)(handle, (char*)r, (buf + len) - r): 0;
+ goto done;
+ }
+ beg = r + 1;
+ }
+ else if (n != REG_NOMATCH)
+ goto done;
+ else
+ {
+ spanned:
+ if ((index += exactlen) >= mid)
+ goto impossible;
+ }
+ }
+ done:
+ env->rex = e;
+ return n;
+}
diff --git a/src/lib/libast/regex/regstat.c b/src/lib/libast/regex/regstat.c
new file mode 100644
index 0000000..c7f725f
--- /dev/null
+++ b/src/lib/libast/regex/regstat.c
@@ -0,0 +1,53 @@
+/***********************************************************************
+* *
+* This software is part of the ast package *
+* Copyright (c) 1985-2011 AT&T Intellectual Property *
+* and is licensed under the *
+* Eclipse Public License, Version 1.0 *
+* by AT&T Intellectual Property *
+* *
+* A copy of the License is available at *
+* http://www.eclipse.org/org/documents/epl-v10.html *
+* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
+* *
+* Information and Software Systems Research *
+* AT&T Research *
+* Florham Park NJ *
+* *
+* Glenn Fowler <gsf@research.att.com> *
+* David Korn <dgk@research.att.com> *
+* Phong Vo <kpv@research.att.com> *
+* *
+***********************************************************************/
+#pragma prototyped
+
+/*
+ * return p stat info
+ */
+
+#include "reglib.h"
+
+regstat_t*
+regstat(const regex_t* p)
+{
+ register Rex_t* e;
+
+ p->env->stats.re_flags = p->env->flags;
+ p->env->stats.re_info = 0;
+ e = p->env->rex;
+ if (e && e->type == REX_BM)
+ {
+ p->env->stats.re_record = p->env->rex->re.bm.size;
+ e = e->next;
+ }
+ else
+ p->env->stats.re_record = 0;
+ if (e && e->type == REX_BEG)
+ e = e->next;
+ if (e && e->type == REX_STRING)
+ e = e->next;
+ if (!e || e->type == REX_END && !e->next)
+ p->env->stats.re_info |= REG_LITERAL;
+ p->env->stats.re_record = (p && p->env && p->env->rex->type == REX_BM) ? p->env->rex->re.bm.size : -1;
+ return &p->env->stats;
+}
diff --git a/src/lib/libast/regex/regsub.c b/src/lib/libast/regex/regsub.c
new file mode 100644
index 0000000..8e89bea
--- /dev/null
+++ b/src/lib/libast/regex/regsub.c
@@ -0,0 +1,269 @@
+/***********************************************************************
+* *
+* This software is part of the ast package *
+* Copyright (c) 1985-2011 AT&T Intellectual Property *
+* and is licensed under the *
+* Eclipse Public License, Version 1.0 *
+* by AT&T Intellectual Property *
+* *
+* A copy of the License is available at *
+* http://www.eclipse.org/org/documents/epl-v10.html *
+* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
+* *
+* Information and Software Systems Research *
+* AT&T Research *
+* Florham Park NJ *
+* *
+* Glenn Fowler <gsf@research.att.com> *
+* David Korn <dgk@research.att.com> *
+* Phong Vo <kpv@research.att.com> *
+* *
+***********************************************************************/
+#pragma prototyped
+
+/*
+ * OBSOLETE Sfio_t buffer interface -- use regsubcomp(),regsubexec()
+ */
+
+#include "reglib.h"
+
+/*
+ * do a single substitution
+ */
+
+static int
+subold(register Sfio_t* dp, const char* op, register const char* sp, size_t nmatch, register regmatch_t* match, register regflags_t flags, int sre)
+{
+ register int c;
+ char* s;
+ char* e;
+ const char* b;
+ regflags_t f;
+
+ f = flags &= (REG_SUB_LOWER|REG_SUB_UPPER);
+ for (;;)
+ {
+ switch (c = *sp++)
+ {
+ case 0:
+ return 0;
+ case '~':
+ if (!sre || *sp != '(')
+ {
+ sfputc(dp, c);
+ continue;
+ }
+ b = sp - 1;
+ sp++;
+ break;
+ case '\\':
+ if (sre)
+ {
+ sfputc(dp, chresc(sp - 1, &s));
+ sp = (const char*)s;
+ continue;
+ }
+ if (*sp == '&')
+ {
+ c = *sp++;
+ sfputc(dp, c);
+ continue;
+ }
+ break;
+ case '&':
+ if (sre)
+ {
+ sfputc(dp, c);
+ continue;
+ }
+ sp--;
+ break;
+ default:
+ switch (flags)
+ {
+ case REG_SUB_UPPER:
+ if (islower(c))
+ c = toupper(c);
+ break;
+ case REG_SUB_LOWER:
+ if (isupper(c))
+ c = tolower(c);
+ break;
+ case REG_SUB_UPPER|REG_SUB_LOWER:
+ if (isupper(c))
+ c = tolower(c);
+ else if (islower(c))
+ c = toupper(c);
+ break;
+ }
+ sfputc(dp, c);
+ continue;
+ }
+ switch (c = *sp++)
+ {
+ case 0:
+ sp--;
+ continue;
+ case '&':
+ c = 0;
+ break;
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ c -= '0';
+ if (sre)
+ while (isdigit(*sp))
+ c = c * 10 + *sp++ - '0';
+ break;
+ case 'l':
+ if (sre && *sp != ')')
+ {
+ c = -1;
+ break;
+ }
+ if (c = *sp)
+ {
+ sp++;
+ if (isupper(c))
+ c = tolower(c);
+ sfputc(dp, c);
+ }
+ continue;
+ case 'u':
+ if (sre)
+ {
+ if (*sp != ')')
+ {
+ c = -1;
+ break;
+ }
+ sp++;
+ }
+ if (c = *sp)
+ {
+ sp++;
+ if (islower(c))
+ c = toupper(c);
+ sfputc(dp, c);
+ }
+ continue;
+ case 'E':
+ if (sre)
+ {
+ if (*sp != ')')
+ {
+ c = -1;
+ break;
+ }
+ sp++;
+ }
+ flags = f;
+ continue;
+ case 'L':
+ if (sre)
+ {
+ if (*sp != ')')
+ {
+ c = -1;
+ break;
+ }
+ sp++;
+ }
+ f = flags;
+ flags = REG_SUB_LOWER;
+ continue;
+ case 'U':
+ if (sre)
+ {
+ if (*sp != ')')
+ {
+ c = -1;
+ break;
+ }
+ sp++;
+ }
+ f = flags;
+ flags = REG_SUB_UPPER;
+ continue;
+ default:
+ if (!sre)
+ {
+ sfputc(dp, chresc(sp - 2, &s));
+ sp = (const char*)s;
+ continue;
+ }
+ sp--;
+ c = -1;
+ break;
+ }
+ if (sre)
+ {
+ if (c < 0 || *sp != ')')
+ {
+ for (; b < sp; b++)
+ sfputc(dp, *b);
+ continue;
+ }
+ sp++;
+ }
+ if (c >= nmatch)
+ return REG_ESUBREG;
+ s = (char*)op + match[c].rm_so;
+ e = (char*)op + match[c].rm_eo;
+ while (s < e)
+ {
+ c = *s++;
+ switch (flags)
+ {
+ case REG_SUB_UPPER:
+ if (islower(c))
+ c = toupper(c);
+ break;
+ case REG_SUB_LOWER:
+ if (isupper(c))
+ c = tolower(c);
+ break;
+ case REG_SUB_UPPER|REG_SUB_LOWER:
+ if (isupper(c))
+ c = tolower(c);
+ else if (islower(c))
+ c = toupper(c);
+ break;
+ }
+ sfputc(dp, c);
+ }
+ }
+}
+
+/*
+ * ed(1) style substitute using matches from last regexec()
+ */
+
+int
+regsub(const regex_t* p, Sfio_t* dp, const char* op, const char* sp, size_t nmatch, regmatch_t* match, regflags_t flags)
+{
+ int m;
+ int r;
+ int sre;
+
+ if ((p->env->flags & REG_NOSUB) || !nmatch)
+ return fatal(p->env->disc, REG_BADPAT, NiL);
+ m = (flags >> 16) & 0x3fff;
+ sre = !!(p->env->flags & REG_SHELL);
+ r = 0;
+ do
+ {
+ if (--m > 0)
+ sfwrite(dp, op, match->rm_eo);
+ else
+ {
+ sfwrite(dp, op, match->rm_so);
+ if (r = subold(dp, op, sp, nmatch, match, flags, sre))
+ return fatal(p->env->disc, r, NiL);
+ }
+ op += match->rm_eo;
+ } while ((m > 0 || (flags & REG_SUB_ALL)) && !(r = regexec(p, op, nmatch, match, p->env->flags|(match->rm_so == match->rm_eo ? REG_ADVANCE : 0))));
+ if (r && r != REG_NOMATCH)
+ return fatal(p->env->disc, r, NiL);
+ sfputr(dp, op, -1);
+ return 0;
+}
diff --git a/src/lib/libast/regex/regsubcomp.c b/src/lib/libast/regex/regsubcomp.c
new file mode 100644
index 0000000..73bde4c
--- /dev/null
+++ b/src/lib/libast/regex/regsubcomp.c
@@ -0,0 +1,377 @@
+/***********************************************************************
+* *
+* This software is part of the ast package *
+* Copyright (c) 1985-2011 AT&T Intellectual Property *
+* and is licensed under the *
+* Eclipse Public License, Version 1.0 *
+* by AT&T Intellectual Property *
+* *
+* A copy of the License is available at *
+* http://www.eclipse.org/org/documents/epl-v10.html *
+* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
+* *
+* Information and Software Systems Research *
+* AT&T Research *
+* Florham Park NJ *
+* *
+* Glenn Fowler <gsf@research.att.com> *
+* David Korn <dgk@research.att.com> *
+* Phong Vo <kpv@research.att.com> *
+* *
+***********************************************************************/
+#pragma prototyped
+
+/*
+ * posix regex ed(1) style substitute compile
+ */
+
+#include "reglib.h"
+
+static const regflags_t submap[] =
+{
+ 'g', REG_SUB_ALL,
+ 'l', REG_SUB_LOWER,
+ 'n', REG_SUB_NUMBER,
+ 'p', REG_SUB_PRINT,
+ 's', REG_SUB_STOP,
+ 'u', REG_SUB_UPPER,
+ 'w', REG_SUB_WRITE|REG_SUB_LAST,
+ 0, 0
+};
+
+int
+regsubflags(regex_t* p, register const char* s, char** e, int delim, register const regflags_t* map, int* pm, regflags_t* pf)
+{
+ register int c;
+ register const regflags_t* m;
+ regflags_t flags;
+ int minmatch;
+ regdisc_t* disc;
+
+ flags = pf ? *pf : 0;
+ minmatch = pm ? *pm : 0;
+ if (!map)
+ map = submap;
+ while (!(flags & REG_SUB_LAST))
+ {
+ if (!(c = *s++) || c == delim)
+ {
+ s--;
+ break;
+ }
+ else if (c >= '0' && c <= '9')
+ {
+ if (minmatch)
+ {
+ disc = p->env->disc;
+ regfree(p);
+ return fatal(disc, REG_EFLAGS, s - 1);
+ }
+ minmatch = c - '0';
+ while (*s >= '0' && *s <= '9')
+ minmatch = minmatch * 10 + *s++ - '0';
+ }
+ else
+ {
+ for (m = map; *m; m++)
+ if (*m++ == c)
+ {
+ if (flags & *m)
+ {
+ disc = p->env->disc;
+ regfree(p);
+ return fatal(disc, REG_EFLAGS, s - 1);
+ }
+ flags |= *m--;
+ break;
+ }
+ if (!*m)
+ {
+ s--;
+ break;
+ }
+ }
+ }
+ if (pf)
+ *pf = flags;
+ if (pm)
+ *pm = minmatch;
+ if (e)
+ *e = (char*)s;
+ return 0;
+}
+
+/*
+ * compile substitute rhs and optional flags
+ */
+
+int
+regsubcomp(regex_t* p, register const char* s, const regflags_t* map, int minmatch, regflags_t flags)
+{
+ register regsub_t* sub;
+ register int c;
+ register int d;
+ register char* t;
+ register regsubop_t* op;
+ char* e;
+ const char* r;
+ int sre;
+ int f;
+ int g;
+ int n;
+ int nops;
+ const char* o;
+ regdisc_t* disc;
+
+ disc = p->env->disc;
+ if (p->env->flags & REG_NOSUB)
+ {
+ regfree(p);
+ return fatal(disc, REG_BADPAT, NiL);
+ }
+ if (!(sub = (regsub_t*)alloc(p->env->disc, 0, sizeof(regsub_t) + strlen(s))) || !(sub->re_ops = (regsubop_t*)alloc(p->env->disc, 0, (nops = 8) * sizeof(regsubop_t))))
+ {
+ if (sub)
+ alloc(p->env->disc, sub, 0);
+ regfree(p);
+ return fatal(disc, REG_ESPACE, s);
+ }
+ sub->re_buf = sub->re_end = 0;
+ p->re_sub = sub;
+ p->env->sub = 1;
+ op = sub->re_ops;
+ o = s;
+ if (!(p->env->flags & REG_DELIMITED))
+ d = 0;
+ else
+ switch (d = *(s - 1))
+ {
+ case '\\':
+ case '\n':
+ case '\r':
+ regfree(p);
+ return fatal(disc, REG_EDELIM, s);
+ }
+ sre = p->env->flags & REG_SHELL;
+ t = sub->re_rhs;
+ if (d)
+ {
+ r = s;
+ for (;;)
+ {
+ if (!*s)
+ {
+ if (p->env->flags & REG_MUSTDELIM)
+ {
+ regfree(p);
+ return fatal(disc, REG_EDELIM, r);
+ }
+ break;
+ }
+ else if (*s == d)
+ {
+ flags |= REG_SUB_FULL;
+ s++;
+ break;
+ }
+ else if (*s++ == '\\' && !*s++)
+ {
+ regfree(p);
+ return fatal(disc, REG_EESCAPE, r);
+ }
+ }
+ if (*s)
+ {
+ if (n = regsubflags(p, s, &e, d, map, &minmatch, &flags))
+ return n;
+ s = (const char*)e;
+ }
+ p->re_npat = s - o;
+ s = r;
+ }
+ else
+ p->re_npat = 0;
+ op->op = f = g = flags & (REG_SUB_LOWER|REG_SUB_UPPER);
+ op->off = 0;
+ while ((c = *s++) != d)
+ {
+ again:
+ if (!c)
+ {
+ p->re_npat = s - o - 1;
+ break;
+ }
+ else if (c == '\\')
+ {
+ if (*s == c)
+ {
+ *t++ = *s++;
+ continue;
+ }
+ if ((c = *s++) == d)
+ goto again;
+ if (!c)
+ {
+ regfree(p);
+ return fatal(disc, REG_EESCAPE, s - 2);
+ }
+ if (c == '&')
+ {
+ *t++ = c;
+ continue;
+ }
+ }
+ else if (c == '&')
+ {
+ if (sre)
+ {
+ *t++ = c;
+ continue;
+ }
+ }
+ else
+ {
+ switch (op->op)
+ {
+ case REG_SUB_UPPER:
+ if (islower(c))
+ c = toupper(c);
+ break;
+ case REG_SUB_LOWER:
+ if (isupper(c))
+ c = tolower(c);
+ break;
+ case REG_SUB_UPPER|REG_SUB_LOWER:
+ if (isupper(c))
+ c = tolower(c);
+ else if (islower(c))
+ c = toupper(c);
+ break;
+ }
+ *t++ = c;
+ continue;
+ }
+ switch (c)
+ {
+ case 0:
+ s--;
+ continue;
+ case '&':
+ c = 0;
+ break;
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ c -= '0';
+ if (isdigit(*s) && (p->env->flags & REG_MULTIREF))
+ c = c * 10 + *s++ - '0';
+ break;
+ case 'l':
+ if (c = *s)
+ {
+ s++;
+ if (isupper(c))
+ c = tolower(c);
+ *t++ = c;
+ }
+ continue;
+ case 'u':
+ if (c = *s)
+ {
+ s++;
+ if (islower(c))
+ c = toupper(c);
+ *t++ = c;
+ }
+ continue;
+ case 'E':
+ f = g;
+ set:
+ if ((op->len = (t - sub->re_rhs) - op->off) && (n = ++op - sub->re_ops) >= nops)
+ {
+ if (!(sub->re_ops = (regsubop_t*)alloc(p->env->disc, sub->re_ops, (nops *= 2) * sizeof(regsubop_t))))
+ {
+ regfree(p);
+ return fatal(disc, REG_ESPACE, NiL);
+ }
+ op = sub->re_ops + n;
+ }
+ op->op = f;
+ op->off = t - sub->re_rhs;
+ continue;
+ case 'L':
+ g = f;
+ f = REG_SUB_LOWER;
+ goto set;
+ case 'U':
+ g = f;
+ f = REG_SUB_UPPER;
+ goto set;
+ default:
+ if (!sre)
+ {
+ *t++ = chresc(s - 2, &e);
+ s = (const char*)e;
+ continue;
+ }
+ s--;
+ c = -1;
+ break;
+ }
+ if (c > p->re_nsub)
+ {
+ regfree(p);
+ return fatal(disc, REG_ESUBREG, s - 1);
+ }
+ if ((n = op - sub->re_ops) >= (nops - 2))
+ {
+ if (!(sub->re_ops = (regsubop_t*)alloc(p->env->disc, sub->re_ops, (nops *= 2) * sizeof(regsubop_t))))
+ {
+ regfree(p);
+ return fatal(disc, REG_ESPACE, NiL);
+ }
+ op = sub->re_ops + n;
+ }
+ if (op->len = (t - sub->re_rhs) - op->off)
+ op++;
+ op->op = f;
+ op->off = c;
+ op->len = 0;
+ op++;
+ op->op = f;
+ op->off = t - sub->re_rhs;
+ }
+ if ((op->len = (t - sub->re_rhs) - op->off) && (n = ++op - sub->re_ops) >= nops)
+ {
+ if (!(sub->re_ops = (regsubop_t*)alloc(p->env->disc, sub->re_ops, (nops *= 2) * sizeof(regsubop_t))))
+ {
+ regfree(p);
+ return fatal(disc, REG_ESPACE, NiL);
+ }
+ op = sub->re_ops + n;
+ }
+ op->len = -1;
+ sub->re_flags = flags;
+ sub->re_min = minmatch;
+ return 0;
+}
+
+void
+regsubfree(regex_t* p)
+{
+ Env_t* env;
+ regsub_t* sub;
+
+ if (p && (env = p->env) && env->sub && (sub = p->re_sub))
+ {
+ env->sub = 0;
+ p->re_sub = 0;
+ if (!(env->disc->re_flags & REG_NOFREE))
+ {
+ if (sub->re_buf)
+ alloc(env->disc, sub->re_buf, 0);
+ if (sub->re_ops)
+ alloc(env->disc, sub->re_ops, 0);
+ alloc(env->disc, sub, 0);
+ }
+ }
+}
diff --git a/src/lib/libast/regex/regsubexec.c b/src/lib/libast/regex/regsubexec.c
new file mode 100644
index 0000000..78caeba
--- /dev/null
+++ b/src/lib/libast/regex/regsubexec.c
@@ -0,0 +1,196 @@
+/***********************************************************************
+* *
+* This software is part of the ast package *
+* Copyright (c) 1985-2011 AT&T Intellectual Property *
+* and is licensed under the *
+* Eclipse Public License, Version 1.0 *
+* by AT&T Intellectual Property *
+* *
+* A copy of the License is available at *
+* http://www.eclipse.org/org/documents/epl-v10.html *
+* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
+* *
+* Information and Software Systems Research *
+* AT&T Research *
+* Florham Park NJ *
+* *
+* Glenn Fowler <gsf@research.att.com> *
+* David Korn <dgk@research.att.com> *
+* Phong Vo <kpv@research.att.com> *
+* *
+***********************************************************************/
+#pragma prototyped
+
+/*
+ * posix regex ed(1) style substitute execute
+ */
+
+#include "reglib.h"
+
+#define NEED(p,b,n,r) \
+ do \
+ { \
+ if (((b)->re_end - (b)->re_cur) < (n)) \
+ { \
+ size_t o = (b)->re_cur - (b)->re_buf; \
+ size_t a = ((b)->re_end - (b)->re_buf); \
+ if (a < n) \
+ a = roundof(n, 128); \
+ a *= 2; \
+ if (!((b)->re_buf = alloc(p->env->disc, (b)->re_buf, a))) \
+ { \
+ (b)->re_buf = (b)->re_cur = (b)->re_end = 0; \
+ c = REG_ESPACE; \
+ r; \
+ } \
+ (b)->re_cur = (b)->re_buf + o; \
+ (b)->re_end = (b)->re_buf + a; \
+ } \
+ } while (0)
+
+#define PUTC(p,b,x,r) \
+ do \
+ { \
+ NEED(p, b, 1, r); \
+ *(b)->re_cur++ = (x); \
+ } while (0)
+
+#define PUTS(p,b,x,z,r) \
+ do if (z) \
+ { \
+ NEED(p, b, z, r); \
+ memcpy((b)->re_cur, x, z); \
+ (b)->re_cur += (z); \
+ } while (0)
+
+/*
+ * do a single substitution
+ */
+
+static int
+sub(const regex_t* p, register regsub_t* b, const char* ss, register regsubop_t* op, size_t nmatch, register regmatch_t* match)
+{
+ register char* s;
+ register char* e;
+ register int c;
+
+ for (;; op++)
+ {
+ switch (op->len)
+ {
+ case -1:
+ break;
+ case 0:
+ if (op->off >= nmatch)
+ return REG_ESUBREG;
+ if ((c = match[op->off].rm_so) < 0)
+ continue;
+ s = (char*)ss + c;
+ if ((c = match[op->off].rm_eo) < 0)
+ continue;
+ e = (char*)ss + c;
+ NEED(p, b, e - s, return c);
+ switch (op->op)
+ {
+ case REG_SUB_UPPER:
+ while (s < e)
+ {
+ c = *s++;
+ if (islower(c))
+ c = toupper(c);
+ *b->re_cur++ = c;
+ }
+ break;
+ case REG_SUB_LOWER:
+ while (s < e)
+ {
+ c = *s++;
+ if (isupper(c))
+ c = tolower(c);
+ *b->re_cur++ = c;
+ }
+ break;
+ case REG_SUB_UPPER|REG_SUB_LOWER:
+ while (s < e)
+ {
+ c = *s++;
+ if (isupper(c))
+ c = tolower(c);
+ else if (islower(c))
+ c = toupper(c);
+ *b->re_cur++ = c;
+ }
+ break;
+ default:
+ while (s < e)
+ *b->re_cur++ = *s++;
+ break;
+ }
+ continue;
+ default:
+ NEED(p, b, op->len, return c);
+ s = b->re_rhs + op->off;
+ e = s + op->len;
+ while (s < e)
+ *b->re_cur++ = *s++;
+ continue;
+ }
+ break;
+ }
+ return 0;
+}
+
+/*
+ * ed(1) style substitute using matches from last regexec()
+ */
+
+int
+regsubexec(const regex_t* p, const char* s, size_t nmatch, regmatch_t* match)
+{
+ register int c;
+ register regsub_t* b;
+ const char* e;
+ int m;
+
+ if (!p->env->sub || (p->env->flags & REG_NOSUB) || !nmatch)
+ return fatal(p->env->disc, REG_BADPAT, NiL);
+ b = p->re_sub;
+ m = b->re_min;
+ b->re_cur = b->re_buf;
+ e = (const char*)p->env->end;
+ c = 0;
+ for (;;)
+ {
+ if (--m > 0)
+ PUTS(p, b, s, match->rm_eo, return fatal(p->env->disc, c, NiL));
+ else
+ {
+ PUTS(p, b, s, match->rm_so, return fatal(p->env->disc, c, NiL));
+ if (!c && (c = sub(p, b, s, b->re_ops, nmatch, match)))
+ return fatal(p->env->disc, c, NiL);
+ }
+ s += match->rm_eo;
+ if (m <= 0 && !(b->re_flags & REG_SUB_ALL) || !*s)
+ break;
+ if (c = regnexec(p, s, e - s, nmatch, match, p->env->flags|(match->rm_so == match->rm_eo ? REG_ADVANCE : 0)))
+ {
+ if (c != REG_NOMATCH)
+ return fatal(p->env->disc, c, NiL);
+ break;
+ }
+ if (!match->rm_so && !match->rm_eo && *s && m <= 1)
+ {
+ match->rm_so = match->rm_eo = 1;
+ c = 1;
+ }
+ }
+ while (s < e)
+ {
+ c = *s++;
+ PUTC(p, b, c, return fatal(p->env->disc, c, NiL));
+ }
+ NEED(p, b, 1, return fatal(p->env->disc, c, NiL));
+ *b->re_cur = 0;
+ b->re_len = b->re_cur - b->re_buf;
+ return 0;
+}