summaryrefslogtreecommitdiff
path: root/usr/src/lib/libast/common/regex/regdecomp.c
diff options
context:
space:
mode:
authorchin <none@none>2007-08-17 12:01:52 -0700
committerchin <none@none>2007-08-17 12:01:52 -0700
commitda2e3ebdc1edfbc5028edf1354e7dd2fa69a7968 (patch)
tree5280d3b78e289fe9551371ab6e7f15ef9944ea14 /usr/src/lib/libast/common/regex/regdecomp.c
parent073dbf9103ef2a2b05d8a16e2d26db04e0374b0e (diff)
downloadillumos-joyent-da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968.tar.gz
6437624 RFE: Add ksh93 (as /usr/bin/ksh93) and libshell.so to OS/Net
6505835 AST tools and library (libpp) required for creating l10n messages for ksh93 PSARC/2006/550 Korn Shell 93 Integration PSARC/2006/587 /etc/ksh.kshrc for ksh93 PSARC/2007/035 ksh93 Amendments Contributed by Roland Mainz <roland.mainz@nrubsig.org> --HG-- rename : usr/src/lib/libcmd/common/mapfile-vers => deleted_files/usr/src/lib/libcmd/common/mapfile-vers rename : usr/src/lib/libcmd/common/placeholder.c => deleted_files/usr/src/lib/libcmd/common/placeholder.c
Diffstat (limited to 'usr/src/lib/libast/common/regex/regdecomp.c')
-rw-r--r--usr/src/lib/libast/common/regex/regdecomp.c448
1 files changed, 448 insertions, 0 deletions
diff --git a/usr/src/lib/libast/common/regex/regdecomp.c b/usr/src/lib/libast/common/regex/regdecomp.c
new file mode 100644
index 0000000000..8f5a2af04e
--- /dev/null
+++ b/usr/src/lib/libast/common/regex/regdecomp.c
@@ -0,0 +1,448 @@
+/***********************************************************************
+* *
+* This software is part of the ast package *
+* Copyright (c) 1985-2007 AT&T Knowledge Ventures *
+* and is licensed under the *
+* Common Public License, Version 1.0 *
+* by AT&T Knowledge Ventures *
+* *
+* A copy of the License is available at *
+* http://www.opensource.org/licenses/cpl1.0.txt *
+* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
+* *
+* Information and Software Systems Research *
+* AT&T Research *
+* Florham Park NJ *
+* *
+* Glenn Fowler <gsf@research.att.com> *
+* David Korn <dgk@research.att.com> *
+* Phong Vo <kpv@research.att.com> *
+* *
+***********************************************************************/
+#pragma prototyped
+
+/*
+ * posix regex decompiler
+ */
+
+#include "reglib.h"
+
+#undef ismeta
+#define ismeta(c,t,e,d) (state.magic[c] && state.magic[c][(t)+(e)] >= T_META || (c) == (d))
+#define meta(f,c,t,e,d) do { if (ismeta(c,t,e,d)) sfputc(f, '\\'); sfputc(f, c); } while (0)
+
+static void
+detrie(Trie_node_t* x, Sfio_t* sp, char* b, char* p, char* e, int delimiter)
+{
+ register Trie_node_t* y;
+ char* o;
+ int k;
+
+ o = p;
+ k = 1;
+ do
+ {
+ if (k)
+ {
+ o = p;
+ if (p < e)
+ *p++ = x->c;
+ }
+ sfputc(sp, x->c);
+ for (y = x->sib; y; y = y->sib)
+ {
+ sfputc(sp, '|');
+ sfputc(sp, '<');
+ sfwrite(sp, b, p - b);
+ sfputc(sp, '>');
+ detrie(y, sp, b, p, e, delimiter);
+ }
+ if (x->end && x->son)
+ {
+ sfputc(sp, '|');
+ sfputc(sp, '{');
+ sfwrite(sp, b, p - b);
+ sfputc(sp, '}');
+ p = o;
+ }
+ } while (x = x->son);
+}
+
+static int
+decomp(register Rex_t* e, Sfio_t* sp, int type, int delimiter, regflags_t flags)
+{
+ Rex_t* q;
+ unsigned char* s;
+ unsigned char* t;
+ int c;
+ int d;
+ int cb;
+ int cd;
+ int cr;
+ int ib;
+ int ie;
+ int nb;
+ int ne;
+ unsigned char ic[2*UCHAR_MAX];
+ unsigned char nc[2*UCHAR_MAX];
+
+ do
+ {
+ switch (e->type)
+ {
+ case REX_ALT:
+ if (decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags))
+ return 1;
+ sfputc(sp, '|');
+ if (e->re.group.expr.binary.right && decomp(e->re.group.expr.binary.right, sp, type, delimiter, flags))
+ return 1;
+ break;
+ case REX_BACK:
+ sfprintf(sp, "\\%d", e->lo);
+ break;
+ case REX_BEG:
+ if (type < SRE)
+ sfputc(sp, '^');
+ break;
+ case REX_END:
+ if (type < SRE)
+ sfputc(sp, '$');
+ break;
+ case REX_WBEG:
+ meta(sp, '<', type, 1, delimiter);
+ break;
+ case REX_WEND:
+ meta(sp, '<', type, 1, delimiter);
+ break;
+ case REX_WORD:
+ sfprintf(sp, "\\w");
+ break;
+ case REX_CLASS:
+ case REX_COLL_CLASS:
+ case REX_ONECHAR:
+ case REX_DOT:
+ case REX_REP:
+ if (type >= SRE)
+ {
+ c = ')';
+ if (e->hi == RE_DUP_INF)
+ {
+ if (!e->lo)
+ sfputc(sp, '*');
+ else if (e->lo == 1)
+ sfputc(sp, '+');
+ else
+ sfprintf(sp, "{%d,}", e->lo);
+ }
+ else if (e->hi != 1)
+ sfprintf(sp, "{%d,%d}", e->lo, e->hi);
+ else if (e->lo == 0)
+ sfputc(sp, '?');
+ else
+ c = 0;
+ }
+ switch (e->type)
+ {
+ case REX_REP:
+ if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
+ return 1;
+ break;
+ case REX_CLASS:
+ sfputc(sp, '[');
+ nb = ne = ib = ie = -2;
+ cb = cd = cr = 0;
+ s = nc;
+ t = ic;
+ for (c = 0; c <= UCHAR_MAX; c++)
+ if (settst(e->re.charclass, c))
+ {
+ if (c == ']')
+ cb = 1;
+ else if (c == '-')
+ cr = 1;
+ else if (c == delimiter)
+ cd = 1;
+ else if (nb < 0)
+ ne = nb = c;
+ else if (ne == (c - 1))
+ ne = c;
+ else
+ {
+ if (ne == nb)
+ *s++ = ne;
+ else
+ {
+ *s++ = nb;
+ *s++ = '-';
+ *s++ = ne;
+ }
+ ne = nb = c;
+ }
+ }
+ else
+ {
+ if (c == ']')
+ cb = -1;
+ else if (c == '-')
+ cr = -1;
+ else if (c == delimiter)
+ cd = -1;
+ else if (ib < 0)
+ ie = ib = c;
+ else if (ie == (c - 1))
+ ie = c;
+ else
+ {
+ if (ie == ib)
+ *t++ = ie;
+ else
+ {
+ *t++ = ib;
+ *t++ = '-';
+ *t++ = ie;
+ }
+ ie = ib = c;
+ }
+ }
+ if (nb >= 0)
+ {
+ *s++ = nb;
+ if (ne != nb)
+ {
+ *s++ = '-';
+ *s++ = ne;
+ }
+ }
+ if (ib >= 0)
+ {
+ *t++ = ib;
+ if (ie != ib)
+ {
+ *t++ = '-';
+ *t++ = ie;
+ }
+ }
+ if ((t - ic + 1) < (s - nc + (nc[0] == '^')))
+ {
+ sfputc(sp, '^');
+ if (cb < 0)
+ sfputc(sp, ']');
+ if (cr < 0)
+ sfputc(sp, '-');
+ if (cd < 0)
+ {
+ if (flags & REG_ESCAPE)
+ sfputc(sp, '\\');
+ sfputc(sp, delimiter);
+ }
+ sfwrite(sp, ic, t - ic);
+ }
+ else
+ {
+ if (cb > 0)
+ sfputc(sp, ']');
+ if (cr > 0)
+ sfputc(sp, '-');
+ if (cd > 0)
+ {
+ if (flags & REG_ESCAPE)
+ sfputc(sp, '\\');
+ sfputc(sp, delimiter);
+ }
+ if (nc[0] == '^')
+ {
+ sfwrite(sp, nc + 1, s - nc - 1);
+ sfputc(sp, '^');
+ }
+ else
+ sfwrite(sp, nc, s - nc);
+ }
+ sfputc(sp, ']');
+ break;
+ case REX_COLL_CLASS:
+ break;
+ case REX_ONECHAR:
+ meta(sp, e->re.onechar, type, 0, delimiter);
+ break;
+ case REX_DOT:
+ sfputc(sp, '.');
+ break;
+ }
+ if (type < SRE)
+ {
+ if (e->hi == RE_DUP_INF)
+ {
+ if (!e->lo)
+ sfputc(sp, '*');
+ else if (e->lo == 1 && ismeta('+', type, 0, delimiter))
+ meta(sp, '+', type, 1, delimiter);
+ else
+ {
+ meta(sp, '{', type, 1, delimiter);
+ sfprintf(sp, "%d,", e->lo);
+ meta(sp, '}', type, 1, delimiter);
+ }
+ }
+ else if (e->hi != 1 || e->lo == 0 && !ismeta('?', type, 0, delimiter))
+ {
+ meta(sp, '{', type, 1, delimiter);
+ sfprintf(sp, "%d,%d", e->lo, e->hi);
+ meta(sp, '}', type, 1, delimiter);
+ }
+ else if (e->lo == 0)
+ meta(sp, '?', type, 1, delimiter);
+ }
+ else if (c)
+ sfputc(sp, c);
+ break;
+ case REX_STRING:
+ case REX_KMP:
+ t = (s = e->re.string.base) + e->re.string.size;
+ while (s < t)
+ {
+ c = *s++;
+ meta(sp, c, type, 0, delimiter);
+ }
+ break;
+ case REX_TRIE:
+ ib = 0;
+ for (c = 0; c <= UCHAR_MAX; c++)
+ if (e->re.trie.root[c])
+ {
+ char pfx[1024];
+
+ if (ib)
+ sfputc(sp, '|');
+ else
+ ib = 1;
+ detrie(e->re.trie.root[c], sp, pfx, pfx, &pfx[sizeof(pfx)], delimiter);
+ }
+ break;
+ case REX_NEG:
+ if (type >= SRE)
+ sfprintf(sp, "!(");
+ if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
+ return 1;
+ if (type >= SRE)
+ sfputc(sp, ')');
+ else
+ sfputc(sp, '!');
+ break;
+ case REX_CONJ:
+ if (decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags))
+ return 1;
+ sfputc(sp, '&');
+ if (decomp(e->re.group.expr.binary.right, sp, type, delimiter, flags))
+ return 1;
+ break;
+ case REX_GROUP:
+ if (type >= SRE)
+ sfputc(sp, '@');
+ meta(sp, '(', type, 1, delimiter);
+ if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
+ return 1;
+ meta(sp, ')', type, 1, delimiter);
+ break;
+ case REX_GROUP_AHEAD:
+ case REX_GROUP_AHEAD_NOT:
+ case REX_GROUP_BEHIND:
+ case REX_GROUP_BEHIND_NOT:
+ meta(sp, '(', type, 1, delimiter);
+ sfputc(sp, '?');
+ if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
+ return 1;
+ meta(sp, ')', type, 1, delimiter);
+ break;
+ case REX_GROUP_COND:
+ meta(sp, '(', type, 1, delimiter);
+ sfputc(sp, '?');
+ if (e->re.group.expr.binary.left && decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags))
+ return 1;
+ if (q = e->re.group.expr.binary.right)
+ {
+ sfputc(sp, ':');
+ if (q->re.group.expr.binary.left && decomp(q->re.group.expr.binary.left, sp, type, delimiter, flags))
+ return 1;
+ sfputc(sp, ':');
+ if (q->re.group.expr.binary.right && decomp(q->re.group.expr.binary.right, sp, type, delimiter, flags))
+ return 1;
+ }
+ meta(sp, ')', type, 1, delimiter);
+ break;
+ case REX_GROUP_CUT:
+ meta(sp, '(', type, 1, delimiter);
+ sfputc(sp, '?');
+ if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
+ return 1;
+ meta(sp, ')', type, 1, delimiter);
+ break;
+ case REX_BM:
+ break;
+ default:
+ sfprintf(sp, "<ERROR:REX_%d>", e->type);
+ break;
+ }
+ } while (e = e->next);
+ return 0;
+}
+
+/*
+ * reconstruct pattern from compiled re p into sp
+ */
+
+size_t
+regdecomp(regex_t* p, regflags_t flags, char* buf, size_t n)
+{
+ Sfio_t* sp;
+ char* s;
+ int type;
+ int delimiter;
+ size_t r;
+
+ if (!(sp = sfstropen()))
+ return 0;
+ if (flags < 0)
+ flags = p->env->flags;
+ switch (flags & (REG_AUGMENTED|REG_EXTENDED|REG_SHELL))
+ {
+ case 0:
+ type = BRE;
+ break;
+ case REG_AUGMENTED:
+ case REG_AUGMENTED|REG_EXTENDED:
+ type = ARE;
+ break;
+ case REG_EXTENDED:
+ type = ERE;
+ break;
+ case REG_SHELL:
+ type = SRE;
+ break;
+ default:
+ type = KRE;
+ break;
+ }
+ if (flags & REG_DELIMITED)
+ {
+ delimiter = '/';
+ sfputc(sp, delimiter);
+ }
+ else
+ delimiter = 0;
+ if (decomp(p->env->rex, sp, type, delimiter, flags))
+ r = 0;
+ else
+ {
+ if (delimiter)
+ sfputc(sp, delimiter);
+ if ((r = sfstrtell(sp) + 1) <= n)
+ {
+ if (!(s = sfstruse(sp)))
+ r = 0;
+ else
+ memcpy(buf, s, r);
+ }
+ }
+ sfstrclose(sp);
+ return r;
+}