3 files changed, 304 insertions, 251 deletions
diff --git a/usr/src/lib/libc/port/regex/engine.c b/usr/src/lib/libc/port/regex/engine.c
index c66c08e59e..2e04a9085b 100644
--- a/usr/src/lib/libc/port/regex/engine.c
+++ b/usr/src/lib/libc/port/regex/engine.c
@@ -34,6 +34,8 @@
  * SUCH DAMAGE.
  */
 
+#include <stdbool.h>
+
 /*
  * The matching engine and friends.  This file is #included by regexec.c
  * after suitable #defines of a variety of macros used herein, so that
@@ -43,8 +45,7 @@
 
 #ifdef SNAMES
 #define	matcher	smatcher
-#define	fast	sfast
-#define	slow	sslow
+#define	walk	swalk
 #define	dissect	sdissect
 #define	backref	sbackref
 #define	step	sstep
@@ -54,8 +55,7 @@
 #endif
 #ifdef LNAMES
 #define	matcher	lmatcher
-#define	fast	lfast
-#define	slow	lslow
+#define	walk	lwalk
 #define	dissect	ldissect
 #define	backref	lbackref
 #define	step	lstep
@@ -65,8 +65,7 @@
 #endif
 #ifdef MNAMES
 #define	matcher	mmatcher
-#define	fast	mfast
-#define	slow	mslow
+#define	walk	mwalk
 #define	dissect	mdissect
 #define	backref	mbackref
 #define	step	mstep
@@ -104,10 +103,8 @@ static const char *dissect(struct match *, const char *, const char *,
     sopno, sopno);
 static const char *backref(struct match *, const char *, const char *, sopno,
     sopno, sopno, int);
-static const char *fast(struct match *, const char *, const char *, sopno,
-    sopno);
-static const char *slow(struct match *, const char *, const char *, sopno,
-    sopno);
+static const char *walk(struct match *m, const char *start, const char *stop,
+    sopno startst, sopno stopst, bool fast);
 static states step(struct re_guts *, sopno, sopno, states, wint_t, states);
 #define	MAX_RECURSION	100
 #define	BOL	(OUT-1)
@@ -253,7 +250,7 @@ matcher(struct re_guts *g, const char *string, size_t nmatch,
 
 	/* this loop does only one repetition except for backrefs */
 	for (;;) {
-		endp = fast(m, start, stop, gf, gl);
+		endp = walk(m, start, stop, gf, gl, true);
 		if (endp == NULL) {		/* a miss */
 			if (m->pmatch != NULL)
 				free((char *)m->pmatch);
@@ -269,7 +266,7 @@ matcher(struct re_guts *g, const char *string, size_t nmatch,
 		assert(m->coldp != NULL);
 		for (;;) {
 			NOTE("finding start");
-			endp = slow(m, m->coldp, stop, gf, gl);
+			endp = walk(m, m->coldp, stop, gf, gl, false);
 			if (endp != NULL)
 				break;
 			assert(m->coldp < m->endp);
@@ -314,7 +311,7 @@ matcher(struct re_guts *g, const char *string, size_t nmatch,
 			if (dp != NULL || endp <= m->coldp)
 				break;		/* defeat */
 			NOTE("backoff");
-			endp = slow(m, m->coldp, endp-1, gf, gl);
+			endp = walk(m, m->coldp, endp-1, gf, gl, false);
 			if (endp == NULL)
 				break;		/* defeat */
 			/* try it on a shorter possibility */
@@ -395,7 +392,7 @@ dissect(struct match *m, const char *start, const char *stop, sopno startst,
 			es += OPND(m->g->strip[es]);
 			break;
 		case OCH_:
-			while (OP(m->g->strip[es]) != O_CH)
+			while (OP(m->g->strip[es]) != (sop)O_CH)
 				es += OPND(m->g->strip[es]);
 			break;
 		}
@@ -427,10 +424,10 @@ dissect(struct match *m, const char *start, const char *stop, sopno startst,
 			stp = stop;
 			for (;;) {
 				/* how long could this one be? */
-				rest = slow(m, sp, stp, ss, es);
+				rest = walk(m, sp, stp, ss, es, false);
 				assert(rest != NULL);	/* it did match */
 				/* could the rest match the rest? */
-				tail = slow(m, rest, stop, es, stopst);
+				tail = walk(m, rest, stop, es, stopst, false);
 				if (tail == stop)
 					break;		/* yes! */
 				/* no -- try a shorter match for this one */
@@ -440,12 +437,9 @@ dissect(struct match *m, const char *start, const char *stop, sopno startst,
 			ssub = ss + 1;
 			esub = es - 1;
 			/* did innards match? */
-			if (slow(m, sp, rest, ssub, esub) != NULL) {
+			if (walk(m, sp, rest, ssub, esub, false) != NULL) {
 				dp = dissect(m, sp, rest, ssub, esub);
 				assert(dp == rest);
-#if defined(__lint)
-				(void) dp;
-#endif
 			} else		/* no */
 				assert(sp == rest);
 			sp = rest;
@@ -454,10 +448,10 @@ dissect(struct match *m, const char *start, const char *stop, sopno startst,
 			stp = stop;
 			for (;;) {
 				/* how long could this one be? */
-				rest = slow(m, sp, stp, ss, es);
+				rest = walk(m, sp, stp, ss, es, false);
 				assert(rest != NULL);	/* it did match */
 				/* could the rest match the rest? */
-				tail = slow(m, rest, stop, es, stopst);
+				tail = walk(m, rest, stop, es, stopst, false);
 				if (tail == stop)
 					break;		/* yes! */
 				/* no -- try a shorter match for this one */
@@ -469,7 +463,7 @@ dissect(struct match *m, const char *start, const char *stop, sopno startst,
 			ssp = sp;
 			oldssp = ssp;
 			for (;;) {	/* find last match of innards */
-				sep = slow(m, ssp, rest, ssub, esub);
+				sep = walk(m, ssp, rest, ssub, esub, false);
 				if (sep == NULL || sep == ssp)
 					break;	/* failed or matched null */
 				oldssp = ssp;	/* on to next try */
@@ -481,7 +475,7 @@ dissect(struct match *m, const char *start, const char *stop, sopno startst,
 				ssp = oldssp;
 			}
 			assert(sep == rest);	/* must exhaust substring */
-			assert(slow(m, ssp, sep, ssub, esub) == rest);
+			assert(walk(m, ssp, sep, ssub, esub, false) == rest);
 			dp = dissect(m, ssp, sep, ssub, esub);
 			assert(dp == sep);
 			sp = rest;
@@ -490,10 +484,10 @@ dissect(struct match *m, const char *start, const char *stop, sopno startst,
 			stp = stop;
 			for (;;) {
 				/* how long could this one be? */
-				rest = slow(m, sp, stp, ss, es);
+				rest = walk(m, sp, stp, ss, es, false);
 				assert(rest != NULL);	/* it did match */
 				/* could the rest match the rest? */
-				tail = slow(m, rest, stop, es, stopst);
+				tail = walk(m, rest, stop, es, stopst, false);
 				if (tail == stop)
 					break;		/* yes! */
 				/* no -- try a shorter match for this one */
@@ -504,7 +498,8 @@ dissect(struct match *m, const char *start, const char *stop, sopno startst,
 			esub = ss + OPND(m->g->strip[ss]) - 1;
 			assert(OP(m->g->strip[esub]) == OOR1);
 			for (;;) {	/* find first matching branch */
-				if (slow(m, sp, rest, ssub, esub) == rest)
+				if (walk(m, sp, rest, ssub, esub,
+				    false) == rest)
 					break;	/* it matched all of it */
 				/* that one missed, try next one */
 				assert(OP(m->g->strip[esub]) == OOR1);
@@ -512,7 +507,7 @@ dissect(struct match *m, const char *start, const char *stop, sopno startst,
 				assert(OP(m->g->strip[esub]) == OOR2);
 				ssub = esub + 1;
 				esub += OPND(m->g->strip[esub]);
-				if (OP(m->g->strip[esub]) == OOR2)
+				if (OP(m->g->strip[esub]) == (sop)OOR2)
 					esub--;
 				else
 					assert(OP(m->g->strip[esub]) == O_CH);
@@ -637,7 +632,7 @@ backref(struct match *m, const char *start, const char *stop, sopno startst,
 			do {
 				assert(OP(s) == OOR2);
 				ss += OPND(s);
-			} while (OP(s = m->g->strip[ss]) != O_CH);
+			} while (OP(s = m->g->strip[ss]) != (sop)O_CH);
 			/* note that the ss++ gets us past the O_CH */
 			break;
 		default:	/* have to make a choice */
@@ -670,7 +665,7 @@ backref(struct match *m, const char *start, const char *stop, sopno startst,
 		ssp = m->offp + m->pmatch[i].rm_so;
 		if (memcmp(sp, ssp, len) != 0)
 			return (NULL);
-		while (m->g->strip[ss] != SOP(O_BACK, i))
+		while (m->g->strip[ss] != (sop)SOP(O_BACK, i))
 			ss++;
 		return (backref(m, sp+len, stop, ss+1, stopst, lev, rec));
 	case OQUEST_:		/* to null or not */
@@ -701,13 +696,13 @@ backref(struct match *m, const char *start, const char *stop, sopno startst,
 			if (dp != NULL)
 				return (dp);
 			/* that one missed, try next one */
-			if (OP(m->g->strip[esub]) == O_CH)
+			if (OP(m->g->strip[esub]) == (sop)O_CH)
 				return (NULL);	/* there is none */
 			esub++;
-			assert(OP(m->g->strip[esub]) == OOR2);
+			assert(OP(m->g->strip[esub]) == (sop)OOR2);
 			ssub = esub + 1;
 			esub += OPND(m->g->strip[esub]);
-			if (OP(m->g->strip[esub]) == OOR2)
+			if (OP(m->g->strip[esub]) == (sop)OOR2)
 				esub--;
 			else
 				assert(OP(m->g->strip[esub]) == O_CH);
@@ -745,115 +740,15 @@ backref(struct match *m, const char *start, const char *stop, sopno startst,
 }
 
 /*
- * fast - step through the string at top speed
+ * Step through the string either quickly or slowly.  Returns where it ended
+ * or NULL.
  */
 static const char *
-fast(struct match *m, const char *start, const char *stop, sopno startst,
-    sopno stopst)
+walk(struct match *m, const char *start, const char *stop, sopno startst,
+    sopno stopst, bool fast)
 {
 	states st = m->st;
 	states fresh = m->fresh;
-	states tmp = m->tmp;
-	const char *p = start;
-	wint_t c;
-	wint_t lastc;		/* previous c */
-	wint_t flagch;
-	int i;
-	const char *coldp;	/* last p after which no match was underway */
-	size_t clen;
-
-	CLEAR(st);
-	SET1(st, startst);
-	SP("fast", st, *p);
-	st = step(m->g, startst, stopst, st, NOTHING, st);
-	ASSIGN(fresh, st);
-	SP("start", st, *p);
-	coldp = NULL;
-	if (start == m->offp || (start == m->beginp && !(m->eflags&REG_NOTBOL)))
-		c = OUT;
-	else {
-		/*
-		 * XXX Wrong if the previous character was multi-byte.
-		 * Newline never is (in encodings supported by FreeBSD),
-		 * so this only breaks the ISWORD tests below.
-		 */
-		c = (uch)*(start - 1);
-	}
-	for (;;) {
-		/* next character */
-		lastc = c;
-		if (p == m->endp) {
-			clen = 0;
-			c = OUT;
-		} else
-			clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);
-		if (EQ(st, fresh))
-			coldp = p;
-
-		/* is there an EOL and/or BOL between lastc and c? */
-		flagch = '\0';
-		i = 0;
-		if ((lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
-		    (lastc == OUT && !(m->eflags&REG_NOTBOL))) {
-			flagch = BOL;
-			i = m->g->nbol;
-		}
-		if ((c == '\n' && m->g->cflags&REG_NEWLINE) ||
-		    (c == OUT && !(m->eflags&REG_NOTEOL))) {
-			flagch = (flagch == BOL) ? BOLEOL : EOL;
-			i += m->g->neol;
-		}
-		if (i != 0) {
-			for (; i > 0; i--)
-				st = step(m->g, startst, stopst, st,
-				    flagch, st);
-			SP("boleol", st, c);
-		}
-
-		/* how about a word boundary? */
-		if ((flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
-		    (c != OUT && ISWORD(c))) {
-			flagch = BOW;
-		}
-		if ((lastc != OUT && ISWORD(lastc)) &&
-		    (flagch == EOL || (c != OUT && !ISWORD(c)))) {
-			flagch = EOW;
-		}
-		if (flagch == BOW || flagch == EOW) {
-			st = step(m->g, startst, stopst, st, flagch, st);
-			SP("boweow", st, c);
-		}
-
-		/* are we done? */
-		if (ISSET(st, stopst) || p == stop || clen > stop - p)
-			break;		/* NOTE BREAK OUT */
-
-		/* no, we must deal with this character */
-		ASSIGN(tmp, st);
-		ASSIGN(st, fresh);
-		assert(c != OUT);
-		st = step(m->g, startst, stopst, tmp, c, st);
-		SP("aft", st, c);
-		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
-		p += clen;
-	}
-
-	assert(coldp != NULL);
-	m->coldp = coldp;
-	if (ISSET(st, stopst))
-		return (p+XMBRTOWC(NULL, p, stop - p, &m->mbs, 0));
-	else
-		return (NULL);
-}
-
-/*
- * slow - step through the string more deliberately
- */
-static const char *
-slow(struct match *m, const char *start, const char *stop, sopno startst,
-    sopno stopst)
-{
-	states st = m->st;
 	states empty = m->empty;
 	states tmp = m->tmp;
 	const char *p = start;
@@ -869,6 +764,8 @@ slow(struct match *m, const char *start, const char *stop, sopno startst,
 	SET1(st, startst);
 	SP("sstart", st, *p);
 	st = step(m->g, startst, stopst, st, NOTHING, st);
+	if (fast)
+		ASSIGN(fresh, st);
 	matchp = NULL;
 	if (start == m->offp || (start == m->beginp && !(m->eflags&REG_NOTBOL)))
 		c = OUT;
@@ -889,6 +786,9 @@ slow(struct match *m, const char *start, const char *stop, sopno startst,
 		} else
 			clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);
 
+		if (fast && EQ(st, fresh))
+			matchp = p;
+
 		/* is there an EOL and/or BOL between lastc and c? */
 		flagch = '\0';
 		i = 0;
@@ -924,14 +824,21 @@ slow(struct match *m, const char *start, const char *stop, sopno startst,
 		}
 
 		/* are we done? */
-		if (ISSET(st, stopst))
-			matchp = p;
-		if (EQ(st, empty) || p == stop || clen > stop - p)
+		if (ISSET(st, stopst)) {
+			if (fast)
+				break;
+			else
+				matchp = p;
+		}
+		if (EQ(st, empty) || p == stop || clen > (size_t)(stop - p))
 			break;		/* NOTE BREAK OUT */
 
 		/* no, we must deal with this character */
 		ASSIGN(tmp, st);
-		ASSIGN(st, empty);
+		if (fast)
+			ASSIGN(st, fresh);
+		else
+			ASSIGN(st, empty);
 		assert(c != OUT);
 		st = step(m->g, startst, stopst, tmp, c, st);
 		SP("saft", st, c);
@@ -939,10 +846,17 @@ slow(struct match *m, const char *start, const char *stop, sopno startst,
 		p += clen;
 	}
 
-	return (matchp);
+	if (fast) {
+		assert(matchp != NULL);
+		m->coldp = matchp;
+		if (ISSET(st, stopst))
+			return (p + XMBRTOWC(NULL, p, stop - p, &m->mbs, 0));
+		else
+			return (NULL);
+	} else
+		return (matchp);
 }
 
-
 /*
  * step - map set of states reachable before char to set reachable after
  */
@@ -1028,22 +942,22 @@ step(struct re_guts *g,
 			break;
 		case OCH_:		/* mark the first two branches */
 			FWD(aft, aft, 1);
-			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
+			assert(OP(g->strip[pc+OPND(s)]) == (sop)OOR2);
 			FWD(aft, aft, OPND(s));
 			break;
 		case OOR1:		/* done a branch, find the O_CH */
 			if (ISSTATEIN(aft, here)) {
 				for (look = 1;
-				    OP(s = g->strip[pc+look]) != O_CH;
+				    OP(s = g->strip[pc+look]) != (sop)O_CH;
 				    look += OPND(s))
-					assert(OP(s) == OOR2);
+					assert(OP(s) == (sop)OOR2);
 				FWD(aft, aft, look + 1);
 			}
 			break;
 		case OOR2:		/* propagate OCH_'s marking */
 			FWD(aft, aft, 1);
-			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
-				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
+			if (OP(g->strip[pc+OPND(s)]) != (sop)O_CH) {
+				assert(OP(g->strip[pc+OPND(s)]) == (sop)OOR2);
 				FWD(aft, aft, OPND(s));
 			}
 			break;
@@ -1124,8 +1038,7 @@ pchar(int ch)
 #endif
 
 #undef	matcher
-#undef	fast
-#undef	slow
+#undef	walk
 #undef	dissect
 #undef	backref
 #undef	step
diff --git a/usr/src/lib/libc/port/regex/regcomp.c b/usr/src/lib/libc/port/regex/regcomp.c
index 3bd27129a0..8df8c337cd 100644
--- a/usr/src/lib/libc/port/regex/regcomp.c
+++ b/usr/src/lib/libc/port/regex/regcomp.c
@@ -41,8 +41,9 @@
 #include <string.h>
 #include <ctype.h>
 #include <limits.h>
-#include <stdlib.h>
 #include <regex.h>
+#include <stdlib.h>
+#include <stdbool.h>
 #include <wchar.h>
 #include <wctype.h>
 
@@ -56,6 +57,24 @@
 #include "../locale/mblocal.h"
 
 /*
+ * Branching context, used to keep track of branch state for all of the branch-
+ * aware functions. In addition to keeping track of branch positions for the
+ * p_branch_* functions, we use this to simplify some clumsiness in BREs for
+ * detection of whether ^ is acting as an anchor or being used erroneously and
+ * also for whether we're in a sub-expression or not.
+ */
+struct branchc {
+	sopno start;
+	sopno back;
+	sopno fwd;
+
+	int nbranch;
+	int nchain;
+	bool outer;
+	bool terminate;
+};
+
+/*
  * parse structure, passed up and down to avoid global variables and
  * other clumsinesses
  */
@@ -71,6 +90,11 @@ struct parse {
 #define	NPAREN	10		/* we need to remember () 1-9 for back refs */
 	sopno pbegin[NPAREN];	/* -> ( ([0] unused) */
 	sopno pend[NPAREN];	/* -> ) ([0] unused) */
+	bool allowbranch;	/* can this expression branch? */
+	bool bre;		/* convenience; is this a BRE? */
+	bool (*parse_expr)(struct parse *, struct branchc *);
+	void (*pre_parse)(struct parse *, struct branchc *);
+	void (*post_parse)(struct parse *, struct branchc *);
 };
 
 /* ========= begin header generated by ./mkh ========= */
@@ -79,11 +103,17 @@ extern "C" {
 #endif
 
 /* === regcomp.c === */
-static void p_ere(struct parse *p, int stop);
-static void p_ere_exp(struct parse *p);
+static bool p_ere_exp(struct parse *p, struct branchc *bc);
 static void p_str(struct parse *p);
-static void p_bre(struct parse *p, int end1, int end2);
-static int p_simp_re(struct parse *p, int starordinary);
+static int p_branch_eat_delim(struct parse *p, struct branchc *bc);
+static void p_branch_ins_offset(struct parse *p, struct branchc *bc);
+static void p_branch_fix_tail(struct parse *p, struct branchc *bc);
+static bool p_branch_empty(struct parse *p, struct branchc *bc);
+static bool p_branch_do(struct parse *p, struct branchc *bc);
+static void p_bre_pre_parse(struct parse *p, struct branchc *bc);
+static void p_bre_post_parse(struct parse *p, struct branchc *bc);
+static void p_re(struct parse *p, int end1, int end2);
+static bool p_simp_re(struct parse *p, struct branchc *bc);
 static int p_count(struct parse *p);
 static void p_bracket(struct parse *p);
 static void p_b_term(struct parse *p, cset *cs);
@@ -133,6 +163,7 @@ static char nuls[10];		/* place to point scanner in event of error */
 #define	MORE2()	(p->next+1 < p->end)
 #define	SEE(c)	(MORE() && PEEK() == (c))
 #define	SEETWO(a, b)	(MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
+#define	SEESPEC(a)	(p->bre ? SEETWO('\\', a) : SEE(a))
 #define	EAT(c)	((SEE(c)) ? (NEXT(), 1) : 0)
 #define	EATTWO(a, b)	((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
 #define	NEXT()	(p->next++)
@@ -229,6 +260,19 @@ regcomp(regex_t *_RESTRICT_KYWD preg, const char *_RESTRICT_KYWD pattern,
 		p->pbegin[i] = 0;
 		p->pend[i] = 0;
 	}
+	if (cflags & REG_EXTENDED) {
+		p->allowbranch = true;
+		p->bre = false;
+		p->parse_expr = p_ere_exp;
+		p->pre_parse = NULL;
+		p->post_parse = NULL;
+	} else {
+		p->allowbranch = false;
+		p->bre = true;
+		p->parse_expr = p_simp_re;
+		p->pre_parse = p_bre_pre_parse;
+		p->post_parse = p_bre_post_parse;
+	}
 	g->sets = NULL;
 	g->ncsets = 0;
 	g->cflags = cflags;
@@ -246,12 +290,10 @@ regcomp(regex_t *_RESTRICT_KYWD preg, const char *_RESTRICT_KYWD pattern,
 	/* do it */
 	EMIT(OEND, 0);
 	g->firststate = THERE();
-	if (cflags&REG_EXTENDED)
-		p_ere(p, OUT);
-	else if (cflags&REG_NOSPEC)
+	if (cflags & REG_NOSPEC)
 		p_str(p);
 	else
-		p_bre(p, OUT, OUT);
+		p_re(p, OUT, OUT);
 	EMIT(OEND, 0);
 	g->laststate = THERE();
 
@@ -288,55 +330,11 @@ regcomp(regex_t *_RESTRICT_KYWD preg, const char *_RESTRICT_KYWD pattern,
 }
 
 /*
- * p_ere - ERE parser top level, concatenation and alternation
- */
-static void
-p_ere(struct parse *p,
-    int stop)		/* character this ERE should end at */
-{
-	char c;
-	sopno prevback;
-	sopno prevfwd;
-	sopno conc;
-	int first = 1;		/* is this the first alternative? */
-
-	for (;;) {
-		/* do a bunch of concatenated expressions */
-		conc = HERE();
-		while (MORE() && (c = PEEK()) != '|' && c != stop)
-			p_ere_exp(p);
-		/* require nonempty */
-		(void) REQUIRE(HERE() != conc, REG_BADPAT);
-
-		if (!EAT('|'))
-			break;		/* NOTE BREAK OUT */
-
-		if (first) {
-			INSERT(OCH_, conc);	/* offset is wrong */
-			prevfwd = conc;
-			prevback = conc;
-			first = 0;
-		}
-		ASTERN(OOR1, prevback);
-		prevback = THERE();
-		AHEAD(prevfwd);			/* fix previous offset */
-		prevfwd = HERE();
-		EMIT(OOR2, 0);			/* offset is very wrong */
-	}
-
-	if (!first) {		/* tail-end fixups */
-		AHEAD(prevfwd);
-		ASTERN(O_CH, prevback);
-	}
-
-	assert(!MORE() || SEE(stop));
-}
-
-/*
- * p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
+ * Parse one subERE, an atom possibly followed by a repetition op,
+ * return whether we should terminate or not.
  */
-static void
-p_ere_exp(struct parse *p)
+static bool
+p_ere_exp(struct parse *p, struct branchc *bc)
 {
 	char c;
 	wint_t wc;
@@ -346,6 +344,7 @@ p_ere_exp(struct parse *p)
 	sopno subno;
 	int wascaret = 0;
 
+	(void) bc;
 	assert(MORE());		/* caller should have ensured this */
 	c = GETNEXT();
 
@@ -359,7 +358,7 @@ p_ere_exp(struct parse *p)
 			p->pbegin[subno] = HERE();
 		EMIT(OLPAREN, subno);
 		if (!SEE(')'))
-			p_ere(p, ')');
+			p_re(p, ')', IGN);
 		if (subno < NPAREN) {
 			p->pend[subno] = HERE();
 			assert(p->pend[subno] != 0);
@@ -425,7 +424,7 @@ p_ere_exp(struct parse *p)
 		break;
 	default:
 		if (p->error != 0)
-			return;
+			return (false);
 		p->next--;
 		wc = WGETNEXT();
 		ordinary(p, wc);
@@ -433,11 +432,11 @@ p_ere_exp(struct parse *p)
 	}
 
 	if (!MORE())
-		return;
+		return (false);
 	c = PEEK();
 	/* we call { a repetition if followed by a digit */
 	if (!(c == '*' || c == '+' || c == '?' || c == '{'))
-		return;		/* no repetition, we're done */
+		return (false);		/* no repetition, we're done */
 	else if (c == '{')
 		(void) REQUIRE(MORE2() && \
 		    (isdigit((uch)PEEK2()) || PEEK2() == ','), REG_BADRPT);
@@ -486,12 +485,13 @@ p_ere_exp(struct parse *p)
 	}
 
 	if (!MORE())
-		return;
+		return (false);
 	c = PEEK();
 	if (!(c == '*' || c == '+' || c == '?' ||
 	    (c == '{' && MORE2() && isdigit((uch)PEEK2()))))
-		return;
+		return (false);
 	SETERROR(REG_BADRPT);
+	return (false);
 }
 
 /*
@@ -506,47 +506,181 @@ p_str(struct parse *p)
 }
 
 /*
- * p_bre - BRE parser top level, anchoring and concatenation
- * Giving end1 as OUT essentially eliminates the end1/end2 check.
- *
- * This implementation is a bit of a kludge, in that a trailing $ is first
- * taken as an ordinary character and then revised to be an anchor.
- * The amount of lookahead needed to avoid this kludge is excessive.
+ * Eat consecutive branch delimiters for the kind of expression that we are
+ * parsing, return the number of delimiters that we ate.
+ */
+static int
+p_branch_eat_delim(struct parse *p, struct branchc *bc)
+{
+	int nskip;
+
+	(void) bc;
+	nskip = 0;
+	while (EAT('|'))
+		++nskip;
+	return (nskip);
+}
+
+/*
+ * Insert necessary branch book-keeping operations. This emits a
+ * bogus 'next' offset, since we still have more to parse
  */
 static void
-p_bre(struct parse *p,
-    int end1,		/* first terminating character */
-    int end2)		/* second terminating character */
+p_branch_ins_offset(struct parse *p, struct branchc *bc)
+{
+	if (bc->nbranch == 0) {
+		INSERT(OCH_, bc->start);	/* offset is wrong */
+		bc->fwd = bc->start;
+		bc->back = bc->start;
+	}
+
+	ASTERN(OOR1, bc->back);
+	bc->back = THERE();
+	AHEAD(bc->fwd);			/* fix previous offset */
+	bc->fwd = HERE();
+	EMIT(OOR2, 0);			/* offset is very wrong */
+	++bc->nbranch;
+}
+
+/*
+ * Fix the offset of the tail branch, if we actually had any branches.
+ * This is to correct the bogus placeholder offset that we use.
+ */
+static void
+p_branch_fix_tail(struct parse *p, struct branchc *bc)
+{
+	/* Fix bogus offset at the tail if we actually have branches */
+	if (bc->nbranch > 0) {
+		AHEAD(bc->fwd);
+		ASTERN(O_CH, bc->back);
+	}
+}
+
+/*
+ * Signal to the parser that an empty branch has been encountered; this will,
+ * in the future, be used to allow for more permissive behavior with empty
+ * branches. The return value should indicate whether parsing may continue
+ * or not.
+ */
+static bool
+p_branch_empty(struct parse *p, struct branchc *bc)
+{
+	(void) bc;
+	SETERROR(REG_BADPAT);
+	return (false);
+}
+
+/*
+ * Take care of any branching requirements. This includes inserting the
+ * appropriate branching instructions as well as eating all of the branch
+ * delimiters until we either run out of pattern or need to parse more pattern.
+ */
+static bool
+p_branch_do(struct parse *p, struct branchc *bc)
 {
-	sopno start = HERE();
-	int first = 1;			/* first subexpression? */
-	int wasdollar = 0;
+	int ate = 0;
+
+	ate = p_branch_eat_delim(p, bc);
+	if (ate == 0)
+		return (false);
+	else if ((ate > 1 || (bc->outer && !MORE())) && !p_branch_empty(p, bc))
+		/*
+		 * Halt parsing only if we have an empty branch and
+		 * p_branch_empty indicates that we must not continue.
+		 * In the future, this will not  necessarily be an error.
+		 */
+		return (false);
+	p_branch_ins_offset(p, bc);
+
+	return (true);
+}
 
+static void
+p_bre_pre_parse(struct parse *p, struct branchc *bc)
+{
+	(void) bc;
+	/*
+	 * Does not move cleanly into expression parser because of
+	 * ordinary interpration of * at the beginning position of
+	 * an expression.
+	 */
 	if (EAT('^')) {
 		EMIT(OBOL, 0);
 		p->g->iflags |= USEBOL;
 		p->g->nbol++;
 	}
-	while (MORE() && !SEETWO(end1, end2)) {
-		wasdollar = p_simp_re(p, first);
-		first = 0;
-	}
-	if (wasdollar) {	/* oops, that was a trailing anchor */
+}
+
+static void
+p_bre_post_parse(struct parse *p, struct branchc *bc)
+{
+	/* Expression is terminating due to EOL token */
+	if (bc->terminate) {
 		DROP(1);
 		EMIT(OEOL, 0);
 		p->g->iflags |= USEEOL;
 		p->g->neol++;
 	}
+}
+
+/*
+ * Top level parser, concatenation and BRE anchoring.
+ * Giving end1 as OUT essentially eliminates the end1/end2 check.
+ *
+ * This implementation is a bit of a kludge, in that a trailing $ is first
+ * taken as an ordinary character and then revised to be an anchor.
+ * The amount of lookahead needed to avoid this kludge is excessive.
+ */
+static void
+p_re(struct parse *p,
+    int end1,	/* first terminating character */
+    int end2)	/* second terminating character; ignored for EREs */
+{
+	struct branchc bc;
 
-	(void) REQUIRE(HERE() != start, REG_BADPAT);	/* require nonempty */
+	bc.nbranch = 0;
+	if (end1 == OUT && end2 == OUT)
+		bc.outer = true;
+	else
+		bc.outer = false;
+#define	SEEEND()	(!p->bre ? SEE(end1) : SEETWO(end1, end2))
+	for (;;) {
+		bc.start = HERE();
+		bc.nchain = 0;
+		bc.terminate = false;
+		if (p->pre_parse != NULL)
+			p->pre_parse(p, &bc);
+		while (MORE() && (!p->allowbranch || !SEESPEC('|')) &&
+		    !SEEEND()) {
+			bc.terminate = p->parse_expr(p, &bc);
+			++bc.nchain;
+		}
+		if (p->post_parse != NULL)
+			p->post_parse(p, &bc);
+		(void) REQUIRE(HERE() != bc.start, REG_BADPAT);
+		if (!p->allowbranch)
+			break;
+		/*
+		 * p_branch_do's return value indicates whether we should
+		 * continue parsing or not. This is both for correctness and
+		 * a slight optimization, because it will check if we've
+		 * encountered an empty branch or the end of the string
+		 * immediately following a branch delimiter.
+		 */
+		if (!p_branch_do(p, &bc))
+			break;
+	}
+#undef SEE_END
+	if (p->allowbranch)
+		p_branch_fix_tail(p, &bc);
+	assert(!MORE() || SEE(end1));
 }
 
 /*
  * p_simp_re - parse a simple RE, an atom possibly followed by a repetition
  */
-static int			/* was the simple RE an unbackslashed $? */
-p_simp_re(struct parse *p,
-    int starordinary)	/* is a leading * an ordinary character? */
+static bool			/* was the simple RE an unbackslashed $? */
+p_simp_re(struct parse *p, struct branchc *bc)
 {
 	int c;
 	int count;
@@ -592,7 +726,7 @@ p_simp_re(struct parse *p,
 		EMIT(OLPAREN, subno);
 		/* the MORE here is an error heuristic */
 		if (MORE() && !SEETWO('\\', ')'))
-			p_bre(p, '\\', ')');
+			p_re(p, '\\', ')');
 		if (subno < NPAREN) {
 			p->pend[subno] = HERE();
 			assert(p->pend[subno] != 0);
@@ -627,11 +761,16 @@ p_simp_re(struct parse *p,
 		p->g->backrefs = 1;
 		break;
 	case '*':
-		(void) REQUIRE(starordinary, REG_BADRPT);
+		/*
+		 * Ordinary if used as the first character beyond BOL anchor of
+		 * a (sub-)expression, counts as a bad repetition operator if it
+		 * appears otherwise.
+		 */
+		(void) REQUIRE(bc->nchain == 0, REG_BADRPT);
 		/* FALLTHROUGH */
 	default:
 		if (p->error != 0)
-			return (0);	/* Definitely not $... */
+			return (false);	/* Definitely not $... */
 		p->next--;
 		wc = WGETNEXT();
 		ordinary(p, wc);
@@ -662,9 +801,9 @@ p_simp_re(struct parse *p,
 			SETERROR(REG_BADBR);
 		}
 	} else if (c == '$')	/* $ (but not \$) ends it */
-		return (1);
+		return (true);
 
-	return (0);
+	return (false);
 }
 
 /*
@@ -893,7 +1032,7 @@ p_b_coll_elem(struct parse *p,
 	}
 	len = p->next - sp;
 	for (cp = cnames; cp->name != NULL; cp++)
-		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
+		if (strncmp(cp->name, sp, len) == 0 && strlen(cp->name) == len)
 			return (cp->code);	/* known name */
 	(void) memset(&mbs, 0, sizeof (mbs));
 	if ((clen = mbrtowc(&wc, sp, len, &mbs)) == len)
@@ -1436,12 +1575,12 @@ findmust(struct parse *p, struct re_guts *g)
 				scan += OPND(s);
 				s = *scan;
 				/* assert() interferes w debug printouts */
-				if (OP(s) != O_QUEST && OP(s) != O_CH &&
-				    OP(s) != OOR2) {
+				if (OP(s) != (sop)O_QUEST &&
+				    OP(s) != (sop)O_CH && OP(s) != (sop)OOR2) {
 					g->iflags |= BAD;
 					return;
 				}
-			} while (OP(s) != O_QUEST && OP(s) != O_CH);
+			} while (OP(s) != (sop)O_QUEST && OP(s) != (sop)O_CH);
 			/* FALLTHROUGH */
 		case OBOW:		/* things that break a sequence */
 		case OEOW:
@@ -1450,7 +1589,7 @@ findmust(struct parse *p, struct re_guts *g)
 		case O_QUEST:
 		case O_CH:
 		case OEND:
-			if (newlen > g->mlen) {		/* ends one */
+			if (newlen > (sopno)g->mlen) {		/* ends one */
 				start = newstart;
 				g->mlen = newlen;
 				if (offset > -1) {
@@ -1465,7 +1604,7 @@ findmust(struct parse *p, struct re_guts *g)
 			newlen = 0;
 			break;
 		case OANY:
-			if (newlen > g->mlen) {		/* ends one */
+			if (newlen > (sopno)g->mlen) {		/* ends one */
 				start = newstart;
 				g->mlen = newlen;
 				if (offset > -1) {
@@ -1483,7 +1622,7 @@ findmust(struct parse *p, struct re_guts *g)
 			break;
 		case OANYOF:		/* may or may not invalidate offset */
 			/* First, everything as OANY */
-			if (newlen > g->mlen) {		/* ends one */
+			if (newlen > (sopno)g->mlen) {		/* ends one */
 				start = newstart;
 				g->mlen = newlen;
 				if (offset > -1) {
@@ -1507,7 +1646,7 @@ findmust(struct parse *p, struct re_guts *g)
 			 * save the last known good offset, in case the
 			 * must sequence doesn't occur later.
 			 */
-			if (newlen > g->mlen) {		/* ends one */
+			if (newlen > (sopno)g->mlen) {		/* ends one */
 				start = newstart;
 				g->mlen = newlen;
 				if (offset > -1)
@@ -1567,7 +1706,7 @@ altoffset(sop *scan, int offset)
 	largest = 0;
 	try = 0;
 	s = *scan++;
-	while (OP(s) != O_QUEST && OP(s) != O_CH) {
+	while (OP(s) != (sop)O_QUEST && OP(s) != (sop)O_CH) {
 		switch (OP(s)) {
 		case OOR1:
 			if (try > largest)
@@ -1583,10 +1722,10 @@ altoffset(sop *scan, int offset)
 			do {
 				scan += OPND(s);
 				s = *scan;
-				if (OP(s) != O_QUEST && OP(s) != O_CH &&
-				    OP(s) != OOR2)
+				if (OP(s) != (sop)O_QUEST &&
+				    OP(s) != (sop)O_CH && OP(s) != (sop)OOR2)
 					return (-1);
-			} while (OP(s) != O_QUEST && OP(s) != O_CH);
+			} while (OP(s) != (sop)O_QUEST && OP(s) != (sop)O_CH);
 			/*
 			 * We must skip to the next position, or we'll
 			 * leave altoffset() too early.
diff --git a/usr/src/lib/libc/port/regex/regex2.h b/usr/src/lib/libc/port/regex/regex2.h
index 551611c610..44b20c3c24 100644
--- a/usr/src/lib/libc/port/regex/regex2.h
+++ b/usr/src/lib/libc/port/regex/regex2.h
@@ -186,4 +186,5 @@ struct re_guts {
 
 /* misc utilities */
 #define	OUT	(CHAR_MIN - 1)	/* a non-character value */
+#define	IGN	(CHAR_MIN - 2)
 #define	ISWORD(c)	(iswalnum((uch)(c)) || (c) == '_')