summaryrefslogtreecommitdiff
path: root/ext/pcre/pcrelib/pcre_compile.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/pcre/pcrelib/pcre_compile.c')
-rw-r--r--ext/pcre/pcrelib/pcre_compile.c408
1 files changed, 328 insertions, 80 deletions
diff --git a/ext/pcre/pcrelib/pcre_compile.c b/ext/pcre/pcrelib/pcre_compile.c
index c191539c8..77ec51cac 100644
--- a/ext/pcre/pcrelib/pcre_compile.c
+++ b/ext/pcre/pcrelib/pcre_compile.c
@@ -42,11 +42,12 @@ POSSIBILITY OF SUCH DAMAGE.
supporting internal functions that are not used by other modules. */
+#include <config.h>
+
#define NLBLOCK cd /* Block containing newline information */
#define PSSTART start_pattern /* Field containing processed string start */
#define PSEND end_pattern /* Field containing processed string end */
-
#include "pcre_internal.h"
@@ -62,6 +63,13 @@ used by pcretest. DEBUG is not defined when building a production library. */
#define SETBIT(a,b) a[b/8] |= (1 << (b%8))
+/* Maximum length value to check against when making sure that the integer that
+holds the compiled pattern length does not overflow. We make it a bit less than
+INT_MAX to allow for adding in group terminating bytes, so that we don't have
+to check them every time. */
+
+#define OFLOW_MAX (INT_MAX - 20)
+
/*************************************************
* Code parameters and static tables *
@@ -120,7 +128,7 @@ static const short int escapes[] = {
/* B8 */ 0, 0, 0, 0, 0, ']', '=', '-',
/* C0 */ '{',-ESC_A, -ESC_B, -ESC_C, -ESC_D,-ESC_E, 0, -ESC_G,
/* C8 */-ESC_H, 0, 0, 0, 0, 0, 0, 0,
-/* D0 */ '}', 0, 0, 0, 0, 0, 0, -ESC_P,
+/* D0 */ '}', 0, -ESC_K, 0, 0, 0, 0, -ESC_P,
/* D8 */-ESC_Q,-ESC_R, 0, 0, 0, 0, 0, 0,
/* E0 */ '\\', 0, -ESC_S, 0, 0,-ESC_V, -ESC_W, -ESC_X,
/* E8 */ 0,-ESC_Z, 0, 0, 0, 0, 0, 0,
@@ -130,6 +138,27 @@ static const short int escapes[] = {
#endif
+/* Table of special "verbs" like (*PRUNE) */
+
+typedef struct verbitem {
+ const char *name;
+ int len;
+ int op;
+} verbitem;
+
+static verbitem verbs[] = {
+ { "ACCEPT", 6, OP_ACCEPT },
+ { "COMMIT", 6, OP_COMMIT },
+ { "F", 1, OP_FAIL },
+ { "FAIL", 4, OP_FAIL },
+ { "PRUNE", 5, OP_PRUNE },
+ { "SKIP", 4, OP_SKIP },
+ { "THEN", 4, OP_THEN }
+};
+
+static int verbcount = sizeof(verbs)/sizeof(verbitem);
+
+
/* Tables of names of POSIX character classes and their lengths. The list is
terminated by a zero length entry. The first three must be alpha, lower, upper,
as this is assumed for handling case independence. */
@@ -203,7 +232,7 @@ static const char *error_texts[] = {
"missing ) after comment",
"parentheses nested too deeply", /** DEAD **/
/* 20 */
- "regular expression too large",
+ "regular expression is too large",
"failed to get memory",
"unmatched parentheses",
"internal error: code overflow",
@@ -239,7 +268,7 @@ static const char *error_texts[] = {
"subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)",
"too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")",
/* 50 */
- "repeated subpattern is too long",
+ "repeated subpattern is too long", /** DEAD **/
"octal value is greater than \\377 (not in UTF-8 mode)",
"internal error: overran compiling workspace",
"internal error: previously-checked referenced subpattern not found",
@@ -248,7 +277,11 @@ static const char *error_texts[] = {
"repeating a DEFINE group is not allowed",
"inconsistent NEWLINE options",
"\\g is not followed by a braced name or an optionally braced non-zero number",
- "(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number"
+ "(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number",
+ "(*VERB) with an argument is not supported",
+ /* 60 */
+ "(*VERB) not recognized",
+ "number is too big"
};
@@ -405,7 +438,7 @@ Arguments:
Returns: zero or positive => a data character
negative => a special escape sequence
- on error, errorptr is set
+ on error, errorcodeptr is set
*/
static int
@@ -490,10 +523,16 @@ else
while ((digitab[ptr[1]] & ctype_digit) != 0)
c = c * 10 + *(++ptr) - '0';
+ if (c < 0)
+ {
+ *errorcodeptr = ERR61;
+ break;
+ }
+
if (c == 0 || (braced && *(++ptr) != '}'))
{
*errorcodeptr = ERR57;
- return 0;
+ break;
}
if (negated)
@@ -501,7 +540,7 @@ else
if (c > bracount)
{
*errorcodeptr = ERR15;
- return 0;
+ break;
}
c = bracount - (c - 1);
}
@@ -530,6 +569,11 @@ else
c -= '0';
while ((digitab[ptr[1]] & ctype_digit) != 0)
c = c * 10 + *(++ptr) - '0';
+ if (c < 0)
+ {
+ *errorcodeptr = ERR61;
+ break;
+ }
if (c < 10 || c <= bracount)
{
c = -(ESC_REF + c);
@@ -625,7 +669,7 @@ else
if (c == 0)
{
*errorcodeptr = ERR2;
- return 0;
+ break;
}
#ifndef EBCDIC /* ASCII coding */
@@ -701,7 +745,7 @@ if (c == '{')
*negptr = TRUE;
ptr++;
}
- for (i = 0; i < sizeof(name) - 1; i++)
+ for (i = 0; i < (int)sizeof(name) - 1; i++)
{
c = *(++ptr);
if (c == 0) goto ERROR_RETURN;
@@ -904,6 +948,7 @@ for (; *ptr != 0; ptr++)
{
while (*(++ptr) != ']')
{
+ if (*ptr == 0) return -1;
if (*ptr == '\\')
{
if (*(++ptr) == 0) return -1;
@@ -931,7 +976,7 @@ for (; *ptr != 0; ptr++)
/* An opening parens must now be a real metacharacter */
if (*ptr != '(') continue;
- if (ptr[1] != '?')
+ if (ptr[1] != '?' && ptr[1] != '*')
{
count++;
if (name == NULL && count == lorn) return count;
@@ -1059,7 +1104,6 @@ for (;;)
{
int d;
register int op = *cc;
-
switch (op)
{
case OP_CBRA:
@@ -1148,6 +1192,7 @@ for (;;)
case OP_TYPEEXACT:
branchlength += GET2(cc,1);
+ if (cc[3] == OP_PROP || cc[3] == OP_NOTPROP) cc += 2;
cc += 4;
break;
@@ -1256,13 +1301,42 @@ for (;;)
code += _pcre_OP_lengths[c];
}
- /* In UTF-8 mode, opcodes that are followed by a character may be followed by
- a multi-byte character. The length in the table is a minimum, so we have to
- arrange to skip the extra bytes. */
+ /* Otherwise, we can get the item's length from the table, except that for
+ repeated character types, we have to test for \p and \P, which have an extra
+ two bytes of parameters. */
else
{
+ switch(c)
+ {
+ case OP_TYPESTAR:
+ case OP_TYPEMINSTAR:
+ case OP_TYPEPLUS:
+ case OP_TYPEMINPLUS:
+ case OP_TYPEQUERY:
+ case OP_TYPEMINQUERY:
+ case OP_TYPEPOSSTAR:
+ case OP_TYPEPOSPLUS:
+ case OP_TYPEPOSQUERY:
+ if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
+ break;
+
+ case OP_TYPEUPTO:
+ case OP_TYPEMINUPTO:
+ case OP_TYPEEXACT:
+ case OP_TYPEPOSUPTO:
+ if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;
+ break;
+ }
+
+ /* Add in the fixed length from the table */
+
code += _pcre_OP_lengths[c];
+
+ /* In UTF-8 mode, opcodes that are followed by a character may be followed by
+ a multi-byte character. The length in the table is a minimum, so we have to
+ arrange to skip the extra bytes. */
+
#ifdef SUPPORT_UTF8
if (utf8) switch(c)
{
@@ -1320,14 +1394,42 @@ for (;;)
if (c == OP_XCLASS) code += GET(code, 1);
- /* Otherwise, we get the item's length from the table. In UTF-8 mode, opcodes
- that are followed by a character may be followed by a multi-byte character.
- The length in the table is a minimum, so we have to arrange to skip the extra
- bytes. */
+ /* Otherwise, we can get the item's length from the table, except that for
+ repeated character types, we have to test for \p and \P, which have an extra
+ two bytes of parameters. */
else
{
+ switch(c)
+ {
+ case OP_TYPESTAR:
+ case OP_TYPEMINSTAR:
+ case OP_TYPEPLUS:
+ case OP_TYPEMINPLUS:
+ case OP_TYPEQUERY:
+ case OP_TYPEMINQUERY:
+ case OP_TYPEPOSSTAR:
+ case OP_TYPEPOSPLUS:
+ case OP_TYPEPOSQUERY:
+ if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
+ break;
+
+ case OP_TYPEPOSUPTO:
+ case OP_TYPEUPTO:
+ case OP_TYPEMINUPTO:
+ case OP_TYPEEXACT:
+ if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;
+ break;
+ }
+
+ /* Add in the fixed length from the table */
+
code += _pcre_OP_lengths[c];
+
+ /* In UTF-8 mode, opcodes that are followed by a character may be followed
+ by a multi-byte character. The length in the table is a minimum, so we have
+ to arrange to skip the extra bytes. */
+
#ifdef SUPPORT_UTF8
if (utf8) switch(c)
{
@@ -1399,7 +1501,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
/* For other groups, scan the branches. */
- if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE)
+ if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE || c == OP_COND)
{
BOOL empty_branch;
if (GET(code, 1) == 0) return TRUE; /* Hit unclosed bracket */
@@ -1423,11 +1525,15 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
switch (c)
{
- /* Check for quantifiers after a class */
+ /* Check for quantifiers after a class. XCLASS is used for classes that
+ cannot be represented just by a bit map. This includes negated single
+ high-valued characters. The length in _pcre_OP_lengths[] is zero; the
+ actual length is stored in the compiled code, so we must update "code"
+ here. */
#ifdef SUPPORT_UTF8
case OP_XCLASS:
- ccode = code + GET(code, 1);
+ ccode = code += GET(code, 1);
goto CHECK_CLASS_REPEAT;
#endif
@@ -1489,6 +1595,26 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
case OP_TYPEEXACT:
return FALSE;
+ /* These are going to continue, as they may be empty, but we have to
+ fudge the length for the \p and \P cases. */
+
+ case OP_TYPESTAR:
+ case OP_TYPEMINSTAR:
+ case OP_TYPEPOSSTAR:
+ case OP_TYPEQUERY:
+ case OP_TYPEMINQUERY:
+ case OP_TYPEPOSQUERY:
+ if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
+ break;
+
+ /* Same for these */
+
+ case OP_TYPEUPTO:
+ case OP_TYPEMINUPTO:
+ case OP_TYPEPOSUPTO:
+ if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;
+ break;
+
/* End of branch */
case OP_KET:
@@ -1651,6 +1777,7 @@ adjust_recurse(uschar *group, int adjust, BOOL utf8, compile_data *cd,
uschar *save_hwm)
{
uschar *ptr = group;
+
while ((ptr = (uschar *)find_recurse(ptr, utf8)) != NULL)
{
int offset;
@@ -2255,6 +2382,15 @@ for (;; ptr++)
*/
if (code < last_code) code = last_code;
+
+ /* Paranoid check for integer overflow */
+
+ if (OFLOW_MAX - *lengthptr < code - last_code)
+ {
+ *errorcodeptr = ERR20;
+ goto FAILED;
+ }
+
*lengthptr += code - last_code;
DPRINTF(("length=%d added %d c=%c\n", *lengthptr, code - last_code, c));
@@ -2367,6 +2503,11 @@ for (;; ptr++)
*ptrptr = ptr;
if (lengthptr != NULL)
{
+ if (OFLOW_MAX - *lengthptr < code - last_code)
+ {
+ *errorcodeptr = ERR20;
+ goto FAILED;
+ }
*lengthptr += code - last_code; /* To include callout length */
DPRINTF((">> end branch\n"));
}
@@ -2429,16 +2570,23 @@ for (;; ptr++)
goto FAILED;
}
- /* If the first character is '^', set the negation flag and skip it. */
+ /* If the first character is '^', set the negation flag and skip it. Also,
+ if the first few characters (either before or after ^) are \Q\E or \E we
+ skip them too. This makes for compatibility with Perl. */
- if ((c = *(++ptr)) == '^')
+ negate_class = FALSE;
+ for (;;)
{
- negate_class = TRUE;
c = *(++ptr);
- }
- else
- {
- negate_class = FALSE;
+ if (c == '\\')
+ {
+ if (ptr[1] == 'E') ptr++;
+ else if (strncmp((const char *)ptr+1, "Q\\E", 3) == 0) ptr += 3;
+ else break;
+ }
+ else if (!negate_class && c == '^')
+ negate_class = TRUE;
+ else break;
}
/* Keep a count of chars with values < 256 so that we can optimize the case
@@ -2579,7 +2727,7 @@ for (;; ptr++)
of the specials, which just set a flag. The sequence \b is a special
case. Inside a class (and only there) it is treated as backspace.
Elsewhere it marks a word boundary. Other escapes have preset maps ready
- to or into the one we are building. We assume they have more than one
+ to 'or' into the one we are building. We assume they have more than one
character in them, so set class_charcount bigger than one. */
if (c == '\\')
@@ -2599,6 +2747,7 @@ for (;; ptr++)
else inescq = TRUE;
continue;
}
+ else if (-c == ESC_E) continue; /* Ignore orphan \E */
if (c < 0)
{
@@ -3045,12 +3194,26 @@ for (;; ptr++)
goto FAILED;
}
+ /* Remember whether \r or \n are in this class */
+
+ if (negate_class)
+ {
+ if ((classbits[1] & 0x24) != 0x24) cd->external_options |= PCRE_HASCRORLF;
+ }
+ else
+ {
+ if ((classbits[1] & 0x24) != 0) cd->external_options |= PCRE_HASCRORLF;
+ }
+
/* If class_charcount is 1, we saw precisely one character whose value is
- less than 256. In non-UTF-8 mode we can always optimize. In UTF-8 mode, we
- can optimize the negative case only if there were no characters >= 128
- because OP_NOT and the related opcodes like OP_NOTSTAR operate on
- single-bytes only. This is an historical hangover. Maybe one day we can
- tidy these opcodes to handle multi-byte characters.
+ less than 256. As long as there were no characters >= 128 and there was no
+ use of \p or \P, in other words, no use of any XCLASS features, we can
+ optimize.
+
+ In UTF-8 mode, we can optimize the negative case only if there were no
+ characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR
+ operate on single-bytes only. This is an historical hangover. Maybe one day
+ we can tidy these opcodes to handle multi-byte characters.
The optimization throws away the bit map. We turn the item into a
1-character OP_CHAR[NC] if it's positive, or OP_NOT if it's negative. Note
@@ -3060,10 +3223,8 @@ for (;; ptr++)
reqbyte, save the previous value for reinstating. */
#ifdef SUPPORT_UTF8
- if (class_charcount == 1 &&
- (!utf8 ||
- (!class_utf8 && (!negate_class || class_lastchar < 128))))
-
+ if (class_charcount == 1 && !class_utf8 &&
+ (!utf8 || !negate_class || class_lastchar < 128))
#else
if (class_charcount == 1)
#endif
@@ -3521,14 +3682,6 @@ for (;; ptr++)
goto FAILED;
}
- /* This is a paranoid check to stop integer overflow later on */
-
- if (len > MAX_DUPLENGTH)
- {
- *errorcodeptr = ERR50;
- goto FAILED;
- }
-
/* If the maximum repeat count is unlimited, find the end of the bracket
by scanning through from the start, and compute the offset back to it
from the current code pointer. There may be an OP_OPT setting following
@@ -3617,10 +3770,21 @@ for (;; ptr++)
if (repeat_min > 1)
{
/* In the pre-compile phase, we don't actually do the replication. We
- just adjust the length as if we had. */
+ just adjust the length as if we had. Do some paranoid checks for
+ potential integer overflow. */
if (lengthptr != NULL)
- *lengthptr += (repeat_min - 1)*length_prevgroup;
+ {
+ int delta = (repeat_min - 1)*length_prevgroup;
+ if ((double)(repeat_min - 1)*(double)length_prevgroup >
+ (double)INT_MAX ||
+ OFLOW_MAX - *lengthptr < delta)
+ {
+ *errorcodeptr = ERR20;
+ goto FAILED;
+ }
+ *lengthptr += delta;
+ }
/* This is compiling for real */
@@ -3658,11 +3822,23 @@ for (;; ptr++)
/* In the pre-compile phase, we don't actually do the replication. We
just adjust the length as if we had. For each repetition we must add 1
to the length for BRAZERO and for all but the last repetition we must
- add 2 + 2*LINKSIZE to allow for the nesting that occurs. */
+ add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some
+ paranoid checks to avoid integer overflow. */
if (lengthptr != NULL && repeat_max > 0)
- *lengthptr += repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) -
- 2 - 2*LINK_SIZE; /* Last one doesn't nest */
+ {
+ int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) -
+ 2 - 2*LINK_SIZE; /* Last one doesn't nest */
+ if ((double)repeat_max *
+ (double)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)
+ > (double)INT_MAX ||
+ OFLOW_MAX - *lengthptr < delta)
+ {
+ *errorcodeptr = ERR20;
+ goto FAILED;
+ }
+ *lengthptr += delta;
+ }
/* This is compiling for real */
@@ -3814,9 +3990,7 @@ for (;; ptr++)
/* ===================================================================*/
/* Start of nested parenthesized sub-expression, or comment or lookahead or
lookbehind or option setting or condition or all the other extended
- parenthesis forms. First deal with the specials; all are introduced by ?,
- and the appearance of any of them means that this is not a capturing
- group. */
+ parenthesis forms. */
case '(':
newoptions = options;
@@ -3825,7 +3999,44 @@ for (;; ptr++)
save_hwm = cd->hwm;
reset_bracount = FALSE;
- if (*(++ptr) == '?')
+ /* First deal with various "verbs" that can be introduced by '*'. */
+
+ if (*(++ptr) == '*' && (cd->ctypes[ptr[1]] & ctype_letter) != 0)
+ {
+ int i, namelen;
+ const uschar *name = ++ptr;
+ previous = NULL;
+ while ((cd->ctypes[*++ptr] & ctype_letter) != 0);
+ if (*ptr == ':')
+ {
+ *errorcodeptr = ERR59; /* Not supported */
+ goto FAILED;
+ }
+ if (*ptr != ')')
+ {
+ *errorcodeptr = ERR60;
+ goto FAILED;
+ }
+ namelen = ptr - name;
+ for (i = 0; i < verbcount; i++)
+ {
+ if (namelen == verbs[i].len &&
+ strncmp((char *)name, verbs[i].name, namelen) == 0)
+ {
+ *code = verbs[i].op;
+ if (*code++ == OP_ACCEPT) cd->had_accept = TRUE;
+ break;
+ }
+ }
+ if (i < verbcount) continue;
+ *errorcodeptr = ERR60;
+ goto FAILED;
+ }
+
+ /* Deal with the extended parentheses; all are introduced by '?', and the
+ appearance of any of them means that this is not a capturing group. */
+
+ else if (*ptr == '?')
{
int i, set, unset, namelen;
int *optset;
@@ -4067,8 +4278,14 @@ for (;; ptr++)
/* ------------------------------------------------------------ */
case '!': /* Negative lookahead */
- bravalue = OP_ASSERT_NOT;
ptr++;
+ if (*ptr == ')') /* Optimize (?!) */
+ {
+ *code++ = OP_FAIL;
+ previous = NULL;
+ continue;
+ }
+ bravalue = OP_ASSERT_NOT;
break;
@@ -4617,23 +4834,29 @@ for (;; ptr++)
goto FAILED;
}
- /* In the pre-compile phase, update the length by the length of the nested
- group, less the brackets at either end. Then reduce the compiled code to
- just the brackets so that it doesn't use much memory if it is duplicated by
- a quantifier. */
+ /* In the pre-compile phase, update the length by the length of the group,
+ less the brackets at either end. Then reduce the compiled code to just a
+ set of non-capturing brackets so that it doesn't use much memory if it is
+ duplicated by a quantifier.*/
if (lengthptr != NULL)
{
+ if (OFLOW_MAX - *lengthptr < length_prevgroup - 2 - 2*LINK_SIZE)
+ {
+ *errorcodeptr = ERR20;
+ goto FAILED;
+ }
*lengthptr += length_prevgroup - 2 - 2*LINK_SIZE;
- code++;
+ *code++ = OP_BRA;
PUTINC(code, 0, 1 + LINK_SIZE);
*code++ = OP_KET;
PUTINC(code, 0, 1 + LINK_SIZE);
+ break; /* No need to waste time with special character handling */
}
/* Otherwise update the main code pointer to the end of the group. */
- else code = tempcode;
+ code = tempcode;
/* For a DEFINE group, required and first character settings are not
relevant. */
@@ -4837,6 +5060,11 @@ for (;; ptr++)
*code++ = ((options & PCRE_CASELESS) != 0)? OP_CHARNC : OP_CHAR;
for (c = 0; c < mclength; c++) *code++ = mcbuffer[c];
+ /* Remember if \r or \n were seen */
+
+ if (mcbuffer[0] == '\r' || mcbuffer[0] == '\n')
+ cd->external_options |= PCRE_HASCRORLF;
+
/* Set the first and required bytes appropriately. If no previous first
byte, set it from this character, but revert to none on a zero repeat.
Otherwise, leave the firstbyte value alone, and don't change it on a zero
@@ -5119,7 +5347,15 @@ for (;;)
*ptrptr = ptr;
*firstbyteptr = firstbyte;
*reqbyteptr = reqbyte;
- if (lengthptr != NULL) *lengthptr += length;
+ if (lengthptr != NULL)
+ {
+ if (OFLOW_MAX - *lengthptr < length)
+ {
+ *errorcodeptr = ERR20;
+ return FALSE;
+ }
+ *lengthptr += length;
+ }
return TRUE;
}
@@ -5428,6 +5664,7 @@ real_pcre *re;
int length = 1; /* For final END opcode */
int firstbyte, reqbyte, newline;
int errorcode = 0;
+int skipatstart = 0;
#ifdef SUPPORT_UTF8
BOOL utf8;
#endif
@@ -5506,13 +5743,33 @@ cd->fcc = tables + fcc_offset;
cd->cbits = tables + cbits_offset;
cd->ctypes = tables + ctypes_offset;
+/* Check for newline settings at the start of the pattern, and remember the
+offset for later. */
+
+if (ptr[0] == '(' && ptr[1] == '*')
+ {
+ int newnl = 0;
+ if (strncmp((char *)(ptr+2), "CR)", 3) == 0)
+ { skipatstart = 5; newnl = PCRE_NEWLINE_CR; }
+ else if (strncmp((char *)(ptr+2), "LF)", 3) == 0)
+ { skipatstart = 5; newnl = PCRE_NEWLINE_LF; }
+ else if (strncmp((char *)(ptr+2), "CRLF)", 5) == 0)
+ { skipatstart = 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }
+ else if (strncmp((char *)(ptr+2), "ANY)", 4) == 0)
+ { skipatstart = 6; newnl = PCRE_NEWLINE_ANY; }
+ else if (strncmp((char *)(ptr+2), "ANYCRLF)", 8) == 0)
+ { skipatstart = 10; newnl = PCRE_NEWLINE_ANYCRLF; }
+ if (skipatstart > 0)
+ options = (options & ~PCRE_NEWLINE_BITS) | newnl;
+ }
+
/* Handle different types of newline. The three bits give seven cases. The
current code allows for fixed one- or two-byte sequences, plus "any" and
"anycrlf". */
-switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY))
+switch (options & PCRE_NEWLINE_BITS)
{
- case 0: newline = NEWLINE; break; /* Compile-time default */
+ case 0: newline = NEWLINE; break; /* Build-time default */
case PCRE_NEWLINE_CR: newline = '\r'; break;
case PCRE_NEWLINE_LF: newline = '\n'; break;
case PCRE_NEWLINE_CR+
@@ -5584,6 +5841,7 @@ been put into the cd block so that they can be changed if an option setting is
found within the regex right at the beginning. Bringing initial option settings
outside can help speed up starting point checks. */
+ptr += skipatstart;
code = cworkspace;
*code = OP_BRA;
(void)compile_regex(cd->external_options, cd->external_options & PCRE_IMS,
@@ -5647,12 +5905,13 @@ cd->start_code = codestart;
cd->hwm = cworkspace;
cd->req_varyopt = 0;
cd->nopartial = FALSE;
+cd->had_accept = FALSE;
/* Set up a starting, non-extracting bracket, then compile the expression. On
error, errorcode will be set non-zero, so we don't need to look at the result
of the function here. */
-ptr = (const uschar *)pattern;
+ptr = (const uschar *)pattern + skipatstart;
code = (uschar *)codestart;
*code = OP_BRA;
(void)compile_regex(re->options, re->options & PCRE_IMS, &code, &ptr,
@@ -5661,6 +5920,7 @@ re->top_bracket = cd->bracount;
re->top_backref = cd->top_backref;
if (cd->nopartial) re->options |= PCRE_NOPARTIAL;
+if (cd->had_accept) reqbyte = -1; /* Must disable after (*ACCEPT) */
/* If not reached end of pattern on success, there's an excess bracket. */
@@ -5759,19 +6019,7 @@ case when building a production library. */
printf("Length = %d top_bracket = %d top_backref = %d\n",
length, re->top_bracket, re->top_backref);
-if (re->options != 0)
- {
- printf("%s%s%s%s%s%s%s%s%s\n",
- ((re->options & PCRE_NOPARTIAL) != 0)? "nopartial " : "",
- ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
- ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
- ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
- ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
- ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
- ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",
- ((re->options & PCRE_EXTRA) != 0)? "extra " : "",
- ((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : "");
- }
+printf("Options=%08x\n", re->options);
if ((re->options & PCRE_FIRSTSET) != 0)
{