summaryrefslogtreecommitdiff
path: root/ext/pcre/pcrelib/pcre_exec.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/pcre/pcrelib/pcre_exec.c')
-rw-r--r--ext/pcre/pcrelib/pcre_exec.c1292
1 files changed, 1022 insertions, 270 deletions
diff --git a/ext/pcre/pcrelib/pcre_exec.c b/ext/pcre/pcrelib/pcre_exec.c
index 073cf2410..ca3079b0a 100644
--- a/ext/pcre/pcrelib/pcre_exec.c
+++ b/ext/pcre/pcrelib/pcre_exec.c
@@ -396,10 +396,32 @@ typedef struct heapframe {
/* This function is called recursively in many circumstances. Whenever it
returns a negative (error) response, the outer incarnation must also return the
-same response.
+same response. */
-Performance note: It might be tempting to extract commonly used fields from the
-md structure (e.g. utf8, end_subject) into individual variables to improve
+/* These macros pack up tests that are used for partial matching, and which
+appears several times in the code. We set the "hit end" flag if the pointer is
+at the end of the subject and also past the start of the subject (i.e.
+something has been matched). For hard partial matching, we then return
+immediately. The second one is used when we already know we are past the end of
+the subject. */
+
+#define CHECK_PARTIAL()\
+ if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
+ {\
+ md->hitend = TRUE;\
+ if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
+ }
+
+#define SCHECK_PARTIAL()\
+ if (md->partial != 0 && eptr > mstart)\
+ {\
+ md->hitend = TRUE;\
+ if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
+ }
+
+
+/* Performance note: It might be tempting to extract commonly used fields from
+the md structure (e.g. utf8, end_subject) into individual variables to improve
performance. Tests using gcc on a SPARC disproved this; in the first case, it
made performance worse.
@@ -640,14 +662,6 @@ for (;;)
minimize = possessive = FALSE;
op = *ecode;
- /* For partial matching, remember if we ever hit the end of the subject after
- matching at least one subject character. */
-
- if (md->partial &&
- eptr >= md->end_subject &&
- eptr > mstart)
- md->hitend = TRUE;
-
switch(op)
{
case OP_FAIL:
@@ -823,18 +837,139 @@ for (;;)
/* Now see what the actual condition is */
- if (condcode == OP_RREF) /* Recursion test */
+ if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
{
- offset = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
- condition = md->recursive != NULL &&
- (offset == RREF_ANY || offset == md->recursive->group_num);
- ecode += condition? 3 : GET(ecode, 1);
+ if (md->recursive == NULL) /* Not recursing => FALSE */
+ {
+ condition = FALSE;
+ ecode += GET(ecode, 1);
+ }
+ else
+ {
+ int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
+ condition = (recno == RREF_ANY || recno == md->recursive->group_num);
+
+ /* If the test is for recursion into a specific subpattern, and it is
+ false, but the test was set up by name, scan the table to see if the
+ name refers to any other numbers, and test them. The condition is true
+ if any one is set. */
+
+ if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
+ {
+ uschar *slotA = md->name_table;
+ for (i = 0; i < md->name_count; i++)
+ {
+ if (GET2(slotA, 0) == recno) break;
+ slotA += md->name_entry_size;
+ }
+
+ /* Found a name for the number - there can be only one; duplicate
+ names for different numbers are allowed, but not vice versa. First
+ scan down for duplicates. */
+
+ if (i < md->name_count)
+ {
+ uschar *slotB = slotA;
+ while (slotB > md->name_table)
+ {
+ slotB -= md->name_entry_size;
+ if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
+ {
+ condition = GET2(slotB, 0) == md->recursive->group_num;
+ if (condition) break;
+ }
+ else break;
+ }
+
+ /* Scan up for duplicates */
+
+ if (!condition)
+ {
+ slotB = slotA;
+ for (i++; i < md->name_count; i++)
+ {
+ slotB += md->name_entry_size;
+ if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
+ {
+ condition = GET2(slotB, 0) == md->recursive->group_num;
+ if (condition) break;
+ }
+ else break;
+ }
+ }
+ }
+ }
+
+ /* Chose branch according to the condition */
+
+ ecode += condition? 3 : GET(ecode, 1);
+ }
}
- else if (condcode == OP_CREF) /* Group used test */
+ else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
{
offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
condition = offset < offset_top && md->offset_vector[offset] >= 0;
+
+ /* If the numbered capture is unset, but the reference was by name,
+ scan the table to see if the name refers to any other numbers, and test
+ them. The condition is true if any one is set. This is tediously similar
+ to the code above, but not close enough to try to amalgamate. */
+
+ if (!condition && condcode == OP_NCREF)
+ {
+ int refno = offset >> 1;
+ uschar *slotA = md->name_table;
+
+ for (i = 0; i < md->name_count; i++)
+ {
+ if (GET2(slotA, 0) == refno) break;
+ slotA += md->name_entry_size;
+ }
+
+ /* Found a name for the number - there can be only one; duplicate names
+ for different numbers are allowed, but not vice versa. First scan down
+ for duplicates. */
+
+ if (i < md->name_count)
+ {
+ uschar *slotB = slotA;
+ while (slotB > md->name_table)
+ {
+ slotB -= md->name_entry_size;
+ if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
+ {
+ offset = GET2(slotB, 0) << 1;
+ condition = offset < offset_top &&
+ md->offset_vector[offset] >= 0;
+ if (condition) break;
+ }
+ else break;
+ }
+
+ /* Scan up for duplicates */
+
+ if (!condition)
+ {
+ slotB = slotA;
+ for (i++; i < md->name_count; i++)
+ {
+ slotB += md->name_entry_size;
+ if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
+ {
+ offset = GET2(slotB, 0) << 1;
+ condition = offset < offset_top &&
+ md->offset_vector[offset] >= 0;
+ if (condition) break;
+ }
+ else break;
+ }
+ }
+ }
+ }
+
+ /* Chose branch according to the condition */
+
ecode += condition? 3 : GET(ecode, 1);
}
@@ -895,6 +1030,30 @@ for (;;)
break;
+ /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
+ to close any currently open capturing brackets. */
+
+ case OP_CLOSE:
+ number = GET2(ecode, 1);
+ offset = number << 1;
+
+#ifdef DEBUG
+ printf("end bracket %d at *ACCEPT", number);
+ printf("\n");
+#endif
+
+ md->capture_last = number;
+ if (offset >= md->offset_max) md->offset_overflow = TRUE; else
+ {
+ md->offset_vector[offset] =
+ md->offset_vector[md->offset_end - number];
+ md->offset_vector[offset+1] = eptr - md->start_subject;
+ if (offset_top <= offset) offset_top = offset + 2;
+ }
+ ecode += 3;
+ break;
+
+
/* End of the pattern, either real or forced. If we are in a top-level
recursion, we should restore the offsets appropriately and continue from
after the call. */
@@ -908,16 +1067,26 @@ for (;;)
md->recursive = rec->prevrec;
memmove(md->offset_vector, rec->offset_save,
rec->saved_max * sizeof(int));
+ offset_top = rec->save_offset_top;
mstart = rec->save_start;
ims = original_ims;
ecode = rec->after_call;
break;
}
- /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
- string - backtracking will then try other alternatives, if any. */
+ /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
+ set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
+ the subject. In both cases, backtracking will then try other alternatives,
+ if any. */
+
+ if (eptr == mstart &&
+ (md->notempty ||
+ (md->notempty_atstart &&
+ mstart == md->start_subject + md->start_offset)))
+ RRETURN(MATCH_NOMATCH);
+
+ /* Otherwise, we have a match. */
- if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
md->end_match_ptr = eptr; /* Record where we ended */
md->end_offset_top = offset_top; /* and how many extracts were taken */
md->start_match_ptr = mstart; /* and the start (\K can modify) */
@@ -1008,8 +1177,9 @@ for (;;)
if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
}
- /* Skip to next op code */
+ /* Save the earliest consulted character, then skip to next op code */
+ if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
ecode += 1 + LINK_SIZE;
break;
@@ -1089,6 +1259,7 @@ for (;;)
memcpy(new_recursive.offset_save, md->offset_vector,
new_recursive.saved_max * sizeof(int));
new_recursive.save_start = mstart;
+ new_recursive.save_offset_top = offset_top;
mstart = eptr;
/* OK, now we can do the recursion. For each top-level alternative we
@@ -1313,6 +1484,7 @@ for (;;)
mstart = rec->save_start;
memcpy(md->offset_vector, rec->offset_save,
rec->saved_max * sizeof(int));
+ offset_top = rec->save_offset_top;
ecode = rec->after_call;
ims = original_ims;
break;
@@ -1452,7 +1624,8 @@ for (;;)
/* Find out if the previous and current characters are "word" characters.
It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
- be "non-word" characters. */
+ be "non-word" characters. Remember the earliest consulted character for
+ partial matching. */
#ifdef SUPPORT_UTF8
if (utf8)
@@ -1461,10 +1634,16 @@ for (;;)
{
USPTR lastptr = eptr - 1;
while((*lastptr & 0xc0) == 0x80) lastptr--;
+ if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
GETCHAR(c, lastptr);
prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
}
- if (eptr >= md->end_subject) cur_is_word = FALSE; else
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ cur_is_word = FALSE;
+ }
+ else
{
GETCHAR(c, eptr);
cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
@@ -1473,13 +1652,20 @@ for (;;)
else
#endif
- /* More streamlined when not in UTF-8 mode */
+ /* Not in UTF-8 mode */
{
- prev_is_word = (eptr != md->start_subject) &&
- ((md->ctypes[eptr[-1]] & ctype_word) != 0);
- cur_is_word = (eptr < md->end_subject) &&
- ((md->ctypes[*eptr] & ctype_word) != 0);
+ if (eptr == md->start_subject) prev_is_word = FALSE; else
+ {
+ if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
+ prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
+ }
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ cur_is_word = FALSE;
+ }
+ else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
}
/* Now see if the situation is what we want */
@@ -1497,7 +1683,11 @@ for (;;)
/* Fall through */
case OP_ALLANY:
- if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr++ >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
ecode++;
break;
@@ -1506,12 +1696,20 @@ for (;;)
any byte, even newline, independent of the setting of PCRE_DOTALL. */
case OP_ANYBYTE:
- if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr++ >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
ecode++;
break;
case OP_NOT_DIGIT:
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINCTEST(c, eptr);
if (
#ifdef SUPPORT_UTF8
@@ -1524,7 +1722,11 @@ for (;;)
break;
case OP_DIGIT:
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINCTEST(c, eptr);
if (
#ifdef SUPPORT_UTF8
@@ -1537,7 +1739,11 @@ for (;;)
break;
case OP_NOT_WHITESPACE:
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINCTEST(c, eptr);
if (
#ifdef SUPPORT_UTF8
@@ -1550,7 +1756,11 @@ for (;;)
break;
case OP_WHITESPACE:
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINCTEST(c, eptr);
if (
#ifdef SUPPORT_UTF8
@@ -1563,7 +1773,11 @@ for (;;)
break;
case OP_NOT_WORDCHAR:
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINCTEST(c, eptr);
if (
#ifdef SUPPORT_UTF8
@@ -1576,7 +1790,11 @@ for (;;)
break;
case OP_WORDCHAR:
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINCTEST(c, eptr);
if (
#ifdef SUPPORT_UTF8
@@ -1589,7 +1807,11 @@ for (;;)
break;
case OP_ANYNL:
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINCTEST(c, eptr);
switch(c)
{
@@ -1613,7 +1835,11 @@ for (;;)
break;
case OP_NOT_HSPACE:
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINCTEST(c, eptr);
switch(c)
{
@@ -1643,7 +1869,11 @@ for (;;)
break;
case OP_HSPACE:
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINCTEST(c, eptr);
switch(c)
{
@@ -1673,7 +1903,11 @@ for (;;)
break;
case OP_NOT_VSPACE:
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINCTEST(c, eptr);
switch(c)
{
@@ -1691,7 +1925,11 @@ for (;;)
break;
case OP_VSPACE:
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINCTEST(c, eptr);
switch(c)
{
@@ -1714,7 +1952,11 @@ for (;;)
case OP_PROP:
case OP_NOTPROP:
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINCTEST(c, eptr);
{
const ucd_record *prop = GET_UCD(c);
@@ -1759,7 +2001,11 @@ for (;;)
is in the binary; otherwise a compile-time error occurs. */
case OP_EXTUNI:
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINCTEST(c, eptr);
{
int category = UCD_CATEGORY(c);
@@ -1839,7 +2085,11 @@ for (;;)
break;
default: /* No repeat follows */
- if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
+ if (!match_ref(offset, eptr, length, md, ims))
+ {
+ CHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
eptr += length;
continue; /* With the main loop */
}
@@ -1855,7 +2105,11 @@ for (;;)
for (i = 1; i <= min; i++)
{
- if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
+ if (!match_ref(offset, eptr, length, md, ims))
+ {
+ CHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
eptr += length;
}
@@ -1872,8 +2126,12 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max || !match_ref(offset, eptr, length, md, ims))
+ if (fi >= max) RRETURN(MATCH_NOMATCH);
+ if (!match_ref(offset, eptr, length, md, ims))
+ {
+ CHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
+ }
eptr += length;
}
/* Control never gets here */
@@ -1886,7 +2144,11 @@ for (;;)
pp = eptr;
for (i = min; i < max; i++)
{
- if (!match_ref(offset, eptr, length, md, ims)) break;
+ if (!match_ref(offset, eptr, length, md, ims))
+ {
+ CHECK_PARTIAL();
+ break;
+ }
eptr += length;
}
while (eptr >= pp)
@@ -1900,8 +2162,6 @@ for (;;)
}
/* Control never gets here */
-
-
/* Match a bit-mapped character class, possibly repeatedly. This op code is
used when all the characters in the class have values in the range 0-255,
and either the matching is caseful, or the characters are in the range
@@ -1956,7 +2216,11 @@ for (;;)
{
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINC(c, eptr);
if (c > 255)
{
@@ -1974,7 +2238,11 @@ for (;;)
{
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
c = *eptr++;
if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
}
@@ -1998,7 +2266,12 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (fi >= max) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINC(c, eptr);
if (c > 255)
{
@@ -2018,7 +2291,12 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (fi >= max) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
c = *eptr++;
if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
}
@@ -2039,7 +2317,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
if (c > 255)
{
@@ -2065,7 +2347,11 @@ for (;;)
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
c = *eptr;
if ((data[c/8] & (1 << (c&7))) == 0) break;
eptr++;
@@ -2127,7 +2413,11 @@ for (;;)
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINCTEST(c, eptr);
if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
}
@@ -2146,7 +2436,12 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (fi >= max) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINCTEST(c, eptr);
if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
}
@@ -2161,7 +2456,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLENTEST(c, eptr, len);
if (!_pcre_xclass(c, data)) break;
eptr += len;
@@ -2189,7 +2488,11 @@ for (;;)
length = 1;
ecode++;
GETCHARLEN(fc, ecode, length);
- if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
+ if (length > md->end_subject - eptr)
+ {
+ CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
+ RRETURN(MATCH_NOMATCH);
+ }
while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
}
else
@@ -2197,7 +2500,11 @@ for (;;)
/* Non-UTF-8 mode */
{
- if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
+ if (md->end_subject - eptr < 1)
+ {
+ SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
+ RRETURN(MATCH_NOMATCH);
+ }
if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
ecode += 2;
}
@@ -2213,7 +2520,11 @@ for (;;)
ecode++;
GETCHARLEN(fc, ecode, length);
- if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
+ if (length > md->end_subject - eptr)
+ {
+ CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
+ RRETURN(MATCH_NOMATCH);
+ }
/* If the pattern character's value is < 128, we have only one byte, and
can use the fast lookup table. */
@@ -2248,7 +2559,11 @@ for (;;)
/* Non-UTF-8 mode */
{
- if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
+ if (md->end_subject - eptr < 1)
+ {
+ SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
+ RRETURN(MATCH_NOMATCH);
+ }
if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
ecode += 2;
}
@@ -2302,13 +2617,12 @@ for (;;)
case OP_MINQUERY:
c = *ecode++ - OP_STAR;
minimize = (c & 1) != 0;
+
min = rep_min[c]; /* Pick up values from tables; */
max = rep_max[c]; /* zero for max => infinity */
if (max == 0) max = INT_MAX;
- /* Common code for all repeated single-character matches. We can give
- up quickly if there are fewer than the minimum number of characters left in
- the subject. */
+ /* Common code for all repeated single-character matches. */
REPEATCHAR:
#ifdef SUPPORT_UTF8
@@ -2317,7 +2631,6 @@ for (;;)
length = 1;
charptr = ecode;
GETCHARLEN(fc, ecode, length);
- if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
ecode += length;
/* Handle multibyte character matching specially here. There is
@@ -2335,18 +2648,18 @@ for (;;)
for (i = 1; i <= min; i++)
{
- if (memcmp(eptr, charptr, length) == 0) eptr += length;
+ if (eptr <= md->end_subject - length &&
+ memcmp(eptr, charptr, length) == 0) eptr += length;
#ifdef SUPPORT_UCP
- /* Need braces because of following else */
- else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
+ else if (oclength > 0 &&
+ eptr <= md->end_subject - oclength &&
+ memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
+#endif /* SUPPORT_UCP */
else
{
- if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
- eptr += oclength;
+ CHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
}
-#else /* without SUPPORT_UCP */
- else { RRETURN(MATCH_NOMATCH); }
-#endif /* SUPPORT_UCP */
}
if (min == max) continue;
@@ -2357,19 +2670,19 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
- if (memcmp(eptr, charptr, length) == 0) eptr += length;
+ if (fi >= max) RRETURN(MATCH_NOMATCH);
+ if (eptr <= md->end_subject - length &&
+ memcmp(eptr, charptr, length) == 0) eptr += length;
#ifdef SUPPORT_UCP
- /* Need braces because of following else */
- else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
+ else if (oclength > 0 &&
+ eptr <= md->end_subject - oclength &&
+ memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
+#endif /* SUPPORT_UCP */
else
{
- if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
- eptr += oclength;
+ CHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
}
-#else /* without SUPPORT_UCP */
- else { RRETURN (MATCH_NOMATCH); }
-#endif /* SUPPORT_UCP */
}
/* Control never gets here */
}
@@ -2379,33 +2692,34 @@ for (;;)
pp = eptr;
for (i = min; i < max; i++)
{
- if (eptr > md->end_subject - length) break;
- if (memcmp(eptr, charptr, length) == 0) eptr += length;
+ if (eptr <= md->end_subject - length &&
+ memcmp(eptr, charptr, length) == 0) eptr += length;
#ifdef SUPPORT_UCP
- else if (oclength == 0) break;
+ else if (oclength > 0 &&
+ eptr <= md->end_subject - oclength &&
+ memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
+#endif /* SUPPORT_UCP */
else
{
- if (memcmp(eptr, occhars, oclength) != 0) break;
- eptr += oclength;
+ CHECK_PARTIAL();
+ break;
}
-#else /* without SUPPORT_UCP */
- else break;
-#endif /* SUPPORT_UCP */
}
if (possessive) continue;
+
for(;;)
- {
- RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (eptr == pp) RRETURN(MATCH_NOMATCH);
+ {
+ RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
#ifdef SUPPORT_UCP
- eptr--;
- BACKCHAR(eptr);
+ eptr--;
+ BACKCHAR(eptr);
#else /* without SUPPORT_UCP */
- eptr -= length;
+ eptr -= length;
#endif /* SUPPORT_UCP */
- }
+ }
}
/* Control never gets here */
}
@@ -2418,10 +2732,8 @@ for (;;)
#endif /* SUPPORT_UTF8 */
/* When not in UTF-8 mode, load a single-byte character. */
- {
- if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
- fc = *ecode++;
- }
+
+ fc = *ecode++;
/* The value of fc at this point is always less than 256, though we may or
may not be in UTF-8 mode. The code is duplicated for the caseless and
@@ -2439,7 +2751,14 @@ for (;;)
{
fc = md->lcc[fc];
for (i = 1; i <= min; i++)
+ {
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
+ }
if (min == max) continue;
if (minimize)
{
@@ -2447,9 +2766,13 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max || eptr >= md->end_subject ||
- fc != md->lcc[*eptr++])
+ if (fi >= max) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
+ }
+ if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
}
/* Control never gets here */
}
@@ -2458,10 +2781,17 @@ for (;;)
pp = eptr;
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
+ if (fc != md->lcc[*eptr]) break;
eptr++;
}
+
if (possessive) continue;
+
while (eptr >= pp)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
@@ -2477,16 +2807,31 @@ for (;;)
else
{
- for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
+ for (i = 1; i <= min; i++)
+ {
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
+ }
+
if (min == max) continue;
+
if (minimize)
{
for (fi = min;; fi++)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
+ if (fi >= max) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
+ }
+ if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
}
/* Control never gets here */
}
@@ -2495,10 +2840,16 @@ for (;;)
pp = eptr;
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || fc != *eptr) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
+ if (fc != *eptr) break;
eptr++;
}
if (possessive) continue;
+
while (eptr >= pp)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
@@ -2514,7 +2865,11 @@ for (;;)
checking can be multibyte. */
case OP_NOT:
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
ecode++;
GETCHARINCTEST(c, eptr);
if ((ims & PCRE_CASELESS) != 0)
@@ -2591,12 +2946,9 @@ for (;;)
max = rep_max[c]; /* zero for max => infinity */
if (max == 0) max = INT_MAX;
- /* Common code for all repeated single-byte matches. We can give up quickly
- if there are fewer than the minimum number of bytes left in the
- subject. */
+ /* Common code for all repeated single-byte matches. */
REPEATNOTCHAR:
- if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
fc = *ecode++;
/* The code is duplicated for the caseless and caseful cases, for speed,
@@ -2621,6 +2973,11 @@ for (;;)
register unsigned int d;
for (i = 1; i <= min; i++)
{
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINC(d, eptr);
if (d < 256) d = md->lcc[d];
if (fc == d) RRETURN(MATCH_NOMATCH);
@@ -2632,7 +2989,14 @@ for (;;)
/* Not UTF-8 mode */
{
for (i = 1; i <= min; i++)
+ {
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
+ }
}
if (min == max) continue;
@@ -2648,11 +3012,15 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (fi >= max) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINC(d, eptr);
if (d < 256) d = md->lcc[d];
if (fc == d) RRETURN(MATCH_NOMATCH);
-
}
}
else
@@ -2663,8 +3031,13 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
+ if (fi >= max) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
+ }
+ if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
}
}
/* Control never gets here */
@@ -2684,7 +3057,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(d, eptr, len);
if (d < 256) d = md->lcc[d];
if (fc == d) break;
@@ -2705,7 +3082,12 @@ for (;;)
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
+ if (fc == md->lcc[*eptr]) break;
eptr++;
}
if (possessive) continue;
@@ -2733,6 +3115,11 @@ for (;;)
register unsigned int d;
for (i = 1; i <= min; i++)
{
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINC(d, eptr);
if (fc == d) RRETURN(MATCH_NOMATCH);
}
@@ -2742,7 +3129,14 @@ for (;;)
/* Not UTF-8 mode */
{
for (i = 1; i <= min; i++)
+ {
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
+ }
}
if (min == max) continue;
@@ -2758,7 +3152,12 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (fi >= max) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINC(d, eptr);
if (fc == d) RRETURN(MATCH_NOMATCH);
}
@@ -2771,8 +3170,13 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
+ if (fi >= max) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
+ }
+ if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
}
}
/* Control never gets here */
@@ -2792,7 +3196,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(d, eptr, len);
if (fc == d) break;
eptr += len;
@@ -2812,7 +3220,12 @@ for (;;)
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || fc == *eptr) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
+ if (fc == *eptr) break;
eptr++;
}
if (possessive) continue;
@@ -2906,13 +3319,10 @@ for (;;)
/* First, ensure the minimum number of matches are present. Use inline
code for maximizing the speed, and do the type test once at the start
- (i.e. keep it out of the loop). Also we can test that there are at least
- the minimum number of bytes before we start. This isn't as effective in
- UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
+ (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
is tidier. Also separate the UCP code, which can be the same for both UTF-8
and single-bytes. */
- if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
if (min > 0)
{
#ifdef SUPPORT_UCP
@@ -2924,7 +3334,11 @@ for (;;)
if (prop_fail_result) RRETURN(MATCH_NOMATCH);
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINCTEST(c, eptr);
}
break;
@@ -2932,7 +3346,11 @@ for (;;)
case PT_LAMP:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINCTEST(c, eptr);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == ucp_Lu ||
@@ -2945,7 +3363,11 @@ for (;;)
case PT_GC:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINCTEST(c, eptr);
prop_category = UCD_CATEGORY(c);
if ((prop_category == prop_value) == prop_fail_result)
@@ -2956,7 +3378,11 @@ for (;;)
case PT_PC:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINCTEST(c, eptr);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == prop_value) == prop_fail_result)
@@ -2967,7 +3393,11 @@ for (;;)
case PT_SC:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINCTEST(c, eptr);
prop_script = UCD_SCRIPT(c);
if ((prop_script == prop_value) == prop_fail_result)
@@ -2987,16 +3417,19 @@ for (;;)
{
for (i = 1; i <= min; i++)
{
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINCTEST(c, eptr);
prop_category = UCD_CATEGORY(c);
if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
while (eptr < md->end_subject)
{
int len = 1;
- if (!utf8) c = *eptr; else
- {
- GETCHARLEN(c, eptr, len);
- }
+ if (!utf8) c = *eptr;
+ else { GETCHARLEN(c, eptr, len); }
prop_category = UCD_CATEGORY(c);
if (prop_category != ucp_M) break;
eptr += len;
@@ -3015,8 +3448,12 @@ for (;;)
case OP_ANY:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject || IS_NEWLINE(eptr))
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
+ }
+ if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
eptr++;
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
}
@@ -3025,20 +3462,29 @@ for (;;)
case OP_ALLANY:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
eptr++;
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
}
break;
case OP_ANYBYTE:
+ if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
eptr += min;
break;
case OP_ANYNL:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINC(c, eptr);
switch(c)
{
@@ -3064,7 +3510,11 @@ for (;;)
case OP_NOT_HSPACE:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINC(c, eptr);
switch(c)
{
@@ -3096,7 +3546,11 @@ for (;;)
case OP_HSPACE:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINC(c, eptr);
switch(c)
{
@@ -3128,7 +3582,11 @@ for (;;)
case OP_NOT_VSPACE:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINC(c, eptr);
switch(c)
{
@@ -3148,7 +3606,11 @@ for (;;)
case OP_VSPACE:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINC(c, eptr);
switch(c)
{
@@ -3168,7 +3630,11 @@ for (;;)
case OP_NOT_DIGIT:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINC(c, eptr);
if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
RRETURN(MATCH_NOMATCH);
@@ -3178,8 +3644,12 @@ for (;;)
case OP_DIGIT:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject ||
- *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
RRETURN(MATCH_NOMATCH);
/* No need to skip more bytes - we know it's a 1-byte character */
}
@@ -3188,8 +3658,12 @@ for (;;)
case OP_NOT_WHITESPACE:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject ||
- (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0))
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
RRETURN(MATCH_NOMATCH);
while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
}
@@ -3198,8 +3672,12 @@ for (;;)
case OP_WHITESPACE:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject ||
- *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
RRETURN(MATCH_NOMATCH);
/* No need to skip more bytes - we know it's a 1-byte character */
}
@@ -3218,8 +3696,12 @@ for (;;)
case OP_WORDCHAR:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject ||
- *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
RRETURN(MATCH_NOMATCH);
/* No need to skip more bytes - we know it's a 1-byte character */
}
@@ -3233,34 +3715,49 @@ for (;;)
#endif /* SUPPORT_UTF8 */
/* Code for the non-UTF-8 case for minimum matching of operators other
- than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
- number of bytes present, as this was tested above. */
+ than OP_PROP and OP_NOTPROP. */
switch(ctype)
{
case OP_ANY:
for (i = 1; i <= min; i++)
{
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
eptr++;
}
break;
case OP_ALLANY:
+ if (eptr > md->end_subject - min)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
eptr += min;
break;
case OP_ANYBYTE:
+ if (eptr > md->end_subject - min)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
eptr += min;
break;
- /* Because of the CRLF case, we can't assume the minimum number of
- bytes are present in this case. */
-
case OP_ANYNL:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
switch(*eptr++)
{
default: RRETURN(MATCH_NOMATCH);
@@ -3282,7 +3779,11 @@ for (;;)
case OP_NOT_HSPACE:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
switch(*eptr++)
{
default: break;
@@ -3297,7 +3798,11 @@ for (;;)
case OP_HSPACE:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
switch(*eptr++)
{
default: RRETURN(MATCH_NOMATCH);
@@ -3312,7 +3817,11 @@ for (;;)
case OP_NOT_VSPACE:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
switch(*eptr++)
{
default: break;
@@ -3329,7 +3838,11 @@ for (;;)
case OP_VSPACE:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
switch(*eptr++)
{
default: RRETURN(MATCH_NOMATCH);
@@ -3345,34 +3858,76 @@ for (;;)
case OP_NOT_DIGIT:
for (i = 1; i <= min; i++)
+ {
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
+ }
break;
case OP_DIGIT:
for (i = 1; i <= min; i++)
+ {
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
+ }
break;
case OP_NOT_WHITESPACE:
for (i = 1; i <= min; i++)
+ {
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
+ }
break;
case OP_WHITESPACE:
for (i = 1; i <= min; i++)
+ {
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
+ }
break;
case OP_NOT_WORDCHAR:
for (i = 1; i <= min; i++)
+ {
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
if ((md->ctypes[*eptr++] & ctype_word) != 0)
RRETURN(MATCH_NOMATCH);
+ }
break;
case OP_WORDCHAR:
for (i = 1; i <= min; i++)
+ {
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
if ((md->ctypes[*eptr++] & ctype_word) == 0)
RRETURN(MATCH_NOMATCH);
+ }
break;
default:
@@ -3400,7 +3955,12 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (fi >= max) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINC(c, eptr);
if (prop_fail_result) RRETURN(MATCH_NOMATCH);
}
@@ -3411,7 +3971,12 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (fi >= max) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINC(c, eptr);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == ucp_Lu ||
@@ -3426,7 +3991,12 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (fi >= max) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINC(c, eptr);
prop_category = UCD_CATEGORY(c);
if ((prop_category == prop_value) == prop_fail_result)
@@ -3439,7 +4009,12 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (fi >= max) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINC(c, eptr);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == prop_value) == prop_fail_result)
@@ -3452,7 +4027,12 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (fi >= max) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINC(c, eptr);
prop_script = UCD_SCRIPT(c);
if ((prop_script == prop_value) == prop_fail_result)
@@ -3474,17 +4054,20 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (fi >= max) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
GETCHARINCTEST(c, eptr);
prop_category = UCD_CATEGORY(c);
if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
while (eptr < md->end_subject)
{
int len = 1;
- if (!utf8) c = *eptr; else
- {
- GETCHARLEN(c, eptr, len);
- }
+ if (!utf8) c = *eptr;
+ else { GETCHARLEN(c, eptr, len); }
prop_category = UCD_CATEGORY(c);
if (prop_category != ucp_M) break;
eptr += len;
@@ -3503,10 +4086,14 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max || eptr >= md->end_subject ||
- (ctype == OP_ANY && IS_NEWLINE(eptr)))
+ if (fi >= max) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ if (ctype == OP_ANY && IS_NEWLINE(eptr))
RRETURN(MATCH_NOMATCH);
-
GETCHARINC(c, eptr);
switch(ctype)
{
@@ -3662,10 +4249,14 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max || eptr >= md->end_subject ||
- (ctype == OP_ANY && IS_NEWLINE(eptr)))
+ if (fi >= max) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ if (ctype == OP_ANY && IS_NEWLINE(eptr))
RRETURN(MATCH_NOMATCH);
-
c = *eptr++;
switch(ctype)
{
@@ -3790,7 +4381,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
if (prop_fail_result) break;
eptr+= len;
@@ -3801,7 +4396,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == ucp_Lu ||
@@ -3816,7 +4415,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
prop_category = UCD_CATEGORY(c);
if ((prop_category == prop_value) == prop_fail_result)
@@ -3829,7 +4432,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == prop_value) == prop_fail_result)
@@ -3842,7 +4449,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
prop_script = UCD_SCRIPT(c);
if ((prop_script == prop_value) == prop_fail_result)
@@ -3871,7 +4482,11 @@ for (;;)
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARINCTEST(c, eptr);
prop_category = UCD_CATEGORY(c);
if (prop_category == ucp_M) break;
@@ -3891,6 +4506,7 @@ for (;;)
/* eptr is now past the end of the maximum run */
if (possessive) continue;
+
for(;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
@@ -3926,7 +4542,12 @@ for (;;)
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
+ if (IS_NEWLINE(eptr)) break;
eptr++;
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
}
@@ -3938,7 +4559,12 @@ for (;;)
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
+ if (IS_NEWLINE(eptr)) break;
eptr++;
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
}
@@ -3950,7 +4576,11 @@ for (;;)
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
eptr++;
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
}
@@ -3963,15 +4593,22 @@ for (;;)
case OP_ANYBYTE:
c = max - min;
if (c > (unsigned int)(md->end_subject - eptr))
- c = md->end_subject - eptr;
- eptr += c;
+ {
+ eptr = md->end_subject;
+ SCHECK_PARTIAL();
+ }
+ else eptr += c;
break;
case OP_ANYNL:
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
if (c == 0x000d)
{
@@ -3996,7 +4633,11 @@ for (;;)
{
BOOL gotspace;
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
switch(c)
{
@@ -4034,7 +4675,11 @@ for (;;)
{
BOOL gotspace;
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
switch(c)
{
@@ -4058,7 +4703,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
eptr+= len;
@@ -4069,7 +4718,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
eptr+= len;
@@ -4080,7 +4733,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
eptr+= len;
@@ -4091,7 +4748,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
eptr+= len;
@@ -4102,7 +4763,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
eptr+= len;
@@ -4113,7 +4778,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
eptr+= len;
@@ -4145,7 +4814,12 @@ for (;;)
case OP_ANY:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
+ if (IS_NEWLINE(eptr)) break;
eptr++;
}
break;
@@ -4154,14 +4828,21 @@ for (;;)
case OP_ANYBYTE:
c = max - min;
if (c > (unsigned int)(md->end_subject - eptr))
- c = md->end_subject - eptr;
- eptr += c;
+ {
+ eptr = md->end_subject;
+ SCHECK_PARTIAL();
+ }
+ else eptr += c;
break;
case OP_ANYNL:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
c = *eptr;
if (c == 0x000d)
{
@@ -4182,7 +4863,11 @@ for (;;)
case OP_NOT_HSPACE:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
c = *eptr;
if (c == 0x09 || c == 0x20 || c == 0xa0) break;
eptr++;
@@ -4192,7 +4877,11 @@ for (;;)
case OP_HSPACE:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
c = *eptr;
if (c != 0x09 && c != 0x20 && c != 0xa0) break;
eptr++;
@@ -4202,7 +4891,11 @@ for (;;)
case OP_NOT_VSPACE:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
c = *eptr;
if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
break;
@@ -4213,7 +4906,11 @@ for (;;)
case OP_VSPACE:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
c = *eptr;
if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
break;
@@ -4224,8 +4921,12 @@ for (;;)
case OP_NOT_DIGIT:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
break;
+ }
+ if ((md->ctypes[*eptr] & ctype_digit) != 0) break;
eptr++;
}
break;
@@ -4233,8 +4934,12 @@ for (;;)
case OP_DIGIT:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
break;
+ }
+ if ((md->ctypes[*eptr] & ctype_digit) == 0) break;
eptr++;
}
break;
@@ -4242,8 +4947,12 @@ for (;;)
case OP_NOT_WHITESPACE:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
break;
+ }
+ if ((md->ctypes[*eptr] & ctype_space) != 0) break;
eptr++;
}
break;
@@ -4251,8 +4960,12 @@ for (;;)
case OP_WHITESPACE:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
break;
+ }
+ if ((md->ctypes[*eptr] & ctype_space) == 0) break;
eptr++;
}
break;
@@ -4260,8 +4973,12 @@ for (;;)
case OP_NOT_WORDCHAR:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
break;
+ }
+ if ((md->ctypes[*eptr] & ctype_word) != 0) break;
eptr++;
}
break;
@@ -4269,8 +4986,12 @@ for (;;)
case OP_WORDCHAR:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
break;
+ }
+ if ((md->ctypes[*eptr] & ctype_word) == 0) break;
eptr++;
}
break;
@@ -4448,6 +5169,7 @@ const uschar *tables;
const uschar *start_bits = NULL;
USPTR start_match = (USPTR)subject + start_offset;
USPTR end_subject;
+USPTR start_partial = NULL;
USPTR req_byte_ptr = start_match - 1;
pcre_study_data internal_study;
@@ -4464,6 +5186,13 @@ if (re == NULL || subject == NULL ||
(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
+/* This information is for finding all the numbers associated with a given
+name, for condition testing. */
+
+md->name_table = (uschar *)re + re->name_table_offset;
+md->name_count = re->name_count;
+md->name_entry_size = re->name_entry_size;
+
/* Fish out the optional data from the extra_data structure, first setting
the default values. */
@@ -4531,7 +5260,9 @@ md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
md->notbol = (options & PCRE_NOTBOL) != 0;
md->noteol = (options & PCRE_NOTEOL) != 0;
md->notempty = (options & PCRE_NOTEMPTY) != 0;
-md->partial = (options & PCRE_PARTIAL) != 0;
+md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
+md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
+ ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
md->hitend = FALSE;
md->recursive = NULL; /* No recursion at top level */
@@ -4605,8 +5336,9 @@ else
}
}
-/* Partial matching is supported only for a restricted set of regexes at the
-moment. */
+/* Partial matching was originally supported only for a restricted set of
+regexes; from release 8.00 there are no restrictions, but the bits are still
+defined (though never set). So there's no harm in leaving this code. */
if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
return PCRE_ERROR_BADPARTIAL;
@@ -4693,7 +5425,7 @@ if (!anchored)
}
else
if (!startline && study != NULL &&
- (study->options & PCRE_STUDY_MAPPED) != 0)
+ (study->flags & PCRE_STUDY_MAPPED) != 0)
start_bits = study->start_bits;
}
@@ -4820,79 +5552,94 @@ for(;;)
end_subject = save_end_subject;
-#ifdef DEBUG /* Sigh. Some compilers never learn. */
- printf(">>>> Match against: ");
- pchars(start_match, end_subject - start_match, TRUE, md);
- printf("\n");
-#endif
-
- /* If req_byte is set, we know that that character must appear in the
- subject for the match to succeed. If the first character is set, req_byte
- must be later in the subject; otherwise the test starts at the match point.
- This optimization can save a huge amount of backtracking in patterns with
- nested unlimited repeats that aren't going to match. Writing separate code
- for cased/caseless versions makes it go faster, as does using an
- autoincrement and backing off on a match.
-
- HOWEVER: when the subject string is very, very long, searching to its end
- can take a long time, and give bad performance on quite ordinary patterns.
- This showed up when somebody was matching something like /^\d+C/ on a
- 32-megabyte string... so we don't do this when the string is sufficiently
- long.
-
- ALSO: this processing is disabled when partial matching is requested, or if
+ /* The following two optimizations are disabled for partial matching or if
disabling is explicitly requested. */
- if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&
- req_byte >= 0 &&
- end_subject - start_match < REQ_BYTE_MAX &&
- !md->partial)
+ if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
{
- register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
+ /* If the pattern was studied, a minimum subject length may be set. This is
+ a lower bound; no actual string of that length may actually match the
+ pattern. Although the value is, strictly, in characters, we treat it as
+ bytes to avoid spending too much time in this optimization. */
- /* We don't need to repeat the search if we haven't yet reached the
- place we found it at last time. */
+ if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
+ end_subject - start_match < study->minlength)
+ {
+ rc = MATCH_NOMATCH;
+ break;
+ }
- if (p > req_byte_ptr)
+ /* If req_byte is set, we know that that character must appear in the
+ subject for the match to succeed. If the first character is set, req_byte
+ must be later in the subject; otherwise the test starts at the match point.
+ This optimization can save a huge amount of backtracking in patterns with
+ nested unlimited repeats that aren't going to match. Writing separate code
+ for cased/caseless versions makes it go faster, as does using an
+ autoincrement and backing off on a match.
+
+ HOWEVER: when the subject string is very, very long, searching to its end
+ can take a long time, and give bad performance on quite ordinary patterns.
+ This showed up when somebody was matching something like /^\d+C/ on a
+ 32-megabyte string... so we don't do this when the string is sufficiently
+ long. */
+
+ if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
{
- if (req_byte_caseless)
+ register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
+
+ /* We don't need to repeat the search if we haven't yet reached the
+ place we found it at last time. */
+
+ if (p > req_byte_ptr)
{
- while (p < end_subject)
+ if (req_byte_caseless)
{
- register int pp = *p++;
- if (pp == req_byte || pp == req_byte2) { p--; break; }
+ while (p < end_subject)
+ {
+ register int pp = *p++;
+ if (pp == req_byte || pp == req_byte2) { p--; break; }
+ }
}
- }
- else
- {
- while (p < end_subject)
+ else
{
- if (*p++ == req_byte) { p--; break; }
+ while (p < end_subject)
+ {
+ if (*p++ == req_byte) { p--; break; }
+ }
}
- }
- /* If we can't find the required character, break the matching loop,
- forcing a match failure. */
+ /* If we can't find the required character, break the matching loop,
+ forcing a match failure. */
- if (p >= end_subject)
- {
- rc = MATCH_NOMATCH;
- break;
- }
+ if (p >= end_subject)
+ {
+ rc = MATCH_NOMATCH;
+ break;
+ }
- /* If we have found the required character, save the point where we
- found it, so that we don't search again next time round the loop if
- the start hasn't passed this character yet. */
+ /* If we have found the required character, save the point where we
+ found it, so that we don't search again next time round the loop if
+ the start hasn't passed this character yet. */
- req_byte_ptr = p;
+ req_byte_ptr = p;
+ }
}
}
- /* OK, we can now run the match. */
+#ifdef DEBUG /* Sigh. Some compilers never learn. */
+ printf(">>>> Match against: ");
+ pchars(start_match, end_subject - start_match, TRUE, md);
+ printf("\n");
+#endif
+
+ /* OK, we can now run the match. If "hitend" is set afterwards, remember the
+ first starting point for which a partial match was found. */
md->start_match_ptr = start_match;
+ md->start_used_ptr = start_match;
md->match_call_count = 0;
rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
+ if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
switch(rc)
{
@@ -4922,7 +5669,7 @@ for(;;)
rc = MATCH_NOMATCH;
goto ENDLOOP;
- /* Any other return is some kind of error. */
+ /* Any other return is either a match, or some kind of error. */
default:
goto ENDLOOP;
@@ -5028,14 +5775,19 @@ if (using_temporary_offsets)
(pcre_free)(md->offset_vector);
}
-if (rc != MATCH_NOMATCH)
+if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
{
DPRINTF((">>>> error: returning %d\n", rc));
return rc;
}
-else if (md->partial && md->hitend)
+else if (start_partial != NULL)
{
DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
+ if (offsetcount > 1)
+ {
+ offsets[0] = start_partial - (USPTR)subject;
+ offsets[1] = end_subject - (USPTR)subject;
+ }
return PCRE_ERROR_PARTIAL;
}
else