diff options
Diffstat (limited to 'ext/pcre/pcrelib/pcre_exec.c')
-rw-r--r-- | ext/pcre/pcrelib/pcre_exec.c | 1292 |
1 files changed, 1022 insertions, 270 deletions
diff --git a/ext/pcre/pcrelib/pcre_exec.c b/ext/pcre/pcrelib/pcre_exec.c index 073cf2410..ca3079b0a 100644 --- a/ext/pcre/pcrelib/pcre_exec.c +++ b/ext/pcre/pcrelib/pcre_exec.c @@ -396,10 +396,32 @@ typedef struct heapframe { /* This function is called recursively in many circumstances. Whenever it returns a negative (error) response, the outer incarnation must also return the -same response. +same response. */ -Performance note: It might be tempting to extract commonly used fields from the -md structure (e.g. utf8, end_subject) into individual variables to improve +/* These macros pack up tests that are used for partial matching, and which +appears several times in the code. We set the "hit end" flag if the pointer is +at the end of the subject and also past the start of the subject (i.e. +something has been matched). For hard partial matching, we then return +immediately. The second one is used when we already know we are past the end of +the subject. */ + +#define CHECK_PARTIAL()\ + if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\ + {\ + md->hitend = TRUE;\ + if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\ + } + +#define SCHECK_PARTIAL()\ + if (md->partial != 0 && eptr > mstart)\ + {\ + md->hitend = TRUE;\ + if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\ + } + + +/* Performance note: It might be tempting to extract commonly used fields from +the md structure (e.g. utf8, end_subject) into individual variables to improve performance. Tests using gcc on a SPARC disproved this; in the first case, it made performance worse. @@ -640,14 +662,6 @@ for (;;) minimize = possessive = FALSE; op = *ecode; - /* For partial matching, remember if we ever hit the end of the subject after - matching at least one subject character. */ - - if (md->partial && - eptr >= md->end_subject && - eptr > mstart) - md->hitend = TRUE; - switch(op) { case OP_FAIL: @@ -823,18 +837,139 @@ for (;;) /* Now see what the actual condition is */ - if (condcode == OP_RREF) /* Recursion test */ + if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */ { - offset = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/ - condition = md->recursive != NULL && - (offset == RREF_ANY || offset == md->recursive->group_num); - ecode += condition? 3 : GET(ecode, 1); + if (md->recursive == NULL) /* Not recursing => FALSE */ + { + condition = FALSE; + ecode += GET(ecode, 1); + } + else + { + int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/ + condition = (recno == RREF_ANY || recno == md->recursive->group_num); + + /* If the test is for recursion into a specific subpattern, and it is + false, but the test was set up by name, scan the table to see if the + name refers to any other numbers, and test them. The condition is true + if any one is set. */ + + if (!condition && condcode == OP_NRREF && recno != RREF_ANY) + { + uschar *slotA = md->name_table; + for (i = 0; i < md->name_count; i++) + { + if (GET2(slotA, 0) == recno) break; + slotA += md->name_entry_size; + } + + /* Found a name for the number - there can be only one; duplicate + names for different numbers are allowed, but not vice versa. First + scan down for duplicates. */ + + if (i < md->name_count) + { + uschar *slotB = slotA; + while (slotB > md->name_table) + { + slotB -= md->name_entry_size; + if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0) + { + condition = GET2(slotB, 0) == md->recursive->group_num; + if (condition) break; + } + else break; + } + + /* Scan up for duplicates */ + + if (!condition) + { + slotB = slotA; + for (i++; i < md->name_count; i++) + { + slotB += md->name_entry_size; + if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0) + { + condition = GET2(slotB, 0) == md->recursive->group_num; + if (condition) break; + } + else break; + } + } + } + } + + /* Chose branch according to the condition */ + + ecode += condition? 3 : GET(ecode, 1); + } } - else if (condcode == OP_CREF) /* Group used test */ + else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */ { offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */ condition = offset < offset_top && md->offset_vector[offset] >= 0; + + /* If the numbered capture is unset, but the reference was by name, + scan the table to see if the name refers to any other numbers, and test + them. The condition is true if any one is set. This is tediously similar + to the code above, but not close enough to try to amalgamate. */ + + if (!condition && condcode == OP_NCREF) + { + int refno = offset >> 1; + uschar *slotA = md->name_table; + + for (i = 0; i < md->name_count; i++) + { + if (GET2(slotA, 0) == refno) break; + slotA += md->name_entry_size; + } + + /* Found a name for the number - there can be only one; duplicate names + for different numbers are allowed, but not vice versa. First scan down + for duplicates. */ + + if (i < md->name_count) + { + uschar *slotB = slotA; + while (slotB > md->name_table) + { + slotB -= md->name_entry_size; + if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0) + { + offset = GET2(slotB, 0) << 1; + condition = offset < offset_top && + md->offset_vector[offset] >= 0; + if (condition) break; + } + else break; + } + + /* Scan up for duplicates */ + + if (!condition) + { + slotB = slotA; + for (i++; i < md->name_count; i++) + { + slotB += md->name_entry_size; + if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0) + { + offset = GET2(slotB, 0) << 1; + condition = offset < offset_top && + md->offset_vector[offset] >= 0; + if (condition) break; + } + else break; + } + } + } + } + + /* Chose branch according to the condition */ + ecode += condition? 3 : GET(ecode, 1); } @@ -895,6 +1030,30 @@ for (;;) break; + /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes, + to close any currently open capturing brackets. */ + + case OP_CLOSE: + number = GET2(ecode, 1); + offset = number << 1; + +#ifdef DEBUG + printf("end bracket %d at *ACCEPT", number); + printf("\n"); +#endif + + md->capture_last = number; + if (offset >= md->offset_max) md->offset_overflow = TRUE; else + { + md->offset_vector[offset] = + md->offset_vector[md->offset_end - number]; + md->offset_vector[offset+1] = eptr - md->start_subject; + if (offset_top <= offset) offset_top = offset + 2; + } + ecode += 3; + break; + + /* End of the pattern, either real or forced. If we are in a top-level recursion, we should restore the offsets appropriately and continue from after the call. */ @@ -908,16 +1067,26 @@ for (;;) md->recursive = rec->prevrec; memmove(md->offset_vector, rec->offset_save, rec->saved_max * sizeof(int)); + offset_top = rec->save_offset_top; mstart = rec->save_start; ims = original_ims; ecode = rec->after_call; break; } - /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty - string - backtracking will then try other alternatives, if any. */ + /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is + set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of + the subject. In both cases, backtracking will then try other alternatives, + if any. */ + + if (eptr == mstart && + (md->notempty || + (md->notempty_atstart && + mstart == md->start_subject + md->start_offset))) + RRETURN(MATCH_NOMATCH); + + /* Otherwise, we have a match. */ - if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH); md->end_match_ptr = eptr; /* Record where we ended */ md->end_offset_top = offset_top; /* and how many extracts were taken */ md->start_match_ptr = mstart; /* and the start (\K can modify) */ @@ -1008,8 +1177,9 @@ for (;;) if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH); } - /* Skip to next op code */ + /* Save the earliest consulted character, then skip to next op code */ + if (eptr < md->start_used_ptr) md->start_used_ptr = eptr; ecode += 1 + LINK_SIZE; break; @@ -1089,6 +1259,7 @@ for (;;) memcpy(new_recursive.offset_save, md->offset_vector, new_recursive.saved_max * sizeof(int)); new_recursive.save_start = mstart; + new_recursive.save_offset_top = offset_top; mstart = eptr; /* OK, now we can do the recursion. For each top-level alternative we @@ -1313,6 +1484,7 @@ for (;;) mstart = rec->save_start; memcpy(md->offset_vector, rec->offset_save, rec->saved_max * sizeof(int)); + offset_top = rec->save_offset_top; ecode = rec->after_call; ims = original_ims; break; @@ -1452,7 +1624,8 @@ for (;;) /* Find out if the previous and current characters are "word" characters. It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to - be "non-word" characters. */ + be "non-word" characters. Remember the earliest consulted character for + partial matching. */ #ifdef SUPPORT_UTF8 if (utf8) @@ -1461,10 +1634,16 @@ for (;;) { USPTR lastptr = eptr - 1; while((*lastptr & 0xc0) == 0x80) lastptr--; + if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr; GETCHAR(c, lastptr); prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0; } - if (eptr >= md->end_subject) cur_is_word = FALSE; else + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + cur_is_word = FALSE; + } + else { GETCHAR(c, eptr); cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0; @@ -1473,13 +1652,20 @@ for (;;) else #endif - /* More streamlined when not in UTF-8 mode */ + /* Not in UTF-8 mode */ { - prev_is_word = (eptr != md->start_subject) && - ((md->ctypes[eptr[-1]] & ctype_word) != 0); - cur_is_word = (eptr < md->end_subject) && - ((md->ctypes[*eptr] & ctype_word) != 0); + if (eptr == md->start_subject) prev_is_word = FALSE; else + { + if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1; + prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0); + } + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + cur_is_word = FALSE; + } + else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0); } /* Now see if the situation is what we want */ @@ -1497,7 +1683,11 @@ for (;;) /* Fall through */ case OP_ALLANY: - if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr++ >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; ecode++; break; @@ -1506,12 +1696,20 @@ for (;;) any byte, even newline, independent of the setting of PCRE_DOTALL. */ case OP_ANYBYTE: - if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr++ >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } ecode++; break; case OP_NOT_DIGIT: - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINCTEST(c, eptr); if ( #ifdef SUPPORT_UTF8 @@ -1524,7 +1722,11 @@ for (;;) break; case OP_DIGIT: - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINCTEST(c, eptr); if ( #ifdef SUPPORT_UTF8 @@ -1537,7 +1739,11 @@ for (;;) break; case OP_NOT_WHITESPACE: - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINCTEST(c, eptr); if ( #ifdef SUPPORT_UTF8 @@ -1550,7 +1756,11 @@ for (;;) break; case OP_WHITESPACE: - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINCTEST(c, eptr); if ( #ifdef SUPPORT_UTF8 @@ -1563,7 +1773,11 @@ for (;;) break; case OP_NOT_WORDCHAR: - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINCTEST(c, eptr); if ( #ifdef SUPPORT_UTF8 @@ -1576,7 +1790,11 @@ for (;;) break; case OP_WORDCHAR: - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINCTEST(c, eptr); if ( #ifdef SUPPORT_UTF8 @@ -1589,7 +1807,11 @@ for (;;) break; case OP_ANYNL: - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINCTEST(c, eptr); switch(c) { @@ -1613,7 +1835,11 @@ for (;;) break; case OP_NOT_HSPACE: - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINCTEST(c, eptr); switch(c) { @@ -1643,7 +1869,11 @@ for (;;) break; case OP_HSPACE: - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINCTEST(c, eptr); switch(c) { @@ -1673,7 +1903,11 @@ for (;;) break; case OP_NOT_VSPACE: - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINCTEST(c, eptr); switch(c) { @@ -1691,7 +1925,11 @@ for (;;) break; case OP_VSPACE: - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINCTEST(c, eptr); switch(c) { @@ -1714,7 +1952,11 @@ for (;;) case OP_PROP: case OP_NOTPROP: - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINCTEST(c, eptr); { const ucd_record *prop = GET_UCD(c); @@ -1759,7 +2001,11 @@ for (;;) is in the binary; otherwise a compile-time error occurs. */ case OP_EXTUNI: - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINCTEST(c, eptr); { int category = UCD_CATEGORY(c); @@ -1839,7 +2085,11 @@ for (;;) break; default: /* No repeat follows */ - if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH); + if (!match_ref(offset, eptr, length, md, ims)) + { + CHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } eptr += length; continue; /* With the main loop */ } @@ -1855,7 +2105,11 @@ for (;;) for (i = 1; i <= min; i++) { - if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH); + if (!match_ref(offset, eptr, length, md, ims)) + { + CHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } eptr += length; } @@ -1872,8 +2126,12 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max || !match_ref(offset, eptr, length, md, ims)) + if (fi >= max) RRETURN(MATCH_NOMATCH); + if (!match_ref(offset, eptr, length, md, ims)) + { + CHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); + } eptr += length; } /* Control never gets here */ @@ -1886,7 +2144,11 @@ for (;;) pp = eptr; for (i = min; i < max; i++) { - if (!match_ref(offset, eptr, length, md, ims)) break; + if (!match_ref(offset, eptr, length, md, ims)) + { + CHECK_PARTIAL(); + break; + } eptr += length; } while (eptr >= pp) @@ -1900,8 +2162,6 @@ for (;;) } /* Control never gets here */ - - /* Match a bit-mapped character class, possibly repeatedly. This op code is used when all the characters in the class have values in the range 0-255, and either the matching is caseful, or the characters are in the range @@ -1956,7 +2216,11 @@ for (;;) { for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINC(c, eptr); if (c > 255) { @@ -1974,7 +2238,11 @@ for (;;) { for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } c = *eptr++; if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); } @@ -1998,7 +2266,12 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (fi >= max) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINC(c, eptr); if (c > 255) { @@ -2018,7 +2291,12 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (fi >= max) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } c = *eptr++; if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); } @@ -2039,7 +2317,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(c, eptr, len); if (c > 255) { @@ -2065,7 +2347,11 @@ for (;;) { for (i = min; i < max; i++) { - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } c = *eptr; if ((data[c/8] & (1 << (c&7))) == 0) break; eptr++; @@ -2127,7 +2413,11 @@ for (;;) for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINCTEST(c, eptr); if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH); } @@ -2146,7 +2436,12 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (fi >= max) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINCTEST(c, eptr); if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH); } @@ -2161,7 +2456,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLENTEST(c, eptr, len); if (!_pcre_xclass(c, data)) break; eptr += len; @@ -2189,7 +2488,11 @@ for (;;) length = 1; ecode++; GETCHARLEN(fc, ecode, length); - if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); + if (length > md->end_subject - eptr) + { + CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */ + RRETURN(MATCH_NOMATCH); + } while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH); } else @@ -2197,7 +2500,11 @@ for (;;) /* Non-UTF-8 mode */ { - if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH); + if (md->end_subject - eptr < 1) + { + SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */ + RRETURN(MATCH_NOMATCH); + } if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH); ecode += 2; } @@ -2213,7 +2520,11 @@ for (;;) ecode++; GETCHARLEN(fc, ecode, length); - if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); + if (length > md->end_subject - eptr) + { + CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */ + RRETURN(MATCH_NOMATCH); + } /* If the pattern character's value is < 128, we have only one byte, and can use the fast lookup table. */ @@ -2248,7 +2559,11 @@ for (;;) /* Non-UTF-8 mode */ { - if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH); + if (md->end_subject - eptr < 1) + { + SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */ + RRETURN(MATCH_NOMATCH); + } if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); ecode += 2; } @@ -2302,13 +2617,12 @@ for (;;) case OP_MINQUERY: c = *ecode++ - OP_STAR; minimize = (c & 1) != 0; + min = rep_min[c]; /* Pick up values from tables; */ max = rep_max[c]; /* zero for max => infinity */ if (max == 0) max = INT_MAX; - /* Common code for all repeated single-character matches. We can give - up quickly if there are fewer than the minimum number of characters left in - the subject. */ + /* Common code for all repeated single-character matches. */ REPEATCHAR: #ifdef SUPPORT_UTF8 @@ -2317,7 +2631,6 @@ for (;;) length = 1; charptr = ecode; GETCHARLEN(fc, ecode, length); - if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); ecode += length; /* Handle multibyte character matching specially here. There is @@ -2335,18 +2648,18 @@ for (;;) for (i = 1; i <= min; i++) { - if (memcmp(eptr, charptr, length) == 0) eptr += length; + if (eptr <= md->end_subject - length && + memcmp(eptr, charptr, length) == 0) eptr += length; #ifdef SUPPORT_UCP - /* Need braces because of following else */ - else if (oclength == 0) { RRETURN(MATCH_NOMATCH); } + else if (oclength > 0 && + eptr <= md->end_subject - oclength && + memcmp(eptr, occhars, oclength) == 0) eptr += oclength; +#endif /* SUPPORT_UCP */ else { - if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH); - eptr += oclength; + CHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); } -#else /* without SUPPORT_UCP */ - else { RRETURN(MATCH_NOMATCH); } -#endif /* SUPPORT_UCP */ } if (min == max) continue; @@ -2357,19 +2670,19 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); - if (memcmp(eptr, charptr, length) == 0) eptr += length; + if (fi >= max) RRETURN(MATCH_NOMATCH); + if (eptr <= md->end_subject - length && + memcmp(eptr, charptr, length) == 0) eptr += length; #ifdef SUPPORT_UCP - /* Need braces because of following else */ - else if (oclength == 0) { RRETURN(MATCH_NOMATCH); } + else if (oclength > 0 && + eptr <= md->end_subject - oclength && + memcmp(eptr, occhars, oclength) == 0) eptr += oclength; +#endif /* SUPPORT_UCP */ else { - if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH); - eptr += oclength; + CHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); } -#else /* without SUPPORT_UCP */ - else { RRETURN (MATCH_NOMATCH); } -#endif /* SUPPORT_UCP */ } /* Control never gets here */ } @@ -2379,33 +2692,34 @@ for (;;) pp = eptr; for (i = min; i < max; i++) { - if (eptr > md->end_subject - length) break; - if (memcmp(eptr, charptr, length) == 0) eptr += length; + if (eptr <= md->end_subject - length && + memcmp(eptr, charptr, length) == 0) eptr += length; #ifdef SUPPORT_UCP - else if (oclength == 0) break; + else if (oclength > 0 && + eptr <= md->end_subject - oclength && + memcmp(eptr, occhars, oclength) == 0) eptr += oclength; +#endif /* SUPPORT_UCP */ else { - if (memcmp(eptr, occhars, oclength) != 0) break; - eptr += oclength; + CHECK_PARTIAL(); + break; } -#else /* without SUPPORT_UCP */ - else break; -#endif /* SUPPORT_UCP */ } if (possessive) continue; + for(;;) - { - RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (eptr == pp) RRETURN(MATCH_NOMATCH); + { + RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (eptr == pp) { RRETURN(MATCH_NOMATCH); } #ifdef SUPPORT_UCP - eptr--; - BACKCHAR(eptr); + eptr--; + BACKCHAR(eptr); #else /* without SUPPORT_UCP */ - eptr -= length; + eptr -= length; #endif /* SUPPORT_UCP */ - } + } } /* Control never gets here */ } @@ -2418,10 +2732,8 @@ for (;;) #endif /* SUPPORT_UTF8 */ /* When not in UTF-8 mode, load a single-byte character. */ - { - if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); - fc = *ecode++; - } + + fc = *ecode++; /* The value of fc at this point is always less than 256, though we may or may not be in UTF-8 mode. The code is duplicated for the caseless and @@ -2439,7 +2751,14 @@ for (;;) { fc = md->lcc[fc]; for (i = 1; i <= min; i++) + { + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); + } if (min == max) continue; if (minimize) { @@ -2447,9 +2766,13 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max || eptr >= md->end_subject || - fc != md->lcc[*eptr++]) + if (fi >= max) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); + } + if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); } /* Control never gets here */ } @@ -2458,10 +2781,17 @@ for (;;) pp = eptr; for (i = min; i < max; i++) { - if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (fc != md->lcc[*eptr]) break; eptr++; } + if (possessive) continue; + while (eptr >= pp) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25); @@ -2477,16 +2807,31 @@ for (;;) else { - for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH); + for (i = 1; i <= min; i++) + { + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (fc != *eptr++) RRETURN(MATCH_NOMATCH); + } + if (min == max) continue; + if (minimize) { for (fi = min;; fi++) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max || eptr >= md->end_subject || fc != *eptr++) + if (fi >= max) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); + } + if (fc != *eptr++) RRETURN(MATCH_NOMATCH); } /* Control never gets here */ } @@ -2495,10 +2840,16 @@ for (;;) pp = eptr; for (i = min; i < max; i++) { - if (eptr >= md->end_subject || fc != *eptr) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (fc != *eptr) break; eptr++; } if (possessive) continue; + while (eptr >= pp) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27); @@ -2514,7 +2865,11 @@ for (;;) checking can be multibyte. */ case OP_NOT: - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } ecode++; GETCHARINCTEST(c, eptr); if ((ims & PCRE_CASELESS) != 0) @@ -2591,12 +2946,9 @@ for (;;) max = rep_max[c]; /* zero for max => infinity */ if (max == 0) max = INT_MAX; - /* Common code for all repeated single-byte matches. We can give up quickly - if there are fewer than the minimum number of bytes left in the - subject. */ + /* Common code for all repeated single-byte matches. */ REPEATNOTCHAR: - if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); fc = *ecode++; /* The code is duplicated for the caseless and caseful cases, for speed, @@ -2621,6 +2973,11 @@ for (;;) register unsigned int d; for (i = 1; i <= min; i++) { + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINC(d, eptr); if (d < 256) d = md->lcc[d]; if (fc == d) RRETURN(MATCH_NOMATCH); @@ -2632,7 +2989,14 @@ for (;;) /* Not UTF-8 mode */ { for (i = 1; i <= min; i++) + { + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); + } } if (min == max) continue; @@ -2648,11 +3012,15 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (fi >= max) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINC(d, eptr); if (d < 256) d = md->lcc[d]; if (fc == d) RRETURN(MATCH_NOMATCH); - } } else @@ -2663,8 +3031,13 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++]) + if (fi >= max) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); + } + if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); } } /* Control never gets here */ @@ -2684,7 +3057,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(d, eptr, len); if (d < 256) d = md->lcc[d]; if (fc == d) break; @@ -2705,7 +3082,12 @@ for (;;) { for (i = min; i < max; i++) { - if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (fc == md->lcc[*eptr]) break; eptr++; } if (possessive) continue; @@ -2733,6 +3115,11 @@ for (;;) register unsigned int d; for (i = 1; i <= min; i++) { + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINC(d, eptr); if (fc == d) RRETURN(MATCH_NOMATCH); } @@ -2742,7 +3129,14 @@ for (;;) /* Not UTF-8 mode */ { for (i = 1; i <= min; i++) + { + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } if (fc == *eptr++) RRETURN(MATCH_NOMATCH); + } } if (min == max) continue; @@ -2758,7 +3152,12 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (fi >= max) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINC(d, eptr); if (fc == d) RRETURN(MATCH_NOMATCH); } @@ -2771,8 +3170,13 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max || eptr >= md->end_subject || fc == *eptr++) + if (fi >= max) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); + } + if (fc == *eptr++) RRETURN(MATCH_NOMATCH); } } /* Control never gets here */ @@ -2792,7 +3196,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(d, eptr, len); if (fc == d) break; eptr += len; @@ -2812,7 +3220,12 @@ for (;;) { for (i = min; i < max; i++) { - if (eptr >= md->end_subject || fc == *eptr) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (fc == *eptr) break; eptr++; } if (possessive) continue; @@ -2906,13 +3319,10 @@ for (;;) /* First, ensure the minimum number of matches are present. Use inline code for maximizing the speed, and do the type test once at the start - (i.e. keep it out of the loop). Also we can test that there are at least - the minimum number of bytes before we start. This isn't as effective in - UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that + (i.e. keep it out of the loop). Separate the UTF-8 code completely as that is tidier. Also separate the UCP code, which can be the same for both UTF-8 and single-bytes. */ - if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); if (min > 0) { #ifdef SUPPORT_UCP @@ -2924,7 +3334,11 @@ for (;;) if (prop_fail_result) RRETURN(MATCH_NOMATCH); for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINCTEST(c, eptr); } break; @@ -2932,7 +3346,11 @@ for (;;) case PT_LAMP: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINCTEST(c, eptr); prop_chartype = UCD_CHARTYPE(c); if ((prop_chartype == ucp_Lu || @@ -2945,7 +3363,11 @@ for (;;) case PT_GC: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINCTEST(c, eptr); prop_category = UCD_CATEGORY(c); if ((prop_category == prop_value) == prop_fail_result) @@ -2956,7 +3378,11 @@ for (;;) case PT_PC: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINCTEST(c, eptr); prop_chartype = UCD_CHARTYPE(c); if ((prop_chartype == prop_value) == prop_fail_result) @@ -2967,7 +3393,11 @@ for (;;) case PT_SC: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINCTEST(c, eptr); prop_script = UCD_SCRIPT(c); if ((prop_script == prop_value) == prop_fail_result) @@ -2987,16 +3417,19 @@ for (;;) { for (i = 1; i <= min; i++) { + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINCTEST(c, eptr); prop_category = UCD_CATEGORY(c); if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH); while (eptr < md->end_subject) { int len = 1; - if (!utf8) c = *eptr; else - { - GETCHARLEN(c, eptr, len); - } + if (!utf8) c = *eptr; + else { GETCHARLEN(c, eptr, len); } prop_category = UCD_CATEGORY(c); if (prop_category != ucp_M) break; eptr += len; @@ -3015,8 +3448,12 @@ for (;;) case OP_ANY: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject || IS_NEWLINE(eptr)) + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); + } + if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); eptr++; while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; } @@ -3025,20 +3462,29 @@ for (;;) case OP_ALLANY: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } eptr++; while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; } break; case OP_ANYBYTE: + if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH); eptr += min; break; case OP_ANYNL: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINC(c, eptr); switch(c) { @@ -3064,7 +3510,11 @@ for (;;) case OP_NOT_HSPACE: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINC(c, eptr); switch(c) { @@ -3096,7 +3546,11 @@ for (;;) case OP_HSPACE: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINC(c, eptr); switch(c) { @@ -3128,7 +3582,11 @@ for (;;) case OP_NOT_VSPACE: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINC(c, eptr); switch(c) { @@ -3148,7 +3606,11 @@ for (;;) case OP_VSPACE: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINC(c, eptr); switch(c) { @@ -3168,7 +3630,11 @@ for (;;) case OP_NOT_DIGIT: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINC(c, eptr); if (c < 128 && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH); @@ -3178,8 +3644,12 @@ for (;;) case OP_DIGIT: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject || - *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0) + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH); /* No need to skip more bytes - we know it's a 1-byte character */ } @@ -3188,8 +3658,12 @@ for (;;) case OP_NOT_WHITESPACE: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject || - (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)) + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0) RRETURN(MATCH_NOMATCH); while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80); } @@ -3198,8 +3672,12 @@ for (;;) case OP_WHITESPACE: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject || - *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0) + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH); /* No need to skip more bytes - we know it's a 1-byte character */ } @@ -3218,8 +3696,12 @@ for (;;) case OP_WORDCHAR: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject || - *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0) + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0) RRETURN(MATCH_NOMATCH); /* No need to skip more bytes - we know it's a 1-byte character */ } @@ -3233,34 +3715,49 @@ for (;;) #endif /* SUPPORT_UTF8 */ /* Code for the non-UTF-8 case for minimum matching of operators other - than OP_PROP and OP_NOTPROP. We can assume that there are the minimum - number of bytes present, as this was tested above. */ + than OP_PROP and OP_NOTPROP. */ switch(ctype) { case OP_ANY: for (i = 1; i <= min; i++) { + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); eptr++; } break; case OP_ALLANY: + if (eptr > md->end_subject - min) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } eptr += min; break; case OP_ANYBYTE: + if (eptr > md->end_subject - min) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } eptr += min; break; - /* Because of the CRLF case, we can't assume the minimum number of - bytes are present in this case. */ - case OP_ANYNL: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } switch(*eptr++) { default: RRETURN(MATCH_NOMATCH); @@ -3282,7 +3779,11 @@ for (;;) case OP_NOT_HSPACE: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } switch(*eptr++) { default: break; @@ -3297,7 +3798,11 @@ for (;;) case OP_HSPACE: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } switch(*eptr++) { default: RRETURN(MATCH_NOMATCH); @@ -3312,7 +3817,11 @@ for (;;) case OP_NOT_VSPACE: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } switch(*eptr++) { default: break; @@ -3329,7 +3838,11 @@ for (;;) case OP_VSPACE: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } switch(*eptr++) { default: RRETURN(MATCH_NOMATCH); @@ -3345,34 +3858,76 @@ for (;;) case OP_NOT_DIGIT: for (i = 1; i <= min; i++) + { + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH); + } break; case OP_DIGIT: for (i = 1; i <= min; i++) + { + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH); + } break; case OP_NOT_WHITESPACE: for (i = 1; i <= min; i++) + { + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH); + } break; case OP_WHITESPACE: for (i = 1; i <= min; i++) + { + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH); + } break; case OP_NOT_WORDCHAR: for (i = 1; i <= min; i++) + { + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } if ((md->ctypes[*eptr++] & ctype_word) != 0) RRETURN(MATCH_NOMATCH); + } break; case OP_WORDCHAR: for (i = 1; i <= min; i++) + { + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } if ((md->ctypes[*eptr++] & ctype_word) == 0) RRETURN(MATCH_NOMATCH); + } break; default: @@ -3400,7 +3955,12 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (fi >= max) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINC(c, eptr); if (prop_fail_result) RRETURN(MATCH_NOMATCH); } @@ -3411,7 +3971,12 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (fi >= max) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINC(c, eptr); prop_chartype = UCD_CHARTYPE(c); if ((prop_chartype == ucp_Lu || @@ -3426,7 +3991,12 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (fi >= max) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINC(c, eptr); prop_category = UCD_CATEGORY(c); if ((prop_category == prop_value) == prop_fail_result) @@ -3439,7 +4009,12 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (fi >= max) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINC(c, eptr); prop_chartype = UCD_CHARTYPE(c); if ((prop_chartype == prop_value) == prop_fail_result) @@ -3452,7 +4027,12 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (fi >= max) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINC(c, eptr); prop_script = UCD_SCRIPT(c); if ((prop_script == prop_value) == prop_fail_result) @@ -3474,17 +4054,20 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (fi >= max) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } GETCHARINCTEST(c, eptr); prop_category = UCD_CATEGORY(c); if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH); while (eptr < md->end_subject) { int len = 1; - if (!utf8) c = *eptr; else - { - GETCHARLEN(c, eptr, len); - } + if (!utf8) c = *eptr; + else { GETCHARLEN(c, eptr, len); } prop_category = UCD_CATEGORY(c); if (prop_category != ucp_M) break; eptr += len; @@ -3503,10 +4086,14 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max || eptr >= md->end_subject || - (ctype == OP_ANY && IS_NEWLINE(eptr))) + if (fi >= max) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (ctype == OP_ANY && IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); - GETCHARINC(c, eptr); switch(ctype) { @@ -3662,10 +4249,14 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max || eptr >= md->end_subject || - (ctype == OP_ANY && IS_NEWLINE(eptr))) + if (fi >= max) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (ctype == OP_ANY && IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); - c = *eptr++; switch(ctype) { @@ -3790,7 +4381,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(c, eptr, len); if (prop_fail_result) break; eptr+= len; @@ -3801,7 +4396,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(c, eptr, len); prop_chartype = UCD_CHARTYPE(c); if ((prop_chartype == ucp_Lu || @@ -3816,7 +4415,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(c, eptr, len); prop_category = UCD_CATEGORY(c); if ((prop_category == prop_value) == prop_fail_result) @@ -3829,7 +4432,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(c, eptr, len); prop_chartype = UCD_CHARTYPE(c); if ((prop_chartype == prop_value) == prop_fail_result) @@ -3842,7 +4449,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(c, eptr, len); prop_script = UCD_SCRIPT(c); if ((prop_script == prop_value) == prop_fail_result) @@ -3871,7 +4482,11 @@ for (;;) { for (i = min; i < max; i++) { - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARINCTEST(c, eptr); prop_category = UCD_CATEGORY(c); if (prop_category == ucp_M) break; @@ -3891,6 +4506,7 @@ for (;;) /* eptr is now past the end of the maximum run */ if (possessive) continue; + for(;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45); @@ -3926,7 +4542,12 @@ for (;;) { for (i = min; i < max; i++) { - if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (IS_NEWLINE(eptr)) break; eptr++; while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; } @@ -3938,7 +4559,12 @@ for (;;) { for (i = min; i < max; i++) { - if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (IS_NEWLINE(eptr)) break; eptr++; while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; } @@ -3950,7 +4576,11 @@ for (;;) { for (i = min; i < max; i++) { - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } eptr++; while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; } @@ -3963,15 +4593,22 @@ for (;;) case OP_ANYBYTE: c = max - min; if (c > (unsigned int)(md->end_subject - eptr)) - c = md->end_subject - eptr; - eptr += c; + { + eptr = md->end_subject; + SCHECK_PARTIAL(); + } + else eptr += c; break; case OP_ANYNL: for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(c, eptr, len); if (c == 0x000d) { @@ -3996,7 +4633,11 @@ for (;;) { BOOL gotspace; int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(c, eptr, len); switch(c) { @@ -4034,7 +4675,11 @@ for (;;) { BOOL gotspace; int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(c, eptr, len); switch(c) { @@ -4058,7 +4703,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(c, eptr, len); if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break; eptr+= len; @@ -4069,7 +4718,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(c, eptr, len); if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break; eptr+= len; @@ -4080,7 +4733,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(c, eptr, len); if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break; eptr+= len; @@ -4091,7 +4748,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(c, eptr, len); if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break; eptr+= len; @@ -4102,7 +4763,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(c, eptr, len); if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break; eptr+= len; @@ -4113,7 +4778,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(c, eptr, len); if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break; eptr+= len; @@ -4145,7 +4814,12 @@ for (;;) case OP_ANY: for (i = min; i < max; i++) { - if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (IS_NEWLINE(eptr)) break; eptr++; } break; @@ -4154,14 +4828,21 @@ for (;;) case OP_ANYBYTE: c = max - min; if (c > (unsigned int)(md->end_subject - eptr)) - c = md->end_subject - eptr; - eptr += c; + { + eptr = md->end_subject; + SCHECK_PARTIAL(); + } + else eptr += c; break; case OP_ANYNL: for (i = min; i < max; i++) { - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } c = *eptr; if (c == 0x000d) { @@ -4182,7 +4863,11 @@ for (;;) case OP_NOT_HSPACE: for (i = min; i < max; i++) { - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } c = *eptr; if (c == 0x09 || c == 0x20 || c == 0xa0) break; eptr++; @@ -4192,7 +4877,11 @@ for (;;) case OP_HSPACE: for (i = min; i < max; i++) { - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } c = *eptr; if (c != 0x09 && c != 0x20 && c != 0xa0) break; eptr++; @@ -4202,7 +4891,11 @@ for (;;) case OP_NOT_VSPACE: for (i = min; i < max; i++) { - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } c = *eptr; if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85) break; @@ -4213,7 +4906,11 @@ for (;;) case OP_VSPACE: for (i = min; i < max; i++) { - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } c = *eptr; if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85) break; @@ -4224,8 +4921,12 @@ for (;;) case OP_NOT_DIGIT: for (i = min; i < max; i++) { - if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0) + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); break; + } + if ((md->ctypes[*eptr] & ctype_digit) != 0) break; eptr++; } break; @@ -4233,8 +4934,12 @@ for (;;) case OP_DIGIT: for (i = min; i < max; i++) { - if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0) + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); break; + } + if ((md->ctypes[*eptr] & ctype_digit) == 0) break; eptr++; } break; @@ -4242,8 +4947,12 @@ for (;;) case OP_NOT_WHITESPACE: for (i = min; i < max; i++) { - if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0) + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); break; + } + if ((md->ctypes[*eptr] & ctype_space) != 0) break; eptr++; } break; @@ -4251,8 +4960,12 @@ for (;;) case OP_WHITESPACE: for (i = min; i < max; i++) { - if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0) + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); break; + } + if ((md->ctypes[*eptr] & ctype_space) == 0) break; eptr++; } break; @@ -4260,8 +4973,12 @@ for (;;) case OP_NOT_WORDCHAR: for (i = min; i < max; i++) { - if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0) + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); break; + } + if ((md->ctypes[*eptr] & ctype_word) != 0) break; eptr++; } break; @@ -4269,8 +4986,12 @@ for (;;) case OP_WORDCHAR: for (i = min; i < max; i++) { - if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0) + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); break; + } + if ((md->ctypes[*eptr] & ctype_word) == 0) break; eptr++; } break; @@ -4448,6 +5169,7 @@ const uschar *tables; const uschar *start_bits = NULL; USPTR start_match = (USPTR)subject + start_offset; USPTR end_subject; +USPTR start_partial = NULL; USPTR req_byte_ptr = start_match - 1; pcre_study_data internal_study; @@ -4464,6 +5186,13 @@ if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; if (offsetcount < 0) return PCRE_ERROR_BADCOUNT; +/* This information is for finding all the numbers associated with a given +name, for condition testing. */ + +md->name_table = (uschar *)re + re->name_table_offset; +md->name_count = re->name_count; +md->name_entry_size = re->name_entry_size; + /* Fish out the optional data from the extra_data structure, first setting the default values. */ @@ -4531,7 +5260,9 @@ md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0; md->notbol = (options & PCRE_NOTBOL) != 0; md->noteol = (options & PCRE_NOTEOL) != 0; md->notempty = (options & PCRE_NOTEMPTY) != 0; -md->partial = (options & PCRE_PARTIAL) != 0; +md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0; +md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 : + ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0; md->hitend = FALSE; md->recursive = NULL; /* No recursion at top level */ @@ -4605,8 +5336,9 @@ else } } -/* Partial matching is supported only for a restricted set of regexes at the -moment. */ +/* Partial matching was originally supported only for a restricted set of +regexes; from release 8.00 there are no restrictions, but the bits are still +defined (though never set). So there's no harm in leaving this code. */ if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0) return PCRE_ERROR_BADPARTIAL; @@ -4693,7 +5425,7 @@ if (!anchored) } else if (!startline && study != NULL && - (study->options & PCRE_STUDY_MAPPED) != 0) + (study->flags & PCRE_STUDY_MAPPED) != 0) start_bits = study->start_bits; } @@ -4820,79 +5552,94 @@ for(;;) end_subject = save_end_subject; -#ifdef DEBUG /* Sigh. Some compilers never learn. */ - printf(">>>> Match against: "); - pchars(start_match, end_subject - start_match, TRUE, md); - printf("\n"); -#endif - - /* If req_byte is set, we know that that character must appear in the - subject for the match to succeed. If the first character is set, req_byte - must be later in the subject; otherwise the test starts at the match point. - This optimization can save a huge amount of backtracking in patterns with - nested unlimited repeats that aren't going to match. Writing separate code - for cased/caseless versions makes it go faster, as does using an - autoincrement and backing off on a match. - - HOWEVER: when the subject string is very, very long, searching to its end - can take a long time, and give bad performance on quite ordinary patterns. - This showed up when somebody was matching something like /^\d+C/ on a - 32-megabyte string... so we don't do this when the string is sufficiently - long. - - ALSO: this processing is disabled when partial matching is requested, or if + /* The following two optimizations are disabled for partial matching or if disabling is explicitly requested. */ - if ((options & PCRE_NO_START_OPTIMIZE) == 0 && - req_byte >= 0 && - end_subject - start_match < REQ_BYTE_MAX && - !md->partial) + if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial) { - register USPTR p = start_match + ((first_byte >= 0)? 1 : 0); + /* If the pattern was studied, a minimum subject length may be set. This is + a lower bound; no actual string of that length may actually match the + pattern. Although the value is, strictly, in characters, we treat it as + bytes to avoid spending too much time in this optimization. */ - /* We don't need to repeat the search if we haven't yet reached the - place we found it at last time. */ + if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 && + end_subject - start_match < study->minlength) + { + rc = MATCH_NOMATCH; + break; + } - if (p > req_byte_ptr) + /* If req_byte is set, we know that that character must appear in the + subject for the match to succeed. If the first character is set, req_byte + must be later in the subject; otherwise the test starts at the match point. + This optimization can save a huge amount of backtracking in patterns with + nested unlimited repeats that aren't going to match. Writing separate code + for cased/caseless versions makes it go faster, as does using an + autoincrement and backing off on a match. + + HOWEVER: when the subject string is very, very long, searching to its end + can take a long time, and give bad performance on quite ordinary patterns. + This showed up when somebody was matching something like /^\d+C/ on a + 32-megabyte string... so we don't do this when the string is sufficiently + long. */ + + if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX) { - if (req_byte_caseless) + register USPTR p = start_match + ((first_byte >= 0)? 1 : 0); + + /* We don't need to repeat the search if we haven't yet reached the + place we found it at last time. */ + + if (p > req_byte_ptr) { - while (p < end_subject) + if (req_byte_caseless) { - register int pp = *p++; - if (pp == req_byte || pp == req_byte2) { p--; break; } + while (p < end_subject) + { + register int pp = *p++; + if (pp == req_byte || pp == req_byte2) { p--; break; } + } } - } - else - { - while (p < end_subject) + else { - if (*p++ == req_byte) { p--; break; } + while (p < end_subject) + { + if (*p++ == req_byte) { p--; break; } + } } - } - /* If we can't find the required character, break the matching loop, - forcing a match failure. */ + /* If we can't find the required character, break the matching loop, + forcing a match failure. */ - if (p >= end_subject) - { - rc = MATCH_NOMATCH; - break; - } + if (p >= end_subject) + { + rc = MATCH_NOMATCH; + break; + } - /* If we have found the required character, save the point where we - found it, so that we don't search again next time round the loop if - the start hasn't passed this character yet. */ + /* If we have found the required character, save the point where we + found it, so that we don't search again next time round the loop if + the start hasn't passed this character yet. */ - req_byte_ptr = p; + req_byte_ptr = p; + } } } - /* OK, we can now run the match. */ +#ifdef DEBUG /* Sigh. Some compilers never learn. */ + printf(">>>> Match against: "); + pchars(start_match, end_subject - start_match, TRUE, md); + printf("\n"); +#endif + + /* OK, we can now run the match. If "hitend" is set afterwards, remember the + first starting point for which a partial match was found. */ md->start_match_ptr = start_match; + md->start_used_ptr = start_match; md->match_call_count = 0; rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0); + if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr; switch(rc) { @@ -4922,7 +5669,7 @@ for(;;) rc = MATCH_NOMATCH; goto ENDLOOP; - /* Any other return is some kind of error. */ + /* Any other return is either a match, or some kind of error. */ default: goto ENDLOOP; @@ -5028,14 +5775,19 @@ if (using_temporary_offsets) (pcre_free)(md->offset_vector); } -if (rc != MATCH_NOMATCH) +if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL) { DPRINTF((">>>> error: returning %d\n", rc)); return rc; } -else if (md->partial && md->hitend) +else if (start_partial != NULL) { DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n")); + if (offsetcount > 1) + { + offsets[0] = start_partial - (USPTR)subject; + offsets[1] = end_subject - (USPTR)subject; + } return PCRE_ERROR_PARTIAL; } else |