diff options
Diffstat (limited to 'ext/pcre/pcrelib/pcre_exec.c')
-rw-r--r-- | ext/pcre/pcrelib/pcre_exec.c | 84 |
1 files changed, 54 insertions, 30 deletions
diff --git a/ext/pcre/pcrelib/pcre_exec.c b/ext/pcre/pcrelib/pcre_exec.c index ca3079b0a..4fe798e6b 100644 --- a/ext/pcre/pcrelib/pcre_exec.c +++ b/ext/pcre/pcrelib/pcre_exec.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2009 University of Cambridge + Copyright (c) 1997-2010 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -87,7 +87,7 @@ static const char rep_max[] = { 0, 0, 0, 0, 1, 1 }; -#ifdef DEBUG +#ifdef PCRE_DEBUG /************************************************* * Debugging function to print chars * *************************************************/ @@ -139,7 +139,7 @@ match_ref(int offset, register USPTR eptr, int length, match_data *md, { USPTR p = md->start_subject + md->offset_vector[offset]; -#ifdef DEBUG +#ifdef PCRE_DEBUG if (eptr >= md->end_subject) printf("matching subject <null>"); else @@ -247,16 +247,16 @@ enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10, /* These versions of the macros use the stack, as normal. There are debugging versions and production versions. Note that the "rw" argument of RMATCH isn't -actuall used in this definition. */ +actually used in this definition. */ #ifndef NO_RECURSE #define REGISTER register -#ifdef DEBUG +#ifdef PCRE_DEBUG #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \ { \ printf("match() called in line %d\n", __LINE__); \ - rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \ + rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \ printf("to line %d\n", __LINE__); \ } #define RRETURN(ra) \ @@ -266,7 +266,7 @@ actuall used in this definition. */ } #else #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \ - rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1) + rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1) #define RRETURN(ra) return ra #endif @@ -286,6 +286,7 @@ argument of match(), which never changes. */ newframe->Xeptr = ra;\ newframe->Xecode = rb;\ newframe->Xmstart = mstart;\ + newframe->Xmarkptr = markptr;\ newframe->Xoffset_top = rc;\ newframe->Xims = re;\ newframe->Xeptrb = rf;\ @@ -323,6 +324,7 @@ typedef struct heapframe { USPTR Xeptr; const uschar *Xecode; USPTR Xmstart; + USPTR Xmarkptr; int Xoffset_top; long int Xims; eptrblock *Xeptrb; @@ -430,6 +432,7 @@ Arguments: ecode pointer to current position in compiled code mstart pointer to the current match start position (can be modified by encountering \K) + markptr pointer to the most recent MARK name, or NULL offset_top current top pointer md pointer to "static" info for the match ims current /i, /m, and /s options @@ -448,9 +451,9 @@ Returns: MATCH_MATCH if matched ) these values are >= 0 */ static int -match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart, - int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb, - int flags, unsigned int rdepth) +match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart, USPTR + markptr, int offset_top, match_data *md, unsigned long int ims, + eptrblock *eptrb, int flags, unsigned int rdepth) { /* These variables do not need to be preserved over recursion in this function, so they can be ordinary variables in all cases. Mark some of them with @@ -478,6 +481,7 @@ frame->Xprevframe = NULL; /* Marks the top level */ frame->Xeptr = eptr; frame->Xecode = ecode; frame->Xmstart = mstart; +frame->Xmarkptr = markptr; frame->Xoffset_top = offset_top; frame->Xims = ims; frame->Xeptrb = eptrb; @@ -493,6 +497,7 @@ HEAP_RECURSE: #define eptr frame->Xeptr #define ecode frame->Xecode #define mstart frame->Xmstart +#define markptr frame->Xmarkptr #define offset_top frame->Xoffset_top #define ims frame->Xims #define eptrb frame->Xeptrb @@ -620,7 +625,7 @@ TAIL_RECURSE: /* OK, now we can get on with the real code of the function. Recursive calls are specified by the macro RMATCH and RRETURN is used to return. When NO_RECURSE is *not* defined, these just turn into a recursive call to match() -and a "return", respectively (possibly with some debugging if DEBUG is +and a "return", respectively (possibly with some debugging if PCRE_DEBUG is defined). However, RMATCH isn't like a function call because it's quite a complicated macro. It has to be used in one particular way. This shouldn't, however, impact performance when true recursion is being used. */ @@ -711,7 +716,7 @@ for (;;) number = GET2(ecode, 1+LINK_SIZE); offset = number << 1; -#ifdef DEBUG +#ifdef PCRE_DEBUG printf("start bracket %d\n", number); printf("subject="); pchars(eptr, 16, TRUE, md); @@ -1037,7 +1042,7 @@ for (;;) number = GET2(ecode, 1); offset = number << 1; -#ifdef DEBUG +#ifdef PCRE_DEBUG printf("end bracket %d at *ACCEPT", number); printf("\n"); #endif @@ -1068,7 +1073,6 @@ for (;;) memmove(md->offset_vector, rec->offset_save, rec->saved_max * sizeof(int)); offset_top = rec->save_offset_top; - mstart = rec->save_start; ims = original_ims; ecode = rec->after_call; break; @@ -1112,7 +1116,11 @@ for (;;) { RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0, RM4); - if (rrc == MATCH_MATCH) break; + if (rrc == MATCH_MATCH) + { + mstart = md->start_match_ptr; /* In case \K reset it */ + break; + } if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); ecode += GET(ecode, 1); } @@ -1131,7 +1139,9 @@ for (;;) offset_top = md->end_offset_top; continue; - /* Negative assertion: all branches must fail to match */ + /* Negative assertion: all branches must fail to match. Encountering SKIP, + PRUNE, or COMMIT means we must assume failure without checking subsequent + branches. */ case OP_ASSERT_NOT: case OP_ASSERTBACK_NOT: @@ -1140,6 +1150,11 @@ for (;;) RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0, RM5); if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH); + if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT) + { + do ecode += GET(ecode,1); while (*ecode == OP_ALT); + break; + } if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); ecode += GET(ecode,1); } @@ -1258,9 +1273,7 @@ for (;;) memcpy(new_recursive.offset_save, md->offset_vector, new_recursive.saved_max * sizeof(int)); - new_recursive.save_start = mstart; new_recursive.save_offset_top = offset_top; - mstart = eptr; /* OK, now we can do the recursion. For each top-level alternative we restore the offset and recursion data. */ @@ -1307,7 +1320,8 @@ for (;;) a move back into the brackets. Friedl calls these "atomic" subpatterns. Check the alternative branches in turn - the matching won't pass the KET for this kind of subpattern. If any one branch matches, we carry on as at - the end of a normal bracket, leaving the subject pointer. */ + the end of a normal bracket, leaving the subject pointer, but resetting + the start-of-match value in case it was changed by \K. */ case OP_ONCE: prev = ecode; @@ -1316,7 +1330,11 @@ for (;;) do { RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7); - if (rrc == MATCH_MATCH) break; + if (rrc == MATCH_MATCH) + { + mstart = md->start_match_ptr; + break; + } if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); ecode += GET(ecode,1); } @@ -1435,9 +1453,10 @@ for (;;) } else saved_eptr = NULL; - /* If we are at the end of an assertion group, stop matching and return - MATCH_MATCH, but record the current high water mark for use by positive - assertions. Do this also for the "once" (atomic) groups. */ + /* If we are at the end of an assertion group or an atomic group, stop + matching and return MATCH_MATCH, but record the current high water mark for + use by positive assertions. We also need to record the match start in case + it was changed by \K. */ if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT || @@ -1445,6 +1464,7 @@ for (;;) { md->end_match_ptr = eptr; /* For ONCE */ md->end_offset_top = offset_top; + md->start_match_ptr = mstart; RRETURN(MATCH_MATCH); } @@ -1459,7 +1479,7 @@ for (;;) number = GET2(prev, 1+LINK_SIZE); offset = number << 1; -#ifdef DEBUG +#ifdef PCRE_DEBUG printf("end bracket %d", number); printf("\n"); #endif @@ -1481,7 +1501,6 @@ for (;;) recursion_info *rec = md->recursive; DPRINTF(("Recursion (%d) succeeded - continuing\n", number)); md->recursive = rec->prevrec; - mstart = rec->save_start; memcpy(md->offset_vector, rec->offset_save, rec->saved_max * sizeof(int)); offset_top = rec->save_offset_top; @@ -3686,8 +3705,12 @@ for (;;) case OP_NOT_WORDCHAR: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject || - (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)) + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0) RRETURN(MATCH_NOMATCH); while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80); } @@ -5563,7 +5586,7 @@ for(;;) bytes to avoid spending too much time in this optimization. */ if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 && - end_subject - start_match < study->minlength) + (pcre_uint32)(end_subject - start_match) < study->minlength) { rc = MATCH_NOMATCH; break; @@ -5626,7 +5649,7 @@ for(;;) } } -#ifdef DEBUG /* Sigh. Some compilers never learn. */ +#ifdef PCRE_DEBUG /* Sigh. Some compilers never learn. */ printf(">>>> Match against: "); pchars(start_match, end_subject - start_match, TRUE, md); printf("\n"); @@ -5638,7 +5661,8 @@ for(;;) md->start_match_ptr = start_match; md->start_used_ptr = start_match; md->match_call_count = 0; - rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0); + rc = match(start_match, md->start_code, start_match, NULL, 2, md, ims, NULL, + 0, 0); if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr; switch(rc) |