diff options
author | Mike Hommey <glandium@debian.org> | 2007-04-17 20:40:00 +0200 |
---|---|---|
committer | Mike Hommey <glandium@debian.org> | 2007-04-17 20:40:00 +0200 |
commit | 789259a1b6850d30acffbb62b11456b9ed7a8f59 (patch) | |
tree | 842f2f9042a4264898ec29078aa029640078c393 /xmlregexp.c | |
parent | 968041a8b2ec86c39b5074024ce97d136ecd9a95 (diff) | |
download | libxml2-789259a1b6850d30acffbb62b11456b9ed7a8f59.tar.gz |
Load /tmp/libxml2-2.6.28 intoupstream/2.6.28.dfsg
libxml2/branches/upstream/current.
Diffstat (limited to 'xmlregexp.c')
-rw-r--r-- | xmlregexp.c | 78 |
1 files changed, 52 insertions, 26 deletions
diff --git a/xmlregexp.c b/xmlregexp.c index e7d519e..2a30d66 100644 --- a/xmlregexp.c +++ b/xmlregexp.c @@ -54,6 +54,11 @@ #define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l) #define NEXTL(l) ctxt->cur += l; #define XML_REG_STRING_SEPARATOR '|' +/* + * Need PREV to check on a '-' within a Character Group. May only be used + * when it's guaranteed that cur is not at the beginning of ctxt->string! + */ +#define PREV (ctxt->cur[-1]) /** * TODO: @@ -145,7 +150,8 @@ typedef enum { XML_REGEXP_START_STATE = 1, XML_REGEXP_FINAL_STATE, XML_REGEXP_TRANS_STATE, - XML_REGEXP_SINK_STATE + XML_REGEXP_SINK_STATE, + XML_REGEXP_UNREACH_STATE } xmlRegStateType; typedef enum { @@ -1595,6 +1601,11 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from, atom->quant = XML_REGEXP_QUANT_ONCE; xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1); break; + case XML_REGEXP_QUANT_RANGE: + if (atom->min == 0) { + xmlFAGenerateEpsilonTransition(ctxt, from, to); + } + break; default: break; } @@ -1709,6 +1720,8 @@ xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) { continue; if (state->nbTrans != 1) continue; + if (state->type == XML_REGEXP_UNREACH_STATE) + continue; /* is the only transition out a basic transition */ if ((state->trans[0].atom == NULL) && (state->trans[0].to >= 0) && @@ -1731,35 +1744,24 @@ xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) { tmp = ctxt->states[state->transTo[i]]; for (j = 0;j < tmp->nbTrans;j++) { if (tmp->trans[j].to == statenr) { - tmp->trans[j].to = newto; -#ifdef DEBUG_REGEXP_GRAPH - printf("Changed transition %d on %d to go to %d\n", - j, tmp->no, newto); -#endif - xmlRegStateAddTransTo(ctxt, ctxt->states[newto], - tmp->no); - } - } - } -#if 0 - for (i = 0;i < ctxt->nbStates;i++) { - tmp = ctxt->states[i]; - for (j = 0;j < tmp->nbTrans;j++) { - if (tmp->trans[j].to == statenr) { - tmp->trans[j].to = newto; #ifdef DEBUG_REGEXP_GRAPH printf("Changed transition %d on %d to go to %d\n", j, tmp->no, newto); #endif + tmp->trans[j].to = -1; + xmlRegStateAddTrans(ctxt, tmp, tmp->trans[j].atom, + ctxt->states[newto], + tmp->trans[j].counter, + tmp->trans[j].count); } } } -#endif if (state->type == XML_REGEXP_FINAL_STATE) ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE; /* eliminate the transition completely */ state->nbTrans = 0; + state->type = XML_REGEXP_UNREACH_STATE; } @@ -1779,16 +1781,33 @@ xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) { if (ctxt->states == NULL) return; + /* + * Eliminate simple epsilon transition and the associated unreachable + * states. + */ xmlFAEliminateSimpleEpsilonTransitions(ctxt); + for (statenr = 0;statenr < ctxt->nbStates;statenr++) { + state = ctxt->states[statenr]; + if ((state != NULL) && (state->type == XML_REGEXP_UNREACH_STATE)) { +#ifdef DEBUG_REGEXP_GRAPH + printf("Removed unreachable state %d\n", statenr); +#endif + xmlRegFreeState(state); + ctxt->states[statenr] = NULL; + } + } has_epsilon = 0; /* - * build the completed transitions bypassing the epsilons + * Build the completed transitions bypassing the epsilons * Use a marking algorithm to avoid loops - * mark sink states too. + * Mark sink states too. + * Process from the latests states backward to the start when + * there is long cascading epsilon chains this minimize the + * recursions and transition compares when adding the new ones */ - for (statenr = 0;statenr < ctxt->nbStates;statenr++) { + for (statenr = ctxt->nbStates - 1;statenr >= 0;statenr--) { state = ctxt->states[statenr]; if (state == NULL) continue; @@ -1812,8 +1831,9 @@ xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) { printf("Found epsilon trans %d from %d to %d\n", transnr, statenr, newto); #endif - state->mark = XML_REGEXP_MARK_START; has_epsilon = 1; + state->trans[transnr].to = -2; + state->mark = XML_REGEXP_MARK_START; xmlFAReduceEpsilonTransitions(ctxt, statenr, newto, state->trans[transnr].counter); state->mark = XML_REGEXP_MARK_NORMAL; @@ -2424,7 +2444,7 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state, * check transitions conflicting with the one looked at */ if (t1->atom == NULL) { - if (t1->to == -1) + if (t1->to < 0) continue; res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to], to, atom); @@ -2875,7 +2895,8 @@ xmlFARegDebugExec(xmlRegExecCtxtPtr exec) { int i; printf(": "); for (i = 0;(i < 3) && (i < exec->inputStackNr);i++) - printf("%s ", exec->inputStack[exec->inputStackNr - (i + 1)]); + printf("%s ", (const char *) + exec->inputStack[exec->inputStackNr - (i + 1)].value); } else { printf(": %s", &(exec->inputString[exec->index])); } @@ -4842,10 +4863,15 @@ xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) { ERROR("Expecting a char range"); return; } - NEXTL(len); - if (start == '-') { + /* + * Since we are "inside" a range, we can assume ctxt->cur is past + * the start of ctxt->string, and PREV should be safe + */ + if ((start == '-') && (NXT(1) != ']') && (PREV != '[') && (PREV != '^')) { + NEXTL(len); return; } + NEXTL(len); cur = CUR; if ((cur != '-') || (NXT(1) == ']')) { xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, |