From a7457388701e6ccba9091ba3ec09505dc903b758 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Sat, 5 Feb 2005 12:06:19 +0000 Subject: Load /tmp/tmp.5kkLmZ/libxml2-2.6.17 into packages/libxml2/branches/upstream/current. --- xmlregexp.c | 308 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 302 insertions(+), 6 deletions(-) (limited to 'xmlregexp.c') diff --git a/xmlregexp.c b/xmlregexp.c index eb67fa4..2b1919e 100644 --- a/xmlregexp.c +++ b/xmlregexp.c @@ -19,6 +19,8 @@ #ifdef LIBXML_REGEXP_ENABLED +#define DEBUG_ERR + #include #include #ifdef HAVE_LIMITS_H @@ -137,7 +139,8 @@ typedef enum { typedef enum { XML_REGEXP_START_STATE = 1, XML_REGEXP_FINAL_STATE, - XML_REGEXP_TRANS_STATE + XML_REGEXP_TRANS_STATE, + XML_REGEXP_SINK_STATE } xmlRegStateType; typedef enum { @@ -205,7 +208,6 @@ struct _xmlAutomataState { xmlRegMarkedType mark; xmlRegMarkedType reached; int no; - int maxTrans; int nbTrans; xmlRegTrans *trans; @@ -312,6 +314,13 @@ struct _xmlRegExecCtxt { const xmlChar *inputString; /* when operating on characters */ xmlRegInputTokenPtr inputStack;/* when operating on strings */ + /* + * error handling + */ + int errStateNo; /* the error state number */ + xmlRegStatePtr errState; /* the error state */ + xmlChar *errString; /* the string raising the error */ + int *errCounts; /* counters at the error state */ }; #define REGEXP_ALL_COUNTER 0x123456 @@ -1587,11 +1596,16 @@ xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) { /* * build the completed transitions bypassing the epsilons * Use a marking algorithm to avoid loops + * mark sink states too. */ for (statenr = 0;statenr < ctxt->nbStates;statenr++) { state = ctxt->states[statenr]; if (state == NULL) continue; + if ((state->nbTrans == 0) && + (state->type != XML_REGEXP_FINAL_STATE)) { + state->type = XML_REGEXP_SINK_STATE; + } for (transnr = 0;transnr < state->nbTrans;transnr++) { if ((state->trans[transnr].atom == NULL) && (state->trans[transnr].to >= 0)) { @@ -1668,6 +1682,7 @@ xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) { } } } + /* * find the next accessible state not explored */ @@ -2240,7 +2255,7 @@ static int xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) { xmlRegExecCtxt execval; xmlRegExecCtxtPtr exec = &execval; - int ret, codepoint, len; + int ret, codepoint = 0, len; exec->inputString = content; exec->index = 0; @@ -2452,6 +2467,9 @@ progress: * Progressive interface to the verifier one atom at a time * * * ************************************************************************/ +#ifdef DEBUG_ERR +static void testerr(xmlRegExecCtxtPtr exec); +#endif /** * xmlRegNewExecCtxt: @@ -2493,18 +2511,28 @@ xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks callback, void *data) { exec->callback = callback; exec->data = data; if (comp->nbCounters > 0) { - exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int)); + /* + * For error handling, exec->counts is allocated twice the size + * the second half is used to store the data in case of rollback + */ + exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int) + * 2); if (exec->counts == NULL) { xmlRegexpErrMemory(NULL, "creating execution context"); xmlFree(exec); return(NULL); } - memset(exec->counts, 0, comp->nbCounters * sizeof(int)); - } else + memset(exec->counts, 0, comp->nbCounters * sizeof(int) * 2); + exec->errCounts = &exec->counts[comp->nbCounters]; + } else { exec->counts = NULL; + exec->errCounts = NULL; + } exec->inputStackMax = 0; exec->inputStackNr = 0; exec->inputStack = NULL; + exec->errStateNo = -1; + exec->errString = NULL; return(exec); } @@ -2540,6 +2568,8 @@ xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) { } xmlFree(exec->inputStack); } + if (exec->errString != NULL) + xmlFree(exec->errString); xmlFree(exec); } @@ -2673,6 +2703,10 @@ xmlRegCompactPushString(xmlRegExecCtxtPtr exec, #ifdef DEBUG_PUSH printf("entering state %d\n", target); #endif + if (comp->compact[target * (comp->nbstrings + 1)] == + XML_REGEXP_SINK_STATE) + goto error; + if (comp->compact[target * (comp->nbstrings + 1)] == XML_REGEXP_FINAL_STATE) return(1); @@ -2687,7 +2721,15 @@ xmlRegCompactPushString(xmlRegExecCtxtPtr exec, #ifdef DEBUG_PUSH printf("failed to find a transition for %s on state %d\n", value, state); #endif +error: + if (exec->errString != NULL) + xmlFree(exec->errString); + exec->errString = xmlStrdup(value); + exec->errStateNo = state; exec->status = -1; +#ifdef DEBUG_ERR + testerr(exec); +#endif return(-1); } @@ -2709,6 +2751,7 @@ xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value, xmlRegAtomPtr atom; int ret; int final = 0; + int progress = 1; if (exec == NULL) return(-1); @@ -2943,6 +2986,20 @@ xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value, #ifdef DEBUG_PUSH printf("entering state %d\n", trans->to); #endif + if ((exec->comp->states[trans->to] != NULL) && + (exec->comp->states[trans->to]->type == + XML_REGEXP_SINK_STATE)) { + /* + * entering a sink state, save the current state as error + * state. + */ + if (exec->errString != NULL) + xmlFree(exec->errString); + exec->errString = xmlStrdup(value); + exec->errState = exec->state; + memcpy(exec->errCounts, exec->counts, + exec->comp->nbCounters * sizeof(int)); + } exec->state = exec->comp->states[trans->to]; exec->transno = 0; if (trans->atom != NULL) { @@ -2977,6 +3034,21 @@ xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value, } if ((exec->transno != 0) || (exec->state->nbTrans == 0)) { rollback: + /* + * if we didn't yet rollback on the current input + * store the current state as the error state. + */ + if ((progress) && (exec->state != NULL) && + (exec->state->type != XML_REGEXP_SINK_STATE)) { + progress = 0; + if (exec->errString != NULL) + xmlFree(exec->errString); + exec->errString = xmlStrdup(value); + exec->errState = exec->state; + memcpy(exec->errCounts, exec->counts, + exec->comp->nbCounters * sizeof(int)); + } + /* * Failed to find a way out */ @@ -2990,12 +3062,19 @@ rollback: #endif } } + continue; progress: + progress = 1; continue; } if (exec->status == 0) { return(exec->state->type == XML_REGEXP_FINAL_STATE); } +#ifdef DEBUG_ERR + if (exec->status < 0) { + testerr(exec); + } +#endif return(exec->status); } @@ -3055,6 +3134,223 @@ xmlRegExecPushString2(xmlRegExecCtxtPtr exec, const xmlChar *value, return(ret); } +/** + * xmlRegExecGetalues: + * @exec: a regexp execution context + * @err: error extraction or normal one + * @nbval: pointer to the number of accepted values IN/OUT + * @nbneg: return number of negative transitions + * @values: pointer to the array of acceptable values + * @terminal: return value if this was a terminal state + * + * Extract informations from the regexp execution, internal routine to + * implement xmlRegExecNextValues() and xmlRegExecErrInfo() + * + * Returns: 0 in case of success or -1 in case of error. + */ +static int +xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err, + int *nbval, int *nbneg, + xmlChar **values, int *terminal) { + int maxval; + int nb = 0; + + if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) || + (values == NULL) || (*nbval <= 0)) + return(-1); + + maxval = *nbval; + *nbval = 0; + *nbneg = 0; + if ((exec->comp != NULL) && (exec->comp->compact != NULL)) { + xmlRegexpPtr comp; + int target, i, state; + + comp = exec->comp; + + if (err) { + if (exec->errStateNo == -1) return(-1); + state = exec->errStateNo; + } else { + state = exec->index; + } + if (terminal != NULL) { + if (comp->compact[state * (comp->nbstrings + 1)] == + XML_REGEXP_FINAL_STATE) + *terminal = 1; + else + *terminal = 0; + } + for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) { + target = comp->compact[state * (comp->nbstrings + 1) + i + 1]; + if ((target > 0) && (target <= comp->nbstates) && + (comp->compact[(target - 1) * (comp->nbstrings + 1)] != + XML_REGEXP_SINK_STATE)) { + values[nb++] = comp->stringMap[i]; + (*nbval)++; + } + } + for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) { + target = comp->compact[state * (comp->nbstrings + 1) + i + 1]; + if ((target > 0) && (target <= comp->nbstates) && + (comp->compact[(target - 1) * (comp->nbstrings + 1)] == + XML_REGEXP_SINK_STATE)) { + values[nb++] = comp->stringMap[i]; + (*nbneg)++; + } + } + } else { + int transno; + xmlRegTransPtr trans; + xmlRegAtomPtr atom; + xmlRegStatePtr state; + + if (terminal != NULL) { + if (exec->state->type == XML_REGEXP_FINAL_STATE) + *terminal = 1; + else + *terminal = 0; + } + + if (err) { + if (exec->errState == NULL) return(-1); + state = exec->errState; + } else { + if (exec->state == NULL) return(-1); + state = exec->state; + } + for (transno = 0; + (transno < state->nbTrans) && (nb < maxval); + transno++) { + trans = &state->trans[transno]; + if (trans->to < 0) + continue; + atom = trans->atom; + if ((atom == NULL) || (atom->valuep == NULL)) + continue; + if (trans->count == REGEXP_ALL_LAX_COUNTER) { + /* this should not be reached but ... */ + TODO; + } else if (trans->count == REGEXP_ALL_COUNTER) { + /* this should not be reached but ... */ + TODO; + } else if (trans->counter >= 0) { + xmlRegCounterPtr counter; + int count; + + if (err) + count = exec->errCounts[trans->counter]; + else + count = exec->counts[trans->counter]; + counter = &exec->comp->counters[trans->counter]; + if (count < counter->max) { + values[nb++] = (xmlChar *) atom->valuep; + (*nbval)++; + } + } else { + if ((exec->comp->states[trans->to] != NULL) && + (exec->comp->states[trans->to]->type != + XML_REGEXP_SINK_STATE)) { + values[nb++] = (xmlChar *) atom->valuep; + (*nbval)++; + } + } + } + for (transno = 0; + (transno < state->nbTrans) && (nb < maxval); + transno++) { + trans = &state->trans[transno]; + if (trans->to < 0) + continue; + atom = trans->atom; + if ((atom == NULL) || (atom->valuep == NULL)) + continue; + if (trans->count == REGEXP_ALL_LAX_COUNTER) { + continue; + } else if (trans->count == REGEXP_ALL_COUNTER) { + continue; + } else if (trans->counter >= 0) { + continue; + } else { + if ((exec->comp->states[trans->to] != NULL) && + (exec->comp->states[trans->to]->type == + XML_REGEXP_SINK_STATE)) { + values[nb++] = (xmlChar *) atom->valuep; + (*nbneg)++; + } + } + } + } + return(0); +} + +/** + * xmlRegExecNextValues: + * @exec: a regexp execution context + * @nbval: pointer to the number of accepted values IN/OUT + * @nbneg: return number of negative transitions + * @values: pointer to the array of acceptable values + * @terminal: return value if this was a terminal state + * + * Extract informations from the regexp execution, + * the parameter @values must point to an array of @nbval string pointers + * on return nbval will contain the number of possible strings in that + * state and the @values array will be updated with them. The string values + * returned will be freed with the @exec context and don't need to be + * deallocated. + * + * Returns: 0 in case of success or -1 in case of error. + */ +int +xmlRegExecNextValues(xmlRegExecCtxtPtr exec, int *nbval, int *nbneg, + xmlChar **values, int *terminal) { + return(xmlRegExecGetValues(exec, 0, nbval, nbneg, values, terminal)); +} + +/** + * xmlRegExecErrInfo: + * @exec: a regexp execution context generating an error + * @string: return value for the error string + * @nbval: pointer to the number of accepted values IN/OUT + * @nbneg: return number of negative transitions + * @values: pointer to the array of acceptable values + * @terminal: return value if this was a terminal state + * + * Extract error informations from the regexp execution, the parameter + * @string will be updated with the value pushed and not accepted, + * the parameter @values must point to an array of @nbval string pointers + * on return nbval will contain the number of possible strings in that + * state and the @values array will be updated with them. The string values + * returned will be freed with the @exec context and don't need to be + * deallocated. + * + * Returns: 0 in case of success or -1 in case of error. + */ +int +xmlRegExecErrInfo(xmlRegExecCtxtPtr exec, const xmlChar **string, + int *nbval, int *nbneg, xmlChar **values, int *terminal) { + if (exec == NULL) + return(-1); + if (string != NULL) { + if (exec->status != 0) + *string = exec->errString; + else + *string = NULL; + } + return(xmlRegExecGetValues(exec, 1, nbval, nbneg, values, terminal)); +} + +#ifdef DEBUG_ERR +static void testerr(xmlRegExecCtxtPtr exec) { + const xmlChar *string; + const xmlChar *values[5]; + int nb = 5; + int nbneg; + int terminal; + xmlRegExecErrInfo(exec, &string, &nb, &nbneg, &values[0], &terminal); +} +#endif + #if 0 static int xmlRegExecPushChar(xmlRegExecCtxtPtr exec, int UCS) { -- cgit v1.2.3