diff options
Diffstat (limited to 'parser.c')
-rw-r--r-- | parser.c | 109 |
1 file changed, 79 insertions, 30 deletions
@@ -1039,6 +1039,8 @@ nodePop(xmlParserCtxtPtr ctxt) ctxt->nodeTab[ctxt->nodeNr] = 0; return (ret); } + +#ifdef LIBXML_PUSH_ENABLED /** * nameNsPush: * @ctxt: an XML parser context @@ -1109,6 +1111,7 @@ nameNsPop(xmlParserCtxtPtr ctxt) ctxt->nameTab[ctxt->nameNr] = NULL; return (ret); } +#endif /* LIBXML_PUSH_ENABLED */ /** * namePush: @@ -1456,6 +1459,7 @@ int xmlParseCharRef(xmlParserCtxtPtr ctxt) { unsigned int val = 0; int count = 0; + unsigned int outofrange = 0; /* * Using RAW/CUR/NEXT is okay since we are working on ASCII range here @@ -1480,6 +1484,9 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) { val = 0; break; } + if (val > 0x10FFFF) + outofrange = val; + NEXT; count++; } @@ -1504,6 +1511,9 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) { val = 0; break; } + if (val > 0x10FFFF) + outofrange = val; + NEXT; count++; } @@ -1522,7 +1532,7 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) { * Characters referred to using character references must match the * production for Char. */ - if (IS_CHAR(val)) { + if ((IS_CHAR(val) && (outofrange == 0))) { return(val); } else { xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, @@ -1554,7 +1564,8 @@ static int xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { const xmlChar *ptr; xmlChar cur; - int val = 0; + unsigned int val = 0; + unsigned int outofrange = 0; if ((str == NULL) || (*str == NULL)) return(0); ptr = *str; @@ -1574,6 +1585,9 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { val = 0; break; } + if (val > 0x10FFFF) + outofrange = val; + ptr++; cur = *ptr; } @@ -1590,6 +1604,9 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { val = 0; break; } + if (val > 0x10FFFF) + outofrange = val; + ptr++; cur = *ptr; } @@ -1606,7 +1623,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { * Characters referred to using character references must match the * production for Char. 
*/ - if (IS_CHAR(val)) { + if ((IS_CHAR(val) && (outofrange == 0))) { return(val); } else { xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, @@ -5500,8 +5517,9 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { * Prune it directly in the generated document * except for single text nodes. */ - if ((list->type == XML_TEXT_NODE) && - (list->next == NULL)) { + if (((list->type == XML_TEXT_NODE) && + (list->next == NULL)) || + (ctxt->parseMode == XML_PARSE_READER)) { list->parent = (xmlNodePtr) ent; list = NULL; ent->owner = 1; @@ -5554,10 +5572,21 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { /* * Seems we are generating the DOM content, do * a simple tree copy for all references except the first - * In the first occurrence list contains the replacement + * In the first occurrence list contains the replacement. + * parseMode == XML_PARSE_READER means we are operating on the Reader + * and since nodes are discarded we must copy all the time. */ - if ((list == NULL) && (ent->owner == 0)) { + if (((list == NULL) && (ent->owner == 0)) || + (ctxt->parseMode == XML_PARSE_READER)) { xmlNodePtr nw = NULL, cur, firstChild = NULL; + + /* + * when operating on a reader, the entities definitions + * are always owning the entities subtree. 
+ if (ctxt->parseMode == XML_PARSE_READER) + ent->owner = 1; + */ + cur = ent->children; while (cur != NULL) { nw = xmlCopyNode(cur, 1); @@ -5566,10 +5595,20 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { if (firstChild == NULL){ firstChild = nw; } - xmlAddChild(ctxt->node, nw); + nw = xmlAddChild(ctxt->node, nw); } - if (cur == ent->last) + if (cur == ent->last) { + /* + * needed to detect some strange empty + * node cases in the reader tests + */ + if ((ctxt->parseMode == XML_PARSE_READER) && + (nw->type == XML_ELEMENT_NODE) && + (nw->children == NULL)) + nw->extra = 1; + break; + } cur = cur->next; } #ifdef LIBXML_LEGACY_ENABLED @@ -8776,32 +8815,40 @@ xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt, "Internal error: xmlParseGetLasts\n"); return; } - if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) { + if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) { tmp = ctxt->input->end; tmp--; - while ((tmp >= ctxt->input->base) && (*tmp != '<') && - (*tmp != '>')) tmp--; + while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--; if (tmp < ctxt->input->base) { *lastlt = NULL; *lastgt = NULL; - } else if (*tmp == '<') { + } else { *lastlt = tmp; - tmp--; - while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--; - if (tmp < ctxt->input->base) - *lastgt = NULL; - else + tmp++; + while ((tmp < ctxt->input->end) && (*tmp != '>')) { + if (*tmp == '\'') { + tmp++; + while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++; + if (tmp < ctxt->input->end) tmp++; + } else if (*tmp == '"') { + tmp++; + while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++; + if (tmp < ctxt->input->end) tmp++; + } else + tmp++; + } + if (tmp < ctxt->input->end) *lastgt = tmp; - } else { - *lastgt = tmp; - tmp--; - while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--; - if (tmp < ctxt->input->base) - *lastlt = NULL; - else - *lastlt = tmp; + else { + tmp = *lastlt; + tmp--; + while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--; + if (tmp >= ctxt->input->base) + 
*lastgt = tmp; + else + *lastgt = NULL; + } } - } else { *lastlt = NULL; *lastgt = NULL; @@ -9057,7 +9104,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { if (!terminate) { if (ctxt->progressive) { /* > can be found unescaped in attribute values */ - if ((lastlt == NULL) || (ctxt->input->cur >= lastlt)) + if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) goto done; } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { goto done; @@ -9235,7 +9282,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { goto done; if (!terminate) { if (ctxt->progressive) { - if ((lastgt == NULL) || (ctxt->input->cur > lastgt)) + /* > can be found unescaped in attribute values */ + if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) goto done; } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { goto done; @@ -9423,7 +9471,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { goto done; } else { ctxt->instate = XML_PARSER_START_TAG; - ctxt->progressive = 1; + if (ctxt->progressive == 0) + ctxt->progressive = 1; xmlParseGetLasts(ctxt, &lastlt, &lastgt); #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, |