diff options
Diffstat (limited to 'parser.c')
-rw-r--r-- | parser.c | 2677 |
1 files changed, 1655 insertions, 1022 deletions
@@ -80,20 +80,111 @@ #include <zlib.h> #endif +static void +xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); + +/************************************************************************ + * * + * Arbitrary limits set in the parser. See XML_PARSE_HUGE * + * * + ************************************************************************/ + +#define XML_PARSER_BIG_ENTITY 1000 +#define XML_PARSER_LOT_ENTITY 5000 + +/* + * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity + * replacement over the size in byte of the input indicates that you have + * and eponential behaviour. A value of 10 correspond to at least 3 entity + * replacement per byte of input. + */ +#define XML_PARSER_NON_LINEAR 10 + +/* + * xmlParserEntityCheck + * + * Function to check non-linear entity expansion behaviour + * This is here to detect and stop exponential linear entity expansion + * This is not a limitation of the parser but a safety + * boundary feature. It can be disabled with the XML_PARSE_HUGE + * parser option. + */ +static int +xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size, + xmlEntityPtr ent) +{ + unsigned long consumed = 0; + + if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) + return (0); + if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) + return (1); + if (size != 0) { + /* + * Do the check based on the replacement size of the entity + */ + if (size < XML_PARSER_BIG_ENTITY) + return(0); + + /* + * A limit on the amount of text data reasonably used + */ + if (ctxt->input != NULL) { + consumed = ctxt->input->consumed + + (ctxt->input->cur - ctxt->input->base); + } + consumed += ctxt->sizeentities; + + if ((size < XML_PARSER_NON_LINEAR * consumed) && + (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed)) + return (0); + } else if (ent != NULL) { + /* + * use the number of parsed entities in the replacement + */ + size = ent->checked; + + /* + * The amount of data parsed counting entities size only once + */ + if (ctxt->input != NULL) { + consumed = ctxt->input->consumed + + (ctxt->input->cur - ctxt->input->base); + } + consumed += ctxt->sizeentities; + + /* + * Check the density of entities for the amount of data + * knowing an entity reference will take at least 3 bytes + */ + if (size * 3 < consumed * XML_PARSER_NON_LINEAR) + return (0); + } else { + /* + * strange we got no data for checking just return + */ + return (0); + } + + xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); + return (1); +} + /** * xmlParserMaxDepth: * - * arbitrary depth limit for the XML documents that we allow to - * process. This is not a limitation of the parser but a safety - * boundary feature. + * arbitrary depth limit for the XML documents that we allow to + * process. This is not a limitation of the parser but a safety + * boundary feature. It can be disabled with the XML_PARSE_HUGE + * parser option. */ -unsigned int xmlParserMaxDepth = 1024; +unsigned int xmlParserMaxDepth = 256; -#define SAX2 1 + +#define SAX2 1 #define XML_PARSER_BIG_BUFFER_SIZE 300 #define XML_PARSER_BUFFER_SIZE 100 - #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" /* @@ -116,6 +207,9 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, void *user_data, int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list); +static int +xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, + const char *encoding); #ifdef LIBXML_LEGACY_ENABLED static void xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, @@ -418,7 +512,7 @@ xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *msg, const xmlChar *str1, const xmlChar *str2) { xmlStructuredErrorFunc schannel = NULL; - + if ((ctxt != NULL) && (ctxt->disableSAX != 0) && (ctxt->instate == XML_PARSER_EOF)) return; @@ -445,7 +539,7 @@ xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, */ static void xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, - const char *msg, const xmlChar *str1) + const char *msg, const xmlChar *str1, const xmlChar *str2) { xmlStructuredErrorFunc schannel = NULL; @@ -461,8 +555,8 @@ xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, ctxt->vctxt.error, ctxt->vctxt.userData, ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR, NULL, 0, (const char *) str1, - NULL, NULL, 0, 0, - msg, (const char *) str1); + (const char *) str2, NULL, 0, 0, + msg, (const char *) str1, (const char *) str2); if (ctxt != NULL) { ctxt->valid = 0; } @@ -610,6 +704,33 @@ xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, ctxt->nsWellFormed = 0; } +/** + * xmlNsWarn + * @ctxt: an XML parser context + * @error: the error number + * @msg: the message + * @info1: extra information string + * @info2: extra information string + * + * Handle a fatal parser error, i.e. violating Well-Formedness constraints + */ +static void +xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, + const char *msg, + const xmlChar * info1, const xmlChar * info2, + const xmlChar * info3) +{ + if ((ctxt != NULL) && (ctxt->disableSAX != 0) && + (ctxt->instate == XML_PARSER_EOF)) + return; + if (ctxt != NULL) + ctxt->errNo = error; + __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, + XML_ERR_WARNING, NULL, 0, (const char *) info1, + (const char *) info2, (const char *) info3, 0, 0, msg, + info1, info2, info3); +} + /************************************************************************ * * * Library wide options * @@ -809,7 +930,7 @@ xmlHasFeature(xmlFeature feature) return(1); #else return(0); -#endif +#endif case XML_WITH_ZLIB: #ifdef LIBXML_ZLIB_ENABLED return(1); @@ -859,7 +980,7 @@ typedef xmlDefAttrs *xmlDefAttrsPtr; struct _xmlDefAttrs { int nbAttrs; /* number of defaulted attributes on that element */ int maxAttrs; /* the size of the array */ - const xmlChar *values[4]; /* array of localname/prefix/values */ + const xmlChar *values[5]; /* array of localname/prefix/values/external */ }; /** @@ -913,7 +1034,7 @@ xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) * is needed. */ static const xmlChar * -xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, const xmlChar *src, int *len) +xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) { int i; int remove_head = 0; @@ -954,7 +1075,8 @@ xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, const xmlChar *src, int *len) return(ret); } else if (remove_head) { *len -= remove_head; - return(src + remove_head); + memmove(src, src + remove_head, 1 + *len); + return(src); } return(NULL); } @@ -1011,7 +1133,7 @@ xmlAddDefAttrs(xmlParserCtxtPtr ctxt, defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); if (defaults == NULL) { defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + - (4 * 4) * sizeof(const xmlChar *)); + (4 * 5) * sizeof(const xmlChar *)); if (defaults == NULL) goto mem_error; defaults->nbAttrs = 0; @@ -1025,7 +1147,7 @@ xmlAddDefAttrs(xmlParserCtxtPtr ctxt, xmlDefAttrsPtr temp; temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + - (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *)); + (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *)); if (temp == NULL) goto mem_error; defaults = temp; @@ -1050,13 +1172,17 @@ xmlAddDefAttrs(xmlParserCtxtPtr ctxt, prefix = xmlDictLookup(ctxt->dict, fullattr, len); } - defaults->values[4 * defaults->nbAttrs] = name; - defaults->values[4 * defaults->nbAttrs + 1] = prefix; + defaults->values[5 * defaults->nbAttrs] = name; + defaults->values[5 * defaults->nbAttrs + 1] = prefix; /* intern the string and precompute the end */ len = xmlStrlen(value); value = xmlDictLookup(ctxt->dict, value, len); - defaults->values[4 * defaults->nbAttrs + 2] = value; - defaults->values[4 * defaults->nbAttrs + 3] = value + len; + defaults->values[5 * defaults->nbAttrs + 2] = value; + defaults->values[5 * defaults->nbAttrs + 3] = value + len; + if (ctxt->external) + defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external"; + else + defaults->values[5 * defaults->nbAttrs + 4] = NULL; defaults->nbAttrs++; return; @@ -1212,7 +1338,7 @@ xmlCheckLanguageID(const xmlChar * lang) /************************************************************************ * * - * Parser stacks related functions and macros * + * Parser stacks related functions and macros * * * ************************************************************************/ @@ -1293,7 +1419,7 @@ nsPop(xmlParserCtxtPtr ctxt, int nr) } if (ctxt->nsNr <= 0) return (0); - + for (i = 0;i < nr;i++) { ctxt->nsNr--; ctxt->nsTab[ctxt->nsNr] = NULL; @@ -1343,13 +1469,13 @@ mem_error: * * Pushes a new parser input on top of the input stack * - * Returns 0 in case of error, the index in the stack otherwise + * Returns -1 in case of error, the index in the stack otherwise */ int inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) { if ((ctxt == NULL) || (value == NULL)) - return(0); + return(-1); if (ctxt->inputNr >= ctxt->inputMax) { ctxt->inputMax *= 2; ctxt->inputTab = @@ -1358,7 +1484,10 @@ inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) sizeof(ctxt->inputTab[0])); if (ctxt->inputTab == NULL) { xmlErrMemory(ctxt, NULL); - return (0); + xmlFreeInputStream(value); + ctxt->inputMax /= 2; + value = NULL; + return (-1); } } ctxt->inputTab[ctxt->inputNr] = value; @@ -1398,7 +1527,7 @@ inputPop(xmlParserCtxtPtr ctxt) * * Pushes a new element node on top of the node stack * - * Returns 0 in case of error, the index in the stack otherwise + * Returns -1 in case of error, the index in the stack otherwise */ int nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) @@ -1412,22 +1541,24 @@ nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) sizeof(ctxt->nodeTab[0])); if (tmp == NULL) { xmlErrMemory(ctxt, NULL); - return (0); + return (-1); } ctxt->nodeTab = tmp; ctxt->nodeMax *= 2; } - if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) { + if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, - "Excessive depth in document: change xmlParserMaxDepth = %d\n", + "Excessive depth in document: %d use XML_PARSE_HUGE option\n", xmlParserMaxDepth); ctxt->instate = XML_PARSER_EOF; - return(0); + return(-1); } ctxt->nodeTab[ctxt->nodeNr] = value; ctxt->node = value; return (ctxt->nodeNr++); } + /** * nodePop: * @ctxt: an XML parser context @@ -1594,7 +1725,8 @@ static int spacePush(xmlParserCtxtPtr ctxt, int val) { ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); if (tmp == NULL) { xmlErrMemory(ctxt, NULL); - return(0); + ctxt->spaceMax /=2; + return(-1); } ctxt->spaceTab = tmp; } @@ -1842,10 +1974,12 @@ xmlPopInput(xmlParserCtxtPtr ctxt) { * * xmlPushInput: switch to a new input stream which is stacked on top * of the previous one(s). + * Returns -1 in case of error or the index in the input stack */ -void +int xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { - if (input == NULL) return; + int ret; + if (input == NULL) return(-1); if (xmlParserDebugEntities) { if ((ctxt->input != NULL) && (ctxt->input->filename)) @@ -1855,8 +1989,9 @@ xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { xmlGenericError(xmlGenericErrorContext, "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); } - inputPush(ctxt, input); + ret = inputPush(ctxt, input); GROW; + return(ret); } /** @@ -2234,7 +2369,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, "PEReference: %%%s; not found\n", - name); + name, NULL); } else xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, "PEReference: %%%s; not found\n", @@ -2243,7 +2378,8 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { } } else if (ctxt->input->free != deallocblankswrapper) { input = xmlNewBlanksWrapperInputStream(ctxt, entity); - xmlPushInput(ctxt, input); + if (xmlPushInput(ctxt, input) < 0) + return; } else { if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { @@ -2256,7 +2392,8 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { * this is done independently. */ input = xmlNewEntityInputStream(ctxt, entity); - xmlPushInput(ctxt, input); + if (xmlPushInput(ctxt, input) < 0) + return; /* * Get the 4 first bytes and decode the charset @@ -2299,9 +2436,10 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { /* * Macro used to grow the current buffer. */ -#define growBuffer(buffer) { \ +#define growBuffer(buffer, n) { \ xmlChar *tmp; \ buffer##_size *= 2; \ + buffer##_size += n; \ tmp = (xmlChar *) \ xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ if (tmp == NULL) goto mem_error; \ @@ -2344,7 +2482,9 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, return(NULL); last = str + len; - if (ctxt->depth > 40) { + if (((ctxt->depth > 40) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) || + (ctxt->depth > 1024)) { xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); return(NULL); } @@ -2374,7 +2514,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, COPY_BUF(0,buffer,nbchars,val); } if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { - growBuffer(buffer); + growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { if (xmlParserDebugEntities) @@ -2382,12 +2522,17 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, "String decoding Entity Reference: %.30s\n", str); ent = xmlParseStringEntityRef(ctxt, &str); + if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) || + (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR)) + goto int_error; + if (ent != NULL) + ctxt->nbentities += ent->checked; if ((ent != NULL) && (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { if (ent->content != NULL) { COPY_BUF(0,buffer,nbchars,ent->content[0]); if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { - growBuffer(buffer); + growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } } else { xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, @@ -2398,13 +2543,16 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, rep = xmlStringDecodeEntities(ctxt, ent->content, what, 0, 0, 0); ctxt->depth--; + if (rep != NULL) { current = rep; while (*current != 0) { /* non input consuming loop */ buffer[nbchars++] = *current++; if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { - growBuffer(buffer); + if (xmlParserEntityCheck(ctxt, nbchars, ent)) + goto int_error; + growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } } xmlFree(rep); @@ -2416,7 +2564,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, buffer[nbchars++] = '&'; if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { - growBuffer(buffer); + growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } for (;i > 0;i--) buffer[nbchars++] = *cur++; @@ -2427,10 +2575,13 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, xmlGenericError(xmlGenericErrorContext, "String decoding PE Reference: %.30s\n", str); ent = xmlParseStringPEReference(ctxt, &str); + if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) + goto int_error; + if (ent != NULL) + ctxt->nbentities += ent->checked; if (ent != NULL) { if (ent->content == NULL) { - if (xmlLoadEntityContent(ctxt, ent) < 0) { - } + xmlLoadEntityContent(ctxt, ent); } ctxt->depth++; rep = xmlStringDecodeEntities(ctxt, ent->content, what, @@ -2442,7 +2593,9 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, buffer[nbchars++] = *current++; if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { - growBuffer(buffer); + if (xmlParserEntityCheck(ctxt, nbchars, ent)) + goto int_error; + growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } } xmlFree(rep); @@ -2453,7 +2606,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, COPY_BUF(l,buffer,nbchars,c); str += l; if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { - growBuffer(buffer); + growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } } if (str < last) @@ -2466,6 +2619,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, mem_error: xmlErrMemory(ctxt, NULL); +int_error: if (rep != NULL) xmlFree(rep); if (buffer != NULL) @@ -2758,10 +2912,198 @@ xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { * * ************************************************************************/ -static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt); +/************************************************************************ + * * + * Routines to parse Name, NCName and NmToken * + * * + ************************************************************************/ +unsigned long nbParseName = 0; +unsigned long nbParseNmToken = 0; +unsigned long nbParseNCName = 0; +unsigned long nbParseNCNameComplex = 0; +unsigned long nbParseNameComplex = 0; +unsigned long nbParseStringName = 0; +/* + * The two following functions are related to the change of accepted + * characters for Name and NmToken in the Revision 5 of XML-1.0 + * They correspond to the modified production [4] and the new production [4a] + * changes in that revision. Also note that the macros used for the + * productions Letter, Digit, CombiningChar and Extender are not needed + * anymore. + * We still keep compatibility to pre-revision5 parsing semantic if the + * new XML_PARSE_OLD10 option is given to the parser. + */ +static int +xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) { + if ((ctxt->options & XML_PARSE_OLD10) == 0) { + /* + * Use the new checks of production [4] [4a] amd [5] of the + * Update 5 of XML-1.0 + */ + if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ + (((c >= 'a') && (c <= 'z')) || + ((c >= 'A') && (c <= 'Z')) || + (c == '_') || (c == ':') || + ((c >= 0xC0) && (c <= 0xD6)) || + ((c >= 0xD8) && (c <= 0xF6)) || + ((c >= 0xF8) && (c <= 0x2FF)) || + ((c >= 0x370) && (c <= 0x37D)) || + ((c >= 0x37F) && (c <= 0x1FFF)) || + ((c >= 0x200C) && (c <= 0x200D)) || + ((c >= 0x2070) && (c <= 0x218F)) || + ((c >= 0x2C00) && (c <= 0x2FEF)) || + ((c >= 0x3001) && (c <= 0xD7FF)) || + ((c >= 0xF900) && (c <= 0xFDCF)) || + ((c >= 0xFDF0) && (c <= 0xFFFD)) || + ((c >= 0x10000) && (c <= 0xEFFFF)))) + return(1); + } else { + if (IS_LETTER(c) || (c == '_') || (c == ':')) + return(1); + } + return(0); +} + +static int +xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { + if ((ctxt->options & XML_PARSE_OLD10) == 0) { + /* + * Use the new checks of production [4] [4a] amd [5] of the + * Update 5 of XML-1.0 + */ + if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ + (((c >= 'a') && (c <= 'z')) || + ((c >= 'A') && (c <= 'Z')) || + ((c >= '0') && (c <= '9')) || /* !start */ + (c == '_') || (c == ':') || + (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ + ((c >= 0xC0) && (c <= 0xD6)) || + ((c >= 0xD8) && (c <= 0xF6)) || + ((c >= 0xF8) && (c <= 0x2FF)) || + ((c >= 0x300) && (c <= 0x36F)) || /* !start */ + ((c >= 0x370) && (c <= 0x37D)) || + ((c >= 0x37F) && (c <= 0x1FFF)) || + ((c >= 0x200C) && (c <= 0x200D)) || + ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ + ((c >= 0x2070) && (c <= 0x218F)) || + ((c >= 0x2C00) && (c <= 0x2FEF)) || + ((c >= 0x3001) && (c <= 0xD7FF)) || + ((c >= 0xF900) && (c <= 0xFDCF)) || + ((c >= 0xFDF0) && (c <= 0xFFFD)) || + ((c >= 0x10000) && (c <= 0xEFFFF)))) + return(1); + } else { + if ((IS_LETTER(c)) || (IS_DIGIT(c)) || + (c == '.') || (c == '-') || + (c == '_') || (c == ':') || + (IS_COMBINING(c)) || + (IS_EXTENDER(c))) + return(1); + } + return(0); +} + static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, int normalize); +static const xmlChar * +xmlParseNameComplex(xmlParserCtxtPtr ctxt) { + int len = 0, l; + int c; + int count = 0; + + nbParseNameComplex++; + + /* + * Handler for more complex cases + */ + GROW; + c = CUR_CHAR(l); + if ((ctxt->options & XML_PARSE_OLD10) == 0) { + /* + * Use the new checks of production [4] [4a] amd [5] of the + * Update 5 of XML-1.0 + */ + if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ + (!(((c >= 'a') && (c <= 'z')) || + ((c >= 'A') && (c <= 'Z')) || + (c == '_') || (c == ':') || + ((c >= 0xC0) && (c <= 0xD6)) || + ((c >= 0xD8) && (c <= 0xF6)) || + ((c >= 0xF8) && (c <= 0x2FF)) || + ((c >= 0x370) && (c <= 0x37D)) || + ((c >= 0x37F) && (c <= 0x1FFF)) || + ((c >= 0x200C) && (c <= 0x200D)) || + ((c >= 0x2070) && (c <= 0x218F)) || + ((c >= 0x2C00) && (c <= 0x2FEF)) || + ((c >= 0x3001) && (c <= 0xD7FF)) || + ((c >= 0xF900) && (c <= 0xFDCF)) || + ((c >= 0xFDF0) && (c <= 0xFFFD)) || + ((c >= 0x10000) && (c <= 0xEFFFF))))) { + return(NULL); + } + len += l; + NEXTL(l); + c = CUR_CHAR(l); + while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ + (((c >= 'a') && (c <= 'z')) || + ((c >= 'A') && (c <= 'Z')) || + ((c >= '0') && (c <= '9')) || /* !start */ + (c == '_') || (c == ':') || + (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ + ((c >= 0xC0) && (c <= 0xD6)) || + ((c >= 0xD8) && (c <= 0xF6)) || + ((c >= 0xF8) && (c <= 0x2FF)) || + ((c >= 0x300) && (c <= 0x36F)) || /* !start */ + ((c >= 0x370) && (c <= 0x37D)) || + ((c >= 0x37F) && (c <= 0x1FFF)) || + ((c >= 0x200C) && (c <= 0x200D)) || + ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ + ((c >= 0x2070) && (c <= 0x218F)) || + ((c >= 0x2C00) && (c <= 0x2FEF)) || + ((c >= 0x3001) && (c <= 0xD7FF)) || + ((c >= 0xF900) && (c <= 0xFDCF)) || + ((c >= 0xFDF0) && (c <= 0xFFFD)) || + ((c >= 0x10000) && (c <= 0xEFFFF)) + )) { + if (count++ > 100) { + count = 0; + GROW; + } + len += l; + NEXTL(l); + c = CUR_CHAR(l); + } + } else { + if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ + (!IS_LETTER(c) && (c != '_') && + (c != ':'))) { + return(NULL); + } + len += l; + NEXTL(l); + c = CUR_CHAR(l); + + while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ + ((IS_LETTER(c)) || (IS_DIGIT(c)) || + (c == '.') || (c == '-') || + (c == '_') || (c == ':') || + (IS_COMBINING(c)) || + (IS_EXTENDER(c)))) { + if (count++ > 100) { + count = 0; + GROW; + } + len += l; + NEXTL(l); + c = CUR_CHAR(l); + } + } + if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) + return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); + return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); +} + /** * xmlParseName: * @ctxt: an XML parser context @@ -2786,6 +3128,8 @@ xmlParseName(xmlParserCtxtPtr ctxt) { GROW; + nbParseName++; + /* * Accelerator for simple ASCII names */ @@ -2811,9 +3155,93 @@ xmlParseName(xmlParserCtxtPtr ctxt) { return(ret); } } + /* accelerator for special cases */ return(xmlParseNameComplex(ctxt)); } +static const xmlChar * +xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { + int len = 0, l; + int c; + int count = 0; + + nbParseNCNameComplex++; + + /* + * Handler for more complex cases + */ + GROW; + c = CUR_CHAR(l); + if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ + (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) { + return(NULL); + } + + while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ + (xmlIsNameChar(ctxt, c) && (c != ':'))) { + if (count++ > 100) { + count = 0; + GROW; + } + len += l; + NEXTL(l); + c = CUR_CHAR(l); + } + return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); +} + +/** + * xmlParseNCName: + * @ctxt: an XML parser context + * @len: lenght of the string parsed + * + * parse an XML name. + * + * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | + * CombiningChar | Extender + * + * [5NS] NCName ::= (Letter | '_') (NCNameChar)* + * + * Returns the Name parsed or NULL + */ + +static const xmlChar * +xmlParseNCName(xmlParserCtxtPtr ctxt) { + const xmlChar *in; + const xmlChar *ret; + int count = 0; + + nbParseNCName++; + + /* + * Accelerator for simple ASCII names + */ + in = ctxt->input->cur; + if (((*in >= 0x61) && (*in <= 0x7A)) || + ((*in >= 0x41) && (*in <= 0x5A)) || + (*in == '_')) { + in++; + while (((*in >= 0x61) && (*in <= 0x7A)) || + ((*in >= 0x41) && (*in <= 0x5A)) || + ((*in >= 0x30) && (*in <= 0x39)) || + (*in == '_') || (*in == '-') || + (*in == '.')) + in++; + if ((*in > 0) && (*in < 0x80)) { + count = in - ctxt->input->cur; + ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); + ctxt->input->cur = in; + ctxt->nbChars += count; + ctxt->input->col += count; + if (ret == NULL) { + xmlErrMemory(ctxt, NULL); + } + return(ret); + } + } + return(xmlParseNCNameComplex(ctxt)); +} + /** * xmlParseNameAndCompare: * @ctxt: an XML parser context @@ -2832,15 +3260,15 @@ xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { const xmlChar *ret; GROW; - + in = ctxt->input->cur; while (*in != 0 && *in == *cmp) { - ++in; + ++in; ++cmp; ctxt->input->col++; } if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { - /* success */ + /* success */ ctxt->input->cur = in; return (const xmlChar*) 1; } @@ -2853,42 +3281,6 @@ xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { return ret; } -static const xmlChar * -xmlParseNameComplex(xmlParserCtxtPtr ctxt) { - int len = 0, l; - int c; - int count = 0; - - /* - * Handler for more complex cases - */ - GROW; - c = CUR_CHAR(l); - if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ - (!IS_LETTER(c) && (c != '_') && - (c != ':'))) { - return(NULL); - } - - while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ - ((IS_LETTER(c)) || (IS_DIGIT(c)) || - (c == '.') || (c == '-') || - (c == '_') || (c == ':') || - (IS_COMBINING(c)) || - (IS_EXTENDER(c)))) { - if (count++ > 100) { - count = 0; - GROW; - } - len += l; - NEXTL(l); - c = CUR_CHAR(l); - } - if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) - return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); - return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); -} - /** * xmlParseStringName: * @ctxt: an XML parser context @@ -2914,17 +3306,17 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { int len = 0, l; int c; + nbParseStringName++; + c = CUR_SCHAR(cur, l); - if (!IS_LETTER(c) && (c != '_') && - (c != ':')) { + if (!xmlIsNameStartChar(ctxt, c)) { return(NULL); } - while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ - (c == '.') || (c == '-') || - (c == '_') || (c == ':') || - (IS_COMBINING(c)) || - (IS_EXTENDER(c))) { + COPY_BUF(l,buf,len,c); + cur += l; + c = CUR_SCHAR(cur, l); + while (xmlIsNameChar(ctxt, c)) { COPY_BUF(l,buf,len,c); cur += l; c = CUR_SCHAR(cur, l); @@ -2935,19 +3327,14 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { */ xmlChar *buffer; int max = len * 2; - + buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); if (buffer == NULL) { xmlErrMemory(ctxt, NULL); return(NULL); } memcpy(buffer, buf, len); - while ((IS_LETTER(c)) || (IS_DIGIT(c)) || - /* test bigentname.xml */ - (c == '.') || (c == '-') || - (c == '_') || (c == ':') || - (IS_COMBINING(c)) || - (IS_EXTENDER(c))) { + while (xmlIsNameChar(ctxt, c)) { if (len + 10 > max) { xmlChar *tmp; max *= 2; @@ -2976,7 +3363,7 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { /** * xmlParseNmtoken: * @ctxt: an XML parser context - * + * * parse an XML Nmtoken. * * [7] Nmtoken ::= (NameChar)+ @@ -2993,14 +3380,12 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) { int c; int count = 0; + nbParseNmToken++; + GROW; c = CUR_CHAR(l); - while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ - (c == '.') || (c == '-') || - (c == '_') || (c == ':') || - (IS_COMBINING(c)) || - (IS_EXTENDER(c))) { + while (xmlIsNameChar(ctxt, c)) { if (count++ > 100) { count = 0; GROW; @@ -3015,18 +3400,14 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) { */ xmlChar *buffer; int max = len * 2; - + buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); if (buffer == NULL) { xmlErrMemory(ctxt, NULL); return(NULL); } memcpy(buffer, buf, len); - while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ - (c == '.') || (c == '-') || - (c == '_') || (c == ':') || - (IS_COMBINING(c)) || - (IS_EXTENDER(c))) { + while (xmlIsNameChar(ctxt, c)) { if (count++ > 100) { count = 0; GROW; @@ -3255,7 +3636,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { if (val == '&') { if (ctxt->replaceEntities) { if (len > buf_size - 10) { - growBuffer(buf); + growBuffer(buf, 10); } buf[len++] = '&'; } else { @@ -3264,7 +3645,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { * called by the attribute() function in SAX.c */ if (len > buf_size - 10) { - growBuffer(buf); + growBuffer(buf, 10); } buf[len++] = '&'; buf[len++] = '#'; @@ -3274,16 +3655,19 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { } } else if (val != 0) { if (len > buf_size - 10) { - growBuffer(buf); + growBuffer(buf, 10); } len += xmlCopyChar(0, &buf[len], val); } } else { ent = xmlParseEntityRef(ctxt); + ctxt->nbentities++; + if (ent != NULL) + ctxt->nbentities += ent->owner; if ((ent != NULL) && (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { if (len > buf_size - 10) { - growBuffer(buf); + growBuffer(buf, 10); } if ((ctxt->replaceEntities == 0) && (ent->content[0] == '&')) { @@ -3306,7 +3690,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { while (*current != 0) { /* non input consuming */ buf[len++] = *current++; if (len > buf_size - 10) { - growBuffer(buf); + growBuffer(buf, 10); } } xmlFree(rep); @@ -3314,7 +3698,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { } } else { if (len > buf_size - 10) { - growBuffer(buf); + growBuffer(buf, 10); } if (ent->content != NULL) buf[len++] = ent->content[0]; @@ -3341,8 +3725,8 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { * Just output the reference */ buf[len++] = '&'; - if (len > buf_size - i - 10) { - growBuffer(buf); + while (len > buf_size - i - 10) { + growBuffer(buf, i + 10); } for (;i > 0;i--) buf[len++] = *cur++; @@ -3354,8 +3738,8 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { if ((len != 0) || (!normalize)) { if ((!normalize) || (!in_space)) { COPY_BUF(l,buf,len,0x20); - if (len > buf_size - 10) { - growBuffer(buf); + while (len > buf_size - 10) { + growBuffer(buf, 10); } } in_space = 1; @@ -3364,7 +3748,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { in_space = 0; COPY_BUF(l,buf,len,c); if (len > buf_size - 10) { - growBuffer(buf); + growBuffer(buf, 10); } } NEXTL(l); @@ -3976,8 +4360,10 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) { int q, ql; int r, rl; int cur, l; - xmlParserInputPtr input = ctxt->input; int count = 0; + int inputid; + + inputid = ctxt->input->id; if (buf == NULL) { len = 0; @@ -4059,7 +4445,7 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) { "xmlParseComment: invalid xmlChar value %d\n", cur); } else { - if (input != ctxt->input) { + if (inputid != ctxt->input->id) { xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, "Comment doesn't start and stop in the same entity\n"); } @@ -4095,15 +4481,16 @@ xmlParseComment(xmlParserCtxtPtr ctxt) { xmlParserInputState state; const xmlChar *in; int nbchar = 0, ccol; + int inputid; /* * Check that there is a comment right here. */ if ((RAW != '<') || (NXT(1) != '!') || (NXT(2) != '-') || (NXT(3) != '-')) return; - state = ctxt->instate; ctxt->instate = XML_PARSER_COMMENT; + inputid = ctxt->input->id; SKIP(4); SHRINK; GROW; @@ -4194,6 +4581,10 @@ get_more: if (*in == '-') { if (in[1] == '-') { if (in[2] == '>') { + if (ctxt->input->id != inputid) { + xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, + "comment doesn't start and stop in the same entity\n"); + } SKIP(3); if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && (!ctxt->disableSAX)) { @@ -4267,6 +4658,10 @@ xmlParsePITarget(xmlParserCtxtPtr ctxt) { "xmlParsePITarget: invalid name prefix 'xml'\n", NULL, NULL); } + if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) { + xmlNsErr(ctxt, XML_NS_ERR_COLON, + "colon are forbidden from PI names '%s'\n", name, NULL, NULL); + } return(name); } @@ -4510,6 +4905,11 @@ xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { "Space required after the NOTATION name'\n"); return; } + if (xmlStrchr(name, ':') != NULL) { + xmlNsErr(ctxt, XML_NS_ERR_COLON, + "colon are forbidden from notation names '%s'\n", + name, NULL, NULL); + } SKIP_BLANKS; /* @@ -4594,6 +4994,11 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { "xmlParseEntityDecl: no name\n"); return; } + if (xmlStrchr(name, ':') != NULL) { + xmlNsErr(ctxt, XML_NS_ERR_COLON, + "colon are forbidden from entities names '%s'\n", + name, NULL, NULL); + } skipped = SKIP_BLANKS; if (skipped == 0) { xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, @@ -4669,6 +5074,7 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { xmlErrMemory(ctxt, "New Doc failed"); return; } + ctxt->myDoc->properties = XML_DOC_INTERNAL; } if (ctxt->myDoc->intSubset == NULL) ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, @@ -4741,6 +5147,7 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { xmlErrMemory(ctxt, "New Doc failed"); return; } + ctxt->myDoc->properties = XML_DOC_INTERNAL; } if (ctxt->myDoc->intSubset == NULL) @@ -4879,7 +5286,7 @@ xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { xmlEnumerationPtr xmlParseNotationType(xmlParserCtxtPtr ctxt) { const xmlChar *name; - xmlEnumerationPtr ret = NULL, last = NULL, cur; + xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; if (RAW != '(') { xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); @@ -4895,12 +5302,26 @@ xmlParseNotationType(xmlParserCtxtPtr ctxt) { "Name expected in NOTATION declaration\n"); return(ret); } - cur = xmlCreateEnumeration(name); - if (cur == NULL) return(ret); - if (last == NULL) ret = last = cur; - else { - last->next = cur; - last = cur; + tmp = ret; + while (tmp != NULL) { + if (xmlStrEqual(name, tmp->name)) { + xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, + "standalone: attribute notation value token %s duplicated\n", + name, NULL); + if (!xmlDictOwns(ctxt->dict, name)) + xmlFree((xmlChar *) name); + break; + } + tmp = tmp->next; + } + if (tmp == NULL) { + cur = xmlCreateEnumeration(name); + if (cur == NULL) return(ret); + if (last == NULL) ret = last = cur; + else { + last->next = cur; + last = cur; + } } SKIP_BLANKS; } while (RAW == '|'); @@ -4932,7 +5353,7 @@ xmlParseNotationType(xmlParserCtxtPtr ctxt) { xmlEnumerationPtr xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { xmlChar *name; - xmlEnumerationPtr ret = NULL, last = NULL, cur; + xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; if (RAW != '(') { xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL); @@ -4947,13 +5368,28 @@ xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL); return(ret); } - cur = xmlCreateEnumeration(name); - xmlFree(name); - if (cur == NULL) return(ret); - if (last == NULL) ret = last = cur; - else { - last->next = cur; - last = cur; + tmp = ret; + while (tmp != NULL) { + if (xmlStrEqual(name, tmp->name)) { + xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, + "standalone: attribute enumeration value token %s duplicated\n", + name, NULL); + if (!xmlDictOwns(ctxt->dict, name)) + xmlFree(name); + break; + } + tmp = tmp->next; + } + if (tmp == NULL) { + cur = xmlCreateEnumeration(name); + if (!xmlDictOwns(ctxt->dict, name)) + xmlFree(name); + if (cur == NULL) return(ret); + if (last == NULL) ret = last = cur; + else { + last->next = cur; + last = cur; + } } SKIP_BLANKS; } while (RAW == '|'); @@ -5200,8 +5636,9 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { } if (RAW == '>') { if (input != ctxt->input) { - xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, - "Attribute list declaration doesn't start and stop in the same entity\n"); + xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, + "Attribute list declaration doesn't start and stop in the same entity\n", + NULL, NULL); } NEXT; } @@ -5241,7 +5678,7 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { if ((ctxt->validate) && (ctxt->input->id != inputchk)) { xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, "Element content declaration doesn't start and stop in the same entity\n", - NULL); + NULL, NULL); } NEXT; ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); @@ -5299,7 +5736,7 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { if ((ctxt->validate) && (ctxt->input->id != inputchk)) { xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, "Element content declaration doesn't start and stop in the same entity\n", - NULL); + NULL, NULL); } SKIP(2); } else { @@ -5531,7 +5968,7 @@ xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) { if ((ctxt->validate) && (ctxt->input->id != inputchk)) { xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, "Element content declaration doesn't start and stop in the same entity\n", - NULL); + NULL, NULL); } NEXT; if (RAW == '?') { @@ -5773,6 +6210,8 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) { static void xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { + int id = ctxt->input->id; + SKIP(3); SKIP_BLANKS; if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) { @@ -5781,6 +6220,11 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { if (RAW != '[') { xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); } else { + if (ctxt->input->id != id) { + xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, + "All markup of the conditional section is not in the same entity\n", + NULL, NULL); + } NEXT; } if (xmlParserDebugEntities) { @@ -5836,6 +6280,11 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { if (RAW != '[') { xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); } else { + if (ctxt->input->id != id) { + xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, + "All markup of the conditional section is not in the same entity\n", + NULL, NULL); + } NEXT; } if (xmlParserDebugEntities) { @@ -5892,6 +6341,11 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { if (RAW == 0) { xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL); } else { + if (ctxt->input->id != id) { + xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, + "All markup of the conditional section is not in the same entity\n", + NULL, NULL); + } SKIP(3); } } @@ -5970,12 +6424,10 @@ xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { /** * xmlParseTextDecl: * @ctxt: an XML parser context - * + * * parse an XML declaration header for external entities * * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' - * - * Question: Seems that EncodingDecl is mandatory ? Is that a typo ? */ void @@ -6086,6 +6538,11 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, } if (ctxt->myDoc == NULL) { ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); + if (ctxt->myDoc == NULL) { + xmlErrMemory(ctxt, "New Doc failed"); + return; + } + ctxt->myDoc->properties = XML_DOC_INTERNAL; } if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); @@ -6129,7 +6586,7 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, /** * xmlParseReference: * @ctxt: an XML parser context - * + * * parse and handle entity references in content, depending on the SAX * interface, this may end-up in a call to character() if this is a * CharRef, a predefined entity, if there is no reference() callback. @@ -6141,14 +6598,23 @@ void xmlParseReference(xmlParserCtxtPtr ctxt) { xmlEntityPtr ent; xmlChar *val; - if (RAW != '&') return; + int was_checked; + xmlNodePtr list = NULL; + xmlParserErrors ret = XML_ERR_OK; + + if (RAW != '&') + return; + + /* + * Simple case of a CharRef + */ if (NXT(1) == '#') { int i = 0; xmlChar out[10]; int hex = NXT(2); int value = xmlParseCharRef(ctxt); - + if (value == 0) return; if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { @@ -6182,360 +6648,342 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { (!ctxt->disableSAX)) ctxt->sax->characters(ctxt->userData, out, i); } - } else { - int was_checked; + return; + } - ent = xmlParseEntityRef(ctxt); - if (ent == NULL) return; - if (!ctxt->wellFormed) - return; - was_checked = ent->checked; - if ((ent->name != NULL) && - (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { - xmlNodePtr list = NULL; - xmlParserErrors ret = XML_ERR_OK; + /* + * We are seeing an entity reference + */ + ent = xmlParseEntityRef(ctxt); + if (ent == NULL) return; + if (!ctxt->wellFormed) + return; + was_checked = ent->checked; + /* special case of predefined entities */ + if ((ent->name == NULL) || + (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { + val = ent->content; + if (val == NULL) return; + /* + * inline the entity. + */ + if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && + (!ctxt->disableSAX)) + ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); + return; + } - /* - * The first reference to the entity trigger a parsing phase - * where the ent->children is filled with the result from - * the parsing. - */ - if (ent->checked == 0) { - xmlChar *value; + /* + * The first reference to the entity trigger a parsing phase + * where the ent->children is filled with the result from + * the parsing. + */ + if (ent->checked == 0) { + unsigned long oldnbent = ctxt->nbentities; - value = ent->content; + /* + * This is a bit hackish but this seems the best + * way to make sure both SAX and DOM entity support + * behaves okay. + */ + void *user_data; + if (ctxt->userData == ctxt) + user_data = NULL; + else + user_data = ctxt->userData; - /* - * Check that this entity is well formed - */ - if ((value != NULL) && (value[0] != 0) && - (value[1] == 0) && (value[0] == '<') && - (xmlStrEqual(ent->name, BAD_CAST "lt"))) { - /* - * DONE: get definite answer on this !!! - * Lots of entity decls are used to declare a single - * char - * <!ENTITY lt "<"> - * Which seems to be valid since - * 2.4: The ampersand character (&) and the left angle - * bracket (<) may appear in their literal form only - * when used ... They are also legal within the literal - * entity value of an internal entity declaration;i - * see "4.3.2 Well-Formed Parsed Entities". - * IMHO 2.4 and 4.3.2 are directly in contradiction. - * Looking at the OASIS test suite and James Clark - * tests, this is broken. However the XML REC uses - * it. Is the XML REC not well-formed ???? - * This is a hack to avoid this problem - * - * ANSWER: since lt gt amp .. are already defined, - * this is a redefinition and hence the fact that the - * content is not well balanced is not a Wf error, this - * is lousy but acceptable. - */ - list = xmlNewDocText(ctxt->myDoc, value); - if (list != NULL) { - if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) && - (ent->children == NULL)) { - ent->children = list; - ent->last = list; - ent->owner = 1; - list->parent = (xmlNodePtr) ent; - } else { - xmlFreeNodeList(list); - } - } else if (list != NULL) { - xmlFreeNodeList(list); - } - } else { - /* - * 4.3.2: An internal general parsed entity is well-formed - * if its replacement text matches the production labeled - * content. - */ + /* + * Check that this entity is well formed + * 4.3.2: An internal general parsed entity is well-formed + * if its replacement text matches the production labeled + * content. + */ + if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { + ctxt->depth++; + ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content, + user_data, &list); + ctxt->depth--; + + } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) { + ctxt->depth++; + ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax, + user_data, ctxt->depth, ent->URI, + ent->ExternalID, &list); + ctxt->depth--; + } else { + ret = XML_ERR_ENTITY_PE_INTERNAL; + xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, + "invalid entity type found\n", NULL); + } + + /* + * Store the number of entities needing parsing for this entity + * content and do checkings + */ + ent->checked = ctxt->nbentities - oldnbent; + if (ret == XML_ERR_ENTITY_LOOP) { + xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); + xmlFreeNodeList(list); + return; + } + if (xmlParserEntityCheck(ctxt, 0, ent)) { + xmlFreeNodeList(list); + return; + } - void *user_data; + if ((ret == XML_ERR_OK) && (list != NULL)) { + if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || + (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& + (ent->children == NULL)) { + ent->children = list; + if (ctxt->replaceEntities) { /* - * This is a bit hackish but this seems the best - * way to make sure both SAX and DOM entity support - * behaves okay. + * Prune it directly in the generated document + * except for single text nodes. */ - if (ctxt->userData == ctxt) - user_data = NULL; - else - user_data = ctxt->userData; - - if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { - ctxt->depth++; - ret = xmlParseBalancedChunkMemoryInternal(ctxt, - value, user_data, &list); - ctxt->depth--; - } else if (ent->etype == - XML_EXTERNAL_GENERAL_PARSED_ENTITY) { - ctxt->depth++; - ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, - ctxt->sax, user_data, ctxt->depth, - ent->URI, ent->ExternalID, &list); - ctxt->depth--; + if (((list->type == XML_TEXT_NODE) && + (list->next == NULL)) || + (ctxt->parseMode == XML_PARSE_READER)) { + list->parent = (xmlNodePtr) ent; + list = NULL; + ent->owner = 1; } else { - ret = XML_ERR_ENTITY_PE_INTERNAL; - xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, - "invalid entity type found\n", NULL); - } - if (ret == XML_ERR_ENTITY_LOOP) { - xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); - return; - } else if ((ret == XML_ERR_OK) && (list != NULL)) { - if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || - (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& - (ent->children == NULL)) { - ent->children = list; - if (ctxt->replaceEntities) { - /* - * Prune it directly in the generated document - * except for single text nodes. - */ - if (((list->type == XML_TEXT_NODE) && - (list->next == NULL)) || - (ctxt->parseMode == XML_PARSE_READER)) { - list->parent = (xmlNodePtr) ent; - list = NULL; - ent->owner = 1; - } else { - ent->owner = 0; - while (list != NULL) { - list->parent = (xmlNodePtr) ctxt->node; - list->doc = ctxt->myDoc; - if (list->next == NULL) - ent->last = list; - list = list->next; - } - list = ent->children; + ent->owner = 0; + while (list != NULL) { + list->parent = (xmlNodePtr) ctxt->node; + list->doc = ctxt->myDoc; + if (list->next == NULL) + ent->last = list; + list = list->next; + } + list = ent->children; #ifdef LIBXML_LEGACY_ENABLED - if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) - xmlAddEntityReference(ent, list, NULL); + if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) + xmlAddEntityReference(ent, list, NULL); #endif /* LIBXML_LEGACY_ENABLED */ - } - } else { - ent->owner = 1; - while (list != NULL) { - list->parent = (xmlNodePtr) ent; - if (list->next == NULL) - ent->last = list; - list = list->next; - } - } - } else { - xmlFreeNodeList(list); - list = NULL; - } - } else if ((ret != XML_ERR_OK) && - (ret != XML_WAR_UNDECLARED_ENTITY)) { - xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, - "Entity '%s' failed to parse\n", ent->name); - } else if (list != NULL) { - xmlFreeNodeList(list); - list = NULL; + } + } else { + ent->owner = 1; + while (list != NULL) { + list->parent = (xmlNodePtr) ent; + if (list->next == NULL) + ent->last = list; + list = list->next; } } - ent->checked = 1; + } else { + xmlFreeNodeList(list); + list = NULL; } + } else if ((ret != XML_ERR_OK) && + (ret != XML_WAR_UNDECLARED_ENTITY)) { + xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, + "Entity '%s' failed to parse\n", ent->name); + } else if (list != NULL) { + xmlFreeNodeList(list); + list = NULL; + } + if (ent->checked == 0) + ent->checked = 1; + } else if (ent->checked != 1) { + ctxt->nbentities += ent->checked; + } - if (ent->children == NULL) { - /* - * Probably running in SAX mode and the callbacks don't - * build the entity content. So unless we already went - * though parsing for first checking go though the entity - * content to generate callbacks associated to the entity - */ - if (was_checked == 1) { - void *user_data; - /* - * This is a bit hackish but this seems the best - * way to make sure both SAX and DOM entity support - * behaves okay. - */ - if (ctxt->userData == ctxt) - user_data = NULL; - else - user_data = ctxt->userData; - - if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { - ctxt->depth++; - ret = xmlParseBalancedChunkMemoryInternal(ctxt, - ent->content, user_data, NULL); - ctxt->depth--; - } else if (ent->etype == - XML_EXTERNAL_GENERAL_PARSED_ENTITY) { - ctxt->depth++; - ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, - ctxt->sax, user_data, ctxt->depth, - ent->URI, ent->ExternalID, NULL); - ctxt->depth--; - } else { - ret = XML_ERR_ENTITY_PE_INTERNAL; - xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, - "invalid entity type found\n", NULL); - } - if (ret == XML_ERR_ENTITY_LOOP) { - xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); - return; - } - } - if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && - (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { - /* - * Entity reference callback comes second, it's somewhat - * superfluous but a compatibility to historical behaviour - */ - ctxt->sax->reference(ctxt->userData, ent->name); - } - return; + /* + * Now that the entity content has been gathered + * provide it to the application, this can take different forms based + * on the parsing modes. + */ + if (ent->children == NULL) { + /* + * Probably running in SAX mode and the callbacks don't + * build the entity content. So unless we already went + * though parsing for first checking go though the entity + * content to generate callbacks associated to the entity + */ + if (was_checked != 0) { + void *user_data; + /* + * This is a bit hackish but this seems the best + * way to make sure both SAX and DOM entity support + * behaves okay. + */ + if (ctxt->userData == ctxt) + user_data = NULL; + else + user_data = ctxt->userData; + + if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { + ctxt->depth++; + ret = xmlParseBalancedChunkMemoryInternal(ctxt, + ent->content, user_data, NULL); + ctxt->depth--; + } else if (ent->etype == + XML_EXTERNAL_GENERAL_PARSED_ENTITY) { + ctxt->depth++; + ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, + ctxt->sax, user_data, ctxt->depth, + ent->URI, ent->ExternalID, NULL); + ctxt->depth--; + } else { + ret = XML_ERR_ENTITY_PE_INTERNAL; + xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, + "invalid entity type found\n", NULL); } - if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && - (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { - /* - * Create a node. - */ - ctxt->sax->reference(ctxt->userData, ent->name); + if (ret == XML_ERR_ENTITY_LOOP) { + xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); return; } - if ((ctxt->replaceEntities) || (ent->children == NULL)) { + } + if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && + (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { + /* + * Entity reference callback comes second, it's somewhat + * superfluous but a compatibility to historical behaviour + */ + ctxt->sax->reference(ctxt->userData, ent->name); + } + return; + } + + /* + * If we didn't get any children for the entity being built + */ + if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && + (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { + /* + * Create a node. + */ + ctxt->sax->reference(ctxt->userData, ent->name); + return; + } + + if ((ctxt->replaceEntities) || (ent->children == NULL)) { + /* + * There is a problem on the handling of _private for entities + * (bug 155816): Should we copy the content of the field from + * the entity (possibly overwriting some value set by the user + * when a copy is created), should we leave it alone, or should + * we try to take care of different situations? The problem + * is exacerbated by the usage of this field by the xmlReader. + * To fix this bug, we look at _private on the created node + * and, if it's NULL, we copy in whatever was in the entity. + * If it's not NULL we leave it alone. This is somewhat of a + * hack - maybe we should have further tests to determine + * what to do. + */ + if ((ctxt->node != NULL) && (ent->children != NULL)) { + /* + * Seems we are generating the DOM content, do + * a simple tree copy for all references except the first + * In the first occurrence list contains the replacement. + * progressive == 2 means we are operating on the Reader + * and since nodes are discarded we must copy all the time. + */ + if (((list == NULL) && (ent->owner == 0)) || + (ctxt->parseMode == XML_PARSE_READER)) { + xmlNodePtr nw = NULL, cur, firstChild = NULL; + /* - * There is a problem on the handling of _private for entities - * (bug 155816): Should we copy the content of the field from - * the entity (possibly overwriting some value set by the user - * when a copy is created), should we leave it alone, or should - * we try to take care of different situations? The problem - * is exacerbated by the usage of this field by the xmlReader. - * To fix this bug, we look at _private on the created node - * and, if it's NULL, we copy in whatever was in the entity. - * If it's not NULL we leave it alone. This is somewhat of a - * hack - maybe we should have further tests to determine - * what to do. + * when operating on a reader, the entities definitions + * are always owning the entities subtree. + if (ctxt->parseMode == XML_PARSE_READER) + ent->owner = 1; */ - if ((ctxt->node != NULL) && (ent->children != NULL)) { - /* - * Seems we are generating the DOM content, do - * a simple tree copy for all references except the first - * In the first occurrence list contains the replacement. - * progressive == 2 means we are operating on the Reader - * and since nodes are discarded we must copy all the time. - */ - if (((list == NULL) && (ent->owner == 0)) || - (ctxt->parseMode == XML_PARSE_READER)) { - xmlNodePtr nw = NULL, cur, firstChild = NULL; + cur = ent->children; + while (cur != NULL) { + nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); + if (nw != NULL) { + if (nw->_private == NULL) + nw->_private = cur->_private; + if (firstChild == NULL){ + firstChild = nw; + } + nw = xmlAddChild(ctxt->node, nw); + } + if (cur == ent->last) { /* - * when operating on a reader, the entities definitions - * are always owning the entities subtree. - if (ctxt->parseMode == XML_PARSE_READER) - ent->owner = 1; + * needed to detect some strange empty + * node cases in the reader tests */ + if ((ctxt->parseMode == XML_PARSE_READER) && + (nw != NULL) && + (nw->type == XML_ELEMENT_NODE) && + (nw->children == NULL)) + nw->extra = 1; - cur = ent->children; - while (cur != NULL) { - nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); - if (nw != NULL) { - if (nw->_private == NULL) - nw->_private = cur->_private; - if (firstChild == NULL){ - firstChild = nw; - } - nw = xmlAddChild(ctxt->node, nw); - } - if (cur == ent->last) { - /* - * needed to detect some strange empty - * node cases in the reader tests - */ - if ((ctxt->parseMode == XML_PARSE_READER) && - (nw != NULL) && - (nw->type == XML_ELEMENT_NODE) && - (nw->children == NULL)) - nw->extra = 1; - - break; - } - cur = cur->next; - } + break; + } + cur = cur->next; + } #ifdef LIBXML_LEGACY_ENABLED - if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) - xmlAddEntityReference(ent, firstChild, nw); + if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) + xmlAddEntityReference(ent, firstChild, nw); #endif /* LIBXML_LEGACY_ENABLED */ - } else if (list == NULL) { - xmlNodePtr nw = NULL, cur, next, last, - firstChild = NULL; - /* - * Copy the entity child list and make it the new - * entity child list. The goal is to make sure any - * ID or REF referenced will be the one from the - * document content and not the entity copy. - */ - cur = ent->children; - ent->children = NULL; - last = ent->last; - ent->last = NULL; - while (cur != NULL) { - next = cur->next; - cur->next = NULL; - cur->parent = NULL; - nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); - if (nw != NULL) { - if (nw->_private == NULL) - nw->_private = cur->_private; - if (firstChild == NULL){ - firstChild = cur; - } - xmlAddChild((xmlNodePtr) ent, nw); - xmlAddChild(ctxt->node, cur); - } - if (cur == last) - break; - cur = next; + } else if (list == NULL) { + xmlNodePtr nw = NULL, cur, next, last, + firstChild = NULL; + /* + * Copy the entity child list and make it the new + * entity child list. The goal is to make sure any + * ID or REF referenced will be the one from the + * document content and not the entity copy. + */ + cur = ent->children; + ent->children = NULL; + last = ent->last; + ent->last = NULL; + while (cur != NULL) { + next = cur->next; + cur->next = NULL; + cur->parent = NULL; + nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); + if (nw != NULL) { + if (nw->_private == NULL) + nw->_private = cur->_private; + if (firstChild == NULL){ + firstChild = cur; } - ent->owner = 1; + xmlAddChild((xmlNodePtr) ent, nw); + xmlAddChild(ctxt->node, cur); + } + if (cur == last) + break; + cur = next; + } + if (ent->owner == 0) + ent->owner = 1; #ifdef LIBXML_LEGACY_ENABLED - if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) - xmlAddEntityReference(ent, firstChild, nw); + if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) + xmlAddEntityReference(ent, firstChild, nw); #endif /* LIBXML_LEGACY_ENABLED */ - } else { - const xmlChar *nbktext; - - /* - * the name change is to avoid coalescing of the - * node with a possible previous text one which - * would make ent->children a dangling pointer - */ - nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext", - -1); - if (ent->children->type == XML_TEXT_NODE) - ent->children->name = nbktext; - if ((ent->last != ent->children) && - (ent->last->type == XML_TEXT_NODE)) - ent->last->name = nbktext; - xmlAddChildList(ctxt->node, ent->children); - } + } else { + const xmlChar *nbktext; - /* - * This is to avoid a nasty side effect, see - * characters() in SAX.c - */ - ctxt->nodemem = 0; - ctxt->nodelen = 0; - return; - } + /* + * the name change is to avoid coalescing of the + * node with a possible previous text one which + * would make ent->children a dangling pointer + */ + nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext", + -1); + if (ent->children->type == XML_TEXT_NODE) + ent->children->name = nbktext; + if ((ent->last != ent->children) && + (ent->last->type == XML_TEXT_NODE)) + ent->last->name = nbktext; + xmlAddChildList(ctxt->node, ent->children); } - } else { - val = ent->content; - if (val == NULL) return; + /* - * inline the entity. + * This is to avoid a nasty side effect, see + * characters() in SAX.c */ - if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && - (!ctxt->disableSAX)) - ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); + ctxt->nodemem = 0; + ctxt->nodelen = 0; + return; } } } @@ -6574,132 +7022,140 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) { xmlEntityPtr ent = NULL; GROW; - - if (RAW == '&') { - NEXT; - name = xmlParseName(ctxt); - if (name == NULL) { - xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, - "xmlParseEntityRef: no name\n"); - } else { - if (RAW == ';') { - NEXT; - /* - * Ask first SAX for entity resolution, otherwise try the - * predefined set. - */ - if (ctxt->sax != NULL) { - if (ctxt->sax->getEntity != NULL) - ent = ctxt->sax->getEntity(ctxt->userData, name); - if ((ctxt->wellFormed == 1 ) && (ent == NULL)) - ent = xmlGetPredefinedEntity(name); - if ((ctxt->wellFormed == 1 ) && (ent == NULL) && - (ctxt->userData==ctxt)) { - ent = xmlSAX2GetEntity(ctxt, name); - } - } - /* - * [ WFC: Entity Declared ] - * In a document without any DTD, a document with only an - * internal DTD subset which contains no parameter entity - * references, or a document with "standalone='yes'", the - * Name given in the entity reference must match that in an - * entity declaration, except that well-formed documents - * need not declare any of the following entities: amp, lt, - * gt, apos, quot. - * The declaration of a parameter entity must precede any - * reference to it. - * Similarly, the declaration of a general entity must - * precede any reference to it which appears in a default - * value in an attribute-list declaration. Note that if - * entities are declared in the external subset or in - * external parameter entities, a non-validating processor - * is not obligated to read and process their declarations; - * for such documents, the rule that an entity must be - * declared is a well-formedness constraint only if - * standalone='yes'. - */ - if (ent == NULL) { - if ((ctxt->standalone == 1) || - ((ctxt->hasExternalSubset == 0) && - (ctxt->hasPErefs == 0))) { - xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, - "Entity '%s' not defined\n", name); - } else { - xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, - "Entity '%s' not defined\n", name); - if ((ctxt->inSubset == 0) && - (ctxt->sax != NULL) && - (ctxt->sax->reference != NULL)) { - ctxt->sax->reference(ctxt->userData, name); - } - } - ctxt->valid = 0; - } - /* - * [ WFC: Parsed Entity ] - * An entity reference must not contain the name of an - * unparsed entity - */ - else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { - xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, - "Entity reference to unparsed entity %s\n", name); - } - - /* - * [ WFC: No External Entity References ] - * Attribute values cannot contain direct or indirect - * entity references to external entities. - */ - else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && - (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { - xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, - "Attribute references external entity '%s'\n", name); - } - /* - * [ WFC: No < in Attribute Values ] - * The replacement text of any entity referred to directly or - * indirectly in an attribute value (other than "<") must - * not contain a <. - */ - else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && - (ent != NULL) && - (!xmlStrEqual(ent->name, BAD_CAST "lt")) && - (ent->content != NULL) && - (xmlStrchr(ent->content, '<'))) { - xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, - "'<' in entity '%s' is not allowed in attributes values\n", name); - } + if (RAW != '&') + return(NULL); + NEXT; + name = xmlParseName(ctxt); + if (name == NULL) { + xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, + "xmlParseEntityRef: no name\n"); + return(NULL); + } + if (RAW != ';') { + xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); + return(NULL); + } + NEXT; - /* - * Internal check, no parameter entities here ... - */ - else { - switch (ent->etype) { - case XML_INTERNAL_PARAMETER_ENTITY: - case XML_EXTERNAL_PARAMETER_ENTITY: - xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, - "Attempt to reference the parameter entity '%s'\n", - name); - break; - default: - break; - } - } + /* + * Predefined entites override any extra definition + */ + ent = xmlGetPredefinedEntity(name); + if (ent != NULL) + return(ent); - /* - * [ WFC: No Recursion ] - * A parsed entity must not contain a recursive reference - * to itself, either directly or indirectly. - * Done somewhere else - */ + /* + * Increate the number of entity references parsed + */ + ctxt->nbentities++; - } else { - xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); + /* + * Ask first SAX for entity resolution, otherwise try the + * entities which may have stored in the parser context. + */ + if (ctxt->sax != NULL) { + if (ctxt->sax->getEntity != NULL) + ent = ctxt->sax->getEntity(ctxt->userData, name); + if ((ctxt->wellFormed == 1 ) && (ent == NULL) && + (ctxt->userData==ctxt)) { + ent = xmlSAX2GetEntity(ctxt, name); + } + } + /* + * [ WFC: Entity Declared ] + * In a document without any DTD, a document with only an + * internal DTD subset which contains no parameter entity + * references, or a document with "standalone='yes'", the + * Name given in the entity reference must match that in an + * entity declaration, except that well-formed documents + * need not declare any of the following entities: amp, lt, + * gt, apos, quot. + * The declaration of a parameter entity must precede any + * reference to it. + * Similarly, the declaration of a general entity must + * precede any reference to it which appears in a default + * value in an attribute-list declaration. Note that if + * entities are declared in the external subset or in + * external parameter entities, a non-validating processor + * is not obligated to read and process their declarations; + * for such documents, the rule that an entity must be + * declared is a well-formedness constraint only if + * standalone='yes'. + */ + if (ent == NULL) { + if ((ctxt->standalone == 1) || + ((ctxt->hasExternalSubset == 0) && + (ctxt->hasPErefs == 0))) { + xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, + "Entity '%s' not defined\n", name); + } else { + xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, + "Entity '%s' not defined\n", name); + if ((ctxt->inSubset == 0) && + (ctxt->sax != NULL) && + (ctxt->sax->reference != NULL)) { + ctxt->sax->reference(ctxt->userData, name); } } + ctxt->valid = 0; + } + + /* + * [ WFC: Parsed Entity ] + * An entity reference must not contain the name of an + * unparsed entity + */ + else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { + xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, + "Entity reference to unparsed entity %s\n", name); + } + + /* + * [ WFC: No External Entity References ] + * Attribute values cannot contain direct or indirect + * entity references to external entities. + */ + else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && + (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { + xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, + "Attribute references external entity '%s'\n", name); + } + /* + * [ WFC: No < in Attribute Values ] + * The replacement text of any entity referred to directly or + * indirectly in an attribute value (other than "<") must + * not contain a <. + */ + else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && + (ent != NULL) && (ent->content != NULL) && + (xmlStrchr(ent->content, '<'))) { + xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, + "'<' in entity '%s' is not allowed in attributes values\n", name); + } + + /* + * Internal check, no parameter entities here ... + */ + else { + switch (ent->etype) { + case XML_INTERNAL_PARAMETER_ENTITY: + case XML_EXTERNAL_PARAMETER_ENTITY: + xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, + "Attempt to reference the parameter entity '%s'\n", + name); + break; + default: + break; + } } + + /* + * [ WFC: No Recursion ] + * A parsed entity must not contain a recursive reference + * to itself, either directly or indirectly. + * Done somewhere else + */ return(ent); } @@ -6745,129 +7201,143 @@ xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { return(NULL); ptr = *str; cur = *ptr; - if (cur == '&') { - ptr++; - cur = *ptr; - name = xmlParseStringName(ctxt, &ptr); - if (name == NULL) { - xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, - "xmlParseStringEntityRef: no name\n"); - } else { - if (*ptr == ';') { - ptr++; - /* - * Ask first SAX for entity resolution, otherwise try the - * predefined set. - */ - if (ctxt->sax != NULL) { - if (ctxt->sax->getEntity != NULL) - ent = ctxt->sax->getEntity(ctxt->userData, name); - if (ent == NULL) - ent = xmlGetPredefinedEntity(name); - if ((ent == NULL) && (ctxt->userData==ctxt)) { - ent = xmlSAX2GetEntity(ctxt, name); - } - } - /* - * [ WFC: Entity Declared ] - * In a document without any DTD, a document with only an - * internal DTD subset which contains no parameter entity - * references, or a document with "standalone='yes'", the - * Name given in the entity reference must match that in an - * entity declaration, except that well-formed documents - * need not declare any of the following entities: amp, lt, - * gt, apos, quot. - * The declaration of a parameter entity must precede any - * reference to it. - * Similarly, the declaration of a general entity must - * precede any reference to it which appears in a default - * value in an attribute-list declaration. Note that if - * entities are declared in the external subset or in - * external parameter entities, a non-validating processor - * is not obligated to read and process their declarations; - * for such documents, the rule that an entity must be - * declared is a well-formedness constraint only if - * standalone='yes'. - */ - if (ent == NULL) { - if ((ctxt->standalone == 1) || - ((ctxt->hasExternalSubset == 0) && - (ctxt->hasPErefs == 0))) { - xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, - "Entity '%s' not defined\n", name); - } else { - xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, - "Entity '%s' not defined\n", - name); - } - /* TODO ? check regressions ctxt->valid = 0; */ - } + if (cur != '&') + return(NULL); - /* - * [ WFC: Parsed Entity ] - * An entity reference must not contain the name of an - * unparsed entity - */ - else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { - xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, - "Entity reference to unparsed entity %s\n", name); - } + ptr++; + cur = *ptr; + name = xmlParseStringName(ctxt, &ptr); + if (name == NULL) { + xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, + "xmlParseStringEntityRef: no name\n"); + *str = ptr; + return(NULL); + } + if (*ptr != ';') { + xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); + *str = ptr; + return(NULL); + } + ptr++; - /* - * [ WFC: No External Entity References ] - * Attribute values cannot contain direct or indirect - * entity references to external entities. - */ - else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && - (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { - xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, - "Attribute references external entity '%s'\n", name); - } - /* - * [ WFC: No < in Attribute Values ] - * The replacement text of any entity referred to directly or - * indirectly in an attribute value (other than "<") must - * not contain a <. - */ - else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && - (ent != NULL) && - (!xmlStrEqual(ent->name, BAD_CAST "lt")) && - (ent->content != NULL) && - (xmlStrchr(ent->content, '<'))) { - xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, - "'<' in entity '%s' is not allowed in attributes values\n", - name); - } - /* - * Internal check, no parameter entities here ... - */ - else { - switch (ent->etype) { - case XML_INTERNAL_PARAMETER_ENTITY: - case XML_EXTERNAL_PARAMETER_ENTITY: - xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, - "Attempt to reference the parameter entity '%s'\n", - name); - break; - default: - break; - } - } + /* + * Predefined entites override any extra definition + */ + ent = xmlGetPredefinedEntity(name); + if (ent != NULL) + return(ent); - /* - * [ WFC: No Recursion ] - * A parsed entity must not contain a recursive reference - * to itself, either directly or indirectly. - * Done somewhere else - */ + /* + * Increate the number of entity references parsed + */ + ctxt->nbentities++; - } else { - xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); - } - xmlFree(name); + /* + * Ask first SAX for entity resolution, otherwise try the + * entities which may have stored in the parser context. + */ + if (ctxt->sax != NULL) { + if (ctxt->sax->getEntity != NULL) + ent = ctxt->sax->getEntity(ctxt->userData, name); + if ((ent == NULL) && (ctxt->userData==ctxt)) { + ent = xmlSAX2GetEntity(ctxt, name); + } + } + + /* + * [ WFC: Entity Declared ] + * In a document without any DTD, a document with only an + * internal DTD subset which contains no parameter entity + * references, or a document with "standalone='yes'", the + * Name given in the entity reference must match that in an + * entity declaration, except that well-formed documents + * need not declare any of the following entities: amp, lt, + * gt, apos, quot. + * The declaration of a parameter entity must precede any + * reference to it. + * Similarly, the declaration of a general entity must + * precede any reference to it which appears in a default + * value in an attribute-list declaration. Note that if + * entities are declared in the external subset or in + * external parameter entities, a non-validating processor + * is not obligated to read and process their declarations; + * for such documents, the rule that an entity must be + * declared is a well-formedness constraint only if + * standalone='yes'. + */ + if (ent == NULL) { + if ((ctxt->standalone == 1) || + ((ctxt->hasExternalSubset == 0) && + (ctxt->hasPErefs == 0))) { + xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, + "Entity '%s' not defined\n", name); + } else { + xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, + "Entity '%s' not defined\n", + name); } + /* TODO ? check regressions ctxt->valid = 0; */ + } + + /* + * [ WFC: Parsed Entity ] + * An entity reference must not contain the name of an + * unparsed entity + */ + else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { + xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, + "Entity reference to unparsed entity %s\n", name); } + + /* + * [ WFC: No External Entity References ] + * Attribute values cannot contain direct or indirect + * entity references to external entities. + */ + else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && + (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { + xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, + "Attribute references external entity '%s'\n", name); + } + /* + * [ WFC: No < in Attribute Values ] + * The replacement text of any entity referred to directly or + * indirectly in an attribute value (other than "<") must + * not contain a <. + */ + else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && + (ent != NULL) && (ent->content != NULL) && + (xmlStrchr(ent->content, '<'))) { + xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, + "'<' in entity '%s' is not allowed in attributes values\n", + name); + } + + /* + * Internal check, no parameter entities here ... + */ + else { + switch (ent->etype) { + case XML_INTERNAL_PARAMETER_ENTITY: + case XML_EXTERNAL_PARAMETER_ENTITY: + xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, + "Attempt to reference the parameter entity '%s'\n", + name); + break; + default: + break; + } + } + + /* + * [ WFC: No Recursion ] + * A parsed entity must not contain a recursive reference + * to itself, either directly or indirectly. + * Done somewhere else + */ + + xmlFree(name); *str = ptr; return(ent); } @@ -6908,90 +7378,101 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt) xmlEntityPtr entity = NULL; xmlParserInputPtr input; - if (RAW == '%') { - NEXT; - name = xmlParseName(ctxt); - if (name == NULL) { - xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, - "xmlParsePEReference: no name\n"); - } else { - if (RAW == ';') { - NEXT; - if ((ctxt->sax != NULL) && - (ctxt->sax->getParameterEntity != NULL)) - entity = ctxt->sax->getParameterEntity(ctxt->userData, - name); - if (entity == NULL) { - /* - * [ WFC: Entity Declared ] - * In a document without any DTD, a document with only an - * internal DTD subset which contains no parameter entity - * references, or a document with "standalone='yes'", ... - * ... The declaration of a parameter entity must precede - * any reference to it... - */ - if ((ctxt->standalone == 1) || - ((ctxt->hasExternalSubset == 0) && - (ctxt->hasPErefs == 0))) { - xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, - "PEReference: %%%s; not found\n", - name); - } else { - /* - * [ VC: Entity Declared ] - * In a document with an external subset or external - * parameter entities with "standalone='no'", ... - * ... The declaration of a parameter entity must - * precede any reference to it... - */ - xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, - "PEReference: %%%s; not found\n", - name, NULL); - ctxt->valid = 0; - } - } else { - /* - * Internal checking in case the entity quest barfed - */ - if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && - (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { - xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, - "Internal: %%%s; is not a parameter entity\n", - name, NULL); - } else if (ctxt->input->free != deallocblankswrapper) { - input = - xmlNewBlanksWrapperInputStream(ctxt, entity); - xmlPushInput(ctxt, input); - } else { - /* - * TODO !!! - * handle the extra spaces added before and after - * c.f. http://www.w3.org/TR/REC-xml#as-PE - */ - input = xmlNewEntityInputStream(ctxt, entity); - xmlPushInput(ctxt, input); - if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && - (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && - (IS_BLANK_CH(NXT(5)))) { - xmlParseTextDecl(ctxt); - if (ctxt->errNo == - XML_ERR_UNSUPPORTED_ENCODING) { - /* - * The XML REC instructs us to stop parsing - * right here - */ - ctxt->instate = XML_PARSER_EOF; - return; - } - } - } - } - ctxt->hasPErefs = 1; - } else { - xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); - } - } + if (RAW != '%') + return; + NEXT; + name = xmlParseName(ctxt); + if (name == NULL) { + xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, + "xmlParsePEReference: no name\n"); + return; + } + if (RAW != ';') { + xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); + return; + } + + NEXT; + + /* + * Increate the number of entity references parsed + */ + ctxt->nbentities++; + + /* + * Request the entity from SAX + */ + if ((ctxt->sax != NULL) && + (ctxt->sax->getParameterEntity != NULL)) + entity = ctxt->sax->getParameterEntity(ctxt->userData, + name); + if (entity == NULL) { + /* + * [ WFC: Entity Declared ] + * In a document without any DTD, a document with only an + * internal DTD subset which contains no parameter entity + * references, or a document with "standalone='yes'", ... + * ... The declaration of a parameter entity must precede + * any reference to it... + */ + if ((ctxt->standalone == 1) || + ((ctxt->hasExternalSubset == 0) && + (ctxt->hasPErefs == 0))) { + xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, + "PEReference: %%%s; not found\n", + name); + } else { + /* + * [ VC: Entity Declared ] + * In a document with an external subset or external + * parameter entities with "standalone='no'", ... + * ... The declaration of a parameter entity must + * precede any reference to it... + */ + xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, + "PEReference: %%%s; not found\n", + name, NULL); + ctxt->valid = 0; + } + } else { + /* + * Internal checking in case the entity quest barfed + */ + if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && + (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { + xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, + "Internal: %%%s; is not a parameter entity\n", + name, NULL); + } else if (ctxt->input->free != deallocblankswrapper) { + input = xmlNewBlanksWrapperInputStream(ctxt, entity); + if (xmlPushInput(ctxt, input) < 0) + return; + } else { + /* + * TODO !!! + * handle the extra spaces added before and after + * c.f. http://www.w3.org/TR/REC-xml#as-PE + */ + input = xmlNewEntityInputStream(ctxt, entity); + if (xmlPushInput(ctxt, input) < 0) + return; + if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && + (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && + (IS_BLANK_CH(NXT(5)))) { + xmlParseTextDecl(ctxt); + if (ctxt->errNo == + XML_ERR_UNSUPPORTED_ENCODING) { + /* + * The XML REC instructs us to stop parsing + * right here + */ + ctxt->instate = XML_PARSER_EOF; + return; + } + } + } } + ctxt->hasPErefs = 1; } /** @@ -7044,7 +7525,11 @@ xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { * Push the entity as the current input, read char by char * saving to the buffer until the end of the entity or an error */ - xmlPushInput(ctxt, input); + if (xmlPushInput(ctxt, input) < 0) { + xmlBufferFree(buf); + return(-1); + } + GROW; c = CUR_CHAR(l); while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && @@ -7115,67 +7600,77 @@ xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { if ((str == NULL) || (*str == NULL)) return(NULL); ptr = *str; cur = *ptr; - if (cur == '%') { - ptr++; - cur = *ptr; - name = xmlParseStringName(ctxt, &ptr); - if (name == NULL) { - xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, - "xmlParseStringPEReference: no name\n"); + if (cur != '%') + return(NULL); + ptr++; + cur = *ptr; + name = xmlParseStringName(ctxt, &ptr); + if (name == NULL) { + xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, + "xmlParseStringPEReference: no name\n"); + *str = ptr; + return(NULL); + } + cur = *ptr; + if (cur != ';') { + xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); + xmlFree(name); + *str = ptr; + return(NULL); + } + ptr++; + + /* + * Increate the number of entity references parsed + */ + ctxt->nbentities++; + + /* + * Request the entity from SAX + */ + if ((ctxt->sax != NULL) && + (ctxt->sax->getParameterEntity != NULL)) + entity = ctxt->sax->getParameterEntity(ctxt->userData, + name); + if (entity == NULL) { + /* + * [ WFC: Entity Declared ] + * In a document without any DTD, a document with only an + * internal DTD subset which contains no parameter entity + * references, or a document with "standalone='yes'", ... + * ... The declaration of a parameter entity must precede + * any reference to it... + */ + if ((ctxt->standalone == 1) || + ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) { + xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, + "PEReference: %%%s; not found\n", name); } else { - cur = *ptr; - if (cur == ';') { - ptr++; - cur = *ptr; - if ((ctxt->sax != NULL) && - (ctxt->sax->getParameterEntity != NULL)) - entity = ctxt->sax->getParameterEntity(ctxt->userData, - name); - if (entity == NULL) { - /* - * [ WFC: Entity Declared ] - * In a document without any DTD, a document with only an - * internal DTD subset which contains no parameter entity - * references, or a document with "standalone='yes'", ... - * ... The declaration of a parameter entity must precede - * any reference to it... - */ - if ((ctxt->standalone == 1) || - ((ctxt->hasExternalSubset == 0) && - (ctxt->hasPErefs == 0))) { - xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, - "PEReference: %%%s; not found\n", name); - } else { - /* - * [ VC: Entity Declared ] - * In a document with an external subset or external - * parameter entities with "standalone='no'", ... - * ... The declaration of a parameter entity must - * precede any reference to it... - */ - xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, - "PEReference: %%%s; not found\n", - name, NULL); - ctxt->valid = 0; - } - } else { - /* - * Internal checking in case the entity quest barfed - */ - if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && - (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { - xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, - "%%%s; is not a parameter entity\n", - name, NULL); - } - } - ctxt->hasPErefs = 1; - } else { - xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); - } - xmlFree(name); + /* + * [ VC: Entity Declared ] + * In a document with an external subset or external + * parameter entities with "standalone='no'", ... + * ... The declaration of a parameter entity must + * precede any reference to it... + */ + xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, + "PEReference: %%%s; not found\n", + name, NULL); + ctxt->valid = 0; + } + } else { + /* + * Internal checking in case the entity quest barfed + */ + if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && + (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { + xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, + "%%%s; is not a parameter entity\n", + name, NULL); } } + ctxt->hasPErefs = 1; + xmlFree(name); *str = ptr; return(entity); } @@ -7652,38 +8147,6 @@ xmlParseEndTag(xmlParserCtxtPtr ctxt) { * * ************************************************************************/ -static const xmlChar * -xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { - int len = 0, l; - int c; - int count = 0; - - /* - * Handler for more complex cases - */ - GROW; - c = CUR_CHAR(l); - if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ - (!IS_LETTER(c) && (c != '_'))) { - return(NULL); - } - - while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ - ((IS_LETTER(c)) || (IS_DIGIT(c)) || - (c == '.') || (c == '-') || (c == '_') || - (IS_COMBINING(c)) || - (IS_EXTENDER(c)))) { - if (count++ > 100) { - count = 0; - GROW; - } - len += l; - NEXTL(l); - c = CUR_CHAR(l); - } - return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); -} - /* * xmlGetNamespace: * @ctxt: an XML parser context @@ -7709,56 +8172,6 @@ xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { } /** - * xmlParseNCName: - * @ctxt: an XML parser context - * @len: lenght of the string parsed - * - * parse an XML name. - * - * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | - * CombiningChar | Extender - * - * [5NS] NCName ::= (Letter | '_') (NCNameChar)* - * - * Returns the Name parsed or NULL - */ - -static const xmlChar * -xmlParseNCName(xmlParserCtxtPtr ctxt) { - const xmlChar *in; - const xmlChar *ret; - int count = 0; - - /* - * Accelerator for simple ASCII names - */ - in = ctxt->input->cur; - if (((*in >= 0x61) && (*in <= 0x7A)) || - ((*in >= 0x41) && (*in <= 0x5A)) || - (*in == '_')) { - in++; - while (((*in >= 0x61) && (*in <= 0x7A)) || - ((*in >= 0x41) && (*in <= 0x5A)) || - ((*in >= 0x30) && (*in <= 0x39)) || - (*in == '_') || (*in == '-') || - (*in == '.')) - in++; - if ((*in > 0) && (*in < 0x80)) { - count = in - ctxt->input->cur; - ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); - ctxt->input->cur = in; - ctxt->nbChars += count; - ctxt->input->col += count; - if (ret == NULL) { - xmlErrMemory(ctxt, NULL); - } - return(ret); - } - } - return(xmlParseNCNameComplex(ctxt)); -} - -/** * xmlParseQName: * @ctxt: an XML parser context * @prefix: pointer to store the prefix part @@ -7800,7 +8213,13 @@ xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { xmlNsErr(ctxt, XML_NS_ERR_QNAME, "Failed to parse QName '%s:'\n", p, NULL, NULL); - tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); + l = xmlParseNmtoken(ctxt); + if (l == NULL) + tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); + else { + tmp = xmlBuildQName(l, p, NULL, 0); + xmlFree((char *)l); + } p = xmlDictLookup(ctxt->dict, tmp, -1); if (tmp != NULL) xmlFree(tmp); *prefix = NULL; @@ -8108,7 +8527,7 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt, const xmlChar *val2; val2 = xmlAttrNormalizeSpace2(ctxt, val, len); - if (val2 != NULL) { + if ((val2 != NULL) && (val2 != val)) { xmlFree(val); val = (xmlChar *) val2; } @@ -8271,17 +8690,33 @@ reparse: if (*URL != 0) { uri = xmlParseURI((const char *) URL); if (uri == NULL) { - xmlWarningMsg(ctxt, XML_WAR_NS_URI, - "xmlns: %s not a valid URI\n", - URL, NULL); + xmlNsErr(ctxt, XML_WAR_NS_URI, + "xmlns: '%s' is not a valid URI\n", + URL, NULL, NULL); } else { - if (uri->scheme == NULL) { - xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE, - "xmlns: URI %s is not absolute\n", - URL, NULL); + if ((ctxt->pedantic) && (uri->scheme == NULL)) { + xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, + "xmlns: URI %s is not absolute\n", + URL, NULL, NULL); } xmlFreeURI(uri); } + if (URL == ctxt->str_xml_ns) { + if (attname != ctxt->str_xml) { + xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, + "xml namespace URI cannot be the default namespace\n", + NULL, NULL, NULL); + } + goto skip_default_ns; + } + if ((len == 29) && + (xmlStrEqual(URL, + BAD_CAST "http://www.w3.org/2000/xmlns/"))) { + xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, + "reuse of the xmlns namespace name is forbidden\n", + NULL, NULL, NULL); + goto skip_default_ns; + } } /* * check that it's not a defined namespace @@ -8293,6 +8728,7 @@ reparse: xmlErrAttributeDup(ctxt, NULL, attname); else if (nsPush(ctxt, NULL, URL) > 0) nbNs++; +skip_default_ns: if (alloc != 0) xmlFree(attvalue); SKIP_BLANKS; continue; @@ -8310,22 +8746,49 @@ reparse: /* * Do not keep a namespace definition node */ - if (alloc != 0) xmlFree(attvalue); - SKIP_BLANKS; - continue; + goto skip_ns; + } + if (URL == ctxt->str_xml_ns) { + if (attname != ctxt->str_xml) { + xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, + "xml namespace URI mapped to wrong prefix\n", + NULL, NULL, NULL); + } + goto skip_ns; + } + if (attname == ctxt->str_xmlns) { + xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, + "redefinition of the xmlns prefix is forbidden\n", + NULL, NULL, NULL); + goto skip_ns; } - uri = xmlParseURI((const char *) URL); - if (uri == NULL) { - xmlWarningMsg(ctxt, XML_WAR_NS_URI, - "xmlns:%s: '%s' is not a valid URI\n", - attname, URL); + if ((len == 29) && + (xmlStrEqual(URL, + BAD_CAST "http://www.w3.org/2000/xmlns/"))) { + xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, + "reuse of the xmlns namespace name is forbidden\n", + NULL, NULL, NULL); + goto skip_ns; + } + if ((URL == NULL) || (URL[0] == 0)) { + xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, + "xmlns:%s: Empty XML namespace is not allowed\n", + attname, NULL, NULL); + goto skip_ns; } else { - if ((ctxt->pedantic) && (uri->scheme == NULL)) { - xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE, + uri = xmlParseURI((const char *) URL); + if (uri == NULL) { + xmlNsErr(ctxt, XML_WAR_NS_URI, + "xmlns:%s: '%s' is not a valid URI\n", + attname, URL, NULL); + } else { + if ((ctxt->pedantic) && (uri->scheme == NULL)) { + xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, "xmlns:%s: URI %s is not absolute\n", - attname, URL); + attname, URL, NULL); + } + xmlFreeURI(uri); } - xmlFreeURI(uri); } /* @@ -8338,6 +8801,7 @@ reparse: xmlErrAttributeDup(ctxt, aprefix, attname); else if (nsPush(ctxt, attname, URL) > 0) nbNs++; +skip_ns: if (alloc != 0) xmlFree(attvalue); SKIP_BLANKS; if (ctxt->input->base != base) goto base_changed; @@ -8372,7 +8836,7 @@ reparse: xmlFree(attvalue); } -failed: +failed: GROW if (ctxt->input->base != base) goto base_changed; @@ -8403,8 +8867,8 @@ failed: defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); if (defaults != NULL) { for (i = 0;i < defaults->nbAttrs;i++) { - attname = defaults->values[4 * i]; - aprefix = defaults->values[4 * i + 1]; + attname = defaults->values[5 * i]; + aprefix = defaults->values[5 * i + 1]; /* * special work for namespaces defaulted defs @@ -8419,9 +8883,9 @@ failed: if (j <= nbNs) continue; nsname = xmlGetNamespace(ctxt, NULL); - if (nsname != defaults->values[4 * i + 2]) { + if (nsname != defaults->values[5 * i + 2]) { if (nsPush(ctxt, NULL, - defaults->values[4 * i + 2]) > 0) + defaults->values[5 * i + 2]) > 0) nbNs++; } } else if (aprefix == ctxt->str_xmlns) { @@ -8436,7 +8900,7 @@ failed: nsname = xmlGetNamespace(ctxt, attname); if (nsname != defaults->values[2]) { if (nsPush(ctxt, attname, - defaults->values[4 * i + 2]) > 0) + defaults->values[5 * i + 2]) > 0) nbNs++; } } else { @@ -8462,8 +8926,14 @@ failed: atts[nbatts++] = NULL; else atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); - atts[nbatts++] = defaults->values[4 * i + 2]; - atts[nbatts++] = defaults->values[4 * i + 3]; + atts[nbatts++] = defaults->values[5 * i + 2]; + atts[nbatts++] = defaults->values[5 * i + 3]; + if ((ctxt->standalone == 1) && + (defaults->values[5 * i + 4] != NULL)) { + xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, + "standalone: attribute %s on %s defaulted from external subset\n", + attname, localname); + } nbdef++; } } @@ -8846,10 +9316,11 @@ xmlParseElement(xmlParserCtxtPtr ctxt) { xmlNodePtr ret; int nsNr = ctxt->nsNr; - if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) { - xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, - "Excessive depth in document: change xmlParserMaxDepth = %d\n", - xmlParserMaxDepth); + if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, + "Excessive depth in document: %d use XML_PARSE_HUGE option\n", + xmlParserMaxDepth); ctxt->instate = XML_PARSER_EOF; return; } @@ -9003,7 +9474,9 @@ xmlParseElement(xmlParserCtxtPtr ctxt) { * * parse the XML version value. * - * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+ + * [26] VersionNum ::= '1.' [0-9]+ + * + * In practice allow [0-9].[0-9]+ at that level * * Returns the string giving the XML version number, or NULL */ @@ -9020,11 +9493,21 @@ xmlParseVersionNum(xmlParserCtxtPtr ctxt) { return(NULL); } cur = CUR; - while (((cur >= 'a') && (cur <= 'z')) || - ((cur >= 'A') && (cur <= 'Z')) || - ((cur >= '0') && (cur <= '9')) || - (cur == '_') || (cur == '.') || - (cur == ':') || (cur == '-')) { + if (!((cur >= '0') && (cur <= '9'))) { + xmlFree(buf); + return(NULL); + } + buf[len++] = cur; + NEXT; + cur=CUR; + if (cur != '.') { + xmlFree(buf); + return(NULL); + } + buf[len++] = cur; + NEXT; + cur=CUR; + while ((cur >= '0') && (cur <= '9')) { if (len + 1 >= size) { xmlChar *tmp; @@ -9117,7 +9600,7 @@ xmlParseEncName(xmlParserCtxtPtr ctxt) { xmlErrMemory(ctxt, NULL); return(NULL); } - + buf[len++] = cur; NEXT; cur = CUR; @@ -9205,6 +9688,18 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { if ((encoding != NULL) && ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { + /* + * If no encoding was passed to the parser, that we are + * using UTF-16 and no decoder is present i.e. the + * document is apparently UTF-8 compatible, then raise an + * encoding mismatch fatal error + */ + if ((ctxt->encoding == NULL) && + (ctxt->input->buf != NULL) && + (ctxt->input->buf->encoder == NULL)) { + xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING, + "Document labelled UTF-16 but has UTF-8 content\n"); + } if (ctxt->encoding != NULL) xmlFree((xmlChar *) ctxt->encoding); ctxt->encoding = encoding; @@ -9363,11 +9858,23 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { } else { if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { /* - * TODO: Blueberry should be detected here + * Changed here for XML-1.0 5th edition */ - xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, - "Unsupported version '%s'\n", - version, NULL); + if (ctxt->options & XML_PARSE_OLD10) { + xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, + "Unsupported version '%s'\n", + version); + } else { + if ((version[0] == '1') && ((version[1] == '.'))) { + xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, + "Unsupported version '%s'\n", + version, NULL); + } else { + xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, + "Unsupported version '%s'\n", + version); + } + } } if (ctxt->version != NULL) xmlFree((void *) ctxt->version); @@ -9601,6 +10108,15 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) { ctxt->myDoc = NULL; } + if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) { + ctxt->myDoc->properties |= XML_DOC_WELLFORMED; + if (ctxt->valid) + ctxt->myDoc->properties |= XML_DOC_DTDVALID; + if (ctxt->nsWellFormed) + ctxt->myDoc->properties |= XML_DOC_NSVALID; + if (ctxt->options & XML_PARSE_OLD10) + ctxt->myDoc->properties |= XML_DOC_OLD10; + } if (! ctxt->wellFormed) { ctxt->valid = 0; return(-1); @@ -11245,7 +11761,11 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, /* * plug some encoding conversion routines here. */ - xmlPushInput(ctxt, pinput); + if (xmlPushInput(ctxt, pinput) < 0) { + if (sax != NULL) ctxt->sax = NULL; + xmlFreeParserCtxt(ctxt); + return(NULL); + } if (enc != XML_CHAR_ENCODING_NONE) { xmlSwitchEncoding(ctxt, enc); } @@ -11262,6 +11782,11 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, */ ctxt->inSubset = 2; ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); + if (ctxt->myDoc == NULL) { + xmlErrMemory(ctxt, "New Doc failed"); + return(NULL); + } + ctxt->myDoc->properties = XML_DOC_INTERNAL; ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", BAD_CAST "none", BAD_CAST "none"); @@ -11374,7 +11899,13 @@ xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, /* * plug some encoding conversion routines here. */ - xmlPushInput(ctxt, input); + if (xmlPushInput(ctxt, input) < 0) { + if (sax != NULL) ctxt->sax = NULL; + xmlFreeParserCtxt(ctxt); + if (systemIdCanonic != NULL) + xmlFree(systemIdCanonic); + return(NULL); + } if ((ctxt->input->end - ctxt->input->cur) >= 4) { enc = xmlDetectCharEncoding(ctxt->input->cur, 4); xmlSwitchEncoding(ctxt, enc); @@ -11395,6 +11926,13 @@ xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, */ ctxt->inSubset = 2; ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); + if (ctxt->myDoc == NULL) { + xmlErrMemory(ctxt, "New Doc failed"); + if (sax != NULL) ctxt->sax = NULL; + xmlFreeParserCtxt(ctxt); + return(NULL); + } + ctxt->myDoc->properties = XML_DOC_INTERNAL; ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", ExternalID, SystemID); xmlParseExternalSubset(ctxt, ExternalID, SystemID); @@ -11421,7 +11959,7 @@ xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, } if (sax != NULL) ctxt->sax = NULL; xmlFreeParserCtxt(ctxt); - + return(ret); } @@ -11432,7 +11970,7 @@ xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, * @SystemID: a NAME* containing the URL to the DTD * * Load and parse an external subset. - * + * * Returns the resulting xmlDtdPtr or NULL in case of error. */ @@ -11480,7 +12018,8 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, if (ctx == NULL) return(-1); - if (ctx->depth > 40) { + if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) || + (ctx->depth > 1024)) { return(XML_ERR_ENTITY_LOOP); } @@ -11495,10 +12034,10 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, if (ctxt == NULL) { return(-1); } - + ctxt->userData = ctxt; ctxt->_private = ctx->_private; - + inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); if (inputStream == NULL) { xmlFreeParserCtxt(ctxt); @@ -11511,7 +12050,7 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, directory = xmlParserGetDirectory((char *)URL); if ((ctxt->directory == NULL) && (directory != NULL)) ctxt->directory = directory; - + oldsax = ctxt->sax; ctxt->sax = ctx->sax; xmlDetectSAX2(ctxt); @@ -11520,6 +12059,7 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, xmlFreeParserCtxt(ctxt); return(-1); } + newDoc->properties = XML_DOC_INTERNAL; if (ctx->myDoc->dict) { newDoc->dict = ctx->myDoc->dict; xmlDictReference(newDoc->dict); @@ -11549,7 +12089,7 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, newDoc->children->doc = ctx->myDoc; } - /* + /* * Get the 4 first bytes and decode the charset * if enc != XML_CHAR_ENCODING_NONE * plug some encoding conversion routines. @@ -11571,6 +12111,14 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, */ if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { xmlParseTextDecl(ctxt); + /* + * An XML-1.0 document can't reference an entity not XML-1.0 + */ + if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) && + (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) { + xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, + "Version mismatch between document and entity\n"); + } } /* @@ -11604,7 +12152,7 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, ctxt->linenumbers = ctx->linenumbers; xmlParseContent(ctxt); - + ctx->validate = ctxt->validate; ctx->valid = ctxt->valid; if ((RAW == '<') && (NXT(1) == '/')) { @@ -11647,7 +12195,7 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, newDoc->intSubset = NULL; newDoc->extSubset = NULL; xmlFreeDoc(newDoc); - + return(ret); } @@ -11681,12 +12229,12 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, xmlChar start[4]; xmlCharEncoding enc; - if (depth > 40) { + if (((depth > 40) && + ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) || + (depth > 1024)) { return(XML_ERR_ENTITY_LOOP); } - - if (list != NULL) *list = NULL; if ((URL == NULL) && (ID == NULL)) @@ -11732,6 +12280,7 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, xmlFreeParserCtxt(ctxt); return(XML_ERR_INTERNAL_ERROR); } + newDoc->properties = XML_DOC_INTERNAL; newDoc->intSubset = doc->intSubset; newDoc->extSubset = doc->extSubset; newDoc->dict = doc->dict; @@ -11758,7 +12307,7 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, ctxt->myDoc = doc; newRoot->doc = doc; - /* + /* * Get the 4 first bytes and decode the charset * if enc != XML_CHAR_ENCODING_NONE * plug some encoding conversion routines. @@ -11786,7 +12335,7 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, ctxt->depth = depth; xmlParseContent(ctxt); - + if ((RAW == '<') && (NXT(1) == '/')) { xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); } else if (RAW != 0) { @@ -11819,6 +12368,25 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, } ret = XML_ERR_OK; } + + /* + * Record in the parent context the number of entities replacement + * done when parsing that reference. + */ + oldctxt->nbentities += ctxt->nbentities; + /* + * Also record the size of the entity parsed + */ + if (ctxt->input != NULL) { + oldctxt->sizeentities += ctxt->input->consumed; + oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base); + } + /* + * And record the last error if any + */ + if (ctxt->lastError.code != XML_ERR_OK) + xmlCopyError(&ctxt->lastError, &oldctxt->lastError); + if (sax != NULL) ctxt->sax = oldsax; oldctxt->node_seq.maximum = ctxt->node_seq.maximum; @@ -11831,7 +12399,7 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, newDoc->intSubset = NULL; newDoc->extSubset = NULL; xmlFreeDoc(newDoc); - + return(ret); } @@ -11908,9 +12476,9 @@ xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, * * Returns XML_ERR_OK if the chunk is well balanced, and the parser * error code otherwise - * + * * In case recover is set to 1, the nodelist will not be empty even if - * the parsed chunk is not well balanced. + * the parsed chunk is not well balanced. */ static xmlParserErrors xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, @@ -11924,7 +12492,8 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, int size; xmlParserErrors ret = XML_ERR_OK; - if (oldctxt->depth > 40) { + if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) || + (oldctxt->depth > 1024)) { return(XML_ERR_ENTITY_LOOP); } @@ -11953,7 +12522,7 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, xmlDetectSAX2(ctxt); ctxt->replaceEntities = oldctxt->replaceEntities; ctxt->options = oldctxt->options; - + ctxt->_private = oldctxt->_private; if (oldctxt->myDoc == NULL) { newDoc = xmlNewDoc(BAD_CAST "1.0"); @@ -11963,6 +12532,7 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, xmlFreeParserCtxt(ctxt); return(XML_ERR_INTERNAL_ERROR); } + newDoc->properties = XML_DOC_INTERNAL; newDoc->dict = ctxt->dict; xmlDictReference(newDoc->dict); ctxt->myDoc = newDoc; @@ -12018,7 +12588,7 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, } else { ret = XML_ERR_OK; } - + if ((lst != NULL) && (ret == XML_ERR_OK)) { xmlNodePtr cur; @@ -12047,7 +12617,18 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, ctxt->myDoc->children = content; ctxt->myDoc->last = last; } - + + /* + * Record in the parent context the number of entities replacement + * done when parsing that reference. + */ + oldctxt->nbentities += ctxt->nbentities; + /* + * Also record the last error if any + */ + if (ctxt->lastError.code != XML_ERR_OK) + xmlCopyError(&ctxt->lastError, &oldctxt->lastError); + ctxt->sax = oldsax; ctxt->dict = NULL; ctxt->attsDefault = NULL; @@ -12056,7 +12637,7 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, if (newDoc != NULL) { xmlFreeDoc(newDoc); } - + return(ret); } @@ -12157,7 +12738,7 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, } else options |= XML_PARSE_NODICT; - xmlCtxtUseOptions(ctxt, options); + xmlCtxtUseOptionsInternal(ctxt, options, NULL); xmlDetectSAX2(ctxt); ctxt->myDoc = doc; @@ -12189,7 +12770,7 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, cur = cur->parent; } ctxt->instate = XML_PARSER_CONTENT; - } + } if ((ctxt->validate) || (ctxt->replaceEntities != 0)) { /* @@ -12224,12 +12805,12 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, } else { ret = XML_ERR_OK; } - + /* * Return the newly created nodeset after unlinking it from * the pseudo sibling. */ - + cur = fake->next; fake->next = NULL; node->last = fake; @@ -12257,7 +12838,7 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, if (doc->dict != NULL) ctxt->dict = NULL; xmlFreeParserCtxt(ctxt); - + return(ret); #else /* !SAX2 */ return(XML_ERR_INTERNAL_ERROR); @@ -12327,6 +12908,7 @@ xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, xmlFreeParserCtxt(ctxt); return(-1); } + newDoc->properties = XML_DOC_INTERNAL; if ((doc != NULL) && (doc->dict != NULL)) { xmlDictFree(ctxt->dict); ctxt->dict = doc->dict; @@ -12336,7 +12918,7 @@ xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); ctxt->dictNames = 1; } else { - xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT); + xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL); } if (doc != NULL) { newDoc->intSubset = doc->intSubset; @@ -12399,7 +12981,7 @@ xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, } else { ret = 0; } - + if ((lst != NULL) && ((ret == 0) || (recover == 1))) { xmlNodePtr cur; @@ -12416,15 +12998,15 @@ xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, } newDoc->children->children = NULL; } - - if (sax != NULL) + + if (sax != NULL) ctxt->sax = oldsax; xmlFreeParserCtxt(ctxt); newDoc->intSubset = NULL; newDoc->extSubset = NULL; newDoc->oldNs = NULL; xmlFreeDoc(newDoc); - + return(ret); } @@ -12472,7 +13054,7 @@ xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { if (sax != NULL) ctxt->sax = NULL; xmlFreeParserCtxt(ctxt); - + return(ret); } @@ -12514,7 +13096,7 @@ xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, xmlParserInputPtr inputStream; char *directory = NULL; xmlChar *uri; - + ctxt = xmlNewParserCtxt(); if (ctxt == NULL) { return(NULL); @@ -12556,7 +13138,7 @@ xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, /************************************************************************ * * - * Front ends when parsing from a file * + * Front ends when parsing from a file * * * ************************************************************************/ @@ -12585,9 +13167,9 @@ xmlCreateURLParserCtxt(const char *filename, int options) } if (options) - xmlCtxtUseOptions(ctxt, options); + xmlCtxtUseOptionsInternal(ctxt, options, NULL); ctxt->linenumbers = 1; - + inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); if (inputStream == NULL) { xmlFreeParserCtxt(ctxt); @@ -13363,6 +13945,8 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt) ctxt->depth = 0; ctxt->charset = XML_CHAR_ENCODING_UTF8; ctxt->catalogs = NULL; + ctxt->nbentities = 0; + ctxt->sizeentities = 0; xmlInitNodeInfoSeq(&ctxt->node_seq); if (ctxt->attsDefault != NULL) { @@ -13474,6 +14058,10 @@ xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, if (encoding != NULL) { xmlCharEncodingHandlerPtr hdlr; + if (ctxt->encoding != NULL) + xmlFree((xmlChar *) ctxt->encoding); + ctxt->encoding = xmlStrdup((const xmlChar *) encoding); + hdlr = xmlFindCharEncodingHandler(encoding); if (hdlr != NULL) { xmlSwitchToEncoding(ctxt, hdlr); @@ -13488,50 +14076,63 @@ xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, return(0); } + /** - * xmlCtxtUseOptions: + * xmlCtxtUseOptionsInternal: * @ctxt: an XML parser context * @options: a combination of xmlParserOption + * @encoding: the user provided encoding to use * * Applies the options to the parser context * * Returns 0 in case of success, the set of unknown or unimplemented options * in case of error. */ -int -xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) +static int +xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding) { if (ctxt == NULL) return(-1); + if (encoding != NULL) { + if (ctxt->encoding != NULL) + xmlFree((xmlChar *) ctxt->encoding); + ctxt->encoding = xmlStrdup((const xmlChar *) encoding); + } if (options & XML_PARSE_RECOVER) { ctxt->recovery = 1; options -= XML_PARSE_RECOVER; + ctxt->options |= XML_PARSE_RECOVER; } else ctxt->recovery = 0; if (options & XML_PARSE_DTDLOAD) { ctxt->loadsubset = XML_DETECT_IDS; options -= XML_PARSE_DTDLOAD; + ctxt->options |= XML_PARSE_DTDLOAD; } else ctxt->loadsubset = 0; if (options & XML_PARSE_DTDATTR) { ctxt->loadsubset |= XML_COMPLETE_ATTRS; options -= XML_PARSE_DTDATTR; + ctxt->options |= XML_PARSE_DTDATTR; } if (options & XML_PARSE_NOENT) { ctxt->replaceEntities = 1; /* ctxt->loadsubset |= XML_DETECT_IDS; */ options -= XML_PARSE_NOENT; + ctxt->options |= XML_PARSE_NOENT; } else ctxt->replaceEntities = 0; if (options & XML_PARSE_PEDANTIC) { ctxt->pedantic = 1; options -= XML_PARSE_PEDANTIC; + ctxt->options |= XML_PARSE_PEDANTIC; } else ctxt->pedantic = 0; if (options & XML_PARSE_NOBLANKS) { ctxt->keepBlanks = 0; ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; options -= XML_PARSE_NOBLANKS; + ctxt->options |= XML_PARSE_NOBLANKS; } else ctxt->keepBlanks = 1; if (options & XML_PARSE_DTDVALID) { @@ -13541,6 +14142,7 @@ xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) if (options & XML_PARSE_NOERROR) ctxt->vctxt.error = NULL; options -= XML_PARSE_DTDVALID; + ctxt->options |= XML_PARSE_DTDVALID; } else ctxt->validate = 0; if (options & XML_PARSE_NOWARNING) { @@ -13560,17 +14162,20 @@ xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) ctxt->sax->endElementNs = NULL; ctxt->sax->initialized = 1; options -= XML_PARSE_SAX1; + ctxt->options |= XML_PARSE_SAX1; } #endif /* LIBXML_SAX1_ENABLED */ if (options & XML_PARSE_NODICT) { ctxt->dictNames = 0; options -= XML_PARSE_NODICT; + ctxt->options |= XML_PARSE_NODICT; } else { ctxt->dictNames = 1; } if (options & XML_PARSE_NOCDATA) { ctxt->sax->cdataBlock = NULL; options -= XML_PARSE_NOCDATA; + ctxt->options |= XML_PARSE_NOCDATA; } if (options & XML_PARSE_NSCLEAN) { ctxt->options |= XML_PARSE_NSCLEAN; @@ -13584,11 +14189,39 @@ xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) ctxt->options |= XML_PARSE_COMPACT; options -= XML_PARSE_COMPACT; } + if (options & XML_PARSE_OLD10) { + ctxt->options |= XML_PARSE_OLD10; + options -= XML_PARSE_OLD10; + } + if (options & XML_PARSE_NOBASEFIX) { + ctxt->options |= XML_PARSE_NOBASEFIX; + options -= XML_PARSE_NOBASEFIX; + } + if (options & XML_PARSE_HUGE) { + ctxt->options |= XML_PARSE_HUGE; + options -= XML_PARSE_HUGE; + } ctxt->linenumbers = 1; return (options); } /** + * xmlCtxtUseOptions: + * @ctxt: an XML parser context + * @options: a combination of xmlParserOption + * + * Applies the options to the parser context + * + * Returns 0 in case of success, the set of unknown or unimplemented options + * in case of error. + */ +int +xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) +{ + return(xmlCtxtUseOptionsInternal(ctxt, options, NULL)); +} + +/** * xmlDoRead: * @ctxt: an XML parser context * @URL: the base URL to use for the document @@ -13597,7 +14230,7 @@ xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) * @reuse: keep the context for reuse * * Common front-end for the xmlRead functions - * + * * Returns the resulting document tree or NULL */ static xmlDocPtr @@ -13605,8 +14238,8 @@ xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, int options, int reuse) { xmlDocPtr ret; - - xmlCtxtUseOptions(ctxt, options); + + xmlCtxtUseOptionsInternal(ctxt, options, encoding); if (encoding != NULL) { xmlCharEncodingHandlerPtr hdlr; |