diff options
Diffstat (limited to 'parser.c')
| -rw-r--r-- | parser.c | 1205 |
1 files changed, 805 insertions, 400 deletions
@@ -17,7 +17,7 @@ * parserInternals.c to reduce this file size. * As much as possible the functions are associated with their relative * production in the XML specification. A few productions defining the - * different ranges of character are actually implanted either in + * different ranges of character are actually implanted either in * parserInternals.h or parserInternals.c * The DOM tree build is realized from the default SAX callbacks in * the module SAX.c. @@ -40,6 +40,7 @@ #endif #include <stdlib.h> +#include <limits.h> #include <string.h> #include <stdarg.h> #include <libxml/xmlmemory.h> @@ -83,6 +84,9 @@ #include <lzma.h> #endif +#include "buf.h" +#include "enc.h" + static void xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); @@ -117,10 +121,10 @@ xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, * parser option. */ static int -xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size, +xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size, xmlEntityPtr ent) { - unsigned long consumed = 0; + size_t consumed = 0; if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) return (0); @@ -194,6 +198,17 @@ unsigned int xmlParserMaxDepth = 256; #define XML_PARSER_BUFFER_SIZE 100 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" +/** + * XML_PARSER_CHUNK_SIZE + * + * When calling GROW that's the minimal amount of data + * the parser expected to have received. It is not a hard + * limit but an optimization when reading strings like Names + * It is not strictly needed as long as inputs available characters + * are followed by 0, which should be provided by the I/O level + */ +#define XML_PARSER_CHUNK_SIZE 100 + /* * List of XML prefixed PI allowed by W3C specs */ @@ -233,7 +248,7 @@ xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity); /************************************************************************ * * - * Some factorized error routines * + * Some factorized error routines * * * ************************************************************************/ @@ -285,193 +300,201 @@ static void xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) { const char *errmsg; + char errstr[129] = ""; if ((ctxt != NULL) && (ctxt->disableSAX != 0) && (ctxt->instate == XML_PARSER_EOF)) return; switch (error) { case XML_ERR_INVALID_HEX_CHARREF: - errmsg = "CharRef: invalid hexadecimal value\n"; + errmsg = "CharRef: invalid hexadecimal value"; break; case XML_ERR_INVALID_DEC_CHARREF: - errmsg = "CharRef: invalid decimal value\n"; + errmsg = "CharRef: invalid decimal value"; break; case XML_ERR_INVALID_CHARREF: - errmsg = "CharRef: invalid value\n"; + errmsg = "CharRef: invalid value"; break; case XML_ERR_INTERNAL_ERROR: errmsg = "internal error"; break; case XML_ERR_PEREF_AT_EOF: - errmsg = "PEReference at end of document\n"; + errmsg = "PEReference at end of document"; break; case XML_ERR_PEREF_IN_PROLOG: - errmsg = "PEReference in prolog\n"; + errmsg = "PEReference in prolog"; break; case XML_ERR_PEREF_IN_EPILOG: - errmsg = "PEReference in epilog\n"; + errmsg = "PEReference in epilog"; break; case XML_ERR_PEREF_NO_NAME: - errmsg = "PEReference: no name\n"; + errmsg = "PEReference: no name"; break; case XML_ERR_PEREF_SEMICOL_MISSING: - errmsg = "PEReference: expecting ';'\n"; + errmsg = "PEReference: expecting ';'"; break; case XML_ERR_ENTITY_LOOP: - errmsg = "Detected an entity reference loop\n"; + errmsg = "Detected an entity reference loop"; break; case XML_ERR_ENTITY_NOT_STARTED: - errmsg = "EntityValue: \" or ' expected\n"; + errmsg = "EntityValue: \" or ' expected"; break; case XML_ERR_ENTITY_PE_INTERNAL: - errmsg = "PEReferences forbidden in internal subset\n"; + errmsg = "PEReferences forbidden in internal subset"; break; case XML_ERR_ENTITY_NOT_FINISHED: - errmsg = "EntityValue: \" or ' expected\n"; + errmsg = "EntityValue: \" or ' expected"; break; case XML_ERR_ATTRIBUTE_NOT_STARTED: - errmsg = "AttValue: \" or ' expected\n"; + errmsg = "AttValue: \" or ' expected"; break; case XML_ERR_LT_IN_ATTRIBUTE: - errmsg = "Unescaped '<' not allowed in attributes values\n"; + errmsg = "Unescaped '<' not allowed in attributes values"; break; case XML_ERR_LITERAL_NOT_STARTED: - errmsg = "SystemLiteral \" or ' expected\n"; + errmsg = "SystemLiteral \" or ' expected"; break; case XML_ERR_LITERAL_NOT_FINISHED: - errmsg = "Unfinished System or Public ID \" or ' expected\n"; + errmsg = "Unfinished System or Public ID \" or ' expected"; break; case XML_ERR_MISPLACED_CDATA_END: - errmsg = "Sequence ']]>' not allowed in content\n"; + errmsg = "Sequence ']]>' not allowed in content"; break; case XML_ERR_URI_REQUIRED: - errmsg = "SYSTEM or PUBLIC, the URI is missing\n"; + errmsg = "SYSTEM or PUBLIC, the URI is missing"; break; case XML_ERR_PUBID_REQUIRED: - errmsg = "PUBLIC, the Public Identifier is missing\n"; + errmsg = "PUBLIC, the Public Identifier is missing"; break; case XML_ERR_HYPHEN_IN_COMMENT: - errmsg = "Comment must not contain '--' (double-hyphen)\n"; + errmsg = "Comment must not contain '--' (double-hyphen)"; break; case XML_ERR_PI_NOT_STARTED: - errmsg = "xmlParsePI : no target name\n"; + errmsg = "xmlParsePI : no target name"; break; case XML_ERR_RESERVED_XML_NAME: - errmsg = "Invalid PI name\n"; + errmsg = "Invalid PI name"; break; case XML_ERR_NOTATION_NOT_STARTED: - errmsg = "NOTATION: Name expected here\n"; + errmsg = "NOTATION: Name expected here"; break; case XML_ERR_NOTATION_NOT_FINISHED: - errmsg = "'>' required to close NOTATION declaration\n"; + errmsg = "'>' required to close NOTATION declaration"; break; case XML_ERR_VALUE_REQUIRED: - errmsg = "Entity value required\n"; + errmsg = "Entity value required"; break; case XML_ERR_URI_FRAGMENT: errmsg = "Fragment not allowed"; break; case XML_ERR_ATTLIST_NOT_STARTED: - errmsg = "'(' required to start ATTLIST enumeration\n"; + errmsg = "'(' required to start ATTLIST enumeration"; break; case XML_ERR_NMTOKEN_REQUIRED: - errmsg = "NmToken expected in ATTLIST enumeration\n"; + errmsg = "NmToken expected in ATTLIST enumeration"; break; case XML_ERR_ATTLIST_NOT_FINISHED: - errmsg = "')' required to finish ATTLIST enumeration\n"; + errmsg = "')' required to finish ATTLIST enumeration"; break; case XML_ERR_MIXED_NOT_STARTED: - errmsg = "MixedContentDecl : '|' or ')*' expected\n"; + errmsg = "MixedContentDecl : '|' or ')*' expected"; break; case XML_ERR_PCDATA_REQUIRED: - errmsg = "MixedContentDecl : '#PCDATA' expected\n"; + errmsg = "MixedContentDecl : '#PCDATA' expected"; break; case XML_ERR_ELEMCONTENT_NOT_STARTED: - errmsg = "ContentDecl : Name or '(' expected\n"; + errmsg = "ContentDecl : Name or '(' expected"; break; case XML_ERR_ELEMCONTENT_NOT_FINISHED: - errmsg = "ContentDecl : ',' '|' or ')' expected\n"; + errmsg = "ContentDecl : ',' '|' or ')' expected"; break; case XML_ERR_PEREF_IN_INT_SUBSET: errmsg = - "PEReference: forbidden within markup decl in internal subset\n"; + "PEReference: forbidden within markup decl in internal subset"; break; case XML_ERR_GT_REQUIRED: - errmsg = "expected '>'\n"; + errmsg = "expected '>'"; break; case XML_ERR_CONDSEC_INVALID: - errmsg = "XML conditional section '[' expected\n"; + errmsg = "XML conditional section '[' expected"; break; case XML_ERR_EXT_SUBSET_NOT_FINISHED: - errmsg = "Content error in the external subset\n"; + errmsg = "Content error in the external subset"; break; case XML_ERR_CONDSEC_INVALID_KEYWORD: errmsg = - "conditional section INCLUDE or IGNORE keyword expected\n"; + "conditional section INCLUDE or IGNORE keyword expected"; break; case XML_ERR_CONDSEC_NOT_FINISHED: - errmsg = "XML conditional section not closed\n"; + errmsg = "XML conditional section not closed"; break; case XML_ERR_XMLDECL_NOT_STARTED: - errmsg = "Text declaration '<?xml' required\n"; + errmsg = "Text declaration '<?xml' required"; break; case XML_ERR_XMLDECL_NOT_FINISHED: - errmsg = "parsing XML declaration: '?>' expected\n"; + errmsg = "parsing XML declaration: '?>' expected"; break; case XML_ERR_EXT_ENTITY_STANDALONE: - errmsg = "external parsed entities cannot be standalone\n"; + errmsg = "external parsed entities cannot be standalone"; break; case XML_ERR_ENTITYREF_SEMICOL_MISSING: - errmsg = "EntityRef: expecting ';'\n"; + errmsg = "EntityRef: expecting ';'"; break; case XML_ERR_DOCTYPE_NOT_FINISHED: - errmsg = "DOCTYPE improperly terminated\n"; + errmsg = "DOCTYPE improperly terminated"; break; case XML_ERR_LTSLASH_REQUIRED: - errmsg = "EndTag: '</' not found\n"; + errmsg = "EndTag: '</' not found"; break; case XML_ERR_EQUAL_REQUIRED: - errmsg = "expected '='\n"; + errmsg = "expected '='"; break; case XML_ERR_STRING_NOT_CLOSED: - errmsg = "String not closed expecting \" or '\n"; + errmsg = "String not closed expecting \" or '"; break; case XML_ERR_STRING_NOT_STARTED: - errmsg = "String not started expecting ' or \"\n"; + errmsg = "String not started expecting ' or \""; break; case XML_ERR_ENCODING_NAME: - errmsg = "Invalid XML encoding name\n"; + errmsg = "Invalid XML encoding name"; break; case XML_ERR_STANDALONE_VALUE: - errmsg = "standalone accepts only 'yes' or 'no'\n"; + errmsg = "standalone accepts only 'yes' or 'no'"; break; case XML_ERR_DOCUMENT_EMPTY: - errmsg = "Document is empty\n"; + errmsg = "Document is empty"; break; case XML_ERR_DOCUMENT_END: - errmsg = "Extra content at the end of the document\n"; + errmsg = "Extra content at the end of the document"; break; case XML_ERR_NOT_WELL_BALANCED: - errmsg = "chunk is not well balanced\n"; + errmsg = "chunk is not well balanced"; break; case XML_ERR_EXTRA_CONTENT: - errmsg = "extra content at the end of well balanced chunk\n"; + errmsg = "extra content at the end of well balanced chunk"; break; case XML_ERR_VERSION_MISSING: - errmsg = "Malformed declaration expecting version\n"; + errmsg = "Malformed declaration expecting version"; + break; + case XML_ERR_NAME_TOO_LONG: + errmsg = "Name too long use XML_PARSE_HUGE option"; break; #if 0 case: - errmsg = "\n"; + errmsg = ""; break; #endif default: - errmsg = "Unregistered error message\n"; + errmsg = "Unregistered error message"; } + if (info == NULL) + snprintf(errstr, 128, "%s\n", errmsg); + else + snprintf(errstr, 128, "%s: %%s\n", errmsg); if (ctxt != NULL) ctxt->errNo = error; __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, - XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg, + XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0], info); if (ctxt != NULL) { ctxt->wellFormed = 0; @@ -626,7 +649,7 @@ xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, */ static void xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, - const char *msg, const xmlChar *str1, int val, + const char *msg, const xmlChar *str1, int val, const xmlChar *str2) { if ((ctxt != NULL) && (ctxt->disableSAX != 0) && @@ -754,7 +777,7 @@ xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, /************************************************************************ * * - * Library wide options * + * Library wide options * * * ************************************************************************/ @@ -978,7 +1001,7 @@ xmlHasFeature(xmlFeature feature) /************************************************************************ * * - * SAX2 defaulted attributes handling * + * SAX2 defaulted attributes handling * * * ************************************************************************/ @@ -1002,8 +1025,8 @@ xmlDetectSAX2(xmlParserCtxtPtr ctxt) { ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); - if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || - (ctxt->str_xml_ns == NULL)) { + if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || + (ctxt->str_xml_ns == NULL)) { xmlErrMemory(ctxt, NULL); } } @@ -1922,7 +1945,7 @@ static int spacePop(xmlParserCtxtPtr ctxt) { * to compare on ASCII based substring. * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined * strings without newlines within the parser. - * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII + * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII * defined char within the parser. * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding * @@ -1971,10 +1994,10 @@ static int spacePop(xmlParserCtxtPtr ctxt) { #define SKIPL(val) do { \ int skipl; \ for(skipl=0; skipl<val; skipl++) { \ - if (*(ctxt->input->cur) == '\n') { \ + if (*(ctxt->input->cur) == '\n') { \ ctxt->input->line++; ctxt->input->col = 1; \ - } else ctxt->input->col++; \ - ctxt->nbChars++; \ + } else ctxt->input->col++; \ + ctxt->nbChars++; \ ctxt->input->cur++; \ } \ if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ @@ -2000,6 +2023,12 @@ static void xmlSHRINK (xmlParserCtxtPtr ctxt) { xmlGROW (ctxt); static void xmlGROW (xmlParserCtxtPtr ctxt) { + if ((((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) || + ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); + ctxt->instate = XML_PARSER_EOF; + } xmlParserInputGrow(ctxt->input, INPUT_CHUNK); if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) && (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) @@ -2144,6 +2173,8 @@ xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); } ret = inputPush(ctxt, input); + if (ctxt->instate == XML_PARSER_EOF) + return(-1); GROW; return(ret); } @@ -2159,7 +2190,7 @@ xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { * * [ WFC: Legal Character ] * Characters referred to using character references must match the - * production for Char. + * production for Char. * * Returns the value parsed (as an int), 0 in case of error */ @@ -2180,8 +2211,10 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) { if (count++ > 20) { count = 0; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(0); } - if ((RAW >= '0') && (RAW <= '9')) + if ((RAW >= '0') && (RAW <= '9')) val = val * 16 + (CUR - '0'); else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) val = val * 16 + (CUR - 'a') + 10; @@ -2211,8 +2244,10 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) { if (count++ > 20) { count = 0; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(0); } - if ((RAW >= '0') && (RAW <= '9')) + if ((RAW >= '0') && (RAW <= '9')) val = val * 10 + (CUR - '0'); else { xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); @@ -2238,7 +2273,7 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) { /* * [ WFC: Legal Character ] * Characters referred to using character references must match the - * production for Char. + * production for Char. */ if ((IS_CHAR(val) && (outofrange == 0))) { return(val); @@ -2263,7 +2298,7 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) { * * [ WFC: Legal Character ] * Characters referred to using character references must match the - * production for Char. + * production for Char. * * Returns the value parsed (as an int), 0 in case of error, str will be * updated to the current value of the index @@ -2282,7 +2317,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { ptr += 3; cur = *ptr; while (cur != ';') { /* Non input consuming loop */ - if ((cur >= '0') && (cur <= '9')) + if ((cur >= '0') && (cur <= '9')) val = val * 16 + (cur - '0'); else if ((cur >= 'a') && (cur <= 'f')) val = val * 16 + (cur - 'a') + 10; @@ -2305,7 +2340,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { ptr += 2; cur = *ptr; while (cur != ';') { /* Non input consuming loops */ - if ((cur >= '0') && (cur <= '9')) + if ((cur >= '0') && (cur <= '9')) val = val * 10 + (cur - '0'); else { xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); @@ -2329,7 +2364,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { /* * [ WFC: Legal Character ] * Characters referred to using character references must match the - * production for Char. + * production for Char. */ if ((IS_CHAR(val) && (outofrange == 0))) { return(val); @@ -2351,9 +2386,9 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { * * Returns the new input stream or NULL */ - + static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} - + static xmlParserInputPtr xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { xmlParserInputPtr input; @@ -2376,7 +2411,7 @@ xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { if (buffer == NULL) { xmlErrMemory(ctxt, NULL); xmlFree(input); - return(NULL); + return(NULL); } buffer [0] = ' '; buffer [1] = '%'; @@ -2395,12 +2430,12 @@ xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { /** * xmlParserHandlePEReference: * @ctxt: the parser context - * + * * [69] PEReference ::= '%' Name ';' * * [ WFC: No Recursion ] * A parsed entity must not contain a recursive - * reference to itself, either directly or indirectly. + * reference to itself, either directly or indirectly. * * [ WFC: Entity Declared ] * In a document without any DTD, a document with only an internal DTD @@ -2418,9 +2453,9 @@ xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { * NOTE: misleading but this is handled. * * A PEReference may have been detected in the current input stream - * the handling is done accordingly to + * the handling is done accordingly to * http://www.w3.org/TR/REC-xml#entproc - * i.e. + * i.e. * - Included in literal in entity values * - Included as Parameter Entity reference within DTDs */ @@ -2498,7 +2533,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) entity = ctxt->sax->getParameterEntity(ctxt->userData, name); if (entity == NULL) { - + /* * [ WFC: Entity Declared ] * In a document without any DTD, a document with only an @@ -2524,7 +2559,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, "PEReference: %%%s; not found\n", name, NULL); - } else + } else xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, "PEReference: %%%s; not found\n", name, NULL); @@ -2549,7 +2584,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { if (xmlPushInput(ctxt, input) < 0) return; - /* + /* * Get the 4 first bytes and decode the charset * if enc != XML_CHAR_ENCODING_NONE * plug some encoding conversion routines. @@ -2559,6 +2594,8 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { * the amount of data in the buffer. */ GROW + if (ctxt->instate == XML_PARSER_EOF) + return; if ((ctxt->input->end - ctxt->input->cur)>=4) { start[0] = RAW; start[1] = NXT(1); @@ -2589,15 +2626,17 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { /* * Macro used to grow the current buffer. + * buffer##_size is expected to be a size_t + * mem_error: is expected to handle memory allocation failures */ #define growBuffer(buffer, n) { \ xmlChar *tmp; \ - buffer##_size *= 2; \ - buffer##_size += n; \ - tmp = (xmlChar *) \ - xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ + size_t new_size = buffer##_size * 2 + n; \ + if (new_size < buffer##_size) goto mem_error; \ + tmp = (xmlChar *) xmlRealloc(buffer, new_size); \ if (tmp == NULL) goto mem_error; \ buffer = tmp; \ + buffer##_size = new_size; \ } /** @@ -2609,7 +2648,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { * @end: an end marker xmlChar, 0 if none * @end2: an end marker xmlChar, 0 if none * @end3: an end marker xmlChar, 0 if none - * + * * Takes a entity string content and process to do the adequate substitutions. * * [67] Reference ::= EntityRef | CharRef @@ -2623,14 +2662,14 @@ xmlChar * xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, int what, xmlChar end, xmlChar end2, xmlChar end3) { xmlChar *buffer = NULL; - int buffer_size = 0; + size_t buffer_size = 0; + size_t nbchars = 0; xmlChar *current = NULL; xmlChar *rep = NULL; const xmlChar *last; xmlEntityPtr ent; int c,l; - int nbchars = 0; if ((ctxt == NULL) || (str == NULL) || (len < 0)) return(NULL); @@ -2647,7 +2686,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, * allocate a translation buffer. */ buffer_size = XML_PARSER_BIG_BUFFER_SIZE; - buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar)); + buffer = (xmlChar *) xmlMallocAtomic(buffer_size); if (buffer == NULL) goto mem_error; /* @@ -2667,7 +2706,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, if (val != 0) { COPY_BUF(0,buffer,nbchars,val); } - if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { @@ -2685,7 +2724,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { if (ent->content != NULL) { COPY_BUF(0,buffer,nbchars,ent->content[0]); - if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } } else { @@ -2702,8 +2741,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, current = rep; while (*current != 0) { /* non input consuming loop */ buffer[nbchars++] = *current++; - if (nbchars > - buffer_size - XML_PARSER_BUFFER_SIZE) { + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { if (xmlParserEntityCheck(ctxt, nbchars, ent)) goto int_error; growBuffer(buffer, XML_PARSER_BUFFER_SIZE); @@ -2717,7 +2755,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, const xmlChar *cur = ent->name; buffer[nbchars++] = '&'; - if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { + if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) { growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE); } for (;i > 0;i--) @@ -2745,8 +2783,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, current = rep; while (*current != 0) { /* non input consuming loop */ buffer[nbchars++] = *current++; - if (nbchars > - buffer_size - XML_PARSER_BUFFER_SIZE) { + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { if (xmlParserEntityCheck(ctxt, nbchars, ent)) goto int_error; growBuffer(buffer, XML_PARSER_BUFFER_SIZE); @@ -2759,8 +2796,8 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, } else { COPY_BUF(l,buffer,nbchars,c); str += l; - if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { - growBuffer(buffer, XML_PARSER_BUFFER_SIZE); + if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { + growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } } if (str < last) @@ -2789,7 +2826,7 @@ int_error: * @end: an end marker xmlChar, 0 if none * @end2: an end marker xmlChar, 0 if none * @end3: an end marker xmlChar, 0 if none - * + * * Takes a entity string content and process to do the adequate substitutions. * * [67] Reference ::= EntityRef | CharRef @@ -3152,7 +3189,7 @@ xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { } else { if ((IS_LETTER(c)) || (IS_DIGIT(c)) || (c == '.') || (c == '-') || - (c == '_') || (c == ':') || + (c == '_') || (c == ':') || (IS_COMBINING(c)) || (IS_EXTENDER(c))) return(1); @@ -3177,6 +3214,8 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) { * Handler for more complex cases */ GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); c = CUR_CHAR(l); if ((ctxt->options & XML_PARSE_OLD10) == 0) { /* @@ -3225,9 +3264,11 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) { ((c >= 0xFDF0) && (c <= 0xFFFD)) || ((c >= 0x10000) && (c <= 0xEFFFF)) )) { - if (count++ > 100) { + if (count++ > XML_PARSER_CHUNK_SIZE) { count = 0; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); } len += l; NEXTL(l); @@ -3246,18 +3287,32 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) { while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ ((IS_LETTER(c)) || (IS_DIGIT(c)) || (c == '.') || (c == '-') || - (c == '_') || (c == ':') || + (c == '_') || (c == ':') || (IS_COMBINING(c)) || (IS_EXTENDER(c)))) { - if (count++ > 100) { + if (count++ > XML_PARSER_CHUNK_SIZE) { count = 0; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); } len += l; NEXTL(l); c = CUR_CHAR(l); + if (c == 0) { + count = 0; + GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); + c = CUR_CHAR(l); + } } } + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); + return(NULL); + } if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); @@ -3307,6 +3362,11 @@ xmlParseName(xmlParserCtxtPtr ctxt) { in++; if ((*in > 0) && (*in < 0x80)) { count = in - ctxt->input->cur; + if ((count > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); + return(NULL); + } ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); ctxt->input->cur = in; ctxt->nbChars += count; @@ -3342,13 +3402,32 @@ xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ (xmlIsNameChar(ctxt, c) && (c != ':'))) { - if (count++ > 100) { + if (count++ > XML_PARSER_CHUNK_SIZE) { + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); + return(NULL); + } count = 0; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); } len += l; NEXTL(l); c = CUR_CHAR(l); + if (c == 0) { + count = 0; + GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); + c = CUR_CHAR(l); + } + } + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); + return(NULL); } return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); } @@ -3394,6 +3473,11 @@ xmlParseNCName(xmlParserCtxtPtr ctxt) { in++; if ((*in > 0) && (*in < 0x80)) { count = in - ctxt->input->cur; + if ((count > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); + return(NULL); + } ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); ctxt->input->cur = in; ctxt->nbChars += count; @@ -3425,6 +3509,8 @@ xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { const xmlChar *ret; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); in = ctxt->input->cur; while (*in != 0 && *in == *cmp) { @@ -3460,7 +3546,7 @@ xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { * * [6] Names ::= Name (#x20 Name)* * - * Returns the Name parsed or NULL. The @str pointer + * Returns the Name parsed or NULL. The @str pointer * is updated to the current location in the string. */ @@ -3504,6 +3590,13 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { while (xmlIsNameChar(ctxt, c)) { if (len + 10 > max) { xmlChar *tmp; + + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); + xmlFree(buffer); + return(NULL); + } max *= 2; tmp = (xmlChar *) xmlRealloc(buffer, max * sizeof(xmlChar)); @@ -3523,6 +3616,11 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { return(buffer); } } + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); + return(NULL); + } *str = cur; return(xmlStrndup(buf, len)); } @@ -3552,16 +3650,25 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) { #endif GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); c = CUR_CHAR(l); while (xmlIsNameChar(ctxt, c)) { - if (count++ > 100) { + if (count++ > XML_PARSER_CHUNK_SIZE) { count = 0; GROW; } COPY_BUF(l,buf,len,c); NEXTL(l); c = CUR_CHAR(l); + if (c == 0) { + count = 0; + GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); + c = CUR_CHAR(l); + } if (len >= XML_MAX_NAMELEN) { /* * Okay someone managed to make a huge token, so he's ready to pay @@ -3577,13 +3684,23 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) { } memcpy(buffer, buf, len); while (xmlIsNameChar(ctxt, c)) { - if (count++ > 100) { + if (count++ > XML_PARSER_CHUNK_SIZE) { count = 0; GROW; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buffer); + return(NULL); + } } if (len + 10 > max) { xmlChar *tmp; + if ((max > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); + xmlFree(buffer); + return(NULL); + } max *= 2; tmp = (xmlChar *) xmlRealloc(buffer, max * sizeof(xmlChar)); @@ -3604,6 +3721,11 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) { } if (len == 0) return(NULL); + if ((len > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); + return(NULL); + } return(xmlStrndup(buf, len)); } @@ -3650,6 +3772,10 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { ctxt->instate = XML_PARSER_ENTITY_VALUE; input = ctxt->input; GROW; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return(NULL); + } NEXT; c = CUR_CHAR(l); /* @@ -3657,12 +3783,12 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { * When a parameter entity reference appears in a literal entity * value, ... a single or double quote character in the replacement * text is always treated as a normal data character and will not - * terminate the literal. + * terminate the literal. * In practice it means we stop the loop only when back at parsing * the initial entity and the quote is found */ - while ((IS_CHAR(c)) && ((c != stop) || /* checked */ - (ctxt->input != input))) { + while (((IS_CHAR(c)) && ((c != stop) || /* checked */ + (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) { if (len + 5 >= size) { xmlChar *tmp; @@ -3691,6 +3817,10 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { } } buf[len] = 0; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return(NULL); + } /* * Raise problem w.r.t. '&' and '%' being used in non-entities @@ -3738,12 +3868,12 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { */ ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 0, 0, 0); - if (orig != NULL) + if (orig != NULL) *orig = buf; else xmlFree(buf); } - + return(ret); } @@ -3764,8 +3894,8 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { xmlChar limit = 0; xmlChar *buf = NULL; xmlChar *rep = NULL; - int len = 0; - int buf_size = 0; + size_t len = 0; + size_t buf_size = 0; int c, l, in_space = 0; xmlChar *current = NULL; xmlEntityPtr ent; @@ -3787,15 +3917,26 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { * allocate a translation buffer. */ buf_size = XML_PARSER_BUFFER_SIZE; - buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar)); + buf = (xmlChar *) xmlMallocAtomic(buf_size); if (buf == NULL) goto mem_error; /* * OK loop until we reach one of the ending char or a size limit. */ c = CUR_CHAR(l); - while ((NXT(0) != limit) && /* checked */ - (IS_CHAR(c)) && (c != '<')) { + while (((NXT(0) != limit) && /* checked */ + (IS_CHAR(c)) && (c != '<')) && + (ctxt->instate != XML_PARSER_EOF)) { + /* + * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE + * special option is given + */ + if ((len > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue lenght too long\n"); + goto mem_error; + } if (c == 0) break; if (c == '&') { in_space = 0; @@ -3804,7 +3945,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { if (val == '&') { if (ctxt->replaceEntities) { - if (len > buf_size - 10) { + if (len + 10 > buf_size) { growBuffer(buf, 10); } buf[len++] = '&'; @@ -3813,7 +3954,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { * The reparsing will be done in xmlStringGetNodeList() * called by the attribute() function in SAX.c */ - if (len > buf_size - 10) { + if (len + 10 > buf_size) { growBuffer(buf, 10); } buf[len++] = '&'; @@ -3823,7 +3964,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { buf[len++] = ';'; } } else if (val != 0) { - if (len > buf_size - 10) { + if (len + 10 > buf_size) { growBuffer(buf, 10); } len += xmlCopyChar(0, &buf[len], val); @@ -3835,7 +3976,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { ctxt->nbentities += ent->owner; if ((ent != NULL) && (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { - if (len > buf_size - 10) { + if (len + 10 > buf_size) { growBuffer(buf, 10); } if ((ctxt->replaceEntities == 0) && @@ -3848,7 +3989,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { } else { buf[len++] = ent->content[0]; } - } else if ((ent != NULL) && + } else if ((ent != NULL) && (ctxt->replaceEntities != 0)) { if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { rep = xmlStringDecodeEntities(ctxt, ent->content, @@ -3863,7 +4004,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { current++; } else buf[len++] = *current++; - if (len > buf_size - 10) { + if (len + 10 > buf_size) { growBuffer(buf, 10); } } @@ -3871,7 +4012,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { rep = NULL; } } else { - if (len > buf_size - 10) { + if (len + 10 > buf_size) { growBuffer(buf, 10); } if (ent->content != NULL) @@ -3899,7 +4040,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { * Just output the reference */ buf[len++] = '&'; - while (len > buf_size - i - 10) { + while (len + i + 10 > buf_size) { growBuffer(buf, i + 10); } for (;i > 0;i--) @@ -3912,7 +4053,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { if ((len != 0) || (!normalize)) { if ((!normalize) || (!in_space)) { COPY_BUF(l,buf,len,0x20); - while (len > buf_size - 10) { + while (len + 10 > buf_size) { growBuffer(buf, 10); } } @@ -3921,7 +4062,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { } else { in_space = 0; COPY_BUF(l,buf,len,c); - if (len > buf_size - 10) { + if (len + 10 > buf_size) { growBuffer(buf, 10); } } @@ -3930,6 +4071,9 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { GROW; c = CUR_CHAR(l); } + if (ctxt->instate == XML_PARSER_EOF) + goto error; + if ((in_space) && (normalize)) { while (buf[len - 1] == 0x20) len--; } @@ -3946,11 +4090,23 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { } } else NEXT; - if (attlen != NULL) *attlen = len; + + /* + * There we potentially risk an overflow, don't allow attribute value of + * lenght more than INT_MAX it is a very reasonnable assumption ! + */ + if (len >= INT_MAX) { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue lenght too long\n"); + goto mem_error; + } + + if (attlen != NULL) *attlen = (int) len; return(buf); mem_error: xmlErrMemory(ctxt, NULL); +error: if (buf != NULL) xmlFree(buf); if (rep != NULL) @@ -3971,20 +4127,20 @@ mem_error: * * 3.3.3 Attribute-Value Normalization: * Before the value of an attribute is passed to the application or - * checked for validity, the XML processor must normalize it as follows: + * checked for validity, the XML processor must normalize it as follows: * - a character reference is processed by appending the referenced * character to the attribute value * - an entity reference is processed by recursively processing the - * replacement text of the entity + * replacement text of the entity * - a whitespace character (#x20, #xD, #xA, #x9) is processed by * appending #x20 to the normalized value, except that only a single * #x20 is appended for a "#xD#xA" sequence that is part of an external - * parsed entity or the literal entity value of an internal parsed entity - * - other characters are processed by appending them to the normalized value + * parsed entity or the literal entity value of an internal parsed entity + * - other characters are processed by appending them to the normalized value * If the declared value is not CDATA, then the XML processor must further * process the normalized attribute value by discarding any leading and * trailing space (#x20) characters, and by replacing sequences of space - * (#x20) characters by a single space (#x20) character. + * (#x20) characters by a single space (#x20) character. * All attributes for which no declaration has been read should be treated * by a non-validating parser as if declared CDATA. * @@ -4001,7 +4157,7 @@ xmlParseAttValue(xmlParserCtxtPtr ctxt) { /** * xmlParseSystemLiteral: * @ctxt: an XML parser context - * + * * parse an XML Literal * * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") @@ -4030,7 +4186,7 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); return(NULL); } - + buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); if (buf == NULL) { xmlErrMemory(ctxt, NULL); @@ -4042,6 +4198,13 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { if (len + 5 >= size) { xmlChar *tmp; + if ((size > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral"); + xmlFree(buf); + ctxt->instate = (xmlParserInputState) state; + return(NULL); + } size *= 2; tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); if (tmp == NULL) { @@ -4056,6 +4219,10 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { if (count > 50) { GROW; count = 0; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return(NULL); + } } COPY_BUF(l,buf,len,cur); NEXTL(l); @@ -4119,6 +4286,12 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { if (len + 1 >= size) { xmlChar *tmp; + if ((size > XML_MAX_NAME_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID"); + xmlFree(buf); + return(NULL); + } size *= 2; tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); if (tmp == NULL) { @@ -4133,6 +4306,10 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { if (count > 50) { GROW; count = 0; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return(NULL); + } } NEXT; cur = CUR; @@ -4203,7 +4380,7 @@ static const unsigned char test_char_data[256] = { * The right angle bracket (>) may be represented using the string ">", * and must, for compatibility, be escaped using ">" or a character * reference when it appears in the string "]]>" in content, when that - * string is not marking the end of a CDATA section. + * string is not marking the end of a CDATA section. * * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */ @@ -4339,6 +4516,8 @@ get_more: } SHRINK; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return; in = ctxt->input->cur; } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); nbchar = 0; @@ -4368,7 +4547,7 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { GROW; cur = CUR_CHAR(l); while ((cur != '<') && /* checked */ - (cur != '&') && + (cur != '&') && (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { @@ -4407,6 +4586,8 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { if (count > 50) { GROW; count = 0; + if (ctxt->instate == XML_PARSER_EOF) + return; } NEXTL(l); cur = CUR_CHAR(l); @@ -4499,7 +4680,7 @@ xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { } } else { /* - * We handle [83] so we return immediately, if + * We handle [83] so we return immediately, if * "S SystemLiteral" is not detected. From a purely parsing * point of view that's a nice mess. */ @@ -4508,7 +4689,7 @@ xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { ptr = CUR_PTR; if (!IS_BLANK_CH(*ptr)) return(NULL); - + while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */ if ((*ptr != '\'') && (*ptr != '"')) return(NULL); } @@ -4536,11 +4717,12 @@ xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */ static void -xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) { +xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, + size_t len, size_t size) { int q, ql; int r, rl; int cur, l; - int count = 0; + size_t count = 0; int inputid; inputid = ctxt->input->id; @@ -4586,16 +4768,26 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) { if ((r == '-') && (q == '-')) { xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); } + if ((len > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, + "Comment too big found", NULL); + xmlFree (buf); + return; + } if (len + 5 >= size) { xmlChar *new_buf; - size *= 2; - new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + size_t new_size; + + new_size = size * 2; + new_buf = (xmlChar *) xmlRealloc(buf, new_size); if (new_buf == NULL) { xmlFree (buf); xmlErrMemory(ctxt, NULL); return; } buf = new_buf; + size = new_size; } COPY_BUF(ql,buf,len,q); q = r; @@ -4607,6 +4799,10 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) { if (count > 50) { GROW; count = 0; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return; + } } NEXTL(l); cur = CUR_CHAR(l); @@ -4656,11 +4852,12 @@ not_terminated: void xmlParseComment(xmlParserCtxtPtr ctxt) { xmlChar *buf = NULL; - int size = XML_PARSER_BUFFER_SIZE; - int len = 0; + size_t size = XML_PARSER_BUFFER_SIZE; + size_t len = 0; xmlParserInputState state; const xmlChar *in; - int nbchar = 0, ccol; + size_t nbchar = 0; + int ccol; int inputid; /* @@ -4740,6 +4937,13 @@ get_more: buf[len] = 0; } } + if ((len > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, + "Comment too big found", NULL); + xmlFree (buf); + return; + } ctxt->input->cur = in; if (*in == 0xA) { in++; @@ -4757,6 +4961,10 @@ get_more: } SHRINK; GROW; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return; + } in = ctxt->input->cur; if (*in == '-') { if (in[1] == '-') { @@ -4803,7 +5011,7 @@ get_more: /** * xmlParsePITarget: * @ctxt: an XML parser context - * + * * parse the name of a PI * * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) @@ -4840,7 +5048,7 @@ xmlParsePITarget(xmlParserCtxtPtr ctxt) { NULL, NULL); } if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) { - xmlNsErr(ctxt, XML_NS_ERR_COLON, + xmlNsErr(ctxt, XML_NS_ERR_COLON, "colon are forbidden from PI names '%s'\n", name, NULL, NULL); } return(name); @@ -4851,7 +5059,7 @@ xmlParsePITarget(xmlParserCtxtPtr ctxt) { * xmlParseCatalogPI: * @ctxt: an XML parser context * @catalog: the PI value string - * + * * parse an XML Catalog Processing Instruction. * * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> @@ -4911,7 +5119,7 @@ error: /** * xmlParsePI: * @ctxt: an XML parser context - * + * * parse an XML Processing Instruction. * * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' @@ -4922,8 +5130,8 @@ error: void xmlParsePI(xmlParserCtxtPtr ctxt) { xmlChar *buf = NULL; - int len = 0; - int size = XML_PARSER_BUFFER_SIZE; + size_t len = 0; + size_t size = XML_PARSER_BUFFER_SIZE; int cur, l; const xmlChar *target; xmlParserInputState state; @@ -4980,9 +5188,8 @@ xmlParsePI(xmlParserCtxtPtr ctxt) { ((cur != '?') || (NXT(1) != '>'))) { if (len + 5 >= size) { xmlChar *tmp; - - size *= 2; - tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + size_t new_size = size * 2; + tmp = (xmlChar *) xmlRealloc(buf, new_size); if (tmp == NULL) { xmlErrMemory(ctxt, NULL); xmlFree(buf); @@ -4990,11 +5197,24 @@ xmlParsePI(xmlParserCtxtPtr ctxt) { return; } buf = tmp; + size = new_size; } count++; if (count > 50) { GROW; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return; + } count = 0; + if ((len > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, + "PI %s too big found", target); + xmlFree(buf); + ctxt->instate = state; + return; + } } COPY_BUF(l,buf,len,cur); NEXTL(l); @@ -5005,6 +5225,14 @@ xmlParsePI(xmlParserCtxtPtr ctxt) { cur = CUR_CHAR(l); } } + if ((len > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, + "PI %s too big found", target); + xmlFree(buf); + ctxt->instate = state; + return; + } buf[len] = 0; if (cur != '?') { xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, @@ -5066,7 +5294,7 @@ xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { const xmlChar *name; xmlChar *Pubid; xmlChar *Systemid; - + if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { xmlParserInputPtr input = ctxt->input; SHRINK; @@ -5089,7 +5317,7 @@ xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { return; } if (xmlStrchr(name, ':') != NULL) { - xmlNsErr(ctxt, XML_NS_ERR_COLON, + xmlNsErr(ctxt, XML_NS_ERR_COLON, "colon are forbidden from notation names '%s'\n", name, NULL, NULL); } @@ -5149,7 +5377,7 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { int isParameter = 0; xmlChar *orig = NULL; int skipped; - + /* GROW; done in the caller */ if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { xmlParserInputPtr input = ctxt->input; @@ -5178,7 +5406,7 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { return; } if (xmlStrchr(name, ':') != NULL) { - xmlNsErr(ctxt, XML_NS_ERR_COLON, + xmlNsErr(ctxt, XML_NS_ERR_COLON, "colon are forbidden from entities names '%s'\n", name, NULL, NULL); } @@ -5406,13 +5634,13 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { * * [ VC: Fixed Attribute Default ] * if an attribute has a default value declared with the #FIXED - * keyword, instances of that attribute must match the default value. + * keyword, instances of that attribute must match the default value. * * [ WFC: No < in Attribute Values ] * handled in xmlParseAttValue() * * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED - * or XML_ATTRIBUTE_FIXED. + * or XML_ATTRIBUTE_FIXED. */ int @@ -5461,7 +5689,7 @@ xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { * * [ VC: Notation Attributes ] * Values of this type must match one of the notation names included - * in the declaration; all notation names in the declaration must be declared. + * in the declaration; all notation names in the declaration must be declared. * * Returns: the notation attribute tree built while parsing */ @@ -5661,15 +5889,15 @@ xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { * [ VC: Entity Name ] * Values of type ENTITY must match the Name production, values * of type ENTITIES must match Names; each Entity Name must match the - * name of an unparsed entity declared in the DTD. + * name of an unparsed entity declared in the DTD. * * [ VC: Name Token ] * Values of type NMTOKEN must match the Nmtoken production; values - * of type NMTOKENS must match Nmtokens. + * of type NMTOKENS must match Nmtokens. * * Returns the attribute type */ -int +int xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { SHRINK; if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { @@ -5734,7 +5962,7 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { } SKIP_BLANKS; GROW; - while (RAW != '>') { + while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) { const xmlChar *check = CUR_PTR; int type; int def; @@ -5812,7 +6040,7 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { xmlFreeEnumeration(tree); if ((ctxt->sax2) && (defaultValue != NULL) && - (def != XML_ATTRIBUTE_IMPLIED) && + (def != XML_ATTRIBUTE_IMPLIED) && (def != XML_ATTRIBUTE_REQUIRED)) { xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue); } @@ -5841,7 +6069,7 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { * * parse the declaration for a Mixed Element content * The leading '(' and spaces have been skipped in xmlParseElementContentDecl - * + * * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | * '(' S? '#PCDATA' S? ')' * @@ -5849,7 +6077,7 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { * * [ VC: No Duplicate Types ] * The same name must not appear more than once in a single - * mixed-content declaration. + * mixed-content declaration. * * returns: the list of the xmlElementContentPtr describing the element choices */ @@ -5883,7 +6111,7 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); if (ret == NULL) return(NULL); } - while (RAW == '|') { + while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) { NEXT; if (elem == NULL) { ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); @@ -5949,7 +6177,7 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { * * parse the declaration for a Mixed Element content * The leading '(' and spaces have been skipped in xmlParseElementContentDecl - * + * * * [47] children ::= (choice | seq) ('?' | '*' | '+')? * @@ -5970,7 +6198,7 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { * be empty, and neither the first nor last non-blank character of * the replacement text should be a connector (| or ,). * - * Returns the tree of xmlElementContentPtr describing the element + * Returns the tree of xmlElementContentPtr describing the element * hierarchy. */ static xmlElementContentPtr @@ -6027,7 +6255,7 @@ xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk, } SKIP_BLANKS; SHRINK; - while (RAW != ')') { + while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) { /* * Each loop we parse one separator and one element. */ @@ -6283,7 +6511,7 @@ xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { * * parse the declaration for an Element content either Mixed or Children, * the cases EMPTY and ANY are handled directly in xmlParseElementDecl - * + * * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children * * returns: the type of element content XML_ELEMENT_TYPE_xxx @@ -6306,6 +6534,8 @@ xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, } NEXT; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(-1); SKIP_BLANKS; if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { tree = xmlParseElementMixedContentDecl(ctxt, inputid); @@ -6409,7 +6639,7 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) { xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, "Element declaration doesn't start and stop in the same entity\n"); } - + NEXT; if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && (ctxt->sax->elementDecl != NULL)) { @@ -6421,7 +6651,7 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) { /* * this is a trick: if xmlAddElementDecl is called, * instead of copying the full tree it is plugged directly - * if called from the parser. Avoid duplicating the + * if called from the parser. Avoid duplicating the * interfaces or change the API/ABI */ xmlFreeDocElementContent(ctxt->myDoc, content); @@ -6438,8 +6668,8 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) { * xmlParseConditionalSections * @ctxt: an XML parser context * - * [61] conditionalSect ::= includeSect | ignoreSect - * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' + * [61] conditionalSect ::= includeSect | ignoreSect + * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>' * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) @@ -6473,8 +6703,8 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { "Entering INCLUDE Conditional Section\n"); } - while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || - (NXT(2) != '>'))) { + while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || + (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) { const xmlChar *check = CUR_PTR; unsigned int cons = ctxt->input->consumed; @@ -6542,7 +6772,8 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { if (ctxt->recovery == 0) ctxt->disableSAX = 1; ctxt->instate = XML_PARSER_IGNORE; - while ((depth >= 0) && (RAW != 0)) { + while (((depth >= 0) && (RAW != 0)) && + (ctxt->instate != XML_PARSER_EOF)) { if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { depth++; SKIP(3); @@ -6590,7 +6821,7 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { /** * xmlParseMarkupDecl: * @ctxt: an XML parser context - * + * * parse Markup declarations * * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | @@ -6607,7 +6838,7 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { * In the internal DTD subset, parameter-entity references can occur * only where markup declarations can occur, not within markup declarations. * (This does not apply to references that occur in external parameter - * entities or to the external subset.) + * entities or to the external subset.) */ void xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { @@ -6736,7 +6967,7 @@ xmlParseTextDecl(xmlParserCtxtPtr ctxt) { * @ctxt: an XML parser context * @ExternalID: the external identifier * @SystemID: the system identifier (or URL) - * + * * parse Markup declarations from an external subset * * [30] extSubset ::= textDecl? extSubsetDecl @@ -6813,7 +7044,7 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, break; } } - + if (RAW != 0) { xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); } @@ -6915,8 +7146,15 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { * The first reference to the entity trigger a parsing phase * where the ent->children is filled with the result from * the parsing. - */ - if (ent->checked == 0) { + * Note: external parsed entities will not be loaded, it is not + * required for a non-validating parser, unless the parsing option + * of validating, or substituting entities were given. Doing so is + * far more secure as the parser will only process data coming from + * the document entity by default. + */ + if ((ent->checked == 0) && + ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) || + (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) { unsigned long oldnbent = ctxt->nbentities; /* @@ -7116,8 +7354,6 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { * Seems we are generating the DOM content, do * a simple tree copy for all references except the first * In the first occurrence list contains the replacement. - * progressive == 2 means we are operating on the Reader - * and since nodes are discarded we must copy all the time. */ if (((list == NULL) && (ent->owner == 0)) || (ctxt->parseMode == XML_PARSE_READER)) { @@ -7160,7 +7396,7 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) xmlAddEntityReference(ent, firstChild, nw); #endif /* LIBXML_LEGACY_ENABLED */ - } else if (list == NULL) { + } else if ((list == NULL) || (ctxt->inputNr > 0)) { xmlNodePtr nw = NULL, cur, next, last, firstChild = NULL; /* @@ -7260,6 +7496,8 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) { xmlEntityPtr ent = NULL; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); if (RAW != '&') return(NULL); @@ -7297,7 +7535,7 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) { if (ctxt->sax != NULL) { if (ctxt->sax->getEntity != NULL) ent = ctxt->sax->getEntity(ctxt->userData, name); - if ((ctxt->wellFormed == 1 ) && (ent == NULL) && + if ((ctxt->wellFormed == 1 ) && (ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX)) ent = xmlGetPredefinedEntity(name); if ((ctxt->wellFormed == 1 ) && (ent == NULL) && @@ -7368,7 +7606,7 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) { * [ WFC: No < in Attribute Values ] * The replacement text of any entity referred to directly or * indirectly in an attribute value (other than "<") must - * not contain a <. + * not contain a <. */ else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && (ent != NULL) && (ent->content != NULL) && @@ -7397,7 +7635,7 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) { /* * [ WFC: No Recursion ] * A parsed entity must not contain a recursive reference - * to itself, either directly or indirectly. + * to itself, either directly or indirectly. * Done somewhere else */ return(ent); @@ -7515,7 +7753,7 @@ xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { * is not obligated to read and process their declarations; * for such documents, the rule that an entity must be * declared is a well-formedness constraint only if - * standalone='yes'. + * standalone='yes'. */ if (ent == NULL) { if ((ctxt->standalone == 1) || @@ -7606,7 +7844,7 @@ xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { * * [ WFC: No Recursion ] * A parsed entity must not contain a recursive - * reference to itself, either directly or indirectly. + * reference to itself, either directly or indirectly. * * [ WFC: Entity Declared ] * In a document without any DTD, a document with only an internal DTD @@ -7787,12 +8025,25 @@ xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && (IS_CHAR(c))) { xmlBufferAdd(buf, ctxt->input->cur, l); - if (count++ > 100) { + if (count++ > XML_PARSER_CHUNK_SIZE) { count = 0; GROW; + if (ctxt->instate == XML_PARSER_EOF) { + xmlBufferFree(buf); + return(-1); + } } NEXTL(l); c = CUR_CHAR(l); + if (c == 0) { + count = 0; + GROW; + if (ctxt->instate == XML_PARSER_EOF) { + xmlBufferFree(buf); + return(-1); + } + c = CUR_CHAR(l); + } } if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { @@ -7932,12 +8183,12 @@ xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { * * parse a DOCTYPE declaration * - * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? + * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' * * [ VC: Root Element Type ] * The Name in the document type declaration must match the element - * type of the root element. + * type of the root element. */ void @@ -8019,11 +8270,11 @@ xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { ctxt->instate = XML_PARSER_DTD; NEXT; /* - * Parse the succession of Markup declarations and + * Parse the succession of Markup declarations and * PEReferences. * Subsequence (markupdecl | PEReference | S)* */ - while (RAW != ']') { + while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) { const xmlChar *check = CUR_PTR; unsigned int cons = ctxt->input->consumed; @@ -8043,7 +8294,7 @@ xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { break; } } - if (RAW == ']') { + if (RAW == ']') { NEXT; SKIP_BLANKS; } @@ -8074,8 +8325,8 @@ xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { * * [ WFC: No < in Attribute Values ] * The replacement text of any entity referred to directly or indirectly in - * an attribute value (other than "<") must not contain a <. - * + * an attribute value (other than "<") must not contain a <. + * * [ VC: Attribute Value Type ] * The attribute must have been declared; the value must be of the type * declared for it. @@ -8156,7 +8407,7 @@ xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { /** * xmlParseStartTag: * @ctxt: an XML parser context - * + * * parse a start of tag either for rule element or * EmptyElement. In both case we don't parse the tag closing chars. * @@ -8164,13 +8415,13 @@ xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { * * [ WFC: Unique Att Spec ] * No attribute name may appear more than once in the same start-tag or - * empty-element tag. + * empty-element tag. * * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' * * [ WFC: Unique Att Spec ] * No attribute name may appear more than once in the same start-tag or - * empty-element tag. + * empty-element tag. * * With namespace: * @@ -8209,9 +8460,9 @@ xmlParseStartTag(xmlParserCtxtPtr ctxt) { SKIP_BLANKS; GROW; - while ((RAW != '>') && + while (((RAW != '>') && ((RAW != '/') || (NXT(1) != '>')) && - (IS_BYTE_CHAR(RAW))) { + (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { const xmlChar *q = CUR_PTR; unsigned int cons = ctxt->input->consumed; @@ -8220,7 +8471,7 @@ xmlParseStartTag(xmlParserCtxtPtr ctxt) { /* * [ WFC: Unique Att Spec ] * No attribute name may appear more than once in the same - * start-tag or empty-element tag. + * start-tag or empty-element tag. */ for (i = 0; i < nbatts;i += 2) { if (xmlStrEqual(atts[i], attname)) { @@ -8269,7 +8520,7 @@ xmlParseStartTag(xmlParserCtxtPtr ctxt) { xmlFree(attvalue); } -failed: +failed: GROW if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) @@ -8351,7 +8602,7 @@ xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { /* * [ WFC: Element Type Match ] * The Name in an element's end-tag must match the element type in the - * start-tag. + * start-tag. * */ if (name != (xmlChar*)1) { @@ -8447,7 +8698,7 @@ xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { if (CUR == ':') { l = xmlParseName(ctxt); if (l != NULL) { - xmlNsErr(ctxt, XML_NS_ERR_QNAME, + xmlNsErr(ctxt, XML_NS_ERR_QNAME, "Failed to parse QName '%s'\n", l, NULL, NULL); *prefix = NULL; return(l); @@ -8530,7 +8781,7 @@ xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, cmp = prefix; while (*in != 0 && *in == *cmp) { - ++in; + ++in; ++cmp; } if ((*cmp == 0) && (*in == ':')) { @@ -8568,20 +8819,20 @@ xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, * * 3.3.3 Attribute-Value Normalization: * Before the value of an attribute is passed to the application or - * checked for validity, the XML processor must normalize it as follows: + * checked for validity, the XML processor must normalize it as follows: * - a character reference is processed by appending the referenced * character to the attribute value * - an entity reference is processed by recursively processing the - * replacement text of the entity + * replacement text of the entity * - a whitespace character (#x20, #xD, #xA, #x9) is processed by * appending #x20 to the normalized value, except that only a single * #x20 is appended for a "#xD#xA" sequence that is part of an external - * parsed entity or the literal entity value of an internal parsed entity - * - other characters are processed by appending them to the normalized value + * parsed entity or the literal entity value of an internal parsed entity + * - other characters are processed by appending them to the normalized value * If the declared value is not CDATA, then the XML processor must further * process the normalized attribute value by discarding any leading and * trailing space (#x20) characters, and by replacing sequences of space - * (#x20) characters by a single space (#x20) character. + * (#x20) characters by a single space (#x20) character. * All attributes for which no declaration has been read should be treated * by a non-validating parser as if declared CDATA. * @@ -8627,7 +8878,7 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, /* * Skip any leading spaces */ - while ((in < end) && (*in != limit) && + while ((in < end) && (*in != limit) && ((*in == 0x20) || (*in == 0x9) || (*in == 0xA) || (*in == 0xD))) { in++; @@ -8635,12 +8886,20 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, if (in >= end) { const xmlChar *oldbase = ctxt->input->base; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); if (oldbase != ctxt->input->base) { long delta = ctxt->input->base - oldbase; start = start + delta; in = in + delta; } end = ctxt->input->end; + if (((in - start) > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue lenght too long\n"); + return(NULL); + } } } while ((in < end) && (*in != limit) && (*in >= 0x20) && @@ -8649,12 +8908,20 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, if (in >= end) { const xmlChar *oldbase = ctxt->input->base; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); if (oldbase != ctxt->input->base) { long delta = ctxt->input->base - oldbase; start = start + delta; in = in + delta; } end = ctxt->input->end; + if (((in - start) > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue lenght too long\n"); + return(NULL); + } } } last = in; @@ -8662,13 +8929,15 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, * skip the trailing blanks */ while ((last[-1] == 0x20) && (last > start)) last--; - while ((in < end) && (*in != limit) && + while ((in < end) && (*in != limit) && ((*in == 0x20) || (*in == 0x9) || (*in == 0xA) || (*in == 0xD))) { in++; if (in >= end) { const xmlChar *oldbase = ctxt->input->base; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); if (oldbase != ctxt->input->base) { long delta = ctxt->input->base - oldbase; start = start + delta; @@ -8676,8 +8945,20 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, last = last + delta; } end = ctxt->input->end; - } - } + if (((in - start) > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue lenght too long\n"); + return(NULL); + } + } + } + if (((in - start) > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue lenght too long\n"); + return(NULL); + } if (*in != limit) goto need_complex; } else { while ((in < end) && (*in != limit) && (*in >= 0x20) && @@ -8686,15 +8967,29 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, if (in >= end) { const xmlChar *oldbase = ctxt->input->base; GROW; + if (ctxt->instate == XML_PARSER_EOF) + return(NULL); if (oldbase != ctxt->input->base) { long delta = ctxt->input->base - oldbase; start = start + delta; in = in + delta; } end = ctxt->input->end; + if (((in - start) > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue lenght too long\n"); + return(NULL); + } } } last = in; + if (((in - start) > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, + "AttValue lenght too long\n"); + return(NULL); + } if (*in != limit) goto need_complex; } in++; @@ -8833,7 +9128,7 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt, /** * xmlParseStartTag2: * @ctxt: an XML parser context - * + * * parse a start of tag either for rule element or * EmptyElement. In both case we don't parse the tag closing chars. * This routine is called when running SAX2 parsing @@ -8842,13 +9137,13 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt, * * [ WFC: Unique Att Spec ] * No attribute name may appear more than once in the same start-tag or - * empty-element tag. + * empty-element tag. * * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' * * [ WFC: Unique Att Spec ] * No attribute name may appear more than once in the same start-tag or - * empty-element tag. + * empty-element tag. * * With namespace: * @@ -8917,9 +9212,9 @@ reparse: GROW; if (ctxt->input->base != base) goto base_changed; - while ((RAW != '>') && + while (((RAW != '>') && ((RAW != '/') || (NXT(1) != '>')) && - (IS_BYTE_CHAR(RAW))) { + (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { const xmlChar *q = CUR_PTR; unsigned int cons = ctxt->input->consumed; int len = -1, alloc = 0; @@ -9090,6 +9385,8 @@ skip_ns: failed: GROW + if (ctxt->instate == XML_PARSER_EOF) + break; if (ctxt->input->base != base) goto base_changed; if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) break; @@ -9181,7 +9478,7 @@ failed: atts[nbatts++] = defaults->values[5 * i + 3]; if ((ctxt->standalone == 1) && (defaults->values[5 * i + 4] != NULL)) { - xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, + xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, "standalone: attribute %s on %s defaulted from external subset\n", attname, localname); } @@ -9211,7 +9508,7 @@ failed: /* * [ WFC: Unique Att Spec ] * No attribute name may appear more than once in the same - * start-tag or empty-element tag. + * start-tag or empty-element tag. * As extended by the Namespace in XML REC. */ for (j = 0; j < i;j += 5) { @@ -9327,6 +9624,8 @@ xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, * We should definitely be at the ending "S? '>'" part */ GROW; + if (ctxt->instate == XML_PARSER_EOF) + return; SKIP_BLANKS; if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); @@ -9336,7 +9635,7 @@ xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, /* * [ WFC: Element Type Match ] * The Name in an element's end-tag must match the element type in the - * start-tag. + * start-tag. * */ if (name != (xmlChar*)1) { @@ -9365,7 +9664,7 @@ done: /** * xmlParseCDSect: * @ctxt: an XML parser context - * + * * Parse escaped pure raw content. * * [18] CDSect ::= CDStart CData CDEnd @@ -9418,14 +9717,21 @@ xmlParseCDSect(xmlParserCtxtPtr ctxt) { if (len + 5 >= size) { xmlChar *tmp; - size *= 2; - tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + if ((size > XML_MAX_TEXT_LENGTH) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, + "CData section too big found", NULL); + xmlFree (buf); + return; + } + tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar)); if (tmp == NULL) { xmlFree(buf); xmlErrMemory(ctxt, NULL); return; } buf = tmp; + size *= 2; } COPY_BUF(rl,buf,len,r); r = s; @@ -9435,6 +9741,10 @@ xmlParseCDSect(xmlParserCtxtPtr ctxt) { count++; if (count > 50) { GROW; + if (ctxt->instate == XML_PARSER_EOF) { + xmlFree(buf); + return; + } count = 0; } NEXTL(l); @@ -9514,7 +9824,7 @@ xmlParseContent(xmlParserCtxtPtr ctxt) { /* * Fifth case : a reference. If if has not been resolved, - * parsing returns it's Name, create the node + * parsing returns it's Name, create the node */ else if (*cur == '&') { @@ -9555,7 +9865,7 @@ xmlParseContent(xmlParserCtxtPtr ctxt) { * * [ WFC: Element Type Match ] * The Name in an element's end-tag must match the element type in the - * start-tag. + * start-tag. * */ @@ -9614,7 +9924,7 @@ xmlParseElement(xmlParserCtxtPtr ctxt) { /* * [ VC: Root Element Type ] * The Name in the document type declaration must match the element - * type of the root element. + * type of the root element. */ if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && ctxt->node && (ctxt->node == ctxt->myDoc->children)) @@ -9895,7 +10205,7 @@ xmlParseEncName(xmlParserCtxtPtr ctxt) { /** * xmlParseEncodingDecl: * @ctxt: an XML parser context - * + * * parse the XML encoding declaration * * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") @@ -9952,7 +10262,7 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { /* * If no encoding was passed to the parser, that we are - * using UTF-16 and no decoder is present i.e. the + * using UTF-16 and no decoder is present i.e. the * document is apparently UTF-8 compatible, then raise an * encoding mismatch fatal error */ @@ -10003,7 +10313,7 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { * parse the XML standalone declaration * * [32] SDDecl ::= S 'standalone' Eq - * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) + * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) * * [ VC: Standalone Document Declaration ] * TODO The standalone document declaration must have the value "no" @@ -10083,7 +10393,7 @@ xmlParseSDDecl(xmlParserCtxtPtr ctxt) { /** * xmlParseXMLDecl: * @ctxt: an XML parser context - * + * * parse an XML declaration header * * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' @@ -10197,7 +10507,7 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { /** * xmlParseMisc: * @ctxt: an XML parser context - * + * * parse an XML Misc* optional field. * * [27] Misc ::= Comment | PI | S @@ -10205,9 +10515,10 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { void xmlParseMisc(xmlParserCtxtPtr ctxt) { - while (((RAW == '<') && (NXT(1) == '?')) || - (CMP4(CUR_PTR, '<', '!', '-', '-')) || - IS_BLANK_CH(CUR)) { + while ((ctxt->instate != XML_PARSER_EOF) && + (((RAW == '<') && (NXT(1) == '?')) || + (CMP4(CUR_PTR, '<', '!', '-', '-')) || + IS_BLANK_CH(CUR))) { if ((RAW == '<') && (NXT(1) == '?')) { xmlParsePI(ctxt); } else if (IS_BLANK_CH(CUR)) { @@ -10220,7 +10531,7 @@ xmlParseMisc(xmlParserCtxtPtr ctxt) { /** * xmlParseDocument: * @ctxt: an XML parser context - * + * * parse an XML document (and build a tree if using the standard SAX * interface). * @@ -10257,7 +10568,7 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) { if ((ctxt->encoding == NULL) && ((ctxt->input->end - ctxt->input->cur) >= 4)) { - /* + /* * Get the 4 first bytes and decode the charset * if enc != XML_CHAR_ENCODING_NONE * plug some encoding conversion routines. @@ -10400,7 +10711,7 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) { /** * xmlParseExtParsedEnt: * @ctxt: an XML parser context - * + * * parse a general parsed entity * An external general parsed entity is well-formed if it matches the * production labeled extParsedEnt. @@ -10431,7 +10742,7 @@ xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); - /* + /* * Get the 4 first bytes and decode the charset * if enc != XML_CHAR_ENCODING_NONE * plug some encoding conversion routines. @@ -10484,7 +10795,7 @@ xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { ctxt->depth = 0; xmlParseContent(ctxt); - + if ((RAW == '<') && (NXT(1) == '/')) { xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); } else if (RAW != 0) { @@ -10504,7 +10815,7 @@ xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { #ifdef LIBXML_PUSH_ENABLED /************************************************************************ * * - * Progressive parsing interfaces * + * Progressive parsing interfaces * * * ************************************************************************/ @@ -10541,8 +10852,8 @@ xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, buf = in->base; len = in->length; } else { - buf = in->buf->buffer->content; - len = in->buf->buffer->use; + buf = xmlBufContent(in->buf->buffer); + len = xmlBufUse(in->buf->buffer); } /* take into account the sequence length */ if (third) len -= 2; @@ -10565,7 +10876,7 @@ xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, xmlGenericError(xmlGenericErrorContext, "PP: lookup '%c%c' found at %d\n", first, next, base); - else + else xmlGenericError(xmlGenericErrorContext, "PP: lookup '%c%c%c' found at %d\n", first, next, third, base); @@ -10581,7 +10892,7 @@ xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, else if (third == 0) xmlGenericError(xmlGenericErrorContext, "PP: lookup '%c%c' failed\n", first, next); - else + else xmlGenericError(xmlGenericErrorContext, "PP: lookup '%c%c%c' failed\n", first, next, third); #endif @@ -10663,7 +10974,7 @@ xmlCheckCdataPush(const xmlChar *utf, int len) { if ((utf == NULL) || (len <= 0)) return(0); - + for (ix = 0; ix < len;) { /* string is 0-terminated */ c = utf[ix]; if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */ @@ -10795,7 +11106,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) return(0); - + /* * Pop-up of finished entities. */ @@ -10812,20 +11123,16 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { * remainng chars to avoid them stalling in the non-converted * buffer. */ - if ((ctxt->input->buf->raw != NULL) && - (ctxt->input->buf->raw->use > 0)) { - int base = ctxt->input->base - - ctxt->input->buf->buffer->content; - int current = ctxt->input->cur - ctxt->input->base; + if (xmlBufIsEmpty(ctxt->input->buf->buffer) == 0) { + size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, + ctxt->input); + size_t current = ctxt->input->cur - ctxt->input->base; xmlParserInputBufferPush(ctxt->input->buf, 0, ""); - ctxt->input->base = ctxt->input->buf->buffer->content + base; - ctxt->input->cur = ctxt->input->base + current; - ctxt->input->end = - &ctxt->input->buf->buffer->content[ - ctxt->input->buf->buffer->use]; + xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, + base, current); } - avail = ctxt->input->buf->buffer->use - + avail = xmlBufUse(ctxt->input->buf->buffer) - (ctxt->input->cur - ctxt->input->base); } if (avail < 1) @@ -10847,7 +11154,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { if (avail < 4) goto done; - /* + /* * Get the 4 first bytes and decode the charset * if enc != XML_CHAR_ENCODING_NONE * plug some encoding conversion routines, @@ -11002,7 +11309,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { /* * [ VC: Root Element Type ] * The Name in the document type declaration must match - * the element type of the root element. + * the element type of the root element. */ if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && ctxt->node && (ctxt->node == ctxt->myDoc->children)) @@ -11037,6 +11344,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { } else { ctxt->instate = XML_PARSER_CONTENT; } + ctxt->progressive = 1; break; } if (RAW == '>') { @@ -11056,6 +11364,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { #endif /* LIBXML_SAX1_ENABLED */ ctxt->instate = XML_PARSER_CONTENT; + ctxt->progressive = 1; break; } case XML_PARSER_CONTENT: { @@ -11073,9 +11382,13 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { break; } else if ((cur == '<') && (next == '?')) { if ((!terminate) && - (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) + (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { + ctxt->progressive = XML_PARSER_PI; goto done; + } xmlParsePI(ctxt); + ctxt->instate = XML_PARSER_CONTENT; + ctxt->progressive = 1; } else if ((cur == '<') && (next != '!')) { ctxt->instate = XML_PARSER_START_TAG; break; @@ -11089,10 +11402,13 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { ctxt->input->cur += 4; term = xmlParseLookupSequence(ctxt, '-', '-', '>'); ctxt->input->cur -= 4; - if ((!terminate) && (term < 0)) + if ((!terminate) && (term < 0)) { + ctxt->progressive = XML_PARSER_COMMENT; goto done; + } xmlParseComment(ctxt); ctxt->instate = XML_PARSER_CONTENT; + ctxt->progressive = 1; } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && (ctxt->input->cur[2] == '[') && (ctxt->input->cur[3] == 'C') && @@ -11187,7 +11503,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { break; case XML_PARSER_CDATA_SECTION: { /* - * The Push mode need to have the SAX callback for + * The Push mode need to have the SAX callback for * cdataBlock merge back contiguous callbacks. */ int base; @@ -11197,7 +11513,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { int tmp; - tmp = xmlCheckCdataPush(ctxt->input->cur, + tmp = xmlCheckCdataPush(ctxt->input->cur, XML_PARSER_BIG_BUFFER_SIZE); if (tmp < 0) { tmp = -tmp; @@ -11263,7 +11579,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); else - avail = ctxt->input->buf->buffer->use - + avail = xmlBufUse(ctxt->input->buf->buffer) - (ctxt->input->cur - ctxt->input->base); if (avail < 2) goto done; @@ -11271,26 +11587,33 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { next = ctxt->input->cur[1]; if ((cur == '<') && (next == '?')) { if ((!terminate) && - (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) + (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { + ctxt->progressive = XML_PARSER_PI; goto done; + } #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: Parsing PI\n"); #endif xmlParsePI(ctxt); + ctxt->instate = XML_PARSER_MISC; + ctxt->progressive = 1; ctxt->checkIndex = 0; } else if ((cur == '<') && (next == '!') && (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { if ((!terminate) && - (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) + (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { + ctxt->progressive = XML_PARSER_COMMENT; goto done; + } #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: Parsing Comment\n"); #endif xmlParseComment(ctxt); ctxt->instate = XML_PARSER_MISC; + ctxt->progressive = 1; ctxt->checkIndex = 0; } else if ((cur == '<') && (next == '!') && (ctxt->input->cur[2] == 'D') && @@ -11301,13 +11624,17 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { (ctxt->input->cur[7] == 'P') && (ctxt->input->cur[8] == 'E')) { if ((!terminate) && - (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) + (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) { + ctxt->progressive = XML_PARSER_DTD; goto done; + } #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: Parsing internal subset\n"); #endif ctxt->inSubset = 1; + ctxt->progressive = 1; + ctxt->checkIndex = 0; xmlParseDocTypeDecl(ctxt); if (RAW == '[') { ctxt->instate = XML_PARSER_DTD; @@ -11338,7 +11665,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { goto done; } else { ctxt->instate = XML_PARSER_START_TAG; - ctxt->progressive = 1; + ctxt->progressive = XML_PARSER_START_TAG; xmlParseGetLasts(ctxt, &lastlt, &lastgt); #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, @@ -11351,38 +11678,46 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { if (ctxt->input->buf == NULL) avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); else - avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); - if (avail < 2) + avail = xmlBufUse(ctxt->input->buf->buffer) - + (ctxt->input->cur - ctxt->input->base); + if (avail < 2) goto done; cur = ctxt->input->cur[0]; next = ctxt->input->cur[1]; if ((cur == '<') && (next == '?')) { if ((!terminate) && - (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) + (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { + ctxt->progressive = XML_PARSER_PI; goto done; + } #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: Parsing PI\n"); #endif xmlParsePI(ctxt); + ctxt->instate = XML_PARSER_PROLOG; + ctxt->progressive = 1; } else if ((cur == '<') && (next == '!') && (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { if ((!terminate) && - (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) + (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { + ctxt->progressive = XML_PARSER_COMMENT; goto done; + } #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: Parsing Comment\n"); #endif xmlParseComment(ctxt); ctxt->instate = XML_PARSER_PROLOG; + ctxt->progressive = 1; } else if ((cur == '<') && (next == '!') && (avail < 4)) { goto done; } else { ctxt->instate = XML_PARSER_START_TAG; if (ctxt->progressive == 0) - ctxt->progressive = 1; + ctxt->progressive = XML_PARSER_START_TAG; xmlParseGetLasts(ctxt, &lastlt, &lastgt); #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, @@ -11395,32 +11730,39 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { if (ctxt->input->buf == NULL) avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); else - avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); + avail = xmlBufUse(ctxt->input->buf->buffer) - + (ctxt->input->cur - ctxt->input->base); if (avail < 2) goto done; cur = ctxt->input->cur[0]; next = ctxt->input->cur[1]; if ((cur == '<') && (next == '?')) { if ((!terminate) && - (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) + (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { + ctxt->progressive = XML_PARSER_PI; goto done; + } #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: Parsing PI\n"); #endif xmlParsePI(ctxt); ctxt->instate = XML_PARSER_EPILOG; + ctxt->progressive = 1; } else if ((cur == '<') && (next == '!') && (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { if ((!terminate) && - (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) + (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { + ctxt->progressive = XML_PARSER_COMMENT; goto done; + } #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: Parsing Comment\n"); #endif xmlParseComment(ctxt); ctxt->instate = XML_PARSER_EPILOG; + ctxt->progressive = 1; } else if ((cur == '<') && (next == '!') && (avail < 4)) { goto done; @@ -11450,29 +11792,28 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { int base, i; xmlChar *buf; xmlChar quote = 0; + size_t use; base = ctxt->input->cur - ctxt->input->base; if (base < 0) return(0); if (ctxt->checkIndex > base) base = ctxt->checkIndex; - buf = ctxt->input->buf->buffer->content; - for (;(unsigned int) base < ctxt->input->buf->buffer->use; - base++) { + buf = xmlBufContent(ctxt->input->buf->buffer); + use = xmlBufUse(ctxt->input->buf->buffer); + for (;(unsigned int) base < use; base++) { if (quote != 0) { if (buf[base] == quote) quote = 0; - continue; + continue; } if ((quote == 0) && (buf[base] == '<')) { int found = 0; /* special handling of comments */ - if (((unsigned int) base + 4 < - ctxt->input->buf->buffer->use) && + if (((unsigned int) base + 4 < use) && (buf[base + 1] == '!') && (buf[base + 2] == '-') && (buf[base + 3] == '-')) { - for (;(unsigned int) base + 3 < - ctxt->input->buf->buffer->use; base++) { + for (;(unsigned int) base + 3 < use; base++) { if ((buf[base] == '-') && (buf[base + 1] == '-') && (buf[base + 2] == '>')) { @@ -11503,17 +11844,14 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { fprintf(stderr, "%c%c%c%c: ", buf[base], buf[base + 1], buf[base + 2], buf[base + 3]); #endif - if ((unsigned int) base +1 >= - ctxt->input->buf->buffer->use) + if ((unsigned int) base +1 >= use) break; if (buf[base + 1] == ']') { /* conditional crap, skip both ']' ! */ base++; continue; } - for (i = 1; - (unsigned int) base + i < ctxt->input->buf->buffer->use; - i++) { + for (i = 1; (unsigned int) base + i < use; i++) { if (buf[base + i] == '>') { #if 0 fprintf(stderr, "found\n"); @@ -11531,7 +11869,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { fprintf(stderr, "end of stream\n"); #endif break; - + } not_end_of_int_subset: continue; /* for */ @@ -11539,6 +11877,10 @@ not_end_of_int_subset: /* * We didn't found the end of the Internal subset */ + if (quote == 0) + ctxt->checkIndex = base; + else + ctxt->checkIndex = 0; #ifdef DEBUG_PUSH if (next == 0) xmlGenericError(xmlGenericErrorContext, @@ -11547,6 +11889,7 @@ not_end_of_int_subset: goto done; found_end_int_subset: + ctxt->checkIndex = 0; xmlParseInternalSubset(ctxt); ctxt->inSubset = 2; if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && @@ -11637,7 +11980,7 @@ found_end_int_subset: break; } } -done: +done: #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); #endif @@ -11657,6 +12000,55 @@ encoding_error: } /** + * xmlParseCheckTransition: + * @ctxt: an XML parser context + * @chunk: a char array + * @size: the size in byte of the chunk + * + * Check depending on the current parser state if the chunk given must be + * processed immediately or one need more data to advance on parsing. + * + * Returns -1 in case of error, 0 if the push is not needed and 1 if needed + */ +static int +xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) { + if ((ctxt == NULL) || (chunk == NULL) || (size < 0)) + return(-1); + if (ctxt->instate == XML_PARSER_START_TAG) { + if (memchr(chunk, '>', size) != NULL) + return(1); + return(0); + } + if (ctxt->progressive == XML_PARSER_COMMENT) { + if (memchr(chunk, '>', size) != NULL) + return(1); + return(0); + } + if (ctxt->instate == XML_PARSER_CDATA_SECTION) { + if (memchr(chunk, '>', size) != NULL) + return(1); + return(0); + } + if (ctxt->progressive == XML_PARSER_PI) { + if (memchr(chunk, '>', size) != NULL) + return(1); + return(0); + } + if (ctxt->instate == XML_PARSER_END_TAG) { + if (memchr(chunk, '>', size) != NULL) + return(1); + return(0); + } + if ((ctxt->progressive == XML_PARSER_DTD) || + (ctxt->instate == XML_PARSER_DTD)) { + if (memchr(chunk, ']', size) != NULL) + return(1); + return(0); + } + return(1); +} + +/** * xmlParseChunk: * @ctxt: an XML parser context * @chunk: an char array @@ -11672,11 +12064,15 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, int terminate) { int end_in_lf = 0; int remain = 0; + size_t old_avail = 0; + size_t avail = 0; if (ctxt == NULL) return(XML_ERR_INTERNAL_ERROR); if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) return(ctxt->errNo); + if (ctxt->instate == XML_PARSER_EOF) + return(-1); if (ctxt->instate == XML_PARSER_START) xmlDetectSAX2(ctxt); if ((size > 0) && (chunk != NULL) && (!terminate) && @@ -11689,10 +12085,11 @@ xmldecl_done: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { - int base = ctxt->input->base - ctxt->input->buf->buffer->content; - int cur = ctxt->input->cur - ctxt->input->base; + size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); + size_t cur = ctxt->input->cur - ctxt->input->base; int res; + old_avail = xmlBufUse(ctxt->input->buf->buffer); /* * Specific handling if we autodetected an encoding, we should not * push more than the first line ... which depend on the encoding @@ -11734,10 +12131,7 @@ xmldecl_done: ctxt->disableSAX = 1; return (XML_PARSER_EOF); } - ctxt->input->base = ctxt->input->buf->buffer->content + base; - ctxt->input->cur = ctxt->input->base + cur; - ctxt->input->end = - &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; + xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); #endif @@ -11749,7 +12143,7 @@ xmldecl_done: (in->raw != NULL)) { int nbchars; - nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); + nbchars = xmlCharEncInput(in); if (nbchars < 0) { /* TODO 2.6.0 */ xmlGenericError(xmlGenericErrorContext, @@ -11759,10 +12153,31 @@ xmldecl_done: } } } - if (remain != 0) + if (remain != 0) { xmlParseTryOrFinish(ctxt, 0); - else - xmlParseTryOrFinish(ctxt, terminate); + } else { + if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) + avail = xmlBufUse(ctxt->input->buf->buffer); + /* + * Depending on the current state it may not be such + * a good idea to try parsing if there is nothing in the chunk + * which would be worth doing a parser state transition and we + * need to wait for more data + */ + if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) || + (old_avail == 0) || (avail == 0) || + (xmlParseCheckTransition(ctxt, + (const char *)&ctxt->input->base[old_avail], + avail - old_avail))) + xmlParseTryOrFinish(ctxt, terminate); + } + if ((ctxt->input != NULL) && + (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) || + ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) && + ((ctxt->options & XML_PARSE_HUGE) == 0)) { + xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); + ctxt->instate = XML_PARSER_EOF; + } if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) return(ctxt->errNo); @@ -11780,22 +12195,22 @@ xmldecl_done: /* * Check for termination */ - int avail = 0; + int cur_avail = 0; if (ctxt->input != NULL) { if (ctxt->input->buf == NULL) - avail = ctxt->input->length - - (ctxt->input->cur - ctxt->input->base); + cur_avail = ctxt->input->length - + (ctxt->input->cur - ctxt->input->base); else - avail = ctxt->input->buf->buffer->use - - (ctxt->input->cur - ctxt->input->base); + cur_avail = xmlBufUse(ctxt->input->buf->buffer) - + (ctxt->input->cur - ctxt->input->base); } - + if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->instate != XML_PARSER_EPILOG)) { xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); - } - if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) { + } + if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) { xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); } if (ctxt->instate != XML_PARSER_EOF) { @@ -11804,12 +12219,12 @@ xmldecl_done: } ctxt->instate = XML_PARSER_EOF; } - return((xmlParserErrors) ctxt->errNo); + return((xmlParserErrors) ctxt->errNo); } /************************************************************************ * * - * I/O front end functions to the parser * + * I/O front end functions to the parser * * * ************************************************************************/ @@ -11833,7 +12248,7 @@ xmldecl_done: */ xmlParserCtxtPtr -xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, +xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, const char *chunk, int size, const char *filename) { xmlParserCtxtPtr ctxt; xmlParserInputPtr inputStream; @@ -11882,7 +12297,7 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); if (user_data != NULL) ctxt->userData = user_data; - } + } if (filename == NULL) { ctxt->directory = NULL; } else { @@ -11908,11 +12323,7 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, } } inputStream->buf = buf; - inputStream->base = inputStream->buf->buffer->content; - inputStream->cur = inputStream->buf->buffer->content; - inputStream->end = - &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; - + xmlBufResetInput(inputStream->buf->buffer, inputStream); inputPush(ctxt, inputStream); /* @@ -11923,15 +12334,12 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, if ((size == 0) || (chunk == NULL)) { ctxt->charset = XML_CHAR_ENCODING_NONE; } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) { - int base = ctxt->input->base - ctxt->input->buf->buffer->content; - int cur = ctxt->input->cur - ctxt->input->base; + size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); + size_t cur = ctxt->input->cur - ctxt->input->base; - xmlParserInputBufferPush(ctxt->input->buf, size, chunk); + xmlParserInputBufferPush(ctxt->input->buf, size, chunk); - ctxt->input->base = ctxt->input->buf->buffer->content + base; - ctxt->input->cur = ctxt->input->base + cur; - ctxt->input->end = - &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; + xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); #endif @@ -11951,7 +12359,7 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, * * Blocks further parser processing */ -void +void xmlStopParser(xmlParserCtxtPtr ctxt) { if (ctxt == NULL) return; @@ -12032,7 +12440,7 @@ xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, #ifdef LIBXML_VALID_ENABLED /************************************************************************ * * - * Front ends when parsing a DTD * + * Front ends when parsing a DTD * * * ************************************************************************/ @@ -12043,7 +12451,7 @@ xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, * @enc: the charset encoding if known * * Load and parse a DTD - * + * * Returns the resulting xmlDtdPtr or NULL in case of error. * @input will be freed by the function in any case. */ @@ -12068,7 +12476,7 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, /* * Set-up the SAX context */ - if (sax != NULL) { + if (sax != NULL) { if (ctxt->sax != NULL) xmlFree(ctxt->sax); ctxt->sax = sax; @@ -12122,7 +12530,7 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, if ((enc == XML_CHAR_ENCODING_NONE) && ((ctxt->input->end - ctxt->input->cur) >= 4)) { - /* + /* * Get the 4 first bytes and decode the charset * if enc != XML_CHAR_ENCODING_NONE * plug some encoding conversion routines. @@ -12161,7 +12569,7 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, } if (sax != NULL) ctxt->sax = NULL; xmlFreeParserCtxt(ctxt); - + return(ret); } @@ -12172,7 +12580,7 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, * @SystemID: a NAME* containing the URL to the DTD * * Load and parse an external subset. - * + * * Returns the resulting xmlDtdPtr or NULL in case of error. */ @@ -12195,13 +12603,13 @@ xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, /* * Set-up the SAX context */ - if (sax != NULL) { + if (sax != NULL) { if (ctxt->sax != NULL) xmlFree(ctxt->sax); ctxt->sax = sax; ctxt->userData = ctxt; } - + /* * Canonicalise the system ID */ @@ -12312,7 +12720,7 @@ xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { /************************************************************************ * * - * Front ends when parsing an Entity * + * Front ends when parsing an Entity * * * ************************************************************************/ @@ -12428,7 +12836,7 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, */ if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) && (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) { - xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, + xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, "Version mismatch between document and entity\n"); } } @@ -12711,7 +13119,7 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, if (ctxt->lastError.code != XML_ERR_OK) xmlCopyError(&ctxt->lastError, &oldctxt->lastError); - if (sax != NULL) + if (sax != NULL) ctxt->sax = oldsax; oldctxt->node_seq.maximum = ctxt->node_seq.maximum; oldctxt->node_seq.length = ctxt->node_seq.length; @@ -13530,7 +13938,7 @@ xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, * @filename: the filename or URL * @options: a combination of xmlParserOption * - * Create a parser context for a file or URL content. + * Create a parser context for a file or URL content. * Automatic support for ZLIB/Compress compressed document is provided * by default if found at compile-time and for file accesses * @@ -13572,7 +13980,7 @@ xmlCreateURLParserCtxt(const char *filename, int options) * xmlCreateFileParserCtxt: * @filename: the filename * - * Create a parser context for a file content. + * Create a parser context for a file content. * Automatic support for ZLIB/Compress compressed document is provided * by default if found at compile-time. * @@ -13650,7 +14058,7 @@ xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, if (sax != NULL) ctxt->sax = NULL; xmlFreeParserCtxt(ctxt); - + return(ret); } @@ -13750,7 +14158,7 @@ xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, xmlClearParserCtxt(ctxt); return; } - + xmlClearParserCtxt(ctxt); if (filename != NULL) input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); @@ -13768,7 +14176,7 @@ xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, * * parse an XML file and call the given SAX handler routines. * Automatic support for ZLIB/Compress compressed document is provided - * + * * Returns 0 in case of success or a error number otherwise */ int @@ -13776,7 +14184,7 @@ xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, const char *filename) { int ret = 0; xmlParserCtxtPtr ctxt; - + ctxt = xmlCreateFileParserCtxt(filename); if (ctxt == NULL) return -1; if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) @@ -13786,9 +14194,9 @@ xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, if (user_data != NULL) ctxt->userData = user_data; - + xmlParseDocument(ctxt); - + if (ctxt->wellFormed) ret = 0; else { @@ -13804,14 +14212,14 @@ xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, ctxt->myDoc = NULL; } xmlFreeParserCtxt(ctxt); - + return ret; } #endif /* LIBXML_SAX1_ENABLED */ /************************************************************************ * * - * Front ends when parsing from memory * + * Front ends when parsing from memory * * * ************************************************************************/ @@ -13855,9 +14263,7 @@ xmlCreateMemoryParserCtxt(const char *buffer, int size) { input->filename = NULL; input->buf = buf; - input->base = input->buf->buffer->content; - input->cur = input->buf->buffer->content; - input->end = &input->buf->buffer->content[input->buf->buffer->use]; + xmlBufResetInput(input->buf->buffer, input); inputPush(ctxt, input); return(ctxt); @@ -13913,7 +14319,7 @@ xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, xmlFreeDoc(ctxt->myDoc); ctxt->myDoc = NULL; } - if (sax != NULL) + if (sax != NULL) ctxt->sax = NULL; xmlFreeParserCtxt(ctxt); @@ -13931,7 +14337,7 @@ xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, * parse an XML in-memory block and use the given SAX function block * to handle the parsing callback. If sax is NULL, fallback to the default * DOM tree building routines. - * + * * Returns the resulting document tree */ xmlDocPtr @@ -13946,7 +14352,7 @@ xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, * @size: the size of the array * * parse an XML in-memory block and build a tree. - * + * * Returns the resulting document tree */ @@ -14000,7 +14406,7 @@ int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, ctxt->userData = user_data; xmlParseDocument(ctxt); - + if (ctxt->wellFormed) ret = 0; else { @@ -14016,7 +14422,7 @@ int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, ctxt->myDoc = NULL; } xmlFreeParserCtxt(ctxt); - + return ret; } #endif /* LIBXML_SAX1_ENABLED */ @@ -14050,7 +14456,7 @@ xmlCreateDocParserCtxt(const xmlChar *cur) { * parse an XML in-memory document and build a tree. * It use the given SAX function block to handle the parsing callback. * If sax is NULL, fallback to the default DOM tree building routines. - * + * * Returns the resulting document tree */ @@ -14065,7 +14471,7 @@ xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { ctxt = xmlCreateDocParserCtxt(cur); if (ctxt == NULL) return(NULL); - if (sax != NULL) { + if (sax != NULL) { oldsax = ctxt->sax; ctxt->sax = sax; ctxt->userData = NULL; @@ -14082,7 +14488,7 @@ xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { if (sax != NULL) ctxt->sax = oldsax; xmlFreeParserCtxt(ctxt); - + return(ret); } @@ -14091,7 +14497,7 @@ xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { * @cur: a pointer to an array of xmlChar * * parse an XML in-memory document and build a tree. - * + * * Returns the resulting document tree */ @@ -14104,8 +14510,8 @@ xmlParseDoc(const xmlChar *cur) { #ifdef LIBXML_LEGACY_ENABLED /************************************************************************ * * - * Specific function to keep track of entities references * - * and used by the XSLT debugger * + * Specific function to keep track of entities references * + * and used by the XSLT debugger * * * ************************************************************************/ @@ -14115,7 +14521,7 @@ static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; * xmlAddEntityReference: * @ent : A valid entity * @firstNode : A valid first node for children of entity - * @lastNode : A valid last node of children entity + * @lastNode : A valid last node of children entity * * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY */ @@ -14144,7 +14550,7 @@ xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) /************************************************************************ * * - * Miscellaneous * + * Miscellaneous * * * ************************************************************************/ @@ -14260,7 +14666,7 @@ xmlCleanupParser(void) { * current scope */ #define DICT_FREE(str) \ - if ((str) && ((!dict) || \ + if ((str) && ((!dict) || \ (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ xmlFree((char *)(str)); @@ -14275,7 +14681,7 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt) { xmlParserInputPtr input; xmlDictPtr dict; - + if (ctxt == NULL) return; @@ -14427,25 +14833,18 @@ xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) filename); inputStream->buf = buf; - inputStream->base = inputStream->buf->buffer->content; - inputStream->cur = inputStream->buf->buffer->content; - inputStream->end = - &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; + xmlBufResetInput(buf->buffer, inputStream); inputPush(ctxt, inputStream); if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && (ctxt->input->buf != NULL)) { - int base = ctxt->input->base - ctxt->input->buf->buffer->content; - int cur = ctxt->input->cur - ctxt->input->base; + size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); + size_t cur = ctxt->input->cur - ctxt->input->base; xmlParserInputBufferPush(ctxt->input->buf, size, chunk); - ctxt->input->base = ctxt->input->buf->buffer->content + base; - ctxt->input->cur = ctxt->input->base + cur; - ctxt->input->end = - &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer-> - use]; + xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); #endif @@ -14596,6 +14995,8 @@ xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encodi if (options & XML_PARSE_HUGE) { ctxt->options |= XML_PARSE_HUGE; options -= XML_PARSE_HUGE; + if (ctxt->dict != NULL) + xmlDictSetLimit(ctxt->dict, 0); } if (options & XML_PARSE_OLDSAX) { ctxt->options |= XML_PARSE_OLDSAX; @@ -14605,6 +15006,10 @@ xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encodi ctxt->options |= XML_PARSE_IGNORE_ENC; options -= XML_PARSE_IGNORE_ENC; } + if (options & XML_PARSE_BIG_LINES) { + ctxt->options |= XML_PARSE_BIG_LINES; + options -= XML_PARSE_BIG_LINES; + } ctxt->linenumbers = 1; return (options); } @@ -14679,7 +15084,7 @@ xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, * @options: a combination of xmlParserOption * * parse an XML in-memory document and build a tree. - * + * * Returns the resulting document tree */ xmlDocPtr @@ -14703,7 +15108,7 @@ xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int optio * @options: a combination of xmlParserOption * * parse an XML file from the filesystem or the network. - * + * * Returns the resulting document tree */ xmlDocPtr @@ -14726,7 +15131,7 @@ xmlReadFile(const char *filename, const char *encoding, int options) * @options: a combination of xmlParserOption * * parse an XML in-memory document and build a tree. - * + * * Returns the resulting document tree */ xmlDocPtr @@ -14750,7 +15155,7 @@ xmlReadMemory(const char *buffer, int size, const char *URL, const char *encodin * parse an XML from a file descriptor and build a tree. * NOTE that the file descriptor will not be closed when the * reader is closed or reset. - * + * * Returns the resulting document tree */ xmlDocPtr @@ -14871,7 +15276,7 @@ xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, * * parse an XML file from the filesystem or the network. * This reuses the existing @ctxt parser context - * + * * Returns the resulting document tree */ xmlDocPtr @@ -14906,7 +15311,7 @@ xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, * * parse an XML in-memory document and build a tree. * This reuses the existing @ctxt parser context - * + * * Returns the resulting document tree */ xmlDocPtr @@ -14950,7 +15355,7 @@ xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, * This reuses the existing @ctxt parser context * NOTE that the file descriptor will not be closed when the * reader is closed or reset. - * + * * Returns the resulting document tree */ xmlDocPtr |
