summaryrefslogtreecommitdiff
path: root/parser.c
diff options
context:
space:
mode:
Diffstat (limited to 'parser.c')
-rw-r--r--parser.c1205
1 files changed, 805 insertions, 400 deletions
diff --git a/parser.c b/parser.c
index 2c38fae..192eaed 100644
--- a/parser.c
+++ b/parser.c
@@ -17,7 +17,7 @@
* parserInternals.c to reduce this file size.
* As much as possible the functions are associated with their relative
* production in the XML specification. A few productions defining the
- * different ranges of character are actually implanted either in
+ * different ranges of character are actually implanted either in
* parserInternals.h or parserInternals.c
* The DOM tree build is realized from the default SAX callbacks in
* the module SAX.c.
@@ -40,6 +40,7 @@
#endif
#include <stdlib.h>
+#include <limits.h>
#include <string.h>
#include <stdarg.h>
#include <libxml/xmlmemory.h>
@@ -83,6 +84,9 @@
#include <lzma.h>
#endif
+#include "buf.h"
+#include "enc.h"
+
static void
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
@@ -117,10 +121,10 @@ xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
* parser option.
*/
static int
-xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
+xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
xmlEntityPtr ent)
{
- unsigned long consumed = 0;
+ size_t consumed = 0;
if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
return (0);
@@ -194,6 +198,17 @@ unsigned int xmlParserMaxDepth = 256;
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
+/**
+ * XML_PARSER_CHUNK_SIZE
+ *
+ * When calling GROW that's the minimal amount of data
+ * the parser expected to have received. It is not a hard
+ * limit but an optimization when reading strings like Names
+ * It is not strictly needed as long as inputs available characters
+ * are followed by 0, which should be provided by the I/O level
+ */
+#define XML_PARSER_CHUNK_SIZE 100
+
/*
* List of XML prefixed PI allowed by W3C specs
*/
@@ -233,7 +248,7 @@ xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
/************************************************************************
* *
- * Some factorized error routines *
+ * Some factorized error routines *
* *
************************************************************************/
@@ -285,193 +300,201 @@ static void
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
{
const char *errmsg;
+ char errstr[129] = "";
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
(ctxt->instate == XML_PARSER_EOF))
return;
switch (error) {
case XML_ERR_INVALID_HEX_CHARREF:
- errmsg = "CharRef: invalid hexadecimal value\n";
+ errmsg = "CharRef: invalid hexadecimal value";
break;
case XML_ERR_INVALID_DEC_CHARREF:
- errmsg = "CharRef: invalid decimal value\n";
+ errmsg = "CharRef: invalid decimal value";
break;
case XML_ERR_INVALID_CHARREF:
- errmsg = "CharRef: invalid value\n";
+ errmsg = "CharRef: invalid value";
break;
case XML_ERR_INTERNAL_ERROR:
errmsg = "internal error";
break;
case XML_ERR_PEREF_AT_EOF:
- errmsg = "PEReference at end of document\n";
+ errmsg = "PEReference at end of document";
break;
case XML_ERR_PEREF_IN_PROLOG:
- errmsg = "PEReference in prolog\n";
+ errmsg = "PEReference in prolog";
break;
case XML_ERR_PEREF_IN_EPILOG:
- errmsg = "PEReference in epilog\n";
+ errmsg = "PEReference in epilog";
break;
case XML_ERR_PEREF_NO_NAME:
- errmsg = "PEReference: no name\n";
+ errmsg = "PEReference: no name";
break;
case XML_ERR_PEREF_SEMICOL_MISSING:
- errmsg = "PEReference: expecting ';'\n";
+ errmsg = "PEReference: expecting ';'";
break;
case XML_ERR_ENTITY_LOOP:
- errmsg = "Detected an entity reference loop\n";
+ errmsg = "Detected an entity reference loop";
break;
case XML_ERR_ENTITY_NOT_STARTED:
- errmsg = "EntityValue: \" or ' expected\n";
+ errmsg = "EntityValue: \" or ' expected";
break;
case XML_ERR_ENTITY_PE_INTERNAL:
- errmsg = "PEReferences forbidden in internal subset\n";
+ errmsg = "PEReferences forbidden in internal subset";
break;
case XML_ERR_ENTITY_NOT_FINISHED:
- errmsg = "EntityValue: \" or ' expected\n";
+ errmsg = "EntityValue: \" or ' expected";
break;
case XML_ERR_ATTRIBUTE_NOT_STARTED:
- errmsg = "AttValue: \" or ' expected\n";
+ errmsg = "AttValue: \" or ' expected";
break;
case XML_ERR_LT_IN_ATTRIBUTE:
- errmsg = "Unescaped '<' not allowed in attributes values\n";
+ errmsg = "Unescaped '<' not allowed in attributes values";
break;
case XML_ERR_LITERAL_NOT_STARTED:
- errmsg = "SystemLiteral \" or ' expected\n";
+ errmsg = "SystemLiteral \" or ' expected";
break;
case XML_ERR_LITERAL_NOT_FINISHED:
- errmsg = "Unfinished System or Public ID \" or ' expected\n";
+ errmsg = "Unfinished System or Public ID \" or ' expected";
break;
case XML_ERR_MISPLACED_CDATA_END:
- errmsg = "Sequence ']]>' not allowed in content\n";
+ errmsg = "Sequence ']]>' not allowed in content";
break;
case XML_ERR_URI_REQUIRED:
- errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
+ errmsg = "SYSTEM or PUBLIC, the URI is missing";
break;
case XML_ERR_PUBID_REQUIRED:
- errmsg = "PUBLIC, the Public Identifier is missing\n";
+ errmsg = "PUBLIC, the Public Identifier is missing";
break;
case XML_ERR_HYPHEN_IN_COMMENT:
- errmsg = "Comment must not contain '--' (double-hyphen)\n";
+ errmsg = "Comment must not contain '--' (double-hyphen)";
break;
case XML_ERR_PI_NOT_STARTED:
- errmsg = "xmlParsePI : no target name\n";
+ errmsg = "xmlParsePI : no target name";
break;
case XML_ERR_RESERVED_XML_NAME:
- errmsg = "Invalid PI name\n";
+ errmsg = "Invalid PI name";
break;
case XML_ERR_NOTATION_NOT_STARTED:
- errmsg = "NOTATION: Name expected here\n";
+ errmsg = "NOTATION: Name expected here";
break;
case XML_ERR_NOTATION_NOT_FINISHED:
- errmsg = "'>' required to close NOTATION declaration\n";
+ errmsg = "'>' required to close NOTATION declaration";
break;
case XML_ERR_VALUE_REQUIRED:
- errmsg = "Entity value required\n";
+ errmsg = "Entity value required";
break;
case XML_ERR_URI_FRAGMENT:
errmsg = "Fragment not allowed";
break;
case XML_ERR_ATTLIST_NOT_STARTED:
- errmsg = "'(' required to start ATTLIST enumeration\n";
+ errmsg = "'(' required to start ATTLIST enumeration";
break;
case XML_ERR_NMTOKEN_REQUIRED:
- errmsg = "NmToken expected in ATTLIST enumeration\n";
+ errmsg = "NmToken expected in ATTLIST enumeration";
break;
case XML_ERR_ATTLIST_NOT_FINISHED:
- errmsg = "')' required to finish ATTLIST enumeration\n";
+ errmsg = "')' required to finish ATTLIST enumeration";
break;
case XML_ERR_MIXED_NOT_STARTED:
- errmsg = "MixedContentDecl : '|' or ')*' expected\n";
+ errmsg = "MixedContentDecl : '|' or ')*' expected";
break;
case XML_ERR_PCDATA_REQUIRED:
- errmsg = "MixedContentDecl : '#PCDATA' expected\n";
+ errmsg = "MixedContentDecl : '#PCDATA' expected";
break;
case XML_ERR_ELEMCONTENT_NOT_STARTED:
- errmsg = "ContentDecl : Name or '(' expected\n";
+ errmsg = "ContentDecl : Name or '(' expected";
break;
case XML_ERR_ELEMCONTENT_NOT_FINISHED:
- errmsg = "ContentDecl : ',' '|' or ')' expected\n";
+ errmsg = "ContentDecl : ',' '|' or ')' expected";
break;
case XML_ERR_PEREF_IN_INT_SUBSET:
errmsg =
- "PEReference: forbidden within markup decl in internal subset\n";
+ "PEReference: forbidden within markup decl in internal subset";
break;
case XML_ERR_GT_REQUIRED:
- errmsg = "expected '>'\n";
+ errmsg = "expected '>'";
break;
case XML_ERR_CONDSEC_INVALID:
- errmsg = "XML conditional section '[' expected\n";
+ errmsg = "XML conditional section '[' expected";
break;
case XML_ERR_EXT_SUBSET_NOT_FINISHED:
- errmsg = "Content error in the external subset\n";
+ errmsg = "Content error in the external subset";
break;
case XML_ERR_CONDSEC_INVALID_KEYWORD:
errmsg =
- "conditional section INCLUDE or IGNORE keyword expected\n";
+ "conditional section INCLUDE or IGNORE keyword expected";
break;
case XML_ERR_CONDSEC_NOT_FINISHED:
- errmsg = "XML conditional section not closed\n";
+ errmsg = "XML conditional section not closed";
break;
case XML_ERR_XMLDECL_NOT_STARTED:
- errmsg = "Text declaration '<?xml' required\n";
+ errmsg = "Text declaration '<?xml' required";
break;
case XML_ERR_XMLDECL_NOT_FINISHED:
- errmsg = "parsing XML declaration: '?>' expected\n";
+ errmsg = "parsing XML declaration: '?>' expected";
break;
case XML_ERR_EXT_ENTITY_STANDALONE:
- errmsg = "external parsed entities cannot be standalone\n";
+ errmsg = "external parsed entities cannot be standalone";
break;
case XML_ERR_ENTITYREF_SEMICOL_MISSING:
- errmsg = "EntityRef: expecting ';'\n";
+ errmsg = "EntityRef: expecting ';'";
break;
case XML_ERR_DOCTYPE_NOT_FINISHED:
- errmsg = "DOCTYPE improperly terminated\n";
+ errmsg = "DOCTYPE improperly terminated";
break;
case XML_ERR_LTSLASH_REQUIRED:
- errmsg = "EndTag: '</' not found\n";
+ errmsg = "EndTag: '</' not found";
break;
case XML_ERR_EQUAL_REQUIRED:
- errmsg = "expected '='\n";
+ errmsg = "expected '='";
break;
case XML_ERR_STRING_NOT_CLOSED:
- errmsg = "String not closed expecting \" or '\n";
+ errmsg = "String not closed expecting \" or '";
break;
case XML_ERR_STRING_NOT_STARTED:
- errmsg = "String not started expecting ' or \"\n";
+ errmsg = "String not started expecting ' or \"";
break;
case XML_ERR_ENCODING_NAME:
- errmsg = "Invalid XML encoding name\n";
+ errmsg = "Invalid XML encoding name";
break;
case XML_ERR_STANDALONE_VALUE:
- errmsg = "standalone accepts only 'yes' or 'no'\n";
+ errmsg = "standalone accepts only 'yes' or 'no'";
break;
case XML_ERR_DOCUMENT_EMPTY:
- errmsg = "Document is empty\n";
+ errmsg = "Document is empty";
break;
case XML_ERR_DOCUMENT_END:
- errmsg = "Extra content at the end of the document\n";
+ errmsg = "Extra content at the end of the document";
break;
case XML_ERR_NOT_WELL_BALANCED:
- errmsg = "chunk is not well balanced\n";
+ errmsg = "chunk is not well balanced";
break;
case XML_ERR_EXTRA_CONTENT:
- errmsg = "extra content at the end of well balanced chunk\n";
+ errmsg = "extra content at the end of well balanced chunk";
break;
case XML_ERR_VERSION_MISSING:
- errmsg = "Malformed declaration expecting version\n";
+ errmsg = "Malformed declaration expecting version";
+ break;
+ case XML_ERR_NAME_TOO_LONG:
+ errmsg = "Name too long use XML_PARSE_HUGE option";
break;
#if 0
case:
- errmsg = "\n";
+ errmsg = "";
break;
#endif
default:
- errmsg = "Unregistered error message\n";
+ errmsg = "Unregistered error message";
}
+ if (info == NULL)
+ snprintf(errstr, 128, "%s\n", errmsg);
+ else
+ snprintf(errstr, 128, "%s: %%s\n", errmsg);
if (ctxt != NULL)
ctxt->errNo = error;
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
- XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
+ XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
info);
if (ctxt != NULL) {
ctxt->wellFormed = 0;
@@ -626,7 +649,7 @@ xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
*/
static void
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
- const char *msg, const xmlChar *str1, int val,
+ const char *msg, const xmlChar *str1, int val,
const xmlChar *str2)
{
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
@@ -754,7 +777,7 @@ xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
/************************************************************************
* *
- * Library wide options *
+ * Library wide options *
* *
************************************************************************/
@@ -978,7 +1001,7 @@ xmlHasFeature(xmlFeature feature)
/************************************************************************
* *
- * SAX2 defaulted attributes handling *
+ * SAX2 defaulted attributes handling *
* *
************************************************************************/
@@ -1002,8 +1025,8 @@ xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
- if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
- (ctxt->str_xml_ns == NULL)) {
+ if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
+ (ctxt->str_xml_ns == NULL)) {
xmlErrMemory(ctxt, NULL);
}
}
@@ -1922,7 +1945,7 @@ static int spacePop(xmlParserCtxtPtr ctxt) {
* to compare on ASCII based substring.
* SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
* strings without newlines within the parser.
- * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
+ * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
* defined char within the parser.
* Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
*
@@ -1971,10 +1994,10 @@ static int spacePop(xmlParserCtxtPtr ctxt) {
#define SKIPL(val) do { \
int skipl; \
for(skipl=0; skipl<val; skipl++) { \
- if (*(ctxt->input->cur) == '\n') { \
+ if (*(ctxt->input->cur) == '\n') { \
ctxt->input->line++; ctxt->input->col = 1; \
- } else ctxt->input->col++; \
- ctxt->nbChars++; \
+ } else ctxt->input->col++; \
+ ctxt->nbChars++; \
ctxt->input->cur++; \
} \
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
@@ -2000,6 +2023,12 @@ static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
xmlGROW (ctxt);
static void xmlGROW (xmlParserCtxtPtr ctxt) {
+ if ((((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
+ ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
+ ctxt->instate = XML_PARSER_EOF;
+ }
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
@@ -2144,6 +2173,8 @@ xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
}
ret = inputPush(ctxt, input);
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(-1);
GROW;
return(ret);
}
@@ -2159,7 +2190,7 @@ xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
*
* [ WFC: Legal Character ]
* Characters referred to using character references must match the
- * production for Char.
+ * production for Char.
*
* Returns the value parsed (as an int), 0 in case of error
*/
@@ -2180,8 +2211,10 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
if (count++ > 20) {
count = 0;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(0);
}
- if ((RAW >= '0') && (RAW <= '9'))
+ if ((RAW >= '0') && (RAW <= '9'))
val = val * 16 + (CUR - '0');
else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
val = val * 16 + (CUR - 'a') + 10;
@@ -2211,8 +2244,10 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
if (count++ > 20) {
count = 0;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(0);
}
- if ((RAW >= '0') && (RAW <= '9'))
+ if ((RAW >= '0') && (RAW <= '9'))
val = val * 10 + (CUR - '0');
else {
xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
@@ -2238,7 +2273,7 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
/*
* [ WFC: Legal Character ]
* Characters referred to using character references must match the
- * production for Char.
+ * production for Char.
*/
if ((IS_CHAR(val) && (outofrange == 0))) {
return(val);
@@ -2263,7 +2298,7 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
*
* [ WFC: Legal Character ]
* Characters referred to using character references must match the
- * production for Char.
+ * production for Char.
*
* Returns the value parsed (as an int), 0 in case of error, str will be
* updated to the current value of the index
@@ -2282,7 +2317,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
ptr += 3;
cur = *ptr;
while (cur != ';') { /* Non input consuming loop */
- if ((cur >= '0') && (cur <= '9'))
+ if ((cur >= '0') && (cur <= '9'))
val = val * 16 + (cur - '0');
else if ((cur >= 'a') && (cur <= 'f'))
val = val * 16 + (cur - 'a') + 10;
@@ -2305,7 +2340,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
ptr += 2;
cur = *ptr;
while (cur != ';') { /* Non input consuming loops */
- if ((cur >= '0') && (cur <= '9'))
+ if ((cur >= '0') && (cur <= '9'))
val = val * 10 + (cur - '0');
else {
xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
@@ -2329,7 +2364,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
/*
* [ WFC: Legal Character ]
* Characters referred to using character references must match the
- * production for Char.
+ * production for Char.
*/
if ((IS_CHAR(val) && (outofrange == 0))) {
return(val);
@@ -2351,9 +2386,9 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
*
* Returns the new input stream or NULL
*/
-
+
static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
-
+
static xmlParserInputPtr
xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
xmlParserInputPtr input;
@@ -2376,7 +2411,7 @@ xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
if (buffer == NULL) {
xmlErrMemory(ctxt, NULL);
xmlFree(input);
- return(NULL);
+ return(NULL);
}
buffer [0] = ' ';
buffer [1] = '%';
@@ -2395,12 +2430,12 @@ xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
/**
* xmlParserHandlePEReference:
* @ctxt: the parser context
- *
+ *
* [69] PEReference ::= '%' Name ';'
*
* [ WFC: No Recursion ]
* A parsed entity must not contain a recursive
- * reference to itself, either directly or indirectly.
+ * reference to itself, either directly or indirectly.
*
* [ WFC: Entity Declared ]
* In a document without any DTD, a document with only an internal DTD
@@ -2418,9 +2453,9 @@ xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
* NOTE: misleading but this is handled.
*
* A PEReference may have been detected in the current input stream
- * the handling is done accordingly to
+ * the handling is done accordingly to
* http://www.w3.org/TR/REC-xml#entproc
- * i.e.
+ * i.e.
* - Included in literal in entity values
* - Included as Parameter Entity reference within DTDs
*/
@@ -2498,7 +2533,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
if (entity == NULL) {
-
+
/*
* [ WFC: Entity Declared ]
* In a document without any DTD, a document with only an
@@ -2524,7 +2559,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
"PEReference: %%%s; not found\n",
name, NULL);
- } else
+ } else
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
"PEReference: %%%s; not found\n",
name, NULL);
@@ -2549,7 +2584,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
if (xmlPushInput(ctxt, input) < 0)
return;
- /*
+ /*
* Get the 4 first bytes and decode the charset
* if enc != XML_CHAR_ENCODING_NONE
* plug some encoding conversion routines.
@@ -2559,6 +2594,8 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
* the amount of data in the buffer.
*/
GROW
+ if (ctxt->instate == XML_PARSER_EOF)
+ return;
if ((ctxt->input->end - ctxt->input->cur)>=4) {
start[0] = RAW;
start[1] = NXT(1);
@@ -2589,15 +2626,17 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
/*
* Macro used to grow the current buffer.
+ * buffer##_size is expected to be a size_t
+ * mem_error: is expected to handle memory allocation failures
*/
#define growBuffer(buffer, n) { \
xmlChar *tmp; \
- buffer##_size *= 2; \
- buffer##_size += n; \
- tmp = (xmlChar *) \
- xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
+ size_t new_size = buffer##_size * 2 + n; \
+ if (new_size < buffer##_size) goto mem_error; \
+ tmp = (xmlChar *) xmlRealloc(buffer, new_size); \
if (tmp == NULL) goto mem_error; \
buffer = tmp; \
+ buffer##_size = new_size; \
}
/**
@@ -2609,7 +2648,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
* @end: an end marker xmlChar, 0 if none
* @end2: an end marker xmlChar, 0 if none
* @end3: an end marker xmlChar, 0 if none
- *
+ *
* Takes a entity string content and process to do the adequate substitutions.
*
* [67] Reference ::= EntityRef | CharRef
@@ -2623,14 +2662,14 @@ xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
int what, xmlChar end, xmlChar end2, xmlChar end3) {
xmlChar *buffer = NULL;
- int buffer_size = 0;
+ size_t buffer_size = 0;
+ size_t nbchars = 0;
xmlChar *current = NULL;
xmlChar *rep = NULL;
const xmlChar *last;
xmlEntityPtr ent;
int c,l;
- int nbchars = 0;
if ((ctxt == NULL) || (str == NULL) || (len < 0))
return(NULL);
@@ -2647,7 +2686,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
* allocate a translation buffer.
*/
buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
- buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
+ buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
if (buffer == NULL) goto mem_error;
/*
@@ -2667,7 +2706,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
if (val != 0) {
COPY_BUF(0,buffer,nbchars,val);
}
- if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
+ if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
}
} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
@@ -2685,7 +2724,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
if (ent->content != NULL) {
COPY_BUF(0,buffer,nbchars,ent->content[0]);
- if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
+ if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
}
} else {
@@ -2702,8 +2741,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
current = rep;
while (*current != 0) { /* non input consuming loop */
buffer[nbchars++] = *current++;
- if (nbchars >
- buffer_size - XML_PARSER_BUFFER_SIZE) {
+ if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
if (xmlParserEntityCheck(ctxt, nbchars, ent))
goto int_error;
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
@@ -2717,7 +2755,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
const xmlChar *cur = ent->name;
buffer[nbchars++] = '&';
- if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
+ if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
}
for (;i > 0;i--)
@@ -2745,8 +2783,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
current = rep;
while (*current != 0) { /* non input consuming loop */
buffer[nbchars++] = *current++;
- if (nbchars >
- buffer_size - XML_PARSER_BUFFER_SIZE) {
+ if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
if (xmlParserEntityCheck(ctxt, nbchars, ent))
goto int_error;
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
@@ -2759,8 +2796,8 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
} else {
COPY_BUF(l,buffer,nbchars,c);
str += l;
- if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
- growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
+ if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
+ growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
}
}
if (str < last)
@@ -2789,7 +2826,7 @@ int_error:
* @end: an end marker xmlChar, 0 if none
* @end2: an end marker xmlChar, 0 if none
* @end3: an end marker xmlChar, 0 if none
- *
+ *
* Takes a entity string content and process to do the adequate substitutions.
*
* [67] Reference ::= EntityRef | CharRef
@@ -3152,7 +3189,7 @@ xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
} else {
if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
(c == '.') || (c == '-') ||
- (c == '_') || (c == ':') ||
+ (c == '_') || (c == ':') ||
(IS_COMBINING(c)) ||
(IS_EXTENDER(c)))
return(1);
@@ -3177,6 +3214,8 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
* Handler for more complex cases
*/
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
c = CUR_CHAR(l);
if ((ctxt->options & XML_PARSE_OLD10) == 0) {
/*
@@ -3225,9 +3264,11 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
((c >= 0xFDF0) && (c <= 0xFFFD)) ||
((c >= 0x10000) && (c <= 0xEFFFF))
)) {
- if (count++ > 100) {
+ if (count++ > XML_PARSER_CHUNK_SIZE) {
count = 0;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
}
len += l;
NEXTL(l);
@@ -3246,18 +3287,32 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
((IS_LETTER(c)) || (IS_DIGIT(c)) ||
(c == '.') || (c == '-') ||
- (c == '_') || (c == ':') ||
+ (c == '_') || (c == ':') ||
(IS_COMBINING(c)) ||
(IS_EXTENDER(c)))) {
- if (count++ > 100) {
+ if (count++ > XML_PARSER_CHUNK_SIZE) {
count = 0;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
}
len += l;
NEXTL(l);
c = CUR_CHAR(l);
+ if (c == 0) {
+ count = 0;
+ GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
+ c = CUR_CHAR(l);
+ }
}
}
+ if ((len > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
+ return(NULL);
+ }
if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
@@ -3307,6 +3362,11 @@ xmlParseName(xmlParserCtxtPtr ctxt) {
in++;
if ((*in > 0) && (*in < 0x80)) {
count = in - ctxt->input->cur;
+ if ((count > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
+ return(NULL);
+ }
ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
ctxt->input->cur = in;
ctxt->nbChars += count;
@@ -3342,13 +3402,32 @@ xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
(xmlIsNameChar(ctxt, c) && (c != ':'))) {
- if (count++ > 100) {
+ if (count++ > XML_PARSER_CHUNK_SIZE) {
+ if ((len > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
+ return(NULL);
+ }
count = 0;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
}
len += l;
NEXTL(l);
c = CUR_CHAR(l);
+ if (c == 0) {
+ count = 0;
+ GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
+ c = CUR_CHAR(l);
+ }
+ }
+ if ((len > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
+ return(NULL);
}
return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
}
@@ -3394,6 +3473,11 @@ xmlParseNCName(xmlParserCtxtPtr ctxt) {
in++;
if ((*in > 0) && (*in < 0x80)) {
count = in - ctxt->input->cur;
+ if ((count > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
+ return(NULL);
+ }
ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
ctxt->input->cur = in;
ctxt->nbChars += count;
@@ -3425,6 +3509,8 @@ xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
const xmlChar *ret;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
in = ctxt->input->cur;
while (*in != 0 && *in == *cmp) {
@@ -3460,7 +3546,7 @@ xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
*
* [6] Names ::= Name (#x20 Name)*
*
- * Returns the Name parsed or NULL. The @str pointer
+ * Returns the Name parsed or NULL. The @str pointer
* is updated to the current location in the string.
*/
@@ -3504,6 +3590,13 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
while (xmlIsNameChar(ctxt, c)) {
if (len + 10 > max) {
xmlChar *tmp;
+
+ if ((len > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
+ xmlFree(buffer);
+ return(NULL);
+ }
max *= 2;
tmp = (xmlChar *) xmlRealloc(buffer,
max * sizeof(xmlChar));
@@ -3523,6 +3616,11 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
return(buffer);
}
}
+ if ((len > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
+ return(NULL);
+ }
*str = cur;
return(xmlStrndup(buf, len));
}
@@ -3552,16 +3650,25 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
#endif
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
c = CUR_CHAR(l);
while (xmlIsNameChar(ctxt, c)) {
- if (count++ > 100) {
+ if (count++ > XML_PARSER_CHUNK_SIZE) {
count = 0;
GROW;
}
COPY_BUF(l,buf,len,c);
NEXTL(l);
c = CUR_CHAR(l);
+ if (c == 0) {
+ count = 0;
+ GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
+ c = CUR_CHAR(l);
+ }
if (len >= XML_MAX_NAMELEN) {
/*
* Okay someone managed to make a huge token, so he's ready to pay
@@ -3577,13 +3684,23 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
}
memcpy(buffer, buf, len);
while (xmlIsNameChar(ctxt, c)) {
- if (count++ > 100) {
+ if (count++ > XML_PARSER_CHUNK_SIZE) {
count = 0;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buffer);
+ return(NULL);
+ }
}
if (len + 10 > max) {
xmlChar *tmp;
+ if ((max > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
+ xmlFree(buffer);
+ return(NULL);
+ }
max *= 2;
tmp = (xmlChar *) xmlRealloc(buffer,
max * sizeof(xmlChar));
@@ -3604,6 +3721,11 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
}
if (len == 0)
return(NULL);
+ if ((len > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
+ return(NULL);
+ }
return(xmlStrndup(buf, len));
}
@@ -3650,6 +3772,10 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
ctxt->instate = XML_PARSER_ENTITY_VALUE;
input = ctxt->input;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return(NULL);
+ }
NEXT;
c = CUR_CHAR(l);
/*
@@ -3657,12 +3783,12 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
* When a parameter entity reference appears in a literal entity
* value, ... a single or double quote character in the replacement
* text is always treated as a normal data character and will not
- * terminate the literal.
+ * terminate the literal.
* In practice it means we stop the loop only when back at parsing
* the initial entity and the quote is found
*/
- while ((IS_CHAR(c)) && ((c != stop) || /* checked */
- (ctxt->input != input))) {
+ while (((IS_CHAR(c)) && ((c != stop) || /* checked */
+ (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
if (len + 5 >= size) {
xmlChar *tmp;
@@ -3691,6 +3817,10 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
}
}
buf[len] = 0;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return(NULL);
+ }
/*
* Raise problem w.r.t. '&' and '%' being used in non-entities
@@ -3738,12 +3868,12 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
*/
ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
0, 0, 0);
- if (orig != NULL)
+ if (orig != NULL)
*orig = buf;
else
xmlFree(buf);
}
-
+
return(ret);
}
@@ -3764,8 +3894,8 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
xmlChar limit = 0;
xmlChar *buf = NULL;
xmlChar *rep = NULL;
- int len = 0;
- int buf_size = 0;
+ size_t len = 0;
+ size_t buf_size = 0;
int c, l, in_space = 0;
xmlChar *current = NULL;
xmlEntityPtr ent;
@@ -3787,15 +3917,26 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
* allocate a translation buffer.
*/
buf_size = XML_PARSER_BUFFER_SIZE;
- buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
+ buf = (xmlChar *) xmlMallocAtomic(buf_size);
if (buf == NULL) goto mem_error;
/*
* OK loop until we reach one of the ending char or a size limit.
*/
c = CUR_CHAR(l);
- while ((NXT(0) != limit) && /* checked */
- (IS_CHAR(c)) && (c != '<')) {
+ while (((NXT(0) != limit) && /* checked */
+ (IS_CHAR(c)) && (c != '<')) &&
+ (ctxt->instate != XML_PARSER_EOF)) {
+ /*
+ * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
+ * special option is given
+ */
+ if ((len > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
+ "AttValue lenght too long\n");
+ goto mem_error;
+ }
if (c == 0) break;
if (c == '&') {
in_space = 0;
@@ -3804,7 +3945,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
if (val == '&') {
if (ctxt->replaceEntities) {
- if (len > buf_size - 10) {
+ if (len + 10 > buf_size) {
growBuffer(buf, 10);
}
buf[len++] = '&';
@@ -3813,7 +3954,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
* The reparsing will be done in xmlStringGetNodeList()
* called by the attribute() function in SAX.c
*/
- if (len > buf_size - 10) {
+ if (len + 10 > buf_size) {
growBuffer(buf, 10);
}
buf[len++] = '&';
@@ -3823,7 +3964,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
buf[len++] = ';';
}
} else if (val != 0) {
- if (len > buf_size - 10) {
+ if (len + 10 > buf_size) {
growBuffer(buf, 10);
}
len += xmlCopyChar(0, &buf[len], val);
@@ -3835,7 +3976,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
ctxt->nbentities += ent->owner;
if ((ent != NULL) &&
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
- if (len > buf_size - 10) {
+ if (len + 10 > buf_size) {
growBuffer(buf, 10);
}
if ((ctxt->replaceEntities == 0) &&
@@ -3848,7 +3989,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
} else {
buf[len++] = ent->content[0];
}
- } else if ((ent != NULL) &&
+ } else if ((ent != NULL) &&
(ctxt->replaceEntities != 0)) {
if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
rep = xmlStringDecodeEntities(ctxt, ent->content,
@@ -3863,7 +4004,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
current++;
} else
buf[len++] = *current++;
- if (len > buf_size - 10) {
+ if (len + 10 > buf_size) {
growBuffer(buf, 10);
}
}
@@ -3871,7 +4012,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
rep = NULL;
}
} else {
- if (len > buf_size - 10) {
+ if (len + 10 > buf_size) {
growBuffer(buf, 10);
}
if (ent->content != NULL)
@@ -3899,7 +4040,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
* Just output the reference
*/
buf[len++] = '&';
- while (len > buf_size - i - 10) {
+ while (len + i + 10 > buf_size) {
growBuffer(buf, i + 10);
}
for (;i > 0;i--)
@@ -3912,7 +4053,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
if ((len != 0) || (!normalize)) {
if ((!normalize) || (!in_space)) {
COPY_BUF(l,buf,len,0x20);
- while (len > buf_size - 10) {
+ while (len + 10 > buf_size) {
growBuffer(buf, 10);
}
}
@@ -3921,7 +4062,7 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
} else {
in_space = 0;
COPY_BUF(l,buf,len,c);
- if (len > buf_size - 10) {
+ if (len + 10 > buf_size) {
growBuffer(buf, 10);
}
}
@@ -3930,6 +4071,9 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
GROW;
c = CUR_CHAR(l);
}
+ if (ctxt->instate == XML_PARSER_EOF)
+ goto error;
+
if ((in_space) && (normalize)) {
while (buf[len - 1] == 0x20) len--;
}
@@ -3946,11 +4090,23 @@ xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
}
} else
NEXT;
- if (attlen != NULL) *attlen = len;
+
+ /*
+ * There we potentially risk an overflow, don't allow attribute value of
+ * lenght more than INT_MAX it is a very reasonnable assumption !
+ */
+ if (len >= INT_MAX) {
+ xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
+ "AttValue lenght too long\n");
+ goto mem_error;
+ }
+
+ if (attlen != NULL) *attlen = (int) len;
return(buf);
mem_error:
xmlErrMemory(ctxt, NULL);
+error:
if (buf != NULL)
xmlFree(buf);
if (rep != NULL)
@@ -3971,20 +4127,20 @@ mem_error:
*
* 3.3.3 Attribute-Value Normalization:
* Before the value of an attribute is passed to the application or
- * checked for validity, the XML processor must normalize it as follows:
+ * checked for validity, the XML processor must normalize it as follows:
* - a character reference is processed by appending the referenced
* character to the attribute value
* - an entity reference is processed by recursively processing the
- * replacement text of the entity
+ * replacement text of the entity
* - a whitespace character (#x20, #xD, #xA, #x9) is processed by
* appending #x20 to the normalized value, except that only a single
* #x20 is appended for a "#xD#xA" sequence that is part of an external
- * parsed entity or the literal entity value of an internal parsed entity
- * - other characters are processed by appending them to the normalized value
+ * parsed entity or the literal entity value of an internal parsed entity
+ * - other characters are processed by appending them to the normalized value
* If the declared value is not CDATA, then the XML processor must further
* process the normalized attribute value by discarding any leading and
* trailing space (#x20) characters, and by replacing sequences of space
- * (#x20) characters by a single space (#x20) character.
+ * (#x20) characters by a single space (#x20) character.
* All attributes for which no declaration has been read should be treated
* by a non-validating parser as if declared CDATA.
*
@@ -4001,7 +4157,7 @@ xmlParseAttValue(xmlParserCtxtPtr ctxt) {
/**
* xmlParseSystemLiteral:
* @ctxt: an XML parser context
- *
+ *
* parse an XML Literal
*
* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
@@ -4030,7 +4186,7 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
return(NULL);
}
-
+
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
if (buf == NULL) {
xmlErrMemory(ctxt, NULL);
@@ -4042,6 +4198,13 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
if (len + 5 >= size) {
xmlChar *tmp;
+ if ((size > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
+ xmlFree(buf);
+ ctxt->instate = (xmlParserInputState) state;
+ return(NULL);
+ }
size *= 2;
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
if (tmp == NULL) {
@@ -4056,6 +4219,10 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
if (count > 50) {
GROW;
count = 0;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return(NULL);
+ }
}
COPY_BUF(l,buf,len,cur);
NEXTL(l);
@@ -4119,6 +4286,12 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
if (len + 1 >= size) {
xmlChar *tmp;
+ if ((size > XML_MAX_NAME_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
+ xmlFree(buf);
+ return(NULL);
+ }
size *= 2;
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
if (tmp == NULL) {
@@ -4133,6 +4306,10 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
if (count > 50) {
GROW;
count = 0;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return(NULL);
+ }
}
NEXT;
cur = CUR;
@@ -4203,7 +4380,7 @@ static const unsigned char test_char_data[256] = {
* The right angle bracket (>) may be represented using the string "&gt;",
* and must, for compatibility, be escaped using "&gt;" or a character
* reference when it appears in the string "]]>" in content, when that
- * string is not marking the end of a CDATA section.
+ * string is not marking the end of a CDATA section.
*
* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
*/
@@ -4339,6 +4516,8 @@ get_more:
}
SHRINK;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return;
in = ctxt->input->cur;
} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
nbchar = 0;
@@ -4368,7 +4547,7 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
GROW;
cur = CUR_CHAR(l);
while ((cur != '<') && /* checked */
- (cur != '&') &&
+ (cur != '&') &&
(IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
if ((cur == ']') && (NXT(1) == ']') &&
(NXT(2) == '>')) {
@@ -4407,6 +4586,8 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
if (count > 50) {
GROW;
count = 0;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return;
}
NEXTL(l);
cur = CUR_CHAR(l);
@@ -4499,7 +4680,7 @@ xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
}
} else {
/*
- * We handle [83] so we return immediately, if
+ * We handle [83] so we return immediately, if
* "S SystemLiteral" is not detected. From a purely parsing
* point of view that's a nice mess.
*/
@@ -4508,7 +4689,7 @@ xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
ptr = CUR_PTR;
if (!IS_BLANK_CH(*ptr)) return(NULL);
-
+
while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
}
@@ -4536,11 +4717,12 @@ xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
*/
static void
-xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
+xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
+ size_t len, size_t size) {
int q, ql;
int r, rl;
int cur, l;
- int count = 0;
+ size_t count = 0;
int inputid;
inputid = ctxt->input->id;
@@ -4586,16 +4768,26 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
if ((r == '-') && (q == '-')) {
xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
}
+ if ((len > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
+ "Comment too big found", NULL);
+ xmlFree (buf);
+ return;
+ }
if (len + 5 >= size) {
xmlChar *new_buf;
- size *= 2;
- new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
+ size_t new_size;
+
+ new_size = size * 2;
+ new_buf = (xmlChar *) xmlRealloc(buf, new_size);
if (new_buf == NULL) {
xmlFree (buf);
xmlErrMemory(ctxt, NULL);
return;
}
buf = new_buf;
+ size = new_size;
}
COPY_BUF(ql,buf,len,q);
q = r;
@@ -4607,6 +4799,10 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
if (count > 50) {
GROW;
count = 0;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return;
+ }
}
NEXTL(l);
cur = CUR_CHAR(l);
@@ -4656,11 +4852,12 @@ not_terminated:
void
xmlParseComment(xmlParserCtxtPtr ctxt) {
xmlChar *buf = NULL;
- int size = XML_PARSER_BUFFER_SIZE;
- int len = 0;
+ size_t size = XML_PARSER_BUFFER_SIZE;
+ size_t len = 0;
xmlParserInputState state;
const xmlChar *in;
- int nbchar = 0, ccol;
+ size_t nbchar = 0;
+ int ccol;
int inputid;
/*
@@ -4740,6 +4937,13 @@ get_more:
buf[len] = 0;
}
}
+ if ((len > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
+ "Comment too big found", NULL);
+ xmlFree (buf);
+ return;
+ }
ctxt->input->cur = in;
if (*in == 0xA) {
in++;
@@ -4757,6 +4961,10 @@ get_more:
}
SHRINK;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return;
+ }
in = ctxt->input->cur;
if (*in == '-') {
if (in[1] == '-') {
@@ -4803,7 +5011,7 @@ get_more:
/**
* xmlParsePITarget:
* @ctxt: an XML parser context
- *
+ *
* parse the name of a PI
*
* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
@@ -4840,7 +5048,7 @@ xmlParsePITarget(xmlParserCtxtPtr ctxt) {
NULL, NULL);
}
if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
- xmlNsErr(ctxt, XML_NS_ERR_COLON,
+ xmlNsErr(ctxt, XML_NS_ERR_COLON,
"colon are forbidden from PI names '%s'\n", name, NULL, NULL);
}
return(name);
@@ -4851,7 +5059,7 @@ xmlParsePITarget(xmlParserCtxtPtr ctxt) {
* xmlParseCatalogPI:
* @ctxt: an XML parser context
* @catalog: the PI value string
- *
+ *
* parse an XML Catalog Processing Instruction.
*
* <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
@@ -4911,7 +5119,7 @@ error:
/**
* xmlParsePI:
* @ctxt: an XML parser context
- *
+ *
* parse an XML Processing Instruction.
*
* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
@@ -4922,8 +5130,8 @@ error:
void
xmlParsePI(xmlParserCtxtPtr ctxt) {
xmlChar *buf = NULL;
- int len = 0;
- int size = XML_PARSER_BUFFER_SIZE;
+ size_t len = 0;
+ size_t size = XML_PARSER_BUFFER_SIZE;
int cur, l;
const xmlChar *target;
xmlParserInputState state;
@@ -4980,9 +5188,8 @@ xmlParsePI(xmlParserCtxtPtr ctxt) {
((cur != '?') || (NXT(1) != '>'))) {
if (len + 5 >= size) {
xmlChar *tmp;
-
- size *= 2;
- tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
+ size_t new_size = size * 2;
+ tmp = (xmlChar *) xmlRealloc(buf, new_size);
if (tmp == NULL) {
xmlErrMemory(ctxt, NULL);
xmlFree(buf);
@@ -4990,11 +5197,24 @@ xmlParsePI(xmlParserCtxtPtr ctxt) {
return;
}
buf = tmp;
+ size = new_size;
}
count++;
if (count > 50) {
GROW;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return;
+ }
count = 0;
+ if ((len > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
+ "PI %s too big found", target);
+ xmlFree(buf);
+ ctxt->instate = state;
+ return;
+ }
}
COPY_BUF(l,buf,len,cur);
NEXTL(l);
@@ -5005,6 +5225,14 @@ xmlParsePI(xmlParserCtxtPtr ctxt) {
cur = CUR_CHAR(l);
}
}
+ if ((len > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
+ "PI %s too big found", target);
+ xmlFree(buf);
+ ctxt->instate = state;
+ return;
+ }
buf[len] = 0;
if (cur != '?') {
xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
@@ -5066,7 +5294,7 @@ xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
const xmlChar *name;
xmlChar *Pubid;
xmlChar *Systemid;
-
+
if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
xmlParserInputPtr input = ctxt->input;
SHRINK;
@@ -5089,7 +5317,7 @@ xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
return;
}
if (xmlStrchr(name, ':') != NULL) {
- xmlNsErr(ctxt, XML_NS_ERR_COLON,
+ xmlNsErr(ctxt, XML_NS_ERR_COLON,
"colon are forbidden from notation names '%s'\n",
name, NULL, NULL);
}
@@ -5149,7 +5377,7 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
int isParameter = 0;
xmlChar *orig = NULL;
int skipped;
-
+
/* GROW; done in the caller */
if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
xmlParserInputPtr input = ctxt->input;
@@ -5178,7 +5406,7 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
return;
}
if (xmlStrchr(name, ':') != NULL) {
- xmlNsErr(ctxt, XML_NS_ERR_COLON,
+ xmlNsErr(ctxt, XML_NS_ERR_COLON,
"colon are forbidden from entities names '%s'\n",
name, NULL, NULL);
}
@@ -5406,13 +5634,13 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
*
* [ VC: Fixed Attribute Default ]
* if an attribute has a default value declared with the #FIXED
- * keyword, instances of that attribute must match the default value.
+ * keyword, instances of that attribute must match the default value.
*
* [ WFC: No < in Attribute Values ]
* handled in xmlParseAttValue()
*
* returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
- * or XML_ATTRIBUTE_FIXED.
+ * or XML_ATTRIBUTE_FIXED.
*/
int
@@ -5461,7 +5689,7 @@ xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
*
* [ VC: Notation Attributes ]
* Values of this type must match one of the notation names included
- * in the declaration; all notation names in the declaration must be declared.
+ * in the declaration; all notation names in the declaration must be declared.
*
* Returns: the notation attribute tree built while parsing
*/
@@ -5661,15 +5889,15 @@ xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
* [ VC: Entity Name ]
* Values of type ENTITY must match the Name production, values
* of type ENTITIES must match Names; each Entity Name must match the
- * name of an unparsed entity declared in the DTD.
+ * name of an unparsed entity declared in the DTD.
*
* [ VC: Name Token ]
* Values of type NMTOKEN must match the Nmtoken production; values
- * of type NMTOKENS must match Nmtokens.
+ * of type NMTOKENS must match Nmtokens.
*
* Returns the attribute type
*/
-int
+int
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
SHRINK;
if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
@@ -5734,7 +5962,7 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
}
SKIP_BLANKS;
GROW;
- while (RAW != '>') {
+ while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
const xmlChar *check = CUR_PTR;
int type;
int def;
@@ -5812,7 +6040,7 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
xmlFreeEnumeration(tree);
if ((ctxt->sax2) && (defaultValue != NULL) &&
- (def != XML_ATTRIBUTE_IMPLIED) &&
+ (def != XML_ATTRIBUTE_IMPLIED) &&
(def != XML_ATTRIBUTE_REQUIRED)) {
xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
}
@@ -5841,7 +6069,7 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
*
* parse the declaration for a Mixed Element content
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl
- *
+ *
* [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
* '(' S? '#PCDATA' S? ')'
*
@@ -5849,7 +6077,7 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
*
* [ VC: No Duplicate Types ]
* The same name must not appear more than once in a single
- * mixed-content declaration.
+ * mixed-content declaration.
*
* returns: the list of the xmlElementContentPtr describing the element choices
*/
@@ -5883,7 +6111,7 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
if (ret == NULL) return(NULL);
}
- while (RAW == '|') {
+ while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
NEXT;
if (elem == NULL) {
ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
@@ -5949,7 +6177,7 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
*
* parse the declaration for a Mixed Element content
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl
- *
+ *
*
* [47] children ::= (choice | seq) ('?' | '*' | '+')?
*
@@ -5970,7 +6198,7 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
* be empty, and neither the first nor last non-blank character of
* the replacement text should be a connector (| or ,).
*
- * Returns the tree of xmlElementContentPtr describing the element
+ * Returns the tree of xmlElementContentPtr describing the element
* hierarchy.
*/
static xmlElementContentPtr
@@ -6027,7 +6255,7 @@ xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
}
SKIP_BLANKS;
SHRINK;
- while (RAW != ')') {
+ while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
/*
* Each loop we parse one separator and one element.
*/
@@ -6283,7 +6511,7 @@ xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
*
* parse the declaration for an Element content either Mixed or Children,
* the cases EMPTY and ANY are handled directly in xmlParseElementDecl
- *
+ *
* [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
*
* returns: the type of element content XML_ELEMENT_TYPE_xxx
@@ -6306,6 +6534,8 @@ xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
}
NEXT;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(-1);
SKIP_BLANKS;
if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
tree = xmlParseElementMixedContentDecl(ctxt, inputid);
@@ -6409,7 +6639,7 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element declaration doesn't start and stop in the same entity\n");
}
-
+
NEXT;
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
(ctxt->sax->elementDecl != NULL)) {
@@ -6421,7 +6651,7 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
/*
* this is a trick: if xmlAddElementDecl is called,
* instead of copying the full tree it is plugged directly
- * if called from the parser. Avoid duplicating the
+ * if called from the parser. Avoid duplicating the
* interfaces or change the API/ABI
*/
xmlFreeDocElementContent(ctxt->myDoc, content);
@@ -6438,8 +6668,8 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
* xmlParseConditionalSections
* @ctxt: an XML parser context
*
- * [61] conditionalSect ::= includeSect | ignoreSect
- * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
+ * [61] conditionalSect ::= includeSect | ignoreSect
+ * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
* [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
* [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
* [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
@@ -6473,8 +6703,8 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
"Entering INCLUDE Conditional Section\n");
}
- while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
- (NXT(2) != '>'))) {
+ while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
+ (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
const xmlChar *check = CUR_PTR;
unsigned int cons = ctxt->input->consumed;
@@ -6542,7 +6772,8 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
if (ctxt->recovery == 0) ctxt->disableSAX = 1;
ctxt->instate = XML_PARSER_IGNORE;
- while ((depth >= 0) && (RAW != 0)) {
+ while (((depth >= 0) && (RAW != 0)) &&
+ (ctxt->instate != XML_PARSER_EOF)) {
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
depth++;
SKIP(3);
@@ -6590,7 +6821,7 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
/**
* xmlParseMarkupDecl:
* @ctxt: an XML parser context
- *
+ *
* parse Markup declarations
*
* [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
@@ -6607,7 +6838,7 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
* In the internal DTD subset, parameter-entity references can occur
* only where markup declarations can occur, not within markup declarations.
* (This does not apply to references that occur in external parameter
- * entities or to the external subset.)
+ * entities or to the external subset.)
*/
void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
@@ -6736,7 +6967,7 @@ xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
* @ctxt: an XML parser context
* @ExternalID: the external identifier
* @SystemID: the system identifier (or URL)
- *
+ *
* parse Markup declarations from an external subset
*
* [30] extSubset ::= textDecl? extSubsetDecl
@@ -6813,7 +7044,7 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
break;
}
}
-
+
if (RAW != 0) {
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
}
@@ -6915,8 +7146,15 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
* The first reference to the entity trigger a parsing phase
* where the ent->children is filled with the result from
* the parsing.
- */
- if (ent->checked == 0) {
+ * Note: external parsed entities will not be loaded, it is not
+ * required for a non-validating parser, unless the parsing option
+ * of validating, or substituting entities were given. Doing so is
+ * far more secure as the parser will only process data coming from
+ * the document entity by default.
+ */
+ if ((ent->checked == 0) &&
+ ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
+ (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
unsigned long oldnbent = ctxt->nbentities;
/*
@@ -7116,8 +7354,6 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
* Seems we are generating the DOM content, do
* a simple tree copy for all references except the first
* In the first occurrence list contains the replacement.
- * progressive == 2 means we are operating on the Reader
- * and since nodes are discarded we must copy all the time.
*/
if (((list == NULL) && (ent->owner == 0)) ||
(ctxt->parseMode == XML_PARSE_READER)) {
@@ -7160,7 +7396,7 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
xmlAddEntityReference(ent, firstChild, nw);
#endif /* LIBXML_LEGACY_ENABLED */
- } else if (list == NULL) {
+ } else if ((list == NULL) || (ctxt->inputNr > 0)) {
xmlNodePtr nw = NULL, cur, next, last,
firstChild = NULL;
/*
@@ -7260,6 +7496,8 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
xmlEntityPtr ent = NULL;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
if (RAW != '&')
return(NULL);
@@ -7297,7 +7535,7 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
if (ctxt->sax != NULL) {
if (ctxt->sax->getEntity != NULL)
ent = ctxt->sax->getEntity(ctxt->userData, name);
- if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
+ if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
(ctxt->options & XML_PARSE_OLDSAX))
ent = xmlGetPredefinedEntity(name);
if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
@@ -7368,7 +7606,7 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
* [ WFC: No < in Attribute Values ]
* The replacement text of any entity referred to directly or
* indirectly in an attribute value (other than "&lt;") must
- * not contain a <.
+ * not contain a <.
*/
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
(ent != NULL) && (ent->content != NULL) &&
@@ -7397,7 +7635,7 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
/*
* [ WFC: No Recursion ]
* A parsed entity must not contain a recursive reference
- * to itself, either directly or indirectly.
+ * to itself, either directly or indirectly.
* Done somewhere else
*/
return(ent);
@@ -7515,7 +7753,7 @@ xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
* is not obligated to read and process their declarations;
* for such documents, the rule that an entity must be
* declared is a well-formedness constraint only if
- * standalone='yes'.
+ * standalone='yes'.
*/
if (ent == NULL) {
if ((ctxt->standalone == 1) ||
@@ -7606,7 +7844,7 @@ xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
*
* [ WFC: No Recursion ]
* A parsed entity must not contain a recursive
- * reference to itself, either directly or indirectly.
+ * reference to itself, either directly or indirectly.
*
* [ WFC: Entity Declared ]
* In a document without any DTD, a document with only an internal DTD
@@ -7787,12 +8025,25 @@ xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
(IS_CHAR(c))) {
xmlBufferAdd(buf, ctxt->input->cur, l);
- if (count++ > 100) {
+ if (count++ > XML_PARSER_CHUNK_SIZE) {
count = 0;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlBufferFree(buf);
+ return(-1);
+ }
}
NEXTL(l);
c = CUR_CHAR(l);
+ if (c == 0) {
+ count = 0;
+ GROW;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlBufferFree(buf);
+ return(-1);
+ }
+ c = CUR_CHAR(l);
+ }
}
if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
@@ -7932,12 +8183,12 @@ xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
*
* parse a DOCTYPE declaration
*
- * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
+ * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
* ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
*
* [ VC: Root Element Type ]
* The Name in the document type declaration must match the element
- * type of the root element.
+ * type of the root element.
*/
void
@@ -8019,11 +8270,11 @@ xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
ctxt->instate = XML_PARSER_DTD;
NEXT;
/*
- * Parse the succession of Markup declarations and
+ * Parse the succession of Markup declarations and
* PEReferences.
* Subsequence (markupdecl | PEReference | S)*
*/
- while (RAW != ']') {
+ while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
const xmlChar *check = CUR_PTR;
unsigned int cons = ctxt->input->consumed;
@@ -8043,7 +8294,7 @@ xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
break;
}
}
- if (RAW == ']') {
+ if (RAW == ']') {
NEXT;
SKIP_BLANKS;
}
@@ -8074,8 +8325,8 @@ xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
*
* [ WFC: No < in Attribute Values ]
* The replacement text of any entity referred to directly or indirectly in
- * an attribute value (other than "&lt;") must not contain a <.
- *
+ * an attribute value (other than "&lt;") must not contain a <.
+ *
* [ VC: Attribute Value Type ]
* The attribute must have been declared; the value must be of the type
* declared for it.
@@ -8156,7 +8407,7 @@ xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
/**
* xmlParseStartTag:
* @ctxt: an XML parser context
- *
+ *
* parse a start of tag either for rule element or
* EmptyElement. In both case we don't parse the tag closing chars.
*
@@ -8164,13 +8415,13 @@ xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
*
* [ WFC: Unique Att Spec ]
* No attribute name may appear more than once in the same start-tag or
- * empty-element tag.
+ * empty-element tag.
*
* [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
*
* [ WFC: Unique Att Spec ]
* No attribute name may appear more than once in the same start-tag or
- * empty-element tag.
+ * empty-element tag.
*
* With namespace:
*
@@ -8209,9 +8460,9 @@ xmlParseStartTag(xmlParserCtxtPtr ctxt) {
SKIP_BLANKS;
GROW;
- while ((RAW != '>') &&
+ while (((RAW != '>') &&
((RAW != '/') || (NXT(1) != '>')) &&
- (IS_BYTE_CHAR(RAW))) {
+ (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
const xmlChar *q = CUR_PTR;
unsigned int cons = ctxt->input->consumed;
@@ -8220,7 +8471,7 @@ xmlParseStartTag(xmlParserCtxtPtr ctxt) {
/*
* [ WFC: Unique Att Spec ]
* No attribute name may appear more than once in the same
- * start-tag or empty-element tag.
+ * start-tag or empty-element tag.
*/
for (i = 0; i < nbatts;i += 2) {
if (xmlStrEqual(atts[i], attname)) {
@@ -8269,7 +8520,7 @@ xmlParseStartTag(xmlParserCtxtPtr ctxt) {
xmlFree(attvalue);
}
-failed:
+failed:
GROW
if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
@@ -8351,7 +8602,7 @@ xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
/*
* [ WFC: Element Type Match ]
* The Name in an element's end-tag must match the element type in the
- * start-tag.
+ * start-tag.
*
*/
if (name != (xmlChar*)1) {
@@ -8447,7 +8698,7 @@ xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
if (CUR == ':') {
l = xmlParseName(ctxt);
if (l != NULL) {
- xmlNsErr(ctxt, XML_NS_ERR_QNAME,
+ xmlNsErr(ctxt, XML_NS_ERR_QNAME,
"Failed to parse QName '%s'\n", l, NULL, NULL);
*prefix = NULL;
return(l);
@@ -8530,7 +8781,7 @@ xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
cmp = prefix;
while (*in != 0 && *in == *cmp) {
- ++in;
+ ++in;
++cmp;
}
if ((*cmp == 0) && (*in == ':')) {
@@ -8568,20 +8819,20 @@ xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
*
* 3.3.3 Attribute-Value Normalization:
* Before the value of an attribute is passed to the application or
- * checked for validity, the XML processor must normalize it as follows:
+ * checked for validity, the XML processor must normalize it as follows:
* - a character reference is processed by appending the referenced
* character to the attribute value
* - an entity reference is processed by recursively processing the
- * replacement text of the entity
+ * replacement text of the entity
* - a whitespace character (#x20, #xD, #xA, #x9) is processed by
* appending #x20 to the normalized value, except that only a single
* #x20 is appended for a "#xD#xA" sequence that is part of an external
- * parsed entity or the literal entity value of an internal parsed entity
- * - other characters are processed by appending them to the normalized value
+ * parsed entity or the literal entity value of an internal parsed entity
+ * - other characters are processed by appending them to the normalized value
* If the declared value is not CDATA, then the XML processor must further
* process the normalized attribute value by discarding any leading and
* trailing space (#x20) characters, and by replacing sequences of space
- * (#x20) characters by a single space (#x20) character.
+ * (#x20) characters by a single space (#x20) character.
* All attributes for which no declaration has been read should be treated
* by a non-validating parser as if declared CDATA.
*
@@ -8627,7 +8878,7 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
/*
* Skip any leading spaces
*/
- while ((in < end) && (*in != limit) &&
+ while ((in < end) && (*in != limit) &&
((*in == 0x20) || (*in == 0x9) ||
(*in == 0xA) || (*in == 0xD))) {
in++;
@@ -8635,12 +8886,20 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
if (in >= end) {
const xmlChar *oldbase = ctxt->input->base;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
if (oldbase != ctxt->input->base) {
long delta = ctxt->input->base - oldbase;
start = start + delta;
in = in + delta;
}
end = ctxt->input->end;
+ if (((in - start) > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
+ "AttValue lenght too long\n");
+ return(NULL);
+ }
}
}
while ((in < end) && (*in != limit) && (*in >= 0x20) &&
@@ -8649,12 +8908,20 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
if (in >= end) {
const xmlChar *oldbase = ctxt->input->base;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
if (oldbase != ctxt->input->base) {
long delta = ctxt->input->base - oldbase;
start = start + delta;
in = in + delta;
}
end = ctxt->input->end;
+ if (((in - start) > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
+ "AttValue lenght too long\n");
+ return(NULL);
+ }
}
}
last = in;
@@ -8662,13 +8929,15 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
* skip the trailing blanks
*/
while ((last[-1] == 0x20) && (last > start)) last--;
- while ((in < end) && (*in != limit) &&
+ while ((in < end) && (*in != limit) &&
((*in == 0x20) || (*in == 0x9) ||
(*in == 0xA) || (*in == 0xD))) {
in++;
if (in >= end) {
const xmlChar *oldbase = ctxt->input->base;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
if (oldbase != ctxt->input->base) {
long delta = ctxt->input->base - oldbase;
start = start + delta;
@@ -8676,8 +8945,20 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
last = last + delta;
}
end = ctxt->input->end;
- }
- }
+ if (((in - start) > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
+ "AttValue lenght too long\n");
+ return(NULL);
+ }
+ }
+ }
+ if (((in - start) > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
+ "AttValue lenght too long\n");
+ return(NULL);
+ }
if (*in != limit) goto need_complex;
} else {
while ((in < end) && (*in != limit) && (*in >= 0x20) &&
@@ -8686,15 +8967,29 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
if (in >= end) {
const xmlChar *oldbase = ctxt->input->base;
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
if (oldbase != ctxt->input->base) {
long delta = ctxt->input->base - oldbase;
start = start + delta;
in = in + delta;
}
end = ctxt->input->end;
+ if (((in - start) > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
+ "AttValue lenght too long\n");
+ return(NULL);
+ }
}
}
last = in;
+ if (((in - start) > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
+ "AttValue lenght too long\n");
+ return(NULL);
+ }
if (*in != limit) goto need_complex;
}
in++;
@@ -8833,7 +9128,7 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt,
/**
* xmlParseStartTag2:
* @ctxt: an XML parser context
- *
+ *
* parse a start of tag either for rule element or
* EmptyElement. In both case we don't parse the tag closing chars.
* This routine is called when running SAX2 parsing
@@ -8842,13 +9137,13 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt,
*
* [ WFC: Unique Att Spec ]
* No attribute name may appear more than once in the same start-tag or
- * empty-element tag.
+ * empty-element tag.
*
* [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
*
* [ WFC: Unique Att Spec ]
* No attribute name may appear more than once in the same start-tag or
- * empty-element tag.
+ * empty-element tag.
*
* With namespace:
*
@@ -8917,9 +9212,9 @@ reparse:
GROW;
if (ctxt->input->base != base) goto base_changed;
- while ((RAW != '>') &&
+ while (((RAW != '>') &&
((RAW != '/') || (NXT(1) != '>')) &&
- (IS_BYTE_CHAR(RAW))) {
+ (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
const xmlChar *q = CUR_PTR;
unsigned int cons = ctxt->input->consumed;
int len = -1, alloc = 0;
@@ -9090,6 +9385,8 @@ skip_ns:
failed:
GROW
+ if (ctxt->instate == XML_PARSER_EOF)
+ break;
if (ctxt->input->base != base) goto base_changed;
if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
break;
@@ -9181,7 +9478,7 @@ failed:
atts[nbatts++] = defaults->values[5 * i + 3];
if ((ctxt->standalone == 1) &&
(defaults->values[5 * i + 4] != NULL)) {
- xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
+ xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
"standalone: attribute %s on %s defaulted from external subset\n",
attname, localname);
}
@@ -9211,7 +9508,7 @@ failed:
/*
* [ WFC: Unique Att Spec ]
* No attribute name may appear more than once in the same
- * start-tag or empty-element tag.
+ * start-tag or empty-element tag.
* As extended by the Namespace in XML REC.
*/
for (j = 0; j < i;j += 5) {
@@ -9327,6 +9624,8 @@ xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
* We should definitely be at the ending "S? '>'" part
*/
GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return;
SKIP_BLANKS;
if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
@@ -9336,7 +9635,7 @@ xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
/*
* [ WFC: Element Type Match ]
* The Name in an element's end-tag must match the element type in the
- * start-tag.
+ * start-tag.
*
*/
if (name != (xmlChar*)1) {
@@ -9365,7 +9664,7 @@ done:
/**
* xmlParseCDSect:
* @ctxt: an XML parser context
- *
+ *
* Parse escaped pure raw content.
*
* [18] CDSect ::= CDStart CData CDEnd
@@ -9418,14 +9717,21 @@ xmlParseCDSect(xmlParserCtxtPtr ctxt) {
if (len + 5 >= size) {
xmlChar *tmp;
- size *= 2;
- tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
+ if ((size > XML_MAX_TEXT_LENGTH) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
+ "CData section too big found", NULL);
+ xmlFree (buf);
+ return;
+ }
+ tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
if (tmp == NULL) {
xmlFree(buf);
xmlErrMemory(ctxt, NULL);
return;
}
buf = tmp;
+ size *= 2;
}
COPY_BUF(rl,buf,len,r);
r = s;
@@ -9435,6 +9741,10 @@ xmlParseCDSect(xmlParserCtxtPtr ctxt) {
count++;
if (count > 50) {
GROW;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlFree(buf);
+ return;
+ }
count = 0;
}
NEXTL(l);
@@ -9514,7 +9824,7 @@ xmlParseContent(xmlParserCtxtPtr ctxt) {
/*
* Fifth case : a reference. If if has not been resolved,
- * parsing returns it's Name, create the node
+ * parsing returns it's Name, create the node
*/
else if (*cur == '&') {
@@ -9555,7 +9865,7 @@ xmlParseContent(xmlParserCtxtPtr ctxt) {
*
* [ WFC: Element Type Match ]
* The Name in an element's end-tag must match the element type in the
- * start-tag.
+ * start-tag.
*
*/
@@ -9614,7 +9924,7 @@ xmlParseElement(xmlParserCtxtPtr ctxt) {
/*
* [ VC: Root Element Type ]
* The Name in the document type declaration must match the element
- * type of the root element.
+ * type of the root element.
*/
if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
ctxt->node && (ctxt->node == ctxt->myDoc->children))
@@ -9895,7 +10205,7 @@ xmlParseEncName(xmlParserCtxtPtr ctxt) {
/**
* xmlParseEncodingDecl:
* @ctxt: an XML parser context
- *
+ *
* parse the XML encoding declaration
*
* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
@@ -9952,7 +10262,7 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
(!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
/*
* If no encoding was passed to the parser, that we are
- * using UTF-16 and no decoder is present i.e. the
+ * using UTF-16 and no decoder is present i.e. the
* document is apparently UTF-8 compatible, then raise an
* encoding mismatch fatal error
*/
@@ -10003,7 +10313,7 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
* parse the XML standalone declaration
*
* [32] SDDecl ::= S 'standalone' Eq
- * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
+ * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
*
* [ VC: Standalone Document Declaration ]
* TODO The standalone document declaration must have the value "no"
@@ -10083,7 +10393,7 @@ xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
/**
* xmlParseXMLDecl:
* @ctxt: an XML parser context
- *
+ *
* parse an XML declaration header
*
* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
@@ -10197,7 +10507,7 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
/**
* xmlParseMisc:
* @ctxt: an XML parser context
- *
+ *
* parse an XML Misc* optional field.
*
* [27] Misc ::= Comment | PI | S
@@ -10205,9 +10515,10 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
void
xmlParseMisc(xmlParserCtxtPtr ctxt) {
- while (((RAW == '<') && (NXT(1) == '?')) ||
- (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
- IS_BLANK_CH(CUR)) {
+ while ((ctxt->instate != XML_PARSER_EOF) &&
+ (((RAW == '<') && (NXT(1) == '?')) ||
+ (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
+ IS_BLANK_CH(CUR))) {
if ((RAW == '<') && (NXT(1) == '?')) {
xmlParsePI(ctxt);
} else if (IS_BLANK_CH(CUR)) {
@@ -10220,7 +10531,7 @@ xmlParseMisc(xmlParserCtxtPtr ctxt) {
/**
* xmlParseDocument:
* @ctxt: an XML parser context
- *
+ *
* parse an XML document (and build a tree if using the standard SAX
* interface).
*
@@ -10257,7 +10568,7 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
if ((ctxt->encoding == NULL) &&
((ctxt->input->end - ctxt->input->cur) >= 4)) {
- /*
+ /*
* Get the 4 first bytes and decode the charset
* if enc != XML_CHAR_ENCODING_NONE
* plug some encoding conversion routines.
@@ -10400,7 +10711,7 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
/**
* xmlParseExtParsedEnt:
* @ctxt: an XML parser context
- *
+ *
* parse a general parsed entity
* An external general parsed entity is well-formed if it matches the
* production labeled extParsedEnt.
@@ -10431,7 +10742,7 @@ xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
- /*
+ /*
* Get the 4 first bytes and decode the charset
* if enc != XML_CHAR_ENCODING_NONE
* plug some encoding conversion routines.
@@ -10484,7 +10795,7 @@ xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
ctxt->depth = 0;
xmlParseContent(ctxt);
-
+
if ((RAW == '<') && (NXT(1) == '/')) {
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
} else if (RAW != 0) {
@@ -10504,7 +10815,7 @@ xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
#ifdef LIBXML_PUSH_ENABLED
/************************************************************************
* *
- * Progressive parsing interfaces *
+ * Progressive parsing interfaces *
* *
************************************************************************/
@@ -10541,8 +10852,8 @@ xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
buf = in->base;
len = in->length;
} else {
- buf = in->buf->buffer->content;
- len = in->buf->buffer->use;
+ buf = xmlBufContent(in->buf->buffer);
+ len = xmlBufUse(in->buf->buffer);
}
/* take into account the sequence length */
if (third) len -= 2;
@@ -10565,7 +10876,7 @@ xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
xmlGenericError(xmlGenericErrorContext,
"PP: lookup '%c%c' found at %d\n",
first, next, base);
- else
+ else
xmlGenericError(xmlGenericErrorContext,
"PP: lookup '%c%c%c' found at %d\n",
first, next, third, base);
@@ -10581,7 +10892,7 @@ xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
else if (third == 0)
xmlGenericError(xmlGenericErrorContext,
"PP: lookup '%c%c' failed\n", first, next);
- else
+ else
xmlGenericError(xmlGenericErrorContext,
"PP: lookup '%c%c%c' failed\n", first, next, third);
#endif
@@ -10663,7 +10974,7 @@ xmlCheckCdataPush(const xmlChar *utf, int len) {
if ((utf == NULL) || (len <= 0))
return(0);
-
+
for (ix = 0; ix < len;) { /* string is 0-terminated */
c = utf[ix];
if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
@@ -10795,7 +11106,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
return(0);
-
+
/*
* Pop-up of finished entities.
*/
@@ -10812,20 +11123,16 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
* remainng chars to avoid them stalling in the non-converted
* buffer.
*/
- if ((ctxt->input->buf->raw != NULL) &&
- (ctxt->input->buf->raw->use > 0)) {
- int base = ctxt->input->base -
- ctxt->input->buf->buffer->content;
- int current = ctxt->input->cur - ctxt->input->base;
+ if (xmlBufIsEmpty(ctxt->input->buf->buffer) == 0) {
+ size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
+ ctxt->input);
+ size_t current = ctxt->input->cur - ctxt->input->base;
xmlParserInputBufferPush(ctxt->input->buf, 0, "");
- ctxt->input->base = ctxt->input->buf->buffer->content + base;
- ctxt->input->cur = ctxt->input->base + current;
- ctxt->input->end =
- &ctxt->input->buf->buffer->content[
- ctxt->input->buf->buffer->use];
+ xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
+ base, current);
}
- avail = ctxt->input->buf->buffer->use -
+ avail = xmlBufUse(ctxt->input->buf->buffer) -
(ctxt->input->cur - ctxt->input->base);
}
if (avail < 1)
@@ -10847,7 +11154,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
if (avail < 4)
goto done;
- /*
+ /*
* Get the 4 first bytes and decode the charset
* if enc != XML_CHAR_ENCODING_NONE
* plug some encoding conversion routines,
@@ -11002,7 +11309,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
/*
* [ VC: Root Element Type ]
* The Name in the document type declaration must match
- * the element type of the root element.
+ * the element type of the root element.
*/
if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
ctxt->node && (ctxt->node == ctxt->myDoc->children))
@@ -11037,6 +11344,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
} else {
ctxt->instate = XML_PARSER_CONTENT;
}
+ ctxt->progressive = 1;
break;
}
if (RAW == '>') {
@@ -11056,6 +11364,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
#endif /* LIBXML_SAX1_ENABLED */
ctxt->instate = XML_PARSER_CONTENT;
+ ctxt->progressive = 1;
break;
}
case XML_PARSER_CONTENT: {
@@ -11073,9 +11382,13 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
break;
} else if ((cur == '<') && (next == '?')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
+ (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
+ ctxt->progressive = XML_PARSER_PI;
goto done;
+ }
xmlParsePI(ctxt);
+ ctxt->instate = XML_PARSER_CONTENT;
+ ctxt->progressive = 1;
} else if ((cur == '<') && (next != '!')) {
ctxt->instate = XML_PARSER_START_TAG;
break;
@@ -11089,10 +11402,13 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
ctxt->input->cur += 4;
term = xmlParseLookupSequence(ctxt, '-', '-', '>');
ctxt->input->cur -= 4;
- if ((!terminate) && (term < 0))
+ if ((!terminate) && (term < 0)) {
+ ctxt->progressive = XML_PARSER_COMMENT;
goto done;
+ }
xmlParseComment(ctxt);
ctxt->instate = XML_PARSER_CONTENT;
+ ctxt->progressive = 1;
} else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
(ctxt->input->cur[2] == '[') &&
(ctxt->input->cur[3] == 'C') &&
@@ -11187,7 +11503,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
break;
case XML_PARSER_CDATA_SECTION: {
/*
- * The Push mode need to have the SAX callback for
+ * The Push mode need to have the SAX callback for
* cdataBlock merge back contiguous callbacks.
*/
int base;
@@ -11197,7 +11513,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
int tmp;
- tmp = xmlCheckCdataPush(ctxt->input->cur,
+ tmp = xmlCheckCdataPush(ctxt->input->cur,
XML_PARSER_BIG_BUFFER_SIZE);
if (tmp < 0) {
tmp = -tmp;
@@ -11263,7 +11579,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
avail = ctxt->input->length -
(ctxt->input->cur - ctxt->input->base);
else
- avail = ctxt->input->buf->buffer->use -
+ avail = xmlBufUse(ctxt->input->buf->buffer) -
(ctxt->input->cur - ctxt->input->base);
if (avail < 2)
goto done;
@@ -11271,26 +11587,33 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
next = ctxt->input->cur[1];
if ((cur == '<') && (next == '?')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
+ (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
+ ctxt->progressive = XML_PARSER_PI;
goto done;
+ }
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing PI\n");
#endif
xmlParsePI(ctxt);
+ ctxt->instate = XML_PARSER_MISC;
+ ctxt->progressive = 1;
ctxt->checkIndex = 0;
} else if ((cur == '<') && (next == '!') &&
(ctxt->input->cur[2] == '-') &&
(ctxt->input->cur[3] == '-')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
+ (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
+ ctxt->progressive = XML_PARSER_COMMENT;
goto done;
+ }
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing Comment\n");
#endif
xmlParseComment(ctxt);
ctxt->instate = XML_PARSER_MISC;
+ ctxt->progressive = 1;
ctxt->checkIndex = 0;
} else if ((cur == '<') && (next == '!') &&
(ctxt->input->cur[2] == 'D') &&
@@ -11301,13 +11624,17 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
(ctxt->input->cur[7] == 'P') &&
(ctxt->input->cur[8] == 'E')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
+ (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
+ ctxt->progressive = XML_PARSER_DTD;
goto done;
+ }
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing internal subset\n");
#endif
ctxt->inSubset = 1;
+ ctxt->progressive = 1;
+ ctxt->checkIndex = 0;
xmlParseDocTypeDecl(ctxt);
if (RAW == '[') {
ctxt->instate = XML_PARSER_DTD;
@@ -11338,7 +11665,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
goto done;
} else {
ctxt->instate = XML_PARSER_START_TAG;
- ctxt->progressive = 1;
+ ctxt->progressive = XML_PARSER_START_TAG;
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@@ -11351,38 +11678,46 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
if (ctxt->input->buf == NULL)
avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
else
- avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
- if (avail < 2)
+ avail = xmlBufUse(ctxt->input->buf->buffer) -
+ (ctxt->input->cur - ctxt->input->base);
+ if (avail < 2)
goto done;
cur = ctxt->input->cur[0];
next = ctxt->input->cur[1];
if ((cur == '<') && (next == '?')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
+ (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
+ ctxt->progressive = XML_PARSER_PI;
goto done;
+ }
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing PI\n");
#endif
xmlParsePI(ctxt);
+ ctxt->instate = XML_PARSER_PROLOG;
+ ctxt->progressive = 1;
} else if ((cur == '<') && (next == '!') &&
(ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
+ (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
+ ctxt->progressive = XML_PARSER_COMMENT;
goto done;
+ }
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing Comment\n");
#endif
xmlParseComment(ctxt);
ctxt->instate = XML_PARSER_PROLOG;
+ ctxt->progressive = 1;
} else if ((cur == '<') && (next == '!') &&
(avail < 4)) {
goto done;
} else {
ctxt->instate = XML_PARSER_START_TAG;
if (ctxt->progressive == 0)
- ctxt->progressive = 1;
+ ctxt->progressive = XML_PARSER_START_TAG;
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@@ -11395,32 +11730,39 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
if (ctxt->input->buf == NULL)
avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
else
- avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
+ avail = xmlBufUse(ctxt->input->buf->buffer) -
+ (ctxt->input->cur - ctxt->input->base);
if (avail < 2)
goto done;
cur = ctxt->input->cur[0];
next = ctxt->input->cur[1];
if ((cur == '<') && (next == '?')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
+ (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
+ ctxt->progressive = XML_PARSER_PI;
goto done;
+ }
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing PI\n");
#endif
xmlParsePI(ctxt);
ctxt->instate = XML_PARSER_EPILOG;
+ ctxt->progressive = 1;
} else if ((cur == '<') && (next == '!') &&
(ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
if ((!terminate) &&
- (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
+ (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
+ ctxt->progressive = XML_PARSER_COMMENT;
goto done;
+ }
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"PP: Parsing Comment\n");
#endif
xmlParseComment(ctxt);
ctxt->instate = XML_PARSER_EPILOG;
+ ctxt->progressive = 1;
} else if ((cur == '<') && (next == '!') &&
(avail < 4)) {
goto done;
@@ -11450,29 +11792,28 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
int base, i;
xmlChar *buf;
xmlChar quote = 0;
+ size_t use;
base = ctxt->input->cur - ctxt->input->base;
if (base < 0) return(0);
if (ctxt->checkIndex > base)
base = ctxt->checkIndex;
- buf = ctxt->input->buf->buffer->content;
- for (;(unsigned int) base < ctxt->input->buf->buffer->use;
- base++) {
+ buf = xmlBufContent(ctxt->input->buf->buffer);
+ use = xmlBufUse(ctxt->input->buf->buffer);
+ for (;(unsigned int) base < use; base++) {
if (quote != 0) {
if (buf[base] == quote)
quote = 0;
- continue;
+ continue;
}
if ((quote == 0) && (buf[base] == '<')) {
int found = 0;
/* special handling of comments */
- if (((unsigned int) base + 4 <
- ctxt->input->buf->buffer->use) &&
+ if (((unsigned int) base + 4 < use) &&
(buf[base + 1] == '!') &&
(buf[base + 2] == '-') &&
(buf[base + 3] == '-')) {
- for (;(unsigned int) base + 3 <
- ctxt->input->buf->buffer->use; base++) {
+ for (;(unsigned int) base + 3 < use; base++) {
if ((buf[base] == '-') &&
(buf[base + 1] == '-') &&
(buf[base + 2] == '>')) {
@@ -11503,17 +11844,14 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
fprintf(stderr, "%c%c%c%c: ", buf[base],
buf[base + 1], buf[base + 2], buf[base + 3]);
#endif
- if ((unsigned int) base +1 >=
- ctxt->input->buf->buffer->use)
+ if ((unsigned int) base +1 >= use)
break;
if (buf[base + 1] == ']') {
/* conditional crap, skip both ']' ! */
base++;
continue;
}
- for (i = 1;
- (unsigned int) base + i < ctxt->input->buf->buffer->use;
- i++) {
+ for (i = 1; (unsigned int) base + i < use; i++) {
if (buf[base + i] == '>') {
#if 0
fprintf(stderr, "found\n");
@@ -11531,7 +11869,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
fprintf(stderr, "end of stream\n");
#endif
break;
-
+
}
not_end_of_int_subset:
continue; /* for */
@@ -11539,6 +11877,10 @@ not_end_of_int_subset:
/*
* We didn't found the end of the Internal subset
*/
+ if (quote == 0)
+ ctxt->checkIndex = base;
+ else
+ ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
if (next == 0)
xmlGenericError(xmlGenericErrorContext,
@@ -11547,6 +11889,7 @@ not_end_of_int_subset:
goto done;
found_end_int_subset:
+ ctxt->checkIndex = 0;
xmlParseInternalSubset(ctxt);
ctxt->inSubset = 2;
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
@@ -11637,7 +11980,7 @@ found_end_int_subset:
break;
}
}
-done:
+done:
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
#endif
@@ -11657,6 +12000,55 @@ encoding_error:
}
/**
+ * xmlParseCheckTransition:
+ * @ctxt: an XML parser context
+ * @chunk: a char array
+ * @size: the size in byte of the chunk
+ *
+ * Check depending on the current parser state if the chunk given must be
+ * processed immediately or one need more data to advance on parsing.
+ *
+ * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
+ */
+static int
+xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
+ if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
+ return(-1);
+ if (ctxt->instate == XML_PARSER_START_TAG) {
+ if (memchr(chunk, '>', size) != NULL)
+ return(1);
+ return(0);
+ }
+ if (ctxt->progressive == XML_PARSER_COMMENT) {
+ if (memchr(chunk, '>', size) != NULL)
+ return(1);
+ return(0);
+ }
+ if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
+ if (memchr(chunk, '>', size) != NULL)
+ return(1);
+ return(0);
+ }
+ if (ctxt->progressive == XML_PARSER_PI) {
+ if (memchr(chunk, '>', size) != NULL)
+ return(1);
+ return(0);
+ }
+ if (ctxt->instate == XML_PARSER_END_TAG) {
+ if (memchr(chunk, '>', size) != NULL)
+ return(1);
+ return(0);
+ }
+ if ((ctxt->progressive == XML_PARSER_DTD) ||
+ (ctxt->instate == XML_PARSER_DTD)) {
+ if (memchr(chunk, ']', size) != NULL)
+ return(1);
+ return(0);
+ }
+ return(1);
+}
+
+/**
* xmlParseChunk:
* @ctxt: an XML parser context
* @chunk: an char array
@@ -11672,11 +12064,15 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
int terminate) {
int end_in_lf = 0;
int remain = 0;
+ size_t old_avail = 0;
+ size_t avail = 0;
if (ctxt == NULL)
return(XML_ERR_INTERNAL_ERROR);
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
return(ctxt->errNo);
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(-1);
if (ctxt->instate == XML_PARSER_START)
xmlDetectSAX2(ctxt);
if ((size > 0) && (chunk != NULL) && (!terminate) &&
@@ -11689,10 +12085,11 @@ xmldecl_done:
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
(ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
- int base = ctxt->input->base - ctxt->input->buf->buffer->content;
- int cur = ctxt->input->cur - ctxt->input->base;
+ size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
+ size_t cur = ctxt->input->cur - ctxt->input->base;
int res;
+ old_avail = xmlBufUse(ctxt->input->buf->buffer);
/*
* Specific handling if we autodetected an encoding, we should not
* push more than the first line ... which depend on the encoding
@@ -11734,10 +12131,7 @@ xmldecl_done:
ctxt->disableSAX = 1;
return (XML_PARSER_EOF);
}
- ctxt->input->base = ctxt->input->buf->buffer->content + base;
- ctxt->input->cur = ctxt->input->base + cur;
- ctxt->input->end =
- &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
+ xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
#endif
@@ -11749,7 +12143,7 @@ xmldecl_done:
(in->raw != NULL)) {
int nbchars;
- nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
+ nbchars = xmlCharEncInput(in);
if (nbchars < 0) {
/* TODO 2.6.0 */
xmlGenericError(xmlGenericErrorContext,
@@ -11759,10 +12153,31 @@ xmldecl_done:
}
}
}
- if (remain != 0)
+ if (remain != 0) {
xmlParseTryOrFinish(ctxt, 0);
- else
- xmlParseTryOrFinish(ctxt, terminate);
+ } else {
+ if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
+ avail = xmlBufUse(ctxt->input->buf->buffer);
+ /*
+ * Depending on the current state it may not be such
+ * a good idea to try parsing if there is nothing in the chunk
+ * which would be worth doing a parser state transition and we
+ * need to wait for more data
+ */
+ if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
+ (old_avail == 0) || (avail == 0) ||
+ (xmlParseCheckTransition(ctxt,
+ (const char *)&ctxt->input->base[old_avail],
+ avail - old_avail)))
+ xmlParseTryOrFinish(ctxt, terminate);
+ }
+ if ((ctxt->input != NULL) &&
+ (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
+ ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
+ ((ctxt->options & XML_PARSE_HUGE) == 0)) {
+ xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
+ ctxt->instate = XML_PARSER_EOF;
+ }
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
return(ctxt->errNo);
@@ -11780,22 +12195,22 @@ xmldecl_done:
/*
* Check for termination
*/
- int avail = 0;
+ int cur_avail = 0;
if (ctxt->input != NULL) {
if (ctxt->input->buf == NULL)
- avail = ctxt->input->length -
- (ctxt->input->cur - ctxt->input->base);
+ cur_avail = ctxt->input->length -
+ (ctxt->input->cur - ctxt->input->base);
else
- avail = ctxt->input->buf->buffer->use -
- (ctxt->input->cur - ctxt->input->base);
+ cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
+ (ctxt->input->cur - ctxt->input->base);
}
-
+
if ((ctxt->instate != XML_PARSER_EOF) &&
(ctxt->instate != XML_PARSER_EPILOG)) {
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
- }
- if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
+ }
+ if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
}
if (ctxt->instate != XML_PARSER_EOF) {
@@ -11804,12 +12219,12 @@ xmldecl_done:
}
ctxt->instate = XML_PARSER_EOF;
}
- return((xmlParserErrors) ctxt->errNo);
+ return((xmlParserErrors) ctxt->errNo);
}
/************************************************************************
* *
- * I/O front end functions to the parser *
+ * I/O front end functions to the parser *
* *
************************************************************************/
@@ -11833,7 +12248,7 @@ xmldecl_done:
*/
xmlParserCtxtPtr
-xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
+xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
const char *chunk, int size, const char *filename) {
xmlParserCtxtPtr ctxt;
xmlParserInputPtr inputStream;
@@ -11882,7 +12297,7 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
if (user_data != NULL)
ctxt->userData = user_data;
- }
+ }
if (filename == NULL) {
ctxt->directory = NULL;
} else {
@@ -11908,11 +12323,7 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
}
}
inputStream->buf = buf;
- inputStream->base = inputStream->buf->buffer->content;
- inputStream->cur = inputStream->buf->buffer->content;
- inputStream->end =
- &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
-
+ xmlBufResetInput(inputStream->buf->buffer, inputStream);
inputPush(ctxt, inputStream);
/*
@@ -11923,15 +12334,12 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
if ((size == 0) || (chunk == NULL)) {
ctxt->charset = XML_CHAR_ENCODING_NONE;
} else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
- int base = ctxt->input->base - ctxt->input->buf->buffer->content;
- int cur = ctxt->input->cur - ctxt->input->base;
+ size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
+ size_t cur = ctxt->input->cur - ctxt->input->base;
- xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
+ xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
- ctxt->input->base = ctxt->input->buf->buffer->content + base;
- ctxt->input->cur = ctxt->input->base + cur;
- ctxt->input->end =
- &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
+ xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
#endif
@@ -11951,7 +12359,7 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
*
* Blocks further parser processing
*/
-void
+void
xmlStopParser(xmlParserCtxtPtr ctxt) {
if (ctxt == NULL)
return;
@@ -12032,7 +12440,7 @@ xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
#ifdef LIBXML_VALID_ENABLED
/************************************************************************
* *
- * Front ends when parsing a DTD *
+ * Front ends when parsing a DTD *
* *
************************************************************************/
@@ -12043,7 +12451,7 @@ xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
* @enc: the charset encoding if known
*
* Load and parse a DTD
- *
+ *
* Returns the resulting xmlDtdPtr or NULL in case of error.
* @input will be freed by the function in any case.
*/
@@ -12068,7 +12476,7 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
/*
* Set-up the SAX context
*/
- if (sax != NULL) {
+ if (sax != NULL) {
if (ctxt->sax != NULL)
xmlFree(ctxt->sax);
ctxt->sax = sax;
@@ -12122,7 +12530,7 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
if ((enc == XML_CHAR_ENCODING_NONE) &&
((ctxt->input->end - ctxt->input->cur) >= 4)) {
- /*
+ /*
* Get the 4 first bytes and decode the charset
* if enc != XML_CHAR_ENCODING_NONE
* plug some encoding conversion routines.
@@ -12161,7 +12569,7 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
}
if (sax != NULL) ctxt->sax = NULL;
xmlFreeParserCtxt(ctxt);
-
+
return(ret);
}
@@ -12172,7 +12580,7 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
* @SystemID: a NAME* containing the URL to the DTD
*
* Load and parse an external subset.
- *
+ *
* Returns the resulting xmlDtdPtr or NULL in case of error.
*/
@@ -12195,13 +12603,13 @@ xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
/*
* Set-up the SAX context
*/
- if (sax != NULL) {
+ if (sax != NULL) {
if (ctxt->sax != NULL)
xmlFree(ctxt->sax);
ctxt->sax = sax;
ctxt->userData = ctxt;
}
-
+
/*
* Canonicalise the system ID
*/
@@ -12312,7 +12720,7 @@ xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
/************************************************************************
* *
- * Front ends when parsing an Entity *
+ * Front ends when parsing an Entity *
* *
************************************************************************/
@@ -12428,7 +12836,7 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
*/
if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
(!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
- xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
+ xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
"Version mismatch between document and entity\n");
}
}
@@ -12711,7 +13119,7 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
if (ctxt->lastError.code != XML_ERR_OK)
xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
- if (sax != NULL)
+ if (sax != NULL)
ctxt->sax = oldsax;
oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
oldctxt->node_seq.length = ctxt->node_seq.length;
@@ -13530,7 +13938,7 @@ xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
* @filename: the filename or URL
* @options: a combination of xmlParserOption
*
- * Create a parser context for a file or URL content.
+ * Create a parser context for a file or URL content.
* Automatic support for ZLIB/Compress compressed document is provided
* by default if found at compile-time and for file accesses
*
@@ -13572,7 +13980,7 @@ xmlCreateURLParserCtxt(const char *filename, int options)
* xmlCreateFileParserCtxt:
* @filename: the filename
*
- * Create a parser context for a file content.
+ * Create a parser context for a file content.
* Automatic support for ZLIB/Compress compressed document is provided
* by default if found at compile-time.
*
@@ -13650,7 +14058,7 @@ xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
if (sax != NULL)
ctxt->sax = NULL;
xmlFreeParserCtxt(ctxt);
-
+
return(ret);
}
@@ -13750,7 +14158,7 @@ xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
xmlClearParserCtxt(ctxt);
return;
}
-
+
xmlClearParserCtxt(ctxt);
if (filename != NULL)
input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
@@ -13768,7 +14176,7 @@ xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
*
* parse an XML file and call the given SAX handler routines.
* Automatic support for ZLIB/Compress compressed document is provided
- *
+ *
* Returns 0 in case of success or a error number otherwise
*/
int
@@ -13776,7 +14184,7 @@ xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
const char *filename) {
int ret = 0;
xmlParserCtxtPtr ctxt;
-
+
ctxt = xmlCreateFileParserCtxt(filename);
if (ctxt == NULL) return -1;
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
@@ -13786,9 +14194,9 @@ xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
if (user_data != NULL)
ctxt->userData = user_data;
-
+
xmlParseDocument(ctxt);
-
+
if (ctxt->wellFormed)
ret = 0;
else {
@@ -13804,14 +14212,14 @@ xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
ctxt->myDoc = NULL;
}
xmlFreeParserCtxt(ctxt);
-
+
return ret;
}
#endif /* LIBXML_SAX1_ENABLED */
/************************************************************************
* *
- * Front ends when parsing from memory *
+ * Front ends when parsing from memory *
* *
************************************************************************/
@@ -13855,9 +14263,7 @@ xmlCreateMemoryParserCtxt(const char *buffer, int size) {
input->filename = NULL;
input->buf = buf;
- input->base = input->buf->buffer->content;
- input->cur = input->buf->buffer->content;
- input->end = &input->buf->buffer->content[input->buf->buffer->use];
+ xmlBufResetInput(input->buf->buffer, input);
inputPush(ctxt, input);
return(ctxt);
@@ -13913,7 +14319,7 @@ xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
xmlFreeDoc(ctxt->myDoc);
ctxt->myDoc = NULL;
}
- if (sax != NULL)
+ if (sax != NULL)
ctxt->sax = NULL;
xmlFreeParserCtxt(ctxt);
@@ -13931,7 +14337,7 @@ xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
* parse an XML in-memory block and use the given SAX function block
* to handle the parsing callback. If sax is NULL, fallback to the default
* DOM tree building routines.
- *
+ *
* Returns the resulting document tree
*/
xmlDocPtr
@@ -13946,7 +14352,7 @@ xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
* @size: the size of the array
*
* parse an XML in-memory block and build a tree.
- *
+ *
* Returns the resulting document tree
*/
@@ -14000,7 +14406,7 @@ int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
ctxt->userData = user_data;
xmlParseDocument(ctxt);
-
+
if (ctxt->wellFormed)
ret = 0;
else {
@@ -14016,7 +14422,7 @@ int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
ctxt->myDoc = NULL;
}
xmlFreeParserCtxt(ctxt);
-
+
return ret;
}
#endif /* LIBXML_SAX1_ENABLED */
@@ -14050,7 +14456,7 @@ xmlCreateDocParserCtxt(const xmlChar *cur) {
* parse an XML in-memory document and build a tree.
* It use the given SAX function block to handle the parsing callback.
* If sax is NULL, fallback to the default DOM tree building routines.
- *
+ *
* Returns the resulting document tree
*/
@@ -14065,7 +14471,7 @@ xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
ctxt = xmlCreateDocParserCtxt(cur);
if (ctxt == NULL) return(NULL);
- if (sax != NULL) {
+ if (sax != NULL) {
oldsax = ctxt->sax;
ctxt->sax = sax;
ctxt->userData = NULL;
@@ -14082,7 +14488,7 @@ xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
if (sax != NULL)
ctxt->sax = oldsax;
xmlFreeParserCtxt(ctxt);
-
+
return(ret);
}
@@ -14091,7 +14497,7 @@ xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
* @cur: a pointer to an array of xmlChar
*
* parse an XML in-memory document and build a tree.
- *
+ *
* Returns the resulting document tree
*/
@@ -14104,8 +14510,8 @@ xmlParseDoc(const xmlChar *cur) {
#ifdef LIBXML_LEGACY_ENABLED
/************************************************************************
* *
- * Specific function to keep track of entities references *
- * and used by the XSLT debugger *
+ * Specific function to keep track of entities references *
+ * and used by the XSLT debugger *
* *
************************************************************************/
@@ -14115,7 +14521,7 @@ static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
* xmlAddEntityReference:
* @ent : A valid entity
* @firstNode : A valid first node for children of entity
- * @lastNode : A valid last node of children entity
+ * @lastNode : A valid last node of children entity
*
* Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
*/
@@ -14144,7 +14550,7 @@ xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
/************************************************************************
* *
- * Miscellaneous *
+ * Miscellaneous *
* *
************************************************************************/
@@ -14260,7 +14666,7 @@ xmlCleanupParser(void) {
* current scope
*/
#define DICT_FREE(str) \
- if ((str) && ((!dict) || \
+ if ((str) && ((!dict) || \
(xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
xmlFree((char *)(str));
@@ -14275,7 +14681,7 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt)
{
xmlParserInputPtr input;
xmlDictPtr dict;
-
+
if (ctxt == NULL)
return;
@@ -14427,25 +14833,18 @@ xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
inputStream->filename = (char *)
xmlCanonicPath((const xmlChar *) filename);
inputStream->buf = buf;
- inputStream->base = inputStream->buf->buffer->content;
- inputStream->cur = inputStream->buf->buffer->content;
- inputStream->end =
- &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
+ xmlBufResetInput(buf->buffer, inputStream);
inputPush(ctxt, inputStream);
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
(ctxt->input->buf != NULL)) {
- int base = ctxt->input->base - ctxt->input->buf->buffer->content;
- int cur = ctxt->input->cur - ctxt->input->base;
+ size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
+ size_t cur = ctxt->input->cur - ctxt->input->base;
xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
- ctxt->input->base = ctxt->input->buf->buffer->content + base;
- ctxt->input->cur = ctxt->input->base + cur;
- ctxt->input->end =
- &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
- use];
+ xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
#endif
@@ -14596,6 +14995,8 @@ xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encodi
if (options & XML_PARSE_HUGE) {
ctxt->options |= XML_PARSE_HUGE;
options -= XML_PARSE_HUGE;
+ if (ctxt->dict != NULL)
+ xmlDictSetLimit(ctxt->dict, 0);
}
if (options & XML_PARSE_OLDSAX) {
ctxt->options |= XML_PARSE_OLDSAX;
@@ -14605,6 +15006,10 @@ xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encodi
ctxt->options |= XML_PARSE_IGNORE_ENC;
options -= XML_PARSE_IGNORE_ENC;
}
+ if (options & XML_PARSE_BIG_LINES) {
+ ctxt->options |= XML_PARSE_BIG_LINES;
+ options -= XML_PARSE_BIG_LINES;
+ }
ctxt->linenumbers = 1;
return (options);
}
@@ -14679,7 +15084,7 @@ xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
* @options: a combination of xmlParserOption
*
* parse an XML in-memory document and build a tree.
- *
+ *
* Returns the resulting document tree
*/
xmlDocPtr
@@ -14703,7 +15108,7 @@ xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int optio
* @options: a combination of xmlParserOption
*
* parse an XML file from the filesystem or the network.
- *
+ *
* Returns the resulting document tree
*/
xmlDocPtr
@@ -14726,7 +15131,7 @@ xmlReadFile(const char *filename, const char *encoding, int options)
* @options: a combination of xmlParserOption
*
* parse an XML in-memory document and build a tree.
- *
+ *
* Returns the resulting document tree
*/
xmlDocPtr
@@ -14750,7 +15155,7 @@ xmlReadMemory(const char *buffer, int size, const char *URL, const char *encodin
* parse an XML from a file descriptor and build a tree.
* NOTE that the file descriptor will not be closed when the
* reader is closed or reset.
- *
+ *
* Returns the resulting document tree
*/
xmlDocPtr
@@ -14871,7 +15276,7 @@ xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
*
* parse an XML file from the filesystem or the network.
* This reuses the existing @ctxt parser context
- *
+ *
* Returns the resulting document tree
*/
xmlDocPtr
@@ -14906,7 +15311,7 @@ xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
*
* parse an XML in-memory document and build a tree.
* This reuses the existing @ctxt parser context
- *
+ *
* Returns the resulting document tree
*/
xmlDocPtr
@@ -14950,7 +15355,7 @@ xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
* This reuses the existing @ctxt parser context
* NOTE that the file descriptor will not be closed when the
* reader is closed or reset.
- *
+ *
* Returns the resulting document tree
*/
xmlDocPtr