summaryrefslogtreecommitdiff
path: root/parser.c
diff options
context:
space:
mode:
Diffstat (limited to 'parser.c')
-rw-r--r--parser.c163
1 files changed, 145 insertions, 18 deletions
diff --git a/parser.c b/parser.c
index ee429f3..1d93967 100644
--- a/parser.c
+++ b/parser.c
@@ -130,6 +130,29 @@ xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
return (0);
if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
return (1);
+
+ /*
+ * This may look absurd, but it is needed to detect
+ * entity problems
+ */
+ if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
+ (ent->content != NULL) && (ent->checked == 0)) {
+ unsigned long oldnbent = ctxt->nbentities;
+ xmlChar *rep;
+
+ ent->checked = 1;
+
+ rep = xmlStringDecodeEntities(ctxt, ent->content,
+ XML_SUBSTITUTE_REF, 0, 0, 0);
+
+ ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
+ if (rep != NULL) {
+ if (xmlStrchr(rep, '<'))
+ ent->checked |= 1;
+ xmlFree(rep);
+ rep = NULL;
+ }
+ }
if (replacement != 0) {
if (replacement < XML_MAX_TEXT_LENGTH)
return(0);
@@ -189,9 +212,12 @@ xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
return (0);
} else {
/*
- * strange we got no data for checking just return
+ * strange we got no data for checking
*/
- return (0);
+ if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
+ (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
+ (ctxt->nbentities <= 10000))
+ return (0);
}
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
return (1);
@@ -2039,8 +2065,11 @@ static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
xmlGROW (ctxt);
static void xmlGROW (xmlParserCtxtPtr ctxt) {
- if ((((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
- ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
+ unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
+ unsigned long curBase = ctxt->input->cur - ctxt->input->base;
+
+ if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
+ (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
((ctxt->options & XML_PARSE_HUGE) == 0)) {
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
@@ -2106,6 +2135,8 @@ xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
while (IS_BLANK_CH(*cur)) {
if (*cur == '\n') {
ctxt->input->line++; ctxt->input->col = 1;
+ } else {
+ ctxt->input->col++;
}
cur++;
res++;
@@ -2584,6 +2615,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
name, NULL);
ctxt->valid = 0;
}
+ xmlParserEntityCheck(ctxt, 0, NULL, 0);
} else if (ctxt->input->free != deallocblankswrapper) {
input = xmlNewBlanksWrapperInputStream(ctxt, entity);
if (xmlPushInput(ctxt, input) < 0)
@@ -2595,6 +2627,23 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
xmlCharEncoding enc;
/*
+ * Note: external parameter entities are not loaded, as this
+ * is not required for a non-validating parser, unless one
+ * of the validating, entity-substituting, or DTD-loading
+ * options was given. Skipping them is far more secure, as
+ * the parser will only process data coming from the
+ * document entity by default.
+ */
+ if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
+ ((ctxt->options & XML_PARSE_NOENT) == 0) &&
+ ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
+ ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
+ ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
+ (ctxt->replaceEntities == 0) &&
+ (ctxt->validate == 0))
+ return;
+
+ /*
* handle the extra spaces added before and after
* c.f. http://www.w3.org/TR/REC-xml#as-PE
* this is done independently.
@@ -2737,6 +2786,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
(ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
goto int_error;
+ xmlParserEntityCheck(ctxt, 0, ent, 0);
if (ent != NULL)
ctxt->nbentities += ent->checked / 2;
if ((ent != NULL) &&
@@ -2788,6 +2838,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
ent = xmlParseStringPEReference(ctxt, &str);
if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
goto int_error;
+ xmlParserEntityCheck(ctxt, 0, ent, 0);
if (ent != NULL)
ctxt->nbentities += ent->checked / 2;
if (ent != NULL) {
@@ -3404,6 +3455,7 @@ xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
int len = 0, l;
int c;
int count = 0;
+ const xmlChar *end; /* needed because CUR_CHAR() can move cur on \r\n */
#ifdef DEBUG
nbParseNCNameComplex++;
@@ -3413,6 +3465,7 @@ xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
* Handler for more complex cases
*/
GROW;
+ end = ctxt->input->cur;
c = CUR_CHAR(l);
if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
(!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
@@ -3434,12 +3487,14 @@ xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
}
len += l;
NEXTL(l);
+ end = ctxt->input->cur;
c = CUR_CHAR(l);
if (c == 0) {
count = 0;
GROW;
if (ctxt->instate == XML_PARSER_EOF)
return(NULL);
+ end = ctxt->input->cur;
c = CUR_CHAR(l);
}
}
@@ -3448,7 +3503,7 @@ xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
return(NULL);
}
- return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
+ return(xmlDictLookup(ctxt->dict, end - len, len));
}
/**
@@ -5075,7 +5130,7 @@ xmlParsePITarget(xmlParserCtxtPtr ctxt) {
}
if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
xmlNsErr(ctxt, XML_NS_ERR_COLON,
- "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
+ "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
}
return(name);
}
@@ -5344,7 +5399,7 @@ xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
}
if (xmlStrchr(name, ':') != NULL) {
xmlNsErr(ctxt, XML_NS_ERR_COLON,
- "colon are forbidden from notation names '%s'\n",
+ "colons are forbidden from notation names '%s'\n",
name, NULL, NULL);
}
SKIP_BLANKS;
@@ -5433,7 +5488,7 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
}
if (xmlStrchr(name, ':') != NULL) {
xmlNsErr(ctxt, XML_NS_ERR_COLON,
- "colon are forbidden from entities names '%s'\n",
+ "colons are forbidden from entities names '%s'\n",
name, NULL, NULL);
}
skipped = SKIP_BLANKS;
@@ -7286,6 +7341,7 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
(ret != XML_WAR_UNDECLARED_ENTITY)) {
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
"Entity '%s' failed to parse\n", ent->name);
+ xmlParserEntityCheck(ctxt, 0, ent, 0);
} else if (list != NULL) {
xmlFreeNodeList(list);
list = NULL;
@@ -7392,7 +7448,7 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
/*
* We are copying here, make sure there is no abuse
*/
- ctxt->sizeentcopy += ent->length;
+ ctxt->sizeentcopy += ent->length + 5;
if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
return;
@@ -7440,7 +7496,7 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
/*
* We are copying here, make sure there is no abuse
*/
- ctxt->sizeentcopy += ent->length;
+ ctxt->sizeentcopy += ent->length + 5;
if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
return;
@@ -7626,6 +7682,7 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
ctxt->sax->reference(ctxt->userData, name);
}
}
+ xmlParserEntityCheck(ctxt, 0, ent, 0);
ctxt->valid = 0;
}
@@ -7658,8 +7715,8 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
(ent != NULL) &&
(ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
- if ((ent->checked & 1) || ((ent->checked == 0) &&
- (ent->content != NULL) &&(xmlStrchr(ent->content, '<')))) {
+ if (((ent->checked & 1) || (ent->checked == 0)) &&
+ (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
"'<' in entity '%s' is not allowed in attributes values\n", name);
}
@@ -7753,7 +7810,7 @@ xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
/*
- * Predefined entites override any extra definition
+ * Predefined entities override any extra definition
*/
if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
ent = xmlGetPredefinedEntity(name);
@@ -7819,6 +7876,7 @@ xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
"Entity '%s' not defined\n",
name);
}
+ xmlParserEntityCheck(ctxt, 0, ent, 0);
/* TODO ? check regressions ctxt->valid = 0; */
}
@@ -7978,6 +8036,7 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt)
name, NULL);
ctxt->valid = 0;
}
+ xmlParserEntityCheck(ctxt, 0, NULL, 0);
} else {
/*
* Internal checking in case the entity quest barfed
@@ -8217,6 +8276,7 @@ xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
name, NULL);
ctxt->valid = 0;
}
+ xmlParserEntityCheck(ctxt, 0, NULL, 0);
} else {
/*
* Internal checking in case the entity quest barfed
@@ -8906,9 +8966,12 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
xmlChar limit = 0;
const xmlChar *in = NULL, *start, *end, *last;
xmlChar *ret = NULL;
+ int line, col;
GROW;
in = (xmlChar *) CUR_PTR;
+ line = ctxt->input->line;
+ col = ctxt->input->col;
if (*in != '"' && *in != '\'') {
xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
return (NULL);
@@ -8921,6 +8984,7 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
* pure ASCII.
*/
limit = *in++;
+ col++;
end = ctxt->input->end;
start = in;
if (in >= end) {
@@ -8940,6 +9004,11 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
while ((in < end) && (*in != limit) &&
((*in == 0x20) || (*in == 0x9) ||
(*in == 0xA) || (*in == 0xD))) {
+ if (*in == 0xA) {
+ line++; col = 1;
+ } else {
+ col++;
+ }
in++;
start = in;
if (in >= end) {
@@ -8963,6 +9032,7 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
}
while ((in < end) && (*in != limit) && (*in >= 0x20) &&
(*in <= 0x7f) && (*in != '&') && (*in != '<')) {
+ col++;
if ((*in++ == 0x20) && (*in == 0x20)) break;
if (in >= end) {
const xmlChar *oldbase = ctxt->input->base;
@@ -8991,6 +9061,11 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
while ((in < end) && (*in != limit) &&
((*in == 0x20) || (*in == 0x9) ||
(*in == 0xA) || (*in == 0xD))) {
+ if (*in == 0xA) {
+ line++, col = 1;
+ } else {
+ col++;
+ }
in++;
if (in >= end) {
const xmlChar *oldbase = ctxt->input->base;
@@ -9023,6 +9098,7 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
while ((in < end) && (*in != limit) && (*in >= 0x20) &&
(*in <= 0x7f) && (*in != '&') && (*in != '<')) {
in++;
+ col++;
if (in >= end) {
const xmlChar *oldbase = ctxt->input->base;
GROW;
@@ -9052,6 +9128,7 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
if (*in != limit) goto need_complex;
}
in++;
+ col++;
if (len != NULL) {
*len = last - start;
ret = (xmlChar *) start;
@@ -9060,6 +9137,8 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
ret = xmlStrndup(start, last - start);
}
CUR_PTR = in;
+ ctxt->input->line = line;
+ ctxt->input->col = col;
if (alloc) *alloc = 0;
return ret;
need_complex:
@@ -9292,6 +9371,12 @@ reparse:
const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
xmlURIPtr uri;
+ if (URL == NULL) {
+ xmlErrMemory(ctxt, "dictionary allocation failure");
+ if ((attvalue != NULL) && (alloc != 0))
+ xmlFree(attvalue);
+ return(NULL);
+ }
if (*URL != 0) {
uri = xmlParseURI((const char *) URL);
if (uri == NULL) {
@@ -9335,6 +9420,13 @@ reparse:
if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
skip_default_ns:
if (alloc != 0) xmlFree(attvalue);
+ if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
+ break;
+ if (!IS_BLANK_CH(RAW)) {
+ xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
+ "attributes construct error\n");
+ break;
+ }
SKIP_BLANKS;
continue;
}
@@ -9408,6 +9500,13 @@ skip_default_ns:
if (nsPush(ctxt, attname, URL) > 0) nbNs++;
skip_ns:
if (alloc != 0) xmlFree(attvalue);
+ if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
+ break;
+ if (!IS_BLANK_CH(RAW)) {
+ xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
+ "attributes construct error\n");
+ break;
+ }
SKIP_BLANKS;
if (ctxt->input->base != base) goto base_changed;
continue;
@@ -9668,9 +9767,11 @@ xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
if (ctxt->input->cur[tlen] == '>') {
ctxt->input->cur += tlen + 1;
+ ctxt->input->col += tlen + 1;
goto done;
}
ctxt->input->cur += tlen;
+ ctxt->input->col += tlen;
name = (xmlChar*)1;
} else {
if (prefix == NULL)
@@ -10311,8 +10412,10 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
/*
* Non standard parsing, allowing the user to ignore encoding
*/
- if (ctxt->options & XML_PARSE_IGNORE_ENC)
- return(encoding);
+ if (ctxt->options & XML_PARSE_IGNORE_ENC) {
+ xmlFree((xmlChar *) encoding);
+ return(NULL);
+ }
/*
* UTF-16 encoding stwich has already taken place at this stage,
@@ -10681,6 +10784,10 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
ctxt->sax->startDocument(ctxt->userData);
if (ctxt->instate == XML_PARSER_EOF)
return(-1);
+ if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
+ (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
+ ctxt->myDoc->compression = ctxt->input->buf->compressed;
+ }
/*
* The Misc part of the Prolog
@@ -12591,6 +12698,9 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
return(NULL);
}
+ /* We are loading a DTD */
+ ctxt->options |= XML_PARSE_DTDLOAD;
+
/*
* Set-up the SAX context
*/
@@ -12718,6 +12828,9 @@ xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
return(NULL);
}
+ /* We are loading a DTD */
+ ctxt->options |= XML_PARSE_DTDLOAD;
+
/*
* Set-up the SAX context
*/
@@ -13607,7 +13720,7 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
xmlFree((xmlChar *) ctxt->encoding);
ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
- hdlr = xmlFindCharEncodingHandler(doc->encoding);
+ hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
if (hdlr != NULL) {
xmlSwitchToEncoding(ctxt, hdlr);
} else {
@@ -13618,6 +13731,8 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
xmlCtxtUseOptionsInternal(ctxt, options, NULL);
xmlDetectSAX2(ctxt);
ctxt->myDoc = doc;
+ /* parsing in context, i.e. as within existing content */
+ ctxt->instate = XML_PARSER_CONTENT;
fake = xmlNewComment(NULL);
if (fake == NULL) {
@@ -13653,7 +13768,6 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
}
cur = cur->parent;
}
- ctxt->instate = XML_PARSER_CONTENT;
}
if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
@@ -14716,6 +14830,9 @@ xmlInitParser(void) {
#ifdef LIBXML_XPATH_ENABLED
xmlXPathInit();
#endif
+#ifdef LIBXML_CATALOG_ENABLED
+ xmlInitializeCatalog();
+#endif
xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
}
@@ -14763,8 +14880,8 @@ xmlCleanupParser(void) {
xmlSchemaCleanupTypes();
xmlRelaxNGCleanupTypes();
#endif
- xmlCleanupGlobals();
xmlResetLastError();
+ xmlCleanupGlobals();
xmlCleanupThreads(); /* must be last if called not from the main thread */
xmlCleanupMemory();
xmlParserInitialized = 0;
@@ -15213,6 +15330,7 @@ xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int optio
if (cur == NULL)
return (NULL);
+ xmlInitParser();
ctxt = xmlCreateDocParserCtxt(cur);
if (ctxt == NULL)
@@ -15235,6 +15353,7 @@ xmlReadFile(const char *filename, const char *encoding, int options)
{
xmlParserCtxtPtr ctxt;
+ xmlInitParser();
ctxt = xmlCreateURLParserCtxt(filename, options);
if (ctxt == NULL)
return (NULL);
@@ -15258,6 +15377,7 @@ xmlReadMemory(const char *buffer, int size, const char *URL, const char *encodin
{
xmlParserCtxtPtr ctxt;
+ xmlInitParser();
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
if (ctxt == NULL)
return (NULL);
@@ -15286,6 +15406,7 @@ xmlReadFd(int fd, const char *URL, const char *encoding, int options)
if (fd < 0)
return (NULL);
+ xmlInitParser();
input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
if (input == NULL)
@@ -15329,6 +15450,7 @@ xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
if (ioread == NULL)
return (NULL);
+ xmlInitParser();
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
XML_CHAR_ENCODING_NONE);
@@ -15375,6 +15497,7 @@ xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
return (NULL);
if (ctxt == NULL)
return (NULL);
+ xmlInitParser();
xmlCtxtReset(ctxt);
@@ -15408,6 +15531,7 @@ xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
return (NULL);
if (ctxt == NULL)
return (NULL);
+ xmlInitParser();
xmlCtxtReset(ctxt);
@@ -15444,6 +15568,7 @@ xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
return (NULL);
if (buffer == NULL)
return (NULL);
+ xmlInitParser();
xmlCtxtReset(ctxt);
@@ -15488,6 +15613,7 @@ xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
return (NULL);
if (ctxt == NULL)
return (NULL);
+ xmlInitParser();
xmlCtxtReset(ctxt);
@@ -15533,6 +15659,7 @@ xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
return (NULL);
if (ctxt == NULL)
return (NULL);
+ xmlInitParser();
xmlCtxtReset(ctxt);