summary | refs | log | tree | commit | diff
path: root/parser.c
diff options
context:
space:
mode:
author: Mike Hommey <mh@glandium.org> 2004-07-06 12:57:17 +0000
committer: Mike Hommey <mh@glandium.org> 2004-07-06 12:57:17 +0000
commit: c14c53a3645d81281058d4bb4cff24fa8d6faf33 (patch)
tree: 29bccc2e7499af078a3d1cdcfb517a1dee891be5 /parser.c
parent: d4e028c96af89ade493b440d4f2de6b684c03a06 (diff)
download: libxml2-c14c53a3645d81281058d4bb4cff24fa8d6faf33.tar.gz
Load /tmp/tmp.DIvcnD/libxml2-2.6.11 into
packages/libxml2/branches/upstream/current. (tag: upstream/2.6.11)
Diffstat (limited to 'parser.c')
-rw-r--r-- parser.c 109
1 files changed, 79 insertions, 30 deletions
diff --git a/parser.c b/parser.c
index d038e46..77a12f5 100644
--- a/parser.c
+++ b/parser.c
@@ -1039,6 +1039,8 @@ nodePop(xmlParserCtxtPtr ctxt)
ctxt->nodeTab[ctxt->nodeNr] = 0;
return (ret);
}
+
+#ifdef LIBXML_PUSH_ENABLED
/**
* nameNsPush:
* @ctxt: an XML parser context
@@ -1109,6 +1111,7 @@ nameNsPop(xmlParserCtxtPtr ctxt)
ctxt->nameTab[ctxt->nameNr] = NULL;
return (ret);
}
+#endif /* LIBXML_PUSH_ENABLED */
/**
* namePush:
@@ -1456,6 +1459,7 @@ int
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
unsigned int val = 0;
int count = 0;
+ unsigned int outofrange = 0;
/*
* Using RAW/CUR/NEXT is okay since we are working on ASCII range here
@@ -1480,6 +1484,9 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
val = 0;
break;
}
+ if (val > 0x10FFFF)
+ outofrange = val;
+
NEXT;
count++;
}
@@ -1504,6 +1511,9 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
val = 0;
break;
}
+ if (val > 0x10FFFF)
+ outofrange = val;
+
NEXT;
count++;
}
@@ -1522,7 +1532,7 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
* Characters referred to using character references must match the
* production for Char.
*/
- if (IS_CHAR(val)) {
+ if ((IS_CHAR(val) && (outofrange == 0))) {
return(val);
} else {
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
@@ -1554,7 +1564,8 @@ static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
const xmlChar *ptr;
xmlChar cur;
- int val = 0;
+ unsigned int val = 0;
+ unsigned int outofrange = 0;
if ((str == NULL) || (*str == NULL)) return(0);
ptr = *str;
@@ -1574,6 +1585,9 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
val = 0;
break;
}
+ if (val > 0x10FFFF)
+ outofrange = val;
+
ptr++;
cur = *ptr;
}
@@ -1590,6 +1604,9 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
val = 0;
break;
}
+ if (val > 0x10FFFF)
+ outofrange = val;
+
ptr++;
cur = *ptr;
}
@@ -1606,7 +1623,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
* Characters referred to using character references must match the
* production for Char.
*/
- if (IS_CHAR(val)) {
+ if ((IS_CHAR(val) && (outofrange == 0))) {
return(val);
} else {
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
@@ -5500,8 +5517,9 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
* Prune it directly in the generated document
* except for single text nodes.
*/
- if ((list->type == XML_TEXT_NODE) &&
- (list->next == NULL)) {
+ if (((list->type == XML_TEXT_NODE) &&
+ (list->next == NULL)) ||
+ (ctxt->parseMode == XML_PARSE_READER)) {
list->parent = (xmlNodePtr) ent;
list = NULL;
ent->owner = 1;
@@ -5554,10 +5572,21 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
/*
* Seems we are generating the DOM content, do
* a simple tree copy for all references except the first
- * In the first occurrence list contains the replacement
+ * In the first occurrence list contains the replacement.
+ * progressive == 2 means we are operating on the Reader
+ * and since nodes are discarded we must copy all the time.
*/
- if ((list == NULL) && (ent->owner == 0)) {
+ if (((list == NULL) && (ent->owner == 0)) ||
+ (ctxt->parseMode == XML_PARSE_READER)) {
xmlNodePtr nw = NULL, cur, firstChild = NULL;
+
+ /*
+ * when operating on a reader, the entities definitions
+ * are always owning the entities subtree.
+ if (ctxt->parseMode == XML_PARSE_READER)
+ ent->owner = 1;
+ */
+
cur = ent->children;
while (cur != NULL) {
nw = xmlCopyNode(cur, 1);
@@ -5566,10 +5595,20 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
if (firstChild == NULL){
firstChild = nw;
}
- xmlAddChild(ctxt->node, nw);
+ nw = xmlAddChild(ctxt->node, nw);
}
- if (cur == ent->last)
+ if (cur == ent->last) {
+ /*
+ * needed to detect some strange empty
+ * node cases in the reader tests
+ */
+ if ((ctxt->parseMode == XML_PARSE_READER) &&
+ (nw->type == XML_ELEMENT_NODE) &&
+ (nw->children == NULL))
+ nw->extra = 1;
+
break;
+ }
cur = cur->next;
}
#ifdef LIBXML_LEGACY_ENABLED
@@ -8776,32 +8815,40 @@ xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
"Internal error: xmlParseGetLasts\n");
return;
}
- if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) {
+ if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
tmp = ctxt->input->end;
tmp--;
- while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
- (*tmp != '>')) tmp--;
+ while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
if (tmp < ctxt->input->base) {
*lastlt = NULL;
*lastgt = NULL;
- } else if (*tmp == '<') {
+ } else {
*lastlt = tmp;
- tmp--;
- while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
- if (tmp < ctxt->input->base)
- *lastgt = NULL;
- else
+ tmp++;
+ while ((tmp < ctxt->input->end) && (*tmp != '>')) {
+ if (*tmp == '\'') {
+ tmp++;
+ while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
+ if (tmp < ctxt->input->end) tmp++;
+ } else if (*tmp == '"') {
+ tmp++;
+ while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
+ if (tmp < ctxt->input->end) tmp++;
+ } else
+ tmp++;
+ }
+ if (tmp < ctxt->input->end)
*lastgt = tmp;
- } else {
- *lastgt = tmp;
- tmp--;
- while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
- if (tmp < ctxt->input->base)
- *lastlt = NULL;
- else
- *lastlt = tmp;
+ else {
+ tmp = *lastlt;
+ tmp--;
+ while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
+ if (tmp >= ctxt->input->base)
+ *lastgt = tmp;
+ else
+ *lastgt = NULL;
+ }
}
-
} else {
*lastlt = NULL;
*lastgt = NULL;
@@ -9057,7 +9104,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
if (!terminate) {
if (ctxt->progressive) {
/* > can be found unescaped in attribute values */
- if ((lastlt == NULL) || (ctxt->input->cur >= lastlt))
+ if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
goto done;
} else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
goto done;
@@ -9235,7 +9282,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
goto done;
if (!terminate) {
if (ctxt->progressive) {
- if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
+ /* > can be found unescaped in attribute values */
+ if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
goto done;
} else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
goto done;
@@ -9423,7 +9471,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
goto done;
} else {
ctxt->instate = XML_PARSER_START_TAG;
- ctxt->progressive = 1;
+ if (ctxt->progressive == 0)
+ ctxt->progressive = 1;
xmlParseGetLasts(ctxt, &lastlt, &lastgt);
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,