summaryrefslogtreecommitdiff
path: root/HTMLparser.c
diff options
context:
space:
mode:
Diffstat (limited to 'HTMLparser.c')
-rw-r--r--HTMLparser.c184
1 files changed, 173 insertions, 11 deletions
diff --git a/HTMLparser.c b/HTMLparser.c
index 10f8516..947e4aa 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -2168,11 +2168,11 @@ htmlParseHTMLName(htmlParserCtxtPtr ctxt) {
int i = 0;
xmlChar loc[HTML_PARSER_BUFFER_SIZE];
- if (!IS_LETTER_CH(CUR) && (CUR != '_') &&
+ if (!IS_ASCII_LETTER(CUR) && (CUR != '_') &&
(CUR != ':')) return(NULL);
while ((i < HTML_PARSER_BUFFER_SIZE) &&
- ((IS_LETTER_CH(CUR)) || (IS_DIGIT_CH(CUR)) ||
+ ((IS_ASCII_LETTER(CUR)) || (IS_ASCII_DIGIT(CUR)) ||
(CUR == ':') || (CUR == '-') || (CUR == '_'))) {
if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20;
else loc[i] = CUR;
@@ -2809,6 +2809,117 @@ htmlParseExternalID(htmlParserCtxtPtr ctxt, xmlChar **publicID) {
}
/**
+ * htmlParsePI:
+ * @ctxt: an HTML parser context
+ *
+ * Parse a Processing Instruction opened by "<?" and report it through the
+ * SAX processingInstruction callback; data ends at the first '>'. Cf. XML:
+ * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
+ */
+static void
+htmlParsePI(htmlParserCtxtPtr ctxt) {
+ xmlChar *buf = NULL;
+ int len = 0;
+ int size = HTML_PARSER_BUFFER_SIZE;
+ int cur, l;
+ const xmlChar *target;
+ xmlParserInputState state;
+ int count = 0;
+
+ if ((RAW == '<') && (NXT(1) == '?')) {
+ state = ctxt->instate;
+ ctxt->instate = XML_PARSER_PI;
+ /*
+ * The saved state is restored on every exit path; skip the "<?" opener.
+ */
+ SKIP(2);
+ SHRINK;
+
+ /*
+ * Parse the PI target name; a NULL result is reported further down
+ * as XML_ERR_PI_NOT_STARTED.
+ */
+ target = htmlParseName(ctxt);
+ if (target != NULL) {
+ if (RAW == '>') {
+ SKIP(1);
+
+ /*
+ * SAX: PI without data, reported with NULL content.
+ */
+ if ((ctxt->sax) && (!ctxt->disableSAX) &&
+ (ctxt->sax->processingInstruction != NULL))
+ ctxt->sax->processingInstruction(ctxt->userData,
+ target, NULL);
+ ctxt->instate = state;
+ return;
+ }
+ buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
+ if (buf == NULL) {
+ htmlErrMemory(ctxt, NULL);
+ ctxt->instate = state;
+ return;
+ }
+ cur = CUR;
+ if (!IS_BLANK(cur)) {
+ htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
+ "ParsePI: PI %s space expected\n", target, NULL);
+ }
+ SKIP_BLANKS;
+ cur = CUR_CHAR(l);
+ while (IS_CHAR(cur) && (cur != '>')) {
+ if (len + 5 >= size) {
+ xmlChar *tmp;
+
+ size *= 2;
+ tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
+ if (tmp == NULL) {
+ htmlErrMemory(ctxt, NULL);
+ xmlFree(buf);
+ ctxt->instate = state;
+ return;
+ }
+ buf = tmp;
+ }
+ count++;
+ if (count > 50) {
+ GROW;
+ count = 0;
+ }
+ COPY_BUF(l,buf,len,cur);
+ NEXTL(l);
+ cur = CUR_CHAR(l);
+ if (cur == 0) {
+ SHRINK;
+ GROW;
+ cur = CUR_CHAR(l);
+ }
+ }
+ buf[len] = 0;
+ if (cur != '>') {
+ htmlParseErr(ctxt, XML_ERR_PI_NOT_FINISHED,
+ "ParsePI: PI %s never end ...\n", target, NULL);
+ } else {
+ SKIP(1);
+
+ /*
+ * SAX: report the PI together with its collected data.
+ */
+ if ((ctxt->sax) && (!ctxt->disableSAX) &&
+ (ctxt->sax->processingInstruction != NULL))
+ ctxt->sax->processingInstruction(ctxt->userData,
+ target, buf);
+ }
+ xmlFree(buf);
+ } else {
+ htmlParseErr(ctxt, XML_ERR_PI_NOT_STARTED,
+ "PI is not started correctly", NULL, NULL);
+ }
+ ctxt->instate = state;
+ }
+}
+
+/**
* htmlParseComment:
* @ctxt: an HTML parser context
*
@@ -3643,14 +3754,21 @@ htmlParseContent(htmlParserCtxtPtr ctxt) {
}
/*
- * Second case : a sub-element.
+ * Second case : a Processing Instruction.
+ */
+ else if ((CUR == '<') && (NXT(1) == '?')) {
+ htmlParsePI(ctxt);
+ }
+
+ /*
+ * Third case : a sub-element.
*/
else if (CUR == '<') {
htmlParseElement(ctxt);
}
/*
- * Third case : a reference. If if has not been resolved,
+ * Fourth case : a reference. If it has not been resolved,
* parsing returns it's Name, create the node
*/
else if (CUR == '&') {
@@ -3658,7 +3776,7 @@ htmlParseContent(htmlParserCtxtPtr ctxt) {
}
/*
- * Fourth : end of the resource
+ * Fifth case : end of the resource
*/
else if (CUR == 0) {
htmlAutoCloseOnEnd(ctxt);
@@ -3852,11 +3970,13 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
/*
- * Parse possible comments before any content
+ * Parse possible comments and PIs before any content
*/
- while ((CUR == '<') && (NXT(1) == '!') &&
- (NXT(2) == '-') && (NXT(3) == '-')) {
+ while (((CUR == '<') && (NXT(1) == '!') &&
+ (NXT(2) == '-') && (NXT(3) == '-')) ||
+ ((CUR == '<') && (NXT(1) == '?'))) {
htmlParseComment(ctxt);
+ htmlParsePI(ctxt);
SKIP_BLANKS;
}
@@ -3875,11 +3995,13 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
SKIP_BLANKS;
/*
- * Parse possible comments before any content
+ * Parse possible comments and PIs before any content
*/
- while ((CUR == '<') && (NXT(1) == '!') &&
- (NXT(2) == '-') && (NXT(3) == '-')) {
+ while (((CUR == '<') && (NXT(1) == '!') &&
+ (NXT(2) == '-') && (NXT(3) == '-')) ||
+ ((CUR == '<') && (NXT(1) == '?'))) {
htmlParseComment(ctxt);
+ htmlParsePI(ctxt);
SKIP_BLANKS;
}
@@ -4444,6 +4566,16 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
#endif
htmlParseComment(ctxt);
ctxt->instate = XML_PARSER_MISC;
+ } else if ((cur == '<') && (next == '?')) {
+ if ((!terminate) &&
+ (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
+ goto done;
+#ifdef DEBUG_PUSH
+ xmlGenericError(xmlGenericErrorContext,
+ "HPP: Parsing PI\n");
+#endif
+ htmlParsePI(ctxt);
+ ctxt->instate = XML_PARSER_MISC;
} else if ((cur == '<') && (next == '!') &&
(UPP(2) == 'D') && (UPP(3) == 'O') &&
(UPP(4) == 'C') && (UPP(5) == 'T') &&
@@ -4494,6 +4626,16 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
#endif
htmlParseComment(ctxt);
ctxt->instate = XML_PARSER_PROLOG;
+ } else if ((cur == '<') && (next == '?')) {
+ if ((!terminate) &&
+ (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
+ goto done;
+#ifdef DEBUG_PUSH
+ xmlGenericError(xmlGenericErrorContext,
+ "HPP: Parsing PI\n");
+#endif
+ htmlParsePI(ctxt);
+ ctxt->instate = XML_PARSER_PROLOG;
} else if ((cur == '<') && (next == '!') &&
(avail < 4)) {
goto done;
@@ -4531,6 +4673,16 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
#endif
htmlParseComment(ctxt);
ctxt->instate = XML_PARSER_EPILOG;
+ } else if ((cur == '<') && (next == '?')) {
+ if ((!terminate) &&
+ (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
+ goto done;
+#ifdef DEBUG_PUSH
+ xmlGenericError(xmlGenericErrorContext,
+ "HPP: Parsing PI\n");
+#endif
+ htmlParsePI(ctxt);
+ ctxt->instate = XML_PARSER_EPILOG;
} else if ((cur == '<') && (next == '!') &&
(avail < 4)) {
goto done;
@@ -4737,6 +4889,16 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
#endif
htmlParseComment(ctxt);
ctxt->instate = XML_PARSER_CONTENT;
+ } else if ((cur == '<') && (next == '?')) {
+ if ((!terminate) &&
+ (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
+ goto done;
+#ifdef DEBUG_PUSH
+ xmlGenericError(xmlGenericErrorContext,
+ "HPP: Parsing PI\n");
+#endif
+ htmlParsePI(ctxt);
+ ctxt->instate = XML_PARSER_CONTENT;
} else if ((cur == '<') && (next == '!') && (avail < 4)) {
goto done;
} else if ((cur == '<') && (next == '/')) {