diff options
Diffstat (limited to 'HTMLparser.c')
-rw-r--r-- | HTMLparser.c | 184 |
1 files changed, 173 insertions, 11 deletions
diff --git a/HTMLparser.c b/HTMLparser.c index 10f8516..947e4aa 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -2168,11 +2168,11 @@ htmlParseHTMLName(htmlParserCtxtPtr ctxt) { int i = 0; xmlChar loc[HTML_PARSER_BUFFER_SIZE]; - if (!IS_LETTER_CH(CUR) && (CUR != '_') && + if (!IS_ASCII_LETTER(CUR) && (CUR != '_') && (CUR != ':')) return(NULL); while ((i < HTML_PARSER_BUFFER_SIZE) && - ((IS_LETTER_CH(CUR)) || (IS_DIGIT_CH(CUR)) || + ((IS_ASCII_LETTER(CUR)) || (IS_ASCII_DIGIT(CUR)) || (CUR == ':') || (CUR == '-') || (CUR == '_'))) { if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20; else loc[i] = CUR; @@ -2809,6 +2809,117 @@ htmlParseExternalID(htmlParserCtxtPtr ctxt, xmlChar **publicID) { } /** + * xmlParsePI: + * @ctxt: an XML parser context + * + * parse an XML Processing Instruction. + * + * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' + */ +static void +htmlParsePI(htmlParserCtxtPtr ctxt) { + xmlChar *buf = NULL; + int len = 0; + int size = HTML_PARSER_BUFFER_SIZE; + int cur, l; + const xmlChar *target; + xmlParserInputState state; + int count = 0; + + if ((RAW == '<') && (NXT(1) == '?')) { + state = ctxt->instate; + ctxt->instate = XML_PARSER_PI; + /* + * this is a Processing Instruction. + */ + SKIP(2); + SHRINK; + + /* + * Parse the target name and check for special support like + * namespace. + */ + target = htmlParseName(ctxt); + if (target != NULL) { + if (RAW == '>') { + SKIP(1); + + /* + * SAX: PI detected. + */ + if ((ctxt->sax) && (!ctxt->disableSAX) && + (ctxt->sax->processingInstruction != NULL)) + ctxt->sax->processingInstruction(ctxt->userData, + target, NULL); + ctxt->instate = state; + return; + } + buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); + if (buf == NULL) { + htmlErrMemory(ctxt, NULL); + ctxt->instate = state; + return; + } + cur = CUR; + if (!IS_BLANK(cur)) { + htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED, + "ParsePI: PI %s space expected\n", target, NULL); + } + SKIP_BLANKS; + cur = CUR_CHAR(l); + while (IS_CHAR(cur) && (cur != '>')) { + if (len + 5 >= size) { + xmlChar *tmp; + + size *= 2; + tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + if (tmp == NULL) { + htmlErrMemory(ctxt, NULL); + xmlFree(buf); + ctxt->instate = state; + return; + } + buf = tmp; + } + count++; + if (count > 50) { + GROW; + count = 0; + } + COPY_BUF(l,buf,len,cur); + NEXTL(l); + cur = CUR_CHAR(l); + if (cur == 0) { + SHRINK; + GROW; + cur = CUR_CHAR(l); + } + } + buf[len] = 0; + if (cur != '>') { + htmlParseErr(ctxt, XML_ERR_PI_NOT_FINISHED, + "ParsePI: PI %s never end ...\n", target, NULL); + } else { + SKIP(1); + + /* + * SAX: PI detected. + */ + if ((ctxt->sax) && (!ctxt->disableSAX) && + (ctxt->sax->processingInstruction != NULL)) + ctxt->sax->processingInstruction(ctxt->userData, + target, buf); + } + xmlFree(buf); + } else { + htmlParseErr(ctxt, XML_ERR_PI_NOT_STARTED, + "PI is not started correctly", NULL, NULL); + } + ctxt->instate = state; + } +} + +/** * htmlParseComment: * @ctxt: an HTML parser context * @@ -3643,14 +3754,21 @@ htmlParseContent(htmlParserCtxtPtr ctxt) { } /* - * Second case : a sub-element. + * Second case : a Processing Instruction. + */ + else if ((CUR == '<') && (NXT(1) == '?')) { + htmlParsePI(ctxt); + } + + /* + * Third case : a sub-element. */ else if (CUR == '<') { htmlParseElement(ctxt); } /* - * Third case : a reference. If if has not been resolved, + * Fourth case : a reference. If if has not been resolved, * parsing returns it's Name, create the node */ else if (CUR == '&') { @@ -3658,7 +3776,7 @@ htmlParseContent(htmlParserCtxtPtr ctxt) { } /* - * Fourth : end of the resource + * Fifth case : end of the resource */ else if (CUR == 0) { htmlAutoCloseOnEnd(ctxt); @@ -3852,11 +3970,13 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) { /* - * Parse possible comments before any content + * Parse possible comments and PIs before any content */ - while ((CUR == '<') && (NXT(1) == '!') && - (NXT(2) == '-') && (NXT(3) == '-')) { + while (((CUR == '<') && (NXT(1) == '!') && + (NXT(2) == '-') && (NXT(3) == '-')) || + ((CUR == '<') && (NXT(1) == '?'))) { htmlParseComment(ctxt); + htmlParsePI(ctxt); SKIP_BLANKS; } @@ -3875,11 +3995,13 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) { SKIP_BLANKS; /* - * Parse possible comments before any content + * Parse possible comments and PIs before any content */ - while ((CUR == '<') && (NXT(1) == '!') && - (NXT(2) == '-') && (NXT(3) == '-')) { + while (((CUR == '<') && (NXT(1) == '!') && + (NXT(2) == '-') && (NXT(3) == '-')) || + ((CUR == '<') && (NXT(1) == '?'))) { htmlParseComment(ctxt); + htmlParsePI(ctxt); SKIP_BLANKS; } @@ -4444,6 +4566,16 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { #endif htmlParseComment(ctxt); ctxt->instate = XML_PARSER_MISC; + } else if ((cur == '<') && (next == '?')) { + if ((!terminate) && + (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: Parsing PI\n"); +#endif + htmlParsePI(ctxt); + ctxt->instate = XML_PARSER_MISC; } else if ((cur == '<') && (next == '!') && (UPP(2) == 'D') && (UPP(3) == 'O') && (UPP(4) == 'C') && (UPP(5) == 'T') && @@ -4494,6 +4626,16 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { #endif htmlParseComment(ctxt); ctxt->instate = XML_PARSER_PROLOG; + } else if ((cur == '<') && (next == '?')) { + if ((!terminate) && + (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: Parsing PI\n"); +#endif + htmlParsePI(ctxt); + ctxt->instate = XML_PARSER_PROLOG; } else if ((cur == '<') && (next == '!') && (avail < 4)) { goto done; @@ -4531,6 +4673,16 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { #endif htmlParseComment(ctxt); ctxt->instate = XML_PARSER_EPILOG; + } else if ((cur == '<') && (next == '?')) { + if ((!terminate) && + (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: Parsing PI\n"); +#endif + htmlParsePI(ctxt); + ctxt->instate = XML_PARSER_EPILOG; } else if ((cur == '<') && (next == '!') && (avail < 4)) { goto done; @@ -4737,6 +4889,16 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { #endif htmlParseComment(ctxt); ctxt->instate = XML_PARSER_CONTENT; + } else if ((cur == '<') && (next == '?')) { + if ((!terminate) && + (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) + goto done; +#ifdef DEBUG_PUSH + xmlGenericError(xmlGenericErrorContext, + "HPP: Parsing PI\n"); +#endif + htmlParsePI(ctxt); + ctxt->instate = XML_PARSER_CONTENT; } else if ((cur == '<') && (next == '!') && (avail < 4)) { goto done; } else if ((cur == '<') && (next == '/')) { |