diff options
Diffstat (limited to 'HTMLparser.c')
-rw-r--r-- | HTMLparser.c | 23 |
1 file changed, 21 insertions, 2 deletions
diff --git a/HTMLparser.c b/HTMLparser.c index 92503a1..57e64df 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -2143,6 +2143,7 @@ htmlNewDocNoDtD(const xmlChar *URI, const xmlChar *ExternalID) { cur->refs = NULL; cur->_private = NULL; cur->charset = XML_CHAR_ENCODING_UTF8; + cur->properties = XML_DOC_HTML | XML_DOC_USERBUILT; if ((ExternalID != NULL) || (URI != NULL)) xmlCreateIntSubset(cur, BAD_CAST "html", ExternalID, URI); @@ -3482,6 +3483,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { "htmlParseStartTag: misplaced <html> tag\n", name, NULL); discardtag = 1; + ctxt->depth++; } if ((ctxt->nameNr != 1) && (xmlStrEqual(name, BAD_CAST"head"))) { @@ -3489,6 +3491,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { "htmlParseStartTag: misplaced <head> tag\n", name, NULL); discardtag = 1; + ctxt->depth++; } if (xmlStrEqual(name, BAD_CAST"body")) { int indx; @@ -3498,6 +3501,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { "htmlParseStartTag: misplaced <body> tag\n", name, NULL); discardtag = 1; + ctxt->depth++; } } } @@ -3648,7 +3652,6 @@ htmlParseEndTag(htmlParserCtxtPtr ctxt) name = htmlParseHTMLName(ctxt); if (name == NULL) return (0); - /* * We should definitely be at the ending "S? '>'" part */ @@ -3669,6 +3672,18 @@ htmlParseEndTag(htmlParserCtxtPtr ctxt) NEXT; /* + * if we ignored misplaced tags in htmlParseStartTag don't pop them + * out now. + */ + if ((ctxt->depth > 0) && + (xmlStrEqual(name, BAD_CAST "html") || + xmlStrEqual(name, BAD_CAST "body") || + xmlStrEqual(name, BAD_CAST "head"))) { + ctxt->depth--; + return (0); + } + + /* * If the name read is not one of the element in the parsing stack * then return, it's just an error. 
*/ @@ -5958,8 +5973,12 @@ htmlDoRead(htmlParserCtxtPtr ctxt, const char *URL, const char *encoding, xmlCharEncodingHandlerPtr hdlr; hdlr = xmlFindCharEncodingHandler(encoding); - if (hdlr != NULL) + if (hdlr != NULL) { xmlSwitchToEncoding(ctxt, hdlr); + if (ctxt->input->encoding != NULL) + xmlFree((xmlChar *) ctxt->input->encoding); + ctxt->input->encoding = xmlStrdup((xmlChar *)encoding); + } } if ((URL != NULL) && (ctxt->input != NULL) && (ctxt->input->filename == NULL)) |