diff options
Diffstat (limited to 'HTMLparser.c')
-rw-r--r-- | HTMLparser.c | 140 |
1 files changed, 48 insertions, 92 deletions
diff --git a/HTMLparser.c b/HTMLparser.c index dd0c1ea..66ff17b 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -44,9 +44,6 @@ #include <libxml/globals.h> #include <libxml/uri.h> -#include "buf.h" -#include "enc.h" - #define HTML_MAX_NAMELEN 1000 #define HTML_PARSER_BIG_BUFFER_SIZE 1000 #define HTML_PARSER_BUFFER_SIZE 100 @@ -1085,7 +1082,7 @@ static const char * const htmlStartClose[] = { "div", "p", "head", NULL, "noscript", "p", NULL, "center", "font", "b", "i", "p", "head", NULL, -"a", "a", "head", NULL, +"a", "a", NULL, "caption", "p", NULL, "colgroup", "caption", "colgroup", "col", "p", NULL, "col", "caption", "col", "p", NULL, @@ -1103,43 +1100,6 @@ static const char * const htmlStartClose[] = { "option", "option", NULL, "fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", "pre", "listing", "xmp", "a", NULL, -/* most tags in in FONTSTYLE, PHRASE and SPECIAL should close <head> */ -"tt", "head", NULL, -"i", "head", NULL, -"b", "head", NULL, -"u", "head", NULL, -"s", "head", NULL, -"strike", "head", NULL, -"big", "head", NULL, -"small", "head", NULL, - -"em", "head", NULL, -"strong", "head", NULL, -"dfn", "head", NULL, -"code", "head", NULL, -"samp", "head", NULL, -"kbd", "head", NULL, -"var", "head", NULL, -"cite", "head", NULL, -"abbr", "head", NULL, -"acronym", "head", NULL, - -/* "a" */ -"img", "head", NULL, -/* "applet" */ -/* "embed" */ -/* "object" */ -"font", "head", NULL, -/* "basefont" */ -"br", "head", NULL, -/* "script" */ -"map", "head", NULL, -"q", "head", NULL, -"sub", "head", NULL, -"sup", "head", NULL, -"span", "head", NULL, -"bdo", "head", NULL, -"iframe", "head", NULL, NULL }; @@ -2981,14 +2941,9 @@ htmlParseCharData(htmlParserCtxtPtr ctxt) { */ if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { if (areBlanks(ctxt, buf, nbchar)) { - if (ctxt->keepBlanks) { - if (ctxt->sax->characters != NULL) - ctxt->sax->characters(ctxt->userData, buf, nbchar); - } else { - if (ctxt->sax->ignorableWhitespace != NULL) - ctxt->sax->ignorableWhitespace(ctxt->userData, - buf, nbchar); - } + if (ctxt->sax->ignorableWhitespace != NULL) + ctxt->sax->ignorableWhitespace(ctxt->userData, + buf, nbchar); } else { htmlCheckParagraph(ctxt); if (ctxt->sax->characters != NULL) @@ -3019,14 +2974,8 @@ htmlParseCharData(htmlParserCtxtPtr ctxt) { */ if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { if (areBlanks(ctxt, buf, nbchar)) { - if (ctxt->keepBlanks) { - if (ctxt->sax->characters != NULL) - ctxt->sax->characters(ctxt->userData, buf, nbchar); - } else { - if (ctxt->sax->ignorableWhitespace != NULL) - ctxt->sax->ignorableWhitespace(ctxt->userData, - buf, nbchar); - } + if (ctxt->sax->ignorableWhitespace != NULL) + ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); } else { htmlCheckParagraph(ctxt); if (ctxt->sax->characters != NULL) @@ -3560,14 +3509,19 @@ htmlCheckEncodingDirect(htmlParserCtxtPtr ctxt, const xmlChar *encoding) { * convert as much as possible to the parser reading buffer. */ processed = ctxt->input->cur - ctxt->input->base; - xmlBufShrink(ctxt->input->buf->buffer, processed); - nbchars = xmlCharEncInput(ctxt->input->buf, 1); + xmlBufferShrink(ctxt->input->buf->buffer, processed); + nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder, + ctxt->input->buf->buffer, + ctxt->input->buf->raw); if (nbchars < 0) { htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, "htmlCheckEncoding: encoder error\n", NULL, NULL); } - xmlBufResetInput(ctxt->input->buf->buffer, ctxt->input); + ctxt->input->base = + ctxt->input->cur = ctxt->input->buf->buffer->content; + ctxt->input->end = + &ctxt->input->base[ctxt->input->buf->buffer->use]; } } } @@ -4952,7 +4906,9 @@ htmlCreateMemoryParserCtxt(const char *buffer, int size) { input->filename = NULL; input->buf = buf; - xmlBufResetInput(buf->buffer, input); + input->base = input->buf->buffer->content; + input->cur = input->buf->buffer->content; + input->end = &input->buf->buffer->content[input->buf->buffer->use]; inputPush(ctxt, input); return(ctxt); @@ -5069,8 +5025,8 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first, buf = in->base; len = in->length; } else { - buf = xmlBufContent(in->buf->buffer); - len = xmlBufUse(in->buf->buffer); + buf = in->buf->buffer->content; + len = in->buf->buffer->use; } /* take into account the sequence length */ @@ -5162,13 +5118,13 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first, * @stop: Array of chars, which stop the lookup. * @stopLen: Length of stop-Array * - * Try to find if any char of the stop-Array is available in the input + * Try to find if any char of the stop-Array is available in the input * stream. * This function has a side effect of (possibly) incrementing ctxt->checkIndex * to avoid rescanning sequences of bytes, it DOES change the state of the * parser, do not use liberally. * - * Returns the index to the current parsing point if a stopChar + * Returns the index to the current parsing point if a stopChar * is available, -1 otherwise. */ static int @@ -5196,8 +5152,8 @@ htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop, buf = in->base; len = in->length; } else { - buf = xmlBufContent(in->buf->buffer); - len = xmlBufUse(in->buf->buffer); + buf = in->buf->buffer->content; + len = in->buf->buffer->use; } for (; base < len; base++) { @@ -5308,7 +5264,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { if (in->buf == NULL) avail = in->length - (in->cur - in->base); else - avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); + avail = in->buf->buffer->use - (in->cur - in->base); if ((avail == 0) && (terminate)) { htmlAutoCloseOnEnd(ctxt); if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { @@ -5344,7 +5300,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { if (in->buf == NULL) avail = in->length - (in->cur - in->base); else - avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); + avail = in->buf->buffer->use - (in->cur - in->base); } if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) ctxt->sax->setDocumentLocator(ctxt->userData, @@ -5386,7 +5342,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { if (in->buf == NULL) avail = in->length - (in->cur - in->base); else - avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); + avail = in->buf->buffer->use - (in->cur - in->base); /* * no chars in buffer */ @@ -5459,7 +5415,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { if (in->buf == NULL) avail = in->length - (in->cur - in->base); else - avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); + avail = in->buf->buffer->use - (in->cur - in->base); if (avail < 2) goto done; cur = in->cur[0]; @@ -5500,7 +5456,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { if (in->buf == NULL) avail = in->length - (in->cur - in->base); else - avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); + avail = in->buf->buffer->use - (in->cur - in->base); if (avail < 1) goto done; cur = in->cur[0]; @@ -5698,15 +5654,9 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { if ((cur != '<') && (cur != '&')) { if (ctxt->sax != NULL) { if (IS_BLANK_CH(cur)) { - if (ctxt->keepBlanks) { - if (ctxt->sax->characters != NULL) - ctxt->sax->characters( - ctxt->userData, &cur, 1); - } else { - if (ctxt->sax->ignorableWhitespace != NULL) - ctxt->sax->ignorableWhitespace( - ctxt->userData, &cur, 1); - } + if (ctxt->sax->ignorableWhitespace != NULL) + ctxt->sax->ignorableWhitespace( + ctxt->userData, &cur, 1); } else { htmlCheckParagraph(ctxt); if (ctxt->sax->characters != NULL) @@ -6029,8 +5979,8 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size, } if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { - size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); - size_t cur = ctxt->input->cur - ctxt->input->base; + int base = ctxt->input->base - ctxt->input->buf->buffer->content; + int cur = ctxt->input->cur - ctxt->input->base; int res; res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); @@ -6039,7 +5989,10 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size, ctxt->disableSAX = 1; return (XML_PARSER_EOF); } - xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); + ctxt->input->base = ctxt->input->buf->buffer->content + base; + ctxt->input->cur = ctxt->input->base + cur; + ctxt->input->end = + &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); #endif @@ -6054,16 +6007,13 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size, if ((in->encoder != NULL) && (in->buffer != NULL) && (in->raw != NULL)) { int nbchars; - size_t base = xmlBufGetInputBase(in->buffer, ctxt->input); - size_t current = ctxt->input->cur - ctxt->input->base; - nbchars = xmlCharEncInput(in, terminate); + nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); if (nbchars < 0) { htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, "encoder error\n", NULL, NULL); return(XML_ERR_INVALID_ENCODING); } - xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current); } } } @@ -6157,18 +6107,24 @@ htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data, inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) filename); inputStream->buf = buf; - xmlBufResetInput(buf->buffer, inputStream); + inputStream->base = inputStream->buf->buffer->content; + inputStream->cur = inputStream->buf->buffer->content; + inputStream->end = + &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; inputPush(ctxt, inputStream); if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && (ctxt->input->buf != NULL)) { - size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); - size_t cur = ctxt->input->cur - ctxt->input->base; + int base = ctxt->input->base - ctxt->input->buf->buffer->content; + int cur = ctxt->input->cur - ctxt->input->base; xmlParserInputBufferPush(ctxt->input->buf, size, chunk); - xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); + ctxt->input->base = ctxt->input->buf->buffer->content + base; + ctxt->input->cur = ctxt->input->base + cur; + ctxt->input->end = + &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); #endif |