summaryrefslogtreecommitdiff
path: root/HTMLparser.c
diff options
context:
space:
mode:
Diffstat (limited to 'HTMLparser.c')
-rw-r--r--HTMLparser.c140
1 files changed, 48 insertions, 92 deletions
diff --git a/HTMLparser.c b/HTMLparser.c
index dd0c1ea..66ff17b 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -44,9 +44,6 @@
#include <libxml/globals.h>
#include <libxml/uri.h>
-#include "buf.h"
-#include "enc.h"
-
#define HTML_MAX_NAMELEN 1000
#define HTML_PARSER_BIG_BUFFER_SIZE 1000
#define HTML_PARSER_BUFFER_SIZE 100
@@ -1085,7 +1082,7 @@ static const char * const htmlStartClose[] = {
"div", "p", "head", NULL,
"noscript", "p", NULL,
"center", "font", "b", "i", "p", "head", NULL,
-"a", "a", "head", NULL,
+"a", "a", NULL,
"caption", "p", NULL,
"colgroup", "caption", "colgroup", "col", "p", NULL,
"col", "caption", "col", "p", NULL,
@@ -1103,43 +1100,6 @@ static const char * const htmlStartClose[] = {
"option", "option", NULL,
"fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6",
"pre", "listing", "xmp", "a", NULL,
-/* most tags in in FONTSTYLE, PHRASE and SPECIAL should close <head> */
-"tt", "head", NULL,
-"i", "head", NULL,
-"b", "head", NULL,
-"u", "head", NULL,
-"s", "head", NULL,
-"strike", "head", NULL,
-"big", "head", NULL,
-"small", "head", NULL,
-
-"em", "head", NULL,
-"strong", "head", NULL,
-"dfn", "head", NULL,
-"code", "head", NULL,
-"samp", "head", NULL,
-"kbd", "head", NULL,
-"var", "head", NULL,
-"cite", "head", NULL,
-"abbr", "head", NULL,
-"acronym", "head", NULL,
-
-/* "a" */
-"img", "head", NULL,
-/* "applet" */
-/* "embed" */
-/* "object" */
-"font", "head", NULL,
-/* "basefont" */
-"br", "head", NULL,
-/* "script" */
-"map", "head", NULL,
-"q", "head", NULL,
-"sub", "head", NULL,
-"sup", "head", NULL,
-"span", "head", NULL,
-"bdo", "head", NULL,
-"iframe", "head", NULL,
NULL
};
@@ -2981,14 +2941,9 @@ htmlParseCharData(htmlParserCtxtPtr ctxt) {
*/
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
if (areBlanks(ctxt, buf, nbchar)) {
- if (ctxt->keepBlanks) {
- if (ctxt->sax->characters != NULL)
- ctxt->sax->characters(ctxt->userData, buf, nbchar);
- } else {
- if (ctxt->sax->ignorableWhitespace != NULL)
- ctxt->sax->ignorableWhitespace(ctxt->userData,
- buf, nbchar);
- }
+ if (ctxt->sax->ignorableWhitespace != NULL)
+ ctxt->sax->ignorableWhitespace(ctxt->userData,
+ buf, nbchar);
} else {
htmlCheckParagraph(ctxt);
if (ctxt->sax->characters != NULL)
@@ -3019,14 +2974,8 @@ htmlParseCharData(htmlParserCtxtPtr ctxt) {
*/
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
if (areBlanks(ctxt, buf, nbchar)) {
- if (ctxt->keepBlanks) {
- if (ctxt->sax->characters != NULL)
- ctxt->sax->characters(ctxt->userData, buf, nbchar);
- } else {
- if (ctxt->sax->ignorableWhitespace != NULL)
- ctxt->sax->ignorableWhitespace(ctxt->userData,
- buf, nbchar);
- }
+ if (ctxt->sax->ignorableWhitespace != NULL)
+ ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
} else {
htmlCheckParagraph(ctxt);
if (ctxt->sax->characters != NULL)
@@ -3560,14 +3509,19 @@ htmlCheckEncodingDirect(htmlParserCtxtPtr ctxt, const xmlChar *encoding) {
* convert as much as possible to the parser reading buffer.
*/
processed = ctxt->input->cur - ctxt->input->base;
- xmlBufShrink(ctxt->input->buf->buffer, processed);
- nbchars = xmlCharEncInput(ctxt->input->buf, 1);
+ xmlBufferShrink(ctxt->input->buf->buffer, processed);
+ nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
+ ctxt->input->buf->buffer,
+ ctxt->input->buf->raw);
if (nbchars < 0) {
htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
"htmlCheckEncoding: encoder error\n",
NULL, NULL);
}
- xmlBufResetInput(ctxt->input->buf->buffer, ctxt->input);
+ ctxt->input->base =
+ ctxt->input->cur = ctxt->input->buf->buffer->content;
+ ctxt->input->end =
+ &ctxt->input->base[ctxt->input->buf->buffer->use];
}
}
}
@@ -4952,7 +4906,9 @@ htmlCreateMemoryParserCtxt(const char *buffer, int size) {
input->filename = NULL;
input->buf = buf;
- xmlBufResetInput(buf->buffer, input);
+ input->base = input->buf->buffer->content;
+ input->cur = input->buf->buffer->content;
+ input->end = &input->buf->buffer->content[input->buf->buffer->use];
inputPush(ctxt, input);
return(ctxt);
@@ -5069,8 +5025,8 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
buf = in->base;
len = in->length;
} else {
- buf = xmlBufContent(in->buf->buffer);
- len = xmlBufUse(in->buf->buffer);
+ buf = in->buf->buffer->content;
+ len = in->buf->buffer->use;
}
/* take into account the sequence length */
@@ -5162,13 +5118,13 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
* @stop: Array of chars, which stop the lookup.
* @stopLen: Length of stop-Array
*
- * Try to find if any char of the stop-Array is available in the input
+ * Try to find if any char of the stop-Array is available in the input
* stream.
* This function has a side effect of (possibly) incrementing ctxt->checkIndex
* to avoid rescanning sequences of bytes, it DOES change the state of the
* parser, do not use liberally.
*
- * Returns the index to the current parsing point if a stopChar
+ * Returns the index to the current parsing point if a stopChar
* is available, -1 otherwise.
*/
static int
@@ -5196,8 +5152,8 @@ htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop,
buf = in->base;
len = in->length;
} else {
- buf = xmlBufContent(in->buf->buffer);
- len = xmlBufUse(in->buf->buffer);
+ buf = in->buf->buffer->content;
+ len = in->buf->buffer->use;
}
for (; base < len; base++) {
@@ -5308,7 +5264,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if (in->buf == NULL)
avail = in->length - (in->cur - in->base);
else
- avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
+ avail = in->buf->buffer->use - (in->cur - in->base);
if ((avail == 0) && (terminate)) {
htmlAutoCloseOnEnd(ctxt);
if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
@@ -5344,7 +5300,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if (in->buf == NULL)
avail = in->length - (in->cur - in->base);
else
- avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
+ avail = in->buf->buffer->use - (in->cur - in->base);
}
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
ctxt->sax->setDocumentLocator(ctxt->userData,
@@ -5386,7 +5342,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if (in->buf == NULL)
avail = in->length - (in->cur - in->base);
else
- avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
+ avail = in->buf->buffer->use - (in->cur - in->base);
/*
* no chars in buffer
*/
@@ -5459,7 +5415,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if (in->buf == NULL)
avail = in->length - (in->cur - in->base);
else
- avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
+ avail = in->buf->buffer->use - (in->cur - in->base);
if (avail < 2)
goto done;
cur = in->cur[0];
@@ -5500,7 +5456,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if (in->buf == NULL)
avail = in->length - (in->cur - in->base);
else
- avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
+ avail = in->buf->buffer->use - (in->cur - in->base);
if (avail < 1)
goto done;
cur = in->cur[0];
@@ -5698,15 +5654,9 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if ((cur != '<') && (cur != '&')) {
if (ctxt->sax != NULL) {
if (IS_BLANK_CH(cur)) {
- if (ctxt->keepBlanks) {
- if (ctxt->sax->characters != NULL)
- ctxt->sax->characters(
- ctxt->userData, &cur, 1);
- } else {
- if (ctxt->sax->ignorableWhitespace != NULL)
- ctxt->sax->ignorableWhitespace(
- ctxt->userData, &cur, 1);
- }
+ if (ctxt->sax->ignorableWhitespace != NULL)
+ ctxt->sax->ignorableWhitespace(
+ ctxt->userData, &cur, 1);
} else {
htmlCheckParagraph(ctxt);
if (ctxt->sax->characters != NULL)
@@ -6029,8 +5979,8 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
}
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
(ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
- size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
- size_t cur = ctxt->input->cur - ctxt->input->base;
+ int base = ctxt->input->base - ctxt->input->buf->buffer->content;
+ int cur = ctxt->input->cur - ctxt->input->base;
int res;
res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
@@ -6039,7 +5989,10 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
ctxt->disableSAX = 1;
return (XML_PARSER_EOF);
}
- xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
+ ctxt->input->base = ctxt->input->buf->buffer->content + base;
+ ctxt->input->cur = ctxt->input->base + cur;
+ ctxt->input->end =
+ &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
#endif
@@ -6054,16 +6007,13 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
if ((in->encoder != NULL) && (in->buffer != NULL) &&
(in->raw != NULL)) {
int nbchars;
- size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
- size_t current = ctxt->input->cur - ctxt->input->base;
- nbchars = xmlCharEncInput(in, terminate);
+ nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
if (nbchars < 0) {
htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
"encoder error\n", NULL, NULL);
return(XML_ERR_INVALID_ENCODING);
}
- xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
}
}
}
@@ -6157,18 +6107,24 @@ htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data,
inputStream->filename = (char *)
xmlCanonicPath((const xmlChar *) filename);
inputStream->buf = buf;
- xmlBufResetInput(buf->buffer, inputStream);
+ inputStream->base = inputStream->buf->buffer->content;
+ inputStream->cur = inputStream->buf->buffer->content;
+ inputStream->end =
+ &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
inputPush(ctxt, inputStream);
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
(ctxt->input->buf != NULL)) {
- size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
- size_t cur = ctxt->input->cur - ctxt->input->base;
+ int base = ctxt->input->base - ctxt->input->buf->buffer->content;
+ int cur = ctxt->input->cur - ctxt->input->base;
xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
- xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
+ ctxt->input->base = ctxt->input->buf->buffer->content + base;
+ ctxt->input->cur = ctxt->input->base + cur;
+ ctxt->input->end =
+ &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
#endif