summary | refs | log | tree | commit | diff
path: root/parserInternals.c
diff options
context:
space:
mode:
Diffstat (limited to 'parserInternals.c')
-rw-r--r-- parserInternals.c 206
1 files changed, 123 insertions, 83 deletions
diff --git a/parserInternals.c b/parserInternals.c
index f8a7041..746b7fd 100644
--- a/parserInternals.c
+++ b/parserInternals.c
@@ -55,9 +55,6 @@
#include <libxml/globals.h>
#include <libxml/chvalid.h>
-#include "buf.h"
-#include "enc.h"
-
/*
* Various global defaults for parsing
*/
@@ -76,15 +73,15 @@ xmlCheckVersion(int version) {
xmlInitParser();
if ((myversion / 10000) != (version / 10000)) {
- xmlGenericError(xmlGenericErrorContext,
+ xmlGenericError(xmlGenericErrorContext,
"Fatal: program compiled against libxml %d using libxml %d\n",
(version / 10000), (myversion / 10000));
- fprintf(stderr,
+ fprintf(stderr,
"Fatal: program compiled against libxml %d using libxml %d\n",
(version / 10000), (myversion / 10000));
}
if ((myversion / 100) < (version / 100)) {
- xmlGenericError(xmlGenericErrorContext,
+ xmlGenericError(xmlGenericErrorContext,
"Warning: program compiled against libxml %d using older %d\n",
(version / 100), (myversion / 100));
}
@@ -93,7 +90,7 @@ xmlCheckVersion(int version) {
/************************************************************************
* *
- * Some factorized error routines *
+ * Some factorized error routines *
* *
************************************************************************/
@@ -228,7 +225,7 @@ xmlIsLetter(int c) {
/************************************************************************
* *
- * Input handling functions for progressive parsing *
+ * Input handling functions for progressive parsing *
* *
************************************************************************/
@@ -245,7 +242,7 @@ xmlIsLetter(int c) {
static
void check_buffer(xmlParserInputPtr in) {
- if (in->base != xmlBufContent(in->buf->buffer)) {
+ if (in->base != in->buf->buffer->content) {
xmlGenericError(xmlGenericErrorContext,
"xmlParserInput: base mismatch problem\n");
}
@@ -253,17 +250,17 @@ void check_buffer(xmlParserInputPtr in) {
xmlGenericError(xmlGenericErrorContext,
"xmlParserInput: cur < base problem\n");
}
- if (in->cur > in->base + xmlBufUse(in->buf->buffer)) {
+ if (in->cur > in->base + in->buf->buffer->use) {
xmlGenericError(xmlGenericErrorContext,
"xmlParserInput: cur > base + use problem\n");
}
- xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d\n",
- (int) in, (int) xmlBufContent(in->buf->buffer), in->cur - in->base,
- xmlBufUse(in->buf->buffer));
+ xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n",
+ (int) in, (int) in->buf->buffer->content, in->cur - in->base,
+ in->buf->buffer->use, in->buf->buffer->size);
}
#else
-#define CHECK_BUFFER(in)
+#define CHECK_BUFFER(in)
#endif
@@ -272,13 +269,50 @@ void check_buffer(xmlParserInputPtr in) {
* @in: an XML parser input
* @len: an indicative size for the lookahead
*
- * This function was internal and is deprecated.
+ * This function refreshes the input for the parser. It doesn't try to
+ * preserve pointers to the input buffer, and discards already read data.
*
- * Returns -1 as this is an error to use it.
+ * Returns the number of xmlChars read, or -1 in case of error; 0 indicates the
+ * end of this entity
*/
int
-xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
- return(-1);
+xmlParserInputRead(xmlParserInputPtr in, int len) {
+ int ret;
+ int used;
+ int indx;
+
+ if (in == NULL) return(-1);
+#ifdef DEBUG_INPUT
+ xmlGenericError(xmlGenericErrorContext, "Read\n");
+#endif
+ if (in->buf == NULL) return(-1);
+ if (in->base == NULL) return(-1);
+ if (in->cur == NULL) return(-1);
+ if (in->buf->buffer == NULL) return(-1);
+ if (in->buf->readcallback == NULL) return(-1);
+
+ CHECK_BUFFER(in);
+
+ used = in->cur - in->buf->buffer->content;
+ ret = xmlBufferShrink(in->buf->buffer, used);
+ if (ret > 0) {
+ in->cur -= ret;
+ in->consumed += ret;
+ }
+ ret = xmlParserInputBufferRead(in->buf, len);
+ if (in->base != in->buf->buffer->content) {
+ /*
+ * the buffer has been reallocated
+ */
+ indx = in->cur - in->base;
+ in->base = in->buf->buffer->content;
+ in->cur = &in->buf->buffer->content[indx];
+ }
+ in->end = &in->buf->buffer->content[in->buf->buffer->use];
+
+ CHECK_BUFFER(in);
+
+ return(ret);
}
/**
@@ -289,16 +323,15 @@ xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUS
* This function increase the input for the parser. It tries to
* preserve pointers to the input buffer, and keep already read data
*
- * Returns the amount of char read, or -1 in case of error, 0 indicate the
+ * Returns the number of xmlChars read, or -1 in case of error; 0 indicates the
* end of this entity
*/
int
xmlParserInputGrow(xmlParserInputPtr in, int len) {
- size_t ret;
- size_t indx;
- const xmlChar *content;
+ int ret;
+ int indx;
- if ((in == NULL) || (len < 0)) return(-1);
+ if (in == NULL) return(-1);
#ifdef DEBUG_INPUT
xmlGenericError(xmlGenericErrorContext, "Grow\n");
#endif
@@ -310,15 +343,15 @@ xmlParserInputGrow(xmlParserInputPtr in, int len) {
CHECK_BUFFER(in);
indx = in->cur - in->base;
- if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
+ if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
CHECK_BUFFER(in);
return(0);
}
- if (in->buf->readcallback != NULL) {
+ if (in->buf->readcallback != NULL)
ret = xmlParserInputBufferGrow(in->buf, len);
- } else
+ else
return(0);
/*
@@ -327,17 +360,15 @@ xmlParserInputGrow(xmlParserInputPtr in, int len) {
* pointer arithmetic. Insure will raise it as a bug but in
* that specific case, that's not !
*/
-
- content = xmlBufContent(in->buf->buffer);
- if (in->base != content) {
+ if (in->base != in->buf->buffer->content) {
/*
* the buffer has been reallocated
*/
indx = in->cur - in->base;
- in->base = content;
- in->cur = &content[indx];
+ in->base = in->buf->buffer->content;
+ in->cur = &in->buf->buffer->content[indx];
}
- in->end = xmlBufEnd(in->buf->buffer);
+ in->end = &in->buf->buffer->content[in->buf->buffer->use];
CHECK_BUFFER(in);
@@ -352,10 +383,9 @@ xmlParserInputGrow(xmlParserInputPtr in, int len) {
*/
void
xmlParserInputShrink(xmlParserInputPtr in) {
- size_t used;
- size_t ret;
- size_t indx;
- const xmlChar *content;
+ int used;
+ int ret;
+ int indx;
#ifdef DEBUG_INPUT
xmlGenericError(xmlGenericErrorContext, "Shrink\n");
@@ -368,43 +398,42 @@ xmlParserInputShrink(xmlParserInputPtr in) {
CHECK_BUFFER(in);
- used = in->cur - xmlBufContent(in->buf->buffer);
+ used = in->cur - in->buf->buffer->content;
/*
* Do not shrink on large buffers whose only a tiny fraction
* was consumed
*/
if (used > INPUT_CHUNK) {
- ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
+ ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
if (ret > 0) {
in->cur -= ret;
in->consumed += ret;
}
- in->end = xmlBufEnd(in->buf->buffer);
+ in->end = &in->buf->buffer->content[in->buf->buffer->use];
}
CHECK_BUFFER(in);
- if (xmlBufUse(in->buf->buffer) > INPUT_CHUNK) {
+ if (in->buf->buffer->use > INPUT_CHUNK) {
return;
}
xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
- content = xmlBufContent(in->buf->buffer);
- if (in->base != content) {
+ if (in->base != in->buf->buffer->content) {
/*
* the buffer has been reallocated
*/
indx = in->cur - in->base;
- in->base = content;
- in->cur = &content[indx];
+ in->base = in->buf->buffer->content;
+ in->cur = &in->buf->buffer->content[indx];
}
- in->end = xmlBufEnd(in->buf->buffer);
+ in->end = &in->buf->buffer->content[in->buf->buffer->use];
CHECK_BUFFER(in);
}
/************************************************************************
* *
- * UTF8 character input and related functions *
+ * UTF8 character input and related functions *
* *
************************************************************************/
@@ -455,7 +484,7 @@ xmlNextChar(xmlParserCtxtPtr ctxt)
* UCS-4 range (hex.) UTF-8 octet sequence (binary)
* 0000 0000-0000 007F 0xxxxxxx
* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
- * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
+ * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
*
* Check for the 0x110000 limit too
*/
@@ -605,7 +634,7 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
* UCS-4 range (hex.) UTF-8 octet sequence (binary)
* 0000 0000-0000 007F 0xxxxxxx
* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
- * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
+ * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
*
* Check for the 0x110000 limit too
*/
@@ -666,7 +695,7 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
if (!IS_CHAR(val)) {
xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
"Char 0x%X out of allowed range\n", val);
- }
+ }
return(val);
} else {
/* 1-byte code */
@@ -730,7 +759,7 @@ encoding_error:
"Input is not proper UTF-8, indicate encoding !\n%s",
BAD_CAST buffer, NULL);
}
- ctxt->charset = XML_CHAR_ENCODING_8859_1;
+ ctxt->charset = XML_CHAR_ENCODING_8859_1;
*len = 1;
return((int) *ctxt->input->cur);
}
@@ -759,7 +788,7 @@ xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
* UCS-4 range (hex.) UTF-8 octet sequence (binary)
* 0000 0000-0000 007F 0xxxxxxx
* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
- * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
+ * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
*
* Check for the 0x110000 limit too
*/
@@ -852,7 +881,7 @@ encoding_error:
* @out: pointer to an array of xmlChar
* @val: the char value
*
- * append the char value in the array
+ * append the char value in the array
*
* Returns the number of xmlChar written
*/
@@ -866,7 +895,7 @@ xmlCopyCharMultiByte(xmlChar *out, int val) {
* UCS-4 range (hex.) UTF-8 octet sequence (binary)
* 0000 0000-0000 007F 0xxxxxxx
* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
- * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
+ * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
*/
if (val >= 0x80) {
xmlChar *savedout = out;
@@ -894,7 +923,7 @@ xmlCopyCharMultiByte(xmlChar *out, int val) {
* @out: pointer to an array of xmlChar
* @val: the char value
*
- * append the char value in the array
+ * append the char value in the array
*
* Returns the number of xmlChar written
*/
@@ -916,6 +945,11 @@ xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
* *
************************************************************************/
+/* defined in encoding.c, not public */
+int
+xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
+ xmlBufferPtr in, int len);
+
static int
xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
xmlCharEncodingHandlerPtr handler, int len);
@@ -1155,12 +1189,12 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
/*
* Is there already some content down the pipe to convert ?
*/
- if (xmlBufIsEmpty(input->buf->buffer) == 0) {
+ if ((input->buf->buffer != NULL) && (input->buf->buffer->use > 0)) {
int processed;
unsigned int use;
/*
- * Specific handling of the Byte Order Mark for
+ * Specific handling of the Byte Order Mark for
* UTF-16
*/
if ((handler->name != NULL) &&
@@ -1191,17 +1225,19 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
* Move it as the raw buffer and create a new input buffer
*/
processed = input->cur - input->base;
- xmlBufShrink(input->buf->buffer, processed);
+ xmlBufferShrink(input->buf->buffer, processed);
input->buf->raw = input->buf->buffer;
- input->buf->buffer = xmlBufCreate();
+ input->buf->buffer = xmlBufferCreate();
input->buf->rawconsumed = processed;
- use = xmlBufUse(input->buf->raw);
+ use = input->buf->raw->use;
if (ctxt->html) {
/*
* convert as much as possible of the buffer
*/
- nbchars = xmlCharEncInput(input->buf, 1);
+ nbchars = xmlCharEncInFunc(input->buf->encoder,
+ input->buf->buffer,
+ input->buf->raw);
} else {
/*
* convert just enough to get
@@ -1209,7 +1245,10 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
* parsed with the autodetected encoding
* into the parser reading buffer.
*/
- nbchars = xmlCharEncFirstLineInput(input->buf, len);
+ nbchars = xmlCharEncFirstLineInt(input->buf->encoder,
+ input->buf->buffer,
+ input->buf->raw,
+ len);
}
if (nbchars < 0) {
xmlErrInternal(ctxt,
@@ -1217,8 +1256,10 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
NULL);
return (-1);
}
- input->buf->rawconsumed += use - xmlBufUse(input->buf->raw);
- xmlBufResetInput(input->buf->buffer, input);
+ input->buf->rawconsumed += use - input->buf->raw->use;
+ input->base = input->cur = input->buf->buffer->content;
+ input->end = &input->base[input->buf->buffer->use];
+
}
return (0);
} else if (input->length == 0) {
@@ -1253,7 +1294,7 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
* xmlSwitchToEncodingInt:
* @ctxt: the parser context
* @handler: the encoding handler
- * @len: the length to convert or -1
+ * @len: the lenght to convert or -1
*
* change the input functions when discovering the character encoding
* of a given entity, and convert only @len bytes of the output, this
@@ -1295,7 +1336,7 @@ xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
* Returns 0 in case of success, -1 otherwise
*/
int
-xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
+xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
{
return (xmlSwitchToEncodingInt(ctxt, handler, -1));
}
@@ -1322,7 +1363,7 @@ xmlFreeInputStream(xmlParserInputPtr input) {
if (input->version != NULL) xmlFree((char *) input->version);
if ((input->free != NULL) && (input->base != NULL))
input->free((xmlChar *) input->base);
- if (input->buf != NULL)
+ if (input->buf != NULL)
xmlFreeParserInputBuffer(input->buf);
xmlFree(input);
}
@@ -1385,8 +1426,9 @@ xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
}
inputStream->filename = NULL;
inputStream->buf = input;
- xmlBufResetInput(inputStream->buf->buffer, inputStream);
-
+ inputStream->base = inputStream->buf->buffer->content;
+ inputStream->cur = inputStream->buf->buffer->content;
+ inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
if (enc != XML_CHAR_ENCODING_NONE) {
xmlSwitchEncoding(ctxt, enc);
}
@@ -1528,7 +1570,7 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
inputStream = xmlCheckHTTPInput(ctxt, inputStream);
if (inputStream == NULL)
return(NULL);
-
+
if (inputStream->filename == NULL)
URI = xmlStrdup((xmlChar *) filename);
else
@@ -1539,7 +1581,9 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
if (URI != NULL) xmlFree((char *) URI);
inputStream->directory = directory;
- xmlBufResetInput(inputStream->buf->buffer, inputStream);
+ inputStream->base = inputStream->buf->buffer->content;
+ inputStream->cur = inputStream->buf->buffer->content;
+ inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
if ((ctxt->directory == NULL) && (directory != NULL))
ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
return(inputStream);
@@ -1578,8 +1622,6 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
xmlErrMemory(NULL, "cannot initialize parser context\n");
return(-1);
}
- xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
-
if (ctxt->sax == NULL)
ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
if (ctxt->sax == NULL) {
@@ -1719,8 +1761,6 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
ctxt->charset = XML_CHAR_ENCODING_UTF8;
ctxt->catalogs = NULL;
ctxt->nbentities = 0;
- ctxt->sizeentities = 0;
- ctxt->sizeentcopy = 0;
ctxt->input_id = 1;
xmlInitNodeInfoSeq(&ctxt->node_seq);
return(0);
@@ -1767,7 +1807,7 @@ xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);
if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
- if (ctxt->attsDefault != NULL)
+ if (ctxt->attsDefault != NULL)
xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
if (ctxt->attsSpecial != NULL)
xmlHashFree(ctxt->attsSpecial, NULL);
@@ -1867,7 +1907,7 @@ xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
* @node: an XML node within the tree
*
* Find the parser node info struct for a given node
- *
+ *
* Returns an xmlParserNodeInfo block pointer or NULL
*/
const xmlParserNodeInfo *
@@ -1925,7 +1965,7 @@ xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
* @seq: a node info sequence pointer
* @node: an XML node pointer
*
- *
+ *
* xmlParserFindNodeInfoIndex : Find the index that the info record for
* the given node is or should be at in a sorted sequence
*
@@ -1982,7 +2022,7 @@ xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
info->node);
- if ((pos < ctxt->node_seq.length) &&
+ if ((pos < ctxt->node_seq.length) &&
(ctxt->node_seq.buffer != NULL) &&
(ctxt->node_seq.buffer[pos].node == info->node)) {
ctxt->node_seq.buffer[pos] = *info;
@@ -2035,7 +2075,7 @@ xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
************************************************************************/
/**
* xmlPedanticParserDefault:
- * @val: int 0 or 1
+ * @val: int 0 or 1
*
* Set and return the previous value for enabling pedantic warnings.
*
@@ -2052,7 +2092,7 @@ xmlPedanticParserDefault(int val) {
/**
* xmlLineNumbersDefault:
- * @val: int 0 or 1
+ * @val: int 0 or 1
*
* Set and return the previous value for enabling line numbers in elements
* contents. This may break on old application and is turned off by default.
@@ -2070,7 +2110,7 @@ xmlLineNumbersDefault(int val) {
/**
* xmlSubstituteEntitiesDefault:
- * @val: int 0 or 1
+ * @val: int 0 or 1
*
* Set and return the previous value for default entity support.
* Initially the parser always keep entity references instead of substituting
@@ -2092,7 +2132,7 @@ xmlSubstituteEntitiesDefault(int val) {
/**
* xmlKeepBlanksDefault:
- * @val: int 0 or 1
+ * @val: int 0 or 1
*
* Set and return the previous value for default blanks text nodes support.
* The 1.x version of the parser used an heuristic to try to detect
@@ -2103,7 +2143,7 @@ xmlSubstituteEntitiesDefault(int val) {
* ignorableWhitespace() are only generated when running the parser in
* validating mode and when the current element doesn't allow CDATA or
* mixed content.
- * This function is provided as a way to force the standard behavior
+ * This function is provided as a way to force the standard behavior
* on 1.X libs and to switch back to the old mode for compatibility when
* running 1.X client code on 2.X . Upgrade of 1.X code should be done
* by using xmlIsBlankNode() commodity function to detect the "empty"