diff options
Diffstat (limited to 'parserInternals.c')
-rw-r--r-- | parserInternals.c | 206 |
1 files changed, 123 insertions, 83 deletions
diff --git a/parserInternals.c b/parserInternals.c index f8a7041..746b7fd 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -55,9 +55,6 @@ #include <libxml/globals.h> #include <libxml/chvalid.h> -#include "buf.h" -#include "enc.h" - /* * Various global defaults for parsing */ @@ -76,15 +73,15 @@ xmlCheckVersion(int version) { xmlInitParser(); if ((myversion / 10000) != (version / 10000)) { - xmlGenericError(xmlGenericErrorContext, + xmlGenericError(xmlGenericErrorContext, "Fatal: program compiled against libxml %d using libxml %d\n", (version / 10000), (myversion / 10000)); - fprintf(stderr, + fprintf(stderr, "Fatal: program compiled against libxml %d using libxml %d\n", (version / 10000), (myversion / 10000)); } if ((myversion / 100) < (version / 100)) { - xmlGenericError(xmlGenericErrorContext, + xmlGenericError(xmlGenericErrorContext, "Warning: program compiled against libxml %d using older %d\n", (version / 100), (myversion / 100)); } @@ -93,7 +90,7 @@ xmlCheckVersion(int version) { /************************************************************************ * * - * Some factorized error routines * + * Some factorized error routines * * * ************************************************************************/ @@ -228,7 +225,7 @@ xmlIsLetter(int c) { /************************************************************************ * * - * Input handling functions for progressive parsing * + * Input handling functions for progressive parsing * * * ************************************************************************/ @@ -245,7 +242,7 @@ xmlIsLetter(int c) { static void check_buffer(xmlParserInputPtr in) { - if (in->base != xmlBufContent(in->buf->buffer)) { + if (in->base != in->buf->buffer->content) { xmlGenericError(xmlGenericErrorContext, "xmlParserInput: base mismatch problem\n"); } @@ -253,17 +250,17 @@ void check_buffer(xmlParserInputPtr in) { xmlGenericError(xmlGenericErrorContext, "xmlParserInput: cur < base problem\n"); } - if (in->cur > in->base + xmlBufUse(in->buf->buffer)) { + if (in->cur > in->base + in->buf->buffer->use) { xmlGenericError(xmlGenericErrorContext, "xmlParserInput: cur > base + use problem\n"); } - xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d\n", - (int) in, (int) xmlBufContent(in->buf->buffer), in->cur - in->base, - xmlBufUse(in->buf->buffer)); + xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n", + (int) in, (int) in->buf->buffer->content, in->cur - in->base, + in->buf->buffer->use, in->buf->buffer->size); } #else -#define CHECK_BUFFER(in) +#define CHECK_BUFFER(in) #endif @@ -272,13 +269,50 @@ void check_buffer(xmlParserInputPtr in) { * @in: an XML parser input * @len: an indicative size for the lookahead * - * This function was internal and is deprecated. + * This function refresh the input for the parser. It doesn't try to + * preserve pointers to the input buffer, and discard already read data * - * Returns -1 as this is an error to use it. + * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the + * end of this entity */ int -xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) { - return(-1); +xmlParserInputRead(xmlParserInputPtr in, int len) { + int ret; + int used; + int indx; + + if (in == NULL) return(-1); +#ifdef DEBUG_INPUT + xmlGenericError(xmlGenericErrorContext, "Read\n"); +#endif + if (in->buf == NULL) return(-1); + if (in->base == NULL) return(-1); + if (in->cur == NULL) return(-1); + if (in->buf->buffer == NULL) return(-1); + if (in->buf->readcallback == NULL) return(-1); + + CHECK_BUFFER(in); + + used = in->cur - in->buf->buffer->content; + ret = xmlBufferShrink(in->buf->buffer, used); + if (ret > 0) { + in->cur -= ret; + in->consumed += ret; + } + ret = xmlParserInputBufferRead(in->buf, len); + if (in->base != in->buf->buffer->content) { + /* + * the buffer has been reallocated + */ + indx = in->cur - in->base; + in->base = in->buf->buffer->content; + in->cur = &in->buf->buffer->content[indx]; + } + in->end = &in->buf->buffer->content[in->buf->buffer->use]; + + CHECK_BUFFER(in); + + return(ret); } /** @@ -289,16 +323,15 @@ xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUS * This function increase the input for the parser. It tries to * preserve pointers to the input buffer, and keep already read data * - * Returns the amount of char read, or -1 in case of error, 0 indicate the + * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the * end of this entity */ int xmlParserInputGrow(xmlParserInputPtr in, int len) { - size_t ret; - size_t indx; - const xmlChar *content; + int ret; + int indx; - if ((in == NULL) || (len < 0)) return(-1); + if (in == NULL) return(-1); #ifdef DEBUG_INPUT xmlGenericError(xmlGenericErrorContext, "Grow\n"); #endif @@ -310,15 +343,15 @@ xmlParserInputGrow(xmlParserInputPtr in, int len) { CHECK_BUFFER(in); indx = in->cur - in->base; - if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) { + if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) { CHECK_BUFFER(in); return(0); } - if (in->buf->readcallback != NULL) { + if (in->buf->readcallback != NULL) ret = xmlParserInputBufferGrow(in->buf, len); - } else + else return(0); /* @@ -327,17 +360,15 @@ xmlParserInputGrow(xmlParserInputPtr in, int len) { * pointer arithmetic. Insure will raise it as a bug but in * that specific case, that's not ! */ - - content = xmlBufContent(in->buf->buffer); - if (in->base != content) { + if (in->base != in->buf->buffer->content) { /* * the buffer has been reallocated */ indx = in->cur - in->base; - in->base = content; - in->cur = &content[indx]; + in->base = in->buf->buffer->content; + in->cur = &in->buf->buffer->content[indx]; } - in->end = xmlBufEnd(in->buf->buffer); + in->end = &in->buf->buffer->content[in->buf->buffer->use]; CHECK_BUFFER(in); @@ -352,10 +383,9 @@ xmlParserInputGrow(xmlParserInputPtr in, int len) { */ void xmlParserInputShrink(xmlParserInputPtr in) { - size_t used; - size_t ret; - size_t indx; - const xmlChar *content; + int used; + int ret; + int indx; #ifdef DEBUG_INPUT xmlGenericError(xmlGenericErrorContext, "Shrink\n"); @@ -368,43 +398,42 @@ xmlParserInputShrink(xmlParserInputPtr in) { CHECK_BUFFER(in); - used = in->cur - xmlBufContent(in->buf->buffer); + used = in->cur - in->buf->buffer->content; /* * Do not shrink on large buffers whose only a tiny fraction * was consumed */ if (used > INPUT_CHUNK) { - ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN); + ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN); if (ret > 0) { in->cur -= ret; in->consumed += ret; } - in->end = xmlBufEnd(in->buf->buffer); + in->end = &in->buf->buffer->content[in->buf->buffer->use]; } CHECK_BUFFER(in); - if (xmlBufUse(in->buf->buffer) > INPUT_CHUNK) { + if (in->buf->buffer->use > INPUT_CHUNK) { return; } xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK); - content = xmlBufContent(in->buf->buffer); - if (in->base != content) { + if (in->base != in->buf->buffer->content) { /* * the buffer has been reallocated */ indx = in->cur - in->base; - in->base = content; - in->cur = &content[indx]; + in->base = in->buf->buffer->content; + in->cur = &in->buf->buffer->content[indx]; } - in->end = xmlBufEnd(in->buf->buffer); + in->end = &in->buf->buffer->content[in->buf->buffer->use]; CHECK_BUFFER(in); } /************************************************************************ * * - * UTF8 character input and related functions * + * UTF8 character input and related functions * * * ************************************************************************/ @@ -455,7 +484,7 @@ xmlNextChar(xmlParserCtxtPtr ctxt) * UCS-4 range (hex.) UTF-8 octet sequence (binary) * 0000 0000-0000 007F 0xxxxxxx * 0000 0080-0000 07FF 110xxxxx 10xxxxxx - * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx * * Check for the 0x110000 limit too */ @@ -605,7 +634,7 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { * UCS-4 range (hex.) UTF-8 octet sequence (binary) * 0000 0000-0000 007F 0xxxxxxx * 0000 0080-0000 07FF 110xxxxx 10xxxxxx - * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx * * Check for the 0x110000 limit too */ @@ -666,7 +695,7 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { if (!IS_CHAR(val)) { xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, "Char 0x%X out of allowed range\n", val); - } + } return(val); } else { /* 1-byte code */ @@ -730,7 +759,7 @@ encoding_error: "Input is not proper UTF-8, indicate encoding !\n%s", BAD_CAST buffer, NULL); } - ctxt->charset = XML_CHAR_ENCODING_8859_1; + ctxt->charset = XML_CHAR_ENCODING_8859_1; *len = 1; return((int) *ctxt->input->cur); } @@ -759,7 +788,7 @@ xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len) * UCS-4 range (hex.) UTF-8 octet sequence (binary) * 0000 0000-0000 007F 0xxxxxxx * 0000 0080-0000 07FF 110xxxxx 10xxxxxx - * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx * * Check for the 0x110000 limit too */ @@ -852,7 +881,7 @@ encoding_error: * @out: pointer to an array of xmlChar * @val: the char value * - * append the char value in the array + * append the char value in the array * * Returns the number of xmlChar written */ @@ -866,7 +895,7 @@ xmlCopyCharMultiByte(xmlChar *out, int val) { * UCS-4 range (hex.) UTF-8 octet sequence (binary) * 0000 0000-0000 007F 0xxxxxxx * 0000 0080-0000 07FF 110xxxxx 10xxxxxx - * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx */ if (val >= 0x80) { xmlChar *savedout = out; @@ -894,7 +923,7 @@ xmlCopyCharMultiByte(xmlChar *out, int val) { * @out: pointer to an array of xmlChar * @val: the char value * - * append the char value in the array + * append the char value in the array * * Returns the number of xmlChar written */ @@ -916,6 +945,11 @@ xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) { * * ************************************************************************/ +/* defined in encoding.c, not public */ +int +xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, + xmlBufferPtr in, int len); + static int xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler, int len); @@ -1155,12 +1189,12 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, /* * Is there already some content down the pipe to convert ? */ - if (xmlBufIsEmpty(input->buf->buffer) == 0) { + if ((input->buf->buffer != NULL) && (input->buf->buffer->use > 0)) { int processed; unsigned int use; /* - * Specific handling of the Byte Order Mark for + * Specific handling of the Byte Order Mark for * UTF-16 */ if ((handler->name != NULL) && @@ -1191,17 +1225,19 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, * Move it as the raw buffer and create a new input buffer */ processed = input->cur - input->base; - xmlBufShrink(input->buf->buffer, processed); + xmlBufferShrink(input->buf->buffer, processed); input->buf->raw = input->buf->buffer; - input->buf->buffer = xmlBufCreate(); + input->buf->buffer = xmlBufferCreate(); input->buf->rawconsumed = processed; - use = xmlBufUse(input->buf->raw); + use = input->buf->raw->use; if (ctxt->html) { /* * convert as much as possible of the buffer */ - nbchars = xmlCharEncInput(input->buf, 1); + nbchars = xmlCharEncInFunc(input->buf->encoder, + input->buf->buffer, + input->buf->raw); } else { /* * convert just enough to get @@ -1209,7 +1245,10 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, * parsed with the autodetected encoding * into the parser reading buffer. */ - nbchars = xmlCharEncFirstLineInput(input->buf, len); + nbchars = xmlCharEncFirstLineInt(input->buf->encoder, + input->buf->buffer, + input->buf->raw, + len); } if (nbchars < 0) { xmlErrInternal(ctxt, @@ -1217,8 +1256,10 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, NULL); return (-1); } - input->buf->rawconsumed += use - xmlBufUse(input->buf->raw); - xmlBufResetInput(input->buf->buffer, input); + input->buf->rawconsumed += use - input->buf->raw->use; + input->base = input->cur = input->buf->buffer->content; + input->end = &input->base[input->buf->buffer->use]; + } return (0); } else if (input->length == 0) { @@ -1253,7 +1294,7 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, * xmlSwitchToEncodingInt: * @ctxt: the parser context * @handler: the encoding handler - * @len: the length to convert or -1 + * @len: the lenght to convert or -1 * * change the input functions when discovering the character encoding * of a given entity, and convert only @len bytes of the output, this @@ -1295,7 +1336,7 @@ xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, * Returns 0 in case of success, -1 otherwise */ int -xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) +xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) { return (xmlSwitchToEncodingInt(ctxt, handler, -1)); } @@ -1322,7 +1363,7 @@ xmlFreeInputStream(xmlParserInputPtr input) { if (input->version != NULL) xmlFree((char *) input->version); if ((input->free != NULL) && (input->base != NULL)) input->free((xmlChar *) input->base); - if (input->buf != NULL) + if (input->buf != NULL) xmlFreeParserInputBuffer(input->buf); xmlFree(input); } @@ -1385,8 +1426,9 @@ xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input, } inputStream->filename = NULL; inputStream->buf = input; - xmlBufResetInput(inputStream->buf->buffer, inputStream); - + inputStream->base = inputStream->buf->buffer->content; + inputStream->cur = inputStream->buf->buffer->content; + inputStream->end = &inputStream->base[inputStream->buf->buffer->use]; if (enc != XML_CHAR_ENCODING_NONE) { xmlSwitchEncoding(ctxt, enc); } @@ -1528,7 +1570,7 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { inputStream = xmlCheckHTTPInput(ctxt, inputStream); if (inputStream == NULL) return(NULL); - + if (inputStream->filename == NULL) URI = xmlStrdup((xmlChar *) filename); else @@ -1539,7 +1581,9 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { if (URI != NULL) xmlFree((char *) URI); inputStream->directory = directory; - xmlBufResetInput(inputStream->buf->buffer, inputStream); + inputStream->base = inputStream->buf->buffer->content; + inputStream->cur = inputStream->buf->buffer->content; + inputStream->end = &inputStream->base[inputStream->buf->buffer->use]; if ((ctxt->directory == NULL) && (directory != NULL)) ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory); return(inputStream); @@ -1578,8 +1622,6 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt) xmlErrMemory(NULL, "cannot initialize parser context\n"); return(-1); } - xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT); - if (ctxt->sax == NULL) ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler)); if (ctxt->sax == NULL) { @@ -1719,8 +1761,6 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt) ctxt->charset = XML_CHAR_ENCODING_UTF8; ctxt->catalogs = NULL; ctxt->nbentities = 0; - ctxt->sizeentities = 0; - ctxt->sizeentcopy = 0; ctxt->input_id = 1; xmlInitNodeInfoSeq(&ctxt->node_seq); return(0); @@ -1767,7 +1807,7 @@ xmlFreeParserCtxt(xmlParserCtxtPtr ctxt) if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab); if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab); if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs); - if (ctxt->attsDefault != NULL) + if (ctxt->attsDefault != NULL) xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); if (ctxt->attsSpecial != NULL) xmlHashFree(ctxt->attsSpecial, NULL); @@ -1867,7 +1907,7 @@ xmlClearParserCtxt(xmlParserCtxtPtr ctxt) * @node: an XML node within the tree * * Find the parser node info struct for a given node - * + * * Returns an xmlParserNodeInfo block pointer or NULL */ const xmlParserNodeInfo * @@ -1925,7 +1965,7 @@ xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) * @seq: a node info sequence pointer * @node: an XML node pointer * - * + * * xmlParserFindNodeInfoIndex : Find the index that the info record for * the given node is or should be at in a sorted sequence * @@ -1982,7 +2022,7 @@ xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr) info->node); - if ((pos < ctxt->node_seq.length) && + if ((pos < ctxt->node_seq.length) && (ctxt->node_seq.buffer != NULL) && (ctxt->node_seq.buffer[pos].node == info->node)) { ctxt->node_seq.buffer[pos] = *info; @@ -2035,7 +2075,7 @@ xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, ************************************************************************/ /** * xmlPedanticParserDefault: - * @val: int 0 or 1 + * @val: int 0 or 1 * * Set and return the previous value for enabling pedantic warnings. * @@ -2052,7 +2092,7 @@ xmlPedanticParserDefault(int val) { /** * xmlLineNumbersDefault: - * @val: int 0 or 1 + * @val: int 0 or 1 * * Set and return the previous value for enabling line numbers in elements * contents. This may break on old application and is turned off by default. @@ -2070,7 +2110,7 @@ xmlLineNumbersDefault(int val) { /** * xmlSubstituteEntitiesDefault: - * @val: int 0 or 1 + * @val: int 0 or 1 * * Set and return the previous value for default entity support. * Initially the parser always keep entity references instead of substituting @@ -2092,7 +2132,7 @@ xmlSubstituteEntitiesDefault(int val) { /** * xmlKeepBlanksDefault: - * @val: int 0 or 1 + * @val: int 0 or 1 * * Set and return the previous value for default blanks text nodes support. * The 1.x version of the parser used an heuristic to try to detect @@ -2103,7 +2143,7 @@ xmlSubstituteEntitiesDefault(int val) { * ignorableWhitespace() are only generated when running the parser in * validating mode and when the current element doesn't allow CDATA or * mixed content. - * This function is provided as a way to force the standard behavior + * This function is provided as a way to force the standard behavior * on 1.X libs and to switch back to the old mode for compatibility when * running 1.X client code on 2.X . Upgrade of 1.X code should be done * by using xmlIsBlankNode() commodity function to detect the "empty" |