diff options
Diffstat (limited to 'parserInternals.c')
-rw-r--r-- | parserInternals.c | 223 |
1 files changed, 98 insertions, 125 deletions
diff --git a/parserInternals.c b/parserInternals.c index 746b7fd..df204fd 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -55,6 +55,9 @@ #include <libxml/globals.h> #include <libxml/chvalid.h> +#include "buf.h" +#include "enc.h" + /* * Various global defaults for parsing */ @@ -73,15 +76,15 @@ xmlCheckVersion(int version) { xmlInitParser(); if ((myversion / 10000) != (version / 10000)) { - xmlGenericError(xmlGenericErrorContext, + xmlGenericError(xmlGenericErrorContext, "Fatal: program compiled against libxml %d using libxml %d\n", (version / 10000), (myversion / 10000)); - fprintf(stderr, + fprintf(stderr, "Fatal: program compiled against libxml %d using libxml %d\n", (version / 10000), (myversion / 10000)); } if ((myversion / 100) < (version / 100)) { - xmlGenericError(xmlGenericErrorContext, + xmlGenericError(xmlGenericErrorContext, "Warning: program compiled against libxml %d using older %d\n", (version / 100), (myversion / 100)); } @@ -90,7 +93,7 @@ xmlCheckVersion(int version) { /************************************************************************ * * - * Some factorized error routines * + * Some factorized error routines * * * ************************************************************************/ @@ -225,7 +228,7 @@ xmlIsLetter(int c) { /************************************************************************ * * - * Input handling functions for progressive parsing * + * Input handling functions for progressive parsing * * * ************************************************************************/ @@ -242,7 +245,7 @@ xmlIsLetter(int c) { static void check_buffer(xmlParserInputPtr in) { - if (in->base != in->buf->buffer->content) { + if (in->base != xmlBufContent(in->buf->buffer)) { xmlGenericError(xmlGenericErrorContext, "xmlParserInput: base mismatch problem\n"); } @@ -250,17 +253,17 @@ void check_buffer(xmlParserInputPtr in) { xmlGenericError(xmlGenericErrorContext, "xmlParserInput: cur < base problem\n"); } - if (in->cur > in->base + in->buf->buffer->use) { + if (in->cur > in->base + xmlBufUse(in->buf->buffer)) { xmlGenericError(xmlGenericErrorContext, "xmlParserInput: cur > base + use problem\n"); } - xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n", - (int) in, (int) in->buf->buffer->content, in->cur - in->base, - in->buf->buffer->use, in->buf->buffer->size); + xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d\n", + (int) in, (int) xmlBufContent(in->buf->buffer), in->cur - in->base, + xmlBufUse(in->buf->buffer)); } #else -#define CHECK_BUFFER(in) +#define CHECK_BUFFER(in) #endif @@ -269,50 +272,13 @@ void check_buffer(xmlParserInputPtr in) { * @in: an XML parser input * @len: an indicative size for the lookahead * - * This function refresh the input for the parser. It doesn't try to - * preserve pointers to the input buffer, and discard already read data + * This function was internal and is deprecated. * - * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the - * end of this entity + * Returns -1 as this is an error to use it. */ int -xmlParserInputRead(xmlParserInputPtr in, int len) { - int ret; - int used; - int indx; - - if (in == NULL) return(-1); -#ifdef DEBUG_INPUT - xmlGenericError(xmlGenericErrorContext, "Read\n"); -#endif - if (in->buf == NULL) return(-1); - if (in->base == NULL) return(-1); - if (in->cur == NULL) return(-1); - if (in->buf->buffer == NULL) return(-1); - if (in->buf->readcallback == NULL) return(-1); - - CHECK_BUFFER(in); - - used = in->cur - in->buf->buffer->content; - ret = xmlBufferShrink(in->buf->buffer, used); - if (ret > 0) { - in->cur -= ret; - in->consumed += ret; - } - ret = xmlParserInputBufferRead(in->buf, len); - if (in->base != in->buf->buffer->content) { - /* - * the buffer has been reallocated - */ - indx = in->cur - in->base; - in->base = in->buf->buffer->content; - in->cur = &in->buf->buffer->content[indx]; - } - in->end = &in->buf->buffer->content[in->buf->buffer->use]; - - CHECK_BUFFER(in); - - return(ret); +xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) { + return(-1); } /** @@ -323,15 +289,16 @@ xmlParserInputRead(xmlParserInputPtr in, int len) { * This function increase the input for the parser. It tries to * preserve pointers to the input buffer, and keep already read data * - * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the + * Returns the amount of char read, or -1 in case of error, 0 indicate the * end of this entity */ int xmlParserInputGrow(xmlParserInputPtr in, int len) { - int ret; - int indx; + size_t ret; + size_t indx; + const xmlChar *content; - if (in == NULL) return(-1); + if ((in == NULL) || (len < 0)) return(-1); #ifdef DEBUG_INPUT xmlGenericError(xmlGenericErrorContext, "Grow\n"); #endif @@ -343,15 +310,15 @@ xmlParserInputGrow(xmlParserInputPtr in, int len) { CHECK_BUFFER(in); indx = in->cur - in->base; - if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) { + if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) { CHECK_BUFFER(in); return(0); } - if (in->buf->readcallback != NULL) + if (in->buf->readcallback != NULL) { ret = xmlParserInputBufferGrow(in->buf, len); - else + } else return(0); /* @@ -360,15 +327,17 @@ xmlParserInputGrow(xmlParserInputPtr in, int len) { * pointer arithmetic. Insure will raise it as a bug but in * that specific case, that's not ! */ - if (in->base != in->buf->buffer->content) { + + content = xmlBufContent(in->buf->buffer); + if (in->base != content) { /* * the buffer has been reallocated */ indx = in->cur - in->base; - in->base = in->buf->buffer->content; - in->cur = &in->buf->buffer->content[indx]; + in->base = content; + in->cur = &content[indx]; } - in->end = &in->buf->buffer->content[in->buf->buffer->use]; + in->end = xmlBufEnd(in->buf->buffer); CHECK_BUFFER(in); @@ -383,9 +352,10 @@ xmlParserInputGrow(xmlParserInputPtr in, int len) { */ void xmlParserInputShrink(xmlParserInputPtr in) { - int used; - int ret; - int indx; + size_t used; + size_t ret; + size_t indx; + const xmlChar *content; #ifdef DEBUG_INPUT xmlGenericError(xmlGenericErrorContext, "Shrink\n"); @@ -398,42 +368,43 @@ xmlParserInputShrink(xmlParserInputPtr in) { CHECK_BUFFER(in); - used = in->cur - in->buf->buffer->content; + used = in->cur - xmlBufContent(in->buf->buffer); /* * Do not shrink on large buffers whose only a tiny fraction * was consumed */ if (used > INPUT_CHUNK) { - ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN); + ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN); if (ret > 0) { in->cur -= ret; in->consumed += ret; } - in->end = &in->buf->buffer->content[in->buf->buffer->use]; + in->end = xmlBufEnd(in->buf->buffer); } CHECK_BUFFER(in); - if (in->buf->buffer->use > INPUT_CHUNK) { + if (xmlBufUse(in->buf->buffer) > INPUT_CHUNK) { return; } xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK); - if (in->base != in->buf->buffer->content) { + content = xmlBufContent(in->buf->buffer); + if (in->base != content) { /* * the buffer has been reallocated */ indx = in->cur - in->base; - in->base = in->buf->buffer->content; - in->cur = &in->buf->buffer->content[indx]; + in->base = content; + in->cur = &content[indx]; } - in->end = &in->buf->buffer->content[in->buf->buffer->use]; + in->end = xmlBufEnd(in->buf->buffer); CHECK_BUFFER(in); } /************************************************************************ * * - * UTF8 character input and related functions * + * UTF8 character input and related functions * * * ************************************************************************/ @@ -484,7 +455,7 @@ xmlNextChar(xmlParserCtxtPtr ctxt) * UCS-4 range (hex.) UTF-8 octet sequence (binary) * 0000 0000-0000 007F 0xxxxxxx * 0000 0080-0000 07FF 110xxxxx 10xxxxxx - * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx * * Check for the 0x110000 limit too */ @@ -634,7 +605,7 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { * UCS-4 range (hex.) UTF-8 octet sequence (binary) * 0000 0000-0000 007F 0xxxxxxx * 0000 0080-0000 07FF 110xxxxx 10xxxxxx - * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx * * Check for the 0x110000 limit too */ @@ -695,7 +666,7 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { if (!IS_CHAR(val)) { xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, "Char 0x%X out of allowed range\n", val); - } + } return(val); } else { /* 1-byte code */ @@ -759,7 +730,7 @@ encoding_error: "Input is not proper UTF-8, indicate encoding !\n%s", BAD_CAST buffer, NULL); } - ctxt->charset = XML_CHAR_ENCODING_8859_1; + ctxt->charset = XML_CHAR_ENCODING_8859_1; *len = 1; return((int) *ctxt->input->cur); } @@ -788,7 +759,7 @@ xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len) * UCS-4 range (hex.) UTF-8 octet sequence (binary) * 0000 0000-0000 007F 0xxxxxxx * 0000 0080-0000 07FF 110xxxxx 10xxxxxx - * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx * * Check for the 0x110000 limit too */ @@ -881,7 +852,7 @@ encoding_error: * @out: pointer to an array of xmlChar * @val: the char value * - * append the char value in the array + * append the char value in the array * * Returns the number of xmlChar written */ @@ -895,7 +866,7 @@ xmlCopyCharMultiByte(xmlChar *out, int val) { * UCS-4 range (hex.) UTF-8 octet sequence (binary) * 0000 0000-0000 007F 0xxxxxxx * 0000 0080-0000 07FF 110xxxxx 10xxxxxx - * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx */ if (val >= 0x80) { xmlChar *savedout = out; @@ -923,7 +894,7 @@ xmlCopyCharMultiByte(xmlChar *out, int val) { * @out: pointer to an array of xmlChar * @val: the char value * - * append the char value in the array + * append the char value in the array * * Returns the number of xmlChar written */ @@ -945,11 +916,6 @@ xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) { * * ************************************************************************/ -/* defined in encoding.c, not public */ -int -xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, - xmlBufferPtr in, int len); - static int xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler, int len); @@ -1189,12 +1155,12 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, /* * Is there already some content down the pipe to convert ? */ - if ((input->buf->buffer != NULL) && (input->buf->buffer->use > 0)) { + if (xmlBufIsEmpty(input->buf->buffer) == 0) { int processed; unsigned int use; /* - * Specific handling of the Byte Order Mark for + * Specific handling of the Byte Order Mark for * UTF-16 */ if ((handler->name != NULL) && @@ -1225,19 +1191,17 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, * Move it as the raw buffer and create a new input buffer */ processed = input->cur - input->base; - xmlBufferShrink(input->buf->buffer, processed); + xmlBufShrink(input->buf->buffer, processed); input->buf->raw = input->buf->buffer; - input->buf->buffer = xmlBufferCreate(); + input->buf->buffer = xmlBufCreate(); input->buf->rawconsumed = processed; - use = input->buf->raw->use; + use = xmlBufUse(input->buf->raw); if (ctxt->html) { /* * convert as much as possible of the buffer */ - nbchars = xmlCharEncInFunc(input->buf->encoder, - input->buf->buffer, - input->buf->raw); + nbchars = xmlCharEncInput(input->buf, 1); } else { /* * convert just enough to get @@ -1245,10 +1209,7 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, * parsed with the autodetected encoding * into the parser reading buffer. */ - nbchars = xmlCharEncFirstLineInt(input->buf->encoder, - input->buf->buffer, - input->buf->raw, - len); + nbchars = xmlCharEncFirstLineInput(input->buf, len); } if (nbchars < 0) { xmlErrInternal(ctxt, @@ -1256,10 +1217,8 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, NULL); return (-1); } - input->buf->rawconsumed += use - input->buf->raw->use; - input->base = input->cur = input->buf->buffer->content; - input->end = &input->base[input->buf->buffer->use]; - + input->buf->rawconsumed += use - xmlBufUse(input->buf->raw); + xmlBufResetInput(input->buf->buffer, input); } return (0); } else if (input->length == 0) { @@ -1294,7 +1253,7 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, * xmlSwitchToEncodingInt: * @ctxt: the parser context * @handler: the encoding handler - * @len: the lenght to convert or -1 + * @len: the length to convert or -1 * * change the input functions when discovering the character encoding * of a given entity, and convert only @len bytes of the output, this @@ -1336,7 +1295,7 @@ xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, * Returns 0 in case of success, -1 otherwise */ int -xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) +xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) { return (xmlSwitchToEncodingInt(ctxt, handler, -1)); } @@ -1363,7 +1322,7 @@ xmlFreeInputStream(xmlParserInputPtr input) { if (input->version != NULL) xmlFree((char *) input->version); if ((input->free != NULL) && (input->base != NULL)) input->free((xmlChar *) input->base); - if (input->buf != NULL) + if (input->buf != NULL) xmlFreeParserInputBuffer(input->buf); xmlFree(input); } @@ -1426,9 +1385,8 @@ xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input, } inputStream->filename = NULL; inputStream->buf = input; - inputStream->base = inputStream->buf->buffer->content; - inputStream->cur = inputStream->buf->buffer->content; - inputStream->end = &inputStream->base[inputStream->buf->buffer->use]; + xmlBufResetInput(inputStream->buf->buffer, inputStream); + if (enc != XML_CHAR_ENCODING_NONE) { xmlSwitchEncoding(ctxt, enc); } @@ -1570,7 +1528,7 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { inputStream = xmlCheckHTTPInput(ctxt, inputStream); if (inputStream == NULL) return(NULL); - + if (inputStream->filename == NULL) URI = xmlStrdup((xmlChar *) filename); else @@ -1581,9 +1539,7 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { if (URI != NULL) xmlFree((char *) URI); inputStream->directory = directory; - inputStream->base = inputStream->buf->buffer->content; - inputStream->cur = inputStream->buf->buffer->content; - inputStream->end = &inputStream->base[inputStream->buf->buffer->use]; + xmlBufResetInput(inputStream->buf->buffer, inputStream); if ((ctxt->directory == NULL) && (directory != NULL)) ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory); return(inputStream); @@ -1622,6 +1578,8 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt) xmlErrMemory(NULL, "cannot initialize parser context\n"); return(-1); } + xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT); + if (ctxt->sax == NULL) ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler)); if (ctxt->sax == NULL) { @@ -1733,12 +1691,20 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt) ctxt->nsWellFormed = 1; ctxt->valid = 1; ctxt->loadsubset = xmlLoadExtDtdDefaultValue; + if (ctxt->loadsubset) { + ctxt->options |= XML_PARSE_DTDLOAD; + } ctxt->validate = xmlDoValidityCheckingDefaultValue; ctxt->pedantic = xmlPedanticParserDefaultValue; + if (ctxt->pedantic) { + ctxt->options |= XML_PARSE_PEDANTIC; + } ctxt->linenumbers = xmlLineNumbersDefaultValue; ctxt->keepBlanks = xmlKeepBlanksDefaultValue; - if (ctxt->keepBlanks == 0) + if (ctxt->keepBlanks == 0) { ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; + ctxt->options |= XML_PARSE_NOBLANKS; + } ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0; ctxt->vctxt.userData = ctxt; @@ -1750,8 +1716,12 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt) else ctxt->vctxt.warning = xmlParserValidityWarning; ctxt->vctxt.nodeMax = 0; + ctxt->options |= XML_PARSE_DTDVALID; } ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue; + if (ctxt->replaceEntities) { + ctxt->options |= XML_PARSE_NOENT; + } ctxt->record_info = 0; ctxt->nbChars = 0; ctxt->checkIndex = 0; @@ -1761,6 +1731,8 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt) ctxt->charset = XML_CHAR_ENCODING_UTF8; ctxt->catalogs = NULL; ctxt->nbentities = 0; + ctxt->sizeentities = 0; + ctxt->sizeentcopy = 0; ctxt->input_id = 1; xmlInitNodeInfoSeq(&ctxt->node_seq); return(0); @@ -1807,7 +1779,7 @@ xmlFreeParserCtxt(xmlParserCtxtPtr ctxt) if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab); if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab); if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs); - if (ctxt->attsDefault != NULL) + if (ctxt->attsDefault != NULL) xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); if (ctxt->attsSpecial != NULL) xmlHashFree(ctxt->attsSpecial, NULL); @@ -1907,7 +1879,7 @@ xmlClearParserCtxt(xmlParserCtxtPtr ctxt) * @node: an XML node within the tree * * Find the parser node info struct for a given node - * + * * Returns an xmlParserNodeInfo block pointer or NULL */ const xmlParserNodeInfo * @@ -1965,7 +1937,7 @@ xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) * @seq: a node info sequence pointer * @node: an XML node pointer * - * + * * xmlParserFindNodeInfoIndex : Find the index that the info record for * the given node is or should be at in a sorted sequence * @@ -2022,7 +1994,7 @@ xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr) info->node); - if ((pos < ctxt->node_seq.length) && + if ((pos < ctxt->node_seq.length) && (ctxt->node_seq.buffer != NULL) && (ctxt->node_seq.buffer[pos].node == info->node)) { ctxt->node_seq.buffer[pos] = *info; @@ -2030,7 +2002,8 @@ xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, /* Otherwise, we need to add new node to buffer */ else { - if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) { + if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) || + (ctxt->node_seq.buffer == NULL)) { xmlParserNodeInfo *tmp_buffer; unsigned int byte_size; @@ -2075,7 +2048,7 @@ xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, ************************************************************************/ /** * xmlPedanticParserDefault: - * @val: int 0 or 1 + * @val: int 0 or 1 * * Set and return the previous value for enabling pedantic warnings. * @@ -2092,7 +2065,7 @@ xmlPedanticParserDefault(int val) { /** * xmlLineNumbersDefault: - * @val: int 0 or 1 + * @val: int 0 or 1 * * Set and return the previous value for enabling line numbers in elements * contents. This may break on old application and is turned off by default. @@ -2110,7 +2083,7 @@ xmlLineNumbersDefault(int val) { /** * xmlSubstituteEntitiesDefault: - * @val: int 0 or 1 + * @val: int 0 or 1 * * Set and return the previous value for default entity support. * Initially the parser always keep entity references instead of substituting @@ -2132,7 +2105,7 @@ xmlSubstituteEntitiesDefault(int val) { /** * xmlKeepBlanksDefault: - * @val: int 0 or 1 + * @val: int 0 or 1 * * Set and return the previous value for default blanks text nodes support. * The 1.x version of the parser used an heuristic to try to detect @@ -2143,7 +2116,7 @@ xmlSubstituteEntitiesDefault(int val) { * ignorableWhitespace() are only generated when running the parser in * validating mode and when the current element doesn't allow CDATA or * mixed content. - * This function is provided as a way to force the standard behavior + * This function is provided as a way to force the standard behavior * on 1.X libs and to switch back to the old mode for compatibility when * running 1.X client code on 2.X . Upgrade of 1.X code should be done * by using xmlIsBlankNode() commodity function to detect the "empty" |