diff options
author | Mike Hommey <glandium@debian.org> | 2005-03-27 13:13:58 +0000 |
---|---|---|
committer | Mike Hommey <glandium@debian.org> | 2005-03-27 13:13:58 +0000 |
commit | 50e5b428562964b1eb2f876370058b34b47c5e90 (patch) | |
tree | c66bcae6dbbce07128ee881353ff60090524462c /parser.c | |
parent | a7457388701e6ccba9091ba3ec09505dc903b758 (diff) | |
download | libxml2-50e5b428562964b1eb2f876370058b34b47c5e90.tar.gz |
Load /tmp/tmp.XJZ6qc/libxml2-2.6.18 into
packages/libxml2/branches/upstream/current. [tag: upstream/2.6.18]
Diffstat (limited to 'parser.c')
-rw-r--r-- | parser.c | 358 |
1 files changed, 283 insertions, 75 deletions
@@ -410,7 +410,6 @@ xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, if ((ctxt != NULL) && (ctxt->disableSAX != 0) && (ctxt->instate == XML_PARSER_EOF)) return; - ctxt->errNo = error; if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) schannel = ctxt->sax->serror; __xmlRaiseError(schannel, @@ -642,7 +641,7 @@ xmlAddDefAttrs(xmlParserCtxtPtr ctxt, const xmlChar *prefix; if (ctxt->attsDefault == NULL) { - ctxt->attsDefault = xmlHashCreate(10); + ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); if (ctxt->attsDefault == NULL) goto mem_error; } @@ -729,7 +728,7 @@ xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, int type) { if (ctxt->attsSpecial == NULL) { - ctxt->attsSpecial = xmlHashCreate(10); + ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); if (ctxt->attsSpecial == NULL) goto mem_error; } @@ -1389,7 +1388,7 @@ xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { int cur; do { cur = CUR; - while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */ + while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */ NEXT; cur = CUR; res++; @@ -3189,6 +3188,7 @@ xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { int nbchar = 0; int line = ctxt->input->line; int col = ctxt->input->col; + int ccol; SHRINK; GROW; @@ -3233,15 +3233,18 @@ get_more_space: } return; } + get_more: + ccol = ctxt->input->col; while (((*in > ']') && (*in <= 0x7F)) || ((*in > '&') && (*in < '<')) || ((*in > '<') && (*in < ']')) || ((*in >= 0x20) && (*in < '&')) || (*in == 0x09)) { in++; - ctxt->input->col++; + ccol++; } + ctxt->input->col = ccol; if (*in == 0xA) { ctxt->input->line++; ctxt->input->col = 1; in++; @@ -3471,42 +3474,35 @@ xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { } /** - * xmlParseComment: + * xmlParseCommentComplex: * @ctxt: an XML parser context + * @buf: the already parsed part of the buffer + * @len: number of bytes filles in the buffer + * @size: allocated size of the buffer * * Skip an XML (SGML) comment <!-- .... 
--> * The spec says that "For compatibility, the string "--" (double-hyphen) * must not occur within comments. " + * This is the slow routine in case the accelerator for ascii didn't work * * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */ -void -xmlParseComment(xmlParserCtxtPtr ctxt) { - xmlChar *buf = NULL; - int len; - int size = XML_PARSER_BUFFER_SIZE; +static void +xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) { int q, ql; int r, rl; int cur, l; - xmlParserInputState state; xmlParserInputPtr input = ctxt->input; int count = 0; - /* - * Check that there is a comment right here. - */ - if ((RAW != '<') || (NXT(1) != '!') || - (NXT(2) != '-') || (NXT(3) != '-')) return; - - state = ctxt->instate; - ctxt->instate = XML_PARSER_COMMENT; - SHRINK; - SKIP(4); - buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); if (buf == NULL) { - xmlErrMemory(ctxt, NULL); - ctxt->instate = state; - return; + len = 0; + size = XML_PARSER_BUFFER_SIZE; + buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); + if (buf == NULL) { + xmlErrMemory(ctxt, NULL); + return; + } } q = CUR_CHAR(ql); if (q == 0) @@ -3519,7 +3515,6 @@ xmlParseComment(xmlParserCtxtPtr ctxt) { cur = CUR_CHAR(l); if (cur == 0) goto not_terminated; - len = 0; while (IS_CHAR(cur) && /* checked */ ((cur != '>') || (r != '-') || (q != '-'))) { @@ -3533,7 +3528,6 @@ xmlParseComment(xmlParserCtxtPtr ctxt) { if (new_buf == NULL) { xmlFree (buf); xmlErrMemory(ctxt, NULL); - ctxt->instate = state; return; } buf = new_buf; @@ -3573,13 +3567,164 @@ xmlParseComment(xmlParserCtxtPtr ctxt) { ctxt->sax->comment(ctxt->userData, buf); xmlFree(buf); } - ctxt->instate = state; return; not_terminated: xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, "Comment not terminated\n", NULL); xmlFree(buf); } +/** + * xmlParseComment: + * @ctxt: an XML parser context + * + * Skip an XML (SGML) comment <!-- .... 
--> + * The spec says that "For compatibility, the string "--" (double-hyphen) + * must not occur within comments. " + * + * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' + */ +void +xmlParseComment(xmlParserCtxtPtr ctxt) { + xmlChar *buf = NULL; + int size = XML_PARSER_BUFFER_SIZE; + int len = 0; + xmlParserInputState state; + const xmlChar *in; + int nbchar = 0, ccol; + + /* + * Check that there is a comment right here. + */ + if ((RAW != '<') || (NXT(1) != '!') || + (NXT(2) != '-') || (NXT(3) != '-')) return; + + state = ctxt->instate; + ctxt->instate = XML_PARSER_COMMENT; + SKIP(4); + SHRINK; + GROW; + + /* + * Accelerated common case where input don't need to be + * modified before passing it to the handler. + */ + in = ctxt->input->cur; + do { + if (*in == 0xA) { + ctxt->input->line++; ctxt->input->col = 1; + in++; + while (*in == 0xA) { + ctxt->input->line++; ctxt->input->col = 1; + in++; + } + } +get_more: + ccol = ctxt->input->col; + while (((*in > '-') && (*in <= 0x7F)) || + ((*in >= 0x20) && (*in < '-')) || + (*in == 0x09)) { + in++; + ccol++; + } + ctxt->input->col = ccol; + if (*in == 0xA) { + ctxt->input->line++; ctxt->input->col = 1; + in++; + while (*in == 0xA) { + ctxt->input->line++; ctxt->input->col = 1; + in++; + } + goto get_more; + } + nbchar = in - ctxt->input->cur; + /* + * save current set of data + */ + if (nbchar > 0) { + if ((ctxt->sax != NULL) && + (ctxt->sax->comment != NULL)) { + if (buf == NULL) { + if ((*in == '-') && (in[1] == '-')) + size = nbchar + 1; + else + size = XML_PARSER_BUFFER_SIZE + nbchar; + buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); + if (buf == NULL) { + xmlErrMemory(ctxt, NULL); + ctxt->instate = state; + return; + } + len = 0; + } else if (len + nbchar + 1 >= size) { + xmlChar *new_buf; + size += len + nbchar + XML_PARSER_BUFFER_SIZE; + new_buf = (xmlChar *) xmlRealloc(buf, + size * sizeof(xmlChar)); + if (new_buf == NULL) { + xmlFree (buf); + xmlErrMemory(ctxt, NULL); + 
ctxt->instate = state; + return; + } + buf = new_buf; + } + memcpy(&buf[len], ctxt->input->cur, nbchar); + len += nbchar; + buf[len] = 0; + } + } + ctxt->input->cur = in; + if (*in == 0xA) + + if (*in == 0xD) { + in++; + if (*in == 0xA) { + ctxt->input->cur = in; + in++; + ctxt->input->line++; ctxt->input->col = 1; + continue; /* while */ + } + in--; + } + SHRINK; + GROW; + in = ctxt->input->cur; + if (*in == '-') { + if (in[1] == '-') { + if (in[2] == '>') { + SKIP(3); + if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && + (!ctxt->disableSAX)) { + if (buf != NULL) + ctxt->sax->comment(ctxt->userData, buf); + else + ctxt->sax->comment(ctxt->userData, BAD_CAST ""); + } + if (buf != NULL) + xmlFree(buf); + ctxt->instate = state; + return; + } + if (buf != NULL) + xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, + "Comment not terminated \n<!--%.50s\n", + buf); + else + xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, + "Comment not terminated \n", NULL); + in++; + ctxt->input->col++; + } + in++; + ctxt->input->col++; + goto get_more; + } + } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); + xmlParseCommentComplex(ctxt, buf, len, size); + ctxt->instate = state; + return; +} + /** * xmlParsePITarget: @@ -3920,7 +4065,7 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { xmlChar *orig = NULL; int skipped; - GROW; + /* GROW; done in the caller */ if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { xmlParserInputPtr input = ctxt->input; SHRINK; @@ -4593,7 +4738,7 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { NULL); } NEXT; - ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); + ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); if (RAW == '*') { ret->ocur = XML_ELEMENT_CONTENT_MULT; NEXT; @@ -4601,22 +4746,22 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { return(ret); } if ((RAW == '(') || (RAW == '|')) { - ret = cur = xmlNewElementContent(NULL, 
XML_ELEMENT_CONTENT_PCDATA); + ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); if (ret == NULL) return(NULL); } while (RAW == '|') { NEXT; if (elem == NULL) { - ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); + ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); if (ret == NULL) return(NULL); ret->c1 = cur; if (cur != NULL) cur->parent = ret; cur = ret; } else { - n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); + n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); if (n == NULL) return(NULL); - n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); + n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); if (n->c1 != NULL) n->c1->parent = n; cur->c2 = n; @@ -4629,7 +4774,7 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { if (elem == NULL) { xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, "xmlParseElementMixedContentDecl : Name expected\n"); - xmlFreeElementContent(cur); + xmlFreeDocElementContent(ctxt->myDoc, cur); return(NULL); } SKIP_BLANKS; @@ -4637,7 +4782,7 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { } if ((RAW == ')') && (NXT(1) == '*')) { if (elem != NULL) { - cur->c2 = xmlNewElementContent(elem, + cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); if (cur->c2 != NULL) cur->c2->parent = cur; @@ -4650,7 +4795,7 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { } SKIP(2); } else { - xmlFreeElementContent(ret); + xmlFreeDocElementContent(ctxt->myDoc, ret); xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL); return(NULL); } @@ -4715,7 +4860,7 @@ xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) { xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); return(NULL); } - cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); + cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, 
XML_ELEMENT_CONTENT_ELEMENT); if (cur == NULL) { xmlErrMemory(ctxt, NULL); return(NULL); @@ -4752,18 +4897,18 @@ xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) { "xmlParseElementChildrenContentDecl : '%c' expected\n", type); if ((last != NULL) && (last != ret)) - xmlFreeElementContent(last); + xmlFreeDocElementContent(ctxt->myDoc, last); if (ret != NULL) - xmlFreeElementContent(ret); + xmlFreeDocElementContent(ctxt->myDoc, ret); return(NULL); } NEXT; - op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ); + op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ); if (op == NULL) { if ((last != NULL) && (last != ret)) - xmlFreeElementContent(last); - xmlFreeElementContent(ret); + xmlFreeDocElementContent(ctxt->myDoc, last); + xmlFreeDocElementContent(ctxt->myDoc, ret); return(NULL); } if (last == NULL) { @@ -4792,19 +4937,19 @@ xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) { "xmlParseElementChildrenContentDecl : '%c' expected\n", type); if ((last != NULL) && (last != ret)) - xmlFreeElementContent(last); + xmlFreeDocElementContent(ctxt->myDoc, last); if (ret != NULL) - xmlFreeElementContent(ret); + xmlFreeDocElementContent(ctxt->myDoc, ret); return(NULL); } NEXT; - op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR); + op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); if (op == NULL) { if ((last != NULL) && (last != ret)) - xmlFreeElementContent(last); + xmlFreeDocElementContent(ctxt->myDoc, last); if (ret != NULL) - xmlFreeElementContent(ret); + xmlFreeDocElementContent(ctxt->myDoc, ret); return(NULL); } if (last == NULL) { @@ -4825,7 +4970,7 @@ xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) { } else { xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL); if (ret != NULL) - xmlFreeElementContent(ret); + xmlFreeDocElementContent(ctxt->myDoc, ret); return(NULL); } GROW; @@ -4843,10 +4988,10 @@ xmlParseElementChildrenContentDecl 
(xmlParserCtxtPtr ctxt, int inputchk) { if (elem == NULL) { xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); if (ret != NULL) - xmlFreeElementContent(ret); + xmlFreeDocElementContent(ctxt->myDoc, ret); return(NULL); } - last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT); + last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); if (RAW == '?') { last->ocur = XML_ELEMENT_CONTENT_OPT; NEXT; @@ -5004,7 +5149,7 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) { int ret = -1; xmlElementContentPtr content = NULL; - GROW; + /* GROW; done in the caller */ if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) { xmlParserInputPtr input = ctxt->input; @@ -5067,6 +5212,9 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) { if (RAW != '>') { xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); + if (content != NULL) { + xmlFreeDocElementContent(ctxt->myDoc, content); + } } else { if (input != ctxt->input) { xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, @@ -5075,12 +5223,23 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) { NEXT; if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && - (ctxt->sax->elementDecl != NULL)) + (ctxt->sax->elementDecl != NULL)) { + if (content != NULL) + content->parent = NULL; ctxt->sax->elementDecl(ctxt->userData, name, ret, content); - } - if (content != NULL) { - xmlFreeElementContent(content); + if ((content != NULL) && (content->parent == NULL)) { + /* + * this is a trick: if xmlAddElementDecl is called, + * instead of copying the full tree it is plugged directly + * if called from the parser. 
Avoid duplicating the + * interfaces or change the API/ABI + */ + xmlFreeDocElementContent(ctxt->myDoc, content); + } + } else if (content != NULL) { + xmlFreeDocElementContent(ctxt->myDoc, content); + } } } return(ret); @@ -5247,12 +5406,32 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { void xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { GROW; - xmlParseElementDecl(ctxt); - xmlParseAttributeListDecl(ctxt); - xmlParseEntityDecl(ctxt); - xmlParseNotationDecl(ctxt); - xmlParsePI(ctxt); - xmlParseComment(ctxt); + if (CUR == '<') { + if (NXT(1) == '!') { + switch (NXT(2)) { + case 'E': + if (NXT(3) == 'L') + xmlParseElementDecl(ctxt); + else if (NXT(3) == 'N') + xmlParseEntityDecl(ctxt); + break; + case 'A': + xmlParseAttributeListDecl(ctxt); + break; + case 'N': + xmlParseNotationDecl(ctxt); + break; + case '-': + xmlParseComment(ctxt); + break; + default: + /* there is an error but it will be detected later */ + break; + } + } else if (NXT(1) == '?') { + xmlParsePI(ctxt); + } + } /* * This is only for internal subset. On external entities, * the replacement is done before parsing stage @@ -9093,16 +9272,16 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { /* * Get the 4 first bytes and decode the charset * if enc != XML_CHAR_ENCODING_NONE - * plug some encoding conversion routines. + * plug some encoding conversion routines, + * else xmlSwitchEncoding will set to (default) + * UTF8. 
*/ start[0] = RAW; start[1] = NXT(1); start[2] = NXT(2); start[3] = NXT(3); enc = xmlDetectCharEncoding(start, 4); - if (enc != XML_CHAR_ENCODING_NONE) { - xmlSwitchEncoding(ctxt, enc); - } + xmlSwitchEncoding(ctxt, enc); break; } @@ -9681,8 +9860,12 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { break; } } - if (!found) - break; + if (!found) { +#if 0 + fprintf(stderr, "unfinished comment\n"); +#endif + break; /* for */ + } continue; } } @@ -9695,6 +9878,10 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { continue; } if (buf[base] == ']') { +#if 0 + fprintf(stderr, "%c%c%c%c: ", buf[base], + buf[base + 1], buf[base + 2], buf[base + 3]); +#endif if ((unsigned int) base +1 >= ctxt->input->buf->buffer->use) break; @@ -9703,20 +9890,34 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { base++; continue; } - for (i = 0; + for (i = 1; (unsigned int) base + i < ctxt->input->buf->buffer->use; i++) { - if (buf[base + i] == '>') + if (buf[base + i] == '>') { +#if 0 + fprintf(stderr, "found\n"); +#endif goto found_end_int_subset; + } + if (!IS_BLANK_CH(buf[base + i])) { +#if 0 + fprintf(stderr, "not found\n"); +#endif + goto not_end_of_int_subset; + } } +#if 0 + fprintf(stderr, "end of stream\n"); +#endif break; + } +not_end_of_int_subset: + continue; /* for */ } /* * We didn't found the end of the Internal subset */ - if (quote == 0) - ctxt->checkIndex = base; #ifdef DEBUG_PUSH if (next == 0) xmlGenericError(xmlGenericErrorContext, @@ -10038,8 +10239,14 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, inputPush(ctxt, inputStream); - if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && - (ctxt->input->buf != NULL)) { + /* + * If the caller didn't provide an initial 'chunk' for determining + * the encoding, we set the context to XML_CHAR_ENCODING_NONE so + * that it can be automatically determined later + */ + if ((size == 0) || (chunk == NULL)) { + ctxt->charset = XML_CHAR_ENCODING_NONE; + } else if 
((ctxt->input != NULL) && (ctxt->input->buf != NULL)) { int base = ctxt->input->base - ctxt->input->buf->buffer->content; int cur = ctxt->input->cur - ctxt->input->base; @@ -12158,6 +12365,7 @@ xmlCleanupParser(void) { #ifdef LIBXML_CATALOG_ENABLED xmlCatalogCleanup(); #endif + xmlDictCleanup(); xmlCleanupInputCallbacks(); #ifdef LIBXML_OUTPUT_ENABLED xmlCleanupOutputCallbacks(); |