diff options
author | Aron Xu <aron@debian.org> | 2014-10-26 07:02:25 +0800 |
---|---|---|
committer | Aron Xu <aron@debian.org> | 2014-10-26 07:02:25 +0800 |
commit | 3871a83a5f0aebd8c00879eab14fe901c93dbfcf (patch) | |
tree | b022967f880b7fb1e56c8cc4c3f200d6ffbc9efd /HTMLtree.c | |
parent | 7042e17490515a990a45aa7237d11bc49ab0eaf0 (diff) | |
download | libxml2-3871a83a5f0aebd8c00879eab14fe901c93dbfcf.tar.gz |
Imported Upstream version 2.9.2+dfsg1
Diffstat (limited to 'HTMLtree.c')
-rw-r--r-- | HTMLtree.c | 141 |
1 files changed, 93 insertions, 48 deletions
@@ -30,16 +30,18 @@ #include <libxml/globals.h> #include <libxml/uri.h> +#include "buf.h" + /************************************************************************ * * - * Getting/Setting encoding meta tags * + * Getting/Setting encoding meta tags * * * ************************************************************************/ /** * htmlGetMetaEncoding: * @doc: the document - * + * * Encoding definition lookup in the Meta tags * * Returns the current encoding as flagged in the HTML source @@ -126,17 +128,17 @@ found_meta: found_content: encoding = xmlStrstr(content, BAD_CAST"charset="); - if (encoding == NULL) + if (encoding == NULL) encoding = xmlStrstr(content, BAD_CAST"Charset="); - if (encoding == NULL) + if (encoding == NULL) encoding = xmlStrstr(content, BAD_CAST"CHARSET="); if (encoding != NULL) { encoding += 8; } else { encoding = xmlStrstr(content, BAD_CAST"charset ="); - if (encoding == NULL) + if (encoding == NULL) encoding = xmlStrstr(content, BAD_CAST"Charset ="); - if (encoding == NULL) + if (encoding == NULL) encoding = xmlStrstr(content, BAD_CAST"CHARSET ="); if (encoding != NULL) encoding += 9; @@ -314,7 +316,7 @@ static const char* htmlBooleanAttrs[] = { * @name: the name of the attribute to check * * Determine if a given attribute is a boolean attribute. - * + * * returns: false if the attribute is not boolean, true otherwise. */ int @@ -338,7 +340,7 @@ xmlOutputBufferPtr xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder); /************************************************************************ * * - * Output error handlers * + * Output error handlers * * * ************************************************************************/ /** @@ -387,17 +389,13 @@ htmlSaveErr(int code, xmlNodePtr node, const char *extra) /************************************************************************ * * - * Dumping HTML tree content to a simple buffer * + * Dumping HTML tree content to a simple buffer * * * ************************************************************************/ -static int -htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, - int format); - /** - * htmlNodeDumpFormat: - * @buf: the HTML buffer output + * htmlBufNodeDumpFormat: + * @buf: the xmlBufPtr output * @doc: the document * @cur: the current node * @format: should formatting spaces been added @@ -406,10 +404,10 @@ htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, * * Returns the number of byte written or -1 in case of error */ -static int -htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, +static size_t +htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur, int format) { - unsigned int use; + size_t use; int ret; xmlOutputBufferPtr outbuf; @@ -432,10 +430,10 @@ htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, outbuf->context = NULL; outbuf->written = 0; - use = buf->use; + use = xmlBufUse(buf); htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format); xmlFree(outbuf); - ret = buf->use - use; + ret = xmlBufUse(buf) - use; return (ret); } @@ -452,9 +450,24 @@ htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, */ int htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) { + xmlBufPtr buffer; + size_t ret; + + if ((buf == NULL) || (cur == NULL)) + return(-1); + xmlInitParser(); + buffer = xmlBufFromBuffer(buf); + if (buffer == NULL) + return(-1); + + ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1); - return(htmlNodeDumpFormat(buf, doc, cur, 1)); + xmlBufBackToBuffer(buffer); + + if (ret > INT_MAX) + return(-1); + return((int) ret); } /** @@ -499,7 +512,7 @@ htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc, if (handler == NULL) handler = xmlFindCharEncodingHandler("ascii"); - /* + /* * save the content to a temp buffer. */ buf = xmlOutputBufferCreateFile(out, handler); @@ -595,11 +608,11 @@ htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) { xmlOutputBufferFlush(buf); if (buf->conv != NULL) { - *size = buf->conv->use; - *mem = xmlStrndup(buf->conv->content, *size); + *size = xmlBufUse(buf->conv); + *mem = xmlStrndup(xmlBufContent(buf->conv), *size); } else { - *size = buf->buffer->use; - *mem = xmlStrndup(buf->buffer->content, *size); + *size = xmlBufUse(buf->buffer); + *mem = xmlStrndup(xmlBufContent(buf->buffer), *size); } (void)xmlOutputBufferClose(buf); } @@ -621,7 +634,7 @@ htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) { /************************************************************************ * * - * Dumping HTML tree content to an I/O output buffer * + * Dumping HTML tree content to an I/O output buffer * * * ************************************************************************/ @@ -632,7 +645,7 @@ void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur); * @buf: the HTML buffer output * @doc: the document * @encoding: the encoding string - * + * * TODO: check whether encoding is needed * * Dump the HTML document DTD, if any. @@ -650,14 +663,14 @@ htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlOutputBufferWriteString(buf, (const char *)cur->name); if (cur->ExternalID != NULL) { xmlOutputBufferWriteString(buf, " PUBLIC "); - xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID); + xmlBufWriteQuotedString(buf->buffer, cur->ExternalID); if (cur->SystemID != NULL) { xmlOutputBufferWriteString(buf, " "); - xmlBufferWriteQuotedString(buf->buffer, cur->SystemID); - } + xmlBufWriteQuotedString(buf->buffer, cur->SystemID); + } } else if (cur->SystemID != NULL) { xmlOutputBufferWriteString(buf, " SYSTEM "); - xmlBufferWriteQuotedString(buf->buffer, cur->SystemID); + xmlBufWriteQuotedString(buf->buffer, cur->SystemID); } xmlOutputBufferWriteString(buf, ">\n"); } @@ -677,9 +690,10 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, xmlChar *value; /* - * TODO: The html output method should not escape a & character - * occurring in an attribute value immediately followed by - * a { character (see Section B.7.1 of the HTML 4.0 Recommendation). + * The html output method should not escape a & character + * occurring in an attribute value immediately followed by + * a { character (see Section B.7.1 of the HTML 4.0 Recommendation). + * This is implemented in xmlEncodeEntitiesReentrant */ if (cur == NULL) { @@ -702,20 +716,51 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, (!xmlStrcasecmp(cur->name, BAD_CAST "src")) || ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) && (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) { - xmlChar *escaped; xmlChar *tmp = value; + /* xmlURIEscapeStr() escapes '"' so it can be safely used. */ + xmlBufCCat(buf->buffer, "\""); while (IS_BLANK_CH(*tmp)) tmp++; - escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+"); - if (escaped != NULL) { - xmlBufferWriteQuotedString(buf->buffer, escaped); - xmlFree(escaped); - } else { - xmlBufferWriteQuotedString(buf->buffer, value); + /* URI Escape everything, except server side includes. */ + for ( ; ; ) { + xmlChar *escaped; + xmlChar endChar; + xmlChar *end = NULL; + xmlChar *start = (xmlChar *)xmlStrstr(tmp, BAD_CAST "<!--"); + if (start != NULL) { + end = (xmlChar *)xmlStrstr(tmp, BAD_CAST "-->"); + if (end != NULL) { + *start = '\0'; + } + } + + /* Escape the whole string, or until start (set to '\0'). */ + escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+"); + if (escaped != NULL) { + xmlBufCat(buf->buffer, escaped); + xmlFree(escaped); + } else { + xmlBufCat(buf->buffer, tmp); + } + + if (end == NULL) { /* Everything has been written. */ + break; + } + + /* Do not escape anything within server side includes. */ + *start = '<'; /* Restore the first character of "<!--". */ + end += 3; /* strlen("-->") */ + endChar = *end; + *end = '\0'; + xmlBufCat(buf->buffer, start); + *end = endChar; + tmp = end; } + + xmlBufCCat(buf->buffer, "\""); } else { - xmlBufferWriteQuotedString(buf->buffer, value); + xmlBufWriteQuotedString(buf->buffer, value); } xmlFree(value); } else { @@ -1105,7 +1150,7 @@ htmlSaveFile(const char *filename, xmlDocPtr cur) { if ((cur == NULL) || (filename == NULL)) return(-1); - + xmlInitParser(); encoding = (const char *) htmlGetMetaEncoding(cur); @@ -1136,7 +1181,7 @@ htmlSaveFile(const char *filename, xmlDocPtr cur) { if (handler == NULL) handler = xmlFindCharEncodingHandler("ascii"); - /* + /* * save the content to a temp buffer. */ buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression); @@ -1156,7 +1201,7 @@ htmlSaveFile(const char *filename, xmlDocPtr cur) { * @encoding: the document encoding * * Dump an HTML document to a file using a given encoding. - * + * * returns: the number of byte written or -1 in case of failure. */ int @@ -1200,7 +1245,7 @@ htmlSaveFileFormat(const char *filename, xmlDocPtr cur, if (handler == NULL) handler = xmlFindCharEncodingHandler("ascii"); - /* + /* * save the content to a temp buffer. */ buf = xmlOutputBufferCreateFilename(filename, handler, 0); @@ -1220,7 +1265,7 @@ htmlSaveFileFormat(const char *filename, xmlDocPtr cur, * * Dump an HTML document to a file using a given encoding * and formatting returns/spaces are added. - * + * * returns: the number of byte written or -1 in case of failure. */ int |