summary | refs | log | tree | commit | diff
path: root/HTMLtree.c
diff options
context:
space:
mode:
author: Aron Xu <aron@debian.org> 2014-10-26 07:02:25 +0800
committer: Aron Xu <aron@debian.org> 2014-10-26 07:02:25 +0800
commit: 3871a83a5f0aebd8c00879eab14fe901c93dbfcf (patch)
tree: b022967f880b7fb1e56c8cc4c3f200d6ffbc9efd /HTMLtree.c
parent: 7042e17490515a990a45aa7237d11bc49ab0eaf0 (diff)
download: libxml2-3871a83a5f0aebd8c00879eab14fe901c93dbfcf.tar.gz
Imported Upstream version 2.9.2+dfsg1
Diffstat (limited to 'HTMLtree.c')
-rw-r--r-- HTMLtree.c 141
1 files changed, 93 insertions, 48 deletions
diff --git a/HTMLtree.c b/HTMLtree.c
index 5d0893b..5c57fc5 100644
--- a/HTMLtree.c
+++ b/HTMLtree.c
@@ -30,16 +30,18 @@
#include <libxml/globals.h>
#include <libxml/uri.h>
+#include "buf.h"
+
/************************************************************************
* *
- * Getting/Setting encoding meta tags *
+ * Getting/Setting encoding meta tags *
* *
************************************************************************/
/**
* htmlGetMetaEncoding:
* @doc: the document
- *
+ *
* Encoding definition lookup in the Meta tags
*
* Returns the current encoding as flagged in the HTML source
@@ -126,17 +128,17 @@ found_meta:
found_content:
encoding = xmlStrstr(content, BAD_CAST"charset=");
- if (encoding == NULL)
+ if (encoding == NULL)
encoding = xmlStrstr(content, BAD_CAST"Charset=");
- if (encoding == NULL)
+ if (encoding == NULL)
encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
if (encoding != NULL) {
encoding += 8;
} else {
encoding = xmlStrstr(content, BAD_CAST"charset =");
- if (encoding == NULL)
+ if (encoding == NULL)
encoding = xmlStrstr(content, BAD_CAST"Charset =");
- if (encoding == NULL)
+ if (encoding == NULL)
encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
if (encoding != NULL)
encoding += 9;
@@ -314,7 +316,7 @@ static const char* htmlBooleanAttrs[] = {
* @name: the name of the attribute to check
*
* Determine if a given attribute is a boolean attribute.
- *
+ *
* returns: false if the attribute is not boolean, true otherwise.
*/
int
@@ -338,7 +340,7 @@ xmlOutputBufferPtr
xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
/************************************************************************
* *
- * Output error handlers *
+ * Output error handlers *
* *
************************************************************************/
/**
@@ -387,17 +389,13 @@ htmlSaveErr(int code, xmlNodePtr node, const char *extra)
/************************************************************************
* *
- * Dumping HTML tree content to a simple buffer *
+ * Dumping HTML tree content to a simple buffer *
* *
************************************************************************/
-static int
-htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
- int format);
-
/**
- * htmlNodeDumpFormat:
- * @buf: the HTML buffer output
+ * htmlBufNodeDumpFormat:
+ * @buf: the xmlBufPtr output
* @doc: the document
* @cur: the current node
* @format: should formatting spaces been added
@@ -406,10 +404,10 @@ htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
*
* Returns the number of byte written or -1 in case of error
*/
-static int
-htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
+static size_t
+htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur,
int format) {
- unsigned int use;
+ size_t use;
int ret;
xmlOutputBufferPtr outbuf;
@@ -432,10 +430,10 @@ htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
outbuf->context = NULL;
outbuf->written = 0;
- use = buf->use;
+ use = xmlBufUse(buf);
htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
xmlFree(outbuf);
- ret = buf->use - use;
+ ret = xmlBufUse(buf) - use;
return (ret);
}
@@ -452,9 +450,24 @@ htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
*/
int
htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
+ xmlBufPtr buffer;
+ size_t ret;
+
+ if ((buf == NULL) || (cur == NULL))
+ return(-1);
+
xmlInitParser();
+ buffer = xmlBufFromBuffer(buf);
+ if (buffer == NULL)
+ return(-1);
+
+ ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1);
- return(htmlNodeDumpFormat(buf, doc, cur, 1));
+ xmlBufBackToBuffer(buffer);
+
+ if (ret > INT_MAX)
+ return(-1);
+ return((int) ret);
}
/**
@@ -499,7 +512,7 @@ htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
if (handler == NULL)
handler = xmlFindCharEncodingHandler("ascii");
- /*
+ /*
* save the content to a temp buffer.
*/
buf = xmlOutputBufferCreateFile(out, handler);
@@ -595,11 +608,11 @@ htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
xmlOutputBufferFlush(buf);
if (buf->conv != NULL) {
- *size = buf->conv->use;
- *mem = xmlStrndup(buf->conv->content, *size);
+ *size = xmlBufUse(buf->conv);
+ *mem = xmlStrndup(xmlBufContent(buf->conv), *size);
} else {
- *size = buf->buffer->use;
- *mem = xmlStrndup(buf->buffer->content, *size);
+ *size = xmlBufUse(buf->buffer);
+ *mem = xmlStrndup(xmlBufContent(buf->buffer), *size);
}
(void)xmlOutputBufferClose(buf);
}
@@ -621,7 +634,7 @@ htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
/************************************************************************
* *
- * Dumping HTML tree content to an I/O output buffer *
+ * Dumping HTML tree content to an I/O output buffer *
* *
************************************************************************/
@@ -632,7 +645,7 @@ void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
* @buf: the HTML buffer output
* @doc: the document
* @encoding: the encoding string
- *
+ *
* TODO: check whether encoding is needed
*
* Dump the HTML document DTD, if any.
@@ -650,14 +663,14 @@ htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
xmlOutputBufferWriteString(buf, (const char *)cur->name);
if (cur->ExternalID != NULL) {
xmlOutputBufferWriteString(buf, " PUBLIC ");
- xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
+ xmlBufWriteQuotedString(buf->buffer, cur->ExternalID);
if (cur->SystemID != NULL) {
xmlOutputBufferWriteString(buf, " ");
- xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
- }
+ xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
+ }
} else if (cur->SystemID != NULL) {
xmlOutputBufferWriteString(buf, " SYSTEM ");
- xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
+ xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
}
xmlOutputBufferWriteString(buf, ">\n");
}
@@ -677,9 +690,10 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
xmlChar *value;
/*
- * TODO: The html output method should not escape a & character
- * occurring in an attribute value immediately followed by
- * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
+ * The html output method should not escape a & character
+ * occurring in an attribute value immediately followed by
+ * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
+ * This is implemented in xmlEncodeEntitiesReentrant
*/
if (cur == NULL) {
@@ -702,20 +716,51 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
(!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
(!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
- xmlChar *escaped;
xmlChar *tmp = value;
+ /* xmlURIEscapeStr() escapes '"' so it can be safely used. */
+ xmlBufCCat(buf->buffer, "\"");
while (IS_BLANK_CH(*tmp)) tmp++;
- escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
- if (escaped != NULL) {
- xmlBufferWriteQuotedString(buf->buffer, escaped);
- xmlFree(escaped);
- } else {
- xmlBufferWriteQuotedString(buf->buffer, value);
+ /* URI Escape everything, except server side includes. */
+ for ( ; ; ) {
+ xmlChar *escaped;
+ xmlChar endChar;
+ xmlChar *end = NULL;
+ xmlChar *start = (xmlChar *)xmlStrstr(tmp, BAD_CAST "<!--");
+ if (start != NULL) {
+ end = (xmlChar *)xmlStrstr(tmp, BAD_CAST "-->");
+ if (end != NULL) {
+ *start = '\0';
+ }
+ }
+
+ /* Escape the whole string, or until start (set to '\0'). */
+ escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
+ if (escaped != NULL) {
+ xmlBufCat(buf->buffer, escaped);
+ xmlFree(escaped);
+ } else {
+ xmlBufCat(buf->buffer, tmp);
+ }
+
+ if (end == NULL) { /* Everything has been written. */
+ break;
+ }
+
+ /* Do not escape anything within server side includes. */
+ *start = '<'; /* Restore the first character of "<!--". */
+ end += 3; /* strlen("-->") */
+ endChar = *end;
+ *end = '\0';
+ xmlBufCat(buf->buffer, start);
+ *end = endChar;
+ tmp = end;
}
+
+ xmlBufCCat(buf->buffer, "\"");
} else {
- xmlBufferWriteQuotedString(buf->buffer, value);
+ xmlBufWriteQuotedString(buf->buffer, value);
}
xmlFree(value);
} else {
@@ -1105,7 +1150,7 @@ htmlSaveFile(const char *filename, xmlDocPtr cur) {
if ((cur == NULL) || (filename == NULL))
return(-1);
-
+
xmlInitParser();
encoding = (const char *) htmlGetMetaEncoding(cur);
@@ -1136,7 +1181,7 @@ htmlSaveFile(const char *filename, xmlDocPtr cur) {
if (handler == NULL)
handler = xmlFindCharEncodingHandler("ascii");
- /*
+ /*
* save the content to a temp buffer.
*/
buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
@@ -1156,7 +1201,7 @@ htmlSaveFile(const char *filename, xmlDocPtr cur) {
* @encoding: the document encoding
*
* Dump an HTML document to a file using a given encoding.
- *
+ *
* returns: the number of byte written or -1 in case of failure.
*/
int
@@ -1200,7 +1245,7 @@ htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
if (handler == NULL)
handler = xmlFindCharEncodingHandler("ascii");
- /*
+ /*
* save the content to a temp buffer.
*/
buf = xmlOutputBufferCreateFilename(filename, handler, 0);
@@ -1220,7 +1265,7 @@ htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
*
* Dump an HTML document to a file using a given encoding
* and formatting returns/spaces are added.
- *
+ *
* returns: the number of byte written or -1 in case of failure.
*/
int