summaryrefslogtreecommitdiff
path: root/parserInternals.c
diff options
context:
space:
mode:
authorMike Hommey <glandium@debian.org>2009-09-10 22:10:39 +0200
committerMike Hommey <glandium@debian.org>2009-09-10 22:10:39 +0200
commitc3e4f3c26035bc93a69e5aa2ad435809e8be8a4e (patch)
treee65557c2e042fe8a77635b0548db10ad97bc1bfe /parserInternals.c
parente248b20a3b7df364cc9617b8685b4c190338bcd2 (diff)
downloadlibxml2-c3e4f3c26035bc93a69e5aa2ad435809e8be8a4e.tar.gz
Import upstream version 2.7.4upstream/2.7.4.dfsg
Diffstat (limited to 'parserInternals.c')
-rw-r--r--parserInternals.c130
1 files changed, 108 insertions, 22 deletions
diff --git a/parserInternals.c b/parserInternals.c
index 758c6b3..ff20435 100644
--- a/parserInternals.c
+++ b/parserInternals.c
@@ -494,20 +494,26 @@ xmlNextChar(xmlParserCtxtPtr ctxt)
if (c & 0x80) {
if (c == 0xC0)
goto encoding_error;
- if (cur[1] == 0)
+ if (cur[1] == 0) {
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+ cur = ctxt->input->cur;
+ }
if ((cur[1] & 0xc0) != 0x80)
goto encoding_error;
if ((c & 0xe0) == 0xe0) {
unsigned int val;
- if (cur[2] == 0)
+ if (cur[2] == 0) {
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+ cur = ctxt->input->cur;
+ }
if ((cur[2] & 0xc0) != 0x80)
goto encoding_error;
if ((c & 0xf0) == 0xf0) {
- if (cur[3] == 0)
+ if (cur[3] == 0) {
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+ cur = ctxt->input->cur;
+ }
if (((c & 0xf8) != 0xf0) ||
((cur[3] & 0xc0) != 0x80))
goto encoding_error;
@@ -640,18 +646,24 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
if (c & 0x80) {
if (((c & 0x40) == 0) || (c == 0xC0))
goto encoding_error;
- if (cur[1] == 0)
+ if (cur[1] == 0) {
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+ cur = ctxt->input->cur;
+ }
if ((cur[1] & 0xc0) != 0x80)
goto encoding_error;
if ((c & 0xe0) == 0xe0) {
- if (cur[2] == 0)
+ if (cur[2] == 0) {
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+ cur = ctxt->input->cur;
+ }
if ((cur[2] & 0xc0) != 0x80)
goto encoding_error;
if ((c & 0xf0) == 0xf0) {
- if (cur[3] == 0)
+ if (cur[3] == 0) {
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+ cur = ctxt->input->cur;
+ }
if (((c & 0xf8) != 0xf0) ||
((cur[3] & 0xc0) != 0x80))
goto encoding_error;
@@ -933,6 +945,17 @@ xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
* *
************************************************************************/
+/* defined in encoding.c, not public */
+int
+xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
+ xmlBufferPtr in, int len);
+
+static int
+xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
+ xmlCharEncodingHandlerPtr handler, int len);
+static int
+xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
+ xmlCharEncodingHandlerPtr handler, int len);
/**
* xmlSwitchEncoding:
* @ctxt: the parser context
@@ -947,6 +970,7 @@ int
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
{
xmlCharEncodingHandlerPtr handler;
+ int len = -1;
if (ctxt == NULL) return(-1);
switch (enc) {
@@ -990,9 +1014,33 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
(ctxt->input->cur[2] == 0xBF)) {
ctxt->input->cur += 3;
}
- break ;
- default:
- break;
+ len = 90;
+ break;
+ case XML_CHAR_ENCODING_UCS2:
+ len = 90;
+ break;
+ case XML_CHAR_ENCODING_UCS4BE:
+ case XML_CHAR_ENCODING_UCS4LE:
+ case XML_CHAR_ENCODING_UCS4_2143:
+ case XML_CHAR_ENCODING_UCS4_3412:
+ len = 180;
+ break;
+ case XML_CHAR_ENCODING_EBCDIC:
+ case XML_CHAR_ENCODING_8859_1:
+ case XML_CHAR_ENCODING_8859_2:
+ case XML_CHAR_ENCODING_8859_3:
+ case XML_CHAR_ENCODING_8859_4:
+ case XML_CHAR_ENCODING_8859_5:
+ case XML_CHAR_ENCODING_8859_6:
+ case XML_CHAR_ENCODING_8859_7:
+ case XML_CHAR_ENCODING_8859_8:
+ case XML_CHAR_ENCODING_8859_9:
+ case XML_CHAR_ENCODING_ASCII:
+ case XML_CHAR_ENCODING_2022_JP:
+ case XML_CHAR_ENCODING_SHIFT_JIS:
+ case XML_CHAR_ENCODING_EUC_JP:
+ len = 45;
+ break;
}
handler = xmlGetCharEncodingHandler(enc);
if (handler == NULL) {
@@ -1083,7 +1131,7 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
if (handler == NULL)
return(-1);
ctxt->charset = XML_CHAR_ENCODING_UTF8;
- return(xmlSwitchToEncoding(ctxt, handler));
+ return(xmlSwitchToEncodingInt(ctxt, handler, len));
}
/**
@@ -1091,15 +1139,16 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
* @ctxt: the parser context
* @input: the input stream
* @handler: the encoding handler
+ * @len: the number of bytes to convert for the first line or -1
*
* change the input functions when discovering the character encoding
* of a given entity.
*
* Returns 0 in case of success, -1 otherwise
*/
-int
-xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
- xmlCharEncodingHandlerPtr handler)
+static int
+xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
+ xmlCharEncodingHandlerPtr handler, int len)
{
int nbchars;
@@ -1196,9 +1245,10 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
* parsed with the autodetected encoding
* into the parser reading buffer.
*/
- nbchars = xmlCharEncFirstLine(input->buf->encoder,
- input->buf->buffer,
- input->buf->raw);
+ nbchars = xmlCharEncFirstLineInt(input->buf->encoder,
+ input->buf->buffer,
+ input->buf->raw,
+ len);
}
if (nbchars < 0) {
xmlErrInternal(ctxt,
@@ -1224,8 +1274,9 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
}
/**
- * xmlSwitchToEncoding:
+ * xmlSwitchInputEncoding:
* @ctxt: the parser context
+ * @input: the input stream
* @handler: the encoding handler
*
* change the input functions when discovering the character encoding
@@ -1234,13 +1285,32 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
* Returns 0 in case of success, -1 otherwise
*/
int
-xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
-{
+xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
+ xmlCharEncodingHandlerPtr handler) {
+ return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1));
+}
+
+/**
+ * xmlSwitchToEncodingInt:
+ * @ctxt: the parser context
+ * @handler: the encoding handler
+ * @len: the lenght to convert or -1
+ *
+ * change the input functions when discovering the character encoding
+ * of a given entity, and convert only @len bytes of the output, this
+ * is needed on auto detect to allows any declared encoding later to
+ * convert the actual content after the xmlDecl
+ *
+ * Returns 0 in case of success, -1 otherwise
+ */
+static int
+xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
+ xmlCharEncodingHandlerPtr handler, int len) {
int ret = 0;
if (handler != NULL) {
if (ctxt->input != NULL) {
- ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler);
+ ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len);
} else {
xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
NULL);
@@ -1250,11 +1320,27 @@ xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
* The parsing is now done in UTF8 natively
*/
ctxt->charset = XML_CHAR_ENCODING_UTF8;
- } else
+ } else
return(-1);
return(ret);
}
+/**
+ * xmlSwitchToEncoding:
+ * @ctxt: the parser context
+ * @handler: the encoding handler
+ *
+ * change the input functions when discovering the character encoding
+ * of a given entity.
+ *
+ * Returns 0 in case of success, -1 otherwise
+ */
+int
+xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
+{
+ return (xmlSwitchToEncodingInt(ctxt, handler, -1));
+}
+
/************************************************************************
* *
* Commodity functions to handle entities processing *
@@ -2067,7 +2153,7 @@ xmlKeepBlanksDefault(int val) {
int old = xmlKeepBlanksDefaultValue;
xmlKeepBlanksDefaultValue = val;
- xmlIndentTreeOutput = !val;
+ if (!val) xmlIndentTreeOutput = 1;
return(old);
}