summaryrefslogtreecommitdiff
path: root/ext/xml/xml.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/xml/xml.c')
-rw-r--r--ext/xml/xml.c102
1 files changed, 1 insertions, 101 deletions
diff --git a/ext/xml/xml.c b/ext/xml/xml.c
index 3427face1..50a1a5ccd 100644
--- a/ext/xml/xml.c
+++ b/ext/xml/xml.c
@@ -32,6 +32,7 @@
#include "zend_variables.h"
#include "ext/standard/php_string.h"
#include "ext/standard/info.h"
+#include "ext/standard/html.h"
#if HAVE_XML
@@ -659,107 +660,6 @@ PHPAPI char *xml_utf8_encode(const char *s, int len, int *newlen, const XML_Char
}
/* }}} */
-/* copied from trunk's implementation of get_next_char in ext/standard/html.c */
-#define MB_FAILURE(pos, advance) do { \
- *cursor = pos + (advance); \
- *status = FAILURE; \
- return 0; \
-} while (0)
-
-#define CHECK_LEN(pos, chars_need) ((str_len - (pos)) >= (chars_need))
-#define utf8_lead(c) ((c) < 0x80 || ((c) >= 0xC2 && (c) <= 0xF4))
-#define utf8_trail(c) ((c) >= 0x80 && (c) <= 0xBF)
-
-/* {{{ php_next_utf8_char
- */
-static inline unsigned int php_next_utf8_char(
- const unsigned char *str,
- size_t str_len,
- size_t *cursor,
- int *status)
-{
- size_t pos = *cursor;
- unsigned int this_char = 0;
- unsigned char c;
-
- *status = SUCCESS;
-
- if (!CHECK_LEN(pos, 1))
- MB_FAILURE(pos, 1);
-
- /* We'll follow strategy 2. from section 3.6.1 of UTR #36:
- * "In a reported illegal byte sequence, do not include any
- * non-initial byte that encodes a valid character or is a leading
- * byte for a valid sequence." */
- c = str[pos];
- if (c < 0x80) {
- this_char = c;
- pos++;
- } else if (c < 0xc2) {
- MB_FAILURE(pos, 1);
- } else if (c < 0xe0) {
- if (!CHECK_LEN(pos, 2))
- MB_FAILURE(pos, 1);
-
- if (!utf8_trail(str[pos + 1])) {
- MB_FAILURE(pos, utf8_lead(str[pos + 1]) ? 1 : 2);
- }
- this_char = ((c & 0x1f) << 6) | (str[pos + 1] & 0x3f);
- if (this_char < 0x80) { /* non-shortest form */
- MB_FAILURE(pos, 2);
- }
- pos += 2;
- } else if (c < 0xf0) {
- size_t avail = str_len - pos;
-
- if (avail < 3 ||
- !utf8_trail(str[pos + 1]) || !utf8_trail(str[pos + 2])) {
- if (avail < 2 || utf8_lead(str[pos + 1]))
- MB_FAILURE(pos, 1);
- else if (avail < 3 || utf8_lead(str[pos + 2]))
- MB_FAILURE(pos, 2);
- else
- MB_FAILURE(pos, 3);
- }
-
- this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f);
- if (this_char < 0x800) { /* non-shortest form */
- MB_FAILURE(pos, 3);
- } else if (this_char >= 0xd800 && this_char <= 0xdfff) { /* surrogate */
- MB_FAILURE(pos, 3);
- }
- pos += 3;
- } else if (c < 0xf5) {
- size_t avail = str_len - pos;
-
- if (avail < 4 ||
- !utf8_trail(str[pos + 1]) || !utf8_trail(str[pos + 2]) ||
- !utf8_trail(str[pos + 3])) {
- if (avail < 2 || utf8_lead(str[pos + 1]))
- MB_FAILURE(pos, 1);
- else if (avail < 3 || utf8_lead(str[pos + 2]))
- MB_FAILURE(pos, 2);
- else if (avail < 4 || utf8_lead(str[pos + 3]))
- MB_FAILURE(pos, 3);
- else
- MB_FAILURE(pos, 4);
- }
-
- this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f);
- if (this_char < 0x10000 || this_char > 0x10FFFF) { /* non-shortest form or outside range */
- MB_FAILURE(pos, 4);
- }
- pos += 4;
- } else {
- MB_FAILURE(pos, 1);
- }
-
- *cursor = pos;
- return this_char;
-}
-/* }}} */
-
-
/* {{{ xml_utf8_decode */
PHPAPI char *xml_utf8_decode(const XML_Char *s, int len, int *newlen, const XML_Char *encoding)
{