diff options
author | Sean Finney <seanius@debian.org> | 2009-04-10 14:09:48 +0200 |
---|---|---|
committer | Sean Finney <seanius@debian.org> | 2009-04-10 14:09:48 +0200 |
commit | cd0b49c72aee33b3e44a9c589fcd93b9e1c7a64f (patch) | |
tree | 1315c623bb7d9dfa8d366fa9cd2c6834ceeb5da5 /ext/mbstring/libmbfl | |
parent | 9ea47aab740772adf0c69d8c94b208a464e599ea (diff) | |
download | php-cd0b49c72aee33b3e44a9c589fcd93b9e1c7a64f.tar.gz |
Imported Upstream version 5.2.9.dfsg.1upstream/5.2.9.dfsg.1
Diffstat (limited to 'ext/mbstring/libmbfl')
-rw-r--r-- | ext/mbstring/libmbfl/filters/mbfilter_cp932.c | 2 | ||||
-rw-r--r-- | ext/mbstring/libmbfl/filters/mbfilter_cp936.c | 2 | ||||
-rw-r--r-- | ext/mbstring/libmbfl/filters/mbfilter_htmlent.c | 17 | ||||
-rw-r--r-- | ext/mbstring/libmbfl/filters/mbfilter_sjis.c | 2 | ||||
-rw-r--r-- | ext/mbstring/libmbfl/mbfl/mbfilter.c | 253 | ||||
-rw-r--r-- | ext/mbstring/libmbfl/mbfl/mbfl_convert.c | 60 |
6 files changed, 232 insertions, 104 deletions
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp932.c b/ext/mbstring/libmbfl/filters/mbfilter_cp932.c index 80f7bfbc6..8fa254b6c 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp932.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp932.c @@ -167,7 +167,7 @@ mbfl_filt_conv_sjiswin_wchar(int c, mbfl_convert_filter *filter) case 1: /* kanji second char */ filter->status = 0; c1 = filter->cache; - if (c > 0x39 && c < 0xfd && c != 0x7f) { + if (c >= 0x40 && c <= 0xfc && c != 0x7f) { w = 0; SJIS_DECODE(c1, c, s1, s2); s = (s1 - 0x21)*94 + s2 - 0x21; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp936.c b/ext/mbstring/libmbfl/filters/mbfilter_cp936.c index 9cdd0520e..561dc3003 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp936.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp936.c @@ -192,7 +192,7 @@ mbfl_filt_conv_wchar_cp936(int c, mbfl_convert_filter *filter) } } if (s >= 0) { - if (s < 0x80) { /* latin */ + if (s <= 0x80) { /* latin */ CK((*filter->output_function)(s, filter->data)); } else { CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); diff --git a/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c b/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c index 40c24c342..6c6654a1b 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c @@ -232,8 +232,7 @@ int mbfl_filt_conv_html_dec(int c, mbfl_convert_filter *filter) mbfl_filt_conv_html_dec_flush(filter); if (c=='&') { - filter->status = 1; - buffer[0] = '&'; + buffer[filter->status++] = '&'; } } } @@ -244,17 +243,19 @@ int mbfl_filt_conv_html_dec(int c, mbfl_convert_filter *filter) int mbfl_filt_conv_html_dec_flush(mbfl_convert_filter *filter) { int status, pos = 0; - char *buffer; + unsigned char *buffer; + int err = 0; - buffer = (char*)filter->opaque; + buffer = (unsigned char*)filter->opaque; status = filter->status; + filter->status = 0; /* flush fragments */ while (status--) { - CK((*filter->output_function)(buffer[pos++], filter->data)); + int e = (*filter->output_function)(buffer[pos++], filter->data); + if (e != 0) + err = e; } - filter->status = 0; - /*filter->buffer = 0; of cause NOT*/ - return 0; + return err; } diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis.c index f9d7ff671..83ef56592 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis.c @@ -167,7 +167,7 @@ mbfl_filt_conv_sjis_wchar(int c, mbfl_convert_filter *filter) case 1: /* kanji second char */ filter->status = 0; c1 = filter->cache; - if (c > 0x39 && c < 0xfd && c != 0x7f) { + if (c >= 0x40 && c <= 0xfc && c != 0x7f) { SJIS_DECODE(c1, c, s1, s2); w = (s1 - 0x21)*94 + s2 - 0x21; if (w >= 0 && w < jisx0208_ucs_table_size) { diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.c b/ext/mbstring/libmbfl/mbfl/mbfilter.c index 97d2f048c..1aeb38cc9 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter.c +++ b/ext/mbstring/libmbfl/mbfl/mbfilter.c @@ -462,10 +462,9 @@ enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *ident while (n >= 0) { filter = identd->filter_list[n]; if (!filter->flag) { - if (identd->strict && filter->status) { - continue; + if (!identd->strict || !filter->status) { + encoding = filter->encoding->no_encoding; } - encoding = filter->encoding->no_encoding; } n--; } @@ -779,7 +778,7 @@ retry: for (;;) { pc->found_pos++; p = h; - m = pc->needle.buffer; + m = (int *)pc->needle.buffer; n = pc->needle_pos - 1; while (n > 0 && *p == *m) { n--; @@ -858,84 +857,203 @@ mbfl_strpos( int offset, int reverse) { - int n, result, negative_offset = 0; - unsigned char *p; - mbfl_convert_filter *filter; - struct collector_strpos_data pc; + int result; + mbfl_string _haystack_u8, _needle_u8; + const mbfl_string *haystack_u8, *needle_u8; + const unsigned char *u8_tbl; - if (haystack == NULL || needle == NULL) { + if (haystack == NULL || haystack->val == NULL || needle == NULL || needle->val == NULL) { return -8; } - /* needle is converted into wchar */ - mbfl_wchar_device_init(&pc.needle); - filter = mbfl_convert_filter_new( - needle->no_encoding, - mbfl_no_encoding_wchar, - mbfl_wchar_device_output, 0, &pc.needle); - if (filter == NULL) { - return -4; - } - p = needle->val; - n = needle->len; - if (p != NULL) { - while (n > 0) { - if ((*filter->filter_function)(*p++, filter) < 0) { - break; - } - n--; + + { + const mbfl_encoding *u8_enc; + u8_enc = mbfl_no2encoding(mbfl_no_encoding_utf8); + if (u8_enc == NULL || u8_enc->mblen_table == NULL) { + return -8; } + u8_tbl = u8_enc->mblen_table; } - mbfl_convert_filter_flush(filter); - mbfl_convert_filter_delete(filter); - pc.needle_len = pc.needle.pos; - if (pc.needle.buffer == NULL) { - return -4; - } - if (pc.needle_len <= 0) { - mbfl_wchar_device_clear(&pc.needle); - return -2; + + if (haystack->no_encoding != mbfl_no_encoding_utf8) { + mbfl_string_init(&_haystack_u8); + haystack_u8 = mbfl_convert_encoding(haystack, &_haystack_u8, mbfl_no_encoding_utf8); + if (haystack_u8 == NULL) { + result = -4; + goto out; + } + } else { + haystack_u8 = haystack; } - /* initialize filter and collector data */ - filter = mbfl_convert_filter_new( - haystack->no_encoding, - mbfl_no_encoding_wchar, - collector_strpos, 0, &pc); - if (filter == NULL) { - mbfl_wchar_device_clear(&pc.needle); - return -4; + + if (needle->no_encoding != mbfl_no_encoding_utf8) { + mbfl_string_init(&_needle_u8); + needle_u8 = mbfl_convert_encoding(needle, &_needle_u8, mbfl_no_encoding_utf8); + if (needle_u8 == NULL) { + result = -4; + goto out; + } + } else { + needle_u8 = needle; } - if (offset < 0) { - negative_offset = -offset-1; - offset = 0; + if (needle_u8->len < 1) { + result = -8; + goto out; } - pc.start = offset; - pc.output = 0; - pc.needle_pos = 0; - pc.found_pos = 0; - pc.matched_pos = -1; + result = -1; + if (haystack_u8->len < needle_u8->len) { + goto out; + } - /* feed data */ - p = haystack->val; - n = haystack->len - negative_offset; - if (p != NULL) { - while (n > 0) { - if ((*filter->filter_function)(*p++, filter) < 0) { - pc.matched_pos = -4; - break; + if (!reverse) { + unsigned int jtbl[1 << (sizeof(unsigned char) * 8)]; + unsigned int needle_u8_len = needle_u8->len; + unsigned int i; + const unsigned char *p, *q, *e; + const unsigned char *haystack_u8_val = haystack_u8->val, + *needle_u8_val = needle_u8->val; + for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) { + jtbl[i] = needle_u8_len + 1; + } + for (i = 0; i < needle_u8_len - 1; ++i) { + jtbl[needle_u8_val[i]] = needle_u8_len - i; + } + e = haystack_u8_val + haystack_u8->len; + p = haystack_u8_val; + while (--offset >= 0) { + if (p >= e) { + result = -16; + goto out; } - if (pc.matched_pos >= 0 && !reverse) { - break; + p += u8_tbl[*p]; + } + p += needle_u8_len; + if (p > e) { + goto out; + } + while (p <= e) { + const unsigned char *pv = p; + q = needle_u8_val + needle_u8_len; + for (;;) { + if (q == needle_u8_val) { + result = 0; + while (p > haystack_u8_val) { + unsigned char c = *--p; + if (c < 0x80) { + ++result; + } else if ((c & 0xc0) != 0x80) { + ++result; + } + } + goto out; + } + if (*--q != *--p) { + break; + } + } + p += jtbl[*p]; + if (p <= pv) { + p = pv + 1; + } + } + } else { + unsigned int jtbl[1 << (sizeof(unsigned char) * 8)]; + unsigned int needle_u8_len = needle_u8->len, needle_len = 0; + unsigned int i; + const unsigned char *p, *e, *q, *qe; + const unsigned char *haystack_u8_val = haystack_u8->val, + *needle_u8_val = needle_u8->val; + for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) { + jtbl[i] = needle_u8_len; + } + for (i = needle_u8_len - 1; i > 0; --i) { + unsigned char c = needle_u8_val[i]; + jtbl[c] = i; + if (c < 0x80) { + ++needle_len; + } else if ((c & 0xc0) != 0x80) { + ++needle_len; + } + } + { + unsigned char c = needle_u8_val[0]; + if (c < 0x80) { + ++needle_len; + } else if ((c & 0xc0) != 0x80) { + ++needle_len; + } + } + e = haystack_u8_val; + p = e + haystack_u8->len; + qe = needle_u8_val + needle_u8_len; + if (offset < 0) { + if (-offset > needle_len) { + offset += needle_len; + while (offset < 0) { + unsigned char c; + if (p <= e) { + result = -16; + goto out; + } + c = *(--p); + if (c < 0x80) { + ++offset; + } else if ((c & 0xc0) != 0x80) { + ++offset; + } + } + } + } else { + const unsigned char *ee = haystack_u8_val + haystack_u8->len; + while (--offset >= 0) { + if (e >= ee) { + result = -16; + goto out; + } + e += u8_tbl[*e]; + } + } + if (p < e + needle_u8_len) { + goto out; + } + p -= needle_u8_len; + while (p >= e) { + const unsigned char *pv = p; + q = needle_u8_val; + for (;;) { + if (q == qe) { + result = 0; + p -= needle_u8_len; + while (p > haystack_u8_val) { + unsigned char c = *--p; + if (c < 0x80) { + ++result; + } else if ((c & 0xc0) != 0x80) { + ++result; + } + } + goto out; + } + if (*q != *p) { + break; + } + ++p, ++q; + } + p -= jtbl[*p]; + if (p >= pv) { + p = pv - 1; } - n--; } } - mbfl_convert_filter_flush(filter); - result = pc.matched_pos; - mbfl_convert_filter_delete(filter); - mbfl_wchar_device_clear(&pc.needle); - +out: + if (haystack_u8 == &_haystack_u8) { + mbfl_string_clear(&_haystack_u8); + } + if (needle_u8 == &_needle_u8) { + mbfl_string_clear(&_needle_u8); + } return result; } @@ -1013,6 +1131,7 @@ mbfl_substr_count( if (pc.matched_pos >= 0) { ++result; pc.matched_pos = -1; + pc.needle_pos = 0; } n--; } diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c index f6810738f..8c3a9758d 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c +++ b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c @@ -357,22 +357,6 @@ int mbfl_convert_filter_strcat(mbfl_convert_filter *filter, const unsigned char return 0; } -#if 0 -static int -mbfl_convert_filter_strncat(mbfl_convert_filter *filter, const unsigned char *p, - int n) -{ - while (n > 0) { - if ((*filter->filter_function)(*p++, filter) < 0) { - return -1; - } - n--; - } - - return n; -} -#endif - /* illegal character output function for conv-filter */ int mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter) @@ -387,14 +371,9 @@ mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter) ret = (*filter->filter_function)(filter->illegal_substchar, filter); break; case MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG: - case MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY: if (c >= 0) { if (c < MBFL_WCSGROUP_UCS4MAX) { /* unicode */ - if (mode_backup == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) { - ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"U+"); - } else { /* entity */ - ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"&#"); - } + ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"U+"); } else { if (c < MBFL_WCSGROUP_WCHARMAX) { m = c & ~MBFL_WCSPLANE_MASK; @@ -438,9 +417,38 @@ mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter) if (m == 0 && ret >= 0) { ret = (*filter->filter_function)(mbfl_hexchar_table[0], filter); } - if (mode_backup == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) { - ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)";"); + } + } + break; + case MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY: + if (c >= 0) { + if (c < MBFL_WCSGROUP_UCS4MAX) { /* unicode */ + ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"&#x"); + if (ret < 0) + break; + + m = 0; + r = 28; + while (r >= 0) { + n = (c >> r) & 0xf; + if (n || m) { + m = 1; + ret = (*filter->filter_function)(mbfl_hexchar_table[n], filter); + if (ret < 0) { + break; + } + } + r -= 4; + } + if (ret < 0) { + break; + } + if (m == 0) { + ret = (*filter->filter_function)(mbfl_hexchar_table[0], filter); } + ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)";"); + } else { + ret = (*filter->filter_function)(filter->illegal_substchar, filter); } } break; @@ -462,8 +470,8 @@ const struct mbfl_convert_vtbl * mbfl_convert_filter_get_vtbl(enum mbfl_no_encod to == mbfl_no_encoding_7bit) { from = mbfl_no_encoding_8bit; } else if (from == mbfl_no_encoding_base64 || - from == mbfl_no_encoding_qprint || - from == mbfl_no_encoding_uuencode) { + from == mbfl_no_encoding_qprint || + from == mbfl_no_encoding_uuencode) { to = mbfl_no_encoding_8bit; } |