summaryrefslogtreecommitdiff
path: root/ext/mbstring/libmbfl
diff options
context:
space:
mode:
authorSean Finney <seanius@debian.org>2009-04-10 14:09:48 +0200
committerSean Finney <seanius@debian.org>2009-04-10 14:09:48 +0200
commitcd0b49c72aee33b3e44a9c589fcd93b9e1c7a64f (patch)
tree1315c623bb7d9dfa8d366fa9cd2c6834ceeb5da5 /ext/mbstring/libmbfl
parent9ea47aab740772adf0c69d8c94b208a464e599ea (diff)
downloadphp-cd0b49c72aee33b3e44a9c589fcd93b9e1c7a64f.tar.gz
Imported Upstream version 5.2.9.dfsg.1upstream/5.2.9.dfsg.1
Diffstat (limited to 'ext/mbstring/libmbfl')
-rw-r--r--ext/mbstring/libmbfl/filters/mbfilter_cp932.c2
-rw-r--r--ext/mbstring/libmbfl/filters/mbfilter_cp936.c2
-rw-r--r--ext/mbstring/libmbfl/filters/mbfilter_htmlent.c17
-rw-r--r--ext/mbstring/libmbfl/filters/mbfilter_sjis.c2
-rw-r--r--ext/mbstring/libmbfl/mbfl/mbfilter.c253
-rw-r--r--ext/mbstring/libmbfl/mbfl/mbfl_convert.c60
6 files changed, 232 insertions, 104 deletions
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp932.c b/ext/mbstring/libmbfl/filters/mbfilter_cp932.c
index 80f7bfbc6..8fa254b6c 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_cp932.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_cp932.c
@@ -167,7 +167,7 @@ mbfl_filt_conv_sjiswin_wchar(int c, mbfl_convert_filter *filter)
case 1: /* kanji second char */
filter->status = 0;
c1 = filter->cache;
- if (c > 0x39 && c < 0xfd && c != 0x7f) {
+ if (c >= 0x40 && c <= 0xfc && c != 0x7f) {
w = 0;
SJIS_DECODE(c1, c, s1, s2);
s = (s1 - 0x21)*94 + s2 - 0x21;
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp936.c b/ext/mbstring/libmbfl/filters/mbfilter_cp936.c
index 9cdd0520e..561dc3003 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_cp936.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_cp936.c
@@ -192,7 +192,7 @@ mbfl_filt_conv_wchar_cp936(int c, mbfl_convert_filter *filter)
}
}
if (s >= 0) {
- if (s < 0x80) { /* latin */
+ if (s <= 0x80) { /* latin */
CK((*filter->output_function)(s, filter->data));
} else {
CK((*filter->output_function)((s >> 8) & 0xff, filter->data));
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c b/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c
index 40c24c342..6c6654a1b 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c
@@ -232,8 +232,7 @@ int mbfl_filt_conv_html_dec(int c, mbfl_convert_filter *filter)
mbfl_filt_conv_html_dec_flush(filter);
if (c=='&')
{
- filter->status = 1;
- buffer[0] = '&';
+ buffer[filter->status++] = '&';
}
}
}
@@ -244,17 +243,19 @@ int mbfl_filt_conv_html_dec(int c, mbfl_convert_filter *filter)
int mbfl_filt_conv_html_dec_flush(mbfl_convert_filter *filter)
{
int status, pos = 0;
- char *buffer;
+ unsigned char *buffer;
+ int err = 0;
- buffer = (char*)filter->opaque;
+ buffer = (unsigned char*)filter->opaque;
status = filter->status;
+ filter->status = 0;
/* flush fragments */
while (status--) {
- CK((*filter->output_function)(buffer[pos++], filter->data));
+ int e = (*filter->output_function)(buffer[pos++], filter->data);
+ if (e != 0)
+ err = e;
}
- filter->status = 0;
- /*filter->buffer = 0; of cause NOT*/
- return 0;
+ return err;
}
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis.c
index f9d7ff671..83ef56592 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_sjis.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis.c
@@ -167,7 +167,7 @@ mbfl_filt_conv_sjis_wchar(int c, mbfl_convert_filter *filter)
case 1: /* kanji second char */
filter->status = 0;
c1 = filter->cache;
- if (c > 0x39 && c < 0xfd && c != 0x7f) {
+ if (c >= 0x40 && c <= 0xfc && c != 0x7f) {
SJIS_DECODE(c1, c, s1, s2);
w = (s1 - 0x21)*94 + s2 - 0x21;
if (w >= 0 && w < jisx0208_ucs_table_size) {
diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.c b/ext/mbstring/libmbfl/mbfl/mbfilter.c
index 97d2f048c..1aeb38cc9 100644
--- a/ext/mbstring/libmbfl/mbfl/mbfilter.c
+++ b/ext/mbstring/libmbfl/mbfl/mbfilter.c
@@ -462,10 +462,9 @@ enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *ident
while (n >= 0) {
filter = identd->filter_list[n];
if (!filter->flag) {
- if (identd->strict && filter->status) {
- continue;
+ if (!identd->strict || !filter->status) {
+ encoding = filter->encoding->no_encoding;
}
- encoding = filter->encoding->no_encoding;
}
n--;
}
@@ -779,7 +778,7 @@ retry:
for (;;) {
pc->found_pos++;
p = h;
- m = pc->needle.buffer;
+ m = (int *)pc->needle.buffer;
n = pc->needle_pos - 1;
while (n > 0 && *p == *m) {
n--;
@@ -858,84 +857,203 @@ mbfl_strpos(
int offset,
int reverse)
{
- int n, result, negative_offset = 0;
- unsigned char *p;
- mbfl_convert_filter *filter;
- struct collector_strpos_data pc;
+ int result;
+ mbfl_string _haystack_u8, _needle_u8;
+ const mbfl_string *haystack_u8, *needle_u8;
+ const unsigned char *u8_tbl;
- if (haystack == NULL || needle == NULL) {
+ if (haystack == NULL || haystack->val == NULL || needle == NULL || needle->val == NULL) {
return -8;
}
- /* needle is converted into wchar */
- mbfl_wchar_device_init(&pc.needle);
- filter = mbfl_convert_filter_new(
- needle->no_encoding,
- mbfl_no_encoding_wchar,
- mbfl_wchar_device_output, 0, &pc.needle);
- if (filter == NULL) {
- return -4;
- }
- p = needle->val;
- n = needle->len;
- if (p != NULL) {
- while (n > 0) {
- if ((*filter->filter_function)(*p++, filter) < 0) {
- break;
- }
- n--;
+
+ {
+ const mbfl_encoding *u8_enc;
+ u8_enc = mbfl_no2encoding(mbfl_no_encoding_utf8);
+ if (u8_enc == NULL || u8_enc->mblen_table == NULL) {
+ return -8;
}
+ u8_tbl = u8_enc->mblen_table;
}
- mbfl_convert_filter_flush(filter);
- mbfl_convert_filter_delete(filter);
- pc.needle_len = pc.needle.pos;
- if (pc.needle.buffer == NULL) {
- return -4;
- }
- if (pc.needle_len <= 0) {
- mbfl_wchar_device_clear(&pc.needle);
- return -2;
+
+ if (haystack->no_encoding != mbfl_no_encoding_utf8) {
+ mbfl_string_init(&_haystack_u8);
+ haystack_u8 = mbfl_convert_encoding(haystack, &_haystack_u8, mbfl_no_encoding_utf8);
+ if (haystack_u8 == NULL) {
+ result = -4;
+ goto out;
+ }
+ } else {
+ haystack_u8 = haystack;
}
- /* initialize filter and collector data */
- filter = mbfl_convert_filter_new(
- haystack->no_encoding,
- mbfl_no_encoding_wchar,
- collector_strpos, 0, &pc);
- if (filter == NULL) {
- mbfl_wchar_device_clear(&pc.needle);
- return -4;
+
+ if (needle->no_encoding != mbfl_no_encoding_utf8) {
+ mbfl_string_init(&_needle_u8);
+ needle_u8 = mbfl_convert_encoding(needle, &_needle_u8, mbfl_no_encoding_utf8);
+ if (needle_u8 == NULL) {
+ result = -4;
+ goto out;
+ }
+ } else {
+ needle_u8 = needle;
}
- if (offset < 0) {
- negative_offset = -offset-1;
- offset = 0;
+ if (needle_u8->len < 1) {
+ result = -8;
+ goto out;
}
- pc.start = offset;
- pc.output = 0;
- pc.needle_pos = 0;
- pc.found_pos = 0;
- pc.matched_pos = -1;
+ result = -1;
+ if (haystack_u8->len < needle_u8->len) {
+ goto out;
+ }
- /* feed data */
- p = haystack->val;
- n = haystack->len - negative_offset;
- if (p != NULL) {
- while (n > 0) {
- if ((*filter->filter_function)(*p++, filter) < 0) {
- pc.matched_pos = -4;
- break;
+ if (!reverse) {
+ unsigned int jtbl[1 << (sizeof(unsigned char) * 8)];
+ unsigned int needle_u8_len = needle_u8->len;
+ unsigned int i;
+ const unsigned char *p, *q, *e;
+ const unsigned char *haystack_u8_val = haystack_u8->val,
+ *needle_u8_val = needle_u8->val;
+ for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
+ jtbl[i] = needle_u8_len + 1;
+ }
+ for (i = 0; i < needle_u8_len - 1; ++i) {
+ jtbl[needle_u8_val[i]] = needle_u8_len - i;
+ }
+ e = haystack_u8_val + haystack_u8->len;
+ p = haystack_u8_val;
+ while (--offset >= 0) {
+ if (p >= e) {
+ result = -16;
+ goto out;
}
- if (pc.matched_pos >= 0 && !reverse) {
- break;
+ p += u8_tbl[*p];
+ }
+ p += needle_u8_len;
+ if (p > e) {
+ goto out;
+ }
+ while (p <= e) {
+ const unsigned char *pv = p;
+ q = needle_u8_val + needle_u8_len;
+ for (;;) {
+ if (q == needle_u8_val) {
+ result = 0;
+ while (p > haystack_u8_val) {
+ unsigned char c = *--p;
+ if (c < 0x80) {
+ ++result;
+ } else if ((c & 0xc0) != 0x80) {
+ ++result;
+ }
+ }
+ goto out;
+ }
+ if (*--q != *--p) {
+ break;
+ }
+ }
+ p += jtbl[*p];
+ if (p <= pv) {
+ p = pv + 1;
+ }
+ }
+ } else {
+ unsigned int jtbl[1 << (sizeof(unsigned char) * 8)];
+ unsigned int needle_u8_len = needle_u8->len, needle_len = 0;
+ unsigned int i;
+ const unsigned char *p, *e, *q, *qe;
+ const unsigned char *haystack_u8_val = haystack_u8->val,
+ *needle_u8_val = needle_u8->val;
+ for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
+ jtbl[i] = needle_u8_len;
+ }
+ for (i = needle_u8_len - 1; i > 0; --i) {
+ unsigned char c = needle_u8_val[i];
+ jtbl[c] = i;
+ if (c < 0x80) {
+ ++needle_len;
+ } else if ((c & 0xc0) != 0x80) {
+ ++needle_len;
+ }
+ }
+ {
+ unsigned char c = needle_u8_val[0];
+ if (c < 0x80) {
+ ++needle_len;
+ } else if ((c & 0xc0) != 0x80) {
+ ++needle_len;
+ }
+ }
+ e = haystack_u8_val;
+ p = e + haystack_u8->len;
+ qe = needle_u8_val + needle_u8_len;
+ if (offset < 0) {
+ if (-offset > needle_len) {
+ offset += needle_len;
+ while (offset < 0) {
+ unsigned char c;
+ if (p <= e) {
+ result = -16;
+ goto out;
+ }
+ c = *(--p);
+ if (c < 0x80) {
+ ++offset;
+ } else if ((c & 0xc0) != 0x80) {
+ ++offset;
+ }
+ }
+ }
+ } else {
+ const unsigned char *ee = haystack_u8_val + haystack_u8->len;
+ while (--offset >= 0) {
+ if (e >= ee) {
+ result = -16;
+ goto out;
+ }
+ e += u8_tbl[*e];
+ }
+ }
+ if (p < e + needle_u8_len) {
+ goto out;
+ }
+ p -= needle_u8_len;
+ while (p >= e) {
+ const unsigned char *pv = p;
+ q = needle_u8_val;
+ for (;;) {
+ if (q == qe) {
+ result = 0;
+ p -= needle_u8_len;
+ while (p > haystack_u8_val) {
+ unsigned char c = *--p;
+ if (c < 0x80) {
+ ++result;
+ } else if ((c & 0xc0) != 0x80) {
+ ++result;
+ }
+ }
+ goto out;
+ }
+ if (*q != *p) {
+ break;
+ }
+ ++p, ++q;
+ }
+ p -= jtbl[*p];
+ if (p >= pv) {
+ p = pv - 1;
}
- n--;
}
}
- mbfl_convert_filter_flush(filter);
- result = pc.matched_pos;
- mbfl_convert_filter_delete(filter);
- mbfl_wchar_device_clear(&pc.needle);
-
+out:
+ if (haystack_u8 == &_haystack_u8) {
+ mbfl_string_clear(&_haystack_u8);
+ }
+ if (needle_u8 == &_needle_u8) {
+ mbfl_string_clear(&_needle_u8);
+ }
return result;
}
@@ -1013,6 +1131,7 @@ mbfl_substr_count(
if (pc.matched_pos >= 0) {
++result;
pc.matched_pos = -1;
+ pc.needle_pos = 0;
}
n--;
}
diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c
index f6810738f..8c3a9758d 100644
--- a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c
+++ b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c
@@ -357,22 +357,6 @@ int mbfl_convert_filter_strcat(mbfl_convert_filter *filter, const unsigned char
return 0;
}
-#if 0
-static int
-mbfl_convert_filter_strncat(mbfl_convert_filter *filter, const unsigned char *p,
- int n)
-{
- while (n > 0) {
- if ((*filter->filter_function)(*p++, filter) < 0) {
- return -1;
- }
- n--;
- }
-
- return n;
-}
-#endif
-
/* illegal character output function for conv-filter */
int
mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter)
@@ -387,14 +371,9 @@ mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter)
ret = (*filter->filter_function)(filter->illegal_substchar, filter);
break;
case MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG:
- case MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY:
if (c >= 0) {
if (c < MBFL_WCSGROUP_UCS4MAX) { /* unicode */
- if (mode_backup == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
- ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"U+");
- } else { /* entity */
- ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"&#");
- }
+ ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"U+");
} else {
if (c < MBFL_WCSGROUP_WCHARMAX) {
m = c & ~MBFL_WCSPLANE_MASK;
@@ -438,9 +417,38 @@ mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter)
if (m == 0 && ret >= 0) {
ret = (*filter->filter_function)(mbfl_hexchar_table[0], filter);
}
- if (mode_backup == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
- ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)";");
+ }
+ }
+ break;
+ case MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY:
+ if (c >= 0) {
+ if (c < MBFL_WCSGROUP_UCS4MAX) { /* unicode */
+ ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"&#x");
+ if (ret < 0)
+ break;
+
+ m = 0;
+ r = 28;
+ while (r >= 0) {
+ n = (c >> r) & 0xf;
+ if (n || m) {
+ m = 1;
+ ret = (*filter->filter_function)(mbfl_hexchar_table[n], filter);
+ if (ret < 0) {
+ break;
+ }
+ }
+ r -= 4;
+ }
+ if (ret < 0) {
+ break;
+ }
+ if (m == 0) {
+ ret = (*filter->filter_function)(mbfl_hexchar_table[0], filter);
}
+ ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)";");
+ } else {
+ ret = (*filter->filter_function)(filter->illegal_substchar, filter);
}
}
break;
@@ -462,8 +470,8 @@ const struct mbfl_convert_vtbl * mbfl_convert_filter_get_vtbl(enum mbfl_no_encod
to == mbfl_no_encoding_7bit) {
from = mbfl_no_encoding_8bit;
} else if (from == mbfl_no_encoding_base64 ||
- from == mbfl_no_encoding_qprint ||
- from == mbfl_no_encoding_uuencode) {
+ from == mbfl_no_encoding_qprint ||
+ from == mbfl_no_encoding_uuencode) {
to = mbfl_no_encoding_8bit;
}