diff options
| author | Mark A. Hershberger <mah@debian.(none)> | 2009-03-25 00:37:27 -0400 |
|---|---|---|
| committer | Mark A. Hershberger <mah@debian.(none)> | 2009-03-25 00:37:27 -0400 |
| commit | 2d4e5b09576bb4f0ba716cc82cdf29ea04d9184b (patch) | |
| tree | 41ccc042009cba53e4ce43e727fcba4c1cfbf7f3 /ext/mbstring | |
| parent | d29a4fd2dd3b5d4cf6e80b602544d7b71d794e76 (diff) | |
| download | php-upstream/5.2.2.tar.gz | |
Imported Upstream version 5.2.2upstream/5.2.2
Diffstat (limited to 'ext/mbstring')
38 files changed, 1113 insertions, 76 deletions
diff --git a/ext/mbstring/config.m4 b/ext/mbstring/config.m4 index aa55d0b04..4f493a7de 100644 --- a/ext/mbstring/config.m4 +++ b/ext/mbstring/config.m4 @@ -1,5 +1,5 @@ dnl -dnl $Id: config.m4,v 1.58.2.4.2.6 2006/10/01 08:34:39 hirokawa Exp $ +dnl $Id: config.m4,v 1.58.2.4.2.8 2007/01/29 22:53:47 hirokawa Exp $ dnl AC_DEFUN([PHP_MBSTRING_ADD_SOURCES], [ @@ -189,6 +189,7 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [ libmbfl/filters/mbfilter_euc_cn.c libmbfl/filters/mbfilter_euc_jp.c libmbfl/filters/mbfilter_euc_jp_win.c + libmbfl/filters/mbfilter_cp51932.c libmbfl/filters/mbfilter_euc_kr.c libmbfl/filters/mbfilter_euc_tw.c libmbfl/filters/mbfilter_htmlent.c @@ -209,6 +210,7 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [ libmbfl/filters/mbfilter_iso8859_8.c libmbfl/filters/mbfilter_iso8859_9.c libmbfl/filters/mbfilter_jis.c + libmbfl/filters/mbfilter_iso2022_jp_ms.c libmbfl/filters/mbfilter_koi8r.c libmbfl/filters/mbfilter_armscii8.c libmbfl/filters/mbfilter_qprint.c diff --git a/ext/mbstring/config.w32 b/ext/mbstring/config.w32 index 32e6aca5e..e5feb0ead 100644 --- a/ext/mbstring/config.w32 +++ b/ext/mbstring/config.w32 @@ -1,4 +1,4 @@ -// $Id: config.w32,v 1.10.2.1.2.2 2006/09/21 16:37:20 masugata Exp $ +// $Id: config.w32,v 1.10.2.1.2.4 2007/02/04 00:21:51 fmk Exp $ // vim:ft=javascript ARG_ENABLE("mbstring", "multibyte string functions", "no"); @@ -21,14 +21,15 @@ if (PHP_MBSTRING == "yes") { ADD_SOURCES("ext/mbstring/libmbfl/filters", "html_entities.c \ mbfilter_7bit.c mbfilter_ascii.c mbfilter_base64.c mbfilter_big5.c \ mbfilter_byte2.c mbfilter_byte4.c mbfilter_cp1251.c mbfilter_cp1252.c \ - mbfilter_cp866.c mbfilter_cp932.c mbfilter_cp936.c mbfilter_euc_cn.c \ - mbfilter_euc_jp.c mbfilter_euc_jp_win.c mbfilter_euc_kr.c \ + mbfilter_cp866.c mbfilter_cp932.c mbfilter_cp936.c mbfilter_cp51932.c \ + mbfilter_euc_cn.c mbfilter_euc_jp.c mbfilter_euc_jp_win.c mbfilter_euc_kr.c \ mbfilter_euc_tw.c mbfilter_htmlent.c mbfilter_hz.c mbfilter_iso2022_kr.c \ mbfilter_iso8859_1.c mbfilter_iso8859_10.c mbfilter_iso8859_13.c \ mbfilter_iso8859_14.c mbfilter_iso8859_15.c mbfilter_iso8859_16.c \ mbfilter_iso8859_2.c mbfilter_iso8859_3.c mbfilter_iso8859_4.c \ mbfilter_iso8859_5.c mbfilter_iso8859_6.c mbfilter_iso8859_7.c \ mbfilter_iso8859_8.c mbfilter_iso8859_9.c mbfilter_jis.c \ + mbfilter_iso2022_jp_ms.c \ mbfilter_koi8r.c mbfilter_qprint.c mbfilter_sjis.c mbfilter_ucs2.c \ mbfilter_ucs4.c mbfilter_uhc.c mbfilter_utf16.c mbfilter_utf32.c \ mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_utf8.c \ diff --git a/ext/mbstring/libmbfl/filters/Makefile.am b/ext/mbstring/libmbfl/filters/Makefile.am index 0c71c8604..5f24ae8c4 100644 --- a/ext/mbstring/libmbfl/filters/Makefile.am +++ b/ext/mbstring/libmbfl/filters/Makefile.am @@ -2,7 +2,7 @@ EXTRA_DIST=Makefile.bcc32 mk_sb_tbl.awk noinst_LTLIBRARIES=libmbfl_filters.la INCLUDES=-I../mbfl libmbfl_filters_la_LDFLAGS=-version-info $(SHLIB_VERSION) -libmbfl_filters_la_SOURCES=mbfilter_cp936.c mbfilter_hz.c mbfilter_euc_tw.c mbfilter_big5.c mbfilter_euc_jp.c mbfilter_jis.c mbfilter_iso8859_1.c mbfilter_iso8859_2.c mbfilter_cp1252.c mbfilter_cp1251.c mbfilter_ascii.c mbfilter_iso8859_3.c mbfilter_iso8859_4.c mbfilter_iso8859_5.c mbfilter_iso8859_6.c mbfilter_iso8859_7.c mbfilter_iso8859_8.c mbfilter_iso8859_9.c mbfilter_iso8859_10.c mbfilter_iso8859_13.c mbfilter_iso8859_14.c mbfilter_iso8859_15.c mbfilter_iso8859_16.c mbfilter_htmlent.c mbfilter_byte2.c mbfilter_byte4.c mbfilter_uuencode.c mbfilter_base64.c mbfilter_sjis.c mbfilter_7bit.c mbfilter_qprint.c mbfilter_ucs4.c mbfilter_ucs2.c mbfilter_utf32.c mbfilter_utf16.c mbfilter_utf8.c mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_euc_jp_win.c mbfilter_cp932.c mbfilter_euc_cn.c mbfilter_euc_kr.c mbfilter_uhc.c mbfilter_iso2022_kr.c mbfilter_cp866.c mbfilter_koi8r.c mbfilter_armscii8.c html_entities.c cp932_table.h html_entities.h mbfilter_7bit.h mbfilter_ascii.h mbfilter_base64.h mbfilter_big5.h mbfilter_byte2.h mbfilter_byte4.h mbfilter_cp1251.h mbfilter_cp1252.h mbfilter_cp866.h mbfilter_cp932.h mbfilter_cp936.h mbfilter_euc_cn.h mbfilter_euc_jp.h mbfilter_euc_jp_win.h mbfilter_euc_kr.h mbfilter_euc_tw.h mbfilter_htmlent.h mbfilter_hz.h mbfilter_iso2022_kr.h mbfilter_iso8859_1.h mbfilter_iso8859_10.h mbfilter_iso8859_13.h mbfilter_iso8859_14.h mbfilter_iso8859_15.h mbfilter_iso8859_16.h mbfilter_iso8859_2.h mbfilter_iso8859_3.h mbfilter_iso8859_4.h mbfilter_iso8859_5.h mbfilter_iso8859_6.h mbfilter_iso8859_7.h mbfilter_iso8859_8.h mbfilter_iso8859_9.h mbfilter_jis.h mbfilter_koi8r.h mbfilter_armscii8.h mbfilter_qprint.h mbfilter_sjis.h mbfilter_ucs2.h mbfilter_ucs4.h mbfilter_uhc.h mbfilter_utf16.h mbfilter_utf32.h mbfilter_utf7.h mbfilter_utf7imap.h mbfilter_utf8.h mbfilter_uuencode.h unicode_prop.h unicode_table_big5.h unicode_table_cns11643.h unicode_table_cp1251.h unicode_table_cp1252.h unicode_table_cp866.h unicode_table_cp932_ext.h unicode_table_cp936.h unicode_table_iso8859_10.h unicode_table_iso8859_13.h unicode_table_iso8859_14.h unicode_table_iso8859_15.h unicode_table_iso8859_16.h unicode_table_iso8859_2.h unicode_table_iso8859_3.h unicode_table_iso8859_4.h unicode_table_iso8859_5.h unicode_table_iso8859_6.h unicode_table_iso8859_7.h unicode_table_iso8859_8.h unicode_table_iso8859_9.h unicode_table_jis.h unicode_table_koi8r.h unicode_table_armscii8.h unicode_table_uhc.h +libmbfl_filters_la_SOURCES=mbfilter_cp936.c mbfilter_hz.c mbfilter_euc_tw.c mbfilter_big5.c mbfilter_euc_jp.c mbfilter_jis.c mbfilter_iso8859_1.c mbfilter_iso8859_2.c mbfilter_cp1252.c mbfilter_cp1251.c mbfilter_ascii.c mbfilter_iso8859_3.c mbfilter_iso8859_4.c mbfilter_iso8859_5.c mbfilter_iso8859_6.c mbfilter_iso8859_7.c mbfilter_iso8859_8.c mbfilter_iso8859_9.c mbfilter_iso8859_10.c mbfilter_iso8859_13.c mbfilter_iso8859_14.c mbfilter_iso8859_15.c mbfilter_iso8859_16.c mbfilter_htmlent.c mbfilter_byte2.c mbfilter_byte4.c mbfilter_uuencode.c mbfilter_base64.c mbfilter_sjis.c mbfilter_7bit.c mbfilter_qprint.c mbfilter_ucs4.c mbfilter_ucs2.c mbfilter_utf32.c mbfilter_utf16.c mbfilter_utf8.c mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_euc_jp_win.c mbfilter_cp932.c mbfilter_cp51932.c mbfilter_iso2022_jp_ms.c mbfilter_euc_cn.c mbfilter_euc_kr.c mbfilter_uhc.c mbfilter_iso2022_kr.c mbfilter_cp866.c mbfilter_koi8r.c mbfilter_armscii8.c html_entities.c cp932_table.h html_entities.h mbfilter_7bit.h mbfilter_ascii.h mbfilter_base64.h mbfilter_big5.h mbfilter_byte2.h mbfilter_byte4.h mbfilter_cp1251.h mbfilter_cp1252.h mbfilter_cp866.h mbfilter_cp932.h mbfilter_cp51932.h mbfilter_iso2022_jp_ms.h mbfilter_cp936.h mbfilter_euc_cn.h mbfilter_euc_jp.h mbfilter_euc_jp_win.h mbfilter_euc_kr.h mbfilter_euc_tw.h mbfilter_htmlent.h mbfilter_hz.h mbfilter_iso2022_kr.h mbfilter_iso8859_1.h mbfilter_iso8859_10.h mbfilter_iso8859_13.h mbfilter_iso8859_14.h mbfilter_iso8859_15.h mbfilter_iso8859_16.h mbfilter_iso8859_2.h mbfilter_iso8859_3.h mbfilter_iso8859_4.h mbfilter_iso8859_5.h mbfilter_iso8859_6.h mbfilter_iso8859_7.h mbfilter_iso8859_8.h mbfilter_iso8859_9.h mbfilter_jis.h mbfilter_koi8r.h mbfilter_armscii8.h mbfilter_qprint.h mbfilter_sjis.h mbfilter_ucs2.h mbfilter_ucs4.h mbfilter_uhc.h mbfilter_utf16.h mbfilter_utf32.h mbfilter_utf7.h mbfilter_utf7imap.h mbfilter_utf8.h mbfilter_uuencode.h unicode_prop.h unicode_table_big5.h unicode_table_cns11643.h unicode_table_cp1251.h unicode_table_cp1252.h unicode_table_cp866.h unicode_table_cp932_ext.h unicode_table_cp936.h unicode_table_iso8859_10.h unicode_table_iso8859_13.h unicode_table_iso8859_14.h unicode_table_iso8859_15.h unicode_table_iso8859_16.h unicode_table_iso8859_2.h unicode_table_iso8859_3.h unicode_table_iso8859_4.h unicode_table_iso8859_5.h unicode_table_iso8859_6.h unicode_table_iso8859_7.h unicode_table_iso8859_8.h unicode_table_iso8859_9.h unicode_table_jis.h unicode_table_koi8r.h unicode_table_armscii8.h unicode_table_uhc.h mbfilter_iso8859_2.c: unicode_table_iso8859_2.h diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c b/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c new file mode 100644 index 000000000..f1505ae9b --- /dev/null +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c @@ -0,0 +1,360 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this files was separated from mbfilter_ja.c + * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002. + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "mbfilter.h" +#include "mbfilter_cp51932.h" + +#include "unicode_table_cp932_ext.h" +#include "unicode_table_jis.h" +#include "cp932_table.h" + +static int mbfl_filt_ident_cp51932(int c, mbfl_identify_filter *filter); + +static const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 +}; + + +static const char *mbfl_encoding_cp51932_aliases[] = {"cp51932", NULL}; + +const struct mbfl_identify_vtbl vtbl_identify_cp51932 = { + mbfl_no_encoding_cp51932, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_cp51932 +}; + +const mbfl_encoding mbfl_encoding_cp51932 = { + mbfl_no_encoding_cp51932, + "CP51932", + "CP51932", + (const char *(*)[])&mbfl_encoding_cp51932_aliases, + mblen_table_eucjp, + MBFL_ENCTYPE_MBCS +}; + +const struct mbfl_convert_vtbl vtbl_cp51932_wchar = { + mbfl_no_encoding_cp51932, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_cp51932_wchar, + mbfl_filt_conv_common_flush +}; + +const struct mbfl_convert_vtbl vtbl_wchar_cp51932 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_cp51932, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_wchar_cp51932, + mbfl_filt_conv_common_flush +}; + +#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) + +#define sjistoidx(c1, c2) \ + (((c1) > 0x9f) \ + ? (((c1) - 0xc1) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40)) \ + : (((c1) - 0x81) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40))) +#define idxtoeuc1(c) (((c) / 94) + 0xa1) +#define idxtoeuc2(c) (((c) % 94) + 0xa1) + +/* + * cp51932 => wchar + */ +int +mbfl_filt_conv_cp51932_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, w; + + switch (filter->status) { + case 0: + if (c >= 0 && c < 0x80) { /* latin */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0xa0 && c < 0xff) { /* CP932 first char */ + filter->status = 1; + filter->cache = c; + } else if (c == 0x8e) { /* kana first char */ + filter->status = 2; + } else { + w = c & MBFL_WCSGROUP_MASK; + w |= MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(w, filter->data)); + } + break; + + case 1: /* got first half */ + filter->status = 0; + c1 = filter->cache; + if (c > 0xa0 && c < 0xff) { + w = 0; + s = (c1 - 0xa1)*94 + c - 0xa1; + if (s <= 137) { + if (s == 31) { + w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xff5e; /* FULLWIDTH TILDE */ + } else if (s == 33) { + w = 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xffe0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xffe1; /* FULLWIDTH POUND SIGN */ + } else if (s == 137) { + w = 0xffe2; /* FULLWIDTH NOT SIGN */ + } + } + if (w == 0) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */ + w = jisx0208_ucs_table[s]; + } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */ + w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; + } + } + if (w <= 0) { + w = ((c1 & 0x7f) << 8) | (c & 0x7f); + w &= MBFL_WCSPLANE_MASK; + w |= MBFL_WCSPLANE_WINCP932; + } + CK((*filter->output_function)(w, filter->data)); + } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */ + CK((*filter->output_function)(c, filter->data)); + } else { + w = (c1 << 8) | c; + w &= MBFL_WCSGROUP_MASK; + w |= MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(w, filter->data)); + } + break; + + case 2: /* got 0x8e, X0201 kana */ + filter->status = 0; + if (c > 0xa0 && c < 0xe0) { + w = 0xfec0 + c; + CK((*filter->output_function)(w, filter->data)); + } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */ + CK((*filter->output_function)(c, filter->data)); + } else { + w = 0x8e00 | c; + w &= MBFL_WCSGROUP_MASK; + w |= MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(w, filter->data)); + } + break; + + default: + filter->status = 0; + break; + } + + return c; +} + +int +cp932ext3_to_cp51932(int c) +{ + int idx; + + idx = sjistoidx(c >> 8, c & 0xff); + if (idx >= sjistoidx(0xfa, 0x5c)) + idx -= sjistoidx(0xfa, 0x5c) - sjistoidx(0xed, 0x40); + else if (idx >= sjistoidx(0xfa, 0x55)) + idx -= sjistoidx(0xfa, 0x55) - sjistoidx(0xee, 0xfa); + else if (idx >= sjistoidx(0xfa, 0x40)) + idx -= sjistoidx(0xfa, 0x40) - sjistoidx(0xee, 0xef); + return idxtoeuc1(idx) << 8 | idxtoeuc2(idx); +} + +/* + * wchar => cp51932 + */ +int +mbfl_filt_conv_wchar_cp51932(int c, mbfl_convert_filter *filter) +{ + int c1, c2, s1; + + s1 = 0; + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } + if (s1 >= 0x8080) s1 = -1; /* we don't support JIS X0213 */ + if (s1 <= 0) { + c1 = c & ~MBFL_WCSPLANE_MASK; + if (c1 == MBFL_WCSPLANE_WINCP932) { + s1 = c & MBFL_WCSPLANE_MASK; + if (s1 >= ((85 + 0x20) << 8)) { /* 85ku - 120ku */ + s1 = -1; + } + } else if (c1 == MBFL_WCSPLANE_JIS0208) { + s1 = c & MBFL_WCSPLANE_MASK; + if ((s1 >= ((85 + 0x20) << 8) && /* 85ku - 94ku */ + s1 <= ((88 + 0x20) << 8)) ||/* IBM extension */ + (s1 >= ((93 + 0x20) << 8) && /* 89ku - 92ku */ + s1 <= ((94 + 0x20) << 8))) { + s1 = -1; + } + } else if (c == 0xa5) { /* YEN SIGN */ + s1 = 0x216f; /* FULLWIDTH YEN SIGN */ + } else if (c == 0x203e) { /* OVER LINE */ + s1 = 0x2131; /* FULLWIDTH MACRON */ + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s1 = 0x2140; + } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ + s1 = 0x2141; + } else if (c == 0x2225) { /* PARALLEL TO */ + s1 = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ + s1 = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s1 = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s1 = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ + s1 = 0x224c; + } else { + s1 = -1; + c1 = 0; + c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; + while (c1 < c2) { /* CP932 vendor ext1 (13ku) */ + if (c == cp932ext1_ucs_table[c1]) { + s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21); + break; + } + c1++; + } + if (s1 < 0) { + c1 = 0; + c2 = cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; + while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */ + if (c == cp932ext2_ucs_table[c1]) { + s1 = ((c1/94 + 0x79) << 8) +(c1%94 + 0x21); + break; + } + c1++; + } + } + } + if (c == 0) { + s1 = 0; + } else if (s1 <= 0) { + s1 = -1; + } + } + + if (s1 >= 0) { + if (s1 < 0x80) { /* latin */ + CK((*filter->output_function)(s1, filter->data)); + } else if (s1 < 0x100) { /* kana */ + CK((*filter->output_function)(0x8e, filter->data)); + CK((*filter->output_function)(s1, filter->data)); + } else if (s1 < 0x8080) { /* X 0208 */ + CK((*filter->output_function)(((s1 >> 8) & 0xff) | 0x80, filter->data)); + CK((*filter->output_function)((s1 & 0xff) | 0x80, filter->data)); + } else { + if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + } else { + if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + + return c; +} + +static int mbfl_filt_ident_cp51932(int c, mbfl_identify_filter *filter) +{ + switch (filter->status) { + case 0: /* latin */ + if (c >= 0 && c < 0x80) { /* ok */ + ; + } else if (c > 0xa0 && c < 0xff) { /* kanji first char */ + filter->status = 1; + } else if (c == 0x8e) { /* kana first char */ + filter->status = 2; + } else { /* bad */ + filter->flag = 1; + } + break; + + case 1: /* got first half */ + if (c < 0xa1 || c > 0xfe) { /* bad */ + filter->flag = 1; + } + filter->status = 0; + break; + + case 2: /* got 0x8e */ + if (c < 0xa1 || c > 0xdf) { /* bad */ + filter->flag = 1; + } + filter->status = 0; + break; + + default: + filter->status = 0; + break; + } + + return c; +} + + diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp51932.h b/ext/mbstring/libmbfl/filters/mbfilter_cp51932.h new file mode 100644 index 000000000..be86b8226 --- /dev/null +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp51932.h @@ -0,0 +1,43 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this files was separated from mbfilter_ja.h + * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002. + * + */ + +#ifndef MBFL_MBFILTER_CP51932_H +#define MBFL_MBFILTER_CP51932_H + +#include "mbfilter.h" + +extern const mbfl_encoding mbfl_encoding_cp51932; +extern const struct mbfl_identify_vtbl vtbl_identify_cp51932; +extern const struct mbfl_convert_vtbl vtbl_cp51932_wchar; +extern const struct mbfl_convert_vtbl vtbl_wchar_cp51932; + +int mbfl_filt_conv_cp51932_wchar(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_wchar_cp51932(int c, mbfl_convert_filter *filter); + +#endif /* MBFL_MBFILTER_CP51932_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c index e57ae6935..ece0c7ee4 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c @@ -60,7 +60,8 @@ static const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */ }; -static const char *mbfl_encoding_eucjp_win_aliases[] = {"eucJP-open", NULL}; +static const char *mbfl_encoding_eucjp_win_aliases[] = {"eucJP-open", + "eucJP-ms", NULL}; const struct mbfl_identify_vtbl vtbl_identify_eucjpwin = { mbfl_no_encoding_eucjp_win, @@ -203,6 +204,9 @@ mbfl_filt_conv_eucjpwin_wchar(int c, mbfl_convert_filter *filter) s = (c1 - 0xa1)*94 + c - 0xa1; if (s >= 0 && s < jisx0212_ucs_table_size) { w = jisx0212_ucs_table[s]; + if (w == 0x007e) { + w = 0xff5e; /* FULLWIDTH TILDE */ + } } else if (s >= (82*94) && s < (84*94)) { /* vender ext3 (83ku - 84ku) <-> CP932 (115ku -120ku) */ s = (c1<< 8) | c; w = 0; @@ -221,6 +225,9 @@ mbfl_filt_conv_eucjpwin_wchar(int c, mbfl_convert_filter *filter) } else { w = 0; } + if (w == 0x00A6) { + w = 0xFFE4; /* FULLWIDTH BROKEN BAR */ + } if (w <= 0) { w = ((c1 & 0x7f) << 8) | (c & 0x7f); w &= MBFL_WCSPLANE_MASK; @@ -273,6 +280,9 @@ mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter) c2 = s1%94 + 0xa1; s1 = (c1 << 8) | c2; } + if (s1 == 0xa2f1) { + s1 = 0x2d62; /* NUMERO SIGN */ + } if (s1 <= 0) { c1 = c & ~MBFL_WCSPLANE_MASK; if (c1 == MBFL_WCSPLANE_WINCP932) { @@ -310,6 +320,8 @@ mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter) s1 = 0x2172; } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ s1 = 0x224c; + } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ + s1 = 0x2141; } else { s1 = -1; c1 = 0; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c new file mode 100644 index 000000000..df961677d --- /dev/null +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c @@ -0,0 +1,522 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this files was separated from mbfilter_ja.c + * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002. + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "mbfilter.h" +#include "mbfilter_iso2022_jp_ms.h" + +#include "unicode_table_cp932_ext.h" +#include "unicode_table_jis.h" +#include "cp932_table.h" + +static int mbfl_filt_ident_2022jpms(int c, mbfl_identify_filter *filter); + +static const char *mbfl_encoding_2022jpms_aliases[] = {"ISO2022JPMS", NULL}; + +const mbfl_encoding mbfl_encoding_2022jpms = { + mbfl_no_encoding_2022jpms, + "ISO-2022-JP-MS", + "ISO-2022-JP", + (const char *(*)[])&mbfl_encoding_2022jpms_aliases, + NULL, + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE +}; + +const struct mbfl_identify_vtbl vtbl_identify_2022jpms = { + mbfl_no_encoding_2022jpms, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_2022jpms +}; + +const struct mbfl_convert_vtbl vtbl_2022jpms_wchar = { + mbfl_no_encoding_2022jpms, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_2022jpms_wchar, + mbfl_filt_conv_common_flush +}; + +const struct mbfl_convert_vtbl vtbl_wchar_2022jpms = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_2022jpms, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_wchar_2022jpms, + mbfl_filt_conv_any_2022jpms_flush +}; + +#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) + +#define sjistoidx(c1, c2) \ + (((c1) > 0x9f) \ + ? (((c1) - 0xc1) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40)) \ + : (((c1) - 0x81) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40))) +#define idxtojis1(c) (((c) / 94) + 0x21) +#define idxtojis2(c) (((c) % 94) + 0x21) + +/* + * ISO-2022-JP-MS => wchar + */ +int +mbfl_filt_conv_2022jpms_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, w; + +retry: + switch (filter->status & 0xf) { +/* case 0x00: ASCII */ +/* case 0x10: X 0201 latin */ +/* case 0x20: X 0201 kana */ +/* case 0x80: X 0208 */ +/* case 0xa0: UDC */ + case 0: + if (c == 0x1b) { + filter->status += 2; + } else if (filter->status == 0x20 && c > 0x20 && c < 0x60) { /* kana */ + CK((*filter->output_function)(0xff40 + c, filter->data)); + } else if ((filter->status == 0x80 || filter->status == 0xa0) && c > 0x20 && c < 0x80) { /* kanji first char */ + filter->cache = c; + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0xa0 && c < 0xe0) { /* GR kana */ + CK((*filter->output_function)(0xfec0 + c, filter->data)); + } else { + w = c & MBFL_WCSGROUP_MASK; + w |= MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(w, filter->data)); + } + break; + +/* case 0x81: X 0208 second char */ +/* case 0xa1: UDC second char */ + case 1: + w = 0; + filter->status &= ~0xf; + c1 = filter->cache; + if (c > 0x20 && c < 0x7f) { + s = (c1 - 0x21)*94 + c - 0x21; + if (filter->status == 0x80) { + if (s <= 137) { + if (s == 31) { + w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xff5e; /* FULLWIDTH TILDE */ + } else if (s == 33) { + w = 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xffe0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xffe1; /* FULLWIDTH POUND SIGN */ + } else if (s == 137) { + w = 0xffe2; /* FULLWIDTH NOT SIGN */ + } + } + if (w == 0) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s >= 0 && s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */ + w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; + } else { + w = 0; + } + } + if (w <= 0) { + w = (c1 << 8) | c; + w &= MBFL_WCSPLANE_MASK; + w |= MBFL_WCSPLANE_JIS0208; + } + CK((*filter->output_function)(w, filter->data)); + } else { + if (c1 > 0x20 && c1 < 0x35) { + w = 0xe000 + (c1 - 0x21)*94 + c - 0x21; + } + if (w <= 0) { + w = (((c1 - 0x21) + 0x7f) << 8) | c; + w &= MBFL_WCSPLANE_MASK; + w |= MBFL_WCSPLANE_JIS0208; + } + CK((*filter->output_function)(w, filter->data)); + } + } else if (c == 0x1b) { + filter->status += 2; + } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */ + CK((*filter->output_function)(c, filter->data)); + } else { + w = (c1 << 8) | c; + w &= MBFL_WCSGROUP_MASK; + w |= MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(w, filter->data)); + } + break; + + /* ESC */ +/* case 0x02: */ +/* case 0x12: */ +/* case 0x22: */ +/* case 0x82: */ +/* case 0xa2: */ + case 2: + if (c == 0x24) { /* '$' */ + filter->status++; + } else if (c == 0x28) { /* '(' */ + filter->status += 3; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(0x1b, filter->data)); + goto retry; + } + break; + + /* ESC $ */ +/* case 0x03: */ +/* case 0x13: */ +/* case 0x23: */ +/* case 0x83: */ +/* case 0xa3: */ + case 3: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x28) { /* '(' */ + filter->status++; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)(0x24, filter->data)); + goto retry; + } + break; + + /* ESC $ ( */ +/* case 0x04: */ +/* case 0x14: */ +/* case 0x24: */ +/* case 0x84: */ +/* case 0xa4: */ + case 4: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x3f) { /* '?' */ + filter->status = 0xa0; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)(0x24, filter->data)); + CK((*filter->output_function)(0x28, filter->data)); + goto retry; + } + break; + + /* ESC ( */ +/* case 0x05: */ +/* case 0x15: */ +/* case 0x25: */ +/* case 0x85: */ +/* case 0xa5: */ + case 5: + if (c == 0x42) { /* 'B' */ + filter->status = 0; + } else if (c == 0x4a) { /* 'J' */ + filter->status = 0; + } else if (c == 0x49) { /* 'I' */ + filter->status = 0x20; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)(0x28, filter->data)); + goto retry; + } + break; + + default: + filter->status = 0; + break; + } + + return c; +} + +static int +cp932ext3_cp932ext2_jis(int c) +{ + int idx; + + idx = sjistoidx(0xfa, 0x40) + c; + if (idx >= sjistoidx(0xfa, 0x5c)) + idx -= sjistoidx(0xfa, 0x5c) - sjistoidx(0xed, 0x40); + else if (idx >= sjistoidx(0xfa, 0x55)) + idx -= sjistoidx(0xfa, 0x55) - sjistoidx(0xee, 0xfa); + else if (idx >= sjistoidx(0xfa, 0x40)) + idx -= sjistoidx(0xfa, 0x40) - sjistoidx(0xee, 0xef); + return idxtojis1(idx) << 8 | idxtojis2(idx); +} + +/* + * wchar => ISO-2022-JP-MS + */ +int +mbfl_filt_conv_wchar_2022jpms(int c, mbfl_convert_filter *filter) +{ + int c1, c2, s1, s2; + + s1 = 0; + s2 = 0; + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } else if (c >= 0xe000 && c < (0xe000 + 20*94)) { /* user (95ku - 114ku) */ + s1 = c - 0xe000; + c1 = s1/94 + 0x7f; + c2 = s1%94 + 0x21; + s1 = (c1 << 8) | c2; + } + if (s1 <= 0) { + c1 = c & ~MBFL_WCSPLANE_MASK; + if (c1 == MBFL_WCSPLANE_WINCP932) { + s1 = c & MBFL_WCSPLANE_MASK; + s2 = 1; + } else if (c1 == MBFL_WCSPLANE_JIS0208) { + s1 = c & MBFL_WCSPLANE_MASK; + } else if (c1 == MBFL_WCSPLANE_JIS0212) { + s1 = c & MBFL_WCSPLANE_MASK; + s1 |= 0x8080; + } else if (c == 0xa5) { /* YEN SIGN */ + s1 = 0x216f; /* FULLWIDTH YEN SIGN */ + } else if (c == 0x203e) { /* OVER LINE */ + s1 = 0x2131; /* FULLWIDTH MACRON */ + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s1 = 0x2140; + } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ + s1 = 0x2141; + } else if (c == 0x2225) { /* PARALLEL TO */ + s1 = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ + s1 = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s1 = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s1 = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ + s1 = 0x224c; + } + } + if ((s1 <= 0) || (s1 >= 0xa1a1 && s2 == 0)) { /* not found or X 0212 */ + s1 = -1; + c1 = 0; + c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; + while (c1 < c2) { /* CP932 vendor ext1 (13ku) */ + if (c == cp932ext1_ucs_table[c1]) { + s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21); + break; + } + c1++; + } + if (s1 <= 0) { + c1 = 0; + c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; + while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */ + if (c == cp932ext3_ucs_table[c1]) { + s1 = cp932ext3_cp932ext2_jis(c1); + break; + } + c1++; + } + } + if (c == 0) { + s1 = 0; + } else if (s1 <= 0) { + s1 = -1; + } + } + if (s1 >= 0) { + if (s1 < 0x80) { /* latin */ + if ((filter->status & 0xff00) != 0) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + } + CK((*filter->output_function)(s1, filter->data)); + filter->status = 0; + } else if (s1 > 0xa0 && s1 < 0xe0) { /* kana */ + if ((filter->status & 0xff00) != 0x100) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x49, filter->data)); /* 'I' */ + } + filter->status = 0x100; + CK((*filter->output_function)(s1 & 0x7f, filter->data)); + } else if (s1 < 0x7e7f) { /* X 0208 */ + if ((filter->status & 0xff00) != 0x200) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x24, filter->data)); /* '$' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + } + filter->status = 0x200; + CK((*filter->output_function)((s1 >> 8) & 0xff, filter->data)); + CK((*filter->output_function)(s1 & 0x7f, filter->data)); + } else if (s1 < 0x927f) { /* UDC */ + if ((filter->status & 0xff00) != 0x800) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x24, filter->data)); /* '$' */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x3f, filter->data)); /* '?' */ + } + filter->status = 0x800; + CK((*filter->output_function)(((s1 >> 8) - 0x5e) & 0x7f, filter->data)); + CK((*filter->output_function)(s1 & 0x7f, filter->data)); + } + } else { + if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + + return c; +} + +int +mbfl_filt_conv_any_2022jpms_flush(mbfl_convert_filter *filter) +{ + /* back to latin */ + if ((filter->status & 0xff00) != 0) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + } + filter->status &= 0xff; + return 0; +} + +static int mbfl_filt_ident_2022jpms(int c, mbfl_identify_filter *filter) +{ +retry: + switch (filter->status & 0xf) { +/* case 0x00: ASCII */ +/* case 0x10: X 0201 latin */ +/* case 0x20: X 0201 kana */ +/* case 0x80: X 0208 */ +/* case 0xa0: X UDC */ + case 0: + if (c == 0x1b) { + filter->status += 2; + } else if ((filter->status == 0x80 || filter->status == 0xa0) && c > 0x20 && c < 0x80) { /* kanji first char */ + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ + ; + } else { + filter->flag = 1; /* bad */ + } + break; + +/* case 0x81: X 0208 second char */ +/* case 0xa1: UDC second char */ + case 1: + filter->status &= ~0xf; + if (c == 0x1b) { + goto retry; + } else if (c < 0x21 || c > 0x7e) { /* bad */ + filter->flag = 1; + } + break; + + /* ESC */ + case 2: + if (c == 0x24) { /* '$' */ + filter->status++; + } else if (c == 0x28) { /* '(' */ + filter->status += 3; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC $ */ + case 3: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x28) { /* '(' */ + filter->status++; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC $ ( */ + case 4: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x3f) { /* '?' */ + filter->status = 0xa0; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC ( */ + case 5: + if (c == 0x42) { /* 'B' */ + filter->status = 0; + } else if (c == 0x4a) { /* 'J' */ + filter->status = 0; + } else if (c == 0x49) { /* 'I' */ + filter->status = 0x20; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + default: + filter->status = 0; + break; + } + + return c; +} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.h b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.h new file mode 100644 index 000000000..8479a4509 --- /dev/null +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.h @@ -0,0 +1,44 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this files was separated from mbfilter_ja.c + * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002. + * + */ + +#ifndef MBFL_MBFILTER_ISO2022_JP_MS_H +#define MBFL_MBFILTER_ISO2022_JP_MS_H + +#include "mbfilter.h" + +extern const mbfl_encoding mbfl_encoding_2022jpms; +extern const struct mbfl_identify_vtbl vtbl_identify_2022jpms; +extern const struct mbfl_convert_vtbl vtbl_2022jpms_wchar; +extern const struct mbfl_convert_vtbl vtbl_wchar_2022jpms; + +int mbfl_filt_conv_2022jpms_wchar(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_wchar_2022jpms(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_any_2022jpms_flush(mbfl_convert_filter *filter); + +#endif /* MBFL_MBFILTER_ISO2022_JP_MS_H */ diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c index 78a995cd2..5cac3bdda 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c +++ b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c @@ -51,7 +51,9 @@ #include "filters/mbfilter_euc_kr.h" #include "filters/mbfilter_iso2022_kr.h" #include "filters/mbfilter_sjis.h" +#include "filters/mbfilter_cp51932.h" #include "filters/mbfilter_jis.h" +#include "filters/mbfilter_iso2022_jp_ms.h" #include "filters/mbfilter_euc_jp.h" #include "filters/mbfilter_euc_jp_win.h" #include "filters/mbfilter_ascii.h" @@ -104,10 +106,14 @@ const struct mbfl_convert_vtbl *mbfl_convert_filter_list[] = { &vtbl_wchar_eucjp, &vtbl_sjis_wchar, &vtbl_wchar_sjis, + &vtbl_cp51932_wchar, + &vtbl_wchar_cp51932, &vtbl_jis_wchar, &vtbl_wchar_jis, &vtbl_2022jp_wchar, &vtbl_wchar_2022jp, + &vtbl_2022jpms_wchar, + &vtbl_wchar_2022jpms, &vtbl_eucjpwin_wchar, &vtbl_wchar_eucjpwin, &vtbl_sjiswin_wchar, diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c index 64d8cb4e4..6818f6a8d 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c +++ b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c @@ -57,7 +57,9 @@ #include "filters/mbfilter_euc_kr.h" #include "filters/mbfilter_iso2022_kr.h" #include "filters/mbfilter_sjis.h" +#include "filters/mbfilter_cp51932.h" #include "filters/mbfilter_jis.h" +#include "filters/mbfilter_iso2022_jp_ms.h" #include "filters/mbfilter_euc_jp.h" #include "filters/mbfilter_euc_jp_win.h" #include "filters/mbfilter_ascii.h" @@ -149,8 +151,10 @@ static const mbfl_encoding *mbfl_encoding_ptr_list[] = { &mbfl_encoding_sjis, &mbfl_encoding_eucjp_win, &mbfl_encoding_sjis_win, + &mbfl_encoding_cp51932, &mbfl_encoding_jis, &mbfl_encoding_2022jp, + &mbfl_encoding_2022jpms, &mbfl_encoding_cp1252, &mbfl_encoding_8859_1, &mbfl_encoding_8859_2, diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h index d38b1d7d4..af93b6ea5 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h +++ b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h @@ -70,8 +70,10 @@ enum mbfl_no_encoding { mbfl_no_encoding_eucjp_win, mbfl_no_encoding_sjis_win, mbfl_no_encoding_sjis_mac, + mbfl_no_encoding_cp51932, mbfl_no_encoding_jis, mbfl_no_encoding_2022jp, + mbfl_no_encoding_2022jpms, mbfl_no_encoding_cp1252, mbfl_no_encoding_8859_1, mbfl_no_encoding_8859_2, diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_ident.c b/ext/mbstring/libmbfl/mbfl/mbfl_ident.c index 72ccef68d..7162e29b4 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_ident.c +++ b/ext/mbstring/libmbfl/mbfl/mbfl_ident.c @@ -51,6 +51,7 @@ #include "filters/mbfilter_iso2022_kr.h" #include "filters/mbfilter_sjis.h" #include "filters/mbfilter_jis.h" +#include "filters/mbfilter_iso2022_jp_ms.h" #include "filters/mbfilter_euc_jp.h" #include "filters/mbfilter_euc_jp_win.h" #include "filters/mbfilter_ascii.h" @@ -60,6 +61,7 @@ #include "filters/mbfilter_cp936.h" #include "filters/mbfilter_cp1251.h" #include "filters/mbfilter_cp1252.h" +#include "filters/mbfilter_cp51932.h" #include "filters/mbfilter_iso8859_1.h" #include "filters/mbfilter_iso8859_2.h" #include "filters/mbfilter_iso8859_3.h" @@ -106,6 +108,8 @@ static const struct mbfl_identify_vtbl *mbfl_identify_filter_list[] = { &vtbl_identify_sjiswin, &vtbl_identify_jis, &vtbl_identify_2022jp, + &vtbl_identify_2022jpms, + &vtbl_identify_cp51932, &vtbl_identify_euccn, &vtbl_identify_cp936, &vtbl_identify_hz, diff --git a/ext/mbstring/mb_gpc.c b/ext/mbstring/mb_gpc.c index 2cf57b7fc..1cae5788e 100644 --- a/ext/mbstring/mb_gpc.c +++ b/ext/mbstring/mb_gpc.c @@ -2,7 +2,7 @@ +----------------------------------------------------------------------+ | PHP Version 5 | +----------------------------------------------------------------------+ - | Copyright (c) 1997-2006 The PHP Group | + | Copyright (c) 1997-2007 The PHP Group | +----------------------------------------------------------------------+ | This source file is subject to version 3.01 of the PHP license, | | that is bundled with this package in the file LICENSE, and is | @@ -17,7 +17,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: mb_gpc.c,v 1.17.2.2 2006/01/01 12:50:08 sniper Exp $ */ +/* $Id: mb_gpc.c,v 1.17.2.2.2.3 2007/03/18 16:36:13 iliaa Exp $ */ /* {{{ includes */ #ifdef HAVE_CONFIG_H @@ -154,6 +154,8 @@ MBSTRING_API SAPI_TREAT_DATA_FUNC(mbstr_treat_data) info.num_from_encodings = MBSTRG(http_input_list_size); info.from_language = MBSTRG(language); + MBSTRG(illegalchars) = 0; + detected = _php_mb_encoding_handler_ex(&info, array_ptr, res TSRMLS_CC); MBSTRG(http_input_identify) = detected; @@ -206,9 +208,8 @@ enum mbfl_no_encoding _php_mb_encoding_handler_ex(const php_mb_encoding_handler_ /* register_globals stuff * XXX: this feature is going to be deprecated? */ - if (info->force_register_globals) { - prev_rg_state = PG(register_globals); - PG(register_globals) = 1; + if (info->force_register_globals && !(prev_rg_state = PG(register_globals))) { + zend_alter_ini_entry("register_globals", sizeof("register_globals"), "1", sizeof("1")-1, PHP_INI_PERDIR, PHP_INI_STAGE_RUNTIME); } if (!res || *res == '\0') { @@ -341,11 +342,12 @@ enum mbfl_no_encoding _php_mb_encoding_handler_ex(const php_mb_encoding_handler_ out: /* register_global stuff */ - if (info->force_register_globals) { - PG(register_globals) = prev_rg_state; + if (info->force_register_globals && !prev_rg_state) { + zend_alter_ini_entry("register_globals", sizeof("register_globals"), "0", sizeof("0")-1, PHP_INI_PERDIR, PHP_INI_STAGE_RUNTIME); } if (convd != NULL) { + MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd); mbfl_buffer_converter_delete(convd); } if (val_list != NULL) { diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 84cd7ca54..2cf47de61 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -2,7 +2,7 @@ +----------------------------------------------------------------------+ | PHP Version 5 | +----------------------------------------------------------------------+ - | Copyright (c) 1997-2006 The PHP Group | + | Copyright (c) 1997-2007 The PHP Group | +----------------------------------------------------------------------+ | This source file is subject to version 3.01 of the PHP license, | | that is bundled with this package in the file LICENSE, and is | @@ -17,7 +17,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: mbstring.c,v 1.224.2.22.2.15 2006/09/24 07:10:54 hirokawa Exp $ */ +/* $Id: mbstring.c,v 1.224.2.22.2.22 2007/04/04 15:25:41 masugata Exp $ */ /* * PHP 4 Multibyte String module "mbstring" @@ -933,7 +933,10 @@ PHP_RINIT_FUNCTION(mbstring) MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding); MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode); MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar); - MBSTRG(illegalchars) = 0; + + if (!MBSTRG(encoding_translation)) { + MBSTRG(illegalchars) = 0; + } n = 0; if (MBSTRG(detect_order_list)) { @@ -1045,7 +1048,7 @@ PHP_MINFO_FUNCTION(mbstring) { char buf[32]; php_info_print_table_row(2, "Multibyte (japanese) regex support", "enabled"); - sprintf(buf, "%d.%d.%d", + snprintf(buf, sizeof(buf), "%d.%d.%d", ONIGURUMA_VERSION_MAJOR,ONIGURUMA_VERSION_MINOR,ONIGURUMA_VERSION_TEENY); php_info_print_table_row(2, "Multibyte regex (oniguruma) version", buf); #ifdef USE_COMBINATION_EXPLOSION_CHECK @@ -1343,7 +1346,7 @@ PHP_FUNCTION(mb_substitute_character) MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1); } else { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character."); + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character."); RETVAL_FALSE; } } @@ -1602,11 +1605,11 @@ PHP_FUNCTION(mb_strpos) } if (offset < 0 || (unsigned long)offset > haystack.len) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is out of range"); + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string."); RETURN_FALSE; } if (needle.len == 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty needle"); + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter."); RETURN_FALSE; } @@ -1636,7 +1639,7 @@ PHP_FUNCTION(mb_strpos) /* }}} */ /* {{{ proto int mb_strrpos(string haystack, string needle [, int offset [, string encoding]]) - Find the last occurrence of a character in a string within another */ + Find position of last occurrence of a string within another */ PHP_FUNCTION(mb_strrpos) { int n; @@ -1709,11 +1712,9 @@ PHP_FUNCTION(mb_strrpos) } if (haystack.len <= 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty haystack"); RETURN_FALSE; } if (needle.len <= 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING,"Empty needle"); RETURN_FALSE; } n = mbfl_strpos(&haystack, &needle, offset, 1); @@ -1741,7 +1742,7 @@ PHP_FUNCTION(mb_stripos) RETURN_FALSE; } - n = php_mb_stripos(0, old_haystack, old_haystack_len, old_needle, old_needle_len, offset, from_encoding TSRMLS_CC); + n = php_mb_stripos(0, old_haystack, old_haystack_len, old_needle, old_needle_len, offset, from_encoding TSRMLS_CC); if (n >= 0) { RETVAL_LONG(n); @@ -1767,7 +1768,11 @@ PHP_FUNCTION(mb_strripos) RETURN_FALSE; } - n = php_mb_stripos(1, old_haystack, old_haystack_len, old_needle, old_needle_len, offset, from_encoding TSRMLS_CC); + if(offset > old_haystack_len){ + RETURN_FALSE; + } + + n = php_mb_stripos(1, old_haystack, old_haystack_len, old_needle, old_needle_len, offset, from_encoding TSRMLS_CC); if (n >= 0) { RETVAL_LONG(n); @@ -1806,12 +1811,8 @@ PHP_FUNCTION(mb_strstr) } } - if (haystack.len <= 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty haystack"); - RETURN_FALSE; - } if (needle.len <= 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING,"Empty needle"); + php_error_docref(NULL TSRMLS_CC, E_WARNING,"Empty delimiter."); RETURN_FALSE; } n = mbfl_strpos(&haystack, &needle, 0, 0); @@ -1869,11 +1870,9 @@ PHP_FUNCTION(mb_strrchr) } if (haystack.len <= 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty haystack"); RETURN_FALSE; } if (needle.len <= 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING,"Empty needle"); RETURN_FALSE; } n = mbfl_strpos(&haystack, &needle, 0, 1); @@ -1921,6 +1920,11 @@ PHP_FUNCTION(mb_stristr) RETURN_FALSE; } + if(!needle.len){ + php_error_docref(NULL TSRMLS_CC, E_WARNING,"Empty delimiter."); + RETURN_FALSE; + } + haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding); if (haystack.no_encoding == mbfl_no_encoding_invalid) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding); @@ -1952,6 +1956,7 @@ PHP_FUNCTION(mb_stristr) } } } +/* }}} */ /* {{{ proto string mb_strrichr(string haystack, string needle[, bool part[, string encoding]]) Finds the last occurrence of a character in a string within another, case insensitive */ @@ -1979,7 +1984,7 @@ PHP_FUNCTION(mb_strrichr) RETURN_FALSE; } - n = php_mb_stripos(1, haystack.val, haystack.len, needle.val, needle.len, 0, from_encoding TSRMLS_CC); + n = php_mb_stripos(1, haystack.val, haystack.len, needle.val, needle.len, 0, from_encoding TSRMLS_CC); if (n <0) { RETURN_FALSE; @@ -2004,6 +2009,7 @@ PHP_FUNCTION(mb_strrichr) } } } +/* }}} */ /* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding]) Count the number of substring occurrences */ @@ -2034,7 +2040,7 @@ PHP_FUNCTION(mb_substr_count) } if (needle.len <= 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING,"Empty needle"); + php_error_docref(NULL TSRMLS_CC, E_WARNING,"Empty substring."); RETURN_FALSE; } @@ -3295,13 +3301,20 @@ PHP_FUNCTION(mb_decode_numericentity) #define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos) \ if (str[pos] == '\r' && str[pos + 1] == '\n' && (str[pos + 2] == ' ' || str[pos + 2] == '\t')) { \ - pos += 3; \ - while (str[pos] == ' ' || str[pos] == '\t') { \ + pos += 2; \ + while (str[pos + 1] == ' ' || str[pos + 1] == '\t') { \ pos++; \ } \ continue; \ } +#define MAIL_ASCIIZ_CHECK_MBSTRING(str, len) \ + pp = str; \ + ee = pp + len; \ + while ((pp = memchr(pp, '\0', (ee - pp)))) { \ + *pp = ' '; \ + } \ + #define APPEND_ONE_CHAR(ch) do { \ if (token.a > 0) { \ smart_str_appendc(&token, ch); \ @@ -3534,6 +3547,7 @@ PHP_FUNCTION(mb_send_mail) HashTable ht_headers; smart_str *s; extern void mbfl_memory_device_unput(mbfl_memory_device *device); + char *pp, *ee; if (PG(safe_mode) && (ZEND_NUM_ARGS() == 5)) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "SAFE MODE Restriction in effect. The fifth parameter is disabled in SAFE MODE."); @@ -3560,6 +3574,17 @@ PHP_FUNCTION(mb_send_mail) return; } + /* ASCIIZ check */ + MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len); + MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len); + MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len); + if (headers) { + MAIL_ASCIIZ_CHECK_MBSTRING(headers, headers_len); + } + if (extra_cmd) { + MAIL_ASCIIZ_CHECK_MBSTRING(extra_cmd, extra_cmd_len); + } + zend_hash_init(&ht_headers, 0, NULL, (dtor_func_t) my_smart_str_dtor, 0); if (headers != NULL) { @@ -3774,6 +3799,7 @@ PHP_FUNCTION(mb_send_mail) } #undef SKIP_LONG_HEADER_SEP_MBSTRING +#undef MAIL_ASCIIZ_CHECK_MBSTRING #undef APPEND_ONE_CHAR #undef SEPARATE_SMART_STR #undef PHP_MBSTR_MAIL_MIME_HEADER1 @@ -4053,8 +4079,13 @@ PHP_FUNCTION(mb_check_encoding) if (ret != NULL) { MBSTRG(illegalchars) += illegalchars; - efree(ret->val); - RETURN_BOOL(illegalchars == 0); + if (illegalchars == 0 && strncmp(string.val, ret->val, string.len) == 0) { + efree(ret->val); + RETURN_TRUE; + } else { + efree(ret->val); + RETURN_FALSE; + } } else { RETURN_FALSE; } @@ -4221,12 +4252,14 @@ MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, co str[i] = ret->val; len[i] = ret->len; } + MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd); mbfl_buffer_converter_delete(convd); } return ret ? 0 : -1; } +/* }}} */ /* {{{ MBSTRING_API int php_mb_gpc_encoding_detector() */ @@ -4330,7 +4363,6 @@ MBSTRING_API int php_mb_stripos(int mode, char *old_haystack, int old_haystack_l } if (haystack.len <= 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty haystack"); break; } @@ -4341,7 +4373,6 @@ MBSTRING_API int php_mb_stripos(int mode, char *old_haystack, int old_haystack_l } if (needle.len <= 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty needle"); break; } @@ -4352,7 +4383,7 @@ MBSTRING_API int php_mb_stripos(int mode, char *old_haystack, int old_haystack_l } if (offset < 0 || (unsigned long)offset > haystack.len) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is out of range"); + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string."); break; } @@ -4503,6 +4534,7 @@ int php_mb_encoding_converter(char **to, int *to_length, const char *from, *to = ret->val; *to_length = ret->len; } + MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd); mbfl_buffer_converter_delete(convd); diff --git a/ext/mbstring/mbstring.h b/ext/mbstring/mbstring.h index e68086905..ae184a0ac 100644 --- a/ext/mbstring/mbstring.h +++ b/ext/mbstring/mbstring.h @@ -2,7 +2,7 @@ +----------------------------------------------------------------------+ | PHP Version 5 | +----------------------------------------------------------------------+ - | Copyright (c) 1997-2006 The PHP Group | + | Copyright (c) 1997-2007 The PHP Group | +----------------------------------------------------------------------+ | This source file is subject to version 3.01 of the PHP license, | | that is bundled with this package in the file LICENSE, and is | @@ -16,7 +16,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: mbstring.h,v 1.66.2.4.2.4 2006/05/30 15:47:53 masugata Exp $ */ +/* $Id: mbstring.h,v 1.66.2.4.2.5 2007/01/01 09:36:02 sebastian Exp $ */ /* * PHP 4 Multibyte String module "mbstring" (currently only for Japanese) diff --git a/ext/mbstring/oniguruma/enc/utf16_be.c b/ext/mbstring/oniguruma/enc/utf16_be.c index 0dd2832f7..250a0cf34 100755 --- a/ext/mbstring/oniguruma/enc/utf16_be.c +++ b/ext/mbstring/oniguruma/enc/utf16_be.c @@ -2,7 +2,7 @@ utf16_be.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/enc/utf16_le.c b/ext/mbstring/oniguruma/enc/utf16_le.c index 93cc6138a..c2d860ed0 100755 --- a/ext/mbstring/oniguruma/enc/utf16_le.c +++ b/ext/mbstring/oniguruma/enc/utf16_le.c @@ -2,7 +2,7 @@ utf16_le.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/enc/utf32_be.c b/ext/mbstring/oniguruma/enc/utf32_be.c index 36b477286..c98b910ba 100755 --- a/ext/mbstring/oniguruma/enc/utf32_be.c +++ b/ext/mbstring/oniguruma/enc/utf32_be.c @@ -2,7 +2,7 @@ utf32_be.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/enc/utf32_le.c b/ext/mbstring/oniguruma/enc/utf32_le.c index 1e9487d1d..043a86074 100755 --- a/ext/mbstring/oniguruma/enc/utf32_le.c +++ b/ext/mbstring/oniguruma/enc/utf32_le.c @@ -2,7 +2,7 @@ utf32_le.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/enc/utf8.c b/ext/mbstring/oniguruma/enc/utf8.c index 0e816176b..4188aa910 100644 --- a/ext/mbstring/oniguruma/enc/utf8.c +++ b/ext/mbstring/oniguruma/enc/utf8.c @@ -2,7 +2,7 @@ utf8.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/oniguruma.h b/ext/mbstring/oniguruma/oniguruma.h index a0107cbe3..0cd7e2593 100644 --- a/ext/mbstring/oniguruma/oniguruma.h +++ b/ext/mbstring/oniguruma/oniguruma.h @@ -4,7 +4,7 @@ oniguruma.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/regcomp.c b/ext/mbstring/oniguruma/regcomp.c index 9b862657d..e441516c3 100644 --- a/ext/mbstring/oniguruma/regcomp.c +++ b/ext/mbstring/oniguruma/regcomp.c @@ -2,7 +2,7 @@ regcomp.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/regenc.h b/ext/mbstring/oniguruma/regenc.h index 58ee3e7f2..a10c738b5 100644 --- a/ext/mbstring/oniguruma/regenc.h +++ b/ext/mbstring/oniguruma/regenc.h @@ -4,7 +4,7 @@ regenc.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/regerror.c b/ext/mbstring/oniguruma/regerror.c index ad73b76c3..4888fe00f 100644 --- a/ext/mbstring/oniguruma/regerror.c +++ b/ext/mbstring/oniguruma/regerror.c @@ -2,7 +2,7 @@ regerror.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -278,6 +278,9 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) va_init_list(args, fmt); n = vsnprintf((char* )buf, bufsize, (const char* )fmt, args); + if (n < 0 || n >= bufsize) { + n = bufsize - 1; + } va_end(args); need = (pat_end - pat) * 4 + 4; diff --git a/ext/mbstring/oniguruma/regexec.c b/ext/mbstring/oniguruma/regexec.c index 769ed30c9..0715b2afc 100644 --- a/ext/mbstring/oniguruma/regexec.c +++ b/ext/mbstring/oniguruma/regexec.c @@ -2,7 +2,7 @@ regexec.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/regext.c b/ext/mbstring/oniguruma/regext.c index f5ad1f35a..f03b40e2a 100755 --- a/ext/mbstring/oniguruma/regext.c +++ b/ext/mbstring/oniguruma/regext.c @@ -2,7 +2,7 @@ regext.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/reggnu.c b/ext/mbstring/oniguruma/reggnu.c index 248957c9d..b52486595 100644 --- a/ext/mbstring/oniguruma/reggnu.c +++ b/ext/mbstring/oniguruma/reggnu.c @@ -2,7 +2,7 @@ reggnu.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/regint.h b/ext/mbstring/oniguruma/regint.h index c06bf5763..1be9efb97 100644 --- a/ext/mbstring/oniguruma/regint.h +++ b/ext/mbstring/oniguruma/regint.h @@ -4,7 +4,7 @@ regint.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/regparse.c b/ext/mbstring/oniguruma/regparse.c index 407b73fc4..6066631ed 100644 --- a/ext/mbstring/oniguruma/regparse.c +++ b/ext/mbstring/oniguruma/regparse.c @@ -2,7 +2,7 @@ regparse.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/regparse.h b/ext/mbstring/oniguruma/regparse.h index ca62dddf7..007d982ce 100644 --- a/ext/mbstring/oniguruma/regparse.h +++ b/ext/mbstring/oniguruma/regparse.h @@ -4,7 +4,7 @@ regparse.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/regposerr.c b/ext/mbstring/oniguruma/regposerr.c index e54b5c408..38f8ed55a 100644 --- a/ext/mbstring/oniguruma/regposerr.c +++ b/ext/mbstring/oniguruma/regposerr.c @@ -76,7 +76,7 @@ regerror(int posix_ecode, const regex_t* reg, char* buf, size_t size) s = ""; } else { - sprintf(tbuf, "undefined error code (%d)", posix_ecode); + sprintf(tbuf, "undefined error code (%d)", posix_ecode); s = tbuf; } diff --git a/ext/mbstring/oniguruma/regposix.c b/ext/mbstring/oniguruma/regposix.c index a3bacf722..c61a6ce7f 100644 --- a/ext/mbstring/oniguruma/regposix.c +++ b/ext/mbstring/oniguruma/regposix.c @@ -2,7 +2,7 @@ regposix.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/regsyntax.c b/ext/mbstring/oniguruma/regsyntax.c index 9114e39e6..4c2f39b67 100644 --- a/ext/mbstring/oniguruma/regsyntax.c +++ b/ext/mbstring/oniguruma/regsyntax.c @@ -2,7 +2,7 @@ regsyntax.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/regversion.c b/ext/mbstring/oniguruma/regversion.c index 5fad0cc18..d1e01d75d 100644 --- a/ext/mbstring/oniguruma/regversion.c +++ b/ext/mbstring/oniguruma/regversion.c @@ -2,7 +2,7 @@ regversion.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -47,7 +47,7 @@ onig_copyright(void) { static char s[58]; - sprintf(s, "Oniguruma %d.%d.%d : Copyright (C) 2002-2006 K.Kosako", + sprintf(s, "Oniguruma %d.%d.%d : Copyright (C) 2002-2007 K.Kosako", ONIGURUMA_VERSION_MAJOR, ONIGURUMA_VERSION_MINOR, ONIGURUMA_VERSION_TEENY); diff --git a/ext/mbstring/php_mbregex.c b/ext/mbstring/php_mbregex.c index e4b23af97..f22d54098 100644 --- a/ext/mbstring/php_mbregex.c +++ b/ext/mbstring/php_mbregex.c @@ -2,7 +2,7 @@ +----------------------------------------------------------------------+ | PHP Version 5 | +----------------------------------------------------------------------+ - | Copyright (c) 1997-2006 The PHP Group | + | Copyright (c) 1997-2007 The PHP Group | +----------------------------------------------------------------------+ | This source file is subject to version 3.01 of the PHP license, | | that is bundled with this package in the file LICENSE, and is | @@ -16,7 +16,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: php_mbregex.c,v 1.53.2.1.2.2 2006/10/11 11:53:02 tony2001 Exp $ */ +/* $Id: php_mbregex.c,v 1.53.2.1.2.4 2007/01/11 22:23:20 tony2001 Exp $ */ #ifdef HAVE_CONFIG_H @@ -607,7 +607,7 @@ PHP_FUNCTION(mb_eregi) /* {{{ _php_mb_regex_ereg_replace_exec */ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options) { - zval *arg_pattern_zval; + zval **arg_pattern_zval; char *arg_pattern; int arg_pattern_len; @@ -647,7 +647,7 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp char *option_str = NULL; int option_str_len = 0; - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zss|s", + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zss|s", &arg_pattern_zval, &replace, &replace_len, &string, &string_len, @@ -662,13 +662,13 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp syntax = MBSTRG(regex_default_syntax); } } - if (Z_TYPE_P(arg_pattern_zval) == IS_STRING) { - arg_pattern = Z_STRVAL_P(arg_pattern_zval); - arg_pattern_len = Z_STRLEN_P(arg_pattern_zval); + if (Z_TYPE_PP(arg_pattern_zval) == IS_STRING) { + arg_pattern = Z_STRVAL_PP(arg_pattern_zval); + arg_pattern_len = Z_STRLEN_PP(arg_pattern_zval); } else { /* FIXME: this code is not multibyte aware! */ - convert_to_long_ex(&arg_pattern_zval); - pat_buf[0] = (char)Z_LVAL_P(arg_pattern_zval); + convert_to_long_ex(arg_pattern_zval); + pat_buf[0] = (char)Z_LVAL_PP(arg_pattern_zval); pat_buf[1] = '\0'; arg_pattern = pat_buf; diff --git a/ext/mbstring/php_mbregex.h b/ext/mbstring/php_mbregex.h index 70460676f..ff90e9d2a 100644 --- a/ext/mbstring/php_mbregex.h +++ b/ext/mbstring/php_mbregex.h @@ -2,7 +2,7 @@ +----------------------------------------------------------------------+ | PHP Version 5 | +----------------------------------------------------------------------+ - | Copyright (c) 1997-2006 The PHP Group | + | Copyright (c) 1997-2007 The PHP Group | +----------------------------------------------------------------------+ | This source file is subject to version 3.01 of the PHP license, | | that is bundled with this package in the file LICENSE, and is | @@ -16,7 +16,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: php_mbregex.h,v 1.12.2.1 2006/01/01 12:50:08 sniper Exp $ */ +/* $Id: php_mbregex.h,v 1.12.2.1.2.1 2007/01/01 09:36:02 sebastian Exp $ */ #ifndef _PHP_MBREGEX_H #define _PHP_MBREGEX_H diff --git a/ext/mbstring/php_unicode.c b/ext/mbstring/php_unicode.c index e65b4b4e2..78121cdb2 100644 --- a/ext/mbstring/php_unicode.c +++ b/ext/mbstring/php_unicode.c @@ -2,7 +2,7 @@ +----------------------------------------------------------------------+ | PHP Version 5 | +----------------------------------------------------------------------+ - | Copyright (c) 1997-2006 The PHP Group | + | Copyright (c) 1997-2007 The PHP Group | +----------------------------------------------------------------------+ | This source file is subject to version 3.01 of the PHP license, | | that is bundled with this package in the file LICENSE, and is | diff --git a/ext/mbstring/php_unicode.h b/ext/mbstring/php_unicode.h index 6ed23e23b..3ae4246d4 100644 --- a/ext/mbstring/php_unicode.h +++ b/ext/mbstring/php_unicode.h @@ -2,7 +2,7 @@ +----------------------------------------------------------------------+ | PHP Version 5 | +----------------------------------------------------------------------+ - | Copyright (c) 1997-2006 The PHP Group | + | Copyright (c) 1997-2007 The PHP Group | +----------------------------------------------------------------------+ | This source file is subject to version 3.01 of the PHP license, | | that is bundled with this package in the file LICENSE, and is | |
