summaryrefslogtreecommitdiff
path: root/ext/mbstring/oniguruma
diff options
context:
space:
mode:
Diffstat (limited to 'ext/mbstring/oniguruma')
-rw-r--r--ext/mbstring/oniguruma/AUTHORS1
-rw-r--r--ext/mbstring/oniguruma/COPYING4
-rw-r--r--ext/mbstring/oniguruma/HISTORY387
-rw-r--r--ext/mbstring/oniguruma/README51
-rw-r--r--ext/mbstring/oniguruma/README.ja53
-rw-r--r--ext/mbstring/oniguruma/config.h.in125
-rw-r--r--ext/mbstring/oniguruma/doc/API586
-rw-r--r--ext/mbstring/oniguruma/doc/API.ja593
-rw-r--r--ext/mbstring/oniguruma/doc/FAQ33
-rw-r--r--ext/mbstring/oniguruma/doc/FAQ.ja115
-rw-r--r--ext/mbstring/oniguruma/doc/RE412
-rw-r--r--ext/mbstring/oniguruma/doc/RE.ja424
-rw-r--r--ext/mbstring/oniguruma/enc/big5.c2
-rw-r--r--ext/mbstring/oniguruma/enc/euc_jp.c20
-rw-r--r--ext/mbstring/oniguruma/enc/euc_kr.c2
-rw-r--r--ext/mbstring/oniguruma/enc/euc_tw.c2
-rw-r--r--ext/mbstring/oniguruma/enc/gb18030.c501
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_1.c2
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_10.c8
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_11.c2
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_13.c8
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_14.c8
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_15.c8
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_16.c8
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_2.c8
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_3.c8
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_4.c8
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_5.c8
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_6.c2
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_7.c8
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_8.c2
-rw-r--r--ext/mbstring/oniguruma/enc/iso8859_9.c8
-rw-r--r--ext/mbstring/oniguruma/enc/koi8.c16
-rw-r--r--ext/mbstring/oniguruma/enc/koi8_r.c8
-rw-r--r--ext/mbstring/oniguruma/enc/sjis.c21
-rw-r--r--ext/mbstring/oniguruma/enc/unicode.c37
-rwxr-xr-xext/mbstring/oniguruma/enc/utf16_be.c10
-rwxr-xr-xext/mbstring/oniguruma/enc/utf16_le.c10
-rwxr-xr-xext/mbstring/oniguruma/enc/utf32_be.c10
-rwxr-xr-xext/mbstring/oniguruma/enc/utf32_le.c10
-rw-r--r--ext/mbstring/oniguruma/enc/utf8.c90
-rwxr-xr-xext/mbstring/oniguruma/index.html124
-rw-r--r--ext/mbstring/oniguruma/onigcmpt200.h6
-rw-r--r--ext/mbstring/oniguruma/oniggnu.h8
-rw-r--r--ext/mbstring/oniguruma/oniguruma.h219
-rw-r--r--ext/mbstring/oniguruma/regcomp.c854
-rw-r--r--ext/mbstring/oniguruma/regenc.c66
-rw-r--r--ext/mbstring/oniguruma/regenc.h26
-rw-r--r--ext/mbstring/oniguruma/regerror.c34
-rw-r--r--ext/mbstring/oniguruma/regexec.c562
-rwxr-xr-xext/mbstring/oniguruma/regext.c4
-rw-r--r--ext/mbstring/oniguruma/reggnu.c12
-rw-r--r--ext/mbstring/oniguruma/regint.h102
-rw-r--r--ext/mbstring/oniguruma/regparse.c459
-rw-r--r--ext/mbstring/oniguruma/regparse.h22
-rw-r--r--ext/mbstring/oniguruma/regposix.c11
-rw-r--r--ext/mbstring/oniguruma/regsyntax.c33
-rw-r--r--ext/mbstring/oniguruma/regversion.c4
-rw-r--r--ext/mbstring/oniguruma/st.c140
-rw-r--r--ext/mbstring/oniguruma/st.h16
-rw-r--r--ext/mbstring/oniguruma/win32/config.h168
61 files changed, 5363 insertions, 1126 deletions
diff --git a/ext/mbstring/oniguruma/AUTHORS b/ext/mbstring/oniguruma/AUTHORS
new file mode 100644
index 000000000..93167bd43
--- /dev/null
+++ b/ext/mbstring/oniguruma/AUTHORS
@@ -0,0 +1 @@
+sndgk393 AT ybb DOT ne DOT jp (K.Kosako)
diff --git a/ext/mbstring/oniguruma/COPYING b/ext/mbstring/oniguruma/COPYING
index ed3fa53b2..4d321bb93 100644
--- a/ext/mbstring/oniguruma/COPYING
+++ b/ext/mbstring/oniguruma/COPYING
@@ -1,4 +1,4 @@
-OniGuruma LICENSE
+Oniguruma LICENSE
-----------------
When this software is partly used or it is distributed with Ruby,
@@ -6,7 +6,7 @@ this of Ruby follows the license of Ruby.
It follows the BSD license in the case of the one except for it.
/*-
- * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/ext/mbstring/oniguruma/HISTORY b/ext/mbstring/oniguruma/HISTORY
index c648c5455..6c824a697 100644
--- a/ext/mbstring/oniguruma/HISTORY
+++ b/ext/mbstring/oniguruma/HISTORY
@@ -1,5 +1,364 @@
History
+2006/09/19: Version 4.4.4
+
+2006/09/19: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux].
+2006/09/19: [impl] (thanks KOYAMA Tetsuji)
+ HAVE_STDARG_PROTOTYPES was not defined in Mac OS X
+ by Xcode 2.4(gcc 4.0.1) problem. [php-dev 1312] etc...
+
+2006/09/15: Version 4.4.3
+
+2006/09/15: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux].
+2006/09/15: [bug] (thanks Allan Odgaard)
+ out of range access in bm_search_notrev().
+ (p < s)
+
+2006/09/08: Version 4.4.2
+
+2006/09/08: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux].
+2006/09/08: [bug] (thanks K.Takata)
+ out of range access in bm_search_notrev().
+2006/09/04: [spec] (thanks K.Takata)
+ allow look-behind in negative look-behind.
+ ex. /(?<!(?<=a)b|c)d/
+
+2006/08/29: Version 4.4.1
+
+2006/08/29: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux].
+2006/08/29: [dist] (thanks Seiji Masugata)
+ add configure option --enable-combination-explosion-check
+
+2006/08/25: Version 4.4.0
+
+2006/08/25: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux].
+2006/08/25: [impl] add_state_check_num() should be enclosed in
+ ifdef USE_COMBINATION_EXPLOSION_CHECK.
+2006/08/23: [spec] config USE_COMBINATION_EXPLOSION_CHECK is enabled
+ in Ruby mode only.
+2006/08/22: [impl] remove last line comma in enum OpCode.
+2006/08/22: [impl] remove OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT and
+ OP_STATE_CHECK_ANYCHAR_ML_STAR_PEEK_NEXT.
+2006/08/22: [impl] remove OP_BACKREF3.
+
+2006/08/21: Version 4.3.1
+
+2006/08/21: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
+2006/08/21: [impl] change stack type values
+ and re-define STK_MASK_TO_VOID_TARGET etc...
+2006/08/21: [impl] set repeat_range[].upper to 0x7fffffff as infinite.
+2006/08/21: [impl] add STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE.
+2006/08/21: [impl] reduce (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n}
+2006/08/21: [impl] reduce (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n}
+ if backreference is not used.
+2006/08/17: [bug] should check scan_env.num_call > 0 for backrefed pattern
+ in combination explosion check.
+
+2006/08/17: Version 4.3.0
+
+2006/08/17: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
+2006/08/17: [new] add config USE_COMBINATION_EXPLOSION_CHECK.
+ check /(.+)*/, /(\s*foo\s*)*/ etc...
+ [API] add num_comb_exp_check member in regex_t.
+ [dist] change LTVERSION value to "1:0:0" in configure.in.
+2006/08/15: [bug] OP_REPEAT_INC process in match_at().
+ should check repeat-count >= range-upper and
+ range-upper may be infinite.
+
+2006/08/11: Version 4.2.3
+
+2006/08/11: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
+2006/08/10: [impl] remove double call in set_qualifier().
+2006/08/10: [impl] remove by_number member in QualifierNode.
+2006/08/09: [impl] remove a comma at the end of enum ReduceType
+ for escape warning on Mac OS X.
+2006/08/07: [impl] remove warning in regcomp.c.
+2006/08/07: [spec] move definition of USE_BACKREF_AT_LEVEL into NOT_RUBY.
+
+2006/08/03: Version 4.2.2
+
+2006/08/03: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
+2006/08/03: [bug] (thanks Hiroyuki Yamamoto)
+ segmentation fault in regexec(). (POSIX API)
+2006/08/02: [bug] combination of \G in look-ahead/look-behind and other
+ anchors(\A, \z, \Z) cause invalid result.
+ ex. /(?!\G)a\z/.match("ba")
+ start arg. of MATCH_ARG_INIT() should be original
+ arg. of onig_search().
+
+2006/07/31: Version 4.2.1
+
+2006/07/31: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
+2006/07/31: [bug] (thanks Kimura Minoru)
+ re-implement bm_search_notrev().
+2006/07/31: [impl] bm_search_notrev() refactoring.
+2006/07/31: [bug] (thanks Kimura Minoru)
+ fix incomplete multibyte string in exact info.
+2006/07/31: [impl] (thanks Seiji Masugata)
+ remove cast in va_init_list() for Intel C Compiler.
+
+2006/07/18: Version 4.2.0
+
+2006/07/18: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/07/18: [new] (thanks Wolfgang Nadasi-Donner)
+ add back reference with nest level.
+ \k<name+n>, \k<name-n>
+2006/07/11: [impl] change long to unsigned long for ONIG_OPTION_XXX
+ and ONIG_SYN_XXX number literals.
+
+2006/07/03: Version 4.1.2
+
+2006/07/03: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/07/03: [spec] (thanks Wolfgang Nadasi-Donner)
+ allow \G in look-behind.
+ add ANCHOR_BEGIN_POSITION flag in setup_tree().
+2006/06/12: [impl] (thanks matz)
+ fix cast from char* to const char*
+ in onig_snprintf_with_pattern().
+ fix cast from char* to const char*
+ for PopularQStr[] and ReduceQStr[].
+
+2006/05/22: Version 4.1.1
+
+2006/05/22: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/05/22: [impl] add position string argument to STACK_BASE_CHECK().
+2006/05/22: [bug] (thanks NARUSE, Yui)
+ add STK_NULL_CHECK_END to IS_TO_VOID_TARGET().
+ ex. core dump in
+ /(?<pare>\(([^\(\)]++|\g<pare>)*+\))/.match('((a))')
+
+2006/05/15: Version 4.1.0
+
+2006/05/15: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/05/15: [impl] thread atomic changes for onig_end() and
+ onig_free_node_list().
+2006/05/15: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/05/15: [dist] update API, API.ja, FAQ, FAQ.ja.
+2006/05/15: [spec] remove onig_recompile(), onig_recompile_deluxe()
+ and re_recompile_pattern().
+ add config USE_RECOMPILE_API.
+2006/05/15: [impl] improved thread safe implementation of onig_search()
+ and onig_match().
+
+2006/05/11: Version 4.0.4
+
+2006/05/11: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/05/11: [bug] (thanks Yuji Kaneda)
+ dead-lock in onig_end().
+2006/05/11: [dist] update index.html.
+
+2006/05/08: Version 4.0.3
+
+2006/05/08: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/05/08: [bug] (thanks Allan Odgaard)
+ Segmentation fault in backward search.
+ ex. /^\t.*$/
+2006/04/18: [dist] update index.html.
+2006/04/05: [dist] update index.html.
+2006/03/24: [dist] update doc/RE, doc/RE.ja.
+
+2006/03/23: Version 4.0.2
+
+2006/03/22: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/03/22: [impl] add both of ONIG_OPTION_DONT_CAPTURE_GROUP
+ and ONIG_OPTION_CAPTURE_GROUP check.
+2006/03/22: [spec] add error code ONIGERR_INVALID_COMBINATION_OF_OPTIONS.
+2006/03/22: [impl] remove USE_NAMED_GROUP condition from
+ ONIG_OPTION_DONT_CAPTURE_GROUP check in parse_effect().
+2006/03/22: [new] add API onig_noname_group_capture_is_active().
+2006/03/01: [spec] rename regex object type from regex_t to OnigRegexType.
+ add typedef OnigRegexType regex_t
+ unless ONIG_ESCAPE_REGEX_T_COLLISION is defined.
+2006/02/27: [spec] change ONIG_MAX_MULTI_BYTE_RANGES_NUM from 1000
+ to 10000. (for docdiff program)
+2006/02/17: [dist] change COPYING year 2005 -> 2006.
+
+2006/02/07: Version 4.0.1
+
+2006/02/07: [test] success in ruby 1.9.0 (2005-11-28) [i686-linux].
+2006/02/07: [bug] memory leaks in onig_free_shared_cclass_table().
+2006/02/03: [ruby] add -m 0644 option to install command in "make 19".
+2006/02/03: [impl] rename ANCHOR_ANYCHAR_STAR_PL to ANCHOR_ANYCHAR_STAR_ML.
+ change from IS_POSIXLINE() to IS_MULTILINE()
+ for ANCHOR_ANYCHAR_START/_ML decision
+ in optimize_node_left().
+2006/01/26: [dist] update index.html for Oniguruma 2.5.3.
+2006/01/25: [dist] update URL in index.html.
+
+2006/01/24: Version 4.0.0
+
+2006/01/24: [test] success in ruby 1.9.0 (2005-11-28) [i386-cygwin].
+2006/01/24: [test] success in ruby 1.9.0 (2005-11-28) [i686-linux].
+2006/01/24: [dist] remove warnings from sample/encode.c.
+2006/01/24: [dist] change install description in README(.ja).
+2006/01/24: [dist] remove re.c.XXX.patch from distribution and CVS.
+2006/01/24: [dist] --- support shared library ---
+ use GNU libtool/automake.
+ change configure.in and add Makefile.am, sample/Makefile.am.
+ add AUTHORS file.
+2006/01/24: [dist] test programs return exit code -1 when test fails.
+2006/01/24: [bug] (thanks KIMURA Koichi)
+ invalid syntax definition in ONIG_SYNTAX_GREP.
+ ONIG_SYN_OP_BRACE_INTERVAL
+ -> ONIG_SYN_OP_ESC_BRACE_INTERVAL
+2006/01/23: [dist] fix configure.in for onig-config.
+2006/01/19: [new] add new config USE_UNICODE_ALL_LINE_TERMINATORS.
+ (U+000d, U+0085, U+2028, U+2029)
+2005/12/29: [dist] change pmatch array size to 25 in testconv.rb.
+2005/12/26: [dist] fix name in test.rb.
+2005/12/26: [dist] update index.html for 2.5.1.
+
+2005/11/29: Version 3.9.1
+
+2005/11/29: [test] success in ruby 1.9.0 (2005-11-28) [i686-linux].
+2005/11/24: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux].
+2005/11/21: [test] success in ruby 1.9.0 (2005-11-20) [i386-cygwin].
+2005/11/21: [bug] (thanks Allan Odgaard)
+ utf-8 character comments in extended mode leads
+ invalid result.
+ ex. /(?x)(?<= # <any-utf-8 multibyte char>o\n~) /
+ fix onigenc_unicode_is_code_ctype() and
+ utf8_is_code_ctype().
+2005/11/20: [bug] (thanks MATSUMOTO Satoshi) (thanks Isao Sonobe)
+ begin-line anchor and BM search optimization leads
+ invalid result in UTF-16/32.
+ fix in set_optimize_exact_info().
+
+2005/11/20: Version 3.9.0
+
+2005/11/20: [test] success in ruby 1.9.0 (2005-11-20) [i386-cygwin].
+2005/11/20: [test] success in ruby 1.9.0 (2005-10-18) [i386-cygwin].
+2005/11/20: [new] add new config USE_CRNL_AS_LINE_TERMINATOR.
+ (!!! NO SUPPORT experimental option !!!)
+2005/11/15: [bug] (thanks Allan Odgaard)
+ tok->escape was not cleared in fetch_token_in_cc().
+ ex. [\s&&[^\n]] makes wrong result.
+2005/10/18: [impl] (thanks nobu)
+ change sjis_mbc_enc_len()
+ and node_new_cclass_by_codepoint_range() scope to static.
+2005/09/05: [dist] remove link to MultiFind.
+2005/09/01: [dist] add link to yagrep.
+
+2005/08/23: Version 3.8.9
+
+2005/08/23: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux].
+2005/08/23: [inst] fix Makefile.in for make ctest/ptest.
+
+2005/08/23: Version 3.8.8
+
+2005/08/23: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux].
+2005/08/23: [impl] split is_code_in_cc() from onig_is_code_in_cc().
+2005/08/23: [impl] should check DATA_ENSURE() at OP_CCLASS_NODE in match_at().
+2005/08/23: [impl] (thanks akr)
+ add ONIG_OPTION_MAXBIT for escape conflict with
+ Ruby's option.
+2005/08/22: [impl] escape GCC 4.0 warnings for testc.c.
+2005/08/22: [bug] (thanks nobu, matz) [ruby-dev:26840]
+ UTF-8 0xFE, 0xFF handling bug in code_is_in_cclass_node().
+ abort on /\S*/ =~ "\xfe"
+2005/08/22: [impl] escape GCC 4.0 warnings for sample/*.c.
+2005/08/22: [impl] fix testconvu.rb.
+2005/08/22: [impl] escape GCC 4.0 warnings.
+
+2005/08/09: Version 3.8.7
+
+2005/08/09: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux].
+2005/08/09: [bug] (thanks Allan Odgaard)
+ should not call enc_len() for s == range
+ in onig_search().
+2005/08/01: [dist] add mkdir $prefix, mkdir $exec_prefix to make install.
+
+2005/07/27: Version 3.8.6
+
+2005/07/27: [test] success in ruby 1.9.0 (2005-07-26) [i686-linux].
+2005/07/27: [impl] update onig-config.in.
+2005/07/26: [new] (thanks Yen-Ju Chen)
+ add Oniguruma configuration check program.
+ (onig-config.in)
+
+2005/07/14: Version 3.8.5
+
+2005/07/14: [test] success in ruby 1.9.0 (2005-07-14) [i686-linux].
+2005/07/11: [test] success in ruby 1.9.0 (2005-07-04) [i686-linux].
+2005/07/11: [bug] (thanks nobu) [ruby-dev:26505]
+ invalid handling for /\c\x/ and /\C-\x/.
+ fix fetch_escaped_value().
+2005/07/05: [impl] (thanks Alexey Zakhlestine)
+ escape GCC 4.0 warnings.
+
+2005/07/01: Version 3.8.4
+
+2005/07/01: [test] success in ruby 1.9.0 (2005-07-01) [i686-linux].
+2005/06/30: [test] success in ruby 1.9.0 (2005-06-28) [i686-linux].
+2005/06/30: [dist] add GB 18030 test to sample/encode.c.
+2005/06/30: [impl] escape warning of gb18030_left_adjust_char_head().
+2005/06/30: [new] (contributed by KUBO Takehiro)
+ add new character encoding ONIG_ENCODING_GB18030.
+2005/06/30: [bug] invalid ctype check for multibyte encodings.
+ ("graph", "print")
+ fix onigenc_mb2/4_is_code_ctype(),
+ eucjp_is_code_ctype() and sjis_is_code_ctype().
+2005/06/30: [bug] invalid conversion from code point to mbc in
+ onigenc_mb4_code_to_mbc().
+
+2005/06/28: Version 3.8.3
+
+2005/06/28: [test] success in ruby 1.9.0 (2005-06-28) [i686-linux].
+2005/06/27: [test] success in ruby 1.9.0 (2005-05-31) [i686-linux].
+2005/06/27: [bug] (thanks Wolfgang Nadasi-Donner)
+ invalid check for never ending recursion.
+ lower zero quantifier should be treated as
+ a non-recursive call alternative.
+ ex. /(?<bal>[^()]*(\(\g<bal>\)[^()]*)*)/
+2005/06/15: [impl] add divide_ambig_string_node_sub().
+2005/06/15: [dist] add a test to sample/encode.c.
+2005/06/10: [new] add ONIG_SYNTAX_PERL_NG. (Perl + named group)
+
+2005/06/01: Version 3.8.2
+
+2005/06/01: [test] success in ruby 1.9.0 (2005-05-31) [i686-linux].
+2005/05/31: [dist] add doc/FAQ and doc/FAQ.ja.
+2005/05/31: [impl] minor change in node_new().
+2005/05/30: [test] success in ruby 1.9.0 (2005-05-11) [i686-linux].
+2005/05/30: [bug] (thanks Allan Odgaard)
+ FreeNodeList null check should be on thread-atomic
+ in node_new().
+
+2005/05/11: Version 3.8.1
+
+2005/05/11: [test] success in ruby 1.9.0 (2005-05-11) [i386-mswin32].
+2005/05/11: [dist] update win32/Makefile (make 19).
+2005/05/11: [test] success in ruby 1.9.0 (2005-05-11) [i686-linux].
+2005/05/06: [test] success in ruby 1.9.0 (2005-05-06) [i686-linux].
+2005/05/06: [impl] (thanks nobu) [ruby-core:4815]
+ add #ifdef USE_VARIABLE_META_CHARS to goto label.
+2005/04/25: [test] success in ruby 1.9.0 (2005-04-25) [i686-linux].
+2005/04/25: [impl] change DEFAULT_WARN_FUNCTION and DEFAULT_VERB_WARN_FUNCTION
+ to onig_rb_warn() and onig_rb_warning().
+
+2005/04/15: Version 3.8.0
+
+2005/04/15: [test] success in ruby 1.9.0 (2005-04-14) [i686-linux].
+2005/04/01: [test] success in ruby 1.9.0 (2005-03-24) [i686-linux].
+2005/04/01: [impl] (thanks Joe Orton)
+ (thanks Moriyoshi Koizumi)
+ many const-ification to many *.[ch] files.
+
+2005/03/25: Version 3.7.2
+
+2005/03/25: [test] success in ruby 1.9.0 (2005-03-24) [i686-linux].
+2005/03/23: [test] success in ruby 1.9.0 (2005-03-20) [i686-linux].
+2005/03/23: [test] success in ruby 1.9.0 (2005-03-08) [i686-linux].
+2005/03/23: [new] add ONIG_SYNTAX_ASIS.
+2005/03/23: [new] add ONIG_SYN_OP2_INEFFECTIVE_ESCAPE.
+2005/03/09: [spec] rename MBCTYPE_XXX to RE_MBCTYPE_XXX. (GNU API)
+2005/03/08: [test] success in ruby 1.9.0 (2005-03-08) [i686-linux].
+2005/03/08: [impl] (thanks matz) [ruby-dev:25783]
+ should not allocate memory for key data in st.c.
+ move st_*_strend() functions from st.c. fixed some
+ potential memory leaks.
+ (imported from Ruby 1.9 2005-03-08)
+
2005/03/07: Version 3.7.1
2005/03/07: [test] success in ruby 1.9.0 (2005-03-07) [i686-linux].
@@ -24,7 +383,7 @@ History
remove reggnu.c from make 19.
2005/02/19: [dist] update doc/API and doc/API.ja.
2005/02/19: [test] success in ruby 1.9.0 (2005-02-19) [i386-cygwin].
-2005/02/19: [impl] (thanks Alexey Zakhlestin)
+2005/02/19: [impl] (thanks Alexey Zakhlestine)
change UChar* to const UChar* in oniguruma.h,
regenc.h and regparse.h.
2005/02/13: [impl] change UChar* to const UChar* in oniguruma.h and
@@ -1366,8 +1725,30 @@ svn copy http://localhost/repos/trunk/oniguruma http://localhost/repos/branches/
<create tag>
svn copy http://localhost/repos/trunk/oniguruma http://localhost/repos/tags/oniguruma/X.X.X -m "onigdXXXXXXXX"
-<show all tags>
+
+<CVS: show all tags>
cvs history -T
-<add tag>
+<CVS: add tag>
cvs rtag "VERSION_X_X_X" oniguruma
+
+
+<GNU Autotools: bootstrap>
+* write Makefile.am and configure.in.
+> aclocal
+> libtoolize
+> automake --foreign --add-missing
+> autoconf
+> configure --with-rubydir=... CFLAGS="-O2 -Wall"
+
+
+<GNU libtool: version management>
+
+ VERSION = current:revision:age
+
+ current: interface number (from 0)
+ revision: implementation number of same interface (from 0)
+ age: number of supported previous interfaces
+ (if current only supported then age == 0)
+
+//END
diff --git a/ext/mbstring/oniguruma/README b/ext/mbstring/oniguruma/README
index dc4fb3b64..f2cc7c981 100644
--- a/ext/mbstring/oniguruma/README
+++ b/ext/mbstring/oniguruma/README
@@ -1,4 +1,4 @@
-README 2005/02/04
+README 2006/05/15
Oniguruma ---- (C) K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
@@ -14,11 +14,12 @@ Supported character encodings:
ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,
EUC-JP, EUC-TW, EUC-KR, EUC-CN,
- Shift_JIS, Big5, KOI8-R, KOI8 (*),
+ Shift_JIS, Big5, GB 18030, KOI8-R, KOI8,
ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,
ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,
ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
+* GB 18030: contributed by KUBO Takehiro
* KOI8 is not included in library archive by default setup.
(need to edit Makefile if you want to use it.)
------------------------------------------------------------
@@ -31,15 +32,20 @@ Install
2. make
3. make install
- library file: libonig.a
+ * uninstall
- test (ASCII/EUC-JP)
+ make uninstall
- make ctest
+ * test (ASCII/EUC-JP)
- uninstall
+ make atest
- make uninstall
+ * configuration check
+
+ onig-config --cflags
+ onig-config --libs
+ onig-config --prefix
+ onig-config --exec-prefix
@@ -73,8 +79,21 @@ Regular Expressions
Usage
- Include oniguruma.h in your program. (native API)
- See doc/API for native API.
+ Include oniguruma.h in your program. (Oniguruma API)
+ See doc/API for Oniguruma API.
+
+ If you want to disable UChar type (== unsigned char) definition
+ in oniguruma.h, define ONIG_ESCAPE_UCHAR_COLLISION and then
+ include oniguruma.h.
+
+ If you want to disable regex_t type definition in oniguruma.h,
+ define ONIG_ESCAPE_REGEX_T_COLLISION and then include oniguruma.h.
+
+ Example of the compiling/linking command line in Unix or Cygwin,
+ (prefix == /usr/local case)
+
+ cc sample.c -L/usr/local/lib -lonig
+
If you want to use static link library(onig_s.lib) in Win32,
add option -DONIG_EXTERN=extern to C compiler.
@@ -83,19 +102,20 @@ Usage
Sample Programs
- sample/simple.c example of the minimum (native API)
+ sample/simple.c example of the minimum (Oniguruma API)
sample/names.c example of the named group callback.
sample/encode.c example of some encodings.
sample/listcap.c example of the capture history.
sample/posix.c POSIX API sample.
sample/sql.c example of the variable meta characters.
(SQL-like pattern matching)
- sample/syntax.c Perl and Java syntax test.
+ sample/syntax.c Perl, Java and ASIS syntax test.
Source Files
oniguruma.h Oniguruma API header file. (public)
+ onig-config.in configuration check program template.
regenc.h character encodings framework header file.
regint.h internal definitions
@@ -125,9 +145,10 @@ Source Files
enc/euc_tw.c EUC-TW encoding.
enc/euc_kr.c EUC-KR, EUC-CN encoding.
enc/sjis.c Shift_JIS encoding.
- enc/big5.c Big5 encoding.
- enc/koi8.c KOI8 encoding.
- enc/koi8_r.c KOI8-R encoding.
+ enc/big5.c Big5 encoding.
+ enc/gb18030.c GB 18030 encoding (contributed by KUBO Takehiro)
+ enc/koi8.c KOI8 encoding.
+ enc/koi8_r.c KOI8-R encoding.
enc/iso8859_1.c ISO-8859-1 encoding. (Latin-1)
enc/iso8859_2.c ISO-8859-2 encoding. (Latin-2)
enc/iso8859_3.c ISO-8859-3 encoding. (Latin-3)
@@ -159,7 +180,6 @@ Source Files
API differences with Japanized GNU regex(version 0.12) of Ruby 1.8/1.6
+ re_compile_fastmap() is removed.
- + re_recompile_pattern() is added.
+ re_alloc_pattern() is added.
@@ -169,7 +189,6 @@ ToDo
? Unicode Property.
? ambig-flag Katakana <-> Hiragana.
? add ONIG_OPTION_NOTBOS/NOTEOS. (\A, \z, \Z)
- ? add ONIG_SYNTAX_ASIS.
?? \X (== \PM\pM*)
?? implement syntax behavior ONIG_SYN_CONTEXT_INDEP_ANCHORS.
?? variable line separator.
diff --git a/ext/mbstring/oniguruma/README.ja b/ext/mbstring/oniguruma/README.ja
index 44553abfe..2394e958f 100644
--- a/ext/mbstring/oniguruma/README.ja
+++ b/ext/mbstring/oniguruma/README.ja
@@ -1,4 +1,4 @@
-README.ja 2005/02/04
+README.ja 2006/05/15
鬼車 ---- (C) K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
@@ -14,11 +14,12 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,
EUC-JP, EUC-TW, EUC-KR, EUC-CN,
- Shift_JIS, Big5, KOI8-R, KOI8 (*),
+ Shift_JIS, Big5, GB 18030, KOI8-R, KOI8,
ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,
ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,
ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
+* GB 18030: µ×ÊÝ·òÍλáÄó¶¡
* KOI8¤Ï¥Ç¥Õ¥©¥ë¥È¤Î¥»¥Ã¥È¥¢¥Ã¥×¤Ç¤Ï¥é¥¤¥Ö¥é¥ê¤ÎÃæ¤Ë´Þ¤Þ¤ì¤Ê¤¤¡£
(ɬÍפǤ¢¤ì¤ÐMakefile¤òÊÔ½¸¤¹¤ë¤³¤È)
------------------------------------------------------------
@@ -31,15 +32,21 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
2. make
3. make install
- ¥é¥¤¥Ö¥é¥ê¥Õ¥¡¥¤¥ë: libonig.a
+ アンインストール
+
+ make uninstall
ưºî¥Æ¥¹¥È (ASCII/EUC-JP)
- make ctest
+ make atest
- アンインストール
- make uninstall
+ ¹½À®³Îǧ
+
+ onig-config --cflags
+ onig-config --libs
+ onig-config --prefix
+ onig-config --exec-prefix
@@ -71,8 +78,28 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
»ÈÍÑÊýË¡
- »ÈÍѤ¹¤ë¥×¥í¥°¥é¥à¤Ç¡¢oniguruma.h¤ò¥¤¥ó¥¯¥ë¡¼¥É¤¹¤ë(Native API¤Î¾ì¹ç)¡£
- Native API¤Ë¤Ä¤¤¤Æ¤Ï¡¢doc/API.ja¤ò»²¾È¡£
+ »ÈÍѤ¹¤ë¥×¥í¥°¥é¥à¤Ç¡¢oniguruma.h¤ò¥¤¥ó¥¯¥ë¡¼¥É¤¹¤ë(Oniguruma API¤Î¾ì¹ç)¡£
+ Oniguruma API¤Ë¤Ä¤¤¤Æ¤Ï¡¢doc/API.ja¤ò»²¾È¡£
+
+ oniguruma.h¤ÇÄêµÁ¤µ¤ì¤Æ¤¤¤ë·¿Ì¾UChar(== unsigned char)¤ò̵¸ú¤Ë¤·¤¿¤¤¾ì¹ç
+ ¤Ë¤Ï¡¢ONIG_ESCAPE_UCHAR_COLLISION¤òdefine¤·¤Æ¤«¤éoniguruma.h¤ò¥¤¥ó¥¯¥ë¡¼¥É
+ ¤¹¤ë¤³¤È¡£¤³¤Î¤È¤­¤Ë¤ÏUChar¤ÏÄêµÁ¤µ¤ì¤º¡¢OnigUChar¤È¤¤¤¦Ì¾Á°¤ÎÄêµÁ¤Î¤ß¤¬
+ Í­¸ú¤Ë¤Ê¤ë¡£
+
+ oniguruma.h¤ÇÄêµÁ¤µ¤ì¤Æ¤¤¤ë·¿Ì¾regex_t¤ò̵¸ú¤Ë¤·¤¿¤¤¾ì¹ç¤Ë¤Ï¡¢
+ ONIG_ESCAPE_REGEX_T_COLLISION¤òdefine¤·¤Æ¤«¤éoniguruma.h¤ò¥¤¥ó¥¯¥ë¡¼¥É
+ ¤¹¤ë¤³¤È¡£¤³¤Î¤È¤­¤Ë¤Ïregex_t¤ÏÄêµÁ¤µ¤ì¤º¡¢OnigRegexType, OnigRegex¤È¤¤¤¦
+ ̾Á°¤ÎÄêµÁ¤Î¤ß¤¬Í­¸ú¤Ë¤Ê¤ë¡£
+
+ Unix/Cygwin¾å¤Ç¥³¥ó¥Ñ¥¤¥ë¡¢¥ê¥ó¥¯¤¹¤ë¾ì¹ç¤ÎÎã¡§
+ (prefix¤¬/usr/local¤Î¤È¤­)
+ cc sample.c -L/usr/local/lib -lonig
+
+ GNU libtool¤ò»ÈÍѤ·¤Æ¤¤¤ë¤Î¤Ç¡¢¥×¥é¥Ã¥È¥Õ¥©¡¼¥à¤¬¶¦Í­¥é¥¤¥Ö¥é¥ê¤ò¥µ¥Ý¡¼¥È¤·¤Æ
+ ¤¤¤ì¤Ð¡¢»ÈÍѤǤ­¤ë¤è¤¦¤Ë¤Ê¤Ã¤Æ¤¤¤ë¡£
+ ÀÅۥ饤¥Ö¥é¥ê¤È¶¦Í­¥é¥¤¥Ö¥é¥ê¤Î¤É¤Á¤é¤ò»ÈÍѤ¹¤ë¤«¤ò»ØÄꤹ¤ëÊýË¡¡¢¼Â¹Ô»þÅÀ¤Ç¤Î
+ ´Ä¶­ÀßÄêÊýË¡¤Ë¤Ä¤Æ¤Ï¡¢¼«Ê¬¤ÇÄ´¤Ù¤Æ²¼¤µ¤¤¡£
+
Win32¤Ç¥¹¥¿¥Æ¥£¥Ã¥¯¥ê¥ó¥¯¥é¥¤¥Ö¥é¥ê(onig_s.lib)¤ò¥ê¥ó¥¯¤¹¤ë¾ì¹ç¤Ë¤Ï¡¢
¥³¥ó¥Ñ¥¤¥ë¤¹¤ë¤È¤­¤Ë -DONIG_EXTERN=extern ¤ò¥³¥ó¥Ñ¥¤¥ë°ú¿ô¤ËÄɲ乤뤳¤È¡£
@@ -80,18 +107,19 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
»ÈÍÑÎã¥×¥í¥°¥é¥à
- sample/simple.c ºÇ¾®Îã (native API)
+ sample/simple.c ºÇ¾®Îã (Oniguruma API)
sample/names.c ̾Á°ÉÕ¤­¥°¥ë¡¼¥×¥³¡¼¥ë¥Ð¥Ã¥¯»ÈÍÑÎã
sample/encode.c ´ö¤Ä¤«¤Îʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°»ÈÍÑÎã
sample/listcap.c Êá³ÍÍúÎòµ¡Ç½¤Î»ÈÍÑÎã
sample/posix.c POSIX API»ÈÍÑÎã
sample/sql.c ²ÄÊѥ᥿ʸ»úµ¡Ç½»ÈÍÑÎã (SQL-like ¥Ñ¥¿¡¼¥ó)
- sample/syntax.c Perl¤ÈJavaʸˡ¤Î¥Æ¥¹¥È
+ sample/syntax.c Perl¡¢Java¡¢ASISʸˡ¤Î¥Æ¥¹¥È
¥½¡¼¥¹¥Õ¥¡¥¤¥ë
oniguruma.h 鬼車APIヘッダ (公開)
+ onig-config.in onig-configプログラム テンプレート
regenc.h ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°ÏÈÁȤߥإåÀ
regint.h ÆâÉôÀë¸À
@@ -122,6 +150,7 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
enc/euc_kr.c EUC-KR, EUC-CN ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
enc/sjis.c Shift_JIS ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
enc/big5.c Big5 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ enc/gb18030.c GB 18030 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° (µ×ÊÝ·òÍλá Äó¶¡)
enc/koi8.c KOI8 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
enc/koi8_r.c KOI8-R ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
enc/iso8859_1.c ISO-8859-1 (Latin-1)
@@ -155,7 +184,6 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
Ruby 1.8/1.6¤ÎÆüËܸ첽GNU regex¤È¤ÎAPI¤Î°ã¤¤
+ re_compile_fastmap() ¤Ïºï½ü¤µ¤ì¤¿¡£
- + re_recompile_pattern() ¤¬Äɲ䵤줿¡£
+ re_alloc_pattern() ¤¬Äɲ䵤줿¡£
@@ -165,7 +193,6 @@ Ruby 1.8/1.6¤ÎÆüËܸ첽GNU regex¤È¤ÎAPI¤Î°ã¤¤
? Unicode¥×¥í¥Ñ¥Æ¥£
? ambig-flag Katakana <-> Hiragana
? ONIG_OPTION_NOTBOS/NOTEOSÄɲà (\A, \z, \Z)
- ? ONIG_SYNTAX_ASISÄɲÃ
?? \X (== \PM\pM*)
?? ʸˡÍ×ÁÇ ONIG_SYN_CONTEXT_INDEP_ANCHORS¤Î¼ÂÁõ
?? ²þ¹Ôʸ»ú(ʸ»úÎó)¤òÊѹ¹¤Ç¤­¤ë
@@ -174,4 +201,4 @@ Ruby 1.8/1.6¤ÎÆüËܸ첽GNU regex¤È¤ÎAPI¤Î°ã¤¤
and I'm thankful to Akinori MUSHA.
-Mail Address: K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+アドレス: K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
diff --git a/ext/mbstring/oniguruma/config.h.in b/ext/mbstring/oniguruma/config.h.in
index 5ca2056fb..4a2fc28d8 100644
--- a/ext/mbstring/oniguruma/config.h.in
+++ b/ext/mbstring/oniguruma/config.h.in
@@ -1,69 +1,108 @@
-/* config.h.in. Generated automatically from configure.in by autoheader. */
+/* config.h.in. Generated from configure.in by autoheader. */
-/* Define if using alloca.c. */
-#undef C_ALLOCA
-
-/* Define to empty if the keyword does not work. */
-#undef const
-
-/* Define to one of _getb67, GETB67, getb67 for Cray-2 and Cray-YMP systems.
- This function is required for alloca.c support on those systems. */
+/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP
+ systems. This function is required for `alloca.c' support on those systems.
+ */
#undef CRAY_STACKSEG_END
-/* Define if you have alloca, as a function or macro. */
+/* Define to 1 if using `alloca.c'. */
+#undef C_ALLOCA
+
+/* Define to 1 if you have `alloca', as a function or macro. */
#undef HAVE_ALLOCA
-/* Define if you have <alloca.h> and it should be used (not on Ultrix). */
+/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).
+ */
#undef HAVE_ALLOCA_H
-/* If using the C implementation of alloca, define if you know the
- direction of stack growth for your system; otherwise it will be
- automatically deduced at run-time.
- STACK_DIRECTION > 0 => grows toward higher addresses
- STACK_DIRECTION < 0 => grows toward lower addresses
- STACK_DIRECTION = 0 => direction of growth unknown
- */
-#undef STACK_DIRECTION
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#undef HAVE_DLFCN_H
-/* Define if you have the ANSI C header files. */
-#undef STDC_HEADERS
+/* Define to 1 if you have the <inttypes.h> header file. */
+#undef HAVE_INTTYPES_H
-/* Define if you can safely include both <sys/time.h> and <time.h>. */
-#undef TIME_WITH_SYS_TIME
+/* Define to 1 if you have the <memory.h> header file. */
+#undef HAVE_MEMORY_H
-/* The number of bytes in a int. */
-#undef SIZEOF_INT
+/* Define if compiler supports prototypes */
+#undef HAVE_PROTOTYPES
-/* The number of bytes in a long. */
-#undef SIZEOF_LONG
+/* Define if compiler supports stdarg prototypes */
+#undef HAVE_STDARG_PROTOTYPES
-/* The number of bytes in a short. */
-#undef SIZEOF_SHORT
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HAVE_STDINT_H
-/* Define if you have the <stdlib.h> header file. */
+/* Define to 1 if you have the <stdlib.h> header file. */
#undef HAVE_STDLIB_H
-/* Define if you have the <string.h> header file. */
+/* Define to 1 if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+
+/* Define to 1 if you have the <string.h> header file. */
#undef HAVE_STRING_H
-/* Define if you have the <strings.h> header file. */
-#undef HAVE_STRINGS_H
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#undef HAVE_SYS_STAT_H
-/* Define if you have the <sys/types.h> header file. */
-#undef HAVE_SYS_TYPES_H
+/* Define to 1 if you have the <sys/times.h> header file. */
+#undef HAVE_SYS_TIMES_H
-/* Define if you have the <sys/time.h> header file. */
+/* Define to 1 if you have the <sys/time.h> header file. */
#undef HAVE_SYS_TIME_H
-/* Define if you have the <sys/times.h> header file. */
-#undef HAVE_SYS_TIMES_H
+/* Define to 1 if you have the <sys/types.h> header file. */
+#undef HAVE_SYS_TYPES_H
-/* Define if you have the <unistd.h> header file. */
+/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
-/* Define if you have the function argument prototype */
-#undef HAVE_PROTOTYPES
+/* Name of package */
+#undef PACKAGE
-/* Define if you have the variable length prototypes and stdarg.h */
-#undef HAVE_STDARG_PROTOTYPES
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+
+/* Define to the full name and version of this package. */
+#undef PACKAGE_STRING
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* Define to the version of this package. */
+#undef PACKAGE_VERSION
+
+/* The size of a `int', as computed by sizeof. */
+#undef SIZEOF_INT
+
+/* The size of a `long', as computed by sizeof. */
+#undef SIZEOF_LONG
+
+/* The size of a `short', as computed by sizeof. */
+#undef SIZEOF_SHORT
+/* If using the C implementation of alloca, define if you know the
+ direction of stack growth for your system; otherwise it will be
+ automatically deduced at run-time.
+ STACK_DIRECTION > 0 => grows toward higher addresses
+ STACK_DIRECTION < 0 => grows toward lower addresses
+ STACK_DIRECTION = 0 => direction of growth unknown */
+#undef STACK_DIRECTION
+
+/* Define to 1 if you have the ANSI C header files. */
+#undef STDC_HEADERS
+
+/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
+#undef TIME_WITH_SYS_TIME
+
+/* Define to enable the combination explosion check */
+#undef USE_COMBINATION_EXPLOSION_CHECK
+
+/* Version number of package */
+#undef VERSION
+
+/* Define to empty if `const' does not conform to ANSI C. */
+#undef const
diff --git a/ext/mbstring/oniguruma/doc/API b/ext/mbstring/oniguruma/doc/API
new file mode 100644
index 000000000..7374f65bd
--- /dev/null
+++ b/ext/mbstring/oniguruma/doc/API
@@ -0,0 +1,586 @@
+Oniguruma API Version 4.1.0 2006/05/15
+
+#include <oniguruma.h>
+
+
+# int onig_init(void)
+
+ Initialize library.
+
+ You don't have to call it explicitly, because it is called in onig_new().
+
+
+# int onig_error_code_to_str(UChar* err_buf, int err_code, ...)
+
+ Get error message string.
+ If this function is used for onig_new(),
+ don't call this after the pattern argument of onig_new() is freed.
+
+ normal return: error message string length
+
+ arguments
+ 1 err_buf: error message string buffer.
+ (required size: ONIG_MAX_ERROR_MESSAGE_LEN)
+ 2 err_code: error code returned by other API functions.
+ 3 err_info (optional): error info returned by onig_new().
+
+
+# void onig_set_warn_func(OnigWarnFunc func)
+
+ Set warning function.
+
+ WARNING:
+ '[', '-', ']' in character class without escape.
+ ']' in pattern without escape.
+
+ arguments
+ 1 func: function pointer. void (*func)(char* warning_message)
+
+
+# void onig_set_verb_warn_func(OnigWarnFunc func)
+
+ Set verbose warning function.
+
+ WARNING:
+ redundant nested repeat operator.
+
+ arguments
+ 1 func: function pointer. void (*func)(char* warning_message)
+
+
+# int onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
+ OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
+ OnigErrorInfo* err_info)
+
+ Create a regex object.
+
+ normal return: ONIG_NORMAL
+
+ arguments
+ 1 reg: return regex object's address.
+ 2 pattern: regex pattern string.
+ 3 pattern_end: terminate address of pattern. (pattern + pattern length)
+ 4 option: compile time options.
+
+ ONIG_OPTION_NONE no option
+ ONIG_OPTION_SINGLELINE '^' -> '\A', '$' -> '\z', '\Z' -> '\z'
+ ONIG_OPTION_MULTILINE '.' match with newline
+ ONIG_OPTION_IGNORECASE ambiguity match on
+ ONIG_OPTION_EXTEND extended pattern form
+ ONIG_OPTION_FIND_LONGEST find longest match
+ ONIG_OPTION_FIND_NOT_EMPTY ignore empty match
+ ONIG_OPTION_NEGATE_SINGLELINE
+ clear ONIG_OPTION_SINGLELINE which is enabled on
+ ONIG_SYNTAX_POSIX_BASIC, ONIG_SYNTAX_POSIX_EXTENDED,
+ ONIG_SYNTAX_PERL, ONIG_SYNTAX_PERL_NG, ONIG_SYNTAX_JAVA
+
+ ONIG_OPTION_DONT_CAPTURE_GROUP only named group captured.
+ ONIG_OPTION_CAPTURE_GROUP named and no-named group captured.
+
+ 5 enc: character encoding.
+
+ ONIG_ENCODING_ASCII ASCII
+ ONIG_ENCODING_ISO_8859_1 ISO 8859-1
+ ONIG_ENCODING_ISO_8859_2 ISO 8859-2
+ ONIG_ENCODING_ISO_8859_3 ISO 8859-3
+ ONIG_ENCODING_ISO_8859_4 ISO 8859-4
+ ONIG_ENCODING_ISO_8859_5 ISO 8859-5
+ ONIG_ENCODING_ISO_8859_6 ISO 8859-6
+ ONIG_ENCODING_ISO_8859_7 ISO 8859-7
+ ONIG_ENCODING_ISO_8859_8 ISO 8859-8
+ ONIG_ENCODING_ISO_8859_9 ISO 8859-9
+ ONIG_ENCODING_ISO_8859_10 ISO 8859-10
+ ONIG_ENCODING_ISO_8859_11 ISO 8859-11
+ ONIG_ENCODING_ISO_8859_13 ISO 8859-13
+ ONIG_ENCODING_ISO_8859_14 ISO 8859-14
+ ONIG_ENCODING_ISO_8859_15 ISO 8859-15
+ ONIG_ENCODING_ISO_8859_16 ISO 8859-16
+ ONIG_ENCODING_UTF8 UTF-8
+ ONIG_ENCODING_UTF16_BE UTF-16BE
+ ONIG_ENCODING_UTF16_LE UTF-16LE
+ ONIG_ENCODING_UTF32_BE UTF-32BE
+ ONIG_ENCODING_UTF32_LE UTF-32LE
+ ONIG_ENCODING_EUC_JP EUC-JP
+ ONIG_ENCODING_EUC_TW EUC-TW
+ ONIG_ENCODING_EUC_KR EUC-KR
+ ONIG_ENCODING_EUC_CN EUC-CN
+ ONIG_ENCODING_SJIS Shift_JIS
+ ONIG_ENCODING_KOI8 KOI8
+ ONIG_ENCODING_KOI8_R KOI8-R
+ ONIG_ENCODING_BIG5 Big5
+ ONIG_ENCODING_GB18030 GB 18030
+
+ or any OnigEncodingType data address defined by user.
+
+ 6 syntax: address of pattern syntax definition.
+
+ ONIG_SYNTAX_ASIS plain text
+ ONIG_SYNTAX_POSIX_BASIC POSIX Basic RE
+ ONIG_SYNTAX_POSIX_EXTENDED POSIX Extended RE
+ ONIG_SYNTAX_EMACS Emacs
+ ONIG_SYNTAX_GREP grep
+ ONIG_SYNTAX_GNU_REGEX GNU regex
+ ONIG_SYNTAX_JAVA Java (Sun java.util.regex)
+ ONIG_SYNTAX_PERL Perl
+ ONIG_SYNTAX_PERL_NG Perl + named group
+ ONIG_SYNTAX_RUBY Ruby
+ ONIG_SYNTAX_DEFAULT default (== Ruby)
+ onig_set_default_syntax()
+
+ or any OnigSyntaxType data address defined by user.
+
+ 7 err_info: address for return optional error info.
+ Use this value as 3rd argument of onig_error_code_to_str().
+
+
+
+# int onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
+ OnigCompileInfo* ci, OnigErrorInfo* einfo)
+
+ Create a regex object.
+ This function is deluxe version of onig_new().
+
+ normal return: ONIG_NORMAL
+
+ arguments
+ 1 reg: return address of regex object.
+ 2 pattern: regex pattern string.
+ 3 pattern_end: terminate address of pattern. (pattern + pattern length)
+ 4 ci: compile time info.
+
+ ci->num_of_elements: number of elements in ci. (current version: 5)
+ ci->pattern_enc: pattern string character encoding.
+ ci->target_enc: target string character encoding.
+ ci->syntax: address of pattern syntax definition.
+ ci->option: compile time option.
+ ci->ambig_flag: character matching ambiguity bit flag for
+ ONIG_OPTION_IGNORECASE mode.
+
+ ONIGENC_AMBIGUOUS_MATCH_NONE: exact
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE: ignore case for ASCII
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE: ignore case for non-ASCII
+ ONIGENC_AMBIGUOUS_MATCH_COMPOUND: grapheme cluster as a char
+ ONIGENC_AMBIGUOUS_MATCH_FULL: all ambiguity on
+ ONIGENC_AMBIGUOUS_MATCH_DEFAULT: (ASCII | NONASCII)
+ onig_set_default_ambig_flag()
+
+ 5 err_info: address for return optional error info.
+ Use this value as 3rd argument of onig_error_code_to_str().
+
+
+ Different character encoding combination is allowed for
+ the following cases only.
+
+ pattern_enc: ASCII, ISO_8859_1
+ target_enc: UTF16_BE, UTF16_LE, UTF32_BE, UTF32_LE
+
+ pattern_enc: UTF16_BE/LE
+ target_enc: UTF16_LE/BE
+
+ pattern_enc: UTF32_BE/LE
+ target_enc: UTF32_LE/BE
+
+
+# void onig_free(regex_t* reg)
+
+ Free memory used by regex object.
+
+ arguments
+ 1 reg: regex object.
+
+
+# int onig_search(regex_t* reg, const UChar* str, const UChar* end, const UChar* start,
+ const UChar* range, OnigRegion* region, OnigOptionType option)
+
+ Search string and return search result and matching region.
+
+ normal return: match position offset (i.e. p - str >= 0)
+ not found: ONIG_MISMATCH (< 0)
+
+ arguments
+ 1 reg: regex object
+ 2 str: target string
+ 3 end: terminate address of target string
+ 4 start: search start address of target string
+ 5 range: search terminate address of target string
+ in forward search (start <= searched string head < range)
+ in backward search (range <= searched string head <= start)
+ 6 region: address for return group match range info (NULL is allowed)
+ 7 option: search time option
+
+ ONIG_OPTION_NOTBOL string head(str) isn't considered as begin of line
+ ONIG_OPTION_NOTEOL string end (end) isn't considered as end of line
+ ONIG_OPTION_POSIX_REGION region argument is regmatch_t[] of POSIX API.
+
+
+# int onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at,
+ OnigRegion* region, OnigOptionType option)
+
+ Match string and return result and matching region.
+
+ normal return: match length (>= 0)
+ not match: ONIG_MISMATCH ( < 0)
+
+ arguments
+ 1 reg: regex object
+ 2 str: target string
+ 3 end: terminate address of target string
+ 4 at: match address of target string
+ 5 region: address for return group match range info (NULL is allowed)
+ 6 option: search time option
+
+ ONIG_OPTION_NOTBOL string head(str) isn't considered as begin of line
+ ONIG_OPTION_NOTEOL string end (end) isn't considered as end of line
+ ONIG_OPTION_POSIX_REGION region argument is regmatch_t[] type of POSIX API.
+
+
+# OnigRegion* onig_region_new(void)
+
+ Create a region.
+
+
+# void onig_region_free(OnigRegion* region, int free_self)
+
+ Free memory used by region.
+
+ arguments
+ 1 region: target region
+ 2 free_self: [1: free all, 0: free memory used in region but not self]
+
+
+# void onig_region_copy(OnigRegion* to, OnigRegion* from)
+
+ Copy contents of region.
+
+ arguments
+ 1 to: target region
+ 2 from: source region
+
+
+# void onig_region_clear(OnigRegion* region)
+
+ Clear contents of region.
+
+ arguments
+ 1 region: target region
+
+
+# int onig_region_resize(OnigRegion* region, int n)
+
+ Resize group range area of region.
+
+ normal return: ONIG_NORMAL
+
+ arguments
+ 1 region: target region
+ 2 n: new size
+
+
+# int onig_name_to_group_numbers(regex_t* reg, const UChar* name, const UChar* name_end,
+ int** num_list)
+
+ Return the group number list of the name.
+ Named subexp is defined by (?<name>....).
+
+ normal return: number of groups for the name.
+ (ex. /(?<x>..)(?<x>..)/ ==> 2)
+ name not found: -1
+
+ arguments
+ 1 reg: regex object.
+ 2 name: group name.
+ 3 name_end: terminate address of group name.
+ 4 num_list: return list of group number.
+
+
+# int onig_name_to_backref_number(regex_t* reg, const UChar* name, const UChar* name_end,
+ OnigRegion *region)
+
+ Return the group number corresponding to the named backref (\k<name>).
+ If two or more regions for the groups of the name are effective,
+ the greatest number in it is obtained.
+
+ normal return: group number.
+
+ arguments
+ 1 reg: regex object.
+ 2 name: group name.
+ 3 name_end: terminate address of group name.
+ 4 region: search/match result region.
+
+
+# int onig_foreach_name(regex_t* reg,
+ int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*),
+ void* arg)
+
+ Iterate function call for all names.
+
+ normal return: 0
+ error: func's return value.
+
+ arguments
+ 1 reg: regex object.
+ 2 func: callback function.
+ func(name, name_end, <number of groups>, <group number's list>,
+ reg, arg);
+ if func does not return 0, then iteration is stopped.
+ 3 arg: argument for func.
+
+
+# int onig_number_of_names(regex_t* reg)
+
+ Return the number of names defined in the pattern.
+ Multiple definitions of one name are counted as one.
+
+ arguments
+ 1 reg: regex object.
+
+
+# OnigEncoding onig_get_encoding(regex_t* reg)
+# OnigOptionType onig_get_options(regex_t* reg)
+# OnigAmbigType onig_get_ambig_flag(regex_t* reg)
+# OnigSyntaxType* onig_get_syntax(regex_t* reg)
+
+ Return a value of the regex object.
+
+ arguments
+ 1 reg: regex object.
+
+
+# int onig_number_of_captures(regex_t* reg)
+
+ Return the number of capture groups in the pattern.
+
+ arguments
+ 1 reg: regex object.
+
+
+# int onig_number_of_capture_histories(regex_t* reg)
+
+ Return the number of capture history defined in the pattern.
+
+ You can't use capture history if ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY
+ is disabled in the pattern syntax. (It is disabled in the default syntax.)
+
+ arguments
+ 1 reg: regex object.
+
+
+
+# OnigCaptureTreeNode* onig_get_capture_tree(OnigRegion* region)
+
+ Return the root node of capture history data tree.
+
+ This value is undefined if matching has failed.
+
+ arguments
+ 1 region: matching result.
+
+
+# int onig_capture_tree_traverse(OnigRegion* region, int at,
+ int(*func)(int,int,int,int,int,void*), void* arg)
+
+ Traverse and callback in capture history data tree.
+
+ normal return: 0
+ error: callback func's return value.
+
+ arguments
+ 1 region: match region data.
+ 2 at: callback position.
+
+ ONIG_TRAVERSE_CALLBACK_AT_FIRST: callback first, then traverse children.
+ ONIG_TRAVERSE_CALLBACK_AT_LAST: traverse children first, then callback.
+ ONIG_TRAVERSE_CALLBACK_AT_BOTH: callback first, then traverse children,
+ and at last callback again.
+
+ 3 func: callback function.
+ if func does not return 0, then traverse is stopped.
+
+ int func(int group, int beg, int end, int level, int at,
+ void* arg)
+
+ group: group number
+ beg: capture start position
+ end: capture end position
+ level: nest level (from 0)
+ at: callback position
+ ONIG_TRAVERSE_CALLBACK_AT_FIRST
+ ONIG_TRAVERSE_CALLBACK_AT_LAST
+ arg: optional callback argument
+
+ 4 arg: optional callback argument.
+
+
+# int onig_noname_group_capture_is_active(regex_t* reg)
+
+ Return noname group capture activity.
+
+ active: 1
+ inactive: 0
+
+ arguments
+ 1 reg: regex object.
+
+ if option ONIG_OPTION_DONT_CAPTURE_GROUP == ON
+ --> inactive
+
+ if the regex pattern has a named group
+ and syntax ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP == ON
+ and option ONIG_OPTION_CAPTURE_GROUP == OFF
+ --> inactive
+
+ else --> active
+
+
+# UChar* onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
+
+ Return previous character head address.
+
+ arguments
+ 1 enc: character encoding
+ 2 start: string address
+ 3 s: target address of string
+
+
+# UChar* onigenc_get_left_adjust_char_head(OnigEncoding enc,
+ const UChar* start, const UChar* s)
+
+ Return left-adjusted head address of a character.
+
+ arguments
+ 1 enc: character encoding
+ 2 start: string address
+ 3 s: target address of string
+
+
+# UChar* onigenc_get_right_adjust_char_head(OnigEncoding enc,
+ const UChar* start, const UChar* s)
+
+ Return right-adjusted head address of a character.
+
+ arguments
+ 1 enc: character encoding
+ 2 start: string address
+ 3 s: target address of string
+
+
+# int onigenc_strlen(OnigEncoding enc, const UChar* s, const UChar* end)
+# int onigenc_strlen_null(OnigEncoding enc, const UChar* s)
+
+ Return number of characters in the string.
+
+
+# int onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
+
+ Return number of bytes in the string.
+
+
+# int onig_set_default_syntax(OnigSyntaxType* syntax)
+
+ Set default syntax.
+
+ arguments
+ 1 syntax: address of pattern syntax definition.
+
+
+# void onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from)
+
+ Copy syntax.
+
+ arguments
+ 1 to: destination address.
+ 2 from: source address.
+
+
+# unsigned int onig_get_syntax_op(OnigSyntaxType* syntax)
+# unsigned int onig_get_syntax_op2(OnigSyntaxType* syntax)
+# unsigned int onig_get_syntax_behavior(OnigSyntaxType* syntax)
+# OnigOptionType onig_get_syntax_options(OnigSyntaxType* syntax)
+
+# void onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
+# void onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
+# void onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
+# void onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
+
+ Get/Set elements of the syntax.
+
+ arguments
+ 1 syntax: syntax
+ 2 op, op2, behavior, options: value of element.
+
+
+# void onig_copy_encoding(OnigEncoding to, OnigEncoding from)
+
+ Copy encoding.
+
+ arguments
+ 1 to: destination address.
+ 2 from: source address.
+
+
+# int onig_set_meta_char(OnigEncoding enc, unsigned int what,
+ OnigCodePoint code)
+
+ Set a variable meta character to the code point value.
+ Except for an escape character, this meta character specification
+ does not work if ONIG_SYN_OP_VARIABLE_META_CHARACTERS is not enabled
+ by the syntax. (It is not enabled in the built-in syntaxes.)
+
+ normal return: ONIG_NORMAL
+
+ arguments
+ 1 enc: target encoding
+ 2 what: specifies which meta character it is.
+
+ ONIG_META_CHAR_ESCAPE
+ ONIG_META_CHAR_ANYCHAR
+ ONIG_META_CHAR_ANYTIME
+ ONIG_META_CHAR_ZERO_OR_ONE_TIME
+ ONIG_META_CHAR_ONE_OR_MORE_TIME
+ ONIG_META_CHAR_ANYCHAR_ANYTIME
+
+ 3 code: meta character or ONIG_INEFFECTIVE_META_CHAR.
+
+
+# OnigAmbigType onig_get_default_ambig_flag()
+
+ Get default ambig flag.
+
+
+# int onig_set_default_ambig_flag(OnigAmbigType ambig_flag)
+
+ Set default ambig flag.
+
+ 1 ambig_flag: ambiguity flag
+
+
+# unsigned int onig_get_match_stack_limit_size(void)
+
+ Return the maximum size of the match stack.
+ (default: 0 == unlimited)
+
+
+# int onig_set_match_stack_limit_size(unsigned int size)
+
+ Set the maximum size of the match stack.
+ (size = 0: unlimited)
+
+ normal return: ONIG_NORMAL
+
+
+# int onig_end(void)
+
+ The use of this library is finished.
+
+ normal return: ONIG_NORMAL
+
+ It is not allowed to use regex objects which were created
+ before the onig_end() call.
+
+
+# const char* onig_version(void)
+
+ Return version string. (ex. "2.2.8")
+
+// END
diff --git a/ext/mbstring/oniguruma/doc/API.ja b/ext/mbstring/oniguruma/doc/API.ja
new file mode 100644
index 000000000..2682da480
--- /dev/null
+++ b/ext/mbstring/oniguruma/doc/API.ja
@@ -0,0 +1,593 @@
+鬼車インターフェース Version 4.1.0 2006/05/15
+
+#include <oniguruma.h>
+
+
+# int onig_init(void)
+
+ ¥é¥¤¥Ö¥é¥ê¤Î½é´ü²½
+
+ onig_new()¤ÎÃæ¤Ç¸Æ¤Ó½Ð¤µ¤ì¤ë¤Î¤Ç¡¢¤³¤Î´Ø¿ô¤òÌÀ¼¨Åª¤Ë¸Æ¤Ó½Ð¤µ¤Ê¤¯¤Æ¤â¤è¤¤¡£
+
+
+# int onig_error_code_to_str(UChar* err_buf, int err_code, ...)
+
+ ¥¨¥é¡¼¥á¥Ã¥»¡¼¥¸¤ò¼èÆÀ¤¹¤ë¡£
+
+ ¤³¤Î´Ø¿ô¤ò¡¢onig_new()¤Î·ë²Ì¤ËÂФ·¤Æ¸Æ¤Ó½Ð¤¹¾ì¹ç¤Ë¤Ï¡¢onig_new()¤Îpattern°ú¿ô¤ò
+ ¥á¥â¥ê²òÊü¤¹¤ë¤è¤ê¤âÁ°¤Ë¸Æ¤Ó½Ð¤µ¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£
+
+ Àµ¾ï½ªÎ»Ìá¤êÃÍ: ¥¨¥é¡¼¥á¥Ã¥»¡¼¥¸Ê¸»úÎó¤Î¥Ð¥¤¥ÈĹ
+
+ °ú¿ô
+ 1 err_buf: ¥¨¥é¡¼¥á¥Ã¥»¡¼¥¸¤ò³ÊǼ¤¹¤ëÎΰè
+ (ɬÍפʥµ¥¤¥º: ONIG_MAX_ERROR_MESSAGE_LEN)
+ 2 err_code: ¥¨¥é¡¼¥³¡¼¥É
+ 3 err_info (optional): onig_new()¤Îerr_info
+
+
+# void onig_set_warn_func(OnigWarnFunc func)
+
+ ·Ù¹ðÄÌÃδؿô¤ò¥»¥Ã¥È¤¹¤ë¡£
+
+ ·Ù¹ð:
+ '[', '-', ']' in character class without escape.
+ ']' in pattern without escape.
+
+ °ú¿ô
+ 1 func: ·Ù¹ð´Ø¿ô void (*func)(char* warning_message)
+
+
+# void onig_set_verb_warn_func(OnigWarnFunc func)
+
+ ¾ÜºÙ·Ù¹ðÄÌÃδؿô¤ò¥»¥Ã¥È¤¹¤ë¡£
+
+ ¾ÜºÙ·Ù¹ð:
+ redundant nested repeat operator.
+
+ °ú¿ô
+ 1 func: ¾ÜºÙ·Ù¹ð´Ø¿ô void (*func)(char* warning_message)
+
+
+# int onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
+ OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
+ OnigErrorInfo* err_info)
+
+ Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È(regex)¤òºîÀ®¤¹¤ë¡£
+
+ Àµ¾ï½ªÎ»Ìá¤êÃÍ: ONIG_NORMAL
+
+ °ú¿ô
+ 1 reg: ºîÀ®¤µ¤ì¤¿Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤òÊÖ¤¹¥¢¥É¥ì¥¹
+ 2 pattern: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸ»úÎó
+ 3 pattern_end: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸ»úÎó¤Î½ªÃ¼¥¢¥É¥ì¥¹(pattern + pattern length)
+ 4 option: Àµµ¬É½¸½¥³¥ó¥Ñ¥¤¥ë»þ¥ª¥×¥·¥ç¥ó
+
+ ONIG_OPTION_NONE ¥ª¥×¥·¥ç¥ó¤Ê¤·
+ ONIG_OPTION_SINGLELINE '^' -> '\A', '$' -> '\z', '\Z' -> '\z'
+ ONIG_OPTION_MULTILINE '.'¤¬²þ¹Ô¤Ë¥Þ¥Ã¥Á¤¹¤ë
+ ONIG_OPTION_IGNORECASE Û£Ëæ¥Þ¥Ã¥Á ¥ª¥ó
+ ONIG_OPTION_EXTEND ¥Ñ¥¿¡¼¥ó³ÈÄ¥·Á¼°
+ ONIG_OPTION_FIND_LONGEST ºÇĹ¥Þ¥Ã¥Á
+ ONIG_OPTION_FIND_NOT_EMPTY ¶õ¥Þ¥Ã¥Á¤ò̵»ë
+ ONIG_OPTION_NEGATE_SINGLELINE
+ ONIG_SYNTAX_POSIX_BASIC, ONIG_SYNTAX_POSIX_EXTENDED,
+ ONIG_SYNTAX_PERL, ONIG_SYNTAX_PERL_NG, ONIG_SYNTAX_JAVA¤Ç
+ ¥Ç¥Õ¥©¥ë¥È¤ÇÍ­¸ú¤ÊONIG_OPTION_SINGLELINE¤ò¥¯¥ê¥¢¤¹¤ë¡£
+
+ ONIG_OPTION_DONT_CAPTURE_GROUP ̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤Î¤ßÊá³Í
+ ONIG_OPTION_CAPTURE_GROUP ̾Á°Ìµ¤·Êá³Í¼°½¸¹ç¤âÊá³Í
+
+ 5 enc: ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+
+ ONIG_ENCODING_ASCII ASCII
+ ONIG_ENCODING_ISO_8859_1 ISO 8859-1
+ ONIG_ENCODING_ISO_8859_2 ISO 8859-2
+ ONIG_ENCODING_ISO_8859_3 ISO 8859-3
+ ONIG_ENCODING_ISO_8859_4 ISO 8859-4
+ ONIG_ENCODING_ISO_8859_5 ISO 8859-5
+ ONIG_ENCODING_ISO_8859_6 ISO 8859-6
+ ONIG_ENCODING_ISO_8859_7 ISO 8859-7
+ ONIG_ENCODING_ISO_8859_8 ISO 8859-8
+ ONIG_ENCODING_ISO_8859_9 ISO 8859-9
+ ONIG_ENCODING_ISO_8859_10 ISO 8859-10
+ ONIG_ENCODING_ISO_8859_11 ISO 8859-11
+ ONIG_ENCODING_ISO_8859_13 ISO 8859-13
+ ONIG_ENCODING_ISO_8859_14 ISO 8859-14
+ ONIG_ENCODING_ISO_8859_15 ISO 8859-15
+ ONIG_ENCODING_ISO_8859_16 ISO 8859-16
+ ONIG_ENCODING_UTF8 UTF-8
+ ONIG_ENCODING_UTF16_BE UTF-16BE
+ ONIG_ENCODING_UTF16_LE UTF-16LE
+ ONIG_ENCODING_UTF32_BE UTF-32BE
+ ONIG_ENCODING_UTF32_LE UTF-32LE
+ ONIG_ENCODING_EUC_JP EUC-JP
+ ONIG_ENCODING_EUC_TW EUC-TW
+ ONIG_ENCODING_EUC_KR EUC-KR
+ ONIG_ENCODING_EUC_CN EUC-CN
+ ONIG_ENCODING_SJIS Shift_JIS
+ ONIG_ENCODING_KOI8 KOI8
+ ONIG_ENCODING_KOI8_R KOI8-R
+ ONIG_ENCODING_BIG5 Big5
+ ONIG_ENCODING_GB18030 GB 18030
+
+ ¤Þ¤¿¤Ï¡¢¥æ¡¼¥¶¤¬ÄêµÁ¤·¤¿OnigEncodingType¥Ç¡¼¥¿¤Î¥¢¥É¥ì¥¹
+
+ 6 syntax: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡÄêµÁ
+
+ ONIG_SYNTAX_ASIS plain text
+ ONIG_SYNTAX_POSIX_BASIC POSIX Basic RE
+ ONIG_SYNTAX_POSIX_EXTENDED POSIX Extended RE
+ ONIG_SYNTAX_EMACS Emacs
+ ONIG_SYNTAX_GREP grep
+ ONIG_SYNTAX_GNU_REGEX GNU regex
+ ONIG_SYNTAX_JAVA Java (Sun java.util.regex)
+ ONIG_SYNTAX_PERL Perl
+ ONIG_SYNTAX_PERL_NG Perl + ̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç
+ ONIG_SYNTAX_RUBY Ruby
+ ONIG_SYNTAX_DEFAULT default (== Ruby)
+ onig_set_default_syntax()
+
+ ¤Þ¤¿¤Ï¡¢¥æ¡¼¥¶¤¬ÄêµÁ¤·¤¿OnigSyntaxType¥Ç¡¼¥¿¤Î¥¢¥É¥ì¥¹
+
+ 7 err_info: ¥¨¥é¡¼¾ðÊó¤òÊÖ¤¹¤¿¤á¤Î¥¢¥É¥ì¥¹
+ onig_error_code_to_str()¤Î»°ÈÖÌܤΰú¿ô¤È¤·¤Æ»ÈÍѤ¹¤ë
+
+
+# int onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
+ OnigCompileInfo* ci, OnigErrorInfo* einfo)
+
+ Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È(regex)¤òºîÀ®¤¹¤ë¡£
+ ¤³¤Î´Ø¿ô¤Ï¡¢onig_new()¤Î¥Ç¥é¥Ã¥¯¥¹ÈÇ¡£
+
+ Àµ¾ï½ªÎ»Ìá¤êÃÍ: ONIG_NORMAL
+
+ °ú¿ô
+ 1 reg: ºîÀ®¤µ¤ì¤¿Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤òÊÖ¤¹¥¢¥É¥ì¥¹
+ 2 pattern: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸ»úÎó
+ 3 pattern_end: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸ»úÎó¤Î½ªÃ¼¥¢¥É¥ì¥¹(pattern + pattern length)
+ 4 ci: ¥³¥ó¥Ñ¥¤¥ë¾ðÊó
+
+ ci->num_of_elements: ci¤ÎÍ×ÁÇ¿ô (¸½ºß¤ÎÈǤǤÏ: 5)
+ ci->pattern_enc: ¥Ñ¥¿¡¼¥óʸ»úÎó¤Îʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ ci->target_enc: ÂоÝʸ»úÎó¤Îʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ ci->syntax: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡÄêµÁ
+ ci->option: Àµµ¬É½¸½¥³¥ó¥Ñ¥¤¥ë»þ¥ª¥×¥·¥ç¥ó
+ ci->ambig_flag: ONIG_OPTION_IGNORECASE¥â¡¼¥É¤Ç¤Î
+ ʸ»úÛ£Ëæ¥Þ¥Ã¥Á»ØÄê¥Ó¥Ã¥È¥Õ¥é¥°
+
+ ONIGENC_AMBIGUOUS_MATCH_NONE: Û£ËæÌµ¤·
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE: ASCII¤ÎÂçʸ»ú¾®Ê¸»ú
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE: ASCII°Ê³°¤ÎÂçʸ»ú¾®Ê¸»ú
+ ONIGENC_AMBIGUOUS_MATCH_COMPOUND: ¹çÀ®Ê¸»ú
+ ONIGENC_AMBIGUOUS_MATCH_FULL: Á´¤Æ¤ÎÛ£Ëæ¥Õ¥é¥°Í­¸ú
+ ONIGENC_AMBIGUOUS_MATCH_DEFAULT: (ASCII | NONASCII)
+ onig_set_default_ambig_flag()
+
+ 5 err_info: ¥¨¥é¡¼¾ðÊó¤òÊÖ¤¹¤¿¤á¤Î¥¢¥É¥ì¥¹
+ onig_error_code_to_str()¤Î»°ÈÖÌܤΰú¿ô¤È¤·¤Æ»ÈÍѤ¹¤ë
+
+
+ °Û¤Ê¤ëʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°¤ÎÁȤ߹ç¤ï¤»¤Ï¡¢°Ê²¼¤Î¾ì¹ç¤Ë¤Î¤ßµö¤µ¤ì¤ë¡£
+
+ pattern_enc: ASCII, ISO_8859_1
+ target_enc: UTF16_BE, UTF16_LE, UTF32_BE, UTF32_LE
+
+ pattern_enc: UTF16_BE/LE
+ target_enc: UTF16_LE/BE
+
+ pattern_enc: UTF32_BE/LE
+ target_enc: UTF32_LE/BE
+
+
+# void onig_free(regex_t* reg)
+
+ Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤Î¥á¥â¥ê¤ò²òÊü¤¹¤ë¡£
+
+ °ú¿ô
+ 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È
+
+
+
+# int onig_search(regex_t* reg, const UChar* str, const UChar* end, const UChar* start,
+ const UChar* range, OnigRegion* region, OnigOptionType option)
+
+ Àµµ¬É½¸½¤Çʸ»úÎó¤ò¸¡º÷¤·¡¢¸¡º÷·ë²Ì¤È¥Þ¥Ã¥ÁÎΰè¤òÊÖ¤¹¡£
+
+ Àµ¾ï½ªÎ»Ìá¤êÃÍ: ¥Þ¥Ã¥Á°ÌÃÖ (p - str >= 0)
+ ¸¡º÷¼ºÇÔ: ONIG_MISMATCH (< 0)
+
+ °ú¿ô
+ 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È
+ 2 str: ¸¡º÷ÂоÝʸ»úÎó
+ 3 end: ¸¡º÷ÂоÝʸ»úÎó¤Î½ªÃ¼¥¢¥É¥ì¥¹
+ 4 start: ¸¡º÷ÂоÝʸ»úÎó¤Î¸¡º÷ÀèÆ¬°ÌÃÖ³«»Ï¥¢¥É¥ì¥¹
+ 5 range: ¸¡º÷ÂоÝʸ»úÎó¤Î¸¡º÷ÀèÆ¬°ÌÃÖ½ªÃ¼¥¢¥É¥ì¥¹
+ Á°Êýõº÷ (start <= õº÷¤µ¤ì¤ëʸ»úÎó¤ÎÀèÆ¬ < range)
+ ¸åÊýõº÷ (range <= õº÷¤µ¤ì¤ëʸ»úÎó¤ÎÀèÆ¬ <= start)
+ 6 region: ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region) (NULL¤âµö¤µ¤ì¤ë)
+ 7 option: ¸¡º÷»þ¥ª¥×¥·¥ç¥ó
+
+ ONIG_OPTION_NOTBOL ʸ»úÎó¤ÎÀèÆ¬(str)¤ò¹ÔƬ¤È´ÇÐö¤µ¤Ê¤¤
+ ONIG_OPTION_NOTEOL ʸ»úÎó¤Î½ªÃ¼(end)¤ò¹ÔËö¤È´ÇÐö¤µ¤Ê¤¤
+ ONIG_OPTION_POSIX_REGION region°ú¿ô¤òPOSIX API¤Îregmatch_t[]¤Ë¤¹¤ë
+
+
+# int onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at,
+ OnigRegion* region, OnigOptionType option)
+
+ ʸ»úÎó¤Î»ØÄê°ÌÃ֤ǥޥåÁ¥ó¥°¤ò¹Ô¤¤¡¢·ë²Ì¤È¥Þ¥Ã¥ÁÎΰè¤òÊÖ¤¹¡£
+
+ Àµ¾ï½ªÎ»Ìá¤êÃÍ: ¥Þ¥Ã¥Á¤·¤¿¥Ð¥¤¥ÈĹ (>= 0)
+ not match: ONIG_MISMATCH ( < 0)
+
+ °ú¿ô
+ 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È
+ 2 str: ¸¡º÷ÂоÝʸ»úÎó
+ 3 end: ¸¡º÷ÂоÝʸ»úÎó¤Î½ªÃ¼¥¢¥É¥ì¥¹
+ 4 at: ¸¡º÷ÂоÝʸ»úÎó¤Î¸¡º÷¥¢¥É¥ì¥¹
+ 5 region: ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region) (NULL¤âµö¤µ¤ì¤ë)
+ 6 option: ¸¡º÷»þ¥ª¥×¥·¥ç¥ó
+
+ ONIG_OPTION_NOTBOL ʸ»úÎó¤ÎÀèÆ¬(str)¤ò¹ÔƬ¤È´ÇÐö¤µ¤Ê¤¤
+ ONIG_OPTION_NOTEOL ʸ»úÎó¤Î½ªÃ¼(end)¤ò¹ÔËö¤È´ÇÐö¤µ¤Ê¤¤
+ ONIG_OPTION_POSIX_REGION region°ú¿ô¤òPOSIX API¤Îregmatch_t[]¤Ë¤¹¤ë
+
+
+# OnigRegion* onig_region_new(void)
+
+ ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region)¤òºîÀ®¤¹¤ë¡£
+
+
+# void onig_region_free(OnigRegion* region, int free_self)
+
+ ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region)¤Ç»ÈÍѤµ¤ì¤Æ¤¤¤ë¥á¥â¥ê¤ò²òÊü¤¹¤ë¡£
+
+ °ú¿ô
+ 1 region: ¥Þ¥Ã¥ÁÎΰè¾ðÊ󥪥֥¸¥§¥¯¥È
+ 2 free_self: [1: region¼«¿È¤ò´Þ¤á¤ÆÁ´¤Æ²òÊü, 0: region¼«¿È¤Ï²òÊü¤·¤Ê¤¤]
+
+
+# void onig_region_copy(OnigRegion* to, OnigRegion* from)
+
+ ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region)¤òÊ£À½¤¹¤ë¡£
+
+ °ú¿ô
+ 1 to: ÂоÝÎΰè
+ 2 from: ¸µÎΰè
+
+
+# void onig_region_clear(OnigRegion* region)
+
+ ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region)¤ÎÃæÌ£¤ò¥¯¥ê¥¢¤¹¤ë¡£
+
+ °ú¿ô
+ 1 region: ÂоÝÎΰè
+
+
+# int onig_region_resize(OnigRegion* region, int n)
+
+ ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region)¤ÎÊá³Í¼°½¸¹ç(¥°¥ë¡¼¥×)¿ô¤òÊѹ¹¤¹¤ë¡£
+
+ Àµ¾ï½ªÎ»Ìá¤êÃÍ: ONIG_NORMAL
+
+ °ú¿ô
+ 1 region: ÂоÝÎΰè
+ 2 n: ¿·¤·¤¤¥µ¥¤¥º
+
+
+# int onig_name_to_group_numbers(regex_t* reg, const UChar* name, const UChar* name_end,
+ int** num_list)
+
+ »ØÄꤷ¤¿Ì¾Á°¤ËÂФ¹¤ë̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç(¥°¥ë¡¼¥×)¤Î
+ ¥°¥ë¡¼¥×ÈÖ¹æ¥ê¥¹¥È¤òÊÖ¤¹¡£
+ ̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤Ï¡¢(?<name>....)¤Ë¤è¤Ã¤ÆÄêµÁ¤Ç¤­¤ë¡£
+
+ Àµ¾ï½ªÎ»Ìá¤êÃÍ: »ØÄꤵ¤ì¤¿Ì¾Á°¤ËÂФ¹¤ë¥°¥ë¡¼¥×¿ô
+ (Îã /(?<x>..)(?<x>..)/ ==> 2)
+ ̾Á°¤ËÂФ¹¤ë¥°¥ë¡¼¥×¤¬Â¸ºß¤·¤Ê¤¤: -1
+
+ °ú¿ô
+ 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È
+ 2 name: Êá³Í¼°½¸¹ç(¥°¥ë¡¼¥×)̾
+ 3 name_end: Êá³Í¼°½¸¹ç(¥°¥ë¡¼¥×)̾¤Î½ªÃ¼¥¢¥É¥ì¥¹
+ 4 num_list: ÈÖ¹æ¥ê¥¹¥È¤òÊÖ¤¹¥¢¥É¥ì¥¹
+
+
+# int onig_name_to_backref_number(regex_t* reg, const UChar* name, const UChar* name_end,
+ OnigRegion *region)
+
+ »ØÄꤵ¤ì¤¿Ì¾Á°¤Î¸åÊý»²¾È(\k<name>)¤ËÂФ¹¤ëÊá³Í¼°½¸¹ç(¥°¥ë¡¼¥×)¤ÎÈÖ¹æ¤òÊÖ¤¹¡£
+ ̾Á°¤ËÂФ·¤Æ¡¢Ê£¿ô¤Î¥Þ¥Ã¥ÁÎΰ褬ͭ¸ú¤Ç¤¢¤ì¤Ð¡¢¤½¤ÎÃæ¤ÎºÇÂç¤ÎÈÖ¹æ¤òÊÖ¤¹¡£
+ ̾Á°¤ËÂФ¹¤ëÊá³Í¼°½¸¹ç¤¬°ì¸Ä¤·¤«¤Ê¤¤¤È¤­¤Ë¤Ï¡¢Âбþ¤¹¤ë¥Þ¥Ã¥ÁÎΰ褬ͭ¸ú¤«
+ ¤É¤¦¤«¤Ë´Ø·¸¤Ê¤¯¡¢¤½¤ÎÈÖ¹æ¤òÊÖ¤¹¡£(½¾¤Ã¤Æ¡¢region¤Ë¤ÏNULL¤òÅϤ·¤Æ¤â¤è¤¤¡£)
+
+ Àµ¾ï½ªÎ»Ìá¤êÃÍ: ÈÖ¹æ
+
+ °ú¿ô
+ 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È
+ 2 name: Êá³Í¼°½¸¹ç(¥°¥ë¡¼¥×)̾
+ 3 name_end: Êá³Í¼°½¸¹ç(¥°¥ë¡¼¥×)̾¤Î½ªÃ¼¥¢¥É¥ì¥¹
+ 4 region: search/match·ë²Ì¤Î¥Þ¥Ã¥ÁÎΰè
+
+
+# int onig_foreach_name(regex_t* reg,
+ int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*),
+ void* arg)
+
+ Á´¤Æ¤Î̾Á°¤ËÂФ·¤Æ¥³¡¼¥ë¥Ð¥Ã¥¯´Ø¿ô¸Æ¤Ó½Ð¤·¤ò¼Â¹Ô¤¹¤ë¡£
+
+ Àµ¾ï½ªÎ»Ìá¤êÃÍ: 0
+ ¥¨¥é¡¼: ¥³¡¼¥ë¥Ð¥Ã¥¯´Ø¿ô¤ÎÌá¤êÃÍ
+
+ °ú¿ô
+ 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È
+ 2 func: ¥³¡¼¥ë¥Ð¥Ã¥¯´Ø¿ô
+ func(name, name_end, <number of groups>, <group number's list>,
+ reg, arg);
+
+ func¤¬0°Ê³°¤ÎÃͤòÊÖ¤¹¤È¡¢¤½¤ì°Ê¹ß¤Î¥³¡¼¥ë¥Ð¥Ã¥¯¤Ï¹Ô¤Ê¤ï¤º¤Ë
+ ½ªÎ»¤¹¤ë¡£
+
+ 3 arg: func¤ËÂФ¹¤ëÄɲðú¿ô
+
+
+# int onig_number_of_names(regex_t* reg)
+
+ ¥Ñ¥¿¡¼¥óÃæ¤ÇÄêµÁ¤µ¤ì¤¿Ì¾Á°¤Î¿ô¤òÊÖ¤¹¡£
+ °ì¸Ä¤Î̾Á°¤Î¿½ÅÄêµÁ¤Ï°ì¸Ä¤È´ÇÐö¤¹¡£
+
+ °ú¿ô
+ 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È
+
+
+# OnigEncoding onig_get_encoding(regex_t* reg)
+# OnigOptionType onig_get_options(regex_t* reg)
+# OnigAmbigType onig_get_ambig_flag(regex_t* reg)
+# OnigSyntaxType* onig_get_syntax(regex_t* reg)
+
+ Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤ËÂФ·¤Æ¡¢Âбþ¤¹¤ëÃͤòÊÖ¤¹¡£
+
+ °ú¿ô
+ 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È
+
+
+# int onig_number_of_captures(regex_t* reg)
+
+ ¥Ñ¥¿¡¼¥óÃæ¤ÇÄêµÁ¤µ¤ì¤¿Êá³Í¥°¥ë¡¼¥×¤Î¿ô¤òÊÖ¤¹¡£
+
+ °ú¿ô
+ 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È
+
+
+# int onig_number_of_capture_histories(regex_t* reg)
+
+ ¥Ñ¥¿¡¼¥óÃæ¤ÇÄêµÁ¤µ¤ì¤¿Êá³ÍÍúÎò(?@...)¤Î¿ô¤òÊÖ¤¹¡£
+
+ »ÈÍѤ¹¤ëʸˡ¤ÇÊá³ÍÍúÎòµ¡Ç½¤¬Í­¸ú(ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)
+ ¤Ç¤Ê¤±¤ì¤Ð¡¢Êá³ÍÍúÎòµ¡Ç½¤Ï»ÈÍѤǤ­¤Ê¤¤¡£
+
+ °ú¿ô
+ 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È
+
+
+# OnigCaptureTreeNode* onig_get_capture_tree(OnigRegion* region)
+
+ Êá³ÍÍúÎò¥Ç¡¼¥¿¤Î¥ë¡¼¥È¥Î¡¼¥É¤òÊÖ¤¹¡£
+
+ ¥Þ¥Ã¥Á¤¬¼ºÇÔ¤·¤Æ¤¤¤ë¾ì¹ç¤Ë¤Ï¡¢¤³¤ÎÃͤÏÉÔÄê¤Ç¤¢¤ë¡£
+
+ °ú¿ô
+ 1 region: ¥Þ¥Ã¥ÁÎΰè
+
+
+# int onig_capture_tree_traverse(OnigRegion* region, int at,
+ int(*func)(int,int,int,int,int,void*), void* arg)
+
+ Êá³ÍÍúÎò¥Ç¡¼¥¿ÌÚ¤ò½ä²ó¤·¤Æ¥³¡¼¥ë¥Ð¥Ã¥¯¤¹¤ë¡£
+
+ Àµ¾ï½ªÎ»Ìá¤êÃÍ: 0
+ ¥¨¥é¡¼: ¥³¡¼¥ë¥Ð¥Ã¥¯´Ø¿ô¤ÎÌá¤êÃÍ
+
+ °ú¿ô
+ 1 region: ¥Þ¥Ã¥ÁÎΰè
+ 2 at: ¥³¡¼¥ë¥Ð¥Ã¥¯¤ò¹Ô¤Ê¤¦¥¿¥¤¥ß¥ó¥°
+
+ ONIG_TRAVERSE_CALLBACK_AT_FIRST:
+ ºÇ½é¤Ë¥³¡¼¥ë¥Ð¥Ã¥¯¤·¤Æ¡¢»Ò¥Î¡¼¥É¤ò½ä²ó
+ ONIG_TRAVERSE_CALLBACK_AT_LAST:
+ »Ò¥Î¡¼¥É¤ò½ä²ó¤·¤Æ¡¢¥³¡¼¥ë¥Ð¥Ã¥¯
+ ONIG_TRAVERSE_CALLBACK_AT_BOTH:
+ ºÇ½é¤Ë¥³¡¼¥ë¥Ð¥Ã¥¯¤·¤Æ¡¢»Ò¥Î¡¼¥É¤ò½ä²ó¡¢ºÇ¸å¤Ë¤â¤¦°ìÅÙ¥³¡¼¥ë¥Ð¥Ã¥¯
+
+ 3 func: ¥³¡¼¥ë¥Ð¥Ã¥¯´Ø¿ô
+ func¤¬0°Ê³°¤ÎÃͤòÊÖ¤¹¤È¡¢¤½¤ì°Ê¹ß¤Î½ä²ó¤Ï¹Ô¤Ê¤ï¤º¤Ë
+ ½ªÎ»¤¹¤ë¡£
+
+ int func(int group, int beg, int end, int level, int at,
+ void* arg)
+ group: ¥°¥ë¡¼¥×ÈÖ¹æ
+ beg: ¥Þ¥Ã¥Á³«»Ï°ÌÃÖ
+ end ¥Þ¥Ã¥Á½ªÎ»°ÌÃÖ
+ level: ¥Í¥¹¥È¥ì¥Ù¥ë (0¤«¤é)
+ at: ¥³¡¼¥ë¥Ð¥Ã¥¯¤¬¸Æ¤Ó½Ð¤µ¤ì¤¿¥¿¥¤¥ß¥ó¥°
+ ONIG_TRAVERSE_CALLBACK_AT_FIRST
+ ONIG_TRAVERSE_CALLBACK_AT_LAST
+ arg: Äɲðú¿ô
+
+ 4 arg; func¤ËÂФ¹¤ëÄɲðú¿ô
+
+
+# int onig_noname_group_capture_is_active(regex_t* reg)
+
+ ̾Á°¤Ê¤·¼°½¸¹ç¤ÎÊá³Íµ¡Ç½¤¬Í­¸ú¤«¤É¤¦¤«¤òÊÖ¤¹¡£
+
+ Í­¸ú: 1
+ ̵¸ú: 0
+
+ °ú¿ô
+ 1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È
+
+
+ ¥ª¥×¥·¥ç¥ó¤ÎONIG_OPTION_DONT_CAPTURE_GROUP¤¬ON --> ̵¸ú
+
+ ¥Ñ¥¿¡¼¥ó¤¬Ì¾Á°¤Ä¤­¼°½¸¹ç¤ò»ÈÍѤ·¤Æ¤¤¤ë
+ AND »ÈÍÑʸˡ¤Ç¡¢ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP¤¬ON
+ AND ¥ª¥×¥·¥ç¥ó¤ÎONIG_OPTION_CAPTURE_GROUP¤¬OFF
+ --> ̵¸ú
+
+ ¾åµ­°Ê³°¤Î¾ì¹ç --> Í­¸ú
+
+
+# UChar* onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
+
+ ʸ»ú°ì¸ÄʬÁ°¤Îʸ»úÎó°ÌÃÖ¤òÊÖ¤¹¡£
+
+ °ú¿ô
+ 1 enc: ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ 2 start: ʸ»úÎó¤ÎÀèÆ¬¥¢¥É¥ì¥¹
+ 3 s: ʸ»úÎóÃæ¤Î°ÌÃÖ
+
+
+# UChar* onigenc_get_left_adjust_char_head(OnigEncoding enc,
+ const UChar* start, const UChar* s)
+
+ ʸ»ú¤ÎÀèÆ¬¥Ð¥¤¥È°ÌÃ֤ˤʤë¤è¤¦¤Ëº¸Â¦¤ËÄ´À°¤·¤¿¥¢¥É¥ì¥¹¤òÊÖ¤¹¡£
+
+ °ú¿ô
+ 1 enc: ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ 2 start: ʸ»úÎó¤ÎÀèÆ¬¥¢¥É¥ì¥¹
+ 3 s: ʸ»úÎóÃæ¤Î°ÌÃÖ
+
+
+# UChar* onigenc_get_right_adjust_char_head(OnigEncoding enc,
+ const UChar* start, const UChar* s)
+
+ ʸ»ú¤ÎÀèÆ¬¥Ð¥¤¥È°ÌÃ֤ˤʤë¤è¤¦¤Ë±¦Â¦¤ËÄ´À°¤·¤¿¥¢¥É¥ì¥¹¤òÊÖ¤¹¡£
+
+ °ú¿ô
+ 1 enc: ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ 2 start: ʸ»úÎó¤ÎÀèÆ¬¥¢¥É¥ì¥¹
+ 3 s: ʸ»úÎóÃæ¤Î°ÌÃÖ
+
+
+# int onigenc_strlen(OnigEncoding enc, const UChar* s, const UChar* end)
+# int onigenc_strlen_null(OnigEncoding enc, const UChar* s)
+
+ ʸ»úÎó¤Îʸ»ú¿ô¤òÊÖ¤¹¡£
+
+
+# int onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
+
+ ʸ»úÎó¤Î¥Ð¥¤¥È¿ô¤òÊÖ¤¹¡£
+
+
+# int onig_set_default_syntax(OnigSyntaxType* syntax)
+
+ ¥Ç¥Õ¥©¥ë¥È¤ÎÀµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡ¤ò¥»¥Ã¥È¤¹¤ë¡£
+
+ °ú¿ô
+ 1 syntax: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡ
+
+
+# void onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from)
+
+ Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡ¤ò¥³¥Ô¡¼¤¹¤ë¡£
+
+ °ú¿ô
+ 1 to: ÂоÝ
+ 2 from: ¸µ
+
+
+# unsigned int onig_get_syntax_op(OnigSyntaxType* syntax)
+# unsigned int onig_get_syntax_op2(OnigSyntaxType* syntax)
+# unsigned int onig_get_syntax_behavior(OnigSyntaxType* syntax)
+# OnigOptionType onig_get_syntax_options(OnigSyntaxType* syntax)
+
+# void onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
+# void onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
+# void onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
+# void onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
+
+ Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡ¤ÎÍ×ÁǤò»²¾È/¼èÆÀ¤¹¤ë¡£
+
+ °ú¿ô
+ 1 syntax: Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡ
+ 2 op, op2, behavior, options: Í×ÁǤÎÃÍ
+
+
+# void onig_copy_encoding(OnigEncoding to, OnigEncoding from)
+
+ ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°¤ò¥³¥Ô¡¼¤¹¤ë¡£
+
+ °ú¿ô
+ 1 to: ÂоÝ
+ 2 from: ¸µ
+
+
+# int onig_set_meta_char(OnigEncoding enc, unsigned int what,
+ OnigCodePoint code)
+
+ ¥á¥¿Ê¸»ú¤ò»ØÄꤷ¤¿¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃͤ˥»¥Ã¥È¤¹¤ë¡£
+ ONIG_SYN_OP_VARIABLE_META_CHARACTERS¤¬Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡ¤ÇÍ­¸ú¤Ë
+ ¤Ê¤Ã¤Æ¤¤¤Ê¤¤¾ì¹ç¤Ë¤Ï¡¢¥¨¥¹¥±¡¼¥×ʸ»ú¤ò½ü¤¤¤Æ¡¢¤³¤³¤Ç»ØÄꤷ¤¿¥á¥¿Ê¸»ú¤Ï
+ µ¡Ç½¤·¤Ê¤¤¡£(Áȹþ¤ß¤Îʸˡ¤Ç¤ÏÍ­¸ú¤Ë¤·¤Æ¤¤¤Ê¤¤¡£)
+
+ Àµ¾ï½ªÎ»Ìá¤êÃÍ: ONIG_NORMAL
+
+ °ú¿ô
+ 1 enc: ÂоÝʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ 2 what: ¥á¥¿Ê¸»úµ¡Ç½¤Î»ØÄê
+
+ ONIG_META_CHAR_ESCAPE
+ ONIG_META_CHAR_ANYCHAR
+ ONIG_META_CHAR_ANYTIME
+ ONIG_META_CHAR_ZERO_OR_ONE_TIME
+ ONIG_META_CHAR_ONE_OR_MORE_TIME
+ ONIG_META_CHAR_ANYCHAR_ANYTIME
+
+ 3 code: ¥á¥¿Ê¸»ú¤Î¥³¡¼¥É¥Ý¥¤¥ó¥È ¤Þ¤¿¤Ï ONIG_INEFFECTIVE_META_CHAR.
+
+
+# OnigAmbigType onig_get_default_ambig_flag()
+
+ ¥Ç¥Õ¥©¥ë¥È¤ÎÛ£Ëæ¥Þ¥Ã¥Á¥Õ¥é¥°¤ò¼èÆÀ¤¹¤ë¡£
+
+
+# int onig_set_default_ambig_flag(OnigAmbigType ambig_flag)
+
+ ¥Ç¥Õ¥©¥ë¥È¤ÎÛ£Ëæ¥Þ¥Ã¥Á¥Õ¥é¥°¤ò¥»¥Ã¥È¤¹¤ë¡£
+
+ °ú¿ô
+ 1 ambig_flag: Û£Ëæ¥Þ¥Ã¥Á¥Õ¥é¥°
+
+
+# unsigned int onig_get_match_stack_limit_size(void)
+
+ ¥Þ¥Ã¥Á¥¹¥¿¥Ã¥¯¥µ¥¤¥º¤ÎºÇÂçÃͤòÊÖ¤¹¡£
+ (¥Ç¥Õ¥©¥ë¥È: 0 == ̵À©¸Â)
+
+
+# int onig_set_match_stack_limit_size(unsigned int size)
+
+ ¥Þ¥Ã¥Á¥¹¥¿¥Ã¥¯¥µ¥¤¥º¤ÎºÇÂçÃͤò»ØÄꤹ¤ë¡£
+ (size = 0: ̵À©¸Â)
+
+ Àµ¾ï½ªÎ»Ìá¤êÃÍ: ONIG_NORMAL
+
+
+# int onig_end(void)
+
+ ¥é¥¤¥Ö¥é¥ê¤Î»ÈÍѤò½ªÎ»¤¹¤ë¡£
+
+ Àµ¾ï½ªÎ»Ìá¤êÃÍ: ONIG_NORMAL
+
+ onig_init()¤òºÆÅٸƤӽФ·¤Æ¤â¡¢°ÊÁ°¤ËºîÀ®¤·¤¿Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È
+ ¤ò»ÈÍѤ¹¤ë¤³¤È¤Ï¤Ç¤­¤Ê¤¤¡£
+
+
+# const char* onig_version(void)
+
+ ¥Ð¡¼¥¸¥ç¥óʸ»úÎó¤òÊÖ¤¹¡£(Îã "2.2.8")
+
+// END
diff --git a/ext/mbstring/oniguruma/doc/FAQ b/ext/mbstring/oniguruma/doc/FAQ
new file mode 100644
index 000000000..1621a359e
--- /dev/null
+++ b/ext/mbstring/oniguruma/doc/FAQ
@@ -0,0 +1,33 @@
+FAQ 2006/05/15
+
+1. Longest match
+
+ You can execute longest match by using ONIG_OPTION_FIND_LONGEST option
+ in onig_new().
+
+
+2. Thread safe
+
+ In order to be thread safe, either (A) or (B) must be done.
+
+ (A) Oniguruma Layer
+
+ Define the macros below in the NOT_RUBY section of oniguruma/regint.h.
+
+ USE_MULTI_THREAD_SYSTEM
+ THREAD_ATOMIC_START
+ THREAD_ATOMIC_END
+ THREAD_PASS
+
+ (B) Application Layer
+
+ Multiple threads must not simultaneously create new regexp objects,
+ re-compile objects, or free objects, even if the objects involved
+ are different.
+
+
+3. Mailing list
+
+ There is no mailing list about Oniguruma.
+
+// END
diff --git a/ext/mbstring/oniguruma/doc/FAQ.ja b/ext/mbstring/oniguruma/doc/FAQ.ja
new file mode 100644
index 000000000..5f61b0955
--- /dev/null
+++ b/ext/mbstring/oniguruma/doc/FAQ.ja
@@ -0,0 +1,115 @@
+FAQ 2006/05/15
+
+1. ºÇĹ¥Þ¥Ã¥Á
+
+ onig_new()¤ÎÃæ¤Ç¡¢ONIG_OPTION_FIND_LONGEST¥ª¥×¥·¥ç¥ó
+ ¤ò»ÈÍѤ¹¤ì¤ÐºÇĹ¥Þ¥Ã¥Á¤Ë¤Ê¤ë¡£
+
+
+2. ¥¹¥ì¥Ã¥É¥»¡¼¥Õ
+
+ ¥¹¥ì¥Ã¥É¥»¡¼¥Õ¤Ë¤¹¤ë¤Ë¤Ï¡¢°Ê²¼¤Î(A)¤È(B)¤Î¤É¤Á¤é¤«¤ò¹Ô¤Ê¤¨¤Ð
+ ¤è¤¤¡£
+
+ (A) Oniguruma Layer
+
+ oniguruma/regint.h¤ÎÃæ¤ÎNOT_RUBY¤ÎÉôʬ¤Î°Ê²¼¤Î¥Þ¥¯¥í¤òÄêµÁ¤¹¤ë¡£
+
+ USE_MULTI_THREAD_SYSTEM
+ THREAD_ATOMIC_START
+ THREAD_ATOMIC_END
+ THREAD_PASS
+
+ (B) Application Layer
+
+ Ʊ»þ¤ËÊ£¿ô¤Î¥¹¥ì¥Ã¥É¤¬¡¢Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤òºîÀ®¤¹¤ë¡¢
+ ¤Þ¤¿¤Ï²òÊü¤¹¤ë¡¢¤³¤È¤ò¹Ô¤Ê¤Ã¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£
+ ¤½¤ì¤é¤Î¥ª¥Ö¥¸¥§¥¯¥È¤¬Á´¤¯Ê̤Τâ¤Î¤Ç¤¢¤Ã¤Æ¤â¡£
+
+ ¤â¤¦¾¯¤·¾Ü¤·¤¤ÀâÌÀ¤Ï¡¢¤³¤Î¥É¥­¥å¥á¥ó¥È¤ÎÃæ¤Î
+ "¥¹¥ì¥Ã¥É¥»¡¼¥Õ¤Ë´Ø¤¹¤ëÊä­"¤Ë½ñ¤¤¤Æ¤ª¤¤¤¿¡£
+
+
+3. ¥á¡¼¥ê¥ó¥°¥ê¥¹¥È
+
+ µ´¼Ö¤Ë´Ø¤¹¤ë¥á¡¼¥ê¥ó¥°¥ê¥¹¥È¤Ï¸ºß¤·¤Ê¤¤¡£
+
+//END
+
+
+
+¥¹¥ì¥Ã¥É¥»¡¼¥Õ¤Ë´Ø¤¹¤ëÊä­
+
+¥¹¥ì¥Ã¥É¥»¡¼¥Õ¤Ë¤¹¤ë¤Ë¤Ï¡¢¸ÄÊ̤Υ¢¥×¥ê¥±¡¼¥·¥ç¥ó¤ÎÃæ¤Ç¹Ô¤¦¤«¡¢
+Oniguruma¥é¥¤¥Ö¥é¥ê¤ÎÃæ¤Ç¹Ô¤¦¤«¡¢¤É¤Á¤é¤«¤òÁª¤Ö¤³¤È¤¬¤Ç¤­¤Þ¤¹¡£
+(Oniguruma¤ò»ÈÍѤ¹¤ë¦¤ÇÂн褹¤ë¤«¡¢Oniguruma¤ËÂн褵¤»¤ë¤«
+¤É¤Á¤é¤«ÊÒÊý¤Ç¹Ô¤¦É¬Íפ¬¤¢¤ë¤È¤¤¤¦¤³¤È¤Ç¤¹¡£)
+
+¤³¤ì¤é¤ÎÊýË¡¤Ë¤Ä¤¤¤Æ¡¢°Ê²¼(A)¤È(B)¤ÇÀâÌÀ¤·¤Þ¤¹¡£
+
+¥Þ¥ë¥Á¥¹¥ì¥Ã¥ÉAPI¤Ï¡¢¤½¤ì¤¾¤ì¤Î¥×¥é¥Ã¥È¥Õ¥©¡¼¥à¤Ë¤è¤Ã¤Æ¤â
+°Û¤Ê¤ê¤Þ¤¹¤Î¤Ç¡¢°Ê²¼¤ÎÀâÌÀ¤ÎÃæ¤Ç¶ñÂÎŪ¤Ë²¿¤ò¸Æ¤Ö¤Î¤«¤ò
+½ñ¤¯¤³¤È¤Ï̵Íý¤Ç¤¹¡£¼ÂºÝ¤Ë»ÈÍѤµ¤ì¤ë¥Þ¥ë¥Á¥¹¥ì¥Ã¥ÉAPI¤Ç¡¢
+Âбþ¤¹¤ëµ¡Ç½¤Î¤â¤Î¤ò»ØÄꤷ¤Æ¤¯¤À¤µ¤¤¡£
+
+(A) Oniguruma¤ÎÃæ¤ÇÂбþ¤¹¤ë¾ì¹ç
+
+oniguruma/regint.h¤ÎÃæ¤ÎNOT_RUBY¤Ç°Ï¤Þ¤ì¤Æ¤¤¤ëÉôʬ¤ÎÃæ¤Ç
+°Ê²¼¤Î¥Þ¥¯¥í¤òÄêµÁ¤·¤ÆºÆ¥³¥ó¥Ñ¥¤¥ë¤·¤Æ¤¯¤À¤µ¤¤¡£
+
+USE_MULTI_THREAD_SYSTEM
+
+ ñ¤ËÍ­¸ú¤Ë¤¹¤ì¤Ð¤è¤¤¤Ç¤¹¡£
+
+THREAD_ATOMIC_START
+THREAD_ATOMIC_END
+
+ THREAD_ATOMIC_START¤«¤éTHREAD_ATOMIC_END¤Ç°Ï¤Þ¤ì¤¿
+ ¥×¥í¥°¥é¥à¤Î¥³¡¼¥ÉÉôʬ¤ò¤¢¤ë¥¹¥ì¥Ã¥É¤¬¼Â¹ÔÃæ¤Ë¡¢Â¾¤Î
+ ¥¹¥ì¥Ã¥É¤Ë¼Â¹Ô¸¢¤¬°Üư¤·¤Ê¤¤¤³¤È¤òÊݾ㤹¤ë¤â¤Î¤ËÄêµÁ
+ ¤·¤Æ¤¯¤À¤µ¤¤¡£
+ (̾Á°¤ÎÄ̤ꡢ°Ï¤Þ¤ì¤¿¥³¡¼¥ÉÉôʬ¤ò¥¹¥ì¥Ã¥É¥¢¥È¥ß¥Ã¥¯¤Ë
+ ¤¹¤ë¤È¤¤¤¦°ÕÌ£)
+
+THREAD_PASS
+
+ ¤³¤ì¤ò¼Â¹Ô¤·¤¿¥¹¥ì¥Ã¥É¤«¤é¡¢Â¾¤Î¥¹¥ì¥Ã¥É¤Ë¼Â¹Ô¸¢¤ò°Ñ¾ù
+ ¤¹¤ë¤â¤Î¤ËÄêµÁ¤ò¤·¤Æ¤¯¤À¤µ¤¤¡£(ºÆ¥¹¥±¥¸¥å¡¼¥ë¤ò¸Æ¤Ó½Ð¤¹
+ ¤È¤¤¤¦°ÕÌ£)
+ Âбþ¤¹¤ëµ¡Ç½¤¬Á´¤¯¤Ê¤±¤ì¤Ð¡¢¶õÄêµÁ¤Ë¤·¤Æ¤¯¤À¤µ¤¤¡£
+
+(»²¹ÍÎã)
+Ruby¤Î¾ì¹ç¤òÎã¤Ë¤¹¤ë¤È¡¢
+Ruby¤Ï¼«Ê¬¼«¿È¤ÇÆÈ¼«¤Î¥¹¥ì¥Ã¥Éµ¡Ç½¤ò¼ÂÁõ¤·¤Æ¤¤¤Þ¤¹¡£
+¤½¤Îµ¡Ç½¤ò»ÈÍѤ¹¤ë¤È¡¢°Ê²¼¤Î¤è¤¦¤ËÄêµÁ¤¹¤ì¤Ð¤è¤¤¤³¤È¤Ë
+¤Ê¤ê¤Þ¤¹¡£
+
+#define USE_MULTI_THREAD_SYSTEM
+#define THREAD_ATOMIC_START DEFER_INTS
+#define THREAD_ATOMIC_END ENABLE_INTS
+#define THREAD_PASS rb_thread_schedule()
+
+Ruby¤Î¾ì¹ç¡¢¥¿¥¤¥Þ³ä¤ê¹þ¤ß¤ò»ÈÍѤ·¤Æ¡¢¥¹¥ì¥Ã¥É¤ÎÀÚ¤êÂØ¤¨¤ò
+¹Ô¤Ã¤Æ¤¤¤Þ¤¹¡£DEFER_INTS¤Ï³ä¤ê¹þ¤ß¥Ï¥ó¥É¥é¤Î¼Â¹Ô¤ò°ì»þŪ¤Ë
+»ß¤á¤ë¤¿¤á¤Î¥Þ¥¯¥í¤Ç¤¹¡£ENABLE_INTS¥Þ¥¯¥í¤Ç³ä¤ê¹þ¤ß¥Ï¥ó¥É¥é
+¤Î¼Â¹Ô¤òµö²Ä¤·¤Þ¤¹¡£
+¤³¤ì¤Ë¤è¤Ã¤Æ¡¢THREAD_ATOMIC_START¤«¤éTHREAD_ATOMIC_END
+¤Ç°Ï¤Þ¤ì¤¿Éôʬ¤Î¼Â¹ÔÃæ¤Ë¡¢Â¾¤Î¥¹¥ì¥Ã¥É¤Ë¼Â¹Ô¸¢¤¬°Üư¤·¤Þ¤»¤ó¡£
+
+
+(B) ¥¢¥×¥ê¥±¡¼¥·¥ç¥ó¤ÎÃæ¤ÇÂбþ¤¹¤ë¾ì¹ç
+
+°Ê²¼¤òÊݾ㤹¤ë¤è¤¦¤Ë¡¢¥¹¥ì¥Ã¥É¤Î¼Â¹Ô¤òÀ©¸æ¤·¤Æ¤¯¤À¤µ¤¤¡£
+
+Ʊ»þ¤ËÊ£¿ô¤Î¥¹¥ì¥Ã¥É¤¬¡¢Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤òºîÀ®¤¹¤ë¡¢¤Þ¤¿¤Ï²òÊü¤¹¤ë¡¢¤³¤È¤ò
+¹Ô¤Ê¤Ã¤Æ¤Ï¤Ê¤é¤Ê¤¤¡£¤½¤ì¤é¤Î¥ª¥Ö¥¸¥§¥¯¥È¤¬Á´¤¯Ê̤Τâ¤Î¤Ç¤¢¤Ã¤Æ¤â¡£
+
+onig_new(), onig_new_deluxe(), onig_free()¤Î¤É¤ì¤«¤Î¸Æ¤Ó½Ð¤·¤ò¡¢
+Ê£¿ô¤Î¥¹¥ì¥Ã¥É¤¬Æ±»þ¤Ë¼Â¹Ô¤¹¤ë¤³¤È¤òÈò¤±¤Æ¤¯¤À¤µ¤¤¡£Æ±»þ¤Ç¤Ê¤±¤ì¤ÐÊ̤ˤ«¤Þ¤¤¤Þ¤»¤ó¡£
+
+¤³¤ì¤Ï²¿¸ÎɬÍפʤΤ«¤È¤¤¤¦¤È¡¢Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤òºîÀ®¤¹¤ë
+²áÄø¤Ç¡¢ÆâÉô¤Ç¶¦Ä̤˻²¾È¤¹¤ë¥Æ¡¼¥Ö¥ë¤¬¤¢¤ê¤Þ¤¹¡£
+¤³¤Î¥Æ¡¼¥Ö¥ë¤ËÂФ·¤Æ¤Î¥Ç¡¼¥¿ÅÐÏ¿½èÍý¤¬Ê£¿ô¤Î¥¹¥ì¥Ã¥É¤Ç¾×ÆÍ¤·¤Æ
+°Û¾ï¤Ê¾õÂ֤ˤʤé¤Ê¤¤¤¿¤á¤ËɬÍפǤ¹¡£
+
+// END
diff --git a/ext/mbstring/oniguruma/doc/RE b/ext/mbstring/oniguruma/doc/RE
new file mode 100644
index 000000000..5a2783d16
--- /dev/null
+++ b/ext/mbstring/oniguruma/doc/RE
@@ -0,0 +1,412 @@
+Oniguruma Regular Expressions Version 4.3.0 2006/08/17
+
+syntax: ONIG_SYNTAX_RUBY (default)
+
+
+1. Syntax elements
+
+ \ escape (enable or disable meta character meaning)
+ | alternation
+ (...) group
+ [...] character class
+
+
+2. Characters
+
+ \t horizontal tab (0x09)
+ \v vertical tab (0x0B)
+ \n newline (0x0A)
+ \r return (0x0D)
+ \b back space (0x08)
+ \f form feed (0x0C)
+ \a bell (0x07)
+ \e escape (0x1B)
+ \nnn octal char (encoded byte value)
+ \xHH hexadecimal char (encoded byte value)
+ \x{7HHHHHHH} wide hexadecimal char (character code point value)
+ \cx control char (character code point value)
+ \C-x control char (character code point value)
+ \M-x meta (x|0x80) (character code point value)
+ \M-\C-x meta control char (character code point value)
+
+ (* \b is effective in character class [...] only)
+
+
+3. Character types
+
+ . any character (except newline)
+
+ \w word character
+
+ Not Unicode:
+ alphanumeric, "_" and multibyte char.
+
+ Unicode:
+ General_Category -- (Letter|Mark|Number|Connector_Punctuation)
+
+ \W non word char
+
+ \s whitespace char
+
+ Not Unicode:
+ \t, \n, \v, \f, \r, \x20
+
+ Unicode:
+ 0009, 000A, 000B, 000C, 000D, 0085(NEL),
+ General_Category -- Line_Separator
+ -- Paragraph_Separator
+ -- Space_Separator
+
+ \S non whitespace char
+
+ \d decimal digit char
+
+ Unicode: General_Category -- Decimal_Number
+
+ \D non decimal digit char
+
+ \h hexadecimal digit char [0-9a-fA-F]
+
+ \H non hexadecimal digit char
+
+
+4. Quantifier
+
+ greedy
+
+ ? 1 or 0 times
+ * 0 or more times
+ + 1 or more times
+ {n,m} at least n but not more than m times
+ {n,} at least n times
+ {,n} at least 0 but not more than n times ({0,n})
+ {n} n times
+
+ reluctant
+
+ ?? 1 or 0 times
+ *? 0 or more times
+ +? 1 or more times
+ {n,m}? at least n but not more than m times
+ {n,}? at least n times
+ {,n}? at least 0 but not more than n times (== {0,n}?)
+
+ possessive (greedy and does not backtrack after repeated)
+
+ ?+ 1 or 0 times
+ *+ 0 or more times
+ ++ 1 or more times
+
+ ({n,m}+, {n,}+, {n}+ are possessive op. in ONIG_SYNTAX_JAVA only)
+
+ ex. /a*+/ === /(?>a*)/
+
+
+5. Anchors
+
+ ^ beginning of the line
+ $ end of the line
+ \b word boundary
+ \B not word boundary
+ \A beginning of string
+ \Z end of string, or before newline at the end
+ \z end of string
+ \G matching start position (*)
+
+ * Ruby Regexp:
+ previous end-of-match position
+ (This specification is not related to this library.)
+
+
+6. Character class
+
+ ^... negative class (lowest precedence operator)
+ x-y range from x to y
+ [...] set (character class in character class)
+ ..&&.. intersection (low precedence at the next of ^)
+
+ ex. [a-w&&[^c-g]z] ==> ([a-w] AND ([^c-g] OR z)) ==> [abh-w]
+
+ * If you want to use '[', '-', ']' as a normal character
+ in a character class, you should escape these characters by '\'.
+
+
+ POSIX bracket ([:xxxxx:], negate [:^xxxxx:])
+
+ Not Unicode Case:
+
+ alnum alphabet or digit char
+ alpha alphabet
+ ascii code value: [0 - 127]
+ blank \t, \x20
+ cntrl
+ digit 0-9
+ graph include all of multibyte encoded characters
+ lower
+ print include all of multibyte encoded characters
+ punct
+ space \t, \n, \v, \f, \r, \x20
+ upper
+ xdigit 0-9, a-f, A-F
+
+
+ Unicode Case:
+
+ alnum Letter | Mark | Decimal_Number
+ alpha Letter | Mark
+ ascii 0000 - 007F
+ blank Space_Separator | 0009
+ cntrl Control | Format | Unassigned | Private_Use | Surrogate
+ digit Decimal_Number
+ graph [[:^space:]] && ^Control && ^Unassigned && ^Surrogate
+ lower Lowercase_Letter
+ print [[:graph:]] | [[:space:]]
+ punct Connector_Punctuation | Dash_Punctuation | Close_Punctuation |
+ Final_Punctuation | Initial_Punctuation | Other_Punctuation |
+ Open_Punctuation
+ space Space_Separator | Line_Separator | Paragraph_Separator |
+ 0009 | 000A | 000B | 000C | 000D | 0085
+ upper Uppercase_Letter
+ xdigit 0030 - 0039 | 0041 - 0046 | 0061 - 0066
+ (0-9, a-f, A-F)
+
+
+7. Extended groups
+
+ (?#...) comment
+
+ (?imx-imx) option on/off
+ i: ignore case
+ m: multi-line (dot(.) match newline)
+ x: extended form
+ (?imx-imx:subexp) option on/off for subexp
+
+ (?:subexp) not captured group
+ (subexp) captured group
+
+ (?=subexp) look-ahead
+ (?!subexp) negative look-ahead
+ (?<=subexp) look-behind
+ (?<!subexp) negative look-behind
+
+ Subexp of look-behind must be fixed character length.
+ But different character length is allowed in top level
+ alternatives only.
+ ex. (?<=a|bc) is OK. (?<=aaa(?:b|cd)) is not allowed.
+
+ In negative-look-behind, captured group isn't allowed,
+ but shy group(?:) is allowed.
+
+ (?>subexp) atomic group
+ don't backtrack in subexp.
+
+ (?<name>subexp) define named group
+ (All characters of the name must be a word character.
+ And the first character must not be a digit or upper case)
+
+ Not only a name but a number is assigned like a captured
+ group.
+
+ Assigning the same name to two or more subexps is allowed.
+ In this case, a subexp call can not be performed although
+ the back reference is possible.
+
+
+8. Back reference
+
+ \n back reference by group number (n >= 1)
+ \k<name> back reference by group name
+
+ In a back reference by a name that is defined more than once,
+ the subexp with the larger number is referred to preferentially.
+ (When it has not matched, the group with the smaller number is referred to.)
+
+ * Back reference by group number is forbidden if named group is defined
+ in the pattern and ONIG_OPTION_CAPTURE_GROUP is not set.
+
+
+ back reference with nest level
+
+ (This function is disabled in Ruby 1.9.)
+
+ \k<name+n> n: 0, 1, 2, ...
+ \k<name-n> n: 0, 1, 2, ...
+
+ Designates the nest level relative to the back reference position.
+
+ ex 1.
+
+ /\A(?<a>|.|(?:(?<b>.)\g<a>\k<b+0>))\z/.match("reer")
+
+ ex 2.
+
+ r = Regexp.compile(<<'__REGEXP__'.strip, Regexp::EXTENDED)
+ (?<element> \g<stag> \g<content>* \g<etag> ){0}
+ (?<stag> < \g<name> \s* > ){0}
+ (?<name> [a-zA-Z_:]+ ){0}
+ (?<content> [^<&]+ (\g<element> | [^<&]+)* ){0}
+ (?<etag> </ \k<name+1> >){0}
+ \g<element>
+ __REGEXP__
+
+ p r.match('<foo>f<bar>bbb</bar>f</foo>').captures
+
+
+
+9. Subexp call ("Tanaka Akira special")
+
+ \g<name> call by group name
+ \g<n> call by group number (n >= 1)
+
+ * left-most recursive call is not allowed.
+ ex. (?<name>a|\g<name>b) => error
+ (?<name>a|b\g<name>c) => OK
+
+ * Call by group number is forbidden if named group is defined in the pattern
+ and ONIG_OPTION_CAPTURE_GROUP is not set.
+
+ * If the option status of called group is different from calling position
+ then the group's option is effective.
+
+ ex. (?-i:\g<name>)(?i:(?<name>a)){0} match to "A"
+
+
+10. Captured group
+
+ Behavior of the no-named group (...) changes with the following conditions.
+ (But named group is not changed.)
+
+ case 1. /.../ (named group is not used, no option)
+
+ (...) is treated as a captured group.
+
+ case 2. /.../g (named group is not used, 'g' option)
+
+ (...) is treated as a no-captured group (?:...).
+
+ case 3. /..(?<name>..)../ (named group is used, no option)
+
+ (...) is treated as a no-captured group (?:...).
+ numbered-backref/call is not allowed.
+
+ case 4. /..(?<name>..)../G (named group is used, 'G' option)
+
+ (...) is treated as a captured group.
+ numbered-backref/call is allowed.
+
+ where
+ g: ONIG_OPTION_DONT_CAPTURE_GROUP
+ G: ONIG_OPTION_CAPTURE_GROUP
+
+ ('g' and 'G' options were discussed on the ruby-dev ML)
+
+ These options are not implemented in Ruby level.
+
+
+-----------------------------
+A-1. Syntax depend options
+
+ + ONIG_SYNTAX_RUBY
+ (?m): dot(.) match newline
+
+ + ONIG_SYNTAX_PERL and ONIG_SYNTAX_JAVA
+ (?s): dot(.) match newline
+ (?m): ^ match after newline, $ match before newline
+
+
+A-2. Original extensions
+
+ + hexadecimal digit char type \h, \H
+ + named group (?<name>...)
+ + named backref \k<name>
+ + subexp call \g<name>, \g<group-num>
+
+
+A-3. Features lacking compared with Perl 5.8.0
+
+ + [:word:]
+ + \N{name}
+ + \l,\u,\L,\U, \X, \C
+ + (?{code})
+ + (??{code})
+ + (?(condition)yes-pat|no-pat)
+
+ * \Q...\E
+ This is effective on ONIG_SYNTAX_PERL and ONIG_SYNTAX_JAVA.
+
+ * \p{property}, \P{property}
+ This is effective on ONIG_SYNTAX_PERL and ONIG_SYNTAX_JAVA.
+ Alnum, Alpha, Blank, Cntrl, Digit, Graph, Lower,
+ Print, Punct, Space, Upper, XDigit, ASCII are supported.
+
+ Prefix 'Is' of property name is allowed in ONIG_SYNTAX_PERL only.
+ ex. \p{IsXDigit}.
+
+ Negation operator of property is supported in ONIG_SYNTAX_PERL only.
+ \p{^...}, \P{^...}
+
+
+A-4. Differences with Japanized GNU regex(version 0.12) of Ruby
+
+ + add hexadecimal digit char type (\h, \H)
+ + add look-behind
+ (?<=fixed-char-length-pattern), (?<!fixed-char-length-pattern)
+ + add possessive quantifier. ?+, *+, ++
+ + add operations in character class. [], &&
+ ('[' must be escaped to be used as a usual char in a character class.)
+ + add named group and subexp call.
+ + octal or hexadecimal number sequence can be treated as
+ a multibyte code char in character class if multibyte encoding
+ is specified.
+ (ex. [\xa1\xa2], [\xa1\xa7-\xa4\xa1])
+ + allow the range of single byte char and multibyte char in character
+ class.
+ ex. /[a-<<any EUC-JP character>>]/ in EUC-JP encoding.
+ + effect range of isolated option is to next ')'.
+ ex. (?:(?i)a|b) is interpreted as (?:(?i:a|b)), not (?:(?i:a)|b).
+ + isolated option is not transparent to previous pattern.
+ ex. a(?i)* is a syntax error pattern.
+ + an incomplete left brace is allowed as a usual string.
+ ex. /{/, /({)/, /a{2,3/ etc...
+ + negative POSIX bracket [:^xxxx:] is supported.
+ + POSIX bracket [:ascii:] is added.
+ + repeat of look-ahead is not allowed.
+ ex. /(?=a)*/, /(?!b){5}/
+ + Ignore case option is effective to numbered character.
+ ex. /\x61/i =~ "A"
+ + In the range quantifier, the minimum count can be omitted.
+ /a{,n}/ == /a{0,n}/
+ Omitting both the minimum and the maximum at the same time
+ is not allowed. (/a{,}/)
+ + /a{n}?/ is not a non-greedy operator.
+ /a{n}?/ == /(?:a{n})?/
+ + invalid back references are checked and cause an error.
+ /\1/, /(a)\2/
+ + Zero-length match in infinite repeat stops the repeat,
+ then changes of the capture group status are checked as stop condition.
+ /(?:()|())*\1\2/ =~ ""
+ /(?:\1a|())*/ =~ "a"
+
+
+A-5. Disabled functions by default syntax
+
+ + capture history
+
+ (?@...) and (?@<name>...)
+
+ ex. /(?@a)*/.match("aaa") ==> [<0-1>, <1-2>, <2-3>]
+
+ see sample/listcap.c file.
+
+
+A-6. Problems
+
+ + Invalid encoding byte sequence is not checked in UTF-8.
+
+ * Invalid first byte is treated as a character.
+ /./u =~ "\xa3"
+
+ * Incomplete byte sequence is not checked.
+ /\w+/ =~ "a\xf3\x8ec"
+
+// END
diff --git a/ext/mbstring/oniguruma/doc/RE.ja b/ext/mbstring/oniguruma/doc/RE.ja
new file mode 100644
index 000000000..51681715c
--- /dev/null
+++ b/ext/mbstring/oniguruma/doc/RE.ja
@@ -0,0 +1,424 @@
+µ´¼Ö Àµµ¬É½¸½ Version 4.3.0 2006/08/17
+
+»ÈÍÑʸˡ: ONIG_SYNTAX_RUBY (´ûÄêÃÍ)
+
+
+1. ´ðËÜÍ×ÁÇ
+
+ \ ÂàÈò½¤¾þ (¥¨¥¹¥±¡¼¥×) Àµµ¬É½¸½µ­¹æ¤ÎÍ­¸ú/̵¸ú¤ÎÀ©¸æ
+ | ÁªÂò»Ò
+ (...) ¼°½¸¹ç (¥°¥ë¡¼¥×)
+ [...] ʸ»ú½¸¹ç (ʸ»ú¥¯¥é¥¹)
+
+
+2. ʸ»ú
+
+ \t ¿åÊ¿¥¿¥Ö (0x09)
+ \v ¿âľ¥¿¥Ö (0x0B)
+ \n ²þ¹Ô (0x0A)
+ \r Éüµ¢ (0x0D)
+ \b ¸åÂà¶õÇò (0x08)
+ \f ²þÊÇ (0x0C)
+ \a ¾â (0x07)
+ \e ÂàÈò½¤¾þ (0x1B)
+ \nnn Ȭ¿Ê¿ôɽ¸½ É乿²½¥Ð¥¤¥ÈÃÍ(¤Î°ìÉô)
+ \xHH ½½Ï»¿Ê¿ôɽ¸½ É乿²½¥Ð¥¤¥ÈÃÍ(¤Î°ìÉô)
+ \x{7HHHHHHH} ³ÈÄ¥½½Ï»¿Ê¿ôɽ¸½ ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ
+ \cx À©¸æÊ¸»úɽ¸½ ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ
+ \C-x À©¸æÊ¸»úɽ¸½ ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ
+ \M-x Ķ (x|0x80) ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ
+ \M-\C-x Ķ + À©¸æÊ¸»úɽ¸½ ¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃÍ
+
+ ¢¨ \b¤Ï¡¢Ê¸»ú½¸¹çÆâ¤Ç¤Î¤ßÍ­¸ú
+
+
+3. ʸ»ú¼ï
+
+ . Ǥ°Õʸ»ú (²þ¹Ô¤ò½ü¤¯)
+
+ \w ñ¸ì¹½À®Ê¸»ú
+
+ Unicode°Ê³°¤Î¾ì¹ç:
+ ±Ñ¿ô»ú, "_" ¤ª¤è¤Ó ¿¥Ð¥¤¥Èʸ»ú¡£
+
+ Unicode¤Î¾ì¹ç:
+ General_Category -- (Letter|Mark|Number|Connector_Punctuation)
+
+ \W Èóñ¸ì¹½À®Ê¸»ú
+
+ \s ¶õÇòʸ»ú
+
+ Unicode°Ê³°¤Î¾ì¹ç:
+ \t, \n, \v, \f, \r, \x20
+
+ Unicode¤Î¾ì¹ç:
+ 0009, 000A, 000B, 000C, 000D, 0085(NEL),
+ General_Category -- Line_Separator
+ -- Paragraph_Separator
+ -- Space_Separator
+
+ \S Èó¶õÇòʸ»ú
+
+ \d 10¿Ê¿ô»ú
+
+ Unicode¤Î¾ì¹ç: General_Category -- Decimal_Number
+
+ \D Èó10¿Ê¿ô»ú
+
+ \h 16¿Ê¿ô»ú [0-9a-fA-F]
+
+ \H Èó16¿Ê¿ô»ú
+
+
+
+4. ÎÌ»ØÄê»Ò
+
+ ÍßÄ¥¤ê
+
+ ? °ì²ó¤Þ¤¿¤ÏÎí²ó
+ * Îí²ó°Ê¾å
+ + °ì²ó°Ê¾å
+ {n,m} n²ó°Ê¾åm²ó°Ê²¼
+ {n,} n²ó°Ê¾å
+ {,n} Îí²ó°Ê¾ån²ó°Ê²¼ ({0,n})
+ {n} n²ó
+
+ ̵Íß
+
+ ?? °ì²ó¤Þ¤¿¤ÏÎí²ó
+ *? Îí²ó°Ê¾å
+ +? °ì²ó°Ê¾å
+ {n,m}? n²ó°Ê¾åm²ó°Ê²¼
+ {n,}? n²ó°Ê¾å
+ {,n}? Îí²ó°Ê¾ån²ó°Ê²¼ (== {0,n}?)
+
+ ¶¯Íß (ÍßÄ¥¤ê¤Ç¡¢·«¤êÊÖ¤·¤ËÀ®¸ù¤·¤¿¸å¤Ï²ó¿ô¤ò¸º¤é¤¹¤è¤¦¤Ê¸åÂàºÆ»î¹Ô¤ò¤·¤Ê¤¤)
+
+ ?+ °ì²ó¤Þ¤¿¤ÏÎí²ó
+ *+ Îí²ó°Ê¾å
+ ++ °ì²ó°Ê¾å
+
+ ({n,m}+, {n,}+, {n}+ ¤Ï¡¢ONIG_SYNTAX_JAVA¤Ç¤Î¤ß¶¯ÍߤʻØÄê»Ò)
+
+ Îã. /a*+/ === /(?>a*)/
+
+
+5. ÉÅ
+
+ ^ ¹ÔƬ
+ $ ¹ÔËö
+ \b ñ¸ì¶­³¦
+ \B Èóñ¸ì¶­³¦
+ \A ʸ»úÎóÀèÆ¬
+ \Z ʸ»úÎóËöÈø¡¢¤Þ¤¿¤Ïʸ»úÎóËöÈø¤Î²þ¹Ô¤ÎľÁ°
+ \z ʸ»úÎóËöÈø
+ \G ¾È¹ç³«»Ï°ÌÃÖ(*)
+
+ * Ruby Regexp:
+ Á°²ó¾È¹çÀ®¸ùËöÈø°ÌÃÖ
+ (¤³¤Î»ÅÍͤÏRuby¤Î¼ÂÁõ¤Ë´Ø¤¹¤ë¤â¤Î¤Ç¤¢¤ê¡¢
+ Àµµ¬É½¸½¥é¥¤¥Ö¥é¥ê¤È¤Ï̵´Ø·¸)
+
+
+6. ʸ»ú½¸¹ç
+
+ ^... ÈÝÄê (ºÇÄãÍ¥ÀèÅٱ黻»Ò)
+ x-y ÈÏ°Ï (x¤«¤éy¤Þ¤Ç)
+ [...] ½¸¹ç (ʸ»ú½¸¹çÆâʸ»ú½¸¹ç)
+ ..&&.. Àѱ黻 (^¤Î¼¡¤ËÍ¥ÀèÅÙ¤¬Ä㤤±é»»»Ò)
+
+ Îã. [a-w&&[^c-g]z] ==> ([a-w] and ([^c-g] or z)) ==> [abh-w]
+
+ ¢¨ '[', '-', ']'¤ò¡¢Ê¸»ú½¸¹çÆâ¤ÇÄ̾ïʸ»ú¤Î°ÕÌ£¤Ç»ÈÍѤ·¤¿¤¤¾ì¹ç¤Ë¤Ï¡¢
+ ¤³¤ì¤é¤Îʸ»ú¤ò'\'¤ÇÂàÈò½¤¾þ¤·¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£
+
+
+ POSIX¥Ö¥é¥±¥Ã¥È ([:xxxxx:], ÈÝÄê [:^xxxxx:])
+
+ Unicode°Ê³°¤Î¾ì¹ç:
+
+ alnum ±Ñ¿ô»ú
+ alpha 񥯣
+ ascii 0 - 127
+ blank \t, \x20
+ cntrl
+ digit 0-9
+ graph ¿¥Ð¥¤¥Èʸ»úÁ´Éô¤ò´Þ¤à
+ lower
+ print ¿¥Ð¥¤¥Èʸ»úÁ´Éô¤ò´Þ¤à
+ punct
+ space \t, \n, \v, \f, \r, \x20
+ upper
+ xdigit 0-9, a-f, A-F
+
+ Unicode¤Î¾ì¹ç:
+
+ alnum Letter | Mark | Decimal_Number
+ alpha Letter | Mark
+ ascii 0000 - 007F
+ blank Space_Separator | 0009
+ cntrl Control | Format | Unassigned | Private_Use | Surrogate
+ digit Decimal_Number
+ graph [[:^space:]] && ^Control && ^Unassigned && ^Surrogate
+ lower Lowercase_Letter
+ print [[:graph:]] | [[:space:]]
+ punct Connector_Punctuation | Dash_Punctuation | Close_Punctuation |
+ Final_Punctuation | Initial_Punctuation | Other_Punctuation |
+ Open_Punctuation
+ space Space_Separator | Line_Separator | Paragraph_Separator |
+ 0009 | 000A | 000B | 000C | 000D | 0085
+ upper Uppercase_Letter
+ xdigit 0030 - 0039 | 0041 - 0046 | 0061 - 0066
+ (0-9, a-f, A-F)
+
+
+7. ³ÈÄ¥¼°½¸¹ç
+
+ (?#...) Ãí¼á
+ (?imx-imx) ¸ÉΩ¥ª¥×¥·¥ç¥ó
+ i: Âçʸ»ú¾®Ê¸»ú¾È¹ç
+ m: Ê£¿ô¹Ô
+ x: ³ÈÄ¥·Á¼°
+ (?imx-imx:¼°) ¼°¥ª¥×¥·¥ç¥ó
+
+ (¼°) Êá³Í¼°½¸¹ç
+ (?:¼°) ÈóÊá³Í¼°½¸¹ç
+
+ (?=¼°) ÀèÆÉ¤ß
+ (?!¼°) ÈÝÄêÀèÆÉ¤ß
+ (?<=¼°) Ìá¤êÆÉ¤ß
+ (?<!¼°) ÈÝÄêÌá¤êÆÉ¤ß
+
+ Ìá¤êÆÉ¤ß¤Î¼°¤Ï¸ÇÄêʸ»úĹ¤Ç¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£
+ ¤·¤«¤·¡¢ºÇ¾å°Ì¤ÎÁªÂò»Ò¤À¤±¤Ï°Û¤Ê¤Ã¤¿Ê¸»úŤ¬µö¤µ¤ì¤ë¡£
+ Îã. (?<=a|bc) ¤Ïµö²Ä. (?<=aaa(?:b|cd)) ¤ÏÉÔµö²Ä
+
+ ÈÝÄêÌá¤êÆÉ¤ß¤Ç¤Ï¡¢Êá³Í¼°½¸¹ç¤Ïµö¤µ¤ì¤Ê¤¤¤¬¡¢
+ ÈóÊá³Í¼°½¸¹ç¤Ïµö¤µ¤ì¤ë¡£
+
+ (?>¼°) ¸¶»ÒŪ¼°½¸¹ç
+ ¼°Á´ÂΤòÄ̲ᤷ¤¿¤È¤­¡¢¼°¤ÎÃæ¤Ç¤Î¸åÂàºÆ»î¹Ô¤ò¹Ô¤Ê¤ï¤Ê¤¤
+
+ (?<name>¼°) ̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç
+ ¼°½¸¹ç¤Ë̾Á°¤ò³ä¤êÅö¤Æ¤ë(ÄêµÁ¤¹¤ë)¡£
+ (̾Á°¤Ïñ¸ì¹½À®Ê¸»ú¤Ç¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£ºÇ½é¤Îʸ»ú¤Ï
+ ±ÑÂçʸ»ú¤Ç¤¢¤Ã¤Æ¤Ï¤¤¤±¤Ê¤¤¡£)
+
+ ̾Á°¤À¤±¤Ç¤Ê¤¯¡¢Êá³Í¼°½¸¹ç¤ÈƱÍͤËÈÖ¹æ¤â³ä¤êÅö¤Æ¤é¤ì¤ë¡£
+ ÈÖ¹æ»ØÄ꤬¶Ø»ß¤µ¤ì¤Æ¤¤¤Ê¤¤¾õÂÖ (10. Êá³Í¼°½¸¹ç ¤ò»²¾È)
+ ¤Î¤È¤­¤Ï¡¢Ì¾Á°¤ò»È¤ï¤Ê¤¤¤ÇÈÖ¹æ¤Ç¤â»²¾È¤Ç¤­¤ë¡£
+
+ Ê£¿ô¤Î¼°½¸¹ç¤ËƱ¤¸Ì¾Á°¤òÍ¿¤¨¤ë¤³¤È¤Ïµö¤µ¤ì¤Æ¤¤¤ë¡£
+ ¤³¤Î¾ì¹ç¤Ë¤Ï¡¢¤³¤Î̾Á°¤ò»ÈÍѤ·¤¿¸åÊý»²¾È¤Ï²Äǽ¤Ç¤¢¤ë¤¬¡¢
+ Éôʬ¼°¸Æ½Ð¤·¤Ï¤Ç¤­¤Ê¤¤¡£
+
+
+8. ¸åÊý»²¾È
+
+ \n ÈÖ¹æ»ØÄ껲¾È (n >= 1)
+ \k<name> ̾Á°»ØÄ껲¾È
+
+ ̾Á°»ØÄ껲¾È¤Ç¡¢¤½¤Î̾Á°¤¬Ê£¿ô¤Î¼°½¸¹ç¤Ç¿½ÅÄêµÁ¤µ¤ì¤Æ¤¤¤ë¾ì¹ç¤Ë¤Ï¡¢
+ ÈÖ¹æ¤ÎÂ礭¤¤¼°½¸¹ç¤«¤éÍ¥ÀèŪ¤Ë»²¾È¤µ¤ì¤ë¡£
+ (¥Þ¥Ã¥Á¤·¤Ê¤¤¤È¤­¤Ë¤ÏÈÖ¹æ¤Î¾®¤µ¤¤¼°½¸¹ç¤¬»²¾È¤µ¤ì¤ë)
+
+ ¢¨ ÈÖ¹æ»ØÄ껲¾È¤Ï¡¢Ì¾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤¬ÄêµÁ¤µ¤ì¡¢
+ ¤«¤Ä ONIG_OPTION_CAPTURE_GROUP¤¬»ØÄꤵ¤ì¤Æ¤¤¤Ê¤¤¾ì¹ç¤Ë¤Ï¡¢
+ ¶Ø»ß¤µ¤ì¤ë¡£(10. Êá³Í¼°½¸¹ç ¤ò»²¾È)
+
+
+ ¥Í¥¹¥È¥ì¥Ù¥ëÉÕ¤­¸åÊý»²¾È
+
+ ¤³¤Îµ¡Ç½¤Ï¸½ºß¡¢Ruby 1.9¤Ç¤Ï̵¸ú¤Ë¤·¤Æ¤¤¤ë¡£
+
+ \k<name+n> n: 0, 1, 2, ...
+ \k<name-n> n: 0, 1, 2, ...
+
+ ¸åÊý»²¾È¤Î°ÌÃÖ¤«¤éÁêÂÐŪ¤ÊÉôʬ¼°¸Æ½Ð¤·¥Í¥¹¥È¥ì¥Ù¥ë¤ò»ØÄꤷ¤Æ¡¢¤½¤Î¥ì¥Ù¥ë¤Ç¤Î
+ Êá³ÍÃͤò»²¾È¤¹¤ë¡£
+
+ Îã-1.
+
+ /\A(?<a>|.|(?:(?<b>.)\g<a>\k<b+0>))\z/.match("reer")
+
+ Îã-2.
+
+ r = Regexp.compile(<<'__REGEXP__'.strip, Regexp::EXTENDED)
+ (?<element> \g<stag> \g<content>* \g<etag> ){0}
+ (?<stag> < \g<name> \s* > ){0}
+ (?<name> [a-zA-Z_:]+ ){0}
+ (?<content> [^<&]+ (\g<element> | [^<&]+)* ){0}
+ (?<etag> </ \k<name+1> >){0}
+ \g<element>
+ __REGEXP__
+
+ p r.match('<foo>f<bar>bbb</bar>f</foo>').captures
+
+
+
+9. Éôʬ¼°¸Æ½Ð¤· ("ÅÄÃæÅ¯¥¹¥Ú¥·¥ã¥ë")
+
+ \g<name> ̾Á°»ØÄê¸Æ½Ð¤·
+ \g<n> ÈÖ¹æ»ØÄê¸Æ½Ð¤· (n >= 1)
+
+ ¢¨ ºÇº¸°ÌÃ֤ǤκƵ¢¸Æ½Ð¤·¤Ï¶Ø»ß¤µ¤ì¤ë¡£
+ Îã. (?<name>a|\g<name>b) => error
+ (?<name>a|b\g<name>c) => OK
+
+ ¢¨ ÈÖ¹æ»ØÄê¸Æ½Ð¤·¤Ï¡¢Ì¾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤¬ÄêµÁ¤µ¤ì¡¢
+ ¤«¤Ä ONIG_OPTION_CAPTURE_GROUP¤¬»ØÄꤵ¤ì¤Æ¤¤¤Ê¤¤¾ì¹ç¤Ë¤Ï¡¢
+ ¶Ø»ß¤µ¤ì¤ë¡£ (10. Êá³Í¼°½¸¹ç ¤ò»²¾È)
+
+ ¢¨ ¸Æ¤Ó½Ð¤µ¤ì¤¿¼°½¸¹ç¤Î¥ª¥×¥·¥ç¥ó¾õÂÖ¤¬¸Æ½Ð¤·Â¦¤Î¥ª¥×¥·¥ç¥ó¾õÂ֤ȰۤʤäƤ¤¤ë
+ ¤È¤­¡¢¸Æ¤Ó½Ð¤µ¤ì¤¿Â¦¤Î¥ª¥×¥·¥ç¥ó¾õÂÖ¤¬Í­¸ú¤Ç¤¢¤ë¡£
+
+ Îã. (?-i:\g<name>)(?i:(?<name>a)){0} ¤Ï "A" ¤Ë¾È¹çÀ®¸ù¤¹¤ë¡£
+
+
+10. Êá³Í¼°½¸¹ç
+
+ Êá³Í¼°½¸¹ç(...)¤Ï¡¢°Ê²¼¤Î¾ò·ï¤Ë±þ¤¸¤Æ¿¶Éñ¤¬ÊѲ½¤¹¤ë¡£
+ (̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤ÏÊѲ½¤·¤Ê¤¤)
+
+ case 1. /.../ (̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤ÏÉÔ»ÈÍÑ¡¢¥ª¥×¥·¥ç¥ó¤Ê¤·)
+
+ (...) ¤Ï¡¢Êá³Í¼°½¸¹ç¤È¤·¤Æ°·¤ï¤ì¤ë¡£
+
+ case 2. /.../g (̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤ÏÉÔ»ÈÍÑ¡¢¥ª¥×¥·¥ç¥ó 'g'¤ò»ØÄê)
+
+ (...) ¤Ï¡¢ÈóÊá³Í¼°½¸¹ç¤È¤·¤Æ°·¤ï¤ì¤ë¡£
+
+ case 3. /..(?<name>..)../ (̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤Ï»ÈÍÑ¡¢¥ª¥×¥·¥ç¥ó¤Ê¤·)
+
+ (...) ¤Ï¡¢ÈóÊá³Í¼°½¸¹ç¤È¤·¤Æ°·¤ï¤ì¤ë¡£
+ ÈÖ¹æ»ØÄ껲¾È/¸Æ¤Ó½Ð¤·¤ÏÉÔµö²Ä¡£
+
+ case 4. /..(?<name>..)../G (̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤Ï»ÈÍÑ¡¢¥ª¥×¥·¥ç¥ó 'G'¤ò»ØÄê)
+
+ (...) ¤Ï¡¢Êá³Í¼°½¸¹ç¤È¤·¤Æ°·¤ï¤ì¤ë¡£
+ ÈÖ¹æ»ØÄ껲¾È/¸Æ¤Ó½Ð¤·¤Ïµö²Ä¡£
+
+ ⤷
+ g: ONIG_OPTION_DONT_CAPTURE_GROUP
+ G: ONIG_OPTION_CAPTURE_GROUP
+ ('g'¤È'G'¥ª¥×¥·¥ç¥ó¤Ï¡¢ruby-dev ML¤ÇµÄÏÀ¤µ¤ì¤¿¡£)
+
+ ¤³¤ì¤é¤Î¿¶Éñ¤Î°ÕÌ£¤Ï¡¢
+ ̾Á°ÉÕ¤­Êá³Í¤È̾Á°Ìµ¤·Êá³Í¤òƱ»þ¤Ë»ÈÍѤ¹¤ëɬÁ³À­¤Î¤¢¤ë¾ìÌ̤Ͼ¯¤Ê¤¤¤Ç¤¢¤í¤¦
+ ¤È¤¤¤¦Íýͳ¤«¤é¹Í¤¨¤é¤ì¤¿¤â¤Î¤Ç¤¢¤ë¡£
+ ¤³¤ì¤é¤Î¥ª¥×¥·¥ç¥ó¤Ë¤Ä¤¤¤Æ¤Ï¡¢Ruby¤Ç¤Ï¸½ºß¼ÂÁõ¤µ¤ì¤Æ¤¤¤Ê¤¤¡£
+
+
+-----------------------------
+Êäµ­ 1. ʸˡ°Í¸¥ª¥×¥·¥ç¥ó
+
+ + ONIG_SYNTAX_RUBY
+ (?m): ½ª»ßÉäµ­¹æ(.)¤Ï²þ¹Ô¤È¾È¹çÀ®¸ù
+
+ + ONIG_SYNTAX_PERL ¤È ONIG_SYNTAX_JAVA
+ (?s): ½ª»ßÉäµ­¹æ(.)¤Ï²þ¹Ô¤È¾È¹çÀ®¸ù
+ (?m): ^ ¤Ï²þ¹Ô¤Îľ¸å¤Ë¾È¹ç¤¹¤ë¡¢$ ¤Ï²þ¹Ô¤ÎľÁ°¤Ë¾È¹ç¤¹¤ë
+
+
+Êäµ­ 2. ÆÈ¼«³ÈÄ¥µ¡Ç½
+
+ + 16¿Ê¿ô¿ô»ú¡¢Èó16¿Ê¿ô»ú \h, \H
+ + ̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç (?<name>...)
+ + ̾Á°»ØÄê¸åÊý»²¾È \k<name>
+ + Éôʬ¼°¸Æ½Ð¤· \g<name>, \g<group-num>
+
+
+Êäµ­ 3. Perl 5.8.0¤ÈÈæ³Ó¤·¤ÆÂ¸ºß¤·¤Ê¤¤µ¡Ç½
+
+ + [:word:]
+ + \N{name}
+ + \l,\u,\L,\U, \X, \C
+ + (?{code})
+ + (??{code})
+ + (?(condition)yes-pat|no-pat)
+
+ * \Q...\E
+ ⤷ONIG_SYNTAX_PERL¤ÈONIG_SYNTAX_JAVA¤Ç¤ÏÍ­¸ú
+
+ * \p{property}, \P{property}
+ ⤷ONIG_SYNTAX_PERL¤ÈONIG_SYNTAX_JAVA¤Ç¤ÏÍ­¸ú
+ Alnum, Alpha, Blank, Cntrl, Digit, Graph, Lower,
+ Print, Punct, Space, Upper, XDigit, ASCII¤¬»ØÄê¤Ç¤­¤ë¡£
+
+ ÆÃÀ­Ì¾¤ÎÁ°¤Ë 'Is'Á°ÃÖ»ì¤ò»ÈÍѤ¹¤ë¤³¤È¤Ï¡¢ONIG_SYNTAX_PERL¤Ç¤Î¤ß
+ µö¤µ¤ì¤Æ¤¤¤ë¡£
+ ex. \p{IsXDigit}.
+
+ ÆÃÀ­¤ÎÈÝÄê±é»»»Ò¤Ï¡¢ONIG_SYNTAX_PERL¤Ç¤Î¤ßµö¤µ¤ì¤Æ¤¤¤ë¡£
+ \p{^...}, \P{^...}
+
+
+Êäµ­ 4. Ruby¤ÎÆüËܸ첽 GNU regex(version 0.12)¤È¤Î°ã¤¤
+
+ + 16¿Ê¿ô»ú¥¿¥¤¥×Äɲà (\h, \H)
+ + Ìá¤êÆÉ¤ßµ¡Ç½¤òÄɲÃ
+ + ¶¯Íߤʷ«¤êÊÖ¤·»ØÄê»Ò¤òÄɲà (?+, *+, ++)
+ + ʸ»ú½¸¹ç¤ÎÃæ¤Î±é»»»Ò¤òÄɲà ([...], &&)
+ ('[' ¤Ï¡¢Ê¸»ú½¸¹ç¤ÎÃæ¤ÇÄ̾ï¤Îʸ»ú¤È¤·¤Æ»ÈÍѤ¹¤ë¤È¤­¤Ë¤Ï
+ ÂàÈò½¤¾þ¤·¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤)
+ + ̾Á°ÉÕ¤­Êá³Í¼°½¸¹ç¤È¡¢Éôʬ¼°¸Æ½Ð¤·µ¡Ç½ÄɲÃ
+ + ¿¥Ð¥¤¥Èʸ»ú¥³¡¼¥É¤¬»ØÄꤵ¤ì¤Æ¤¤¤ë¤È¤­¡¢
+ ʸ»ú½¸¹ç¤ÎÃæ¤ÇȬ¿Ê¿ô¤Þ¤¿¤Ï½½Ï»¿Ê¿ôɽ¸½¤ÎϢ³¤Ï¡¢Â¿¥Ð¥¤¥ÈÉä¹ç¤Çɽ¸½¤µ¤ì¤¿
+ °ì¸Ä¤Îʸ»ú¤È²ò¼á¤µ¤ì¤ë
+ (Îã. [\xa1\xa2], [\xa1\xa7-\xa4\xa1])
+ + ʸ»ú½¸¹ç¤ÎÃæ¤Ç¡¢°ì¥Ð¥¤¥Èʸ»ú¤È¿¥Ð¥¤¥Èʸ»ú¤ÎÈϰϻØÄê¤Ïµö¤µ¤ì¤ë¡£
+ ex. /[a-¤¢]/
+ + ¸ÉΩ¥ª¥×¥·¥ç¥ó¤ÎÍ­¸úÈϰϤϡ¢¤½¤Î¸ÉΩ¥ª¥×¥·¥ç¥ó¤ò´Þ¤ó¤Ç¤¤¤ë¼°½¸¹ç¤Î
+ ½ª¤ï¤ê¤Þ¤Ç¤Ç¤¢¤ë
+ Îã. (?:(?i)a|b) ¤Ï (?:(?i:a|b)) ¤È²ò¼á¤µ¤ì¤ë¡¢(?:(?i:a)|b)¤Ç¤Ï¤Ê¤¤
+ + ¸ÉΩ¥ª¥×¥·¥ç¥ó¤Ï¤½¤ÎÁ°¤Î¼°¤ËÂФ·¤ÆÆ©²áŪ¤Ç¤Ï¤Ê¤¤
+ Îã. /a(?i)*/ ¤Ïʸˡ¥¨¥é¡¼¤È¤Ê¤ë
+ + ÉÔ´°Á´¤Ê·«¤êÊÖ¤·ÈϰϻØÄê»Ò¤ÏÄ̾ï¤Îʸ»úÎó¤È¤·¤Æµö²Ä¤µ¤ì¤ë
+ Îã. /{/, /({)/, /a{2,3/
+ + ÈÝÄêŪPOSIX¥Ö¥é¥±¥Ã¥È [:^xxxx:] ¤òÄɲÃ
+ + POSIX¥Ö¥é¥±¥Ã¥È [:ascii:] ¤òÄɲÃ
+ + ÀèÆÉ¤ß¤Î·«¤êÊÖ¤·¤ÏÉÔµö²Ä
+ Îã. /(?=a)*/, /(?!b){5}/
+ + ¿ôÃͤǻØÄꤵ¤ì¤¿Ê¸»ú¤ËÂФ·¤Æ¤â¡¢Âçʸ»ú¾®Ê¸»ú¾È¹ç¥ª¥×¥·¥ç¥ó¤ÏÍ­¸ú
+ Îã. /\x61/i =~ "A"
+ + ·«¤êÊÖ¤·²ó¿ô»ØÄê¤Ç¡¢ºÇÄã²ó¿ô¤Î¾Êά(0²ó)¤¬¤Ç¤­¤ë
+ /a{,n}/ == /a{0,n}/
+ ºÇÄã²ó¿ô¤ÈºÇÂç²ó¿ô¤ÎƱ»þ¾Êά¤Ïµö¤µ¤ì¤Ê¤¤¡£(/a{,}/)
+ + /a{n}?/¤Ï̵Íߤʱ黻»Ò¤Ç¤Ï¤Ê¤¤¡£
+ /a{n}?/ == /(?:a{n})?/
+ + ̵¸ú¤Ê¸åÊý»²¾È¤ò¥Á¥§¥Ã¥¯¤·¤Æ¥¨¥é¡¼¤Ë¤¹¤ë¡£
+ /\1/, /(a)\2/
+ + ̵¸Â·«¤êÊÖ¤·¤ÎÃæ¤Ç¡¢Ä¹¤µÎí¤Ç¤Î¾È¹çÀ®¸ù¤Ï·«¤êÊÖ¤·¤òÃæÃǤµ¤»¤ë¤¬¡¢
+ ¤³¤Î¤È¤­¡¢ÃæÃǤ¹¤Ù¤­¤«¤É¤¦¤«¤ÎȽÄê¤È¤·¤Æ¡¢Êá³Í¼°½¸¹ç¤ÎÊá³Í¾õÂÖ¤Î
+ ÊѲ½¤Þ¤Ç¹Íθ¤·¤Æ¤¤¤ë
+ /(?:()|())*\1\2/ =~ ""
+ /(?:\1a|())*/ =~ "a"
+
+
+
+Êäµ­ 5. ¼ÂÁõ¤µ¤ì¤Æ¤¤¤ë¤¬¡¢´ûÄêÃͤǤÏÍ­¸ú¤Ë¤·¤Æ¤¤¤Ê¤¤µ¡Ç½
+
+ + Êá³ÍÍúÎò»²¾È
+
+ (?@...) ¤È (?@<name>...)
+
+ Îã. /(?@a)*/.match("aaa") ==> [<0-1>, <1-2>, <2-3>]
+
+ »ÈÍÑÊýË¡¤Ï¡¢sample/listcap.c¤ò»²¾È
+
+ Í­¸ú¤Ë¤·¤Æ¤¤¤Ê¤¤Íýͳ¤Ï¡¢¤É¤ÎÄøÅÙÌò¤ËΩ¤Ä¤«¤Ï¤Ã¤­¤ê¤·¤Ê¤¤¤¿¤á¡£
+
+
+Êäµ­ 6. ÌäÂêÅÀ
+
+ + UTF-8¤Ç¡¢¥Ð¥¤¥ÈÃͤ¬Å¬Àµ¤Ê²Á¤«¤É¤¦¤«¤Î¥Á¥§¥Ã¥¯¤Ï¹Ô¤Ê¤Ã¤Æ¤¤¤Ê¤¤¡£
+
+ * ÀèÆ¬¥Ð¥¤¥È¤È¤·¤ÆÉÔÀµ¤Ê¥Ð¥¤¥È¤ò°ìʸ»ú¤È¤ß¤Ê¤¹
+ /./u =~ "\xa3"
+
+ * ÉÔ´°Á´¤Ê¥Ð¥¤¥È¥·¡¼¥±¥ó¥¹¤Î¥Á¥§¥Ã¥¯¤ò¤·¤Ê¤¤
+ /\w+/ =~ "a\xf3\x8ec"
+
+ ¤³¤ì¤òÄ´¤Ù¤ë¤³¤È¤Ï²Äǽ¤Ç¤Ï¤¢¤ë¤¬¡¢ÃÙ¤¯¤Ê¤ë¤Î¤Ç¹Ô¤Ê¤ï¤Ê¤¤¡£
+
+½ª¤ê
diff --git a/ext/mbstring/oniguruma/enc/big5.c b/ext/mbstring/oniguruma/enc/big5.c
index 763872e96..86792666a 100644
--- a/ext/mbstring/oniguruma/enc/big5.c
+++ b/ext/mbstring/oniguruma/enc/big5.c
@@ -29,7 +29,7 @@
#include "regenc.h"
-static int EncLen_BIG5[] = {
+static const int EncLen_BIG5[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
diff --git a/ext/mbstring/oniguruma/enc/euc_jp.c b/ext/mbstring/oniguruma/enc/euc_jp.c
index 5f13e33eb..71c81ee9f 100644
--- a/ext/mbstring/oniguruma/enc/euc_jp.c
+++ b/ext/mbstring/oniguruma/enc/euc_jp.c
@@ -31,7 +31,7 @@
#define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1)
-static int EncLen_EUCJP[] = {
+static const int EncLen_EUCJP[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -158,20 +158,16 @@ eucjp_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
static int
eucjp_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
- if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
- if (code < 128)
- return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else
+ if (code < 128)
+ return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+ else {
+ if ((ctype & (ONIGENC_CTYPE_WORD |
+ ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
return (eucjp_code_to_mbclen(code) > 1 ? TRUE : FALSE);
-
- ctype &= ~ONIGENC_CTYPE_WORD;
- if (ctype == 0) return FALSE;
+ }
}
- if (code < 128)
- return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else
- return FALSE;
+ return FALSE;
}
static UChar*
diff --git a/ext/mbstring/oniguruma/enc/euc_kr.c b/ext/mbstring/oniguruma/enc/euc_kr.c
index c1e83b7e6..57bf80153 100644
--- a/ext/mbstring/oniguruma/enc/euc_kr.c
+++ b/ext/mbstring/oniguruma/enc/euc_kr.c
@@ -29,7 +29,7 @@
#include "regenc.h"
-static int EncLen_EUCKR[] = {
+static const int EncLen_EUCKR[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
diff --git a/ext/mbstring/oniguruma/enc/euc_tw.c b/ext/mbstring/oniguruma/enc/euc_tw.c
index 4e5851a45..6f396e75e 100644
--- a/ext/mbstring/oniguruma/enc/euc_tw.c
+++ b/ext/mbstring/oniguruma/enc/euc_tw.c
@@ -29,7 +29,7 @@
#include "regenc.h"
-static int EncLen_EUCTW[] = {
+static const int EncLen_EUCTW[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
diff --git a/ext/mbstring/oniguruma/enc/gb18030.c b/ext/mbstring/oniguruma/enc/gb18030.c
new file mode 100644
index 000000000..01995ea09
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/gb18030.c
@@ -0,0 +1,501 @@
+/**********************************************************************
+ gb18030.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2005 KUBO Takehiro <kubo AT jiubao DOT org>
+ * K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+/* Debug trace switch: with "#if 1" DEBUG_GB18030 expands to nothing; with "#if 0" its argument list is passed to printf. */
+#if 1
+#define DEBUG_GB18030(arg)
+#else
+#define DEBUG_GB18030(arg) printf arg
+#endif
+/* Byte classes stored in GB18030_MAP; they describe which positions a byte may occupy in a character. */
+enum {
+ C1, /* one-byte char */
+ C2, /* one-byte or second of two-byte char */
+ C4, /* one-byte or second or fourth of four-byte char */
+ CM /* first of two- or four-byte char or second of two-byte char */
+};
+/* Byte-class table indexed by byte value: 16 rows of 16 entries, one row per high nibble. */
+static const char GB18030_MAP[] = {
+ C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, /* 0x00 */
+ C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, /* 0x10 */
+ C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, /* 0x20 */
+ C4, C4, C4, C4, C4, C4, C4, C4, C4, C4, C1, C1, C1, C1, C1, C1, /* 0x30: 0x30-0x39 are C4 */
+ C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, /* 0x40 */
+ C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, /* 0x50 */
+ C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, /* 0x60 */
+ C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C1, /* 0x70: 0x7f is C1 */
+ C2, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, /* 0x80: 0x80 is C2 */
+ CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, /* 0x90 */
+ CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, /* 0xa0 */
+ CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, /* 0xb0 */
+ CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, /* 0xc0 */
+ CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, /* 0xd0 */
+ CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, /* 0xe0 */
+ CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, C1 /* 0xf0: 0xff is C1 */
+};
+/* Return the byte length (1, 2 or 4) of the character starting at p, decided from the classes of p[0] and p[1]. */
+static int
+gb18030_mbc_enc_len(const UChar* p)
+{
+ if (GB18030_MAP[*p] != CM) /* only a CM byte can start a multi-byte char */
+ return 1;
+ p++; /* NOTE(review): the next byte is read with no end bound; confirm callers guarantee it is readable */
+ if (GB18030_MAP[*p] == C4) /* CM followed by C4: four-byte form */
+ return 4;
+ if (GB18030_MAP[*p] == C1)
+ return 1; /* illegal sequence */
+ return 2; /* second byte is C2 or CM: two-byte form */
+}
+/* Decode the character at p into a code point by delegating to onigenc_mbn_mbc_to_code with the GB18030 encoding. */
+static OnigCodePoint
+gb18030_mbc_to_code(const UChar* p, const UChar* end)
+{
+ return onigenc_mbn_mbc_to_code(ONIG_ENCODING_GB18030, p, end);
+}
+/* Encode code into buf by delegating to onigenc_mb4_code_to_mbc with the GB18030 encoding; returns that helper's result. */
+static int
+gb18030_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+ return onigenc_mb4_code_to_mbc(ONIG_ENCODING_GB18030, code, buf);
+}
+/* Normalize the character at *pp into lower by delegating to onigenc_mbn_mbc_to_normalize with the GB18030 encoding. */
+static int
+gb18030_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
+ UChar* lower)
+{
+ return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_GB18030, flag,
+ pp, end, lower);
+}
+/* Ambiguity test for the character at *pp, delegated to onigenc_mbn_is_mbc_ambiguous with the GB18030 encoding. */
+static int
+gb18030_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+ return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_GB18030, flag, pp, end);
+}
+/* Character-type test for code, delegated to onigenc_mb4_is_code_ctype with the GB18030 encoding. */
+static int
+gb18030_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+ return onigenc_mb4_is_code_ctype(ONIG_ENCODING_GB18030, code, ctype);
+}
+/* States of the backward-scanning state machine in gb18030_left_adjust_char_head. */
+enum state {
+ S_START, /* initial state: no byte examined yet */
+ S_one_C2,
+ S_one_C4,
+ S_one_CM,
+
+ S_odd_CM_one_CX,
+ S_even_CM_one_CX,
+
+ /* CMC4 : pair of "CM C4" */
+ S_one_CMC4,
+ S_odd_CMC4,
+ S_one_C4_odd_CMC4,
+ S_even_CMC4,
+ S_one_C4_even_CMC4,
+
+ S_odd_CM_odd_CMC4,
+ S_even_CM_odd_CMC4,
+
+ S_odd_CM_even_CMC4,
+ S_even_CM_even_CMC4,
+
+ /* C4CM : pair of "C4 CM" */
+ S_odd_C4CM,
+ S_one_CM_odd_C4CM,
+ S_even_C4CM,
+ S_one_CM_even_C4CM,
+
+ S_even_CM_odd_C4CM,
+ S_odd_CM_odd_C4CM,
+ S_even_CM_even_C4CM,
+ S_odd_CM_even_C4CM,
+};
+/* Left-adjust s to a character head: scan byte classes backward from s (stopping at start) and return s, s-1, s-2 or s-3 according to the state reached. */
+static UChar*
+gb18030_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+ const UChar *p;
+ enum state state = S_START;
+
+ DEBUG_GB18030(("----------------\n"));
+ for (p = s; p >= start; p--) { /* walk backward one byte per iteration, feeding its class to the state machine */
+ DEBUG_GB18030(("state %d --(%02x)-->\n", state, *p));
+ switch (state) {
+ case S_START:
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ return (UChar *)s; /* a C1 byte is always a one-byte char, so s is already a head */
+ case C2:
+ state = S_one_C2; /* C2 */
+ break;
+ case C4:
+ state = S_one_C4; /* C4 */
+ break;
+ case CM:
+ state = S_one_CM; /* CM */
+ break;
+ }
+ break;
+ case S_one_C2: /* C2 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)s;
+ case CM:
+ state = S_odd_CM_one_CX; /* CM C2 */
+ break;
+ }
+ break;
+ case S_one_C4: /* C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)s;
+ case CM:
+ state = S_one_CMC4;
+ break;
+ }
+ break;
+ case S_one_CM: /* CM */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ return (UChar *)s;
+ case C4:
+ state = S_odd_C4CM;
+ break;
+ case CM:
+ state = S_odd_CM_one_CX; /* CM CM */
+ break;
+ }
+ break;
+
+ case S_odd_CM_one_CX: /* CM C2 */ /* CM CM */ /* CM CM CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 1);
+ case CM:
+ state = S_even_CM_one_CX;
+ break;
+ }
+ break;
+ case S_even_CM_one_CX: /* CM CM C2 */ /* CM CM CM */ /* CM CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)s;
+ case CM:
+ state = S_odd_CM_one_CX;
+ break;
+ }
+ break;
+
+ case S_one_CMC4: /* CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ return (UChar *)(s - 1);
+ case C4:
+ state = S_one_C4_odd_CMC4; /* C4 CM C4 */
+ break;
+ case CM:
+ state = S_even_CM_one_CX; /* CM CM C4 */
+ break;
+ }
+ break;
+ case S_odd_CMC4: /* CM C4 CM C4 CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ return (UChar *)(s - 1);
+ case C4:
+ state = S_one_C4_odd_CMC4;
+ break;
+ case CM:
+ state = S_odd_CM_odd_CMC4;
+ break;
+ }
+ break;
+ case S_one_C4_odd_CMC4: /* C4 CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 1);
+ case CM:
+ state = S_even_CMC4; /* CM C4 CM C4 */
+ break;
+ }
+ break;
+ case S_even_CMC4: /* CM C4 CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ return (UChar *)(s - 3);
+ case C4:
+ state = S_one_C4_even_CMC4;
+ break;
+ case CM:
+ state = S_odd_CM_even_CMC4;
+ break;
+ }
+ break;
+ case S_one_C4_even_CMC4: /* C4 CM C4 CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 3);
+ case CM:
+ state = S_odd_CMC4;
+ break;
+ }
+ break;
+
+ case S_odd_CM_odd_CMC4: /* CM CM C4 CM C4 CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 3);
+ case CM:
+ state = S_even_CM_odd_CMC4;
+ break;
+ }
+ break;
+ case S_even_CM_odd_CMC4: /* CM CM CM C4 CM C4 CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 1);
+ case CM:
+ state = S_odd_CM_odd_CMC4;
+ break;
+ }
+ break;
+
+ case S_odd_CM_even_CMC4: /* CM CM C4 CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 1);
+ case CM:
+ state = S_even_CM_even_CMC4;
+ break;
+ }
+ break;
+ case S_even_CM_even_CMC4: /* CM CM CM C4 CM C4 */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 3);
+ case CM:
+ state = S_odd_CM_even_CMC4;
+ break;
+ }
+ break;
+
+ case S_odd_C4CM: /* C4 CM */ /* C4 CM C4 CM C4 CM */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)s;
+ case CM:
+ state = S_one_CM_odd_C4CM; /* CM C4 CM */
+ break;
+ }
+ break;
+ case S_one_CM_odd_C4CM: /* CM C4 CM */ /* CM C4 CM C4 CM C4 CM */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ return (UChar *)(s - 2); /* |CM C4 CM */
+ case C4:
+ state = S_even_C4CM;
+ break;
+ case CM:
+ state = S_even_CM_odd_C4CM;
+ break;
+ }
+ break;
+ case S_even_C4CM: /* C4 CM C4 CM */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 2); /* C4|CM C4 CM */
+ case CM:
+ state = S_one_CM_even_C4CM;
+ break;
+ }
+ break;
+ case S_one_CM_even_C4CM: /* CM C4 CM C4 CM */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ return (UChar *)(s - 0); /*|CM C4 CM C4|CM */
+ case C4:
+ state = S_odd_C4CM;
+ break;
+ case CM:
+ state = S_even_CM_even_C4CM;
+ break;
+ }
+ break;
+
+ case S_even_CM_odd_C4CM: /* CM CM C4 CM */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 0); /* |CM CM|C4|CM */
+ case CM:
+ state = S_odd_CM_odd_C4CM;
+ break;
+ }
+ break;
+ case S_odd_CM_odd_C4CM: /* CM CM CM C4 CM */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 2); /* |CM CM|CM C4 CM */
+ case CM:
+ state = S_even_CM_odd_C4CM;
+ break;
+ }
+ break;
+
+ case S_even_CM_even_C4CM: /* CM CM C4 CM C4 CM */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 2); /* |CM CM|C4|CM C4 CM */
+ case CM:
+ state = S_odd_CM_even_C4CM;
+ break;
+ }
+ break;
+ case S_odd_CM_even_C4CM: /* CM CM CM C4 CM C4 CM */
+ switch (GB18030_MAP[*p]) {
+ case C1:
+ case C2:
+ case C4:
+ return (UChar *)(s - 0); /* |CM CM|CM C4 CM C4|CM */
+ case CM:
+ state = S_even_CM_even_C4CM;
+ break;
+ }
+ break;
+ }
+ }
+
+ DEBUG_GB18030(("state %d\n", state));
+ switch (state) { /* the scan passed start without deciding: resolve from the last state reached */
+ case S_START: return (UChar *)(s - 0);
+ case S_one_C2: return (UChar *)(s - 0);
+ case S_one_C4: return (UChar *)(s - 0);
+ case S_one_CM: return (UChar *)(s - 0);
+
+ case S_odd_CM_one_CX: return (UChar *)(s - 1);
+ case S_even_CM_one_CX: return (UChar *)(s - 0);
+
+ case S_one_CMC4: return (UChar *)(s - 1);
+ case S_odd_CMC4: return (UChar *)(s - 1);
+ case S_one_C4_odd_CMC4: return (UChar *)(s - 1);
+ case S_even_CMC4: return (UChar *)(s - 3);
+ case S_one_C4_even_CMC4: return (UChar *)(s - 3);
+
+ case S_odd_CM_odd_CMC4: return (UChar *)(s - 3);
+ case S_even_CM_odd_CMC4: return (UChar *)(s - 1);
+
+ case S_odd_CM_even_CMC4: return (UChar *)(s - 1);
+ case S_even_CM_even_CMC4: return (UChar *)(s - 3);
+
+ case S_odd_C4CM: return (UChar *)(s - 0);
+ case S_one_CM_odd_C4CM: return (UChar *)(s - 2);
+ case S_even_C4CM: return (UChar *)(s - 2);
+ case S_one_CM_even_C4CM: return (UChar *)(s - 0);
+
+ case S_even_CM_odd_C4CM: return (UChar *)(s - 0);
+ case S_odd_CM_odd_C4CM: return (UChar *)(s - 2);
+ case S_even_CM_even_C4CM: return (UChar *)(s - 2);
+ case S_odd_CM_even_C4CM: return (UChar *)(s - 0);
+ }
+
+ return (UChar* )s; /* not reached: every state returns above; kept only to silence a missing-return warning */
+}
+/* Return TRUE only when the byte at s has class C1 (always a one-byte char); the end argument is unused. */
+static int
+gb18030_is_allowed_reverse_match(const UChar* s, const UChar* end)
+{
+ return GB18030_MAP[*s] == C1 ? TRUE : FALSE;
+}
+/* Encoding descriptor for GB18030: combines the gb18030_* functions in this file with generic onigenc_* helpers. */
+OnigEncodingType OnigEncodingGB18030 = {
+ gb18030_mbc_enc_len,
+ "GB18030", /* name */
+ 4, /* max enc length */
+ 1, /* min enc length */
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
+ gb18030_mbc_to_code,
+ onigenc_mb4_code_to_mbclen,
+ gb18030_code_to_mbc,
+ gb18030_mbc_to_normalize,
+ gb18030_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ gb18030_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ gb18030_left_adjust_char_head,
+ gb18030_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_1.c b/ext/mbstring/oniguruma/enc/iso8859_1.c
index 53ad52ee1..4dd708d84 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_1.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_1.c
@@ -32,7 +32,7 @@
#define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \
((EncISO_8859_1_CtypeTable[code] & ctype) != 0)
-static unsigned short EncISO_8859_1_CtypeTable[256] = {
+static const unsigned short EncISO_8859_1_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
diff --git a/ext/mbstring/oniguruma/enc/iso8859_10.c b/ext/mbstring/oniguruma/enc/iso8859_10.c
index a9331cebf..e317f4975 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_10.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_10.c
@@ -33,7 +33,7 @@
#define ENC_IS_ISO_8859_10_CTYPE(code,ctype) \
((EncISO_8859_10_CtypeTable[code] & ctype) != 0)
-static UChar EncISO_8859_10_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_10_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,7 +68,7 @@ static UChar EncISO_8859_10_ToLowerCaseTable[256] = {
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
-static unsigned short EncISO_8859_10_CtypeTable[256] = {
+static const unsigned short EncISO_8859_10_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
@@ -186,9 +186,9 @@ iso_8859_10_is_code_ctype(OnigCodePoint code, unsigned int ctype)
static int
iso_8859_10_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+ const OnigPairAmbigCodes** ccs)
{
- static OnigPairAmbigCodes cc[] = {
+ static const OnigPairAmbigCodes cc[] = {
{ 0xa1, 0xb1 },
{ 0xa2, 0xb2 },
{ 0xa3, 0xb3 },
diff --git a/ext/mbstring/oniguruma/enc/iso8859_11.c b/ext/mbstring/oniguruma/enc/iso8859_11.c
index bb1098807..6afaa27f4 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_11.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_11.c
@@ -32,7 +32,7 @@
#define ENC_IS_ISO_8859_11_CTYPE(code,ctype) \
((EncISO_8859_11_CtypeTable[code] & ctype) != 0)
-static unsigned short EncISO_8859_11_CtypeTable[256] = {
+static const unsigned short EncISO_8859_11_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
diff --git a/ext/mbstring/oniguruma/enc/iso8859_13.c b/ext/mbstring/oniguruma/enc/iso8859_13.c
index 827ca508e..abd764452 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_13.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_13.c
@@ -33,7 +33,7 @@
#define ENC_IS_ISO_8859_13_CTYPE(code,ctype) \
((EncISO_8859_13_CtypeTable[code] & ctype) != 0)
-static UChar EncISO_8859_13_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_13_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,7 +68,7 @@ static UChar EncISO_8859_13_ToLowerCaseTable[256] = {
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
-static unsigned short EncISO_8859_13_CtypeTable[256] = {
+static const unsigned short EncISO_8859_13_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
@@ -186,9 +186,9 @@ iso_8859_13_is_code_ctype(OnigCodePoint code, unsigned int ctype)
static int
iso_8859_13_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+ const OnigPairAmbigCodes** ccs)
{
- static OnigPairAmbigCodes cc[] = {
+ static const OnigPairAmbigCodes cc[] = {
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
diff --git a/ext/mbstring/oniguruma/enc/iso8859_14.c b/ext/mbstring/oniguruma/enc/iso8859_14.c
index 4fe5ab29d..d76771a1c 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_14.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_14.c
@@ -33,7 +33,7 @@
#define ENC_IS_ISO_8859_14_CTYPE(code,ctype) \
((EncISO_8859_14_CtypeTable[code] & ctype) != 0)
-static UChar EncISO_8859_14_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_14_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,7 +68,7 @@ static UChar EncISO_8859_14_ToLowerCaseTable[256] = {
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
-static unsigned short EncISO_8859_14_CtypeTable[256] = {
+static const unsigned short EncISO_8859_14_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
@@ -186,9 +186,9 @@ iso_8859_14_is_code_ctype(OnigCodePoint code, unsigned int ctype)
static int
iso_8859_14_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+ const OnigPairAmbigCodes** ccs)
{
- static OnigPairAmbigCodes cc[] = {
+ static const OnigPairAmbigCodes cc[] = {
{ 0xa1, 0xa2 },
{ 0xa2, 0xa1 },
{ 0xa4, 0xa5 },
diff --git a/ext/mbstring/oniguruma/enc/iso8859_15.c b/ext/mbstring/oniguruma/enc/iso8859_15.c
index 1a8bd7b4c..d6611ed29 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_15.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_15.c
@@ -33,7 +33,7 @@
#define ENC_IS_ISO_8859_15_CTYPE(code,ctype) \
((EncISO_8859_15_CtypeTable[code] & ctype) != 0)
-static UChar EncISO_8859_15_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_15_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,7 +68,7 @@ static UChar EncISO_8859_15_ToLowerCaseTable[256] = {
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
-static unsigned short EncISO_8859_15_CtypeTable[256] = {
+static const unsigned short EncISO_8859_15_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
@@ -186,9 +186,9 @@ iso_8859_15_is_code_ctype(OnigCodePoint code, unsigned int ctype)
static int
iso_8859_15_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+ const OnigPairAmbigCodes** ccs)
{
- static OnigPairAmbigCodes cc[] = {
+ static const OnigPairAmbigCodes cc[] = {
{ 0xa6, 0xa8 },
{ 0xa8, 0xa6 },
diff --git a/ext/mbstring/oniguruma/enc/iso8859_16.c b/ext/mbstring/oniguruma/enc/iso8859_16.c
index e283db17c..23b868065 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_16.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_16.c
@@ -33,7 +33,7 @@
#define ENC_IS_ISO_8859_16_CTYPE(code,ctype) \
((EncISO_8859_16_CtypeTable[code] & ctype) != 0)
-static UChar EncISO_8859_16_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_16_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,7 +68,7 @@ static UChar EncISO_8859_16_ToLowerCaseTable[256] = {
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
-static unsigned short EncISO_8859_16_CtypeTable[256] = {
+static const unsigned short EncISO_8859_16_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
@@ -186,9 +186,9 @@ iso_8859_16_is_code_ctype(OnigCodePoint code, unsigned int ctype)
static int
iso_8859_16_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+ const OnigPairAmbigCodes** ccs)
{
- static OnigPairAmbigCodes cc[] = {
+ static const OnigPairAmbigCodes cc[] = {
{ 0xa1, 0xa2 },
{ 0xa2, 0xa1 },
{ 0xa3, 0xb3 },
diff --git a/ext/mbstring/oniguruma/enc/iso8859_2.c b/ext/mbstring/oniguruma/enc/iso8859_2.c
index e86415b9c..5f21ff78a 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_2.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_2.c
@@ -33,7 +33,7 @@
#define ENC_IS_ISO_8859_2_CTYPE(code,ctype) \
((EncISO_8859_2_CtypeTable[code] & ctype) != 0)
-static UChar EncISO_8859_2_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_2_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,7 +68,7 @@ static UChar EncISO_8859_2_ToLowerCaseTable[256] = {
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
-static unsigned short EncISO_8859_2_CtypeTable[256] = {
+static const unsigned short EncISO_8859_2_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
@@ -177,9 +177,9 @@ iso_8859_2_is_mbc_ambiguous(OnigAmbigType flag,
static int
iso_8859_2_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+ const OnigPairAmbigCodes** ccs)
{
- static OnigPairAmbigCodes cc[] = {
+ static const OnigPairAmbigCodes cc[] = {
{ 0xa1, 0xb1 },
{ 0xa3, 0xb3 },
{ 0xa5, 0xb5 },
diff --git a/ext/mbstring/oniguruma/enc/iso8859_3.c b/ext/mbstring/oniguruma/enc/iso8859_3.c
index 76d2bec8a..9ac3dab17 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_3.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_3.c
@@ -33,7 +33,7 @@
#define ENC_IS_ISO_8859_3_CTYPE(code,ctype) \
((EncISO_8859_3_CtypeTable[code] & ctype) != 0)
-static UChar EncISO_8859_3_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_3_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,7 +68,7 @@ static UChar EncISO_8859_3_ToLowerCaseTable[256] = {
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
-static unsigned short EncISO_8859_3_CtypeTable[256] = {
+static const unsigned short EncISO_8859_3_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
@@ -186,9 +186,9 @@ iso_8859_3_is_code_ctype(OnigCodePoint code, unsigned int ctype)
static int
iso_8859_3_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+ const OnigPairAmbigCodes** ccs)
{
- static OnigPairAmbigCodes cc[] = {
+ static const OnigPairAmbigCodes cc[] = {
{ 0xa1, 0xb1 },
{ 0xa6, 0xb6 },
{ 0xa9, 0xb9 },
diff --git a/ext/mbstring/oniguruma/enc/iso8859_4.c b/ext/mbstring/oniguruma/enc/iso8859_4.c
index 756900672..c54a2fa14 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_4.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_4.c
@@ -33,7 +33,7 @@
#define ENC_IS_ISO_8859_4_CTYPE(code,ctype) \
((EncISO_8859_4_CtypeTable[code] & ctype) != 0)
-static UChar EncISO_8859_4_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_4_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,7 +68,7 @@ static UChar EncISO_8859_4_ToLowerCaseTable[256] = {
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
-static unsigned short EncISO_8859_4_CtypeTable[256] = {
+static const unsigned short EncISO_8859_4_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
@@ -186,9 +186,9 @@ iso_8859_4_is_code_ctype(OnigCodePoint code, unsigned int ctype)
static int
iso_8859_4_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+ const OnigPairAmbigCodes** ccs)
{
- static OnigPairAmbigCodes cc[] = {
+ static const OnigPairAmbigCodes cc[] = {
{ 0xa1, 0xb1 },
{ 0xa3, 0xb3 },
{ 0xa5, 0xb5 },
diff --git a/ext/mbstring/oniguruma/enc/iso8859_5.c b/ext/mbstring/oniguruma/enc/iso8859_5.c
index 2f7677b3e..5b941e2eb 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_5.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_5.c
@@ -33,7 +33,7 @@
#define ENC_IS_ISO_8859_5_CTYPE(code,ctype) \
((EncISO_8859_5_CtypeTable[code] & ctype) != 0)
-static UChar EncISO_8859_5_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_5_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,7 +68,7 @@ static UChar EncISO_8859_5_ToLowerCaseTable[256] = {
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
-static unsigned short EncISO_8859_5_CtypeTable[256] = {
+static const unsigned short EncISO_8859_5_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
@@ -152,9 +152,9 @@ iso_8859_5_is_code_ctype(OnigCodePoint code, unsigned int ctype)
static int
iso_8859_5_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+ const OnigPairAmbigCodes** ccs)
{
- static OnigPairAmbigCodes cc[] = {
+ static const OnigPairAmbigCodes cc[] = {
{ 0xa1, 0xf1 },
{ 0xa2, 0xf2 },
{ 0xa3, 0xf3 },
diff --git a/ext/mbstring/oniguruma/enc/iso8859_6.c b/ext/mbstring/oniguruma/enc/iso8859_6.c
index 0fcb9e8b8..bb5515d30 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_6.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_6.c
@@ -32,7 +32,7 @@
#define ENC_IS_ISO_8859_6_CTYPE(code,ctype) \
((EncISO_8859_6_CtypeTable[code] & ctype) != 0)
-static unsigned short EncISO_8859_6_CtypeTable[256] = {
+static const unsigned short EncISO_8859_6_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
diff --git a/ext/mbstring/oniguruma/enc/iso8859_7.c b/ext/mbstring/oniguruma/enc/iso8859_7.c
index 8b2cb9ec5..2529dae66 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_7.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_7.c
@@ -33,7 +33,7 @@
#define ENC_IS_ISO_8859_7_CTYPE(code,ctype) \
((EncISO_8859_7_CtypeTable[code] & ctype) != 0)
-static UChar EncISO_8859_7_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_7_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,7 +68,7 @@ static UChar EncISO_8859_7_ToLowerCaseTable[256] = {
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
-static unsigned short EncISO_8859_7_CtypeTable[256] = {
+static const unsigned short EncISO_8859_7_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
@@ -159,9 +159,9 @@ iso_8859_7_is_code_ctype(OnigCodePoint code, unsigned int ctype)
static int
iso_8859_7_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+ const OnigPairAmbigCodes** ccs)
{
- static OnigPairAmbigCodes cc[] = {
+ static const OnigPairAmbigCodes cc[] = {
{ 0xb6, 0xdc },
{ 0xb8, 0xdd },
{ 0xb9, 0xde },
diff --git a/ext/mbstring/oniguruma/enc/iso8859_8.c b/ext/mbstring/oniguruma/enc/iso8859_8.c
index 3c95b9b13..d7f0fc594 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_8.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_8.c
@@ -32,7 +32,7 @@
#define ENC_IS_ISO_8859_8_CTYPE(code,ctype) \
((EncISO_8859_8_CtypeTable[code] & ctype) != 0)
-static unsigned short EncISO_8859_8_CtypeTable[256] = {
+static const unsigned short EncISO_8859_8_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
diff --git a/ext/mbstring/oniguruma/enc/iso8859_9.c b/ext/mbstring/oniguruma/enc/iso8859_9.c
index 1b061ff6e..f4bcac1ae 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_9.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_9.c
@@ -33,7 +33,7 @@
#define ENC_IS_ISO_8859_9_CTYPE(code,ctype) \
((EncISO_8859_9_CtypeTable[code] & ctype) != 0)
-static UChar EncISO_8859_9_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_9_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,7 +68,7 @@ static UChar EncISO_8859_9_ToLowerCaseTable[256] = {
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
-static unsigned short EncISO_8859_9_CtypeTable[256] = {
+static const unsigned short EncISO_8859_9_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
@@ -186,9 +186,9 @@ iso_8859_9_is_code_ctype(OnigCodePoint code, unsigned int ctype)
static int
iso_8859_9_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+ const OnigPairAmbigCodes** ccs)
{
- static OnigPairAmbigCodes cc[] = {
+ static const OnigPairAmbigCodes cc[] = {
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
diff --git a/ext/mbstring/oniguruma/enc/koi8.c b/ext/mbstring/oniguruma/enc/koi8.c
index f8a5a1da6..27f97f307 100644
--- a/ext/mbstring/oniguruma/enc/koi8.c
+++ b/ext/mbstring/oniguruma/enc/koi8.c
@@ -33,7 +33,7 @@
#define ENC_IS_KOI8_CTYPE(code,ctype) \
((EncKOI8_CtypeTable[code] & ctype) != 0)
-static UChar EncKOI8_ToLowerCaseTable[256] = {
+static const UChar EncKOI8_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,7 +68,7 @@ static UChar EncKOI8_ToLowerCaseTable[256] = {
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337'
};
-static unsigned short EncKOI8_CtypeTable[256] = {
+static const unsigned short EncKOI8_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
@@ -105,9 +105,9 @@ static unsigned short EncKOI8_CtypeTable[256] = {
static int
koi8_mbc_to_normalize(OnigAmbigType flag,
- const UChar** pp, const UChar* end, UChar* lower)
+ const OnigUChar** pp, const OnigUChar* end, OnigUChar* lower)
{
- UChar* p = (UChar *)*pp;
+ const OnigUChar* p = *pp;
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
@@ -123,9 +123,9 @@ koi8_mbc_to_normalize(OnigAmbigType flag,
}
static int
-koi8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+koi8_is_mbc_ambiguous(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end)
{
- UChar* p = (UChar *)*pp;
+ const OnigUChar* p = *pp;
(*pp)++;
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
@@ -151,9 +151,9 @@ koi8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
static int
koi8_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+ const OnigPairAmbigCodes** ccs)
{
- static OnigPairAmbigCodes cc[] = {
+ static const OnigPairAmbigCodes cc[] = {
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
diff --git a/ext/mbstring/oniguruma/enc/koi8_r.c b/ext/mbstring/oniguruma/enc/koi8_r.c
index 7c626df61..d2a4440f2 100644
--- a/ext/mbstring/oniguruma/enc/koi8_r.c
+++ b/ext/mbstring/oniguruma/enc/koi8_r.c
@@ -33,7 +33,7 @@
#define ENC_IS_KOI8_R_CTYPE(code,ctype) \
((EncKOI8_R_CtypeTable[code] & ctype) != 0)
-static UChar EncKOI8_R_ToLowerCaseTable[256] = {
+static const UChar EncKOI8_R_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,7 +68,7 @@ static UChar EncKOI8_R_ToLowerCaseTable[256] = {
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337'
};
-static unsigned short EncKOI8_R_CtypeTable[256] = {
+static const unsigned short EncKOI8_R_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
@@ -150,9 +150,9 @@ koi8_r_is_code_ctype(OnigCodePoint code, unsigned int ctype)
static int
koi8_r_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+ const OnigPairAmbigCodes** ccs)
{
- static OnigPairAmbigCodes cc[] = {
+ static const OnigPairAmbigCodes cc[] = {
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
diff --git a/ext/mbstring/oniguruma/enc/sjis.c b/ext/mbstring/oniguruma/enc/sjis.c
index e13407bcc..f7d7d5226 100644
--- a/ext/mbstring/oniguruma/enc/sjis.c
+++ b/ext/mbstring/oniguruma/enc/sjis.c
@@ -29,7 +29,7 @@
#include "regenc.h"
-static int EncLen_SJIS[] = {
+static const int EncLen_SJIS[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -76,7 +76,7 @@ sjis_mbc_enc_len(const UChar* p)
return EncLen_SJIS[*p];
}
-extern int
+static int
sjis_code_to_mbclen(OnigCodePoint code)
{
if (code < 256) {
@@ -167,21 +167,16 @@ sjis_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
static int
sjis_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
- if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
- if (code < 128)
- return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else {
+ if (code < 128) /* codes below 128 are answered by the ASCII ctype test */
+ return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+ else { /* other codes can only match WORD, GRAPH or PRINT */
+ if ((ctype & (ONIGENC_CTYPE_WORD |
+ ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
return (sjis_code_to_mbclen(code) > 1 ? TRUE : FALSE);
}
-
- ctype &= ~ONIGENC_CTYPE_WORD;
- if (ctype == 0) return FALSE;
}
- if (code < 128)
- return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else
- return FALSE;
+ return FALSE; /* code >= 128 and none of WORD/GRAPH/PRINT was requested */
}
static UChar*
diff --git a/ext/mbstring/oniguruma/enc/unicode.c b/ext/mbstring/oniguruma/enc/unicode.c
index e3be9450a..a8cf53901 100644
--- a/ext/mbstring/oniguruma/enc/unicode.c
+++ b/ext/mbstring/oniguruma/enc/unicode.c
@@ -30,7 +30,7 @@
#include "regenc.h"
-unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = {
+const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x228c, 0x2289, 0x2288, 0x2288, 0x2288, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
@@ -65,7 +65,7 @@ unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = {
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
};
-static OnigCodePoint CRAlnum[] = {
+static const OnigCodePoint CRAlnum[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
414,
#else
@@ -490,7 +490,7 @@ static OnigCodePoint CRAlnum[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of CRAlnum */
-static OnigCodePoint CRAlpha[] = {
+static const OnigCodePoint CRAlpha[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
396,
#else
@@ -897,7 +897,7 @@ static OnigCodePoint CRAlpha[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of CRAlpha */
-static OnigCodePoint CRBlank[] = {
+static const OnigCodePoint CRBlank[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
9,
#else
@@ -917,7 +917,7 @@ static OnigCodePoint CRBlank[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of CRBlank */
-static OnigCodePoint CRCntrl[] = {
+static const OnigCodePoint CRCntrl[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
19,
#else
@@ -947,7 +947,7 @@ static OnigCodePoint CRCntrl[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of CRCntrl */
-static OnigCodePoint CRDigit[] = {
+static const OnigCodePoint CRDigit[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
23,
#else
@@ -981,7 +981,7 @@ static OnigCodePoint CRDigit[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of CRDigit */
-static OnigCodePoint CRGraph[] = {
+static const OnigCodePoint CRGraph[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
405,
#else
@@ -1397,7 +1397,7 @@ static OnigCodePoint CRGraph[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of CRGraph */
-static OnigCodePoint CRLower[] = {
+static const OnigCodePoint CRLower[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
424,
#else
@@ -1832,7 +1832,7 @@ static OnigCodePoint CRLower[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of CRLower */
-static OnigCodePoint CRPrint[] = {
+static const OnigCodePoint CRPrint[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
405,
#else
@@ -2248,7 +2248,7 @@ static OnigCodePoint CRPrint[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of CRPrint */
-static OnigCodePoint CRPunct[] = {
+static const OnigCodePoint CRPunct[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
86,
#else
@@ -2345,7 +2345,7 @@ static OnigCodePoint CRPunct[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of CRPunct */
-static OnigCodePoint CRSpace[] = {
+static const OnigCodePoint CRSpace[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
11,
#else
@@ -2367,7 +2367,7 @@ static OnigCodePoint CRSpace[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of CRSpace */
-static OnigCodePoint CRUpper[] = {
+static const OnigCodePoint CRUpper[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
421,
#else
@@ -2799,7 +2799,7 @@ static OnigCodePoint CRUpper[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of CRUpper */
-static OnigCodePoint CRXDigit[] = {
+static const OnigCodePoint CRXDigit[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
3,
#else
@@ -2810,7 +2810,7 @@ static OnigCodePoint CRXDigit[] = {
0x0061, 0x0066
};
-static OnigCodePoint CRASCII[] = {
+static const OnigCodePoint CRASCII[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
1,
#else
@@ -2819,7 +2819,7 @@ static OnigCodePoint CRASCII[] = {
0x0000, 0x007f
};
-static OnigCodePoint CRWord[] = {
+static const OnigCodePoint CRWord[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
436,
#else
@@ -3320,6 +3320,9 @@ onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype)
case ONIGENC_CTYPE_ALNUM:
return onig_is_in_code_range((UChar* )CRAlnum, code);
break;
+ case ONIGENC_CTYPE_NEWLINE:
+ return FALSE;
+ break;
default:
return ONIGENCERR_TYPE_BUG;
@@ -3337,9 +3340,9 @@ onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype)
extern int
onigenc_unicode_get_ctype_code_range(int ctype,
- OnigCodePoint* sbr[], OnigCodePoint* mbr[])
+ const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])
{
- static OnigCodePoint EmptyRange[] = { 0 };
+ static const OnigCodePoint EmptyRange[] = { 0 };
#define CR_SET(list) do { \
*mbr = list; \
diff --git a/ext/mbstring/oniguruma/enc/utf16_be.c b/ext/mbstring/oniguruma/enc/utf16_be.c
index ad33ddbee..0dd2832f7 100755
--- a/ext/mbstring/oniguruma/enc/utf16_be.c
+++ b/ext/mbstring/oniguruma/enc/utf16_be.c
@@ -2,7 +2,7 @@
utf16_be.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -32,7 +32,7 @@
#define UTF16_IS_SURROGATE_FIRST(c) (c >= 0xd8 && c <= 0xdb)
#define UTF16_IS_SURROGATE_SECOND(c) (c >= 0xdc && c <= 0xdf)
-static int EncLen_UTF16[] = {
+static const int EncLen_UTF16[] = {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -63,6 +63,12 @@ utf16be_is_mbc_newline(const UChar* p, const UChar* end)
if (p + 1 < end) {
if (*(p+1) == 0x0a && *p == 0x00)
return 1;
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+ if ((*(p+1) == 0x0d || *(p+1) == 0x85) && *p == 0x00)
+ return 1;
+ if (*p == 0x20 && (*(p+1) == 0x29 || *(p+1) == 0x28))
+ return 1;
+#endif
}
return 0;
}
diff --git a/ext/mbstring/oniguruma/enc/utf16_le.c b/ext/mbstring/oniguruma/enc/utf16_le.c
index db892dcd1..93cc6138a 100755
--- a/ext/mbstring/oniguruma/enc/utf16_le.c
+++ b/ext/mbstring/oniguruma/enc/utf16_le.c
@@ -2,7 +2,7 @@
utf16_le.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -32,7 +32,7 @@
#define UTF16_IS_SURROGATE_FIRST(c) (c >= 0xd8 && c <= 0xdb)
#define UTF16_IS_SURROGATE_SECOND(c) (c >= 0xdc && c <= 0xdf)
-static int EncLen_UTF16[] = {
+static const int EncLen_UTF16[] = {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -69,6 +69,12 @@ utf16le_is_mbc_newline(const UChar* p, const UChar* end)
if (p + 1 < end) {
if (*p == 0x0a && *(p+1) == 0x00)
return 1;
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+ if ((*p == 0x0d || *p == 0x85) && *(p+1) == 0x00)
+ return 1;
+ if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28))
+ return 1;
+#endif
}
return 0;
}
diff --git a/ext/mbstring/oniguruma/enc/utf32_be.c b/ext/mbstring/oniguruma/enc/utf32_be.c
index 60feb040b..36b477286 100755
--- a/ext/mbstring/oniguruma/enc/utf32_be.c
+++ b/ext/mbstring/oniguruma/enc/utf32_be.c
@@ -2,7 +2,7 @@
utf32_be.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -41,6 +41,14 @@ utf32be_is_mbc_newline(const UChar* p, const UChar* end)
if (p + 3 < end) {
if (*(p+3) == 0x0a && *(p+2) == 0 && *(p+1) == 0 && *p == 0)
return 1;
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+ if ((*(p+3) == 0x0d || *(p+3) == 0x85)
+ && *(p+2) == 0 && *(p+1) == 0 && *p == 0x00)
+ return 1;
+ if (*(p+2) == 0x20 && (*(p+3) == 0x29 || *(p+3) == 0x28)
+ && *(p+1) == 0 && *p == 0)
+ return 1;
+#endif
}
return 0;
}
diff --git a/ext/mbstring/oniguruma/enc/utf32_le.c b/ext/mbstring/oniguruma/enc/utf32_le.c
index bba9689f7..1e9487d1d 100755
--- a/ext/mbstring/oniguruma/enc/utf32_le.c
+++ b/ext/mbstring/oniguruma/enc/utf32_le.c
@@ -2,7 +2,7 @@
utf32_le.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -41,6 +41,14 @@ utf32le_is_mbc_newline(const UChar* p, const UChar* end)
if (p + 3 < end) {
if (*p == 0x0a && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0)
return 1;
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+ if ((*p == 0x0d || *p == 0x85) && *(p+1) == 0x00
+ && *(p+2) == 0x00 && *(p+3) == 0x00) /* U+000D, U+0085 */
+ return 1;
+ if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28)
+ && *(p+2) == 0x00 && *(p+3) == 0x00) /* U+2028, U+2029 */
+ return 1;
+#endif
}
return 0;
}
diff --git a/ext/mbstring/oniguruma/enc/utf8.c b/ext/mbstring/oniguruma/enc/utf8.c
index 592bebfe8..0e816176b 100644
--- a/ext/mbstring/oniguruma/enc/utf8.c
+++ b/ext/mbstring/oniguruma/enc/utf8.c
@@ -2,7 +2,7 @@
utf8.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -40,7 +40,7 @@
#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80)
-static int EncLen_UTF8[] = {
+static const int EncLen_UTF8[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -65,6 +65,29 @@ utf8_mbc_enc_len(const UChar* p)
return EncLen_UTF8[*p];
}
+static int
+utf8_is_mbc_newline(const UChar* p, const UChar* end) /* returns 1 if a line terminator starts at p, else 0 */
+{
+ if (p < end) { /* at least one byte is available before end */
+ if (*p == 0x0a) return 1; /* LF */
+ /* The terminators below are recognized only when USE_UNICODE_ALL_LINE_TERMINATORS is defined. */
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+ if (*p == 0x0d) return 1; /* CR */
+ if (p + 1 < end) { /* a two-byte sequence fits before end */
+ if (*(p+1) == 0x85 && *p == 0xc2) /* U+0085 */
+ return 1;
+ if (p + 2 < end) { /* a three-byte sequence fits before end */
+ if ((*(p+2) == 0xa8 || *(p+2) == 0xa9)
+ && *(p+1) == 0x80 && *p == 0xe2) /* U+2028, U+2029 */
+ return 1;
+ }
+ }
+#endif
+ }
+ /* Nothing matched, or p is not before end. */
+ return 0;
+}
+
static OnigCodePoint
utf8_mbc_to_code(const UChar* p, const UChar* end)
{
@@ -307,16 +330,16 @@ utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
}
-static OnigCodePoint EmptyRange[] = { 0 };
+static const OnigCodePoint EmptyRange[] = { 0 };
-static OnigCodePoint SBAlnum[] = {
+static const OnigCodePoint SBAlnum[] = {
3,
0x0030, 0x0039,
0x0041, 0x005a,
0x0061, 0x007a
};
-static OnigCodePoint MBAlnum[] = {
+static const OnigCodePoint MBAlnum[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
411,
#else
@@ -738,13 +761,13 @@ static OnigCodePoint MBAlnum[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBAlnum */
-static OnigCodePoint SBAlpha[] = {
+static const OnigCodePoint SBAlpha[] = {
2,
0x0041, 0x005a,
0x0061, 0x007a
};
-static OnigCodePoint MBAlpha[] = {
+static const OnigCodePoint MBAlpha[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
394,
#else
@@ -1149,13 +1172,13 @@ static OnigCodePoint MBAlpha[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBAlpha */
-static OnigCodePoint SBBlank[] = {
+static const OnigCodePoint SBBlank[] = {
2,
0x0009, 0x0009,
0x0020, 0x0020
};
-static OnigCodePoint MBBlank[] = {
+static const OnigCodePoint MBBlank[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
7,
#else
@@ -1173,13 +1196,13 @@ static OnigCodePoint MBBlank[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBBlank */
-static OnigCodePoint SBCntrl[] = {
+static const OnigCodePoint SBCntrl[] = {
2,
0x0000, 0x001f,
0x007f, 0x007f
};
-static OnigCodePoint MBCntrl[] = {
+static const OnigCodePoint MBCntrl[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
18,
#else
@@ -1208,12 +1231,12 @@ static OnigCodePoint MBCntrl[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBCntrl */
-static OnigCodePoint SBDigit[] = {
+static const OnigCodePoint SBDigit[] = {
1,
0x0030, 0x0039
};
-static OnigCodePoint MBDigit[] = {
+static const OnigCodePoint MBDigit[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
22,
#else
@@ -1245,12 +1268,12 @@ static OnigCodePoint MBDigit[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBDigit */
-static OnigCodePoint SBGraph[] = {
+static const OnigCodePoint SBGraph[] = {
1,
0x0021, 0x007e
};
-static OnigCodePoint MBGraph[] = {
+static const OnigCodePoint MBGraph[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
404,
#else
@@ -1665,12 +1688,12 @@ static OnigCodePoint MBGraph[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBGraph */
-static OnigCodePoint SBLower[] = {
+static const OnigCodePoint SBLower[] = {
1,
0x0061, 0x007a
};
-static OnigCodePoint MBLower[] = {
+static const OnigCodePoint MBLower[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
423,
#else
@@ -2104,13 +2127,13 @@ static OnigCodePoint MBLower[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBLower */
-static OnigCodePoint SBPrint[] = {
+static const OnigCodePoint SBPrint[] = {
2,
0x0009, 0x000d,
0x0020, 0x007e
};
-static OnigCodePoint MBPrint[] = {
+static const OnigCodePoint MBPrint[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
403,
#else
@@ -2524,7 +2547,7 @@ static OnigCodePoint MBPrint[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBPrint */
-static OnigCodePoint SBPunct[] = {
+static const OnigCodePoint SBPunct[] = {
9,
0x0021, 0x0023,
0x0025, 0x002a,
@@ -2537,7 +2560,7 @@ static OnigCodePoint SBPunct[] = {
0x007d, 0x007d
}; /* end of SBPunct */
-static OnigCodePoint MBPunct[] = {
+static const OnigCodePoint MBPunct[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
77,
#else
@@ -2625,13 +2648,13 @@ static OnigCodePoint MBPunct[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBPunct */
-static OnigCodePoint SBSpace[] = {
+static const OnigCodePoint SBSpace[] = {
2,
0x0009, 0x000d,
0x0020, 0x0020
};
-static OnigCodePoint MBSpace[] = {
+static const OnigCodePoint MBSpace[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
9,
#else
@@ -2651,12 +2674,12 @@ static OnigCodePoint MBSpace[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBSpace */
-static OnigCodePoint SBUpper[] = {
+static const OnigCodePoint SBUpper[] = {
1,
0x0041, 0x005a
};
-static OnigCodePoint MBUpper[] = {
+static const OnigCodePoint MBUpper[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
420,
#else
@@ -3087,19 +3110,19 @@ static OnigCodePoint MBUpper[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBUpper */
-static OnigCodePoint SBXDigit[] = {
+static const OnigCodePoint SBXDigit[] = {
3,
0x0030, 0x0039,
0x0041, 0x0046,
0x0061, 0x0066
};
-static OnigCodePoint SBASCII[] = {
+static const OnigCodePoint SBASCII[] = {
1,
0x0000, 0x007f
};
-static OnigCodePoint SBWord[] = {
+static const OnigCodePoint SBWord[] = {
4,
0x0030, 0x0039,
0x0041, 0x005a,
@@ -3107,7 +3130,7 @@ static OnigCodePoint SBWord[] = {
0x0061, 0x007a
};
-static OnigCodePoint MBWord[] = {
+static const OnigCodePoint MBWord[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
432,
#else
@@ -3554,7 +3577,7 @@ static OnigCodePoint MBWord[] = {
static int
utf8_get_ctype_code_range(int ctype,
- OnigCodePoint* sbr[], OnigCodePoint* mbr[])
+ const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])
{
#define CR_SET(sbl,mbl) do { \
*sbr = sbl; \
@@ -3622,7 +3645,7 @@ static int
utf8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
- OnigCodePoint *range;
+ const OnigCodePoint *range;
#endif
if (code < 256) {
@@ -3674,6 +3697,9 @@ utf8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
case ONIGENC_CTYPE_ALNUM:
range = MBAlnum;
break;
+ case ONIGENC_CTYPE_NEWLINE:
+ return FALSE;
+ break;
default:
return ONIGENCERR_TYPE_BUG;
@@ -3723,7 +3749,7 @@ OnigEncodingType OnigEncodingUTF8 = {
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
- onigenc_is_mbc_newline_0x0a,
+ utf8_is_mbc_newline,
utf8_mbc_to_code,
utf8_code_to_mbclen,
utf8_code_to_mbc,
diff --git a/ext/mbstring/oniguruma/index.html b/ext/mbstring/oniguruma/index.html
index 02e844c36..fbf4fc095 100755
--- a/ext/mbstring/oniguruma/index.html
+++ b/ext/mbstring/oniguruma/index.html
@@ -5,18 +5,10 @@
</head>
<body BGCOLOR="#ffffff" VLINK="#808040" TEXT="#696969">
-<!--
-<a href="http://miuras.net/matsushita.html">
-<img src="anti_matsushita.PNG" height="46" width="266">
-</a>
--->
-<a href="http://miuras.net/matsushita.html">M</a>
-<a href="http://www.micropac.co.jp/nec/">N</a>
-
<h1>Oniguruma</h1>
<p>
-2005/03/07 (C) K.Kosako
+2006/09/19 (C) K.Kosako
</p>
<p>
@@ -29,10 +21,13 @@ The characteristics of this library is that different character encoding
<dt><b>Supported character encodings:</b><br>
ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,<br>
EUC-JP, EUC-TW, EUC-KR, EUC-CN,<br>
-Shift_JIS, Big5, KOI8-R, KOI8,<br>
+Shift_JIS, Big5, GB 18030, KOI8-R, KOI8,<br>
ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,<br>
ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,<br>
-ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
+ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16<br>
+<font color="red">
+(GB 18030 encoding was contributed by KUBO Takehiro)
+</font>
</p>
</dl>
<p>
@@ -42,8 +37,8 @@ ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
<dt><b>What's new</b>
</font>
<ul>
-<li>released Version 3.7.1 (2005/03/07)
-<li>released Version 2.4.2 (2005/03/05)
+<li>Version 4.4.4 released. (2006/09/19)
+<li>Version 2.5.7 released. (2006/07/28)
</ul>
</dl>
@@ -75,23 +70,24 @@ It follows the BSD license in the case of the one except for it.
<dt><b>Download:</b>
<ul>
-<li> <a href="archive/onigd20050307.tar.gz">Latest release version 3.7.1</a> (2005/03/07) <a href="HISTORY_3X.txt">Change Log</a>
-<li> <a href="archive/onigd20050219.tar.gz">3.7.0</a> (2005/02/19)
-<li> <a href="archive/onigd20050204.tar.gz">3.6.0</a> (2005/02/04)
-<li> <a href="archive/onigd2_4_2.tar.gz">Latest release version 2.4.2</a> (2005/03/05) <a href="HISTORY_2X.txt">Change Log</a>
-<li> <a href="archive/onigd2_4_1.tar.gz">2.4.1</a> (2005/01/05)
-<li> <a href="archive/onigd2_4_0.tar.gz">2.4.0</a> (2004/12/01)
+<li> <a href="archive/onig-4.4.4.tar.gz">Latest release version 4.4.4</a> (2006/09/19) <a href="HISTORY_4X.txt">Change Log</a>
+<li> <a href="archive/onig-4.4.3.tar.gz">4.4.3</a> (2006/09/15)
+<li> <a href="archive/onig-4.4.2.tar.gz">4.4.2</a> (2006/09/08)
+<li> <a href="archive/onig-4.4.1.tar.gz">4.4.1</a> (2006/08/29)
+<li> <a href="archive/onigd2_5_7.tar.gz">Latest release version 2.5.7</a> (2006/07/28) <a href="HISTORY_2X.txt">Change Log</a>
+<li> <a href="archive/onigd2_5_6.tar.gz">2.5.6</a> (2006/05/29)
+<li> <a href="archive/onigd2_5_5.tar.gz">2.5.5</a> (2006/05/08)
</ul>
<br>
<font color="red">
-* 3.X.X supports UTF-16/UTF-32, Ruby 1.9.X.<br>
-* 2.X.X does not support UTF-16/UTF-32, supports Ruby 1.6/1.8.
+* 4.X.X supports UTF-16/UTF-32, Ruby 1.9.X.<br>
+* 2.X.X does not support UTF-16/UTF-32, supports Ruby 1.6/1.8.[2-4]
</font>
<br>
<br>
-<dt><b>Documents:</b> (version 3.7.1)
+<dt><b>Documents:</b> (version 4.4.4)
<ul>
<li> <a href="doc/RE.txt">Regular Expressions</a>
<a href="doc/RE.ja.txt">(Japanese: EUC-JP)</a>
@@ -112,23 +108,39 @@ It follows the BSD license in the case of the one except for it.
<li> <a href="http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/">Oniguruma in Ruby CVS</a> (old version)
<li> <a href="http://raa.ruby-lang.org/project/oniguruma/">Oniguruma in RAA</a> (Ruby Application Archive)
<li> <a href="http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/">FreeBSD ports</a>
-<li> <a href="http://www.halbiz.com/osaru/cnregex.html">cnRegex 4D Plugin</a> (Japanese page)
+<li> <a href="http://www.softantenna.com/lib/1953/index.html">SoftAntenna &gt; Lib &gt; Oniguruma</a> (Japanese page)
+<li> <a href="http://homepage3.nifty.com/k-takata/mysoft/bregonig.html">bregonig.dll</a> (Japanese page)
+<li> <a href="http://www.halbiz.com/osaru/cnregex.html">cnRegex 4D Plugin (Mac OS X)</a> (Japanese page)
+<li> <a href="http://kmaebashi.com/">new script language crowbar</a> (Japanese page)
+<li> <a href="http://homepage2.nifty.com/Km/onig.htm">Delphi interface (Win32)</a> (Japanese page)
+<li> <a href="http://pyxis-project.net/ensemble/">Ensemble (Mac OS X)</a> (Japanese page)
<li> <a href="http://www.tom.sfc.keio.ac.jp/~sakai/d/?date=20050209">GHC patch</a> Masahiro Sakai (Japanese Blog)
-<li> <a href="http://www.gyazsquare.com/gyazmail/index.php">GyazMail</a>
-<li> <a href="http://www.artman21.net/">Jedit X</a>
+<li> <a href="http://www.gyazsquare.com/gyazmail/index.php">GyazMail (Mac OS X)</a>
+<li> <a href="http://www.artman21.net/">Jedit X (Mac OS X)</a>
<li> <a href="http://www.chitora.jp/lhaz.html">Lhaz</a> (Japanese page)
+<li> <a href="http://limechat.net/">LimeChat</a> (Japanese page)
<li> <a href="http://www.irori.org/tool/mregexp.html">mregexp</a> (Japanese page)
-<li> <a href="http://www.trinity-site.net/wiki/index.php?MultiFind">MultiFind</a> (Japanese page)
<li> <a href="http://ochusha.sourceforge.jp/">Ochusha</a> (Japanese page)
-<li> <a href="http://www-gauge.scphys.kyoto-u.ac.jp/~sonobe/OgreKit/index.html">OgreKit</a> Regular Expression Framework for Cocoa (Japanese page)
-<li> <a href ="http://www.kanetaka.net/4dapi/wiki4d.dll/4dcgi/wiki.cgi?plugins-oniguruma">OnigRegexp</a> (Japanese page)
-<li> <a href ="http://www.moriq.com/onig/">Oniguruma / FireBird (Win32)</a>
-<li> <a href ="http://openspace.timedia.co.jp/~yasuyuki/wiliki/wiliki.cgi?Oniguruma-mysqld&l=jp">Oniguruma-mysqld</a>
-<li> <a href ="http://www.kt.rim.or.jp/~kbk/sed/index.html">Onigsed (Win32)</a> (Japanese page)
+<li> <a href="http://www8.ocn.ne.jp/%7esonoisa/OgreKit/index.html">OgreKit (Mac OS X)</a> Regular Expression Framework for Cocoa (Japanese page)
+<li> <a href="http://www.kanetaka.net/4dapi/wiki4d.dll/4dcgi/wiki.cgi?plugins-oniguruma">OnigRegexp</a> (Japanese page)
+<li> <a href="http://www.moriq.com/onig/">Oniguruma / FireBird (Win32)</a>
+<li> <a href="http://openspace.timedia.co.jp/~yasuyuki/wiliki/wiliki.cgi?Oniguruma-mysqld&l=jp">Oniguruma-mysqld</a>
+<li> <a href="http://www.void.in/wiki/OnigPP">OnigPP</a> (Japanese page)
+<li> <a href="http://www.kt.rim.or.jp/~kbk/sed/index.html">Onigsed (Win32)</a> (Japanese page)
+<li> <a href="http://www.kt.rim.or.jp/~kbk/yagrep/index.html">yagrep (Win32)</a> (Japanese page)
<li> <a href="http://www.php.gr.jp/">Japan PHP User Group</a> PHP 5.0 mb_ereg (Japanese page)
+<li> <a href="http://yatsu.info/wiki/Pufui/">Pufui (Mac OS X)</a> (Japanese page)
+<li> <a href="http://harumune.s56.xrea.com/assari/index.php?RSSTyping">RSSTyping</a> (Japanese page)
<li> <a href="http://www.ruby-lang.org/">Ruby</a>
-<li> <a href="http://quux.s74.xrea.com/">SevenFour</a> (Japanese page)
-<li> <a href="http://www8.ocn.ne.jp/~sonoisa/TiddlyWikiPod/">TiddlyWikiPod</a>
+<li> <a href="http://tobysoft.net/wiki/index.php?Ruby%2Fruby-win32-oniguruma">ruby-win32-oniguruma</a> (Japanese page)
+<li> <a href="http://quux.s74.xrea.com/">SevenFour (Mac OS X)</a> (Japanese page)
+<li> <a href="http://storklab.cyber-ninja.jp/">Stork Lab. Products (Mac OS X)</a> (Japanese page)
+<li> <a href="http://sourceforge.jp/projects/ttssh2/">TeraTerm</a>
+<li> <a href="http://macromates.com/">TextMate (Mac OS X)</a>
+<li> <a href="http://www8.ocn.ne.jp/~sonoisa/TiddlyWikiPod/">TiddlyWikiPod (Mac OS X)</a>
+<li> <a href="http://www.cyanworks.net/mac.html">TunesTEXT (Mac OS X)</a>
+<li> <a href="http://sourceforge.jp/projects/frogger/">XML parser</a>
+<li> <a href="http://www.yokkasoft.net/">YokkaSoft</a>
</ul>
<br>
@@ -138,41 +150,41 @@ It follows the BSD license in the case of the one except for it.
<li> <a href="http://www.perldoc.com/perl5.8.0/pod/perlre.html">Perl regular expressions</a>
<li> <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">java.util.regex.Pattern (J2SE 1.4.2)</a>
<li> <a href="http://www.opengroup.org/onlinepubs/007908799/xbd/re.html">The Open Group</a>
-<li> <a href="http://www.pcre.org/">PCRE</a>
-<!--
-<li> <a href="http://www.jajakarta.org/regexp/">Jakarta Project Regexp</a> (Japanese page)
-<li> <a href="http://www.jajakarta.org/oro/">Jakarta Project ORO</a> (Japanese page)
--->
-<li> <a href="http://www.kt.rim.or.jp/~kbk/regex/regex.html">Regular expressions memo</a> (Japanese page)
-<li> <a href="http://www.din.or.jp/~ohzaki/regex.htm">Regular expressions technique</a> (Japanese page)
<li> <a href="http://regex.info/">Mastering Regular Expressions</a>
+<li> <a href="http://www.unicode.org/">Unicode Home Page</a>
</ul>
<br>
-<!--
-<dt><b>ToDo:</b>
+<dt><b>Resources:</b>
<ul>
-<li> support character types for all code point range.
+<li> <a href="http://www.kt.rim.or.jp/~kbk/regex/regex.html">Regular expressions memo</a> (Japanese page)
+<li> <a href="http://www.din.or.jp/~ohzaki/regex.htm">Regular expressions technique</a> (Japanese page)
+<li> <a href="http://staff.aist.go.jp/tanaka-akira/textprocess/">"Text Processing" Lecture documents (Tanaka Akira)</a> (Japanese page)
</ul>
--->
+
+<br>
</dl>
<p>
and I'm thankful to Akinori MUSHA.
</p>
-<!--
<hr>
-<font color="red">
-2004-06-14<br>
-To: "Greg A. Woods"<br>
-I can't send mail to you. (rejected)<br>
-Please set the nmatch argument of regexec() to 1,
-and use Oniguruma 3.7.1 or 2.4.2.<br>
-The nmatch argument should be array size of a pmatch.<br>
-But I don't know whether this problem is related to the crash
-that you reported.
-</font>
--->
+<dl>
+<dt><b>Other Libraries:</b>
+<ul>
+<li> <a href="http://www.boost.org/libs/regex/doc/">Boost.Regex</a>
+<li> <a href="http://www.pcre.org/">PCRE</a>
+<li> <a href="http://arglist.com/regex/">A copy of Henry Spencer's</a>
+<li> <a href="http://re2c.org/">re2c</a>
+<li> <a href="http://tiny-rex.sourceforge.net/">T-Rex</a>
+<li> <a href="http://laurikari.net/tre/">TRE</a>
+<li> <a href="http://www.cacas.org/java/gnu/regexp/">gnu.regexp for Java</a>
+<li> <a href="http://jakarta.apache.org/regexp/index.html">Jakarta Project Regexp</a>
+<li> <a href="http://jakarta.apache.org/oro/">Jakarta Project ORO</a>
+</ul>
+</dl>
+
<hr>
+<a href="../">Back to Home</a>
</body>
</html>
diff --git a/ext/mbstring/oniguruma/onigcmpt200.h b/ext/mbstring/oniguruma/onigcmpt200.h
index 4c029304b..d9b141914 100644
--- a/ext/mbstring/oniguruma/onigcmpt200.h
+++ b/ext/mbstring/oniguruma/onigcmpt200.h
@@ -29,6 +29,12 @@
#define REGCODE_EUCJP REG_ENCODING_EUC_JP
#define REGCODE_SJIS REG_ENCODING_SJIS
+/* Don't use MBCTYPE_XXXX; use RE_MBCTYPE_XXXX. (obsoleted) */
+#define MBCTYPE_ASCII RE_MBCTYPE_ASCII
+#define MBCTYPE_EUC RE_MBCTYPE_EUC
+#define MBCTYPE_SJIS RE_MBCTYPE_SJIS
+#define MBCTYPE_UTF8 RE_MBCTYPE_UTF8
+
typedef unsigned char* RegTransTableType;
#define RegOptionType OnigOptionType
#define RegDistance OnigDistance
diff --git a/ext/mbstring/oniguruma/oniggnu.h b/ext/mbstring/oniguruma/oniggnu.h
index b203f6c8a..3da9f235c 100644
--- a/ext/mbstring/oniguruma/oniggnu.h
+++ b/ext/mbstring/oniguruma/oniggnu.h
@@ -35,10 +35,10 @@
extern "C" {
#endif
-#define MBCTYPE_ASCII 0
-#define MBCTYPE_EUC 1
-#define MBCTYPE_SJIS 2
-#define MBCTYPE_UTF8 3
+#define RE_MBCTYPE_ASCII 0
+#define RE_MBCTYPE_EUC 1
+#define RE_MBCTYPE_SJIS 2
+#define RE_MBCTYPE_UTF8 3
/* GNU regex options */
#ifndef RE_NREGS
diff --git a/ext/mbstring/oniguruma/oniguruma.h b/ext/mbstring/oniguruma/oniguruma.h
index 279035610..a0107cbe3 100644
--- a/ext/mbstring/oniguruma/oniguruma.h
+++ b/ext/mbstring/oniguruma/oniguruma.h
@@ -4,7 +4,7 @@
oniguruma.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -36,9 +36,9 @@ extern "C" {
#endif
#define ONIGURUMA
-#define ONIGURUMA_VERSION_MAJOR 3
-#define ONIGURUMA_VERSION_MINOR 7
-#define ONIGURUMA_VERSION_TEENY 1
+#define ONIGURUMA_VERSION_MAJOR 4
+#define ONIGURUMA_VERSION_MINOR 4
+#define ONIGURUMA_VERSION_TEENY 4
#ifdef __cplusplus
# ifndef HAVE_PROTOTYPES
@@ -49,6 +49,13 @@ extern "C" {
# endif
#endif
+/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */
+#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4
+# ifndef HAVE_STDARG_PROTOTYPES
+# define HAVE_STDARG_PROTOTYPES 1
+# endif
+#endif
+
#ifndef P_
#if defined(__STDC__) || defined(_WIN32)
# define P_(args) args
@@ -167,10 +174,10 @@ typedef struct {
int (*code_to_mbc)(OnigCodePoint code, OnigUChar *buf);
int (*mbc_to_normalize)(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to);
int (*is_mbc_ambiguous)(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end);
- int (*get_all_pair_ambig_codes)(OnigAmbigType flag, OnigPairAmbigCodes** acs);
- int (*get_all_comp_ambig_codes)(OnigAmbigType flag, OnigCompAmbigCodes** acs);
+ int (*get_all_pair_ambig_codes)(OnigAmbigType flag, const OnigPairAmbigCodes** acs);
+ int (*get_all_comp_ambig_codes)(OnigAmbigType flag, const OnigCompAmbigCodes** acs);
int (*is_code_ctype)(OnigCodePoint code, unsigned int ctype);
- int (*get_ctype_code_range)(int ctype, OnigCodePoint* sb_range[], OnigCodePoint* mb_range[]);
+ int (*get_ctype_code_range)(int ctype, const OnigCodePoint* sb_range[], const OnigCodePoint* mb_range[]);
OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p);
int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end);
} OnigEncodingType;
@@ -206,6 +213,7 @@ ONIG_EXTERN OnigEncodingType OnigEncodingSJIS;
ONIG_EXTERN OnigEncodingType OnigEncodingKOI8;
ONIG_EXTERN OnigEncodingType OnigEncodingKOI8_R;
ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
+ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
#define ONIG_ENCODING_ASCII (&OnigEncodingASCII)
#define ONIG_ENCODING_ISO_8859_1 (&OnigEncodingISO_8859_1)
@@ -236,6 +244,7 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
#define ONIG_ENCODING_KOI8 (&OnigEncodingKOI8)
#define ONIG_ENCODING_KOI8_R (&OnigEncodingKOI8_R)
#define ONIG_ENCODING_BIG5 (&OnigEncodingBIG5)
+#define ONIG_ENCODING_GB18030 (&OnigEncodingGB18030)
#endif /* else RUBY && M17N */
@@ -448,7 +457,7 @@ int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p));
#define ONIG_NREGION 10
#define ONIG_MAX_BACKREF_NUM 1000
#define ONIG_MAX_REPEAT_NUM 100000
-#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 1000
+#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 10000
/* constants */
#define ONIG_MAX_ERROR_MESSAGE_LEN 90
@@ -457,8 +466,8 @@ typedef unsigned int OnigOptionType;
#define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE
/* options */
-#define ONIG_OPTION_NONE 0
-#define ONIG_OPTION_IGNORECASE 1L
+#define ONIG_OPTION_NONE 0U
+#define ONIG_OPTION_IGNORECASE 1U
#define ONIG_OPTION_EXTEND (ONIG_OPTION_IGNORECASE << 1)
#define ONIG_OPTION_MULTILINE (ONIG_OPTION_EXTEND << 1)
#define ONIG_OPTION_SINGLELINE (ONIG_OPTION_MULTILINE << 1)
@@ -471,6 +480,7 @@ typedef unsigned int OnigOptionType;
#define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1)
#define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1)
#define ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1)
+#define ONIG_OPTION_MAXBIT ONIG_OPTION_POSIX_REGION /* limit */
#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt))
#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt))
@@ -484,6 +494,7 @@ typedef struct {
OnigOptionType options; /* default option */
} OnigSyntaxType;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxASIS;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended;
ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs;
@@ -491,9 +502,11 @@ ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep;
ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex;
ONIG_EXTERN OnigSyntaxType OnigSyntaxJava;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl_NG;
ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
/* predefined syntaxes (see regsyntax.c) */
+#define ONIG_SYNTAX_ASIS (&OnigSyntaxASIS)
#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
@@ -501,6 +514,7 @@ ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex)
#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava)
#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl)
+#define ONIG_SYNTAX_PERL_NG (&OnigSyntaxPerl_NG)
#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby)
/* default syntax */
@@ -508,80 +522,81 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax
/* syntax (operators) */
-#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1<<0)
-#define ONIG_SYN_OP_DOT_ANYCHAR (1<<1) /* . */
-#define ONIG_SYN_OP_ASTERISK_ZERO_INF (1<<2) /* * */
-#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1<<3)
-#define ONIG_SYN_OP_PLUS_ONE_INF (1<<4) /* + */
-#define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1<<5)
-#define ONIG_SYN_OP_QMARK_ZERO_ONE (1<<6) /* ? */
-#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1<<7)
-#define ONIG_SYN_OP_BRACE_INTERVAL (1<<8) /* {lower,upper} */
-#define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1<<9) /* \{lower,upper\} */
-#define ONIG_SYN_OP_VBAR_ALT (1<<10) /* | */
-#define ONIG_SYN_OP_ESC_VBAR_ALT (1<<11) /* \| */
-#define ONIG_SYN_OP_LPAREN_SUBEXP (1<<12) /* (...) */
-#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1<<13) /* \(...\) */
-#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1<<14) /* \A, \Z, \z */
-#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1<<15) /* \G */
-#define ONIG_SYN_OP_DECIMAL_BACKREF (1<<16) /* \num */
-#define ONIG_SYN_OP_BRACKET_CC (1<<17) /* [...] */
-#define ONIG_SYN_OP_ESC_W_WORD (1<<18) /* \w, \W */
-#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1<<19) /* \<. \> */
-#define ONIG_SYN_OP_ESC_B_WORD_BOUND (1<<20) /* \b, \B */
-#define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1<<21) /* \s, \S */
-#define ONIG_SYN_OP_ESC_D_DIGIT (1<<22) /* \d, \D */
-#define ONIG_SYN_OP_LINE_ANCHOR (1<<23) /* ^, $ */
-#define ONIG_SYN_OP_POSIX_BRACKET (1<<24) /* [:xxxx:] */
-#define ONIG_SYN_OP_QMARK_NON_GREEDY (1<<25) /* ??,*?,+?,{n,m}? */
-#define ONIG_SYN_OP_ESC_CONTROL_CHARS (1<<26) /* \n,\r,\t,\a ... */
-#define ONIG_SYN_OP_ESC_C_CONTROL (1<<27) /* \cx */
-#define ONIG_SYN_OP_ESC_OCTAL3 (1<<28) /* \OOO */
-#define ONIG_SYN_OP_ESC_X_HEX2 (1<<29) /* \xHH */
-#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1<<30) /* \x{7HHHHHHH} */
-
-#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1<<0) /* \Q...\E */
-#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1<<1) /* (?...) */
-#define ONIG_SYN_OP2_OPTION_PERL (1<<2) /* (?imsx),(?-imsx) */
-#define ONIG_SYN_OP2_OPTION_RUBY (1<<3) /* (?imx), (?-imx) */
-#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1<<4) /* ?+,*+,++ */
-#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1<<5) /* {n,m}+ */
-#define ONIG_SYN_OP2_CCLASS_SET_OP (1<<6) /* [...&&..[..]..] */
-#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1<<7) /* (?<name>...) */
-#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1<<8) /* \k<name> */
-#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1<<9) /* \g<name>, \g<n> */
-#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1<<10) /* (?@..),(?@<x>..) */
-#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1<<11) /* \C-x */
-#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1<<12) /* \M-x */
-#define ONIG_SYN_OP2_ESC_V_VTAB (1<<13) /* \v as VTAB */
-#define ONIG_SYN_OP2_ESC_U_HEX4 (1<<14) /* \uHHHH */
-#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1<<15) /* \`, \' */
-#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1<<16) /* \p{...}, \P{...} */
-#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1<<17) /* \p{^..}, \P{^..} */
-#define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1<<18) /* \p{IsXDigit} */
-#define ONIG_SYN_OP2_ESC_H_XDIGIT (1<<19) /* \h, \H */
+#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1U<<0)
+#define ONIG_SYN_OP_DOT_ANYCHAR (1U<<1) /* . */
+#define ONIG_SYN_OP_ASTERISK_ZERO_INF (1U<<2) /* * */
+#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1U<<3)
+#define ONIG_SYN_OP_PLUS_ONE_INF (1U<<4) /* + */
+#define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1U<<5)
+#define ONIG_SYN_OP_QMARK_ZERO_ONE (1U<<6) /* ? */
+#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1U<<7)
+#define ONIG_SYN_OP_BRACE_INTERVAL (1U<<8) /* {lower,upper} */
+#define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1U<<9) /* \{lower,upper\} */
+#define ONIG_SYN_OP_VBAR_ALT (1U<<10) /* | */
+#define ONIG_SYN_OP_ESC_VBAR_ALT (1U<<11) /* \| */
+#define ONIG_SYN_OP_LPAREN_SUBEXP (1U<<12) /* (...) */
+#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1U<<13) /* \(...\) */
+#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1U<<14) /* \A, \Z, \z */
+#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1U<<15) /* \G */
+#define ONIG_SYN_OP_DECIMAL_BACKREF (1U<<16) /* \num */
+#define ONIG_SYN_OP_BRACKET_CC (1U<<17) /* [...] */
+#define ONIG_SYN_OP_ESC_W_WORD (1U<<18) /* \w, \W */
+#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1U<<19) /* \<. \> */
+#define ONIG_SYN_OP_ESC_B_WORD_BOUND (1U<<20) /* \b, \B */
+#define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1U<<21) /* \s, \S */
+#define ONIG_SYN_OP_ESC_D_DIGIT (1U<<22) /* \d, \D */
+#define ONIG_SYN_OP_LINE_ANCHOR (1U<<23) /* ^, $ */
+#define ONIG_SYN_OP_POSIX_BRACKET (1U<<24) /* [:xxxx:] */
+#define ONIG_SYN_OP_QMARK_NON_GREEDY (1U<<25) /* ??,*?,+?,{n,m}? */
+#define ONIG_SYN_OP_ESC_CONTROL_CHARS (1U<<26) /* \n,\r,\t,\a ... */
+#define ONIG_SYN_OP_ESC_C_CONTROL (1U<<27) /* \cx */
+#define ONIG_SYN_OP_ESC_OCTAL3 (1U<<28) /* \OOO */
+#define ONIG_SYN_OP_ESC_X_HEX2 (1U<<29) /* \xHH */
+#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1U<<30) /* \x{7HHHHHHH} */
+
+#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1U<<0) /* \Q...\E */
+#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1U<<1) /* (?...) */
+#define ONIG_SYN_OP2_OPTION_PERL (1U<<2) /* (?imsx),(?-imsx) */
+#define ONIG_SYN_OP2_OPTION_RUBY (1U<<3) /* (?imx), (?-imx) */
+#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1U<<4) /* ?+,*+,++ */
+#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1U<<5) /* {n,m}+ */
+#define ONIG_SYN_OP2_CCLASS_SET_OP (1U<<6) /* [...&&..[..]..] */
+#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1U<<7) /* (?<name>...) */
+#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1U<<8) /* \k<name> */
+#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1U<<9) /* \g<name>, \g<n> */
+#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1U<<10) /* (?@..),(?@<x>..) */
+#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1U<<11) /* \C-x */
+#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1U<<12) /* \M-x */
+#define ONIG_SYN_OP2_ESC_V_VTAB (1U<<13) /* \v as VTAB */
+#define ONIG_SYN_OP2_ESC_U_HEX4 (1U<<14) /* \uHHHH */
+#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1U<<15) /* \`, \' */
+#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1U<<16) /* \p{...}, \P{...} */
+#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1U<<17) /* \p{^..}, \P{^..} */
+#define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) /* \p{IsXDigit} */
+#define ONIG_SYN_OP2_ESC_H_XDIGIT (1U<<19) /* \h, \H */
+#define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (1U<<20) /* \ */
/* syntax (behavior) */
-#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1<<31) /* not implemented */
-#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1<<0) /* ?, *, +, {n,m} */
-#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1<<1) /* error or ignore */
-#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1<<2) /* ...)... */
-#define ONIG_SYN_ALLOW_INVALID_INTERVAL (1<<3) /* {??? */
-#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1<<4) /* {,n} => {0,n} */
-#define ONIG_SYN_STRICT_CHECK_BACKREF (1<<5) /* /(\1)/,/\1()/ ..*/
-#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1<<6) /* (?<=a|bc) */
-#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1<<7) /* see doc/RE */
-#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1<<8) /* (?<x>)(?<x>) */
-#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1<<9) /* a{n}?=(?:a{n})? */
+#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */
+#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1U<<0) /* ?, *, +, {n,m} */
+#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1U<<1) /* error or ignore */
+#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1U<<2) /* ...)... */
+#define ONIG_SYN_ALLOW_INVALID_INTERVAL (1U<<3) /* {??? */
+#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1U<<4) /* {,n} => {0,n} */
+#define ONIG_SYN_STRICT_CHECK_BACKREF (1U<<5) /* /(\1)/,/\1()/ ..*/
+#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1U<<6) /* (?<=a|bc) */
+#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1U<<7) /* see doc/RE */
+#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1U<<8) /* (?<x>)(?<x>) */
+#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1U<<9) /* a{n}?=(?:a{n})? */
/* syntax (behavior) in char class [...] */
-#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1<<20) /* [^...] */
-#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1<<21) /* [..\w..] etc.. */
-#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1<<22)
-#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1<<23) /* [0-9-a]=[0-9\-a] */
+#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1U<<20) /* [^...] */
+#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1U<<21) /* [..\w..] etc.. */
+#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1U<<22)
+#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1U<<23) /* [0-9-a]=[0-9\-a] */
/* syntax (behavior) warning */
-#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1<<24) /* [,-,] */
-#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1<<25) /* (?:a*)+ */
+#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */
+#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */
/* meta character specifiers (onig_set_meta_char()) */
#define ONIG_META_CHAR_ESCAPE 0
@@ -660,6 +675,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
#define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402
+#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS -403
/* errors related to thread */
#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001
@@ -735,6 +751,7 @@ typedef struct re_pattern_buffer {
int num_mem; /* used memory(...) num counted from 1 */
int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
int num_null_check; /* OP_NULL_CHECK_START/END id counter */
+ int num_comb_exp_check; /* combination explosion check */
int num_call; /* number of subexp call */
unsigned int capture_history; /* (?@...) flag (1-31) */
unsigned int bt_mem_start; /* need backtrack flag */
@@ -766,7 +783,13 @@ typedef struct re_pattern_buffer {
/* regex_t link chain */
struct re_pattern_buffer* chain; /* escape compile-conflict */
-} regex_t;
+} OnigRegexType;
+
+typedef OnigRegexType* OnigRegex;
+
+#ifndef ONIG_ESCAPE_REGEX_T_COLLISION
+ typedef OnigRegexType regex_t;
+#endif
typedef struct {
@@ -788,19 +811,19 @@ void onig_set_warn_func P_((OnigWarnFunc f));
ONIG_EXTERN
void onig_set_verb_warn_func P_((OnigWarnFunc f));
ONIG_EXTERN
-int onig_new P_((regex_t**, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
+int onig_new P_((OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
ONIG_EXTERN
-int onig_new_deluxe P_((regex_t** reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
+int onig_new_deluxe P_((OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
ONIG_EXTERN
-void onig_free P_((regex_t*));
+void onig_free P_((OnigRegex));
ONIG_EXTERN
-int onig_recompile P_((regex_t*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
+int onig_recompile P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
ONIG_EXTERN
-int onig_recompile_deluxe P_((regex_t* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
+int onig_recompile_deluxe P_((OnigRegex reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
ONIG_EXTERN
-int onig_search P_((regex_t*, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option));
+int onig_search P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option));
ONIG_EXTERN
-int onig_match P_((regex_t*, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option));
+int onig_match P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option));
ONIG_EXTERN
OnigRegion* onig_region_new P_((void));
ONIG_EXTERN
@@ -816,29 +839,31 @@ int onig_region_resize P_((OnigRegion* region, int n));
ONIG_EXTERN
int onig_region_set P_((OnigRegion* region, int at, int beg, int end));
ONIG_EXTERN
-int onig_name_to_group_numbers P_((regex_t* reg, const OnigUChar* name, const OnigUChar* name_end, int** nums));
+int onig_name_to_group_numbers P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, int** nums));
ONIG_EXTERN
-int onig_name_to_backref_number P_((regex_t* reg, const OnigUChar* name, const OnigUChar* name_end, OnigRegion *region));
+int onig_name_to_backref_number P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, OnigRegion *region));
ONIG_EXTERN
-int onig_foreach_name P_((regex_t* reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,regex_t*,void*), void* arg));
+int onig_foreach_name P_((OnigRegex reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,OnigRegex,void*), void* arg));
ONIG_EXTERN
-int onig_number_of_names P_((regex_t* reg));
+int onig_number_of_names P_((OnigRegex reg));
ONIG_EXTERN
-int onig_number_of_captures P_((regex_t* reg));
+int onig_number_of_captures P_((OnigRegex reg));
ONIG_EXTERN
-int onig_number_of_capture_histories P_((regex_t* reg));
+int onig_number_of_capture_histories P_((OnigRegex reg));
ONIG_EXTERN
OnigCaptureTreeNode* onig_get_capture_tree P_((OnigRegion* region));
ONIG_EXTERN
int onig_capture_tree_traverse P_((OnigRegion* region, int at, int(*callback_func)(int,int,int,int,int,void*), void* arg));
ONIG_EXTERN
-OnigEncoding onig_get_encoding P_((regex_t* reg));
+int onig_noname_group_capture_is_active P_((OnigRegex reg));
+ONIG_EXTERN
+OnigEncoding onig_get_encoding P_((OnigRegex reg));
ONIG_EXTERN
-OnigOptionType onig_get_options P_((regex_t* reg));
+OnigOptionType onig_get_options P_((OnigRegex reg));
ONIG_EXTERN
-OnigAmbigType onig_get_ambig_flag P_((regex_t* reg));
+OnigAmbigType onig_get_ambig_flag P_((OnigRegex reg));
ONIG_EXTERN
-OnigSyntaxType* onig_get_syntax P_((regex_t* reg));
+OnigSyntaxType* onig_get_syntax P_((OnigRegex reg));
ONIG_EXTERN
int onig_set_default_syntax P_((OnigSyntaxType* syntax));
ONIG_EXTERN
diff --git a/ext/mbstring/oniguruma/regcomp.c b/ext/mbstring/oniguruma/regcomp.c
index a2315fcec..9b862657d 100644
--- a/ext/mbstring/oniguruma/regcomp.c
+++ b/ext/mbstring/oniguruma/regcomp.c
@@ -2,7 +2,7 @@
regcomp.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -186,6 +186,17 @@ add_opcode(regex_t* reg, int opcode)
return 0;
}
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+static int
+add_state_check_num(regex_t* reg, int num)
+{
+ StateCheckNumType n = (StateCheckNumType )num;
+
+ BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
+ return 0;
+}
+#endif
+
static int
add_rel_addr(regex_t* reg, int addr)
{
@@ -644,7 +655,7 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper)
}
p[id].lower = lower;
- p[id].upper = upper;
+ p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
return 0;
}
@@ -684,7 +695,254 @@ compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info,
return r;
}
+static int
+is_anychar_star_qualifier(QualifierNode* qn)
+{
+ if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
+ NTYPE(qn->target) == N_ANYCHAR)
+ return 1;
+ else
+ return 0;
+}
+
#define QUALIFIER_EXPAND_LIMIT_SIZE 50
+#define CKN_ON (ckn > 0)
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+
+static int
+compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
+{
+ int len, mod_tlen, cklen;
+ int ckn;
+ int infinite = IS_REPEAT_INFINITE(qn->upper);
+ int empty_info = qn->target_empty_info;
+ int tlen = compile_length_tree(qn->target, reg);
+
+ if (tlen < 0) return tlen;
+
+ ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
+
+ cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
+
+ /* anychar repeat */
+ if (NTYPE(qn->target) == N_ANYCHAR) {
+ if (qn->greedy && infinite) {
+ if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
+ return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
+ else
+ return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
+ }
+ }
+
+ if (empty_info != 0)
+ mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
+ else
+ mod_tlen = tlen;
+
+ if (infinite && qn->lower <= 1) {
+ if (qn->greedy) {
+ if (qn->lower == 1)
+ len = SIZE_OP_JUMP;
+ else
+ len = 0;
+
+ len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
+ }
+ else {
+ if (qn->lower == 0)
+ len = SIZE_OP_JUMP;
+ else
+ len = 0;
+
+ len += mod_tlen + SIZE_OP_PUSH + cklen;
+ }
+ }
+ else if (qn->upper == 0) {
+ if (qn->is_refered != 0) /* /(?<n>..){0}/ */
+ len = SIZE_OP_JUMP + tlen;
+ else
+ len = 0;
+ }
+ else if (qn->upper == 1 && qn->greedy) {
+ if (qn->lower == 0) {
+ if (CKN_ON) {
+ len = SIZE_OP_STATE_CHECK_PUSH + tlen;
+ }
+ else {
+ len = SIZE_OP_PUSH + tlen;
+ }
+ }
+ else {
+ len = tlen;
+ }
+ }
+ else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
+ len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
+ }
+ else {
+ len = SIZE_OP_REPEAT_INC
+ + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
+ if (CKN_ON)
+ len += SIZE_OP_STATE_CHECK;
+ }
+
+ return len;
+}
+
+static int
+compile_qualifier_node(QualifierNode* qn, regex_t* reg)
+{
+ int r, mod_tlen;
+ int ckn;
+ int infinite = IS_REPEAT_INFINITE(qn->upper);
+ int empty_info = qn->target_empty_info;
+ int tlen = compile_length_tree(qn->target, reg);
+
+ if (tlen < 0) return tlen;
+
+ ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
+
+ if (is_anychar_star_qualifier(qn)) {
+ r = compile_tree_n_times(qn->target, qn->lower, reg);
+ if (r) return r;
+ if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
+ if (IS_MULTILINE(reg->options))
+ r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
+ else
+ r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
+ if (r) return r;
+ if (CKN_ON) {
+ r = add_state_check_num(reg, ckn);
+ if (r) return r;
+ }
+
+ return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1);
+ }
+ else {
+ if (IS_MULTILINE(reg->options)) {
+ r = add_opcode(reg, (CKN_ON ?
+ OP_STATE_CHECK_ANYCHAR_ML_STAR
+ : OP_ANYCHAR_ML_STAR));
+ }
+ else {
+ r = add_opcode(reg, (CKN_ON ?
+ OP_STATE_CHECK_ANYCHAR_STAR
+ : OP_ANYCHAR_STAR));
+ }
+ if (r) return r;
+ if (CKN_ON)
+ r = add_state_check_num(reg, ckn);
+
+ return r;
+ }
+ }
+
+ if (empty_info != 0)
+ mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
+ else
+ mod_tlen = tlen;
+
+ if (infinite && qn->lower <= 1) {
+ if (qn->greedy) {
+ if (qn->lower == 1) {
+ r = add_opcode_rel_addr(reg, OP_JUMP,
+ (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
+ if (r) return r;
+ }
+
+ if (CKN_ON) {
+ r = add_opcode(reg, OP_STATE_CHECK_PUSH);
+ if (r) return r;
+ r = add_state_check_num(reg, ckn);
+ if (r) return r;
+ r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
+ }
+ else {
+ r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
+ }
+ if (r) return r;
+ r = compile_tree_empty_check(qn->target, reg, empty_info);
+ if (r) return r;
+ r = add_opcode_rel_addr(reg, OP_JUMP,
+ -(mod_tlen + (int )SIZE_OP_JUMP
+ + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
+ }
+ else {
+ if (qn->lower == 0) {
+ r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
+ if (r) return r;
+ }
+ r = compile_tree_empty_check(qn->target, reg, empty_info);
+ if (r) return r;
+ if (CKN_ON) {
+ r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
+ if (r) return r;
+ r = add_state_check_num(reg, ckn);
+ if (r) return r;
+ r = add_rel_addr(reg,
+ -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
+ }
+ else
+ r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
+ }
+ }
+ else if (qn->upper == 0) {
+ if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
+ r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
+ if (r) return r;
+ r = compile_tree(qn->target, reg);
+ }
+ else
+ r = 0;
+ }
+ else if (qn->upper == 1 && qn->greedy) {
+ if (qn->lower == 0) {
+ if (CKN_ON) {
+ r = add_opcode(reg, OP_STATE_CHECK_PUSH);
+ if (r) return r;
+ r = add_state_check_num(reg, ckn);
+ if (r) return r;
+ r = add_rel_addr(reg, tlen);
+ }
+ else {
+ r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
+ }
+ if (r) return r;
+ }
+
+ r = compile_tree(qn->target, reg);
+ }
+ else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
+ if (CKN_ON) {
+ r = add_opcode(reg, OP_STATE_CHECK_PUSH);
+ if (r) return r;
+ r = add_state_check_num(reg, ckn);
+ if (r) return r;
+ r = add_rel_addr(reg, SIZE_OP_JUMP);
+ }
+ else {
+ r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
+ }
+
+ if (r) return r;
+ r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
+ if (r) return r;
+ r = compile_tree(qn->target, reg);
+ }
+ else {
+ r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
+ if (CKN_ON) {
+ if (r) return r;
+ r = add_opcode(reg, OP_STATE_CHECK);
+ if (r) return r;
+ r = add_state_check_num(reg, ckn);
+ }
+ }
+ return r;
+}
+
+#else /* USE_COMBINATION_EXPLOSION_CHECK */
static int
compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
@@ -752,16 +1010,6 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
}
static int
-is_anychar_star_qualifier(QualifierNode* qn)
-{
- if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
- NTYPE(qn->target) == N_ANYCHAR)
- return 1;
- else
- return 0;
-}
-
-static int
compile_qualifier_node(QualifierNode* qn, regex_t* reg)
{
int i, r, mod_tlen;
@@ -887,6 +1135,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
}
return r;
}
+#endif /* USE_COMBINATION_EXPLOSION_CHECK */
static int
compile_length_option_node(EffectNode* node, regex_t* reg)
@@ -1268,8 +1517,15 @@ compile_length_tree(Node* node, regex_t* reg)
{
BackrefNode* br = &(NBACKREF(node));
+#ifdef USE_BACKREF_AT_LEVEL
+ if (IS_BACKREF_NEST_LEVEL(br)) {
+ r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
+ SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
+ }
+ else
+#endif
if (br->back_num == 1) {
- r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 3)
+ r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
}
else {
@@ -1381,9 +1637,21 @@ compile_tree(Node* node, regex_t* reg)
case N_BACKREF:
{
- int i;
BackrefNode* br = &(NBACKREF(node));
+#ifdef USE_BACKREF_AT_LEVEL
+ if (IS_BACKREF_NEST_LEVEL(br)) {
+ r = add_opcode(reg, OP_BACKREF_AT_LEVEL);
+ if (r) return r;
+ r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
+ if (r) return r;
+ r = add_length(reg, br->nest_level);
+ if (r) return r;
+
+ goto add_bacref_mems;
+ }
+ else
+#endif
if (br->back_num == 1) {
n = br->back_static[0];
if (IS_IGNORECASE(reg->options)) {
@@ -1395,7 +1663,6 @@ compile_tree(Node* node, regex_t* reg)
switch (n) {
case 1: r = add_opcode(reg, OP_BACKREF1); break;
case 2: r = add_opcode(reg, OP_BACKREF2); break;
- case 3: r = add_opcode(reg, OP_BACKREF3); break;
default:
r = add_opcode(reg, OP_BACKREFN);
if (r) return r;
@@ -1405,17 +1672,21 @@ compile_tree(Node* node, regex_t* reg)
}
}
else {
+ int i;
int* p;
if (IS_IGNORECASE(reg->options)) {
- add_opcode(reg, OP_BACKREF_MULTI_IC);
+ r = add_opcode(reg, OP_BACKREF_MULTI_IC);
}
else {
- add_opcode(reg, OP_BACKREF_MULTI);
+ r = add_opcode(reg, OP_BACKREF_MULTI);
}
-
if (r) return r;
- add_length(reg, br->back_num);
+
+#ifdef USE_BACKREF_AT_LEVEL
+ add_bacref_mems:
+#endif
+ r = add_length(reg, br->back_num);
if (r) return r;
p = BACKREFS_P(br);
for (i = br->back_num - 1; i >= 0; i--) {
@@ -2120,29 +2391,6 @@ get_char_length_tree(Node* node, regex_t* reg, int* len)
return get_char_length_tree1(node, reg, len, 0);
}
-extern int
-onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
-{
- int found;
-
- if (ONIGENC_MBC_MINLEN(enc) > 1 || (code >= SINGLE_BYTE_SIZE)) {
- if (IS_NULL(cc->mbuf)) {
- found = 0;
- }
- else {
- found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
- }
- }
- else {
- found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
- }
-
- if (IS_CCLASS_NOT(cc))
- return !found;
- else
- return found;
-}
-
/* x is not included y ==> 1 : 0 */
static int
is_not_included(Node* x, Node* y, regex_t* reg)
@@ -2516,6 +2764,9 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
case N_QUALIFIER:
r = subexp_inf_recursive_check(NQUALIFIER(node).target, env, head);
+ if (r == RECURSION_EXIST) {
+ if (NQUALIFIER(node).lower == 0) r = 0;
+ }
break;
case N_ANCHOR:
@@ -2943,15 +3194,55 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
return 0;
}
+
+static int
+divide_ambig_string_node_sub(regex_t* reg, int prev_ambig,
+ UChar* prev_start, UChar* prev,
+ UChar* end, Node*** tailp, Node** root)
+{
+ UChar *tmp, *wp;
+ Node* snode;
+
+ if (prev_ambig != 0) {
+ tmp = prev_start;
+ wp = prev_start;
+ while (tmp < prev) {
+ wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
+ &tmp, end, wp);
+ }
+ snode = onig_node_new_str(prev_start, wp);
+ CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+ NSTRING_SET_AMBIG(snode);
+ if (wp != prev) NSTRING_SET_AMBIG_REDUCE(snode);
+ }
+ else {
+ snode = onig_node_new_str(prev_start, prev);
+ CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+ }
+
+ if (*tailp == (Node** )0) {
+ *root = onig_node_new_list(snode, NULL);
+ CHECK_NULL_RETURN_VAL(*root, ONIGERR_MEMORY);
+ *tailp = &(NCONS(*root).right);
+ }
+ else {
+ **tailp = onig_node_new_list(snode, NULL);
+ CHECK_NULL_RETURN_VAL(**tailp, ONIGERR_MEMORY);
+ *tailp = &(NCONS(**tailp).right);
+ }
+
+ return 0;
+}
+
static int
divide_ambig_string_node(Node* node, regex_t* reg)
{
StrNode* sn = &NSTRING(node);
int ambig, prev_ambig;
UChar *prev, *p, *end, *prev_start, *start, *tmp, *wp;
- Node *snode;
Node *root = NULL_NODE;
Node **tailp = (Node** )0;
+ int r;
start = prev_start = p = sn->s;
end = sn->end;
@@ -2964,33 +3255,9 @@ divide_ambig_string_node(Node* node, regex_t* reg)
if (prev_ambig != (ambig = ONIGENC_IS_MBC_AMBIGUOUS(reg->enc,
reg->ambig_flag, &p, end))) {
- if (prev_ambig != 0) {
- tmp = prev_start;
- wp = prev_start;
- while (tmp < prev) {
- wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
- &tmp, end, wp);
- }
- snode = onig_node_new_str(prev_start, wp);
- CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
- NSTRING_SET_AMBIG(snode);
- if (wp != prev) NSTRING_SET_AMBIG_REDUCE(snode);
- }
- else {
- snode = onig_node_new_str(prev_start, prev);
- CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
- }
-
- if (tailp == (Node** )0) {
- root = onig_node_new_list(snode, NULL);
- CHECK_NULL_RETURN_VAL(root, ONIGERR_MEMORY);
- tailp = &(NCONS(root).right);
- }
- else {
- *tailp = onig_node_new_list(snode, NULL);
- CHECK_NULL_RETURN_VAL(*tailp, ONIGERR_MEMORY);
- tailp = &(NCONS(*tailp).right);
- }
+ r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, prev,
+ end, &tailp, &root);
+ if (r != 0) return r;
prev_ambig = ambig;
prev_start = prev;
@@ -3011,41 +3278,157 @@ divide_ambig_string_node(Node* node, regex_t* reg)
}
}
else {
- if (prev_ambig != 0) {
- tmp = prev_start;
- wp = prev_start;
- while (tmp < end) {
- wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
- &tmp, end, wp);
- }
- snode = onig_node_new_str(prev_start, wp);
- CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
- NSTRING_SET_AMBIG(snode);
- if (wp != end) NSTRING_SET_AMBIG_REDUCE(snode);
+ r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, end,
+ end, &tailp, &root);
+ if (r != 0) return r;
+
+ swap_node(node, root);
+ onig_node_str_clear(root); /* should be after swap! */
+ onig_node_free(root); /* free original string node */
+ }
+
+ return 0;
+}
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+
+#define CEC_THRES_NUM_BIG_REPEAT 512
+#define CEC_INFINITE_NUM 0x7fffffff
+
+#define CEC_IN_INFINITE_REPEAT (1<<0)
+#define CEC_IN_FINITE_REPEAT (1<<1)
+#define CEC_CONT_BIG_REPEAT (1<<2)
+
+static int
+setup_comb_exp_check(Node* node, int state, ScanEnv* env)
+{
+ int type;
+ int r = state;
+
+ type = NTYPE(node);
+ switch (type) {
+ case N_LIST:
+ {
+ Node* prev = NULL_NODE;
+ do {
+ r = setup_comb_exp_check(NCONS(node).left, r, env);
+ prev = NCONS(node).left;
+ } while (r >= 0 && IS_NOT_NULL(node = NCONS(node).right));
}
- else {
- snode = onig_node_new_str(prev_start, end);
- CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+ break;
+
+ case N_ALT:
+ {
+ int ret;
+ do {
+ ret = setup_comb_exp_check(NCONS(node).left, state, env);
+ r |= ret;
+ } while (ret >= 0 && IS_NOT_NULL(node = NCONS(node).right));
}
+ break;
+
+ case N_QUALIFIER:
+ {
+ int child_state = state;
+ int add_state = 0;
+ QualifierNode* qn = &(NQUALIFIER(node));
+ Node* target = qn->target;
+ int var_num;
+
+ if (! IS_REPEAT_INFINITE(qn->upper)) {
+ if (qn->upper > 1) {
+ /* {0,1}, {1,1} are allowed */
+ child_state |= CEC_IN_FINITE_REPEAT;
+
+ /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
+ if (env->backrefed_mem == 0) {
+ if (NTYPE(qn->target) == N_EFFECT) {
+ EffectNode* en = &(NEFFECT(qn->target));
+ if (en->type == EFFECT_MEMORY) {
+ if (NTYPE(en->target) == N_QUALIFIER) {
+ QualifierNode* q = &(NQUALIFIER(en->target));
+ if (IS_REPEAT_INFINITE(q->upper)
+ && q->greedy == qn->greedy) {
+ qn->upper = (qn->lower == 0 ? 1 : qn->lower);
+ if (qn->upper == 1)
+ child_state = state;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (state & CEC_IN_FINITE_REPEAT) {
+ qn->comb_exp_check_num = -1;
+ }
+ else {
+ if (IS_REPEAT_INFINITE(qn->upper)) {
+ var_num = CEC_INFINITE_NUM;
+ child_state |= CEC_IN_INFINITE_REPEAT;
+ }
+ else {
+ var_num = qn->upper - qn->lower;
+ }
- if (tailp == (Node** )0) {
- root = onig_node_new_list(snode, NULL);
- CHECK_NULL_RETURN_VAL(root, ONIGERR_MEMORY);
- tailp = &(NCONS(node).right);
+ if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
+ add_state |= CEC_CONT_BIG_REPEAT;
+
+ if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
+ ((state & CEC_CONT_BIG_REPEAT) != 0 &&
+ var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
+ if (qn->comb_exp_check_num == 0) {
+ env->num_comb_exp_check++;
+ qn->comb_exp_check_num = env->num_comb_exp_check;
+ if (env->curr_max_regnum > env->comb_exp_max_regnum)
+ env->comb_exp_max_regnum = env->curr_max_regnum;
+ }
+ }
+ }
+
+ r = setup_comb_exp_check(target, child_state, env);
+ r |= add_state;
}
- else {
- *tailp = onig_node_new_list(snode, NULL);
- CHECK_NULL_RETURN_VAL(*tailp, ONIGERR_MEMORY);
- tailp = &(NCONS(*tailp).right);
+ break;
+
+ case N_EFFECT:
+ {
+ EffectNode* en = &(NEFFECT(node));
+
+ switch (en->type) {
+ case EFFECT_MEMORY:
+ {
+ if (env->curr_max_regnum < en->regnum)
+ env->curr_max_regnum = en->regnum;
+
+ r = setup_comb_exp_check(en->target, state, env);
+ }
+ break;
+
+ default:
+ r = setup_comb_exp_check(en->target, state, env);
+ break;
+ }
}
+ break;
- swap_node(node, root);
- onig_node_str_clear(root); /* should be after swap! */
- onig_node_free(root); /* free original string node */
+#ifdef USE_SUBEXP_CALL
+ case N_CALL:
+ if (IS_CALL_RECURSION(&(NCALL(node))))
+ env->has_recursion = 1;
+ else
+ r = setup_comb_exp_check(NCALL(node).target, state, env);
+ break;
+#endif
+
+ default:
+ break;
}
- return 0;
+ return r;
}
+#endif
#define IN_ALT (1<<0)
#define IN_NOT (1<<1)
@@ -3116,6 +3499,11 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
+#ifdef USE_BACKREF_AT_LEVEL
+ if (IS_BACKREF_NEST_LEVEL(br)) {
+ BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
+ }
+#endif
SET_EFFECT_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
}
}
@@ -3263,11 +3651,9 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
#define ALLOWED_EFFECT_IN_LB_NOT 0
#define ALLOWED_ANCHOR_IN_LB \
-( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF )
+( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
#define ALLOWED_ANCHOR_IN_LB_NOT \
-( ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF )
- /* can't allow all anchors, because \G in look-behind through Search().
- ex. /(?<=\G)zz/.match("azz") => success. */
+( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
case ANCHOR_LOOK_BEHIND:
{
@@ -3383,7 +3769,7 @@ typedef struct {
static int
map_position_value(OnigEncoding enc, int i)
{
- static short int ByteValTable[] = {
+ static const short int ByteValTable[] = {
5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
@@ -3408,7 +3794,7 @@ static int
distance_value(MinMaxLen* mm)
{
/* 1000 / (min-max-dist + 1) */
- static short int dist_vals[] = {
+ static const short int dist_vals[] = {
1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
@@ -3604,9 +3990,10 @@ copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
}
static void
-concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add)
+concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
{
- int i, n;
+ int i, j, len;
+ UChar *p, *end;
OptAncInfo tanc;
if (! to->ignore_case && add->ignore_case) {
@@ -3615,11 +4002,17 @@ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add)
to->ignore_case = 1;
}
- for (i = to->len, n = 0; n < add->len && i < OPT_EXACT_MAXLEN; i++, n++)
- to->s[i] = add->s[n];
+ p = add->s;
+ end = p + add->len;
+ for (i = to->len; p < end; ) {
+ len = enc_len(enc, p);
+ if (i + len > OPT_EXACT_MAXLEN) break;
+ for (j = 0; j < len && p < end; j++)
+ to->s[i++] = *p++;
+ }
to->len = i;
- to->reach_end = (n == add->len ? add->reach_end : 0);
+ to->reach_end = (p == end ? add->reach_end : 0);
concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
if (! to->reach_end) tanc.right_anchor = 0;
@@ -3634,15 +4027,10 @@ concat_opt_exact_info_str(OptExactInfo* to,
UChar *p;
for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
- if (raw) {
+ len = enc_len(enc, p);
+ if (i + len > OPT_EXACT_MAXLEN) break;
+ for (j = 0; j < len && p < end; j++)
to->s[i++] = *p++;
- }
- else {
- len = enc_len(enc, p);
- if (i + len > OPT_EXACT_MAXLEN) break;
- for (j = 0; j < len; j++)
- to->s[i++] = *p++;
- }
}
to->len = i;
@@ -3711,7 +4099,7 @@ select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
static void
clear_opt_map_info(OptMapInfo* map)
{
- static OptMapInfo clean_info = {
+ static const OptMapInfo clean_info = {
{0, 0}, {0, 0}, 0,
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -3758,8 +4146,8 @@ add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
int i, j, n, len;
UChar buf[ONIGENC_MBC_NORMALIZE_MAXLEN];
OnigCodePoint code, ccode;
- OnigCompAmbigCodes* ccs;
- OnigPairAmbigCodes* pccs;
+ const OnigCompAmbigCodes* ccs;
+ const OnigPairAmbigCodes* pccs;
OnigAmbigType amb;
add_char_opt_map_info(map, p[0], enc);
@@ -3907,11 +4295,11 @@ concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
if (add->exb.len > 0) {
if (exb_reach) {
- concat_opt_exact_info(&to->exb, &add->exb);
+ concat_opt_exact_info(&to->exb, &add->exb, enc);
clear_opt_exact_info(&add->exb);
}
else if (exm_reach) {
- concat_opt_exact_info(&to->exm, &add->exb);
+ concat_opt_exact_info(&to->exm, &add->exb, enc);
clear_opt_exact_info(&add->exb);
}
}
@@ -4197,8 +4585,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
if (env->mmd.max == 0 &&
NTYPE(qn->target) == N_ANYCHAR && qn->greedy) {
- if (IS_POSIXLINE(env->options))
- add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_PL);
+ if (IS_MULTILINE(env->options))
+ add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
else
add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
}
@@ -4210,7 +4598,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
if (nopt.exb.reach_end) {
for (i = 2; i < qn->lower &&
! is_full_opt_exact_info(&opt->exb); i++) {
- concat_opt_exact_info(&opt->exb, &nopt.exb);
+ concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
}
if (i < qn->lower) {
opt->exb.reach_end = 0;
@@ -4316,10 +4704,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY);
reg->exact_end = reg->exact + e->len;
- if (e->anc.left_anchor & ANCHOR_BEGIN_LINE)
- allow_reverse = 1;
- else
- allow_reverse =
+ allow_reverse =
ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
@@ -4391,7 +4776,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
if (r) return r;
reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
- ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_PL);
+ ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML);
reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
@@ -4503,7 +4888,7 @@ print_anchor(FILE* f, int anchor)
q = 1;
fprintf(f, "anychar-star");
}
- if (anchor & ANCHOR_ANYCHAR_STAR_PL) {
+ if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
if (q) fprintf(f, ", ");
fprintf(f, "anychar-star-pl");
}
@@ -4514,8 +4899,8 @@ print_anchor(FILE* f, int anchor)
static void
print_optimize_info(FILE* f, regex_t* reg)
{
- static char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
- "EXACT_IC", "MAP" };
+ static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
+ "EXACT_IC", "MAP" };
fprintf(f, "optimize: %s\n", on[reg->optimize]);
fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
@@ -4624,7 +5009,6 @@ onig_chain_reduce(regex_t* reg)
{
regex_t *head, *prev;
- THREAD_ATOMIC_START;
prev = reg;
head = prev->chain;
if (IS_NOT_NULL(head)) {
@@ -4636,7 +5020,6 @@ onig_chain_reduce(regex_t* reg)
prev->chain = (regex_t* )NULL;
REGEX_TRANSFER(reg, head);
}
- THREAD_ATOMIC_END;
}
#if 0
@@ -4753,6 +5136,9 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
reg->num_null_check = 0;
reg->repeat_range_alloc = 0;
reg->repeat_range = (OnigRepeatRange* )NULL;
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ reg->num_comb_exp_check = 0;
+#endif
r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
if (r != 0) goto err;
@@ -4806,6 +5192,33 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
reg->bt_mem_end |= reg->capture_history;
}
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ if (scan_env.backrefed_mem == 0
+#ifdef USE_SUBEXP_CALL
+ || scan_env.num_call == 0
+#endif
+ ) {
+ setup_comb_exp_check(root, 0, &scan_env);
+#ifdef USE_SUBEXP_CALL
+ if (scan_env.has_recursion != 0) {
+ scan_env.num_comb_exp_check = 0;
+ }
+ else
+#endif
+ if (scan_env.comb_exp_max_regnum > 0) {
+ int i;
+ for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
+ if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
+ scan_env.num_comb_exp_check = 0;
+ break;
+ }
+ }
+ }
+ }
+
+ reg->num_comb_exp_check = scan_env.num_comb_exp_check;
+#endif
+
clear_optimize_info(reg);
#ifndef ONIG_DONT_OPTIMIZE
r = set_optimize_info_from_tree(root, reg, &scan_env);
@@ -4875,6 +5288,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
return r;
}
+#ifdef USE_RECOMPILE_API
extern int
onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
@@ -4893,6 +5307,7 @@ onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
}
return 0;
}
+#endif
static int onig_inited = 0;
@@ -4906,6 +5321,11 @@ onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag,
if (ONIGENC_IS_UNDEF(enc))
return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
+ if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
+ == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
+ return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
+ }
+
*reg = (regex_t* )xmalloc(sizeof(regex_t));
if (IS_NULL(*reg)) return ONIGERR_MEMORY;
(*reg)->state = ONIG_STATE_MODIFY;
@@ -4991,14 +5411,14 @@ onig_end()
onig_print_statistics(stderr);
#endif
-#ifdef USE_RECYCLE_NODE
- onig_free_node_list();
-#endif
-
#ifdef USE_SHARED_CCLASS_TABLE
onig_free_shared_cclass_table();
#endif
+#ifdef USE_RECYCLE_NODE
+ onig_free_node_list();
+#endif
+
onig_inited = 0;
THREAD_ATOMIC_END;
@@ -5008,6 +5428,16 @@ onig_end()
#ifdef ONIG_DEBUG
+/* arguments type */
+#define ARG_SPECIAL -1
+#define ARG_NON 0
+#define ARG_RELADDR 1
+#define ARG_ABSADDR 2
+#define ARG_LENGTH 3
+#define ARG_MEMNUM 4
+#define ARG_OPTION 5
+#define ARG_STATE_CHECK 6
+
OnigOpInfoType OnigOpInfo[] = {
{ OP_FINISH, "finish", ARG_NON },
{ OP_END, "end", ARG_NON },
@@ -5038,62 +5468,68 @@ OnigOpInfoType OnigOpInfo[] = {
{ OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
{ OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
{ OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
- { OP_WORD, "word", ARG_NON },
- { OP_NOT_WORD, "not-word", ARG_NON },
- { OP_WORD_SB, "word-sb", ARG_NON },
- { OP_WORD_MB, "word-mb", ARG_NON },
- { OP_WORD_BOUND, "word-bound", ARG_NON },
- { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
- { OP_WORD_BEGIN, "word-begin", ARG_NON },
- { OP_WORD_END, "word-end", ARG_NON },
- { OP_BEGIN_BUF, "begin-buf", ARG_NON },
- { OP_END_BUF, "end-buf", ARG_NON },
- { OP_BEGIN_LINE, "begin-line", ARG_NON },
- { OP_END_LINE, "end-line", ARG_NON },
- { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
- { OP_BEGIN_POSITION, "begin-position", ARG_NON },
- { OP_BACKREF1, "backref1", ARG_NON },
- { OP_BACKREF2, "backref2", ARG_NON },
- { OP_BACKREF3, "backref3", ARG_NON },
- { OP_BACKREFN, "backrefn", ARG_MEMNUM },
- { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
- { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
- { OP_BACKREF_MULTI_IC, "backref_multi-ic",ARG_SPECIAL },
- { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
- { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
- { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
- { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
- { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
- { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
- { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
- { OP_SET_OPTION, "set-option", ARG_OPTION },
- { OP_FAIL, "fail", ARG_NON },
- { OP_JUMP, "jump", ARG_RELADDR },
- { OP_PUSH, "push", ARG_RELADDR },
- { OP_POP, "pop", ARG_NON },
- { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
- { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
- { OP_REPEAT, "repeat", ARG_SPECIAL },
- { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
- { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
- { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
- { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
- { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
- { OP_NULL_CHECK_START, "null-check-start",ARG_MEMNUM },
- { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
- { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
- { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
- { OP_PUSH_POS, "push-pos", ARG_NON },
- { OP_POP_POS, "pop-pos", ARG_NON },
- { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
- { OP_FAIL_POS, "fail-pos", ARG_NON },
- { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
- { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
- { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
+ { OP_WORD, "word", ARG_NON },
+ { OP_NOT_WORD, "not-word", ARG_NON },
+ { OP_WORD_SB, "word-sb", ARG_NON },
+ { OP_WORD_MB, "word-mb", ARG_NON },
+ { OP_WORD_BOUND, "word-bound", ARG_NON },
+ { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
+ { OP_WORD_BEGIN, "word-begin", ARG_NON },
+ { OP_WORD_END, "word-end", ARG_NON },
+ { OP_BEGIN_BUF, "begin-buf", ARG_NON },
+ { OP_END_BUF, "end-buf", ARG_NON },
+ { OP_BEGIN_LINE, "begin-line", ARG_NON },
+ { OP_END_LINE, "end-line", ARG_NON },
+ { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
+ { OP_BEGIN_POSITION, "begin-position", ARG_NON },
+ { OP_BACKREF1, "backref1", ARG_NON },
+ { OP_BACKREF2, "backref2", ARG_NON },
+ { OP_BACKREFN, "backrefn", ARG_MEMNUM },
+ { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
+ { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
+ { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
+ { OP_BACKREF_AT_LEVEL, "backref_at_level", ARG_SPECIAL },
+ { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
+ { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
+ { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
+ { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
+ { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
+ { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
+ { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
+ { OP_SET_OPTION, "set-option", ARG_OPTION },
+ { OP_FAIL, "fail", ARG_NON },
+ { OP_JUMP, "jump", ARG_RELADDR },
+ { OP_PUSH, "push", ARG_RELADDR },
+ { OP_POP, "pop", ARG_NON },
+ { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
+ { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
+ { OP_REPEAT, "repeat", ARG_SPECIAL },
+ { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
+ { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
+ { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
+ { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
+ { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
+ { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
+ { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
+ { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
+ { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
+ { OP_PUSH_POS, "push-pos", ARG_NON },
+ { OP_POP_POS, "pop-pos", ARG_NON },
+ { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
+ { OP_FAIL_POS, "fail-pos", ARG_NON },
+ { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
+ { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
+ { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
{ OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
{ OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
- { OP_CALL, "call", ARG_ABSADDR },
- { OP_RETURN, "return", ARG_NON },
+ { OP_CALL, "call", ARG_ABSADDR },
+ { OP_RETURN, "return", ARG_NON },
+ { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
+ { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
+ { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
+ { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
+ { OP_STATE_CHECK_ANYCHAR_ML_STAR,
+ "state-check-anychar-ml*", ARG_STATE_CHECK },
{ -1, "", ARG_NON }
};
@@ -5152,6 +5588,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
RelAddrType addr;
LengthType len;
MemNumType mem;
+ StateCheckNumType scn;
OnigCodePoint code;
UChar *q;
@@ -5186,6 +5623,12 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
fprintf(f, ":%d", option);
}
break;
+
+ case ARG_STATE_CHECK:
+ scn = *((StateCheckNumType* )bp);
+ bp += SIZE_STATE_CHECK_NUM;
+ fprintf(f, ":%d", scn);
+ break;
}
}
else {
@@ -5312,6 +5755,26 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
}
break;
+ case OP_BACKREF_AT_LEVEL:
+ {
+ OnigOptionType option;
+ LengthType level;
+
+ GET_OPTION_INC(option, bp);
+ fprintf(f, ":%d", option);
+ GET_LENGTH_INC(level, bp);
+ fprintf(f, ":%d", level);
+
+ fputs(" ", f);
+ GET_LENGTH_INC(len, bp);
+ for (i = 0; i < len; i++) {
+ GET_MEMNUM_INC(mem, bp);
+ if (i > 0) fputs(", ", f);
+ fprintf(f, "%d", mem);
+ }
+ }
+ break;
+
case OP_REPEAT:
case OP_REPEAT_NG:
{
@@ -5343,6 +5806,15 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
fprintf(f, ":%d:(%d)", len, addr);
break;
+ case OP_STATE_CHECK_PUSH:
+ case OP_STATE_CHECK_PUSH_OR_JUMP:
+ scn = *((StateCheckNumType* )bp);
+ bp += SIZE_STATE_CHECK_NUM;
+ addr = *((RelAddrType* )bp);
+ bp += SIZE_RELADDR;
+ fprintf(f, ":%d:(%d)", scn, addr);
+ break;
+
default:
fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
*--bp);
diff --git a/ext/mbstring/oniguruma/regenc.c b/ext/mbstring/oniguruma/regenc.c
index a767ca60b..bbbf1a2f9 100644
--- a/ext/mbstring/oniguruma/regenc.c
+++ b/ext/mbstring/oniguruma/regenc.c
@@ -175,7 +175,7 @@ onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
#define USE_APPLICATION_TO_LOWER_CASE_TABLE
-unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = {
+const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x228c, 0x2289, 0x2288, 0x2288, 0x2288, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
@@ -251,7 +251,7 @@ static const UChar BuiltInAsciiToLowerCaseTable[] = {
#endif /* not USE_APPLICATION_TO_LOWER_CASE_TABLE */
#ifdef USE_UPPER_CASE_TABLE
-UChar OnigEncAsciiToUpperCaseTable[256] = {
+const UChar OnigEncAsciiToUpperCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -287,7 +287,7 @@ UChar OnigEncAsciiToUpperCaseTable[256] = {
};
#endif
-unsigned short OnigEncAsciiCtypeTable[256] = {
+const unsigned short OnigEncAsciiCtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
@@ -323,7 +323,7 @@ unsigned short OnigEncAsciiCtypeTable[256] = {
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
-UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
+const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -359,7 +359,7 @@ UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
};
#ifdef USE_UPPER_CASE_TABLE
-UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
+const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -417,7 +417,7 @@ onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UC
return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
}
-OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = {
+const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = {
{ 0x41, 0x61 },
{ 0x42, 0x62 },
{ 0x43, 0x63 },
@@ -475,7 +475,7 @@ OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = {
extern int
onigenc_ascii_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+ const OnigPairAmbigCodes** ccs)
{
if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
*ccs = OnigAsciiPairAmbigCodes;
@@ -488,16 +488,16 @@ onigenc_ascii_get_all_pair_ambig_codes(OnigAmbigType flag,
extern int
onigenc_nothing_get_all_comp_ambig_codes(OnigAmbigType flag,
- OnigCompAmbigCodes** ccs)
+ const OnigCompAmbigCodes** ccs)
{
return 0;
}
extern int
onigenc_iso_8859_1_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+ const OnigPairAmbigCodes** ccs)
{
- static OnigPairAmbigCodes cc[] = {
+ static const OnigPairAmbigCodes cc[] = {
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
@@ -577,9 +577,9 @@ onigenc_iso_8859_1_get_all_pair_ambig_codes(OnigAmbigType flag,
extern int
onigenc_ess_tsett_get_all_comp_ambig_codes(OnigAmbigType flag,
- OnigCompAmbigCodes** ccs)
+ const OnigCompAmbigCodes** ccs)
{
- static OnigCompAmbigCodes folds[] = {
+ static const OnigCompAmbigCodes folds[] = {
{ 2, 0xdf, {{ 2, { 0x53, 0x53 } }, { 2, { 0x73, 0x73} } } }
};
@@ -593,7 +593,7 @@ onigenc_ess_tsett_get_all_comp_ambig_codes(OnigAmbigType flag,
extern int
onigenc_not_support_get_ctype_code_range(int ctype,
- OnigCodePoint* sbr[], OnigCodePoint* mbr[])
+ const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])
{
return ONIG_NO_SUPPORT_CONFIG;
}
@@ -830,10 +830,10 @@ onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
if ((code & 0xff000000) != 0) {
*p++ = (UChar )((code >> 24) & 0xff);
}
- if ((code & 0xff0000) != 0) {
+ if ((code & 0xff0000) != 0 || p != buf) {
*p++ = (UChar )((code >> 16) & 0xff);
}
- if ((code & 0xff00) != 0) {
+ if ((code & 0xff00) != 0 || p != buf) {
*p++ = (UChar )((code >> 8) & 0xff);
}
*p++ = (UChar )(code & 0xff);
@@ -849,40 +849,32 @@ extern int
onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
unsigned int ctype)
{
- if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
- if (code < 128)
- return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else
+ if (code < 128)
+ return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+ else {
+ if ((ctype & (ONIGENC_CTYPE_WORD |
+ ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
-
- ctype &= ~ONIGENC_CTYPE_WORD;
- if (ctype == 0) return FALSE;
+ }
}
- if (code < 128)
- return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else
- return FALSE;
+ return FALSE;
}
extern int
onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
unsigned int ctype)
{
- if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
- if (code < 128)
- return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else
+ if (code < 128)
+ return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+ else {
+ if ((ctype & (ONIGENC_CTYPE_WORD |
+ ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
-
- ctype &= ~ONIGENC_CTYPE_WORD;
- if (ctype == 0) return FALSE;
+ }
}
- if (code < 128)
- return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else
- return FALSE;
+ return FALSE;
}
extern int
diff --git a/ext/mbstring/oniguruma/regenc.h b/ext/mbstring/oniguruma/regenc.h
index 510455146..58ee3e7f2 100644
--- a/ext/mbstring/oniguruma/regenc.h
+++ b/ext/mbstring/oniguruma/regenc.h
@@ -4,7 +4,7 @@
regenc.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -65,15 +65,17 @@
#else /* ONIG_RUBY_M17N */
#define USE_UNICODE_FULL_RANGE_CTYPE
+/* following must not use with USE_CRNL_AS_LINE_TERMINATOR */
+/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTF#18 */
#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
/* for encoding system implementation (internal) */
-ONIG_EXTERN int onigenc_ascii_get_all_pair_ambig_codes P_((OnigAmbigType flag, OnigPairAmbigCodes** acs));
-ONIG_EXTERN int onigenc_nothing_get_all_comp_ambig_codes P_((OnigAmbigType flag, OnigCompAmbigCodes** acs));
-ONIG_EXTERN int onigenc_iso_8859_1_get_all_pair_ambig_codes P_((OnigAmbigType flag, OnigPairAmbigCodes** acs));
-ONIG_EXTERN int onigenc_ess_tsett_get_all_comp_ambig_codes P_((OnigAmbigType flag, OnigCompAmbigCodes** acs));
-ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((int ctype, OnigCodePoint* sbr[], OnigCodePoint* mbr[]));
+ONIG_EXTERN int onigenc_ascii_get_all_pair_ambig_codes P_((OnigAmbigType flag, const OnigPairAmbigCodes** acs));
+ONIG_EXTERN int onigenc_nothing_get_all_comp_ambig_codes P_((OnigAmbigType flag, const OnigCompAmbigCodes** acs));
+ONIG_EXTERN int onigenc_iso_8859_1_get_all_pair_ambig_codes P_((OnigAmbigType flag, const OnigPairAmbigCodes** acs));
+ONIG_EXTERN int onigenc_ess_tsett_get_all_comp_ambig_codes P_((OnigAmbigType flag, const OnigCompAmbigCodes** acs));
+ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((int ctype, const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]));
ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end));
/* methods for single byte encoding */
@@ -105,7 +107,7 @@ ONIG_EXTERN int onigenc_get_all_fold_match_code_ss_0xdf P_((OnigCodePoint** code
/* in enc/unicode.c */
ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype));
-ONIG_EXTERN int onigenc_unicode_get_ctype_code_range P_((int ctype, OnigCodePoint* sbr[], OnigCodePoint* mbr[]));
+ONIG_EXTERN int onigenc_unicode_get_ctype_code_range P_((int ctype, const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]));
#define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \
@@ -115,10 +117,10 @@ ONIG_EXTERN int onigenc_unicode_get_ctype_code_range P_((int ctype, OnigCodePoin
#define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \
((OnigEnc_Unicode_ISO_8859_1_CtypeTable[code] & ctype) != 0)
-ONIG_EXTERN UChar OnigEncISO_8859_1_ToLowerCaseTable[];
-ONIG_EXTERN UChar OnigEncISO_8859_1_ToUpperCaseTable[];
-ONIG_EXTERN unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[];
-ONIG_EXTERN OnigPairAmbigCodes OnigAsciiPairAmbigCodes[];
+ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[];
+ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[];
+ONIG_EXTERN const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[];
+ONIG_EXTERN const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[];
#endif /* is not ONIG_RUBY_M17N */
@@ -133,7 +135,7 @@ extern int onig_is_in_code_range P_((const UChar* p, OnigCodePoint code));
ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
ONIG_EXTERN const UChar* OnigEncAsciiToLowerCaseTable;
ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[];
-ONIG_EXTERN unsigned short OnigEncAsciiCtypeTable[];
+ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[];
#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c]
#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) OnigEncAsciiToUpperCaseTable[c]
diff --git a/ext/mbstring/oniguruma/regerror.c b/ext/mbstring/oniguruma/regerror.c
index 560b5e12c..ad73b76c3 100644
--- a/ext/mbstring/oniguruma/regerror.c
+++ b/ext/mbstring/oniguruma/regerror.c
@@ -2,7 +2,7 @@
regerror.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -38,12 +38,12 @@
#define va_init_list(a,b) va_start(a)
#endif
-extern char*
+extern UChar*
onig_error_code_to_format(int code)
{
char *p;
- if (code >= 0) return (char* )0;
+ if (code >= 0) return (UChar* )0;
switch (code) {
case ONIG_MISMATCH:
@@ -170,6 +170,8 @@ onig_error_code_to_format(int code)
p = "invalid character property name {%n}"; break;
case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION:
p = "not supported encoding combination"; break;
+ case ONIGERR_INVALID_COMBINATION_OF_OPTIONS:
+ p = "invalid combination of options"; break;
case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT:
p = "over thread pass limit count"; break;
@@ -177,7 +179,7 @@ onig_error_code_to_format(int code)
p = "undefined error code"; break;
}
- return p;
+ return (UChar* )p;
}
@@ -256,36 +258,36 @@ onig_error_code_to_str(s, code, va_alist)
void
#ifdef HAVE_STDARG_PROTOTYPES
-onig_snprintf_with_pattern(char buf[], int bufsize, OnigEncoding enc,
- char* pat, char* pat_end, char *fmt, ...)
+onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
+ UChar* pat, UChar* pat_end, const UChar *fmt, ...)
#else
onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
- char buf[];
+ UChar buf[];
int bufsize;
OnigEncoding enc;
- char* pat;
- char* pat_end;
- const char *fmt;
+ UChar* pat;
+ UChar* pat_end;
+ const UChar *fmt;
va_dcl
#endif
{
int n, need, len;
UChar *p, *s, *bp;
- char bs[6];
+ UChar bs[6];
va_list args;
va_init_list(args, fmt);
- n = vsnprintf(buf, bufsize, fmt, args);
+ n = vsnprintf((char* )buf, bufsize, (const char* )fmt, args);
va_end(args);
need = (pat_end - pat) * 4 + 4;
if (n + need < bufsize) {
- strcat(buf, ": /");
+ strcat((char* )buf, ": /");
s = buf + onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, buf);
p = pat;
- while (p < (UChar* )pat_end) {
+ while (p < pat_end) {
if (*p == MC_ESC(enc)) {
*s++ = *p++;
len = enc_len(enc, p);
@@ -304,7 +306,7 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
int blen;
while (len-- > 0) {
- sprintf(bs, "\\%03o", *p++ & 0377);
+ sprintf((char* )bs, "\\%03o", *p++ & 0377);
blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
bp = bs;
while (blen-- > 0) *s++ = *bp++;
@@ -313,7 +315,7 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
}
else if (!ONIGENC_IS_CODE_PRINT(enc, *p) &&
!ONIGENC_IS_CODE_SPACE(enc, *p)) {
- sprintf(bs, "\\%03o", *p++ & 0377);
+ sprintf((char* )bs, "\\%03o", *p++ & 0377);
len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
bp = bs;
while (len-- > 0) *s++ = *bp++;
diff --git a/ext/mbstring/oniguruma/regexec.c b/ext/mbstring/oniguruma/regexec.c
index 25d97773f..769ed30c9 100644
--- a/ext/mbstring/oniguruma/regexec.c
+++ b/ext/mbstring/oniguruma/regexec.c
@@ -2,7 +2,7 @@
regexec.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -29,6 +29,12 @@
#include "regint.h"
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+#define ONIGENC_IS_MBC_CRNL(enc,p,end) \
+ (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
+ ONIGENC_IS_MBC_NEWLINE(enc,(p+enc_len(enc,p)),end))
+#endif
+
#ifdef USE_CAPTURE_HISTORY
static void history_tree_free(OnigCaptureTreeNode* node);
@@ -300,6 +306,9 @@ typedef struct _StackType {
UChar *pcode; /* byte code position */
UChar *pstr; /* string position */
UChar *pstr_prev; /* previous char position of pstr */
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ unsigned int state_check;
+#endif
} state;
struct {
int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
@@ -333,28 +342,28 @@ typedef struct _StackType {
/* stack type */
/* used by normal-POP */
#define STK_ALT 0x0001
-#define STK_LOOK_BEHIND_NOT 0x0003
-#define STK_POS_NOT 0x0005
-/* avoided by normal-POP, but value should be small */
-#define STK_NULL_CHECK_START 0x0100
+#define STK_LOOK_BEHIND_NOT 0x0002
+#define STK_POS_NOT 0x0003
/* handled by normal-POP */
-#define STK_MEM_START 0x0200
-#define STK_MEM_END 0x0300
-#define STK_REPEAT_INC 0x0400
+#define STK_MEM_START 0x0100
+#define STK_MEM_END 0x8200
+#define STK_REPEAT_INC 0x0300
+#define STK_STATE_CHECK_MARK 0x1000
/* avoided by normal-POP */
+#define STK_NULL_CHECK_START 0x3000
+#define STK_NULL_CHECK_END 0x5000 /* for recursive call */
+#define STK_MEM_END_MARK 0x8400
#define STK_POS 0x0500 /* used when POP-POS */
#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
#define STK_REPEAT 0x0700
#define STK_CALL_FRAME 0x0800
#define STK_RETURN 0x0900
-#define STK_MEM_END_MARK 0x0a00
-#define STK_VOID 0x0b00 /* for fill a blank */
-#define STK_NULL_CHECK_END 0x0c00 /* for recursive call */
+#define STK_VOID 0x0a00 /* for fill a blank */
/* stack type check mask */
-#define STK_MASK_POP_USED 0x00ff
-#define IS_TO_VOID_TARGET(stk) \
- (((stk)->type & STK_MASK_POP_USED) || (stk)->type == STK_NULL_CHECK_START)
+#define STK_MASK_POP_USED 0x00ff
+#define STK_MASK_TO_VOID_TARGET 0x10ff
+#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
typedef struct {
void* stack_p;
@@ -362,6 +371,10 @@ typedef struct {
OnigOptionType options;
OnigRegion* region;
const UChar* start; /* search start position (for \G: BEGIN_POSITION) */
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ void* state_check_buff;
+ int state_check_buff_size;
+#endif
} MatchArg;
#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
@@ -371,7 +384,37 @@ typedef struct {
(msa).start = (arg_start);\
} while (0)
-#define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+
+#define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
+
+#define STATE_CHECK_BUFF_INIT(msa, str_len, state_num) do { \
+ (msa).state_check_buff = (void* )0;\
+ (msa).state_check_buff_size = 0;\
+ if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
+ int size = ((int )((str_len) + 1) * (state_num) + 7) / 8;\
+ (msa).state_check_buff_size = size; \
+ if (size > 0 && size < STATE_CHECK_BUFF_MAX_SIZE) {\
+ if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \
+ (msa).state_check_buff = (void* )xmalloc(size);\
+ else \
+ (msa).state_check_buff = (void* )xalloca(size);\
+ xmemset((msa).state_check_buff, 0, (size_t )size);\
+ }\
+ }\
+} while (0)
+
+#define MATCH_ARG_FREE(msa) do {\
+ if ((msa).stack_p) xfree((msa).stack_p);\
+ if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
+ if ((msa).state_check_buff) xfree((msa).state_check_buff);\
+ }\
+} while (0);
+#else
+#define STATE_CHECK_BUFF_INIT(msa, str_len, state_num)
+#define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
+#endif
+
#define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\
@@ -465,26 +508,88 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
#define STACK_AT(index) (stk_base + (index))
#define GET_STACK_INDEX(stk) ((stk) - stk_base)
+#define STACK_PUSH_TYPE(stack_type) do {\
+ STACK_ENSURE(1);\
+ stk->type = (stack_type);\
+ STACK_INC;\
+} while(0)
+
+#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+#define STATE_CHECK_POS(s,snum) \
+ (((s) - str) * num_comb_exp_check + ((snum) - 1))
+#define STATE_CHECK_VAL(v,snum) do {\
+ if (state_check_buff != NULL) {\
+ int x = STATE_CHECK_POS(s,snum);\
+ (v) = state_check_buff[x/8] & (1<<(x%8));\
+ }\
+ else (v) = 0;\
+} while(0)
+
+
+#define ELSE_IF_STATE_CHECK_MARK(stk) \
+ else if ((stk)->type == STK_STATE_CHECK_MARK) { \
+ int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
+ state_check_buff[x/8] |= (1<<(x%8)); \
+ }
+
#define STACK_PUSH(stack_type,pat,s,sprev) do {\
STACK_ENSURE(1);\
stk->type = (stack_type);\
stk->u.state.pcode = (pat);\
stk->u.state.pstr = (s);\
stk->u.state.pstr_prev = (sprev);\
+ stk->u.state.state_check = 0;\
STACK_INC;\
} while(0)
#define STACK_PUSH_ENSURED(stack_type,pat) do {\
stk->type = (stack_type);\
stk->u.state.pcode = (pat);\
+ stk->u.state.state_check = 0;\
STACK_INC;\
} while(0)
-#define STACK_PUSH_TYPE(stack_type) do {\
+#define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\
STACK_ENSURE(1);\
+ stk->type = STK_ALT;\
+ stk->u.state.pcode = (pat);\
+ stk->u.state.pstr = (s);\
+ stk->u.state.pstr_prev = (sprev);\
+ stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
+ STACK_INC;\
+} while(0)
+
+#define STACK_PUSH_STATE_CHECK(s,snum) do {\
+ if (state_check_buff != NULL) {\
+ STACK_ENSURE(1);\
+ stk->type = STK_STATE_CHECK_MARK;\
+ stk->u.state.pstr = (s);\
+ stk->u.state.state_check = (snum);\
+ STACK_INC;\
+ }\
+} while(0)
+
+#else /* USE_COMBINATION_EXPLOSION_CHECK */
+
+#define ELSE_IF_STATE_CHECK_MARK(stk)
+
+#define STACK_PUSH(stack_type,pat,s,sprev) do {\
+ STACK_ENSURE(1);\
+ stk->type = (stack_type);\
+ stk->u.state.pcode = (pat);\
+ stk->u.state.pstr = (s);\
+ stk->u.state.pstr_prev = (sprev);\
+ STACK_INC;\
+} while(0)
+
+#define STACK_PUSH_ENSURED(stack_type,pat) do {\
stk->type = (stack_type);\
+ stk->u.state.pcode = (pat);\
STACK_INC;\
} while(0)
+#endif /* USE_COMBINATION_EXPLOSION_CHECK */
#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
#define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev)
@@ -544,7 +649,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
k = stk;\
while (k > stk_base) {\
k--;\
- if ((k->type == STK_MEM_END_MARK || k->type == STK_MEM_END) \
+ if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
&& k->u.mem.num == (mnum)) {\
level++;\
}\
@@ -603,15 +708,18 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
#ifdef ONIG_DEBUG
-#define STACK_BASE_CHECK(p) \
- if ((p) < stk_base) goto stack_error;
+#define STACK_BASE_CHECK(p, at) \
+ if ((p) < stk_base) {\
+ fprintf(stderr, "at %s\n", at);\
+ goto stack_error;\
+ }
#else
-#define STACK_BASE_CHECK(p)
+#define STACK_BASE_CHECK(p, at)
#endif
#define STACK_POP_ONE do {\
stk--;\
- STACK_BASE_CHECK(stk); \
+ STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
} while(0)
#define STACK_POP do {\
@@ -619,25 +727,27 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
case STACK_POP_LEVEL_FREE:\
while (1) {\
stk--;\
- STACK_BASE_CHECK(stk); \
+ STACK_BASE_CHECK(stk, "STACK_POP"); \
if ((stk->type & STK_MASK_POP_USED) != 0) break;\
+ ELSE_IF_STATE_CHECK_MARK(stk);\
}\
break;\
case STACK_POP_LEVEL_MEM_START:\
while (1) {\
stk--;\
- STACK_BASE_CHECK(stk); \
+ STACK_BASE_CHECK(stk, "STACK_POP 2"); \
if ((stk->type & STK_MASK_POP_USED) != 0) break;\
else if (stk->type == STK_MEM_START) {\
mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
}\
+ ELSE_IF_STATE_CHECK_MARK(stk);\
}\
break;\
default:\
while (1) {\
stk--;\
- STACK_BASE_CHECK(stk); \
+ STACK_BASE_CHECK(stk, "STACK_POP 3"); \
if ((stk->type & STK_MASK_POP_USED) != 0) break;\
else if (stk->type == STK_MEM_START) {\
mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
@@ -650,6 +760,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
}\
+ ELSE_IF_STATE_CHECK_MARK(stk);\
}\
break;\
}\
@@ -658,7 +769,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
#define STACK_POP_TIL_POS_NOT do {\
while (1) {\
stk--;\
- STACK_BASE_CHECK(stk); \
+ STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
if (stk->type == STK_POS_NOT) break;\
else if (stk->type == STK_MEM_START) {\
mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
@@ -671,13 +782,14 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
}\
+ ELSE_IF_STATE_CHECK_MARK(stk);\
}\
} while(0)
#define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
while (1) {\
stk--;\
- STACK_BASE_CHECK(stk); \
+ STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
if (stk->type == STK_LOOK_BEHIND_NOT) break;\
else if (stk->type == STK_MEM_START) {\
mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
@@ -690,6 +802,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
}\
+ ELSE_IF_STATE_CHECK_MARK(stk);\
}\
} while(0)
@@ -697,7 +810,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
k = stk;\
while (1) {\
k--;\
- STACK_BASE_CHECK(k); \
+ STACK_BASE_CHECK(k, "STACK_POS_END"); \
if (IS_TO_VOID_TARGET(k)) {\
k->type = STK_VOID;\
}\
@@ -712,7 +825,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
StackType *k = stk;\
while (1) {\
k--;\
- STACK_BASE_CHECK(k); \
+ STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
if (IS_TO_VOID_TARGET(k)) {\
k->type = STK_VOID;\
}\
@@ -727,7 +840,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
StackType* k = stk;\
while (1) {\
k--;\
- STACK_BASE_CHECK(k); \
+ STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
if (k->type == STK_NULL_CHECK_START) {\
if (k->u.null_check.num == (id)) {\
(isnull) = (k->u.null_check.pstr == (s));\
@@ -742,7 +855,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
StackType* k = stk;\
while (1) {\
k--;\
- STACK_BASE_CHECK(k); \
+ STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
if (k->type == STK_NULL_CHECK_START) {\
if (k->u.null_check.num == (id)) {\
if (level == 0) {\
@@ -762,7 +875,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
StackType* k = stk;\
while (1) {\
k--;\
- STACK_BASE_CHECK(k); \
+ STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
if (k->type == STK_NULL_CHECK_START) {\
if (k->u.null_check.num == (id)) {\
if (k->u.null_check.pstr != (s)) {\
@@ -802,7 +915,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
StackType* k = stk;\
while (1) {\
k--;\
- STACK_BASE_CHECK(k); \
+ STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
if (k->type == STK_NULL_CHECK_START) {\
if (k->u.null_check.num == (id)) {\
if (level == 0) {\
@@ -850,7 +963,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
k = stk;\
while (1) {\
k--;\
- STACK_BASE_CHECK(k); \
+ STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
if (k->type == STK_REPEAT) {\
if (level == 0) {\
if (k->u.repeat.num == (id)) {\
@@ -868,7 +981,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
StackType* k = stk;\
while (1) {\
k--;\
- STACK_BASE_CHECK(k); \
+ STACK_BASE_CHECK(k, "STACK_RETURN"); \
if (k->type == STK_CALL_FRAME) {\
if (level == 0) {\
(addr) = k->u.call_frame.ret_addr;\
@@ -937,6 +1050,7 @@ static int string_cmp_ic(OnigEncoding enc, int ambig_flag,
is_fail = 0; \
} while(0)
+
#define ON_STR_BEGIN(s) ((s) == str)
#define ON_STR_END(s) ((s) == end)
#define IS_EMPTY_STR (str == end)
@@ -988,6 +1102,77 @@ make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,
}
#endif
+#ifdef USE_BACKREF_AT_LEVEL
+static int mem_is_in_memp(int mem, int num, UChar* memp)
+{
+ int i;
+ MemNumType m;
+
+ for (i = 0; i < num; i++) {
+ GET_MEMNUM_INC(m, memp);
+ if (mem == (int )m) return 1;
+ }
+ return 0;
+}
+
+static int backref_match_at_nested_level(regex_t* reg
+ , StackType* top, StackType* stk_base
+ , int ignore_case, int ambig_flag
+ , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
+{
+ UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
+ int level;
+ StackType* k;
+
+ level = 0;
+ k = top;
+ k--;
+ while (k >= stk_base) {
+ if (k->type == STK_CALL_FRAME) {
+ level--;
+ }
+ else if (k->type == STK_RETURN) {
+ level++;
+ }
+ else if (level == nest) {
+ if (k->type == STK_MEM_START) {
+ if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
+ pstart = k->u.mem.pstr;
+ if (pend != NULL_UCHARP) {
+ if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
+ p = pstart;
+ ss = *s;
+
+ if (ignore_case != 0) {
+ if (string_cmp_ic(reg->enc, ambig_flag,
+ pstart, &ss, (int )(pend - pstart)) == 0)
+ return 0; /* or goto next_mem; */
+ }
+ else {
+ while (p < pend) {
+ if (*p++ != *ss++) return 0; /* or goto next_mem; */
+ }
+ }
+
+ *s = ss;
+ return 1;
+ }
+ }
+ }
+ else if (k->type == STK_MEM_END) {
+ if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
+ pend = k->u.mem.pstr;
+ }
+ }
+ }
+ k--;
+ }
+
+ return 0;
+}
+#endif /* USE_BACKREF_AT_LEVEL */
+
+
#ifdef RUBY_PLATFORM
typedef struct {
@@ -1003,7 +1188,7 @@ trap_ensure(VALUE arg)
TrapEnsureArg* ta = (TrapEnsureArg* )arg;
if (ta->state == 0) { /* trap_exec() is not normal return */
- ONIG_STATE_DEC(ta->reg);
+ ONIG_STATE_DEC_THREAD(ta->reg);
if (! IS_NULL(ta->msa->stack_p) && ta->stk_base != ta->msa->stack_p)
xfree(ta->stk_base);
@@ -1165,27 +1350,43 @@ onig_is_in_code_range(const UChar* p, OnigCodePoint code)
}
static int
-code_is_in_cclass_node(void* node, OnigCodePoint code, int enclen)
+is_code_in_cc(int enclen, OnigCodePoint code, CClassNode* cc)
{
- unsigned int in_cc;
- CClassNode* cc = (CClassNode* )node;
+ int found;
- if (enclen == 1) {
- in_cc = BITSET_AT(cc->bs, code);
+ if (enclen > 1 || (code >= SINGLE_BYTE_SIZE)) {
+ if (IS_NULL(cc->mbuf)) {
+ found = 0;
+ }
+ else {
+ found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
+ }
}
else {
- UChar* p = ((BBuf* )(cc->mbuf))->p;
- in_cc = onig_is_in_code_range(p, code);
+ found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
}
- if (IS_CCLASS_NOT(cc)) {
- return (in_cc ? 0 : 1);
+ if (IS_CCLASS_NOT(cc))
+ return !found;
+ else
+ return found;
+}
+
+extern int
+onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
+{
+ int len;
+
+ if (ONIGENC_MBC_MINLEN(enc) > 1) {
+ len = 2;
}
else {
- return (in_cc ? 1 : 0);
+ len = ONIGENC_CODE_TO_MBCLEN(enc, code);
}
+ return is_code_in_cc(len, code, cc);
}
+
/* matching region of POSIX API */
typedef int regoff_t;
@@ -1217,6 +1418,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
StackIndex si;
StackIndex *repeat_stk;
StackIndex *mem_start_stk, *mem_end_stk;
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ int scv;
+ unsigned char* state_check_buff = msa->state_check_buff;
+ int num_comb_exp_check = reg->num_comb_exp_check;
+#endif
n = reg->num_repeat + reg->num_mem * 2;
STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
@@ -1739,8 +1945,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
mb_len = enc_len(encode, s);
ss = s;
s += mb_len;
+ DATA_ENSURE(0);
code = ONIGENC_MBC_TO_CODE(encode, ss, s);
- if (code_is_in_cclass_node(node, code, mb_len) == 0) goto fail;
+ if (is_code_in_cc(mb_len, code, node) == 0) goto fail;
}
STAT_OP_OUT;
break;
@@ -1826,6 +2033,47 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
STAT_OP_OUT;
break;
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ case OP_STATE_CHECK_ANYCHAR_STAR: STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
+ GET_STATE_CHECK_NUM_INC(mem, p);
+ while (s < end) {
+ STATE_CHECK_VAL(scv, mem);
+ if (scv) goto fail;
+
+ STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
+ n = enc_len(encode, s);
+ DATA_ENSURE(n);
+ if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
+ sprev = s;
+ s += n;
+ }
+ STAT_OP_OUT;
+ break;
+
+ case OP_STATE_CHECK_ANYCHAR_ML_STAR:
+ STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
+
+ GET_STATE_CHECK_NUM_INC(mem, p);
+ while (s < end) {
+ STATE_CHECK_VAL(scv, mem);
+ if (scv) goto fail;
+
+ STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
+ n = enc_len(encode, s);
+ if (n > 1) {
+ DATA_ENSURE(n);
+ sprev = s;
+ s += n;
+ }
+ else {
+ sprev = s;
+ s++;
+ }
+ }
+ STAT_OP_OUT;
+ break;
+#endif /* USE_COMBINATION_EXPLOSION_CHECK */
+
case OP_WORD: STAT_OP_IN(OP_WORD);
DATA_ENSURE(1);
if (! ONIGENC_IS_MBC_WORD(encode, s, end))
@@ -1946,6 +2194,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
STAT_OP_OUT;
continue;
}
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+ else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
+ STAT_OP_OUT;
+ continue;
+ }
+#endif
goto fail;
break;
@@ -1966,6 +2220,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
STAT_OP_OUT;
continue;
}
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+ else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
+ UChar* ss = s + enc_len(encode, s);
+ if (ON_STR_END(ss + enc_len(encode, ss))) {
+ STAT_OP_OUT;
+ continue;
+ }
+ }
+#endif
goto fail;
break;
@@ -2041,11 +2304,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
goto backref;
break;
- case OP_BACKREF3: STAT_OP_IN(OP_BACKREF3);
- mem = 3;
- goto backref;
- break;
-
case OP_BACKREFN: STAT_OP_IN(OP_BACKREFN);
GET_MEMNUM_INC(mem, p);
backref:
@@ -2188,6 +2446,35 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
continue;
}
break;
+
+#ifdef USE_BACKREF_AT_LEVEL
+ case OP_BACKREF_AT_LEVEL:
+ {
+ int len;
+ OnigOptionType ic;
+ LengthType level;
+
+ GET_OPTION_INC(ic, p);
+ GET_LENGTH_INC(level, p);
+ GET_LENGTH_INC(tlen, p);
+
+ sprev = s;
+ if (backref_match_at_nested_level(reg, stk, stk_base, ic, ambig_flag
+ , (int )level, (int )tlen, p, &s, end)) {
+ while (sprev + (len = enc_len(encode, sprev)) < s)
+ sprev += len;
+
+ p += (SIZE_MEMNUM * tlen);
+ }
+ else
+ goto fail;
+
+ STAT_OP_OUT;
+ continue;
+ }
+
+ break;
+#endif
case OP_SET_OPTION_PUSH: STAT_OP_IN(OP_SET_OPTION_PUSH);
GET_OPTION_INC(option, p);
@@ -2309,6 +2596,43 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
continue;
break;
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ case OP_STATE_CHECK_PUSH: STAT_OP_IN(OP_STATE_CHECK_PUSH);
+ GET_STATE_CHECK_NUM_INC(mem, p);
+ STATE_CHECK_VAL(scv, mem);
+ if (scv) goto fail;
+
+ GET_RELADDR_INC(addr, p);
+ STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_STATE_CHECK_PUSH_OR_JUMP: STAT_OP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
+ GET_STATE_CHECK_NUM_INC(mem, p);
+ GET_RELADDR_INC(addr, p);
+ STATE_CHECK_VAL(scv, mem);
+ if (scv) {
+ p += addr;
+ }
+ else {
+ STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
+ }
+ STAT_OP_OUT;
+ continue;
+ break;
+
+ case OP_STATE_CHECK: STAT_OP_IN(OP_STATE_CHECK);
+ GET_STATE_CHECK_NUM_INC(mem, p);
+ STATE_CHECK_VAL(scv, mem);
+ if (scv) goto fail;
+
+ STACK_PUSH_STATE_CHECK(s, mem);
+ STAT_OP_OUT;
+ continue;
+ break;
+#endif /* USE_COMBINATION_EXPLOSION_CHECK */
+
case OP_POP: STAT_OP_IN(OP_POP);
STACK_POP_ONE;
STAT_OP_OUT;
@@ -2383,7 +2707,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
repeat_inc:
stkp->u.repeat.count++;
- if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
+ if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
/* end of repeat. Nothing to do. */
}
else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
@@ -2413,8 +2737,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
repeat_inc_ng:
stkp->u.repeat.count++;
- if (stkp->u.repeat.count < reg->repeat_range[mem].upper ||
- IS_REPEAT_INFINITE(reg->repeat_range[mem].upper)) {
+ if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
UChar* pcode = stkp->u.repeat.pcode;
@@ -2543,6 +2866,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
p = stk->u.state.pcode;
s = stk->u.state.pstr;
sprev = stk->u.state.pstr_prev;
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ if (stk->u.state.state_check != 0) {
+ stk->type = STK_STATE_CHECK_MARK;
+ stk++;
+ }
+#endif
+
STAT_OP_OUT;
continue;
break;
@@ -2727,66 +3058,56 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
const UChar* text, const UChar* text_end,
const UChar* text_range)
{
- const UChar *s, *t, *p, *end;
+ const UChar *s, *se, *t, *p, *end;
const UChar *tail;
- int skip;
+ int skip, tlen1;
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n",
(int )text, (int )text_end, (int )text_range);
#endif
- end = text_range + (target_end - target) - 1;
- if (end > text_end)
- end = text_end;
-
tail = target_end - 1;
+ tlen1 = tail - target;
+ end = text_range;
+ if (end + tlen1 > text_end)
+ end = text_end - tlen1;
+
s = text;
- while ((s - text) < target_end - target) {
- s += enc_len(reg->enc, s);
- }
- s--; /* set to text check tail position. */
if (IS_NULL(reg->int_map)) {
while (s < end) {
- p = s;
+ p = se = s + tlen1;
t = tail;
while (t >= target && *p == *t) {
- p--; t--;
+ p--; t--;
}
- if (t < target) return (UChar* )(p + 1);
+ if (t < target) return (UChar* )s;
- skip = reg->map[*s];
- p = s + 1;
- if (p >= text_end) return (UChar* )NULL;
- t = p;
+ skip = reg->map[*se];
+ t = s;
do {
- p += enc_len(reg->enc, p);
- } while ((p - t) < skip && p < text_end);
-
- s += (p - t);
+ s += enc_len(reg->enc, s);
+ } while ((s - t) < skip && s < end);
}
}
else {
while (s < end) {
- p = s;
+ p = se = s + tlen1;
t = tail;
while (t >= target && *p == *t) {
- p--; t--;
+ p--; t--;
}
- if (t < target) return (UChar* )(p + 1);
+ if (t < target) return (UChar* )s;
- skip = reg->int_map[*s];
- p = s + 1;
- if (p >= text_end) return (UChar* )NULL;
- t = p;
+ skip = reg->int_map[*se];
+ t = s;
do {
- p += enc_len(reg->enc, p);
- } while ((p - t) < skip && p < text_end);
-
- s += (p - t);
+ s += enc_len(reg->enc, s);
+ } while ((s - t) < skip && s < end);
}
}
+
return (UChar* )NULL;
}
@@ -2915,7 +3236,9 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
UChar *prev;
MatchArg msa;
-#ifdef USE_MULTI_THREAD_SYSTEM
+#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
+ start:
+ THREAD_ATOMIC_START;
if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
ONIG_STATE_INC(reg);
if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
@@ -2924,17 +3247,22 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
}
}
else {
- int n = 0;
+ int n;
+
+ THREAD_ATOMIC_END;
+ n = 0;
while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
if (++n > THREAD_PASS_LIMIT_COUNT)
return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
THREAD_PASS;
}
- ONIG_STATE_INC(reg);
+ goto start;
}
-#endif /* USE_MULTI_THREAD_SYSTEM */
+ THREAD_ATOMIC_END;
+#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
MATCH_ARG_INIT(msa, option, region, at);
+ STATE_CHECK_BUFF_INIT(msa, end - str, reg->num_comb_exp_check);
if (region
#ifdef USE_POSIX_REGION_OPTION
@@ -2952,7 +3280,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
}
MATCH_ARG_FREE(msa);
- ONIG_STATE_DEC(reg);
+ ONIG_STATE_DEC_THREAD(reg);
return r;
}
@@ -3029,7 +3357,11 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
goto retry_gate;
}
- else if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end))
+ else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+ && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
+#endif
+ )
goto retry_gate;
break;
}
@@ -3132,7 +3464,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
switch (reg->sub_anchor) {
case ANCHOR_BEGIN_LINE:
if (!ON_STR_BEGIN(p)) {
- prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
+ prev = onigenc_get_prev_char_head(reg->enc, str, p);
if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
p = prev;
goto retry;
@@ -3149,7 +3481,11 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
goto retry;
}
}
- else if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)) {
+ else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+ && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
+#endif
+ ) {
p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
if (IS_NULL(p)) goto fail;
goto retry;
@@ -3187,8 +3523,11 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
int r;
UChar *s, *prev;
MatchArg msa;
+ const UChar *orig_start = start;
-#ifdef USE_MULTI_THREAD_SYSTEM
+#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
+ start:
+ THREAD_ATOMIC_START;
if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
ONIG_STATE_INC(reg);
if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
@@ -3197,15 +3536,19 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
}
}
else {
- int n = 0;
+ int n;
+
+ THREAD_ATOMIC_END;
+ n = 0;
while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
if (++n > THREAD_PASS_LIMIT_COUNT)
return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
THREAD_PASS;
}
- ONIG_STATE_INC(reg);
+ goto start;
}
-#endif /* USE_MULTI_THREAD_SYSTEM */
+ THREAD_ATOMIC_END;
+#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr,
@@ -3305,12 +3648,12 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
goto end_buf;
}
}
- else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_PL)) {
+ else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
goto begin_position;
}
}
else if (str == end) { /* empty string */
- static const UChar* address_for_empty_string = "";
+ static const UChar* address_for_empty_string = (UChar* )"";
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "onig_search: empty string.\n");
@@ -3322,6 +3665,10 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
prev = (UChar* )NULL;
MATCH_ARG_INIT(msa, option, region, start);
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ msa.state_check_buff = (void* )0;
+ msa.state_check_buff_size = 0;
+#endif
MATCH_AND_RETURN_CHECK;
goto mismatch;
}
@@ -3333,7 +3680,8 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
(int )(end - str), (int )(start - str), (int )(range - str));
#endif
- MATCH_ARG_INIT(msa, option, region, start);
+ MATCH_ARG_INIT(msa, option, region, orig_start);
+ STATE_CHECK_BUFF_INIT(msa, end - str, reg->num_comb_exp_check);
s = (UChar* )start;
if (range > start) { /* forward search */
@@ -3398,7 +3746,11 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
MATCH_AND_RETURN_CHECK;
prev = s;
s += enc_len(reg->enc, s);
- } while (s <= range); /* exec s == range, because empty match with /$/. */
+ } while (s < range);
+
+ if (s == range) { /* because empty match with /$/. */
+ MATCH_AND_RETURN_CHECK;
+ }
}
else { /* backward search */
if (reg->optimize != ONIG_OPTIMIZE_NONE) {
@@ -3461,7 +3813,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
finish:
MATCH_ARG_FREE(msa);
- ONIG_STATE_DEC(reg);
+ ONIG_STATE_DEC_THREAD(reg);
/* If result is mismatch and no FIND_NOT_EMPTY option,
then the region is not setted in match_at(). */
@@ -3482,7 +3834,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
mismatch_no_msa:
r = ONIG_MISMATCH;
finish_no_msa:
- ONIG_STATE_DEC(reg);
+ ONIG_STATE_DEC_THREAD(reg);
#ifdef ONIG_DEBUG
if (r != ONIG_MISMATCH)
fprintf(stderr, "onig_search: error %d\n", r);
@@ -3490,7 +3842,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
return r;
match:
- ONIG_STATE_DEC(reg);
+ ONIG_STATE_DEC_THREAD(reg);
MATCH_ARG_FREE(msa);
return s - str;
}
diff --git a/ext/mbstring/oniguruma/regext.c b/ext/mbstring/oniguruma/regext.c
index 6839708be..f5ad1f35a 100755
--- a/ext/mbstring/oniguruma/regext.c
+++ b/ext/mbstring/oniguruma/regext.c
@@ -2,7 +2,7 @@
regext.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -194,6 +194,7 @@ onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
return r;
}
+#ifdef USE_RECOMPILE_API
extern int
onig_recompile_deluxe(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
OnigCompileInfo* ci, OnigErrorInfo* einfo)
@@ -211,3 +212,4 @@ onig_recompile_deluxe(regex_t* reg, const UChar* pattern, const UChar* pattern_e
}
return 0;
}
+#endif
diff --git a/ext/mbstring/oniguruma/reggnu.c b/ext/mbstring/oniguruma/reggnu.c
index 70e8582ff..248957c9d 100644
--- a/ext/mbstring/oniguruma/reggnu.c
+++ b/ext/mbstring/oniguruma/reggnu.c
@@ -2,7 +2,7 @@
reggnu.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -93,6 +93,7 @@ re_compile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
return r;
}
+#ifdef USE_RECOMPILE_API
extern int
re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
{
@@ -113,6 +114,7 @@ re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
}
return r;
}
+#endif
extern void
re_free_pattern(regex_t* reg)
@@ -151,16 +153,16 @@ re_mbcinit(int mb_code)
OnigEncoding enc;
switch (mb_code) {
- case MBCTYPE_ASCII:
+ case RE_MBCTYPE_ASCII:
enc = ONIG_ENCODING_ASCII;
break;
- case MBCTYPE_EUC:
+ case RE_MBCTYPE_EUC:
enc = ONIG_ENCODING_EUC_JP;
break;
- case MBCTYPE_SJIS:
+ case RE_MBCTYPE_SJIS:
enc = ONIG_ENCODING_SJIS;
break;
- case MBCTYPE_UTF8:
+ case RE_MBCTYPE_UTF8:
enc = ONIG_ENCODING_UTF8;
break;
default:
diff --git a/ext/mbstring/oniguruma/regint.h b/ext/mbstring/oniguruma/regint.h
index 2bd514b7c..c06bf5763 100644
--- a/ext/mbstring/oniguruma/regint.h
+++ b/ext/mbstring/oniguruma/regint.h
@@ -4,7 +4,7 @@
regint.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -62,6 +62,12 @@
#define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
+/* #define USE_RECOMPILE_API */
+/* treat \r\n as line terminator.
+ !!! NO SUPPORT !!!
+ use this configuration on your own responsibility */
+/* #define USE_CRNL_AS_LINE_TERMINATOR */
+
/* internal config */
#define USE_RECYCLE_NODE
#define USE_OP_PUSH_OR_JUMP_EXACT
@@ -75,10 +81,12 @@
/* interface to external system */
#ifdef NOT_RUBY /* given from Makefile */
#include "config.h"
+#define USE_BACKREF_AT_LEVEL
#define USE_CAPTURE_HISTORY
#define USE_VARIABLE_META_CHARS
#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */
#define USE_POSIX_REGION_OPTION /* needed for POSIX API support */
+/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */
/* #define USE_MULTI_THREAD_SYSTEM */
#define THREAD_ATOMIC_START /* depend on thread system */
#define THREAD_ATOMIC_END /* depend on thread system */
@@ -93,7 +101,9 @@
#include "version.h"
#include "rubysig.h" /* for DEFER_INTS, ENABLE_INTS */
+#define USE_COMBINATION_EXPLOSION_CHECK /* (X*)* */
#define USE_MULTI_THREAD_SYSTEM
+
#define THREAD_ATOMIC_START DEFER_INTS
#define THREAD_ATOMIC_END ENABLE_INTS
#define THREAD_PASS rb_thread_schedule()
@@ -105,11 +115,14 @@
}\
} while (0)
-#define DEFAULT_WARN_FUNCTION rb_warn
-#define DEFAULT_VERB_WARN_FUNCTION rb_warning
+#define DEFAULT_WARN_FUNCTION onig_rb_warn
+#define DEFAULT_VERB_WARN_FUNCTION onig_rb_warning
#endif /* else NOT_RUBY */
+#define STATE_CHECK_STRING_THRESHOLD_LEN 7
+#define STATE_CHECK_BUFF_MAX_SIZE 0x08000000
+
#define THREAD_PASS_LIMIT_COUNT 8
#define xmemset memset
#define xmemcpy memcpy
@@ -124,13 +137,26 @@
#endif
-#ifdef USE_MULTI_THREAD_SYSTEM
-#define ONIG_STATE_INC(reg) (reg)->state++
-#define ONIG_STATE_DEC(reg) (reg)->state--
+#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
+#define ONIG_STATE_INC(reg) (reg)->state++
+#define ONIG_STATE_DEC(reg) (reg)->state--
+
+#define ONIG_STATE_INC_THREAD(reg) do {\
+ THREAD_ATOMIC_START;\
+ (reg)->state++;\
+ THREAD_ATOMIC_END;\
+} while(0)
+#define ONIG_STATE_DEC_THREAD(reg) do {\
+ THREAD_ATOMIC_START;\
+ (reg)->state--;\
+ THREAD_ATOMIC_END;\
+} while(0)
#else
-#define ONIG_STATE_INC(reg) /* Nothing */
-#define ONIG_STATE_DEC(reg) /* Nothing */
-#endif /* USE_MULTI_THREAD_SYSTEM */
+#define ONIG_STATE_INC(reg) /* Nothing */
+#define ONIG_STATE_DEC(reg) /* Nothing */
+#define ONIG_STATE_INC_THREAD(reg) /* Nothing */
+#define ONIG_STATE_DEC_THREAD(reg) /* Nothing */
+#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
#define onig_st_is_member st_is_member
@@ -518,7 +544,7 @@ typedef struct _BBuf {
#define ANCHOR_LOOK_BEHIND_NOT (1<<13)
#define ANCHOR_ANYCHAR_STAR (1<<14) /* ".*" optimize info */
-#define ANCHOR_ANYCHAR_STAR_PL (1<<15) /* ".*" optimize info (posix-line) */
+#define ANCHOR_ANYCHAR_STAR_ML (1<<15) /* ".*" optimize info (multi-line) */
/* operation code */
enum OpCode {
@@ -574,11 +600,11 @@ enum OpCode {
OP_BACKREF1,
OP_BACKREF2,
- OP_BACKREF3,
OP_BACKREFN,
OP_BACKREFN_IC,
OP_BACKREF_MULTI,
OP_BACKREF_MULTI_IC,
+ OP_BACKREF_AT_LEVEL, /* \k<xxx+n>, \k<xxx-n> */
OP_MEMORY_START,
OP_MEMORY_START_PUSH, /* push back-tracker to stack */
@@ -618,34 +644,33 @@ enum OpCode {
OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */
OP_CALL, /* \g<name> */
- OP_RETURN
-};
+ OP_RETURN,
-/* arguments type */
-#define ARG_SPECIAL -1
-#define ARG_NON 0
-#define ARG_RELADDR 1
-#define ARG_ABSADDR 2
-#define ARG_LENGTH 3
-#define ARG_MEMNUM 4
-#define ARG_OPTION 5
+ OP_STATE_CHECK_PUSH, /* combination explosion check and push */
+ OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */
+ OP_STATE_CHECK, /* check only */
+ OP_STATE_CHECK_ANYCHAR_STAR,
+ OP_STATE_CHECK_ANYCHAR_ML_STAR
+};
typedef int RelAddrType;
typedef int AbsAddrType;
typedef int LengthType;
typedef int RepeatNumType;
typedef short int MemNumType;
+typedef short int StateCheckNumType;
typedef void* PointerType;
-#define SIZE_OPCODE 1
-#define SIZE_RELADDR sizeof(RelAddrType)
-#define SIZE_ABSADDR sizeof(AbsAddrType)
-#define SIZE_LENGTH sizeof(LengthType)
-#define SIZE_MEMNUM sizeof(MemNumType)
-#define SIZE_REPEATNUM sizeof(RepeatNumType)
-#define SIZE_OPTION sizeof(OnigOptionType)
-#define SIZE_CODE_POINT sizeof(OnigCodePoint)
-#define SIZE_POINTER sizeof(PointerType)
+#define SIZE_OPCODE 1
+#define SIZE_RELADDR sizeof(RelAddrType)
+#define SIZE_ABSADDR sizeof(AbsAddrType)
+#define SIZE_LENGTH sizeof(LengthType)
+#define SIZE_MEMNUM sizeof(MemNumType)
+#define SIZE_STATE_CHECK_NUM sizeof(StateCheckNumType)
+#define SIZE_REPEATNUM sizeof(RepeatNumType)
+#define SIZE_OPTION sizeof(OnigOptionType)
+#define SIZE_CODE_POINT sizeof(OnigCodePoint)
+#define SIZE_POINTER sizeof(PointerType)
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
@@ -671,6 +696,7 @@ typedef void* PointerType;
#define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType)
#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType)
#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType)
+#define GET_STATE_CHECK_NUM_INC(num,p) PLATFORM_GET_INC(num, p, StateCheckNumType)
/* code point's address must be aligned address. */
#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p))
@@ -713,6 +739,12 @@ typedef void* PointerType;
#define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR)
#define SIZE_OP_RETURN SIZE_OPCODE
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+#define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
+#define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
+#define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
+#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
+#endif
#define MC_ESC(enc) (enc)->meta_char_table.esc
#define MC_ANYCHAR(enc) (enc)->meta_char_table.anychar
@@ -721,6 +753,11 @@ typedef void* PointerType;
#define MC_ONE_OR_MORE_TIME(enc) (enc)->meta_char_table.one_or_more_time
#define MC_ANYCHAR_ANYTIME(enc) (enc)->meta_char_table.anychar_anytime
+#define IS_MC_ESC_CODE(code, enc, syn) \
+ ((code) == MC_ESC(enc) && \
+ !IS_SYNTAX_OP2((syn), ONIG_SYN_OP2_INEFFECTIVE_ESCAPE))
+
+
#define SYN_POSIX_COMMON_OP \
( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \
ONIG_SYN_OP_DECIMAL_BACKREF | \
@@ -781,13 +818,14 @@ extern void onig_print_statistics P_((FILE* f));
#endif
#endif
-extern char* onig_error_code_to_format P_((int code));
-extern void onig_snprintf_with_pattern PV_((char buf[], int bufsize, OnigEncoding enc, char* pat, char* pat_end, char *fmt, ...));
+extern UChar* onig_error_code_to_format P_((int code));
+extern void onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...));
extern int onig_bbuf_init P_((BBuf* buf, int size));
extern int onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, OnigEncoding enc, OnigSyntaxType* syntax));
extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo));
extern void onig_chain_reduce P_((regex_t* reg));
extern void onig_chain_link_add P_((regex_t* to, regex_t* add));
extern void onig_transfer P_((regex_t* to, regex_t* from));
+extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
#endif /* REGINT_H */
diff --git a/ext/mbstring/oniguruma/regparse.c b/ext/mbstring/oniguruma/regparse.c
index 58e122f48..407b73fc4 100644
--- a/ext/mbstring/oniguruma/regparse.c
+++ b/ext/mbstring/oniguruma/regparse.c
@@ -2,7 +2,7 @@
regparse.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -60,6 +60,20 @@ OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY;
extern void onig_null_warn(const char* s) { }
+#ifdef RUBY_PLATFORM
+extern void
+onig_rb_warn(const char* s)
+{
+ rb_warn(s);
+}
+
+extern void
+onig_rb_warning(const char* s)
+{
+ rb_warning(s);
+}
+#endif
+
#ifdef DEFAULT_WARN_FUNCTION
static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
#else
@@ -305,6 +319,88 @@ typedef struct {
#include "st.h"
+typedef struct {
+ unsigned char* s;
+ unsigned char* end;
+} st_strend_key;
+
+static int strend_cmp(st_strend_key*, st_strend_key*);
+static int strend_hash(st_strend_key*);
+
+static struct st_hash_type type_strend_hash = {
+ strend_cmp,
+ strend_hash,
+};
+
+static st_table*
+onig_st_init_strend_table_with_size(int size)
+{
+ return onig_st_init_table_with_size(&type_strend_hash, size);
+}
+
+static int
+onig_st_lookup_strend(st_table *table, const UChar* str_key, const UChar* end_key, st_data_t *value)
+{
+ st_strend_key key;
+
+ key.s = (unsigned char* )str_key;
+ key.end = (unsigned char* )end_key;
+
+ return onig_st_lookup(table, (st_data_t )(&key), value);
+}
+
+static int
+onig_st_insert_strend(st_table *table, const UChar* str_key, const UChar* end_key, st_data_t value)
+{
+ st_strend_key* key;
+ int result;
+
+ key = (st_strend_key* )xmalloc(sizeof(st_strend_key));
+ key->s = (unsigned char* )str_key;
+ key->end = (unsigned char* )end_key;
+ result = onig_st_insert(table, (st_data_t )key, value);
+ if (result) {
+ xfree(key);
+ }
+ return result;
+}
+
+static int
+strend_cmp(st_strend_key* x, st_strend_key* y)
+{
+ unsigned char *p, *q;
+ int c;
+
+ if ((x->end - x->s) != (y->end - y->s))
+ return 1;
+
+ p = x->s;
+ q = y->s;
+ while (p < x->end) {
+ c = (int )*p - (int )*q;
+ if (c != 0) return c;
+
+ p++; q++;
+ }
+
+ return 0;
+}
+
+static int
+strend_hash(st_strend_key* x)
+{
+ int val;
+ unsigned char *p;
+
+ val = 0;
+ p = x->s;
+ while (p < x->end) {
+ val = val * 997 + (int )*p++;
+ }
+
+ return val + (val >> 5);
+}
+
typedef st_table NameTable;
typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */
@@ -350,8 +446,10 @@ onig_print_names(FILE* fp, regex_t* reg)
static int
i_free_name_entry(UChar* key, NameEntry* e, void* arg)
{
- xfree(e->name); /* == key */
+ xfree(e->name);
if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
+ xfree(key);
+ xfree(e);
return ST_DELETE;
}
@@ -801,6 +899,23 @@ onig_number_of_names(regex_t* reg)
}
#endif /* else USE_NAMED_GROUP */
+extern int
+onig_noname_group_capture_is_active(regex_t* reg)
+{
+ if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))
+ return 0;
+
+#ifdef USE_NAMED_GROUP
+ if (onig_number_of_names(reg) > 0 &&
+ IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
+ !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
+ return 0;
+ }
+#endif
+
+ return 1;
+}
+
#define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16
@@ -825,6 +940,13 @@ scan_env_clear(ScanEnv* env)
for (i = 0; i < SCANENV_MEMNODES_SIZE; i++)
env->mem_nodes_static[i] = NULL_NODE;
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ env->num_comb_exp_check = 0;
+ env->comb_exp_max_regnum = 0;
+ env->curr_max_regnum = 0;
+ env->has_recursion = 0;
+#endif
}
static int
@@ -970,13 +1092,13 @@ onig_free_node_list()
{
FreeNode* n;
- THREAD_ATOMIC_START;
- while (FreeNodeList) {
+ /* THREAD_ATOMIC_START; */
+ while (IS_NOT_NULL(FreeNodeList)) {
n = FreeNodeList;
FreeNodeList = FreeNodeList->next;
xfree(n);
}
- THREAD_ATOMIC_END;
+ /* THREAD_ATOMIC_END; */
return 0;
}
#endif
@@ -987,13 +1109,14 @@ node_new()
Node* node;
#ifdef USE_RECYCLE_NODE
+ THREAD_ATOMIC_START;
if (IS_NOT_NULL(FreeNodeList)) {
- THREAD_ATOMIC_START;
node = (Node* )FreeNodeList;
FreeNodeList = FreeNodeList->next;
THREAD_ATOMIC_END;
return node;
}
+ THREAD_ATOMIC_END;
#endif
node = (Node* )xmalloc(sizeof(Node));
@@ -1020,9 +1143,9 @@ node_new_cclass()
return node;
}
-extern Node*
+static Node*
node_new_cclass_by_codepoint_range(int not,
- OnigCodePoint sbr[], OnigCodePoint mbr[])
+ const OnigCodePoint sbr[], const OnigCodePoint mbr[])
{
CClassNode* cc;
int n, i, j;
@@ -1128,7 +1251,11 @@ onig_node_new_anchor(int type)
}
static Node*
-node_new_backref(int back_num, int* backrefs, int by_name, ScanEnv* env)
+node_new_backref(int back_num, int* backrefs, int by_name,
+#ifdef USE_BACKREF_AT_LEVEL
+ int exist_level, int nest_level,
+#endif
+ ScanEnv* env)
{
int i;
Node* node = node_new();
@@ -1141,6 +1268,13 @@ node_new_backref(int back_num, int* backrefs, int by_name, ScanEnv* env)
if (by_name != 0)
NBACKREF(node).state |= NST_NAME_REF;
+#ifdef USE_BACKREF_AT_LEVEL
+ if (exist_level != 0) {
+ NBACKREF(node).state |= NST_NEST_LEVEL;
+ NBACKREF(node).nest_level = nest_level;
+ }
+#endif
+
for (i = 0; i < back_num; i++) {
if (backrefs[i] <= env->num_mem &&
IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) {
@@ -1194,11 +1328,17 @@ node_new_qualifier(int lower, int upper, int by_number)
NQUALIFIER(node).lower = lower;
NQUALIFIER(node).upper = upper;
NQUALIFIER(node).greedy = 1;
- NQUALIFIER(node).by_number = by_number;
NQUALIFIER(node).target_empty_info = NQ_TARGET_ISNOT_EMPTY;
NQUALIFIER(node).head_exact = NULL_NODE;
NQUALIFIER(node).next_head_exact = NULL_NODE;
NQUALIFIER(node).is_refered = 0;
+ if (by_number != 0)
+ NQUALIFIER(node).state |= NST_BY_NUMBER;
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ NQUALIFIER(node).comb_exp_check_num = 0;
+#endif
+
return node;
}
@@ -2013,7 +2153,7 @@ enum ReduceType {
RQ_AQ, /* to '*?' */
RQ_QQ, /* to '??' */
RQ_P_QQ, /* to '+)??' */
- RQ_PQ_Q, /* to '+?)?' */
+ RQ_PQ_Q /* to '+?)?' */
};
static enum ReduceType ReduceTypeTable[6][6] = {
@@ -2125,6 +2265,10 @@ typedef struct {
int ref1;
int* refs;
int by_name;
+#ifdef USE_BACKREF_AT_LEVEL
+ int exist_level;
+ int level; /* \k<name+n> */
+#endif
} backref;
struct {
UChar* name;
@@ -2274,15 +2418,17 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
control:
if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
PFETCH(c);
- if (c == MC_ESC(enc)) {
- v = fetch_escaped_value(&p, end, env);
- if (v < 0) return v;
- c = (OnigCodePoint )v;
- }
- else if (c == '?')
+ if (c == '?') {
c = 0177;
- else
+ }
+ else {
+ if (c == MC_ESC(enc)) {
+ v = fetch_escaped_value(&p, end, env);
+ if (v < 0) return v;
+ c = (OnigCodePoint )v;
+ }
c &= 0x9f;
+ }
break;
}
/* fall through */
@@ -2302,6 +2448,89 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);
#ifdef USE_NAMED_GROUP
+#ifdef USE_BACKREF_AT_LEVEL
+/*
+ \k<name+n>, \k<name-n>
+*/
+static int
+fetch_name_with_level(UChar** src, UChar* end, UChar** rname_end
+ , ScanEnv* env, int* level)
+{
+ int r, exist_level = 0;
+ OnigCodePoint c = 0;
+ OnigCodePoint first_code;
+ OnigEncoding enc = env->enc;
+ UChar *name_end;
+ UChar *p = *src;
+ PFETCH_READY;
+
+ name_end = end;
+ r = 0;
+ if (PEND) {
+ return ONIGERR_EMPTY_GROUP_NAME;
+ }
+ else {
+ PFETCH(c);
+ first_code = c;
+ if (c == '>')
+ return ONIGERR_EMPTY_GROUP_NAME;
+
+ if (!ONIGENC_IS_CODE_WORD(enc, c)) {
+ r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+ }
+ }
+
+ while (!PEND) {
+ name_end = p;
+ PFETCH(c);
+ if (c == '>' || c == ')' || c == '+' || c == '-') break;
+
+ if (!ONIGENC_IS_CODE_WORD(enc, c)) {
+ r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+ }
+ }
+
+ if (c != '>') {
+ if (c == '+' || c == '-') {
+ int num;
+ int flag = (c == '-' ? -1 : 1);
+
+ PFETCH(c);
+ if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err;
+ PUNFETCH;
+ num = onig_scan_unsigned_number(&p, end, enc);
+ if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
+ *level = (num * flag);
+ exist_level = 1;
+
+ PFETCH(c);
+ if (c == '>')
+ goto first_check;
+ }
+
+ err:
+ r = ONIGERR_INVALID_GROUP_NAME;
+ name_end = end;
+ }
+ else {
+ first_check:
+ if (ONIGENC_IS_CODE_ASCII(first_code) &&
+ ONIGENC_IS_CODE_UPPER(enc, first_code))
+ r = ONIGERR_INVALID_GROUP_NAME;
+ }
+
+ if (r == 0) {
+ *rname_end = name_end;
+ *src = p;
+ return (exist_level ? 1 : 0);
+ }
+ else {
+ onig_scan_env_set_error_string(env, r, *src, name_end);
+ return r;
+ }
+}
+#endif /* USE_BACKREF_AT_LEVEL */
+
/*
def: 0 -> define name (don't allow number name)
1 -> reference name (allow number name)
@@ -2428,11 +2657,11 @@ CC_ESC_WARN(ScanEnv* env, UChar *c)
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&
IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {
- char buf[WARN_BUFSIZE];
+ UChar buf[WARN_BUFSIZE];
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
env->pattern, env->pattern_end,
- "character class has '%s' without escape", c);
- (*onig_warn)(buf);
+ (UChar* )"character class has '%s' without escape", c);
+ (*onig_warn)((char* )buf);
}
}
@@ -2442,11 +2671,11 @@ CCEND_ESC_WARN(ScanEnv* env, UChar* c)
if (onig_warn == onig_null_warn) return ;
if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {
- char buf[WARN_BUFSIZE];
+ UChar buf[WARN_BUFSIZE];
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,
(env)->pattern, (env)->pattern_end,
- "regular expression has '%s' without escape", c);
- (*onig_warn)(buf);
+ (UChar* )"regular expression has '%s' without escape", c);
+ (*onig_warn)((char* )buf);
}
}
@@ -2537,6 +2766,8 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->type = TK_CHAR;
tok->base = 0;
tok->u.c = c;
+ tok->escaped = 0;
+
if (c == ']') {
tok->type = TK_CC_CLOSE;
}
@@ -2708,7 +2939,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->type = TK_CC_CC_OPEN;
}
else {
- CC_ESC_WARN(env, "[");
+ CC_ESC_WARN(env, (UChar* )"[");
}
}
}
@@ -2747,7 +2978,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->backp = p;
PFETCH(c);
- if (c == MC_ESC(enc)) {
+ if (IS_MC_ESC_CODE(c, enc, syn)) {
if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
tok->backp = p;
@@ -3012,6 +3243,9 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->u.backref.num = 1;
tok->u.backref.ref1 = num;
tok->u.backref.by_name = 0;
+#ifdef USE_BACKREF_AT_LEVEL
+ tok->u.backref.exist_level = 0;
+#endif
break;
}
@@ -3050,8 +3284,17 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
int* backs;
prev = p;
+
+#ifdef USE_BACKREF_AT_LEVEL
+ name_end = NULL_UCHARP; /* no need. escape gcc warning. */
+ r = fetch_name_with_level(&p, end, &name_end, env, &tok->u.backref.level);
+ if (r == 1) tok->u.backref.exist_level = 1;
+ else tok->u.backref.exist_level = 0;
+#else
r = fetch_name(&p, end, &name_end, env, 1);
+#endif
if (r < 0) return r;
+
num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
if (num <= 0) {
onig_scan_env_set_error_string(env,
@@ -3170,13 +3413,17 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
switch (c) {
case '.':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;
+#ifdef USE_VARIABLE_META_CHARS
any_char:
+#endif
tok->type = TK_ANYCHAR;
break;
case '*':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;
+#ifdef USE_VARIABLE_META_CHARS
anytime:
+#endif
tok->type = TK_OP_REPEAT;
tok->u.repeat.lower = 0;
tok->u.repeat.upper = REPEAT_INFINITE;
@@ -3185,7 +3432,9 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case '+':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;
+#ifdef USE_VARIABLE_META_CHARS
one_or_more_time:
+#endif
tok->type = TK_OP_REPEAT;
tok->u.repeat.lower = 1;
tok->u.repeat.upper = REPEAT_INFINITE;
@@ -3194,7 +3443,9 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case '?':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;
+#ifdef USE_VARIABLE_META_CHARS
zero_or_one_time:
+#endif
tok->type = TK_OP_REPEAT;
tok->u.repeat.lower = 0;
tok->u.repeat.upper = 1;
@@ -3271,7 +3522,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case ']':
if (*src > env->pattern) /* /].../ is allowed. */
- CCEND_ESC_WARN(env, "]");
+ CCEND_ESC_WARN(env, (UChar* )"]");
break;
case '#':
@@ -3297,14 +3548,16 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
}
+#ifdef USE_VARIABLE_META_CHARS
out:
+#endif
*src = p;
return tok->type;
}
static int
add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc,
- OnigCodePoint sbr[], OnigCodePoint mbr[])
+ const OnigCodePoint sbr[], const OnigCodePoint mbr[])
{
int i, r;
OnigCodePoint j;
@@ -3368,7 +3621,7 @@ static int
add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
{
int c, r;
- OnigCodePoint *sbr, *mbr;
+ const OnigCodePoint *sbr, *mbr;
OnigEncoding enc = env->enc;
r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sbr, &mbr);
@@ -3506,19 +3759,19 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
#define POSIX_BRACKET_NAME_MAX_LEN 6
static PosixBracketEntryType PBS[] = {
- { "alnum", ONIGENC_CTYPE_ALNUM, 5 },
- { "alpha", ONIGENC_CTYPE_ALPHA, 5 },
- { "blank", ONIGENC_CTYPE_BLANK, 5 },
- { "cntrl", ONIGENC_CTYPE_CNTRL, 5 },
- { "digit", ONIGENC_CTYPE_DIGIT, 5 },
- { "graph", ONIGENC_CTYPE_GRAPH, 5 },
- { "lower", ONIGENC_CTYPE_LOWER, 5 },
- { "print", ONIGENC_CTYPE_PRINT, 5 },
- { "punct", ONIGENC_CTYPE_PUNCT, 5 },
- { "space", ONIGENC_CTYPE_SPACE, 5 },
- { "upper", ONIGENC_CTYPE_UPPER, 5 },
- { "xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
- { "ascii", ONIGENC_CTYPE_ASCII, 5 }, /* I don't know origin. Perl? */
+ { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 },
+ { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 },
+ { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 },
+ { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 },
+ { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 },
+ { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 },
+ { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 },
+ { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 },
+ { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 },
+ { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 },
+ { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 },
+ { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
+ { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 },
{ (UChar* )NULL, -1, 0 }
};
@@ -3542,7 +3795,7 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
p = (UChar* )onigenc_step(enc, p, end, pb->len);
- if (onigenc_with_ascii_strncmp(enc, p, end, ":]", 2) != 0)
+ if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
r = add_ctype_to_cc(cc, pb->ctype, not, env);
@@ -3577,19 +3830,19 @@ static int
property_name_to_ctype(UChar* p, UChar* end, OnigEncoding enc)
{
static PosixBracketEntryType PBS[] = {
- { "Alnum", ONIGENC_CTYPE_ALNUM, 5 },
- { "Alpha", ONIGENC_CTYPE_ALPHA, 5 },
- { "Blank", ONIGENC_CTYPE_BLANK, 5 },
- { "Cntrl", ONIGENC_CTYPE_CNTRL, 5 },
- { "Digit", ONIGENC_CTYPE_DIGIT, 5 },
- { "Graph", ONIGENC_CTYPE_GRAPH, 5 },
- { "Lower", ONIGENC_CTYPE_LOWER, 5 },
- { "Print", ONIGENC_CTYPE_PRINT, 5 },
- { "Punct", ONIGENC_CTYPE_PUNCT, 5 },
- { "Space", ONIGENC_CTYPE_SPACE, 5 },
- { "Upper", ONIGENC_CTYPE_UPPER, 5 },
- { "XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
- { "ASCII", ONIGENC_CTYPE_ASCII, 5 },
+ { (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 },
+ { (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 },
+ { (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 },
+ { (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 },
+ { (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 },
+ { (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 },
+ { (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 },
+ { (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 },
+ { (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 },
+ { (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 },
+ { (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 },
+ { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
+ { (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 },
{ (UChar* )NULL, -1, 0 }
};
@@ -3839,7 +4092,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
*src, env->pattern_end, 1, env->enc))
return ONIGERR_EMPTY_CHAR_CLASS;
- CC_ESC_WARN(env, "]");
+ CC_ESC_WARN(env, (UChar* )"]");
r = tok->type = TK_CHAR; /* allow []...] */
}
@@ -3942,7 +4195,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
r = parse_posix_bracket(cc, &p, end, env);
if (r < 0) goto err;
if (r == 1) { /* is not POSIX bracket */
- CC_ESC_WARN(env, "[");
+ CC_ESC_WARN(env, (UChar* )"[");
p = tok->backp;
v = (OnigCodePoint )tok->u.c;
in_israw = 0;
@@ -3988,7 +4241,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
goto val_entry;
}
else if (r == TK_CC_AND) {
- CC_ESC_WARN(env, "-");
+ CC_ESC_WARN(env, (UChar* )"-");
goto range_end_val;
}
state = CCS_RANGE;
@@ -4003,12 +4256,12 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
fetched = 1;
/* [--x] or [a&&-x] is warned. */
if (r == TK_CC_RANGE || and_start != 0)
- CC_ESC_WARN(env, "-");
+ CC_ESC_WARN(env, (UChar* )"-");
goto val_entry;
}
else if (state == CCS_RANGE) {
- CC_ESC_WARN(env, "-");
+ CC_ESC_WARN(env, (UChar* )"-");
goto sb_char; /* [!--x] is allowed */
}
else { /* CCS_COMPLETE */
@@ -4017,12 +4270,12 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
fetched = 1;
if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */
else if (r == TK_CC_AND) {
- CC_ESC_WARN(env, "-");
+ CC_ESC_WARN(env, (UChar* )"-");
goto range_end_val;
}
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
- CC_ESC_WARN(env, "-");
+ CC_ESC_WARN(env, (UChar* )"-");
goto sb_char; /* [0-9-a] is allowed as [0-9\-a] */
}
r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
@@ -4326,10 +4579,9 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
}
}
else {
-#ifdef USE_NAMED_GROUP
if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP))
goto group;
-#endif
+
*np = node_new_effect_memory(env->option, 0);
CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
num = scan_env_add_mem_entry(env);
@@ -4358,11 +4610,11 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
return 0;
}
-static char* PopularQStr[] = {
+static const char* PopularQStr[] = {
"?", "*", "+", "??", "*?", "+?"
};
-static char* ReduceQStr[] = {
+static const char* ReduceQStr[] = {
"", "", "*", "*?", "??", "+ and ??", "+? and ?"
};
@@ -4394,15 +4646,13 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
{ /* check redundant double repeat. */
/* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
QualifierNode* qnt = &(NQUALIFIER(target));
+ int nestq_num = popular_qualifier_num(qn);
+ int targetq_num = popular_qualifier_num(qnt);
#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
- if (qn->by_number == 0 && qnt->by_number == 0 &&
+ if (!IS_QUALIFIER_BY_NUMBER(qn) && !IS_QUALIFIER_BY_NUMBER(qnt) &&
IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
- int nestq_num, targetq_num;
- char buf[WARN_BUFSIZE];
-
- nestq_num = popular_qualifier_num(qn);
- targetq_num = popular_qualifier_num(qnt);
+ UChar buf[WARN_BUFSIZE];
switch(ReduceTypeTable[targetq_num][nestq_num]) {
case RQ_ASIS:
@@ -4411,9 +4661,9 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
case RQ_DEL:
if (onig_verb_warn != onig_null_warn) {
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
- env->pattern, env->pattern_end,
- "redundant nested repeat operator");
- (*onig_verb_warn)(buf);
+ env->pattern, env->pattern_end,
+ (UChar* )"redundant nested repeat operator");
+ (*onig_verb_warn)((char* )buf);
}
goto warn_exit;
break;
@@ -4422,10 +4672,10 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
if (onig_verb_warn != onig_null_warn) {
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
env->pattern, env->pattern_end,
- "nested repeat operator %s and %s was replaced with '%s'",
+ (UChar* )"nested repeat operator %s and %s was replaced with '%s'",
PopularQStr[targetq_num], PopularQStr[nestq_num],
ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
- (*onig_verb_warn)(buf);
+ (*onig_verb_warn)((char* )buf);
}
goto warn_exit;
break;
@@ -4434,9 +4684,17 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
warn_exit:
#endif
- if (popular_qualifier_num(qnt) >= 0 && popular_qualifier_num(qn) >= 0) {
- onig_reduce_nested_qualifier(qnode, target);
- goto q_exit;
+ if (targetq_num >= 0) {
+ if (nestq_num >= 0) {
+ onig_reduce_nested_qualifier(qnode, target);
+ goto q_exit;
+ }
+ else if (targetq_num == 1 || targetq_num == 2) { /* * or + */
+ /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
+ if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {
+ qn->upper = (qn->lower == 0 ? 1 : qn->lower);
+ }
+ }
}
}
break;
@@ -4457,8 +4715,8 @@ make_compound_alt_node_from_cc(OnigAmbigType ambig_flag, OnigEncoding enc,
int r, i, j, k, clen, len, ncode, n;
UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
Node **ptail, *snode = NULL_NODE;
- OnigCompAmbigCodes* ccs;
- OnigCompAmbigCodeItem* ci;
+ const OnigCompAmbigCodes* ccs;
+ const OnigCompAmbigCodeItem* ci;
OnigAmbigType amb;
n = 0;
@@ -4546,27 +4804,9 @@ static int type_cclass_hash(type_cclass_key* key)
return val + (val >> 5);
}
-static int type_cclass_key_free(st_data_t x)
-{
- xfree((void* )x);
- return 0;
-}
-
-static st_data_t type_cclass_key_clone(st_data_t x)
-{
- type_cclass_key* new_key;
- type_cclass_key* key = (type_cclass_key* )x;
-
- new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));
- *new_key = *key;
- return (st_data_t )new_key;
-}
-
static struct st_hash_type type_type_cclass_hash = {
type_cclass_cmp,
type_cclass_hash,
- type_cclass_key_free,
- type_cclass_key_clone
};
static st_table* OnigTypeCClassTable;
@@ -4580,6 +4820,8 @@ i_free_shared_class(type_cclass_key* key, Node* node, void* arg)
if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf);
xfree(node);
}
+
+ if (IS_NOT_NULL(key)) xfree(key);
return ST_DELETE;
}
@@ -4588,6 +4830,8 @@ onig_free_shared_cclass_table()
{
if (IS_NOT_NULL(OnigTypeCClassTable)) {
onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);
+ xfree(OnigTypeCClassTable);
+ OnigTypeCClassTable = NULL;
}
return 0;
@@ -4741,7 +4985,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
int ctype, not;
#ifdef USE_SHARED_CCLASS_TABLE
- OnigCodePoint *sbr, *mbr;
+ const OnigCodePoint *sbr, *mbr;
ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not);
r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, ctype, &sbr, &mbr);
@@ -4823,7 +5067,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
if (IS_IGNORECASE(env->option)) {
int i, n, in_cc;
- OnigPairAmbigCodes* ccs;
+ const OnigPairAmbigCodes* ccs;
BitSetRef bs = cc->bs;
OnigAmbigType amb;
@@ -4892,8 +5136,13 @@ parse_exp(Node** np, OnigToken* tok, int term,
case TK_BACKREF:
len = tok->u.backref.num;
*np = node_new_backref(len,
- (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
- tok->u.backref.by_name, env);
+ (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
+ tok->u.backref.by_name,
+#ifdef USE_BACKREF_AT_LEVEL
+ tok->u.backref.exist_level,
+ tok->u.backref.level,
+#endif
+ env);
CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
break;
diff --git a/ext/mbstring/oniguruma/regparse.h b/ext/mbstring/oniguruma/regparse.h
index 1a4ac7dea..ca62dddf7 100644
--- a/ext/mbstring/oniguruma/regparse.h
+++ b/ext/mbstring/oniguruma/regparse.h
@@ -4,7 +4,7 @@
regparse.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -67,7 +67,7 @@
#define CTYPE_XDIGIT (1<<6)
#define CTYPE_NOT_XDIGIT (1<<7)
-#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_PL)
+#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)
#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
#define EFFECT_MEMORY (1<<0)
@@ -76,7 +76,7 @@
#define NODE_STR_MARGIN 16
#define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
-#define NODE_BACKREFS_SIZE 7
+#define NODE_BACKREFS_SIZE 6
#define NSTR_RAW (1<<0) /* by backslashed number */
#define NSTR_AMBIG (1<<1)
@@ -124,11 +124,13 @@ typedef struct {
int lower;
int upper;
int greedy;
- int by_number; /* {n,m} */
int target_empty_info;
struct _Node* head_exact;
struct _Node* next_head_exact;
int is_refered; /* include called node. don't eliminate even if {0} */
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ int comb_exp_check_num; /* 1,2,3...: check, 0: no check */
+#endif
} QualifierNode;
/* status bits */
@@ -145,6 +147,8 @@ typedef struct {
#define NST_NAMED_GROUP (1<<10)
#define NST_NAME_REF (1<<11)
#define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. */
+#define NST_NEST_LEVEL (1<<13)
+#define NST_BY_NUMBER (1<<14) /* {n,m} */
#define SET_EFFECT_STATUS(node,f) (node)->u.effect.state |= (f)
#define CLEAR_EFFECT_STATUS(node,f) (node)->u.effect.state &= ~(f)
@@ -165,7 +169,9 @@ typedef struct {
#define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0)
#define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0)
#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0)
+#define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0)
#define IS_QUALIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0)
+#define IS_QUALIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0)
typedef struct {
int state;
@@ -212,6 +218,7 @@ typedef struct {
int back_num;
int back_static[NODE_BACKREFS_SIZE];
int* back_dynamic;
+ int nest_level;
} BackrefNode;
typedef struct {
@@ -274,6 +281,12 @@ typedef struct {
int mem_alloc;
Node* mem_nodes_static[SCANENV_MEMNODES_SIZE];
Node** mem_nodes_dynamic;
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ int num_comb_exp_check;
+ int comb_exp_max_regnum;
+ int curr_max_regnum;
+ int has_recursion;
+#endif
} ScanEnv;
@@ -290,7 +303,6 @@ typedef struct {
extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
#endif
-extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc));
diff --git a/ext/mbstring/oniguruma/regposix.c b/ext/mbstring/oniguruma/regposix.c
index 34cbeb9a4..a3bacf722 100644
--- a/ext/mbstring/oniguruma/regposix.c
+++ b/ext/mbstring/oniguruma/regposix.c
@@ -2,7 +2,7 @@
regposix.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -55,7 +55,7 @@ typedef struct {
static int
onig2posix_error_code(int code)
{
- static O2PERR o2p[] = {
+ static const O2PERR o2p[] = {
{ ONIG_MISMATCH, REG_NOMATCH },
{ ONIG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL },
{ ONIGERR_MEMORY, REG_ESPACE },
@@ -192,7 +192,7 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
ENC_STRING_LEN(ONIG_C(reg)->enc, str, len);
end = (UChar* )(str + len);
r = onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,
- (OnigRegion* )pmatch, options);
+ (OnigRegion* )pm, options);
if (r >= 0) {
r = 0; /* Match */
@@ -212,6 +212,11 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
if (pm != pmatch && pm != NULL)
xfree(pm);
+#if 0
+ if (reg->re_nsub > nmatch - 1)
+ reg->re_nsub = (nmatch <= 1 ? 0 : nmatch - 1);
+#endif
+
return r;
}
diff --git a/ext/mbstring/oniguruma/regsyntax.c b/ext/mbstring/oniguruma/regsyntax.c
index a0f36b8c3..9114e39e6 100644
--- a/ext/mbstring/oniguruma/regsyntax.c
+++ b/ext/mbstring/oniguruma/regsyntax.c
@@ -2,7 +2,7 @@
regsyntax.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -29,6 +29,13 @@
#include "regint.h"
+OnigSyntaxType OnigSyntaxASIS = {
+ 0
+ , ONIG_SYN_OP2_INEFFECTIVE_ESCAPE
+ , 0
+ , ONIG_OPTION_NONE
+};
+
OnigSyntaxType OnigSyntaxPosixBasic = {
( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
ONIG_SYN_OP_ESC_BRACE_INTERVAL )
@@ -63,7 +70,7 @@ OnigSyntaxType OnigSyntaxEmacs = {
OnigSyntaxType OnigSyntaxGrep = {
( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET |
- ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
+ ONIG_SYN_OP_ESC_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
ONIG_SYN_OP_ESC_VBAR_ALT |
ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF |
ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR |
@@ -110,6 +117,28 @@ OnigSyntaxType OnigSyntaxPerl = {
, ONIG_OPTION_SINGLELINE
};
+/* Perl + named group */
+OnigSyntaxType OnigSyntaxPerl_NG = {
+ (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
+ ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
+ ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
+ ONIG_SYN_OP_ESC_C_CONTROL )
+ & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
+ , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
+ ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
+ ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
+ ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
+ ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS |
+ ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP |
+ ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
+ ONIG_SYN_OP2_ESC_G_SUBEXP_CALL )
+ , ( SYN_GNU_REGEX_BV |
+ ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
+ ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME )
+ , ONIG_OPTION_SINGLELINE
+};
+
+
extern int
onig_set_default_syntax(OnigSyntaxType* syntax)
diff --git a/ext/mbstring/oniguruma/regversion.c b/ext/mbstring/oniguruma/regversion.c
index 5f15c10e6..5fad0cc18 100644
--- a/ext/mbstring/oniguruma/regversion.c
+++ b/ext/mbstring/oniguruma/regversion.c
@@ -2,7 +2,7 @@
regversion.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -47,7 +47,7 @@ onig_copyright(void)
{
static char s[58];
- sprintf(s, "Oniguruma %d.%d.%d : Copyright (C) 2002-2005 K.Kosako",
+ sprintf(s, "Oniguruma %d.%d.%d : Copyright (C) 2002-2006 K.Kosako",
ONIGURUMA_VERSION_MAJOR,
ONIGURUMA_VERSION_MINOR,
ONIGURUMA_VERSION_TEENY);
diff --git a/ext/mbstring/oniguruma/st.c b/ext/mbstring/oniguruma/st.c
index 65c2cc58b..2324da263 100644
--- a/ext/mbstring/oniguruma/st.c
+++ b/ext/mbstring/oniguruma/st.c
@@ -56,8 +56,6 @@ static int numhash(long);
static struct st_hash_type type_numhash = {
numcmp,
numhash,
- st_nothing_key_free,
- st_nothing_key_clone
};
/* extern int strcmp(const char *, const char *); */
@@ -65,20 +63,6 @@ static int strhash(const char *);
static struct st_hash_type type_strhash = {
strcmp,
strhash,
- st_nothing_key_free,
- st_nothing_key_clone
-};
-
-static int strend_cmp(st_strend_key*, st_strend_key*);
-static int strend_hash(st_strend_key*);
-static int strend_key_free(st_data_t key);
-static st_data_t strend_key_clone(st_data_t x);
-
-static struct st_hash_type type_strend_hash = {
- strend_cmp,
- strend_hash,
- strend_key_free,
- strend_key_clone
};
static void rehash(st_table *);
@@ -100,7 +84,7 @@ static void rehash(st_table *);
/*
Table of prime numbers 2^n+a, 2<=n<=30.
*/
-static long primes[] = {
+static const long primes[] = {
8 + 3,
16 + 3,
32 + 5,
@@ -228,13 +212,6 @@ st_init_strtable_with_size(size)
return st_init_table_with_size(&type_strhash, size);
}
-st_table*
-st_init_strend_table_with_size(size)
- int size;
-{
- return st_init_table_with_size(&type_strend_hash, size);
-}
-
void
st_free_table(table)
st_table *table;
@@ -246,7 +223,6 @@ st_free_table(table)
ptr = table->bins[i];
while (ptr != 0) {
next = ptr->next;
- table->type->key_free(ptr->key);
free(ptr);
ptr = next;
}
@@ -297,21 +273,6 @@ st_lookup(table, key, value)
}
}
-int
-st_lookup_strend(table, str_key, end_key, value)
- st_table *table;
- const unsigned char* str_key;
- const unsigned char* end_key;
- st_data_t *value;
-{
- st_strend_key key;
-
- key.s = (unsigned char* )str_key;
- key.end = (unsigned char* )end_key;
-
- return st_lookup(table, (st_data_t )(&key), value);
-}
-
#define ADD_DIRECT(table, key, value, hash_val, bin_pos)\
do {\
st_table_entry *entry;\
@@ -352,22 +313,6 @@ st_insert(table, key, value)
}
}
-int
-st_insert_strend(table, str_key, end_key, value)
- st_table *table;
- const unsigned char* str_key;
- const unsigned char* end_key;
- st_data_t value;
-{
- st_strend_key* key;
-
- key = alloc(st_strend_key);
- key->s = (unsigned char* )str_key;
- key->end = (unsigned char* )end_key;
-
- return st_insert(table, (st_data_t )key, value);
-}
-
void
st_add_direct(table, key, value)
st_table *table;
@@ -381,21 +326,6 @@ st_add_direct(table, key, value)
ADD_DIRECT(table, key, value, hash_val, bin_pos);
}
-void
-st_add_direct_strend(table, str_key, end_key, value)
- st_table *table;
- const unsigned char* str_key;
- const unsigned char* end_key;
- st_data_t value;
-{
- st_strend_key* key;
-
- key = alloc(st_strend_key);
- key->s = (unsigned char* )str_key;
- key->end = (unsigned char* )end_key;
- st_add_direct(table, (st_data_t )key, value);
-}
-
static void
rehash(table)
register st_table *table;
@@ -455,7 +385,6 @@ st_copy(old_table)
return 0;
}
*entry = *ptr;
- entry->key = old_table->type->key_clone(ptr->key);
entry->next = new_table->bins[i];
new_table->bins[i] = entry;
ptr = ptr->next;
@@ -556,7 +485,7 @@ st_cleanup_safe(table, never)
table->num_entries = num_entries;
}
-void
+int
st_foreach(table, func, arg)
st_table *table;
int (*func)();
@@ -569,7 +498,7 @@ st_foreach(table, func, arg)
for(i = 0; i < table->num_bins; i++) {
last = 0;
for(ptr = table->bins[i]; ptr != 0;) {
- retval = (*func)(ptr->key, ptr->record, arg, 0);
+ retval = (*func)(ptr->key, ptr->record, arg);
switch (retval) {
case ST_CHECK: /* check if hash is modified during iteration */
tmp = 0;
@@ -580,8 +509,7 @@ st_foreach(table, func, arg)
}
if (!tmp) {
/* call func with error notice */
- retval = (*func)(0, 0, arg, 1);
- return;
+ return 1;
}
/* fall through */
case ST_CONTINUE:
@@ -589,7 +517,7 @@ st_foreach(table, func, arg)
ptr = ptr->next;
break;
case ST_STOP:
- return;
+ return 0;
case ST_DELETE:
tmp = ptr;
if (last == 0) {
@@ -599,12 +527,12 @@ st_foreach(table, func, arg)
last->next = ptr->next;
}
ptr = ptr->next;
- table->type->key_free(tmp->key);
free(tmp);
table->num_entries--;
}
}
}
+ return 0;
}
static int
@@ -659,59 +587,3 @@ numhash(n)
{
return n;
}
-
-extern int
-st_nothing_key_free(st_data_t key) { return 0; }
-
-extern st_data_t
-st_nothing_key_clone(st_data_t x) { return x; }
-
-static int strend_cmp(st_strend_key* x, st_strend_key* y)
-{
- unsigned char *p, *q;
- int c;
-
- if ((x->end - x->s) != (y->end - y->s))
- return 1;
-
- p = x->s;
- q = y->s;
- while (p < x->end) {
- c = (int )*p - (int )*q;
- if (c != 0) return c;
-
- p++; q++;
- }
-
- return 0;
-}
-
-static int strend_hash(st_strend_key* x)
-{
- int val;
- unsigned char *p;
-
- val = 0;
- p = x->s;
- while (p < x->end) {
- val = val * 997 + (int )*p++;
- }
-
- return val + (val >> 5);
-}
-
-static int strend_key_free(st_data_t x)
-{
- xfree((void* )x);
- return 0;
-}
-
-static st_data_t strend_key_clone(st_data_t x)
-{
- st_strend_key* new_key;
- st_strend_key* key = (st_strend_key* )x;
-
- new_key = alloc(st_strend_key);
- *new_key = *key;
- return (st_data_t )new_key;
-}
diff --git a/ext/mbstring/oniguruma/st.h b/ext/mbstring/oniguruma/st.h
index c5cc4e625..da65e7fef 100644
--- a/ext/mbstring/oniguruma/st.h
+++ b/ext/mbstring/oniguruma/st.h
@@ -14,8 +14,6 @@ typedef struct st_table st_table;
struct st_hash_type {
int (*compare)();
int (*hash)();
- int (*key_free)();
- st_data_t (*key_clone)();
};
struct st_table {
@@ -25,11 +23,6 @@ struct st_table {
struct st_table_entry **bins;
};
-typedef struct {
- unsigned char* s;
- unsigned char* end;
-} st_strend_key;
-
#define st_is_member(table,key) st_lookup(table,key,(st_data_t *)0)
enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK};
@@ -51,23 +44,16 @@ st_table *st_init_numtable _((void));
st_table *st_init_numtable_with_size _((int));
st_table *st_init_strtable _((void));
st_table *st_init_strtable_with_size _((int));
-st_table *st_init_strend_table_with_size _((int));
int st_delete _((st_table *, st_data_t *, st_data_t *));
int st_delete_safe _((st_table *, st_data_t *, st_data_t *, st_data_t));
int st_insert _((st_table *, st_data_t, st_data_t));
-int st_insert_strend _((st_table *, const unsigned char*, const unsigned char*, st_data_t));
int st_lookup _((st_table *, st_data_t, st_data_t *));
-int st_lookup_strend _((st_table *, const unsigned char*, const unsigned char*, st_data_t*));
-void st_foreach _((st_table *, int (*)(ANYARGS), st_data_t));
+int st_foreach _((st_table *, int (*)(ANYARGS), st_data_t));
void st_add_direct _((st_table *, st_data_t, st_data_t));
-void st_add_direct_strend _((st_table *, const unsigned char*, const unsigned char*, st_data_t));
void st_free_table _((st_table *));
void st_cleanup_safe _((st_table *, st_data_t));
st_table *st_copy _((st_table *));
-extern st_data_t st_nothing_key_clone _((st_data_t key));
-extern int st_nothing_key_free _((st_data_t key));
-
#define ST_NUMCMP ((int (*)()) 0)
#define ST_NUMHASH ((int (*)()) -2)
diff --git a/ext/mbstring/oniguruma/win32/config.h b/ext/mbstring/oniguruma/win32/config.h
index 7ee9e2506..bdbdaf25c 100644
--- a/ext/mbstring/oniguruma/win32/config.h
+++ b/ext/mbstring/oniguruma/win32/config.h
@@ -1,84 +1,84 @@
-#define STDC_HEADERS 1
-#define HAVE_SYS_TYPES_H 1
-#define HAVE_SYS_STAT_H 1
-#define HAVE_STDLIB_H 1
-#define HAVE_STRING_H 1
-#define HAVE_MEMORY_H 1
-#define HAVE_FLOAT_H 1
-#define HAVE_OFF_T 1
-#define SIZEOF_INT 4
-#define SIZEOF_SHORT 2
-#define SIZEOF_LONG 4
-#define SIZEOF_LONG_LONG 0
-#define SIZEOF___INT64 8
-#define SIZEOF_OFF_T 4
-#define SIZEOF_VOIDP 4
-#define SIZEOF_FLOAT 4
-#define SIZEOF_DOUBLE 8
-#define HAVE_PROTOTYPES 1
-#define TOKEN_PASTE(x,y) x##y
-#define HAVE_STDARG_PROTOTYPES 1
-#ifndef NORETURN
-#if _MSC_VER > 1100
-#define NORETURN(x) __declspec(noreturn) x
-#else
-#define NORETURN(x) x
-#endif
-#endif
-#define HAVE_DECL_SYS_NERR 1
-#define STDC_HEADERS 1
-#define HAVE_STDLIB_H 1
-#define HAVE_STRING_H 1
-#define HAVE_LIMITS_H 1
-#define HAVE_FCNTL_H 1
-#define HAVE_SYS_UTIME_H 1
-#define HAVE_MEMORY_H 1
-#define uid_t int
-#define gid_t int
-#define HAVE_STRUCT_STAT_ST_RDEV 1
-#define HAVE_ST_RDEV 1
-#define GETGROUPS_T int
-#define RETSIGTYPE void
-#define HAVE_ALLOCA 1
-#define HAVE_DUP2 1
-#define HAVE_MEMCMP 1
-#define HAVE_MEMMOVE 1
-#define HAVE_MKDIR 1
-#define HAVE_STRCASECMP 1
-#define HAVE_STRNCASECMP 1
-#define HAVE_STRERROR 1
-#define HAVE_STRFTIME 1
-#define HAVE_STRCHR 1
-#define HAVE_STRSTR 1
-#define HAVE_STRTOD 1
-#define HAVE_STRTOL 1
-#define HAVE_STRTOUL 1
-#define HAVE_FLOCK 1
-#define HAVE_VSNPRINTF 1
-#define HAVE_FINITE 1
-#define HAVE_FMOD 1
-#define HAVE_FREXP 1
-#define HAVE_HYPOT 1
-#define HAVE_MODF 1
-#define HAVE_WAITPID 1
-#define HAVE_CHSIZE 1
-#define HAVE_TIMES 1
-#define HAVE__SETJMP 1
-#define HAVE_TELLDIR 1
-#define HAVE_SEEKDIR 1
-#define HAVE_MKTIME 1
-#define HAVE_COSH 1
-#define HAVE_SINH 1
-#define HAVE_TANH 1
-#define HAVE_EXECVE 1
-#define HAVE_TZNAME 1
-#define HAVE_DAYLIGHT 1
-#define SETPGRP_VOID 1
-#define inline __inline
-#define NEED_IO_SEEK_BETWEEN_RW 1
-#define RSHIFT(x,y) ((x)>>(int)y)
-#define FILE_COUNT _cnt
-#define FILE_READPTR _ptr
-#define DEFAULT_KCODE KCODE_NONE
-#define DLEXT ".so"
-#define DLEXT2 ".dll"
+#define STDC_HEADERS 1
+#define HAVE_SYS_TYPES_H 1
+#define HAVE_SYS_STAT_H 1
+#define HAVE_STDLIB_H 1
+#define HAVE_STRING_H 1
+#define HAVE_MEMORY_H 1
+#define HAVE_FLOAT_H 1
+#define HAVE_OFF_T 1
+#define SIZEOF_INT 4
+#define SIZEOF_SHORT 2
+#define SIZEOF_LONG 4
+#define SIZEOF_LONG_LONG 0
+#define SIZEOF___INT64 8
+#define SIZEOF_OFF_T 4
+#define SIZEOF_VOIDP 4
+#define SIZEOF_FLOAT 4
+#define SIZEOF_DOUBLE 8
+#define HAVE_PROTOTYPES 1
+#define TOKEN_PASTE(x,y) x##y
+#define HAVE_STDARG_PROTOTYPES 1
+#ifndef NORETURN
+#if _MSC_VER > 1100
+#define NORETURN(x) __declspec(noreturn) x
+#else
+#define NORETURN(x) x
+#endif
+#endif
+#define HAVE_DECL_SYS_NERR 1
+#define STDC_HEADERS 1
+#define HAVE_STDLIB_H 1
+#define HAVE_STRING_H 1
+#define HAVE_LIMITS_H 1
+#define HAVE_FCNTL_H 1
+#define HAVE_SYS_UTIME_H 1
+#define HAVE_MEMORY_H 1
+#define uid_t int
+#define gid_t int
+#define HAVE_STRUCT_STAT_ST_RDEV 1
+#define HAVE_ST_RDEV 1
+#define GETGROUPS_T int
+#define RETSIGTYPE void
+#define HAVE_ALLOCA 1
+#define HAVE_DUP2 1
+#define HAVE_MEMCMP 1
+#define HAVE_MEMMOVE 1
+#define HAVE_MKDIR 1
+#define HAVE_STRCASECMP 1
+#define HAVE_STRNCASECMP 1
+#define HAVE_STRERROR 1
+#define HAVE_STRFTIME 1
+#define HAVE_STRCHR 1
+#define HAVE_STRSTR 1
+#define HAVE_STRTOD 1
+#define HAVE_STRTOL 1
+#define HAVE_STRTOUL 1
+#define HAVE_FLOCK 1
+#define HAVE_VSNPRINTF 1
+#define HAVE_FINITE 1
+#define HAVE_FMOD 1
+#define HAVE_FREXP 1
+#define HAVE_HYPOT 1
+#define HAVE_MODF 1
+#define HAVE_WAITPID 1
+#define HAVE_CHSIZE 1
+#define HAVE_TIMES 1
+#define HAVE__SETJMP 1
+#define HAVE_TELLDIR 1
+#define HAVE_SEEKDIR 1
+#define HAVE_MKTIME 1
+#define HAVE_COSH 1
+#define HAVE_SINH 1
+#define HAVE_TANH 1
+#define HAVE_EXECVE 1
+#define HAVE_TZNAME 1
+#define HAVE_DAYLIGHT 1
+#define SETPGRP_VOID 1
+#define inline __inline
+#define NEED_IO_SEEK_BETWEEN_RW 1
+#define RSHIFT(x,y) ((x)>>(int)y)
+#define FILE_COUNT _cnt
+#define FILE_READPTR _ptr
+#define DEFAULT_KCODE KCODE_NONE
+#define DLEXT ".so"
+#define DLEXT2 ".dll"