diff options
Diffstat (limited to 'ext/mbstring/oniguruma/regparse.c')
-rw-r--r-- | ext/mbstring/oniguruma/regparse.c | 459 |
1 files changed, 354 insertions, 105 deletions
diff --git a/ext/mbstring/oniguruma/regparse.c b/ext/mbstring/oniguruma/regparse.c index 58e122f48..407b73fc4 100644 --- a/ext/mbstring/oniguruma/regparse.c +++ b/ext/mbstring/oniguruma/regparse.c @@ -2,7 +2,7 @@ regparse.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -60,6 +60,20 @@ OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY; extern void onig_null_warn(const char* s) { } +#ifdef RUBY_PLATFORM +extern void +onig_rb_warn(const char* s) +{ + rb_warn(s); +} + +extern void +onig_rb_warning(const char* s) +{ + rb_warning(s); +} +#endif + #ifdef DEFAULT_WARN_FUNCTION static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION; #else @@ -305,6 +319,88 @@ typedef struct { #include "st.h" +typedef struct { + unsigned char* s; + unsigned char* end; +} st_strend_key; + +static int strend_cmp(st_strend_key*, st_strend_key*); +static int strend_hash(st_strend_key*); + +static struct st_hash_type type_strend_hash = { + strend_cmp, + strend_hash, +}; + +static st_table* +onig_st_init_strend_table_with_size(int size) +{ + return onig_st_init_table_with_size(&type_strend_hash, size); +} + +static int +onig_st_lookup_strend(st_table *table, const UChar* str_key, const UChar* end_key, st_data_t *value) +{ + st_strend_key key; + + key.s = (unsigned char* )str_key; + key.end = (unsigned char* )end_key; + + return onig_st_lookup(table, (st_data_t )(&key), value); +} + +static int +onig_st_insert_strend(st_table *table, const UChar* str_key, const UChar* end_key, st_data_t value) +{ + st_strend_key* key; + int result; + + key = (st_strend_key* )xmalloc(sizeof(st_strend_key)); + key->s = (unsigned char* )str_key; + key->end = (unsigned char* )end_key; + result = 
onig_st_insert(table, (st_data_t )key, value); + if (result) { + xfree(key); + } + return result; +} + +static int +strend_cmp(st_strend_key* x, st_strend_key* y) +{ + unsigned char *p, *q; + int c; + + if ((x->end - x->s) != (y->end - y->s)) + return 1; + + p = x->s; + q = y->s; + while (p < x->end) { + c = (int )*p - (int )*q; + if (c != 0) return c; + + p++; q++; + } + + return 0; +} + +static int +strend_hash(st_strend_key* x) +{ + int val; + unsigned char *p; + + val = 0; + p = x->s; + while (p < x->end) { + val = val * 997 + (int )*p++; + } + + return val + (val >> 5); +} + typedef st_table NameTable; typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */ @@ -350,8 +446,10 @@ onig_print_names(FILE* fp, regex_t* reg) static int i_free_name_entry(UChar* key, NameEntry* e, void* arg) { - xfree(e->name); /* == key */ + xfree(e->name); if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs); + xfree(key); + xfree(e); return ST_DELETE; } @@ -801,6 +899,23 @@ onig_number_of_names(regex_t* reg) } #endif /* else USE_NAMED_GROUP */ +extern int +onig_noname_group_capture_is_active(regex_t* reg) +{ + if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP)) + return 0; + +#ifdef USE_NAMED_GROUP + if (onig_number_of_names(reg) > 0 && + IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && + !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { + return 0; + } +#endif + + return 1; +} + #define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16 @@ -825,6 +940,13 @@ scan_env_clear(ScanEnv* env) for (i = 0; i < SCANENV_MEMNODES_SIZE; i++) env->mem_nodes_static[i] = NULL_NODE; + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + env->num_comb_exp_check = 0; + env->comb_exp_max_regnum = 0; + env->curr_max_regnum = 0; + env->has_recursion = 0; +#endif } static int @@ -970,13 +1092,13 @@ onig_free_node_list() { FreeNode* n; - THREAD_ATOMIC_START; - while (FreeNodeList) { + /* THREAD_ATOMIC_START; */ + while (IS_NOT_NULL(FreeNodeList)) { n = FreeNodeList; 
FreeNodeList = FreeNodeList->next; xfree(n); } - THREAD_ATOMIC_END; + /* THREAD_ATOMIC_END; */ return 0; } #endif @@ -987,13 +1109,14 @@ node_new() Node* node; #ifdef USE_RECYCLE_NODE + THREAD_ATOMIC_START; if (IS_NOT_NULL(FreeNodeList)) { - THREAD_ATOMIC_START; node = (Node* )FreeNodeList; FreeNodeList = FreeNodeList->next; THREAD_ATOMIC_END; return node; } + THREAD_ATOMIC_END; #endif node = (Node* )xmalloc(sizeof(Node)); @@ -1020,9 +1143,9 @@ node_new_cclass() return node; } -extern Node* +static Node* node_new_cclass_by_codepoint_range(int not, - OnigCodePoint sbr[], OnigCodePoint mbr[]) + const OnigCodePoint sbr[], const OnigCodePoint mbr[]) { CClassNode* cc; int n, i, j; @@ -1128,7 +1251,11 @@ onig_node_new_anchor(int type) } static Node* -node_new_backref(int back_num, int* backrefs, int by_name, ScanEnv* env) +node_new_backref(int back_num, int* backrefs, int by_name, +#ifdef USE_BACKREF_AT_LEVEL + int exist_level, int nest_level, +#endif + ScanEnv* env) { int i; Node* node = node_new(); @@ -1141,6 +1268,13 @@ node_new_backref(int back_num, int* backrefs, int by_name, ScanEnv* env) if (by_name != 0) NBACKREF(node).state |= NST_NAME_REF; +#ifdef USE_BACKREF_AT_LEVEL + if (exist_level != 0) { + NBACKREF(node).state |= NST_NEST_LEVEL; + NBACKREF(node).nest_level = nest_level; + } +#endif + for (i = 0; i < back_num; i++) { if (backrefs[i] <= env->num_mem && IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) { @@ -1194,11 +1328,17 @@ node_new_qualifier(int lower, int upper, int by_number) NQUALIFIER(node).lower = lower; NQUALIFIER(node).upper = upper; NQUALIFIER(node).greedy = 1; - NQUALIFIER(node).by_number = by_number; NQUALIFIER(node).target_empty_info = NQ_TARGET_ISNOT_EMPTY; NQUALIFIER(node).head_exact = NULL_NODE; NQUALIFIER(node).next_head_exact = NULL_NODE; NQUALIFIER(node).is_refered = 0; + if (by_number != 0) + NQUALIFIER(node).state |= NST_BY_NUMBER; + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + NQUALIFIER(node).comb_exp_check_num = 0; +#endif + return node; 
} @@ -2013,7 +2153,7 @@ enum ReduceType { RQ_AQ, /* to '*?' */ RQ_QQ, /* to '??' */ RQ_P_QQ, /* to '+)??' */ - RQ_PQ_Q, /* to '+?)?' */ + RQ_PQ_Q /* to '+?)?' */ }; static enum ReduceType ReduceTypeTable[6][6] = { @@ -2125,6 +2265,10 @@ typedef struct { int ref1; int* refs; int by_name; +#ifdef USE_BACKREF_AT_LEVEL + int exist_level; + int level; /* \k<name+n> */ +#endif } backref; struct { UChar* name; @@ -2274,15 +2418,17 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env) control: if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL; PFETCH(c); - if (c == MC_ESC(enc)) { - v = fetch_escaped_value(&p, end, env); - if (v < 0) return v; - c = (OnigCodePoint )v; - } - else if (c == '?') + if (c == '?') { c = 0177; - else + } + else { + if (c == MC_ESC(enc)) { + v = fetch_escaped_value(&p, end, env); + if (v < 0) return v; + c = (OnigCodePoint )v; + } c &= 0x9f; + } break; } /* fall through */ @@ -2302,6 +2448,89 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env) static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env); #ifdef USE_NAMED_GROUP +#ifdef USE_BACKREF_AT_LEVEL +/* + \k<name+n>, \k<name-n> +*/ +static int +fetch_name_with_level(UChar** src, UChar* end, UChar** rname_end + , ScanEnv* env, int* level) +{ + int r, exist_level = 0; + OnigCodePoint c = 0; + OnigCodePoint first_code; + OnigEncoding enc = env->enc; + UChar *name_end; + UChar *p = *src; + PFETCH_READY; + + name_end = end; + r = 0; + if (PEND) { + return ONIGERR_EMPTY_GROUP_NAME; + } + else { + PFETCH(c); + first_code = c; + if (c == '>') + return ONIGERR_EMPTY_GROUP_NAME; + + if (!ONIGENC_IS_CODE_WORD(enc, c)) { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + + while (!PEND) { + name_end = p; + PFETCH(c); + if (c == '>' || c == ')' || c == '+' || c == '-') break; + + if (!ONIGENC_IS_CODE_WORD(enc, c)) { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + + if (c != '>') { + if (c == '+' || c == '-') { + int num; + int flag = (c == '-' ? 
-1 : 1); + + PFETCH(c); + if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err; + PUNFETCH; + num = onig_scan_unsigned_number(&p, end, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + *level = (num * flag); + exist_level = 1; + + PFETCH(c); + if (c == '>') + goto first_check; + } + + err: + r = ONIGERR_INVALID_GROUP_NAME; + name_end = end; + } + else { + first_check: + if (ONIGENC_IS_CODE_ASCII(first_code) && + ONIGENC_IS_CODE_UPPER(enc, first_code)) + r = ONIGERR_INVALID_GROUP_NAME; + } + + if (r == 0) { + *rname_end = name_end; + *src = p; + return (exist_level ? 1 : 0); + } + else { + onig_scan_env_set_error_string(env, r, *src, name_end); + return r; + } +} +#endif /* USE_BACKREF_AT_LEVEL */ + /* def: 0 -> define name (don't allow number name) 1 -> reference name (allow number name) @@ -2428,11 +2657,11 @@ CC_ESC_WARN(ScanEnv* env, UChar *c) if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) && IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) { - char buf[WARN_BUFSIZE]; + UChar buf[WARN_BUFSIZE]; onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, env->pattern, env->pattern_end, - "character class has '%s' without escape", c); - (*onig_warn)(buf); + (UChar* )"character class has '%s' without escape", c); + (*onig_warn)((char* )buf); } } @@ -2442,11 +2671,11 @@ CCEND_ESC_WARN(ScanEnv* env, UChar* c) if (onig_warn == onig_null_warn) return ; if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) { - char buf[WARN_BUFSIZE]; + UChar buf[WARN_BUFSIZE]; onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc, (env)->pattern, (env)->pattern_end, - "regular expression has '%s' without escape", c); - (*onig_warn)(buf); + (UChar* )"regular expression has '%s' without escape", c); + (*onig_warn)((char* )buf); } } @@ -2537,6 +2766,8 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->type = TK_CHAR; tok->base = 0; tok->u.c = c; + tok->escaped = 0; + if (c == ']') { tok->type = TK_CC_CLOSE; } @@ -2708,7 
+2939,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->type = TK_CC_CC_OPEN; } else { - CC_ESC_WARN(env, "["); + CC_ESC_WARN(env, (UChar* )"["); } } } @@ -2747,7 +2978,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->backp = p; PFETCH(c); - if (c == MC_ESC(enc)) { + if (IS_MC_ESC_CODE(c, enc, syn)) { if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; tok->backp = p; @@ -3012,6 +3243,9 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->u.backref.num = 1; tok->u.backref.ref1 = num; tok->u.backref.by_name = 0; +#ifdef USE_BACKREF_AT_LEVEL + tok->u.backref.exist_level = 0; +#endif break; } @@ -3050,8 +3284,17 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) int* backs; prev = p; + +#ifdef USE_BACKREF_AT_LEVEL + name_end = NULL_UCHARP; /* no need. escape gcc warning. */ + r = fetch_name_with_level(&p, end, &name_end, env, &tok->u.backref.level); + if (r == 1) tok->u.backref.exist_level = 1; + else tok->u.backref.exist_level = 0; +#else r = fetch_name(&p, end, &name_end, env, 1); +#endif if (r < 0) return r; + num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs); if (num <= 0) { onig_scan_env_set_error_string(env, @@ -3170,13 +3413,17 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) switch (c) { case '.': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break; +#ifdef USE_VARIABLE_META_CHARS any_char: +#endif tok->type = TK_ANYCHAR; break; case '*': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break; +#ifdef USE_VARIABLE_META_CHARS anytime: +#endif tok->type = TK_OP_REPEAT; tok->u.repeat.lower = 0; tok->u.repeat.upper = REPEAT_INFINITE; @@ -3185,7 +3432,9 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) case '+': if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break; +#ifdef USE_VARIABLE_META_CHARS one_or_more_time: +#endif tok->type = TK_OP_REPEAT; tok->u.repeat.lower = 1; tok->u.repeat.upper = REPEAT_INFINITE; @@ -3194,7 +3443,9 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) case '?': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break; +#ifdef USE_VARIABLE_META_CHARS zero_or_one_time: +#endif tok->type = TK_OP_REPEAT; tok->u.repeat.lower = 0; tok->u.repeat.upper = 1; @@ -3271,7 +3522,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) case ']': if (*src > env->pattern) /* /].../ is allowed. */ - CCEND_ESC_WARN(env, "]"); + CCEND_ESC_WARN(env, (UChar* )"]"); break; case '#': @@ -3297,14 +3548,16 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) } } +#ifdef USE_VARIABLE_META_CHARS out: +#endif *src = p; return tok->type; } static int add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc, - OnigCodePoint sbr[], OnigCodePoint mbr[]) + const OnigCodePoint sbr[], const OnigCodePoint mbr[]) { int i, r; OnigCodePoint j; @@ -3368,7 +3621,7 @@ static int add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) { int c, r; - OnigCodePoint *sbr, *mbr; + const OnigCodePoint *sbr, *mbr; OnigEncoding enc = env->enc; r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sbr, &mbr); @@ -3506,19 +3759,19 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env) #define POSIX_BRACKET_NAME_MAX_LEN 6 static PosixBracketEntryType PBS[] = { - { "alnum", ONIGENC_CTYPE_ALNUM, 5 }, - { "alpha", ONIGENC_CTYPE_ALPHA, 5 }, - { "blank", ONIGENC_CTYPE_BLANK, 5 }, - { "cntrl", ONIGENC_CTYPE_CNTRL, 5 }, - { "digit", ONIGENC_CTYPE_DIGIT, 5 }, - { "graph", ONIGENC_CTYPE_GRAPH, 5 }, - { "lower", ONIGENC_CTYPE_LOWER, 5 }, - { "print", ONIGENC_CTYPE_PRINT, 5 }, - { "punct", ONIGENC_CTYPE_PUNCT, 5 }, - { "space", ONIGENC_CTYPE_SPACE, 5 }, - { "upper", ONIGENC_CTYPE_UPPER, 5 }, - { "xdigit", 
ONIGENC_CTYPE_XDIGIT, 6 }, - { "ascii", ONIGENC_CTYPE_ASCII, 5 }, /* I don't know origin. Perl? */ + { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 }, + { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 }, + { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 }, + { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 }, + { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 }, + { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 }, + { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 }, + { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 }, + { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 }, + { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 }, + { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 }, + { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 }, + { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 }, { (UChar* )NULL, -1, 0 } }; @@ -3542,7 +3795,7 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env) for (pb = PBS; IS_NOT_NULL(pb->name); pb++) { if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) { p = (UChar* )onigenc_step(enc, p, end, pb->len); - if (onigenc_with_ascii_strncmp(enc, p, end, ":]", 2) != 0) + if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0) return ONIGERR_INVALID_POSIX_BRACKET_TYPE; r = add_ctype_to_cc(cc, pb->ctype, not, env); @@ -3577,19 +3830,19 @@ static int property_name_to_ctype(UChar* p, UChar* end, OnigEncoding enc) { static PosixBracketEntryType PBS[] = { - { "Alnum", ONIGENC_CTYPE_ALNUM, 5 }, - { "Alpha", ONIGENC_CTYPE_ALPHA, 5 }, - { "Blank", ONIGENC_CTYPE_BLANK, 5 }, - { "Cntrl", ONIGENC_CTYPE_CNTRL, 5 }, - { "Digit", ONIGENC_CTYPE_DIGIT, 5 }, - { "Graph", ONIGENC_CTYPE_GRAPH, 5 }, - { "Lower", ONIGENC_CTYPE_LOWER, 5 }, - { "Print", ONIGENC_CTYPE_PRINT, 5 }, - { "Punct", ONIGENC_CTYPE_PUNCT, 5 }, - { "Space", ONIGENC_CTYPE_SPACE, 5 }, - { "Upper", ONIGENC_CTYPE_UPPER, 5 }, - { "XDigit", ONIGENC_CTYPE_XDIGIT, 6 }, - { "ASCII", ONIGENC_CTYPE_ASCII, 5 }, + { (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 }, + { (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 }, + { (UChar* 
)"Blank", ONIGENC_CTYPE_BLANK, 5 }, + { (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 }, + { (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 }, + { (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 }, + { (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 }, + { (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 }, + { (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 }, + { (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 }, + { (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 }, + { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 }, + { (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 }, { (UChar* )NULL, -1, 0 } }; @@ -3839,7 +4092,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, *src, env->pattern_end, 1, env->enc)) return ONIGERR_EMPTY_CHAR_CLASS; - CC_ESC_WARN(env, "]"); + CC_ESC_WARN(env, (UChar* )"]"); r = tok->type = TK_CHAR; /* allow []...] */ } @@ -3942,7 +4195,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, r = parse_posix_bracket(cc, &p, end, env); if (r < 0) goto err; if (r == 1) { /* is not POSIX bracket */ - CC_ESC_WARN(env, "["); + CC_ESC_WARN(env, (UChar* )"["); p = tok->backp; v = (OnigCodePoint )tok->u.c; in_israw = 0; @@ -3988,7 +4241,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, goto val_entry; } else if (r == TK_CC_AND) { - CC_ESC_WARN(env, "-"); + CC_ESC_WARN(env, (UChar* )"-"); goto range_end_val; } state = CCS_RANGE; @@ -4003,12 +4256,12 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, fetched = 1; /* [--x] or [a&&-x] is warned. 
*/ if (r == TK_CC_RANGE || and_start != 0) - CC_ESC_WARN(env, "-"); + CC_ESC_WARN(env, (UChar* )"-"); goto val_entry; } else if (state == CCS_RANGE) { - CC_ESC_WARN(env, "-"); + CC_ESC_WARN(env, (UChar* )"-"); goto sb_char; /* [!--x] is allowed */ } else { /* CCS_COMPLETE */ @@ -4017,12 +4270,12 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, fetched = 1; if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */ else if (r == TK_CC_AND) { - CC_ESC_WARN(env, "-"); + CC_ESC_WARN(env, (UChar* )"-"); goto range_end_val; } if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) { - CC_ESC_WARN(env, "-"); + CC_ESC_WARN(env, (UChar* )"-"); goto sb_char; /* [0-9-a] is allowed as [0-9\-a] */ } r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS; @@ -4326,10 +4579,9 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, } } else { -#ifdef USE_NAMED_GROUP if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP)) goto group; -#endif + *np = node_new_effect_memory(env->option, 0); CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); num = scan_env_add_mem_entry(env); @@ -4358,11 +4610,11 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, return 0; } -static char* PopularQStr[] = { +static const char* PopularQStr[] = { "?", "*", "+", "??", "*?", "+?" }; -static char* ReduceQStr[] = { +static const char* ReduceQStr[] = { "", "", "*", "*?", "??", "+ and ??", "+? and ?" }; @@ -4394,15 +4646,13 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env) { /* check redundant double repeat. */ /* verbose warn (?:.?)? etc... but not warn (.?)? etc... 
*/ QualifierNode* qnt = &(NQUALIFIER(target)); + int nestq_num = popular_qualifier_num(qn); + int targetq_num = popular_qualifier_num(qnt); #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR - if (qn->by_number == 0 && qnt->by_number == 0 && + if (!IS_QUALIFIER_BY_NUMBER(qn) && !IS_QUALIFIER_BY_NUMBER(qnt) && IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) { - int nestq_num, targetq_num; - char buf[WARN_BUFSIZE]; - - nestq_num = popular_qualifier_num(qn); - targetq_num = popular_qualifier_num(qnt); + UChar buf[WARN_BUFSIZE]; switch(ReduceTypeTable[targetq_num][nestq_num]) { case RQ_ASIS: @@ -4411,9 +4661,9 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env) case RQ_DEL: if (onig_verb_warn != onig_null_warn) { onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, - env->pattern, env->pattern_end, - "redundant nested repeat operator"); - (*onig_verb_warn)(buf); + env->pattern, env->pattern_end, + (UChar* )"redundant nested repeat operator"); + (*onig_verb_warn)((char* )buf); } goto warn_exit; break; @@ -4422,10 +4672,10 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env) if (onig_verb_warn != onig_null_warn) { onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, env->pattern, env->pattern_end, - "nested repeat operator %s and %s was replaced with '%s'", + (UChar* )"nested repeat operator %s and %s was replaced with '%s'", PopularQStr[targetq_num], PopularQStr[nestq_num], ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]); - (*onig_verb_warn)(buf); + (*onig_verb_warn)((char* )buf); } goto warn_exit; break; @@ -4434,9 +4684,17 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env) warn_exit: #endif - if (popular_qualifier_num(qnt) >= 0 && popular_qualifier_num(qn) >= 0) { - onig_reduce_nested_qualifier(qnode, target); - goto q_exit; + if (targetq_num >= 0) { + if (nestq_num >= 0) { + onig_reduce_nested_qualifier(qnode, target); + goto q_exit; + } + else if (targetq_num == 1 || targetq_num == 2) { 
/* * or + */ + /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */ + if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) { + qn->upper = (qn->lower == 0 ? 1 : qn->lower); + } + } } } break; @@ -4457,8 +4715,8 @@ make_compound_alt_node_from_cc(OnigAmbigType ambig_flag, OnigEncoding enc, int r, i, j, k, clen, len, ncode, n; UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; Node **ptail, *snode = NULL_NODE; - OnigCompAmbigCodes* ccs; - OnigCompAmbigCodeItem* ci; + const OnigCompAmbigCodes* ccs; + const OnigCompAmbigCodeItem* ci; OnigAmbigType amb; n = 0; @@ -4546,27 +4804,9 @@ static int type_cclass_hash(type_cclass_key* key) return val + (val >> 5); } -static int type_cclass_key_free(st_data_t x) -{ - xfree((void* )x); - return 0; -} - -static st_data_t type_cclass_key_clone(st_data_t x) -{ - type_cclass_key* new_key; - type_cclass_key* key = (type_cclass_key* )x; - - new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key)); - *new_key = *key; - return (st_data_t )new_key; -} - static struct st_hash_type type_type_cclass_hash = { type_cclass_cmp, type_cclass_hash, - type_cclass_key_free, - type_cclass_key_clone }; static st_table* OnigTypeCClassTable; @@ -4580,6 +4820,8 @@ i_free_shared_class(type_cclass_key* key, Node* node, void* arg) if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf); xfree(node); } + + if (IS_NOT_NULL(key)) xfree(key); return ST_DELETE; } @@ -4588,6 +4830,8 @@ onig_free_shared_cclass_table() { if (IS_NOT_NULL(OnigTypeCClassTable)) { onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0); + xfree(OnigTypeCClassTable); + OnigTypeCClassTable = NULL; } return 0; @@ -4741,7 +4985,7 @@ parse_exp(Node** np, OnigToken* tok, int term, int ctype, not; #ifdef USE_SHARED_CCLASS_TABLE - OnigCodePoint *sbr, *mbr; + const OnigCodePoint *sbr, *mbr; ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not); r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, ctype, &sbr, &mbr); @@ -4823,7 +5067,7 @@ parse_exp(Node** np, OnigToken* tok, int term, if 
(IS_IGNORECASE(env->option)) { int i, n, in_cc; - OnigPairAmbigCodes* ccs; + const OnigPairAmbigCodes* ccs; BitSetRef bs = cc->bs; OnigAmbigType amb; @@ -4892,8 +5136,13 @@ parse_exp(Node** np, OnigToken* tok, int term, case TK_BACKREF: len = tok->u.backref.num; *np = node_new_backref(len, - (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)), - tok->u.backref.by_name, env); + (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)), + tok->u.backref.by_name, +#ifdef USE_BACKREF_AT_LEVEL + tok->u.backref.exist_level, + tok->u.backref.level, +#endif + env); CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); break; |