summaryrefslogtreecommitdiff
path: root/ext/mbstring/oniguruma/regcomp.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/mbstring/oniguruma/regcomp.c')
-rw-r--r--ext/mbstring/oniguruma/regcomp.c854
1 files changed, 663 insertions, 191 deletions
diff --git a/ext/mbstring/oniguruma/regcomp.c b/ext/mbstring/oniguruma/regcomp.c
index a2315fcec..9b862657d 100644
--- a/ext/mbstring/oniguruma/regcomp.c
+++ b/ext/mbstring/oniguruma/regcomp.c
@@ -2,7 +2,7 @@
regcomp.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -186,6 +186,17 @@ add_opcode(regex_t* reg, int opcode)
return 0;
}
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+static int
+add_state_check_num(regex_t* reg, int num)
+{
+ StateCheckNumType n = (StateCheckNumType )num;
+
+ BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
+ return 0;
+}
+#endif
+
static int
add_rel_addr(regex_t* reg, int addr)
{
@@ -644,7 +655,7 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper)
}
p[id].lower = lower;
- p[id].upper = upper;
+ p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
return 0;
}
@@ -684,7 +695,254 @@ compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info,
return r;
}
+static int
+is_anychar_star_qualifier(QualifierNode* qn)
+{
+ if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
+ NTYPE(qn->target) == N_ANYCHAR)
+ return 1;
+ else
+ return 0;
+}
+
#define QUALIFIER_EXPAND_LIMIT_SIZE 50
+#define CKN_ON (ckn > 0)
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+
+static int
+compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
+{
+ int len, mod_tlen, cklen;
+ int ckn;
+ int infinite = IS_REPEAT_INFINITE(qn->upper);
+ int empty_info = qn->target_empty_info;
+ int tlen = compile_length_tree(qn->target, reg);
+
+ if (tlen < 0) return tlen;
+
+ ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
+
+ cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
+
+ /* anychar repeat */
+ if (NTYPE(qn->target) == N_ANYCHAR) {
+ if (qn->greedy && infinite) {
+ if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
+ return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
+ else
+ return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
+ }
+ }
+
+ if (empty_info != 0)
+ mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
+ else
+ mod_tlen = tlen;
+
+ if (infinite && qn->lower <= 1) {
+ if (qn->greedy) {
+ if (qn->lower == 1)
+ len = SIZE_OP_JUMP;
+ else
+ len = 0;
+
+ len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
+ }
+ else {
+ if (qn->lower == 0)
+ len = SIZE_OP_JUMP;
+ else
+ len = 0;
+
+ len += mod_tlen + SIZE_OP_PUSH + cklen;
+ }
+ }
+ else if (qn->upper == 0) {
+ if (qn->is_refered != 0) /* /(?<n>..){0}/ */
+ len = SIZE_OP_JUMP + tlen;
+ else
+ len = 0;
+ }
+ else if (qn->upper == 1 && qn->greedy) {
+ if (qn->lower == 0) {
+ if (CKN_ON) {
+ len = SIZE_OP_STATE_CHECK_PUSH + tlen;
+ }
+ else {
+ len = SIZE_OP_PUSH + tlen;
+ }
+ }
+ else {
+ len = tlen;
+ }
+ }
+ else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
+ len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
+ }
+ else {
+ len = SIZE_OP_REPEAT_INC
+ + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
+ if (CKN_ON)
+ len += SIZE_OP_STATE_CHECK;
+ }
+
+ return len;
+}
+
+static int
+compile_qualifier_node(QualifierNode* qn, regex_t* reg)
+{
+ int r, mod_tlen;
+ int ckn;
+ int infinite = IS_REPEAT_INFINITE(qn->upper);
+ int empty_info = qn->target_empty_info;
+ int tlen = compile_length_tree(qn->target, reg);
+
+ if (tlen < 0) return tlen;
+
+ ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
+
+ if (is_anychar_star_qualifier(qn)) {
+ r = compile_tree_n_times(qn->target, qn->lower, reg);
+ if (r) return r;
+ if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
+ if (IS_MULTILINE(reg->options))
+ r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
+ else
+ r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
+ if (r) return r;
+ if (CKN_ON) {
+ r = add_state_check_num(reg, ckn);
+ if (r) return r;
+ }
+
+ return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1);
+ }
+ else {
+ if (IS_MULTILINE(reg->options)) {
+ r = add_opcode(reg, (CKN_ON ?
+ OP_STATE_CHECK_ANYCHAR_ML_STAR
+ : OP_ANYCHAR_ML_STAR));
+ }
+ else {
+ r = add_opcode(reg, (CKN_ON ?
+ OP_STATE_CHECK_ANYCHAR_STAR
+ : OP_ANYCHAR_STAR));
+ }
+ if (r) return r;
+ if (CKN_ON)
+ r = add_state_check_num(reg, ckn);
+
+ return r;
+ }
+ }
+
+ if (empty_info != 0)
+ mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
+ else
+ mod_tlen = tlen;
+
+ if (infinite && qn->lower <= 1) {
+ if (qn->greedy) {
+ if (qn->lower == 1) {
+ r = add_opcode_rel_addr(reg, OP_JUMP,
+ (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
+ if (r) return r;
+ }
+
+ if (CKN_ON) {
+ r = add_opcode(reg, OP_STATE_CHECK_PUSH);
+ if (r) return r;
+ r = add_state_check_num(reg, ckn);
+ if (r) return r;
+ r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
+ }
+ else {
+ r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
+ }
+ if (r) return r;
+ r = compile_tree_empty_check(qn->target, reg, empty_info);
+ if (r) return r;
+ r = add_opcode_rel_addr(reg, OP_JUMP,
+ -(mod_tlen + (int )SIZE_OP_JUMP
+ + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
+ }
+ else {
+ if (qn->lower == 0) {
+ r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
+ if (r) return r;
+ }
+ r = compile_tree_empty_check(qn->target, reg, empty_info);
+ if (r) return r;
+ if (CKN_ON) {
+ r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
+ if (r) return r;
+ r = add_state_check_num(reg, ckn);
+ if (r) return r;
+ r = add_rel_addr(reg,
+ -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
+ }
+ else
+ r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
+ }
+ }
+ else if (qn->upper == 0) {
+ if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
+ r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
+ if (r) return r;
+ r = compile_tree(qn->target, reg);
+ }
+ else
+ r = 0;
+ }
+ else if (qn->upper == 1 && qn->greedy) {
+ if (qn->lower == 0) {
+ if (CKN_ON) {
+ r = add_opcode(reg, OP_STATE_CHECK_PUSH);
+ if (r) return r;
+ r = add_state_check_num(reg, ckn);
+ if (r) return r;
+ r = add_rel_addr(reg, tlen);
+ }
+ else {
+ r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
+ }
+ if (r) return r;
+ }
+
+ r = compile_tree(qn->target, reg);
+ }
+ else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
+ if (CKN_ON) {
+ r = add_opcode(reg, OP_STATE_CHECK_PUSH);
+ if (r) return r;
+ r = add_state_check_num(reg, ckn);
+ if (r) return r;
+ r = add_rel_addr(reg, SIZE_OP_JUMP);
+ }
+ else {
+ r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
+ }
+
+ if (r) return r;
+ r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
+ if (r) return r;
+ r = compile_tree(qn->target, reg);
+ }
+ else {
+ r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
+ if (CKN_ON) {
+ if (r) return r;
+ r = add_opcode(reg, OP_STATE_CHECK);
+ if (r) return r;
+ r = add_state_check_num(reg, ckn);
+ }
+ }
+ return r;
+}
+
+#else /* USE_COMBINATION_EXPLOSION_CHECK */
static int
compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
@@ -752,16 +1010,6 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
}
static int
-is_anychar_star_qualifier(QualifierNode* qn)
-{
- if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
- NTYPE(qn->target) == N_ANYCHAR)
- return 1;
- else
- return 0;
-}
-
-static int
compile_qualifier_node(QualifierNode* qn, regex_t* reg)
{
int i, r, mod_tlen;
@@ -887,6 +1135,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
}
return r;
}
+#endif /* USE_COMBINATION_EXPLOSION_CHECK */
static int
compile_length_option_node(EffectNode* node, regex_t* reg)
@@ -1268,8 +1517,15 @@ compile_length_tree(Node* node, regex_t* reg)
{
BackrefNode* br = &(NBACKREF(node));
+#ifdef USE_BACKREF_AT_LEVEL
+ if (IS_BACKREF_NEST_LEVEL(br)) {
+ r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
+ SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
+ }
+ else
+#endif
if (br->back_num == 1) {
- r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 3)
+ r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
}
else {
@@ -1381,9 +1637,21 @@ compile_tree(Node* node, regex_t* reg)
case N_BACKREF:
{
- int i;
BackrefNode* br = &(NBACKREF(node));
+#ifdef USE_BACKREF_AT_LEVEL
+ if (IS_BACKREF_NEST_LEVEL(br)) {
+ r = add_opcode(reg, OP_BACKREF_AT_LEVEL);
+ if (r) return r;
+ r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
+ if (r) return r;
+ r = add_length(reg, br->nest_level);
+ if (r) return r;
+
+ goto add_bacref_mems;
+ }
+ else
+#endif
if (br->back_num == 1) {
n = br->back_static[0];
if (IS_IGNORECASE(reg->options)) {
@@ -1395,7 +1663,6 @@ compile_tree(Node* node, regex_t* reg)
switch (n) {
case 1: r = add_opcode(reg, OP_BACKREF1); break;
case 2: r = add_opcode(reg, OP_BACKREF2); break;
- case 3: r = add_opcode(reg, OP_BACKREF3); break;
default:
r = add_opcode(reg, OP_BACKREFN);
if (r) return r;
@@ -1405,17 +1672,21 @@ compile_tree(Node* node, regex_t* reg)
}
}
else {
+ int i;
int* p;
if (IS_IGNORECASE(reg->options)) {
- add_opcode(reg, OP_BACKREF_MULTI_IC);
+ r = add_opcode(reg, OP_BACKREF_MULTI_IC);
}
else {
- add_opcode(reg, OP_BACKREF_MULTI);
+ r = add_opcode(reg, OP_BACKREF_MULTI);
}
-
if (r) return r;
- add_length(reg, br->back_num);
+
+#ifdef USE_BACKREF_AT_LEVEL
+ add_bacref_mems:
+#endif
+ r = add_length(reg, br->back_num);
if (r) return r;
p = BACKREFS_P(br);
for (i = br->back_num - 1; i >= 0; i--) {
@@ -2120,29 +2391,6 @@ get_char_length_tree(Node* node, regex_t* reg, int* len)
return get_char_length_tree1(node, reg, len, 0);
}
-extern int
-onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
-{
- int found;
-
- if (ONIGENC_MBC_MINLEN(enc) > 1 || (code >= SINGLE_BYTE_SIZE)) {
- if (IS_NULL(cc->mbuf)) {
- found = 0;
- }
- else {
- found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
- }
- }
- else {
- found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
- }
-
- if (IS_CCLASS_NOT(cc))
- return !found;
- else
- return found;
-}
-
/* x is not included y ==> 1 : 0 */
static int
is_not_included(Node* x, Node* y, regex_t* reg)
@@ -2516,6 +2764,9 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
case N_QUALIFIER:
r = subexp_inf_recursive_check(NQUALIFIER(node).target, env, head);
+ if (r == RECURSION_EXIST) {
+ if (NQUALIFIER(node).lower == 0) r = 0;
+ }
break;
case N_ANCHOR:
@@ -2943,15 +3194,55 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
return 0;
}
+
+static int
+divide_ambig_string_node_sub(regex_t* reg, int prev_ambig,
+ UChar* prev_start, UChar* prev,
+ UChar* end, Node*** tailp, Node** root)
+{
+ UChar *tmp, *wp;
+ Node* snode;
+
+ if (prev_ambig != 0) {
+ tmp = prev_start;
+ wp = prev_start;
+ while (tmp < prev) {
+ wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
+ &tmp, end, wp);
+ }
+ snode = onig_node_new_str(prev_start, wp);
+ CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+ NSTRING_SET_AMBIG(snode);
+ if (wp != prev) NSTRING_SET_AMBIG_REDUCE(snode);
+ }
+ else {
+ snode = onig_node_new_str(prev_start, prev);
+ CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+ }
+
+ if (*tailp == (Node** )0) {
+ *root = onig_node_new_list(snode, NULL);
+ CHECK_NULL_RETURN_VAL(*root, ONIGERR_MEMORY);
+ *tailp = &(NCONS(*root).right);
+ }
+ else {
+ **tailp = onig_node_new_list(snode, NULL);
+ CHECK_NULL_RETURN_VAL(**tailp, ONIGERR_MEMORY);
+ *tailp = &(NCONS(**tailp).right);
+ }
+
+ return 0;
+}
+
static int
divide_ambig_string_node(Node* node, regex_t* reg)
{
StrNode* sn = &NSTRING(node);
int ambig, prev_ambig;
UChar *prev, *p, *end, *prev_start, *start, *tmp, *wp;
- Node *snode;
Node *root = NULL_NODE;
Node **tailp = (Node** )0;
+ int r;
start = prev_start = p = sn->s;
end = sn->end;
@@ -2964,33 +3255,9 @@ divide_ambig_string_node(Node* node, regex_t* reg)
if (prev_ambig != (ambig = ONIGENC_IS_MBC_AMBIGUOUS(reg->enc,
reg->ambig_flag, &p, end))) {
- if (prev_ambig != 0) {
- tmp = prev_start;
- wp = prev_start;
- while (tmp < prev) {
- wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
- &tmp, end, wp);
- }
- snode = onig_node_new_str(prev_start, wp);
- CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
- NSTRING_SET_AMBIG(snode);
- if (wp != prev) NSTRING_SET_AMBIG_REDUCE(snode);
- }
- else {
- snode = onig_node_new_str(prev_start, prev);
- CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
- }
-
- if (tailp == (Node** )0) {
- root = onig_node_new_list(snode, NULL);
- CHECK_NULL_RETURN_VAL(root, ONIGERR_MEMORY);
- tailp = &(NCONS(root).right);
- }
- else {
- *tailp = onig_node_new_list(snode, NULL);
- CHECK_NULL_RETURN_VAL(*tailp, ONIGERR_MEMORY);
- tailp = &(NCONS(*tailp).right);
- }
+ r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, prev,
+ end, &tailp, &root);
+ if (r != 0) return r;
prev_ambig = ambig;
prev_start = prev;
@@ -3011,41 +3278,157 @@ divide_ambig_string_node(Node* node, regex_t* reg)
}
}
else {
- if (prev_ambig != 0) {
- tmp = prev_start;
- wp = prev_start;
- while (tmp < end) {
- wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
- &tmp, end, wp);
- }
- snode = onig_node_new_str(prev_start, wp);
- CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
- NSTRING_SET_AMBIG(snode);
- if (wp != end) NSTRING_SET_AMBIG_REDUCE(snode);
+ r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, end,
+ end, &tailp, &root);
+ if (r != 0) return r;
+
+ swap_node(node, root);
+ onig_node_str_clear(root); /* should be after swap! */
+ onig_node_free(root); /* free original string node */
+ }
+
+ return 0;
+}
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+
+#define CEC_THRES_NUM_BIG_REPEAT 512
+#define CEC_INFINITE_NUM 0x7fffffff
+
+#define CEC_IN_INFINITE_REPEAT (1<<0)
+#define CEC_IN_FINITE_REPEAT (1<<1)
+#define CEC_CONT_BIG_REPEAT (1<<2)
+
+static int
+setup_comb_exp_check(Node* node, int state, ScanEnv* env)
+{
+ int type;
+ int r = state;
+
+ type = NTYPE(node);
+ switch (type) {
+ case N_LIST:
+ {
+ Node* prev = NULL_NODE;
+ do {
+ r = setup_comb_exp_check(NCONS(node).left, r, env);
+ prev = NCONS(node).left;
+ } while (r >= 0 && IS_NOT_NULL(node = NCONS(node).right));
}
- else {
- snode = onig_node_new_str(prev_start, end);
- CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+ break;
+
+ case N_ALT:
+ {
+ int ret;
+ do {
+ ret = setup_comb_exp_check(NCONS(node).left, state, env);
+ r |= ret;
+ } while (ret >= 0 && IS_NOT_NULL(node = NCONS(node).right));
}
+ break;
+
+ case N_QUALIFIER:
+ {
+ int child_state = state;
+ int add_state = 0;
+ QualifierNode* qn = &(NQUALIFIER(node));
+ Node* target = qn->target;
+ int var_num;
+
+ if (! IS_REPEAT_INFINITE(qn->upper)) {
+ if (qn->upper > 1) {
+ /* {0,1}, {1,1} are allowed */
+ child_state |= CEC_IN_FINITE_REPEAT;
+
+ /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
+ if (env->backrefed_mem == 0) {
+ if (NTYPE(qn->target) == N_EFFECT) {
+ EffectNode* en = &(NEFFECT(qn->target));
+ if (en->type == EFFECT_MEMORY) {
+ if (NTYPE(en->target) == N_QUALIFIER) {
+ QualifierNode* q = &(NQUALIFIER(en->target));
+ if (IS_REPEAT_INFINITE(q->upper)
+ && q->greedy == qn->greedy) {
+ qn->upper = (qn->lower == 0 ? 1 : qn->lower);
+ if (qn->upper == 1)
+ child_state = state;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (state & CEC_IN_FINITE_REPEAT) {
+ qn->comb_exp_check_num = -1;
+ }
+ else {
+ if (IS_REPEAT_INFINITE(qn->upper)) {
+ var_num = CEC_INFINITE_NUM;
+ child_state |= CEC_IN_INFINITE_REPEAT;
+ }
+ else {
+ var_num = qn->upper - qn->lower;
+ }
- if (tailp == (Node** )0) {
- root = onig_node_new_list(snode, NULL);
- CHECK_NULL_RETURN_VAL(root, ONIGERR_MEMORY);
- tailp = &(NCONS(node).right);
+ if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
+ add_state |= CEC_CONT_BIG_REPEAT;
+
+ if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
+ ((state & CEC_CONT_BIG_REPEAT) != 0 &&
+ var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
+ if (qn->comb_exp_check_num == 0) {
+ env->num_comb_exp_check++;
+ qn->comb_exp_check_num = env->num_comb_exp_check;
+ if (env->curr_max_regnum > env->comb_exp_max_regnum)
+ env->comb_exp_max_regnum = env->curr_max_regnum;
+ }
+ }
+ }
+
+ r = setup_comb_exp_check(target, child_state, env);
+ r |= add_state;
}
- else {
- *tailp = onig_node_new_list(snode, NULL);
- CHECK_NULL_RETURN_VAL(*tailp, ONIGERR_MEMORY);
- tailp = &(NCONS(*tailp).right);
+ break;
+
+ case N_EFFECT:
+ {
+ EffectNode* en = &(NEFFECT(node));
+
+ switch (en->type) {
+ case EFFECT_MEMORY:
+ {
+ if (env->curr_max_regnum < en->regnum)
+ env->curr_max_regnum = en->regnum;
+
+ r = setup_comb_exp_check(en->target, state, env);
+ }
+ break;
+
+ default:
+ r = setup_comb_exp_check(en->target, state, env);
+ break;
+ }
}
+ break;
- swap_node(node, root);
- onig_node_str_clear(root); /* should be after swap! */
- onig_node_free(root); /* free original string node */
+#ifdef USE_SUBEXP_CALL
+ case N_CALL:
+ if (IS_CALL_RECURSION(&(NCALL(node))))
+ env->has_recursion = 1;
+ else
+ r = setup_comb_exp_check(NCALL(node).target, state, env);
+ break;
+#endif
+
+ default:
+ break;
}
- return 0;
+ return r;
}
+#endif
#define IN_ALT (1<<0)
#define IN_NOT (1<<1)
@@ -3116,6 +3499,11 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
+#ifdef USE_BACKREF_AT_LEVEL
+ if (IS_BACKREF_NEST_LEVEL(br)) {
+ BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
+ }
+#endif
SET_EFFECT_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
}
}
@@ -3263,11 +3651,9 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
#define ALLOWED_EFFECT_IN_LB_NOT 0
#define ALLOWED_ANCHOR_IN_LB \
-( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF )
+( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
#define ALLOWED_ANCHOR_IN_LB_NOT \
-( ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF )
- /* can't allow all anchors, because \G in look-behind through Search().
- ex. /(?<=\G)zz/.match("azz") => success. */
+( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
case ANCHOR_LOOK_BEHIND:
{
@@ -3383,7 +3769,7 @@ typedef struct {
static int
map_position_value(OnigEncoding enc, int i)
{
- static short int ByteValTable[] = {
+ static const short int ByteValTable[] = {
5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
@@ -3408,7 +3794,7 @@ static int
distance_value(MinMaxLen* mm)
{
/* 1000 / (min-max-dist + 1) */
- static short int dist_vals[] = {
+ static const short int dist_vals[] = {
1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
@@ -3604,9 +3990,10 @@ copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
}
static void
-concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add)
+concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
{
- int i, n;
+ int i, j, len;
+ UChar *p, *end;
OptAncInfo tanc;
if (! to->ignore_case && add->ignore_case) {
@@ -3615,11 +4002,17 @@ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add)
to->ignore_case = 1;
}
- for (i = to->len, n = 0; n < add->len && i < OPT_EXACT_MAXLEN; i++, n++)
- to->s[i] = add->s[n];
+ p = add->s;
+ end = p + add->len;
+ for (i = to->len; p < end; ) {
+ len = enc_len(enc, p);
+ if (i + len > OPT_EXACT_MAXLEN) break;
+ for (j = 0; j < len && p < end; j++)
+ to->s[i++] = *p++;
+ }
to->len = i;
- to->reach_end = (n == add->len ? add->reach_end : 0);
+ to->reach_end = (p == end ? add->reach_end : 0);
concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
if (! to->reach_end) tanc.right_anchor = 0;
@@ -3634,15 +4027,10 @@ concat_opt_exact_info_str(OptExactInfo* to,
UChar *p;
for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
- if (raw) {
+ len = enc_len(enc, p);
+ if (i + len > OPT_EXACT_MAXLEN) break;
+ for (j = 0; j < len && p < end; j++)
to->s[i++] = *p++;
- }
- else {
- len = enc_len(enc, p);
- if (i + len > OPT_EXACT_MAXLEN) break;
- for (j = 0; j < len; j++)
- to->s[i++] = *p++;
- }
}
to->len = i;
@@ -3711,7 +4099,7 @@ select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
static void
clear_opt_map_info(OptMapInfo* map)
{
- static OptMapInfo clean_info = {
+ static const OptMapInfo clean_info = {
{0, 0}, {0, 0}, 0,
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -3758,8 +4146,8 @@ add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
int i, j, n, len;
UChar buf[ONIGENC_MBC_NORMALIZE_MAXLEN];
OnigCodePoint code, ccode;
- OnigCompAmbigCodes* ccs;
- OnigPairAmbigCodes* pccs;
+ const OnigCompAmbigCodes* ccs;
+ const OnigPairAmbigCodes* pccs;
OnigAmbigType amb;
add_char_opt_map_info(map, p[0], enc);
@@ -3907,11 +4295,11 @@ concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
if (add->exb.len > 0) {
if (exb_reach) {
- concat_opt_exact_info(&to->exb, &add->exb);
+ concat_opt_exact_info(&to->exb, &add->exb, enc);
clear_opt_exact_info(&add->exb);
}
else if (exm_reach) {
- concat_opt_exact_info(&to->exm, &add->exb);
+ concat_opt_exact_info(&to->exm, &add->exb, enc);
clear_opt_exact_info(&add->exb);
}
}
@@ -4197,8 +4585,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
if (env->mmd.max == 0 &&
NTYPE(qn->target) == N_ANYCHAR && qn->greedy) {
- if (IS_POSIXLINE(env->options))
- add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_PL);
+ if (IS_MULTILINE(env->options))
+ add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
else
add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
}
@@ -4210,7 +4598,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
if (nopt.exb.reach_end) {
for (i = 2; i < qn->lower &&
! is_full_opt_exact_info(&opt->exb); i++) {
- concat_opt_exact_info(&opt->exb, &nopt.exb);
+ concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
}
if (i < qn->lower) {
opt->exb.reach_end = 0;
@@ -4316,10 +4704,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY);
reg->exact_end = reg->exact + e->len;
- if (e->anc.left_anchor & ANCHOR_BEGIN_LINE)
- allow_reverse = 1;
- else
- allow_reverse =
+ allow_reverse =
ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
@@ -4391,7 +4776,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
if (r) return r;
reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
- ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_PL);
+ ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML);
reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
@@ -4503,7 +4888,7 @@ print_anchor(FILE* f, int anchor)
q = 1;
fprintf(f, "anychar-star");
}
- if (anchor & ANCHOR_ANYCHAR_STAR_PL) {
+ if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
if (q) fprintf(f, ", ");
fprintf(f, "anychar-star-pl");
}
@@ -4514,8 +4899,8 @@ print_anchor(FILE* f, int anchor)
static void
print_optimize_info(FILE* f, regex_t* reg)
{
- static char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
- "EXACT_IC", "MAP" };
+ static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
+ "EXACT_IC", "MAP" };
fprintf(f, "optimize: %s\n", on[reg->optimize]);
fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
@@ -4624,7 +5009,6 @@ onig_chain_reduce(regex_t* reg)
{
regex_t *head, *prev;
- THREAD_ATOMIC_START;
prev = reg;
head = prev->chain;
if (IS_NOT_NULL(head)) {
@@ -4636,7 +5020,6 @@ onig_chain_reduce(regex_t* reg)
prev->chain = (regex_t* )NULL;
REGEX_TRANSFER(reg, head);
}
- THREAD_ATOMIC_END;
}
#if 0
@@ -4753,6 +5136,9 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
reg->num_null_check = 0;
reg->repeat_range_alloc = 0;
reg->repeat_range = (OnigRepeatRange* )NULL;
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ reg->num_comb_exp_check = 0;
+#endif
r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
if (r != 0) goto err;
@@ -4806,6 +5192,33 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
reg->bt_mem_end |= reg->capture_history;
}
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ if (scan_env.backrefed_mem == 0
+#ifdef USE_SUBEXP_CALL
+ || scan_env.num_call == 0
+#endif
+ ) {
+ setup_comb_exp_check(root, 0, &scan_env);
+#ifdef USE_SUBEXP_CALL
+ if (scan_env.has_recursion != 0) {
+ scan_env.num_comb_exp_check = 0;
+ }
+ else
+#endif
+ if (scan_env.comb_exp_max_regnum > 0) {
+ int i;
+ for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
+ if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
+ scan_env.num_comb_exp_check = 0;
+ break;
+ }
+ }
+ }
+ }
+
+ reg->num_comb_exp_check = scan_env.num_comb_exp_check;
+#endif
+
clear_optimize_info(reg);
#ifndef ONIG_DONT_OPTIMIZE
r = set_optimize_info_from_tree(root, reg, &scan_env);
@@ -4875,6 +5288,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
return r;
}
+#ifdef USE_RECOMPILE_API
extern int
onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
@@ -4893,6 +5307,7 @@ onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
}
return 0;
}
+#endif
static int onig_inited = 0;
@@ -4906,6 +5321,11 @@ onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag,
if (ONIGENC_IS_UNDEF(enc))
return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
+ if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
+ == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
+ return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
+ }
+
*reg = (regex_t* )xmalloc(sizeof(regex_t));
if (IS_NULL(*reg)) return ONIGERR_MEMORY;
(*reg)->state = ONIG_STATE_MODIFY;
@@ -4991,14 +5411,14 @@ onig_end()
onig_print_statistics(stderr);
#endif
-#ifdef USE_RECYCLE_NODE
- onig_free_node_list();
-#endif
-
#ifdef USE_SHARED_CCLASS_TABLE
onig_free_shared_cclass_table();
#endif
+#ifdef USE_RECYCLE_NODE
+ onig_free_node_list();
+#endif
+
onig_inited = 0;
THREAD_ATOMIC_END;
@@ -5008,6 +5428,16 @@ onig_end()
#ifdef ONIG_DEBUG
+/* arguments type */
+#define ARG_SPECIAL -1
+#define ARG_NON 0
+#define ARG_RELADDR 1
+#define ARG_ABSADDR 2
+#define ARG_LENGTH 3
+#define ARG_MEMNUM 4
+#define ARG_OPTION 5
+#define ARG_STATE_CHECK 6
+
OnigOpInfoType OnigOpInfo[] = {
{ OP_FINISH, "finish", ARG_NON },
{ OP_END, "end", ARG_NON },
@@ -5038,62 +5468,68 @@ OnigOpInfoType OnigOpInfo[] = {
{ OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
{ OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
{ OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
- { OP_WORD, "word", ARG_NON },
- { OP_NOT_WORD, "not-word", ARG_NON },
- { OP_WORD_SB, "word-sb", ARG_NON },
- { OP_WORD_MB, "word-mb", ARG_NON },
- { OP_WORD_BOUND, "word-bound", ARG_NON },
- { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
- { OP_WORD_BEGIN, "word-begin", ARG_NON },
- { OP_WORD_END, "word-end", ARG_NON },
- { OP_BEGIN_BUF, "begin-buf", ARG_NON },
- { OP_END_BUF, "end-buf", ARG_NON },
- { OP_BEGIN_LINE, "begin-line", ARG_NON },
- { OP_END_LINE, "end-line", ARG_NON },
- { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
- { OP_BEGIN_POSITION, "begin-position", ARG_NON },
- { OP_BACKREF1, "backref1", ARG_NON },
- { OP_BACKREF2, "backref2", ARG_NON },
- { OP_BACKREF3, "backref3", ARG_NON },
- { OP_BACKREFN, "backrefn", ARG_MEMNUM },
- { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
- { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
- { OP_BACKREF_MULTI_IC, "backref_multi-ic",ARG_SPECIAL },
- { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
- { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
- { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
- { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
- { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
- { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
- { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
- { OP_SET_OPTION, "set-option", ARG_OPTION },
- { OP_FAIL, "fail", ARG_NON },
- { OP_JUMP, "jump", ARG_RELADDR },
- { OP_PUSH, "push", ARG_RELADDR },
- { OP_POP, "pop", ARG_NON },
- { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
- { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
- { OP_REPEAT, "repeat", ARG_SPECIAL },
- { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
- { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
- { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
- { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
- { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
- { OP_NULL_CHECK_START, "null-check-start",ARG_MEMNUM },
- { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
- { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
- { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
- { OP_PUSH_POS, "push-pos", ARG_NON },
- { OP_POP_POS, "pop-pos", ARG_NON },
- { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
- { OP_FAIL_POS, "fail-pos", ARG_NON },
- { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
- { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
- { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
+ { OP_WORD, "word", ARG_NON },
+ { OP_NOT_WORD, "not-word", ARG_NON },
+ { OP_WORD_SB, "word-sb", ARG_NON },
+ { OP_WORD_MB, "word-mb", ARG_NON },
+ { OP_WORD_BOUND, "word-bound", ARG_NON },
+ { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
+ { OP_WORD_BEGIN, "word-begin", ARG_NON },
+ { OP_WORD_END, "word-end", ARG_NON },
+ { OP_BEGIN_BUF, "begin-buf", ARG_NON },
+ { OP_END_BUF, "end-buf", ARG_NON },
+ { OP_BEGIN_LINE, "begin-line", ARG_NON },
+ { OP_END_LINE, "end-line", ARG_NON },
+ { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
+ { OP_BEGIN_POSITION, "begin-position", ARG_NON },
+ { OP_BACKREF1, "backref1", ARG_NON },
+ { OP_BACKREF2, "backref2", ARG_NON },
+ { OP_BACKREFN, "backrefn", ARG_MEMNUM },
+ { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
+ { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
+ { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
+ { OP_BACKREF_AT_LEVEL, "backref_at_level", ARG_SPECIAL },
+ { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
+ { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
+ { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
+ { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
+ { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
+ { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
+ { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
+ { OP_SET_OPTION, "set-option", ARG_OPTION },
+ { OP_FAIL, "fail", ARG_NON },
+ { OP_JUMP, "jump", ARG_RELADDR },
+ { OP_PUSH, "push", ARG_RELADDR },
+ { OP_POP, "pop", ARG_NON },
+ { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
+ { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
+ { OP_REPEAT, "repeat", ARG_SPECIAL },
+ { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
+ { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
+ { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
+ { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
+ { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
+ { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
+ { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
+ { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
+ { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
+ { OP_PUSH_POS, "push-pos", ARG_NON },
+ { OP_POP_POS, "pop-pos", ARG_NON },
+ { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
+ { OP_FAIL_POS, "fail-pos", ARG_NON },
+ { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
+ { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
+ { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
{ OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
{ OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
- { OP_CALL, "call", ARG_ABSADDR },
- { OP_RETURN, "return", ARG_NON },
+ { OP_CALL, "call", ARG_ABSADDR },
+ { OP_RETURN, "return", ARG_NON },
+ { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
+ { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
+ { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
+ { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
+ { OP_STATE_CHECK_ANYCHAR_ML_STAR,
+ "state-check-anychar-ml*", ARG_STATE_CHECK },
{ -1, "", ARG_NON }
};
@@ -5152,6 +5588,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
RelAddrType addr;
LengthType len;
MemNumType mem;
+ StateCheckNumType scn;
OnigCodePoint code;
UChar *q;
@@ -5186,6 +5623,12 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
fprintf(f, ":%d", option);
}
break;
+
+ case ARG_STATE_CHECK:
+ scn = *((StateCheckNumType* )bp);
+ bp += SIZE_STATE_CHECK_NUM;
+ fprintf(f, ":%d", scn);
+ break;
}
}
else {
@@ -5312,6 +5755,26 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
}
break;
+ case OP_BACKREF_AT_LEVEL:
+ {
+ OnigOptionType option;
+ LengthType level;
+
+ GET_OPTION_INC(option, bp);
+ fprintf(f, ":%d", option);
+ GET_LENGTH_INC(level, bp);
+ fprintf(f, ":%d", level);
+
+ fputs(" ", f);
+ GET_LENGTH_INC(len, bp);
+ for (i = 0; i < len; i++) {
+ GET_MEMNUM_INC(mem, bp);
+ if (i > 0) fputs(", ", f);
+ fprintf(f, "%d", mem);
+ }
+ }
+ break;
+
case OP_REPEAT:
case OP_REPEAT_NG:
{
@@ -5343,6 +5806,15 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
fprintf(f, ":%d:(%d)", len, addr);
break;
+ case OP_STATE_CHECK_PUSH:
+ case OP_STATE_CHECK_PUSH_OR_JUMP:
+ scn = *((StateCheckNumType* )bp);
+ bp += SIZE_STATE_CHECK_NUM;
+ addr = *((RelAddrType* )bp);
+ bp += SIZE_RELADDR;
+ fprintf(f, ":%d:(%d)", scn, addr);
+ break;
+
default:
fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
*--bp);