diff options
Diffstat (limited to 'ext/pcre/php_pcre.c')
-rw-r--r-- | ext/pcre/php_pcre.c | 169 |
1 files changed, 109 insertions, 60 deletions
diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index e728c7354..19bd50bbc 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -2,7 +2,7 @@ +----------------------------------------------------------------------+ | PHP Version 5 | +----------------------------------------------------------------------+ - | Copyright (c) 1997-2008 The PHP Group | + | Copyright (c) 1997-2009 The PHP Group | +----------------------------------------------------------------------+ | This source file is subject to version 3.01 of the PHP license, | | that is bundled with this package in the file LICENSE, and is | @@ -16,7 +16,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: php_pcre.c,v 1.168.2.9.2.25 2008/02/20 22:08:18 felipe Exp $ */ +/* $Id: php_pcre.c,v 1.168.2.9.2.30 2009/01/13 19:23:31 andrei Exp $ */ #include "php.h" #include "php_ini.h" @@ -48,7 +48,8 @@ enum { PHP_PCRE_INTERNAL_ERROR, PHP_PCRE_BACKTRACK_LIMIT_ERROR, PHP_PCRE_RECURSION_LIMIT_ERROR, - PHP_PCRE_BAD_UTF8_ERROR + PHP_PCRE_BAD_UTF8_ERROR, + PHP_PCRE_BAD_UTF8_OFFSET_ERROR }; @@ -72,6 +73,10 @@ static void pcre_handle_exec_error(int pcre_code TSRMLS_DC) /* {{{ */ preg_code = PHP_PCRE_BAD_UTF8_ERROR; break; + case PCRE_ERROR_BADUTF8_OFFSET: + preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR; + break; + default: preg_code = PHP_PCRE_INTERNAL_ERROR; break; @@ -145,6 +150,7 @@ static PHP_MINIT_FUNCTION(pcre) REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT); REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT); return SUCCESS; @@ -174,6 +180,50 @@ static int pcre_clean_cache(void *data, void *arg TSRMLS_DC) } /* }}} */ +/* {{{ static make_subpats_table */ +static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_DC) +{ + pcre_extra *extra = pce->extra; + int name_cnt = 0, name_size, ni = 0; + int rc; + char *name_table; + unsigned short name_idx; + char **subpat_names = (char **)ecalloc(num_subpats, sizeof(char *)); + + rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt); + if (rc < 0) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); + efree(subpat_names); + return NULL; + } + if (name_cnt > 0) { + int rc1, rc2; + + rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table); + rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size); + rc = rc2 ? rc2 : rc1; + if (rc < 0) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); + efree(subpat_names); + return NULL; + } + + while (ni++ < name_cnt) { + name_idx = 0xff * name_table[0] + name_table[1]; + subpat_names[name_idx] = name_table + 2; + if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed"); + efree(subpat_names); + return NULL; + } + name_table += name_size; + } + } + + return subpat_names; +} +/* }}} */ + /* {{{ pcre_get_compiled_regex_cache */ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC) @@ -484,7 +534,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec int g_notempty = 0; /* If the match should not be empty */ const char **stringlist; /* Holds list of subpatterns */ char *match; /* The current match */ - char **subpat_names = NULL;/* Array for named subpatterns */ + char **subpat_names; /* Array for named subpatterns */ int i, rc; int subpats_order; /* Order of subpattern matches */ int offset_capture; /* Capture match offsets: yes/no */ @@ -539,54 +589,19 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec } num_subpats++; size_offsets = num_subpats * 3; - offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); /* * Build a mapping from subpattern numbers to their names. We will always * allocate the table, even though there may be no named subpatterns. This * avoids somewhat more complicated logic in the inner loops. */ - subpat_names = (char **)safe_emalloc(num_subpats, sizeof(char *), 0); - memset(subpat_names, 0, sizeof(char *) * num_subpats); - { - int name_cnt = 0, name_size, ni = 0; - char *name_table; - unsigned short name_idx; - - rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt); - if (rc < 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); - efree(offsets); - efree(subpat_names); - RETURN_FALSE; - } - if (name_cnt > 0) { - int rc1, rc2; - - rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table); - rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size); - rc = rc2 ? rc2 : rc1; - if (rc < 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); - efree(offsets); - efree(subpat_names); - RETURN_FALSE; - } - - while (ni++ < name_cnt) { - name_idx = 0xff * name_table[0] + name_table[1]; - subpat_names[name_idx] = name_table + 2; - if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed"); - efree(offsets); - efree(subpat_names); - RETURN_FALSE; - } - name_table += name_size; - } - } + subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC); + if (!subpat_names) { + RETURN_FALSE; } + offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); + /* Allocate match sets array and initialize the values. */ if (global && subpats_order == PREG_PATTERN_ORDER) { match_sets = (zval **)safe_emalloc(num_subpats, sizeof(zval *), 0); @@ -606,6 +621,9 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec count = pcre_exec(pce->re, extra, subject, subject_len, start_offset, exoptions|g_notempty, offsets, size_offsets); + /* the string was already proved to be valid UTF-8 */ + exoptions |= PCRE_NO_UTF8_CHECK; + /* Check for too many substrings condition. */ if (count == 0) { php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings"); @@ -794,7 +812,7 @@ static int preg_get_backref(char **str, int *backref) /* {{{ preg_do_repl_func */ -static int preg_do_repl_func(zval *function, char *subject, int *offsets, int count, char **result TSRMLS_DC) +static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, char **result TSRMLS_DC) { zval *retval_ptr; /* Function return value */ zval **args[1]; /* Argument to pass to function */ @@ -804,8 +822,12 @@ static int preg_do_repl_func(zval *function, char *subject, int *offsets, int co MAKE_STD_ZVAL(subpats); array_init(subpats); - for (i = 0; i < count; i++) + for (i = 0; i < count; i++) { + if (subpat_names[i]) { + add_assoc_stringl(subpats, subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1], 1); + } add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1); + } args[0] = &subpats; if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) { @@ -944,6 +966,8 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub int exoptions = 0; /* Execution options */ int count = 0; /* Count of matched subpatterns */ int *offsets; /* Array of subpattern offsets */ + char **subpat_names; /* Array for named subpatterns */ + int num_subpats; /* Number of captured subpatterns */ int size_offsets; /* Size of the offsets array */ int new_len; /* Length of needed storage */ int alloc_len; /* Actual allocated length */ @@ -987,12 +1011,24 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub } /* Calculate the size of the offsets array, and allocate memory for it. */ - rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets); + rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats); if (rc < 0) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); return NULL; } - size_offsets = (size_offsets + 1) * 3; + num_subpats++; + size_offsets = num_subpats * 3; + + /* + * Build a mapping from subpattern numbers to their names. We will always + * allocate the table, even though there may be no named subpatterns. This + * avoids somewhat more complicated logic in the inner loops. + */ + subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC); + if (!subpat_names) { + return NULL; + } + offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); alloc_len = 2 * subject_len + 1; @@ -1009,6 +1045,9 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub count = pcre_exec(pce->re, extra, subject, subject_len, start_offset, exoptions|g_notempty, offsets, size_offsets); + /* the string was already proved to be valid UTF-8 */ + exoptions |= PCRE_NO_UTF8_CHECK; + /* Check for too many substrings condition. */ if (count == 0) { php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings"); @@ -1033,8 +1072,7 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub new_len += eval_result_len; } else if (is_callable_replace) { /* Use custom function to get replacement string and its length. */ - eval_result_len = preg_do_repl_func(replace_val, subject, offsets, - count, &eval_result TSRMLS_CC); + eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, &eval_result TSRMLS_CC); new_len += eval_result_len; } else { /* do regular substitution */ walk = replace; @@ -1149,8 +1187,9 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub /* Advance to the next piece. */ start_offset = offsets[1]; } - + efree(offsets); + efree(subpat_names); return result; } @@ -1446,6 +1485,9 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec subject_len, start_offset, exoptions|g_notempty, offsets, size_offsets); + /* the string was already proved to be valid UTF-8 */ + exoptions |= PCRE_NO_UTF8_CHECK; + /* Check for too many substrings condition. */ if (count == 0) { php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings"); @@ -1501,7 +1543,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec if (re_bump == NULL) { int dummy; - if ((re_bump = pcre_get_compiled_regex("/./u", &extra_bump, &dummy TSRMLS_CC)) == NULL) { + if ((re_bump = pcre_get_compiled_regex("/./us", &extra_bump, &dummy TSRMLS_CC)) == NULL) { RETURN_FALSE; } } @@ -1704,13 +1746,17 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return /* Go through the input array */ zend_hash_internal_pointer_reset(Z_ARRVAL_P(input)); - while(zend_hash_get_current_data(Z_ARRVAL_P(input), (void **)&entry) == SUCCESS) { + while (zend_hash_get_current_data(Z_ARRVAL_P(input), (void **)&entry) == SUCCESS) { + zval subject = **entry; - convert_to_string_ex(entry); + if (Z_TYPE_PP(entry) != IS_STRING) { + zval_copy_ctor(&subject); + convert_to_string(&subject); + } /* Perform the match */ - count = pcre_exec(pce->re, extra, Z_STRVAL_PP(entry), - Z_STRLEN_PP(entry), 0, + count = pcre_exec(pce->re, extra, Z_STRVAL(subject), + Z_STRLEN(subject), 0, 0, offsets, size_offsets); /* Check for too many substrings condition. */ @@ -1723,9 +1769,8 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return } /* If the entry fits our requirements */ - if ((count > 0 && !invert) || - (count == PCRE_ERROR_NOMATCH && invert)) { - (*entry)->refcount++; + if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) { + ZVAL_ADDREF(*entry); /* Add to return array */ switch (zend_hash_get_current_key(Z_ARRVAL_P(input), &string_key, &num_key, 0)) @@ -1741,7 +1786,11 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return break; } } - + + if (Z_TYPE_PP(entry) != IS_STRING) { + zval_dtor(&subject); + } + zend_hash_move_forward(Z_ARRVAL_P(input)); } zend_hash_internal_pointer_reset(Z_ARRVAL_P(input)); |