summary | refs | log | tree | commit | diff
path: root/ext/pcre/php_pcre.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/pcre/php_pcre.c')
-rw-r--r-- ext/pcre/php_pcre.c 169
1 files changed, 109 insertions, 60 deletions
diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c
index e728c7354..19bd50bbc 100644
--- a/ext/pcre/php_pcre.c
+++ b/ext/pcre/php_pcre.c
@@ -2,7 +2,7 @@
+----------------------------------------------------------------------+
| PHP Version 5 |
+----------------------------------------------------------------------+
- | Copyright (c) 1997-2008 The PHP Group |
+ | Copyright (c) 1997-2009 The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
@@ -16,7 +16,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: php_pcre.c,v 1.168.2.9.2.25 2008/02/20 22:08:18 felipe Exp $ */
+/* $Id: php_pcre.c,v 1.168.2.9.2.30 2009/01/13 19:23:31 andrei Exp $ */
#include "php.h"
#include "php_ini.h"
@@ -48,7 +48,8 @@ enum {
PHP_PCRE_INTERNAL_ERROR,
PHP_PCRE_BACKTRACK_LIMIT_ERROR,
PHP_PCRE_RECURSION_LIMIT_ERROR,
- PHP_PCRE_BAD_UTF8_ERROR
+ PHP_PCRE_BAD_UTF8_ERROR,
+ PHP_PCRE_BAD_UTF8_OFFSET_ERROR
};
@@ -72,6 +73,10 @@ static void pcre_handle_exec_error(int pcre_code TSRMLS_DC) /* {{{ */
preg_code = PHP_PCRE_BAD_UTF8_ERROR;
break;
+ case PCRE_ERROR_BADUTF8_OFFSET:
+ preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
+ break;
+
default:
preg_code = PHP_PCRE_INTERNAL_ERROR;
break;
@@ -145,6 +150,7 @@ static PHP_MINIT_FUNCTION(pcre)
REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);
return SUCCESS;
@@ -174,6 +180,50 @@ static int pcre_clean_cache(void *data, void *arg TSRMLS_DC)
}
/* }}} */
+/* {{{ static make_subpats_table
+ *
+ * Builds a table of num_subpats entries that maps a subpattern number to its
+ * name; slots for unnamed subpatterns stay NULL (the table is ecalloc'ed).
+ * Each non-NULL entry points into the name table reported by pcre_fullinfo()
+ * for pce->re; the names are not copied, so the caller releases only the
+ * array itself with efree().
+ *
+ * Returns NULL, after emitting an E_WARNING and freeing the table, when
+ * pcre_fullinfo() fails or when a subpattern name is a numeric string.
+ */
+static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_DC)
+{
+	pcre_extra *extra = pce->extra;
+	int name_cnt = 0, name_size, ni = 0;
+	int rc;
+	char *name_table;
+	unsigned short name_idx;
+	char **subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
+
+	rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt);
+	if (rc < 0) {
+		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
+		efree(subpat_names);
+		return NULL;
+	}
+	if (name_cnt > 0) {
+		int rc1, rc2;
+
+		rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
+		rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
+		/* Report whichever of the two queries failed (the later one wins). */
+		rc = rc2 ? rc2 : rc1;
+		if (rc < 0) {
+			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
+			efree(subpat_names);
+			return NULL;
+		}
+
+		while (ni++ < name_cnt) {
+			/*
+			 * An entry starts with the subpattern number stored as two bytes,
+			 * most significant first, followed by the name. Read the bytes as
+			 * unsigned and scale the high byte by 0x100: a signed char would
+			 * sign-extend numbers of 128 and above into an out-of-range
+			 * index, and 0xff mis-decodes numbers of 256 and above.
+			 */
+			name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
+			subpat_names[name_idx] = name_table + 2;
+			if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
+				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed");
+				efree(subpat_names);
+				return NULL;
+			}
+			/* Entries are fixed-size; step to the next one. */
+			name_table += name_size;
+		}
+	}
+
+	return subpat_names;
+}
+/* }}} */
+
/* {{{ pcre_get_compiled_regex_cache
*/
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC)
@@ -484,7 +534,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
int g_notempty = 0; /* If the match should not be empty */
const char **stringlist; /* Holds list of subpatterns */
char *match; /* The current match */
- char **subpat_names = NULL;/* Array for named subpatterns */
+ char **subpat_names; /* Array for named subpatterns */
int i, rc;
int subpats_order; /* Order of subpattern matches */
int offset_capture; /* Capture match offsets: yes/no */
@@ -539,54 +589,19 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
}
num_subpats++;
size_offsets = num_subpats * 3;
- offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
/*
* Build a mapping from subpattern numbers to their names. We will always
* allocate the table, even though there may be no named subpatterns. This
* avoids somewhat more complicated logic in the inner loops.
*/
- subpat_names = (char **)safe_emalloc(num_subpats, sizeof(char *), 0);
- memset(subpat_names, 0, sizeof(char *) * num_subpats);
- {
- int name_cnt = 0, name_size, ni = 0;
- char *name_table;
- unsigned short name_idx;
-
- rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt);
- if (rc < 0) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
- efree(offsets);
- efree(subpat_names);
- RETURN_FALSE;
- }
- if (name_cnt > 0) {
- int rc1, rc2;
-
- rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
- rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
- rc = rc2 ? rc2 : rc1;
- if (rc < 0) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
- efree(offsets);
- efree(subpat_names);
- RETURN_FALSE;
- }
-
- while (ni++ < name_cnt) {
- name_idx = 0xff * name_table[0] + name_table[1];
- subpat_names[name_idx] = name_table + 2;
- if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed");
- efree(offsets);
- efree(subpat_names);
- RETURN_FALSE;
- }
- name_table += name_size;
- }
- }
+ subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
+ if (!subpat_names) {
+ RETURN_FALSE;
}
+ offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
+
/* Allocate match sets array and initialize the values. */
if (global && subpats_order == PREG_PATTERN_ORDER) {
match_sets = (zval **)safe_emalloc(num_subpats, sizeof(zval *), 0);
@@ -606,6 +621,9 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
exoptions|g_notempty, offsets, size_offsets);
+ /* the string was already proved to be valid UTF-8 */
+ exoptions |= PCRE_NO_UTF8_CHECK;
+
/* Check for too many substrings condition. */
if (count == 0) {
php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
@@ -794,7 +812,7 @@ static int preg_get_backref(char **str, int *backref)
/* {{{ preg_do_repl_func
*/
-static int preg_do_repl_func(zval *function, char *subject, int *offsets, int count, char **result TSRMLS_DC)
+static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, char **result TSRMLS_DC)
{
zval *retval_ptr; /* Function return value */
zval **args[1]; /* Argument to pass to function */
@@ -804,8 +822,12 @@ static int preg_do_repl_func(zval *function, char *subject, int *offsets, int co
MAKE_STD_ZVAL(subpats);
array_init(subpats);
- for (i = 0; i < count; i++)
+ for (i = 0; i < count; i++) {
+ if (subpat_names[i]) {
+ add_assoc_stringl(subpats, subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1], 1);
+ }
add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
+ }
args[0] = &subpats;
if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) {
@@ -944,6 +966,8 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub
int exoptions = 0; /* Execution options */
int count = 0; /* Count of matched subpatterns */
int *offsets; /* Array of subpattern offsets */
+ char **subpat_names; /* Array for named subpatterns */
+ int num_subpats; /* Number of captured subpatterns */
int size_offsets; /* Size of the offsets array */
int new_len; /* Length of needed storage */
int alloc_len; /* Actual allocated length */
@@ -987,12 +1011,24 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub
}
/* Calculate the size of the offsets array, and allocate memory for it. */
- rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
+ rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
if (rc < 0) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
return NULL;
}
- size_offsets = (size_offsets + 1) * 3;
+ num_subpats++;
+ size_offsets = num_subpats * 3;
+
+ /*
+ * Build a mapping from subpattern numbers to their names. We will always
+ * allocate the table, even though there may be no named subpatterns. This
+ * avoids somewhat more complicated logic in the inner loops.
+ */
+ subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
+ if (!subpat_names) {
+ return NULL;
+ }
+
offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
alloc_len = 2 * subject_len + 1;
@@ -1009,6 +1045,9 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub
count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
exoptions|g_notempty, offsets, size_offsets);
+ /* the string was already proved to be valid UTF-8 */
+ exoptions |= PCRE_NO_UTF8_CHECK;
+
/* Check for too many substrings condition. */
if (count == 0) {
php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
@@ -1033,8 +1072,7 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub
new_len += eval_result_len;
} else if (is_callable_replace) {
/* Use custom function to get replacement string and its length. */
- eval_result_len = preg_do_repl_func(replace_val, subject, offsets,
- count, &eval_result TSRMLS_CC);
+ eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, &eval_result TSRMLS_CC);
new_len += eval_result_len;
} else { /* do regular substitution */
walk = replace;
@@ -1149,8 +1187,9 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub
/* Advance to the next piece. */
start_offset = offsets[1];
}
-
+
efree(offsets);
+ efree(subpat_names);
return result;
}
@@ -1446,6 +1485,9 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
subject_len, start_offset,
exoptions|g_notempty, offsets, size_offsets);
+ /* the string was already proved to be valid UTF-8 */
+ exoptions |= PCRE_NO_UTF8_CHECK;
+
/* Check for too many substrings condition. */
if (count == 0) {
php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
@@ -1501,7 +1543,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
if (re_bump == NULL) {
int dummy;
- if ((re_bump = pcre_get_compiled_regex("/./u", &extra_bump, &dummy TSRMLS_CC)) == NULL) {
+ if ((re_bump = pcre_get_compiled_regex("/./us", &extra_bump, &dummy TSRMLS_CC)) == NULL) {
RETURN_FALSE;
}
}
@@ -1704,13 +1746,17 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return
/* Go through the input array */
zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
- while(zend_hash_get_current_data(Z_ARRVAL_P(input), (void **)&entry) == SUCCESS) {
+ while (zend_hash_get_current_data(Z_ARRVAL_P(input), (void **)&entry) == SUCCESS) {
+ zval subject = **entry;
- convert_to_string_ex(entry);
+ if (Z_TYPE_PP(entry) != IS_STRING) {
+ zval_copy_ctor(&subject);
+ convert_to_string(&subject);
+ }
/* Perform the match */
- count = pcre_exec(pce->re, extra, Z_STRVAL_PP(entry),
- Z_STRLEN_PP(entry), 0,
+ count = pcre_exec(pce->re, extra, Z_STRVAL(subject),
+ Z_STRLEN(subject), 0,
0, offsets, size_offsets);
/* Check for too many substrings condition. */
@@ -1723,9 +1769,8 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return
}
/* If the entry fits our requirements */
- if ((count > 0 && !invert) ||
- (count == PCRE_ERROR_NOMATCH && invert)) {
- (*entry)->refcount++;
+ if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
+ ZVAL_ADDREF(*entry);
/* Add to return array */
switch (zend_hash_get_current_key(Z_ARRVAL_P(input), &string_key, &num_key, 0))
@@ -1741,7 +1786,11 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return
break;
}
}
-
+
+ if (Z_TYPE_PP(entry) != IS_STRING) {
+ zval_dtor(&subject);
+ }
+
zend_hash_move_forward(Z_ARRVAL_P(input));
}
zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));