summary refs log tree commit diff
path: root/ext/intl
diff options
context:
space:
mode:
Diffstat (limited to 'ext/intl')
-rw-r--r-- ext/intl/doc/Tutorial.txt 2
-rw-r--r-- ext/intl/doc/grapheme_api.php 4
-rw-r--r-- ext/intl/doc/msgfmt_api.php 4
-rw-r--r-- ext/intl/grapheme/grapheme_string.c 40
-rw-r--r-- ext/intl/grapheme/grapheme_util.c 388
-rw-r--r-- ext/intl/grapheme/grapheme_util.h 24
-rw-r--r-- ext/intl/resourcebundle/resourcebundle.c 2
-rw-r--r-- ext/intl/resourcebundle/resourcebundle_class.c 4
-rw-r--r-- ext/intl/tests/badargs.phpt 2
-rw-r--r-- ext/intl/tests/bug61860.phpt 18
-rw-r--r-- ext/intl/tests/bug62759.phpt 24
-rw-r--r-- ext/intl/tests/transliterator_create_error.phpt 6
-rw-r--r-- ext/intl/tests/transliterator_transliterate_variant1.phpt 8
13 files changed, 170 insertions, 356 deletions
diff --git a/ext/intl/doc/Tutorial.txt b/ext/intl/doc/Tutorial.txt
index 4a66dc184..3bb31b00e 100644
--- a/ext/intl/doc/Tutorial.txt
+++ b/ext/intl/doc/Tutorial.txt
@@ -7,7 +7,7 @@ Examle of locales format: 'en_US', 'ru_UA', 'ua_UA' (see http://demo.icu-project
2. Collator::getDisplayName( $obj_locale, $disp_locale ).
-Get name of the object for the desired Locale, in the desired langauge. Both arguments
+Get name of the object for the desired Locale, in the desired language. Both arguments
must be from getAvailableLocales method.
@param string $obj_locale Locale to get display name for.
diff --git a/ext/intl/doc/grapheme_api.php b/ext/intl/doc/grapheme_api.php
index 465453fd3..e22d165cb 100644
--- a/ext/intl/doc/grapheme_api.php
+++ b/ext/intl/doc/grapheme_api.php
@@ -98,7 +98,7 @@
* @param string $haystack The input string.
* @param string $needle The string to look for.
* @param [boolean] $before_needle If TRUE (the default is FALSE), grapheme_strstr() returns the part of the
- haystack before the first occurence of the needle.
+ haystack before the first occurrence of the needle.
* @return string Returns the portion of string, or FALSE if needle is not found.
*/
function grapheme_strstr($haystack, $needle, $before_needle = FALSE) {}
@@ -109,7 +109,7 @@
* @param string $haystack The input string.
* @param string $needle The string to look for.
* @param [boolean] $before_needle If TRUE (the default is FALSE), grapheme_strstr() returns the part of the
- haystack before the first occurence of the needle.
+ haystack before the first occurrence of the needle.
* @return string Returns the portion of string, or FALSE if needle is not found.
*/
function grapheme_stristr($haystack, $needle, $before_needle = FALSE) {}
diff --git a/ext/intl/doc/msgfmt_api.php b/ext/intl/doc/msgfmt_api.php
index e4d047b97..3df6f0de1 100644
--- a/ext/intl/doc/msgfmt_api.php
+++ b/ext/intl/doc/msgfmt_api.php
@@ -31,7 +31,7 @@ class MessageFormatter {
/**
* Format the message
* @param array $args arguments to insert into the pattern string
- * @return string the formatted string, or false if an error ocurred
+ * @return string the formatted string, or false if an error occurred
*/
public function format($args) {}
@@ -124,7 +124,7 @@ class MessageFormatter {
* Format the message
* @param MessageFormatter $fmt The message formatter
* @param array $args arguments to insert into the pattern string
- * @return string the formatted string, or false if an error ocurred
+ * @return string the formatted string, or false if an error occurred
*/
function msgfmt_format($fmt, $args) {}
diff --git a/ext/intl/grapheme/grapheme_string.c b/ext/intl/grapheme/grapheme_string.c
index 475bbe418..8a094e015 100644
--- a/ext/intl/grapheme/grapheme_string.c
+++ b/ext/intl/grapheme/grapheme_string.c
@@ -113,7 +113,7 @@ PHP_FUNCTION(grapheme_strpos)
unsigned char *found;
long loffset = 0;
int32_t offset = 0;
- int ret_pos, uchar_pos;
+ int ret_pos;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", (char **)&haystack, &haystack_len, (char **)&needle, &needle_len, &loffset) == FAILURE) {
@@ -160,10 +160,10 @@ PHP_FUNCTION(grapheme_strpos)
}
/* do utf16 part of the strpos */
- ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, &uchar_pos, 0 /* fIgnoreCase */ TSRMLS_CC );
+ ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* fIgnoreCase */, 0 /* last */ TSRMLS_CC );
if ( ret_pos >= 0 ) {
- RETURN_LONG(ret_pos + offset);
+ RETURN_LONG(ret_pos);
} else {
RETURN_FALSE;
}
@@ -180,7 +180,7 @@ PHP_FUNCTION(grapheme_stripos)
unsigned char *found;
long loffset = 0;
int32_t offset = 0;
- int ret_pos, uchar_pos;
+ int ret_pos;
int is_ascii;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", (char **)&haystack, &haystack_len, (char **)&needle, &needle_len, &loffset) == FAILURE) {
@@ -235,10 +235,10 @@ PHP_FUNCTION(grapheme_stripos)
}
/* do utf16 part of the strpos */
- ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, &uchar_pos, 1 /* fIgnoreCase */ TSRMLS_CC );
+ ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* fIgnoreCase */, 0 /*last */ TSRMLS_CC );
if ( ret_pos >= 0 ) {
- RETURN_LONG(ret_pos + offset);
+ RETURN_LONG(ret_pos);
} else {
RETURN_FALSE;
}
@@ -304,7 +304,7 @@ PHP_FUNCTION(grapheme_strrpos)
/* else we need to continue via utf16 */
}
- ret_pos = grapheme_strrpos_utf16(haystack, haystack_len, needle, needle_len, offset, 0 /* f_ignore_case */ TSRMLS_CC);
+ ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* f_ignore_case */, 1/* last */ TSRMLS_CC);
if ( ret_pos >= 0 ) {
RETURN_LONG(ret_pos);
@@ -382,7 +382,7 @@ PHP_FUNCTION(grapheme_strripos)
/* else we need to continue via utf16 */
}
- ret_pos = grapheme_strrpos_utf16(haystack, haystack_len, needle, needle_len, offset, 1 /* f_ignore_case */ TSRMLS_CC);
+ ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* f_ignore_case */, 1 /*last */ TSRMLS_CC);
if ( ret_pos >= 0 ) {
RETURN_LONG(ret_pos);
@@ -434,6 +434,7 @@ PHP_FUNCTION(grapheme_substr)
grapheme_substr_ascii((char *)str, str_len, start, length, ZEND_NUM_ARGS(), (char **) &sub_str, &sub_str_len);
if ( NULL == sub_str ) {
+ intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: invalid parameters", 1 TSRMLS_CC );
RETURN_FALSE;
}
@@ -530,6 +531,15 @@ PHP_FUNCTION(grapheme_substr)
RETURN_STRINGL(((char *)sub_str), sub_str_len, 0);
}
+ if(length == 0) {
+ /* empty length - we've validated start, we can return "" now */
+ if (ustr) {
+ efree(ustr);
+ }
+ ubrk_close(bi);
+ RETURN_EMPTY_STRING();
+ }
+
/* find the end point of the string to return */
if ( length < 0 ) {
@@ -554,25 +564,31 @@ PHP_FUNCTION(grapheme_substr)
length += iter_val;
}
+ ubrk_close(bi);
+
if ( UBRK_DONE == sub_str_end_pos) {
if(length < 0) {
-
intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: length not contained in string", 1 TSRMLS_CC );
efree(ustr);
- ubrk_close(bi);
RETURN_FALSE;
} else {
sub_str_end_pos = ustr_len;
}
}
+
+ if(sub_str_start_pos > sub_str_end_pos) {
+ intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: length is beyond start", 1 TSRMLS_CC );
+
+ efree(ustr);
+ RETURN_FALSE;
+ }
sub_str = NULL;
status = U_ZERO_ERROR;
intl_convert_utf16_to_utf8((char **)&sub_str, &sub_str_len, ustr + sub_str_start_pos, ( sub_str_end_pos - sub_str_start_pos ), &status);
efree( ustr );
- ubrk_close( bi );
if ( U_FAILURE( status ) ) {
/* Set global error code. */
@@ -643,7 +659,7 @@ static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_cas
}
/* need to work in utf16 */
- ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, 0, &uchar_pos, f_ignore_case TSRMLS_CC );
+ ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, 0, &uchar_pos, f_ignore_case, 0 /*last */ TSRMLS_CC );
if ( ret_pos < 0 ) {
RETURN_FALSE;
diff --git a/ext/intl/grapheme/grapheme_util.c b/ext/intl/grapheme/grapheme_util.c
index 92008554d..c752b0237 100644
--- a/ext/intl/grapheme/grapheme_util.c
+++ b/ext/intl/grapheme/grapheme_util.c
@@ -28,6 +28,7 @@
#include <unicode/ucol.h>
#include <unicode/ustring.h>
#include <unicode/ubrk.h>
+#include <unicode/usearch.h>
#include "ext/standard/php_string.h"
@@ -47,49 +48,8 @@ grapheme_close_global_iterator( TSRMLS_D )
}
/* }}} */
-/* {{{ grapheme_intl_case_fold: convert string to lowercase */
-void
-grapheme_intl_case_fold(UChar** ptr_to_free, UChar **str, int32_t *str_len, UErrorCode *pstatus )
-{
- UChar *dest;
- int32_t dest_len, size_required;
-
- /* allocate a destination string that is a bit larger than the src, hoping that is enough */
- dest_len = (*str_len) + ( *str_len / 10 );
- dest = (UChar*) eumalloc(dest_len);
-
- *pstatus = U_ZERO_ERROR;
- size_required = u_strFoldCase(dest, dest_len, *str, *str_len, U_FOLD_CASE_DEFAULT, pstatus);
-
- dest_len = size_required;
-
- if ( U_BUFFER_OVERFLOW_ERROR == *pstatus ) {
-
- dest = (UChar*) eurealloc(dest, dest_len);
-
- *pstatus = U_ZERO_ERROR;
- size_required = u_strFoldCase(dest, dest_len, *str, *str_len, U_FOLD_CASE_DEFAULT, pstatus);
- }
-
- if ( U_FAILURE(*pstatus) ) {
- return;
- }
-
- if ( NULL != ptr_to_free) {
- efree(*ptr_to_free);
- *ptr_to_free = dest;
- }
-
- *str = dest;
- *str_len = dest_len;
-
- return;
-}
-/* }}} */
-
/* {{{ grapheme_substr_ascii f='from' - starting point, l='length' */
-void
-grapheme_substr_ascii(char *str, int str_len, int f, int l, int argc, char **sub_str, int *sub_str_len)
+void grapheme_substr_ascii(char *str, int str_len, int f, int l, int argc, char **sub_str, int *sub_str_len)
{
*sub_str = NULL;
@@ -147,225 +107,101 @@ grapheme_substr_ascii(char *str, int str_len, int f, int l, int argc, char **sub
}
/* }}} */
-/* {{{ grapheme_strrpos_utf16 - strrpos using utf16 */
-int
-grapheme_strrpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned char*needle, int32_t needle_len, int32_t offset, int f_ignore_case TSRMLS_DC)
-{
- UChar *uhaystack, *puhaystack, *uhaystack_end, *uneedle;
- int32_t uhaystack_len, uneedle_len;
- UErrorCode status;
- unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
- UBreakIterator* bi = NULL;
- int ret_pos, pos;
-
- /* convert the strings to UTF-16. */
- uhaystack = NULL;
- uhaystack_len = 0;
- status = U_ZERO_ERROR;
- intl_convert_utf8_to_utf16(&uhaystack, &uhaystack_len, (char *) haystack, haystack_len, &status );
-
- if ( U_FAILURE( status ) ) {
- /* Set global error code. */
- intl_error_set_code( NULL, status TSRMLS_CC );
-
- /* Set error messages. */
- intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
- if (uhaystack) {
- efree( uhaystack );
- }
- return -1;
- }
-
- if ( f_ignore_case ) {
- grapheme_intl_case_fold(&uhaystack, &uhaystack, &uhaystack_len, &status );
- }
-
- /* get a pointer to the haystack taking into account the offset */
- bi = NULL;
- status = U_ZERO_ERROR;
- bi = grapheme_get_break_iterator(u_break_iterator_buffer, &status TSRMLS_CC );
-
- puhaystack = grapheme_get_haystack_offset(bi, uhaystack, uhaystack_len, offset);
-
- if ( NULL == puhaystack ) {
- intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 TSRMLS_CC );
- if (uhaystack) {
- efree( uhaystack );
- }
- ubrk_close (bi);
- return -1;
- }
-
- uneedle = NULL;
- uneedle_len = 0;
- status = U_ZERO_ERROR;
- intl_convert_utf8_to_utf16(&uneedle, &uneedle_len, (char *) needle, needle_len, &status );
-
- if ( U_FAILURE( status ) ) {
- /* Set global error code. */
- intl_error_set_code( NULL, status TSRMLS_CC );
-
- /* Set error messages. */
- intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
- if (uhaystack) {
- efree( uhaystack );
- }
- if (uneedle) {
- efree( uneedle );
- }
- ubrk_close (bi);
- return -1;
- }
-
- if ( f_ignore_case ) {
- grapheme_intl_case_fold(&uneedle, &uneedle, &uneedle_len, &status );
- }
-
- ret_pos = -1; /* -1 represents 'not found' */
-
- /* back up until there's needle_len characters to compare */
-
- uhaystack_end = uhaystack + uhaystack_len;
- pos = ubrk_last(bi);
- puhaystack = uhaystack + pos;
-
- while ( uhaystack_end - puhaystack < uneedle_len ) {
-
- pos = ubrk_previous(bi);
-
- if ( UBRK_DONE == pos ) {
- break;
- }
-
- puhaystack = uhaystack + pos;
- }
-
- /* is there enough haystack left to hold the needle? */
- if ( ( uhaystack_end - puhaystack ) < uneedle_len ) {
- /* not enough, not found */
- goto exit;
- }
-
- while ( UBRK_DONE != pos ) {
-
- if (!u_memcmp(uneedle, puhaystack, uneedle_len)) { /* needle_len - 1 in zend memnstr? */
-
- /* does the grapheme in the haystack end at the same place as the last grapheme in the needle? */
-
- if ( ubrk_isBoundary(bi, pos + uneedle_len) ) {
-
- /* found it, get grapheme count offset */
- ret_pos = grapheme_count_graphemes(bi, uhaystack, pos);
- break;
- }
-
- /* set position back */
- ubrk_isBoundary(bi, pos);
- }
-
- pos = ubrk_previous(bi);
- puhaystack = uhaystack + pos;
- }
-
-exit:
- if (uhaystack) {
- efree( uhaystack );
- }
- if (uneedle) {
- efree( uneedle );
+#define STRPOS_CHECK_STATUS(status, error) \
+ if ( U_FAILURE( (status) ) ) { \
+ intl_error_set_code( NULL, (status) TSRMLS_CC ); \
+ intl_error_set_custom_msg( NULL, (error), 0 TSRMLS_CC ); \
+ if (uhaystack) { \
+ efree( uhaystack ); \
+ } \
+ if (uneedle) { \
+ efree( uneedle ); \
+ } \
+ if(bi) { \
+ ubrk_close (bi); \
+ } \
+ if(src) { \
+ usearch_close(src); \
+ } \
+ return -1; \
}
- ubrk_close (bi);
-
- return ret_pos;
-}
-/* }}} */
/* {{{ grapheme_strpos_utf16 - strrpos using utf16*/
-int
-grapheme_strpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned char*needle, int32_t needle_len, int32_t offset, int32_t *puchar_pos, int f_ignore_case TSRMLS_DC)
+int grapheme_strpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned char*needle, int32_t needle_len, int32_t offset, int32_t *puchar_pos, int f_ignore_case, int last TSRMLS_DC)
{
- UChar *uhaystack, *puhaystack, *uneedle;
- int32_t uhaystack_len, uneedle_len;
- int ret_pos;
+ UChar *uhaystack = NULL, *uneedle = NULL;
+ int32_t uhaystack_len = 0, uneedle_len = 0, char_pos, ret_pos, offset_pos = 0;
unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
- UBreakIterator* bi;
+ UBreakIterator* bi = NULL;
UErrorCode status;
+ UStringSearch* src = NULL;
+ UCollator *coll;
- *puchar_pos = -1;
-
+ if(puchar_pos) {
+ *puchar_pos = -1;
+ }
/* convert the strings to UTF-16. */
- uhaystack = NULL;
- uhaystack_len = 0;
status = U_ZERO_ERROR;
intl_convert_utf8_to_utf16(&uhaystack, &uhaystack_len, (char *) haystack, haystack_len, &status );
+ STRPOS_CHECK_STATUS(status, "Error converting input string to UTF-16");
- if ( U_FAILURE( status ) ) {
- /* Set global error code. */
- intl_error_set_code( NULL, status TSRMLS_CC );
-
- /* Set error messages. */
- intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
- if (uhaystack) {
- efree( uhaystack );
- }
- return -1;
- }
+ status = U_ZERO_ERROR;
+ intl_convert_utf8_to_utf16(&uneedle, &uneedle_len, (char *) needle, needle_len, &status );
+ STRPOS_CHECK_STATUS(status, "Error converting input string to UTF-16");
/* get a pointer to the haystack taking into account the offset */
- bi = NULL;
status = U_ZERO_ERROR;
bi = grapheme_get_break_iterator(u_break_iterator_buffer, &status TSRMLS_CC );
-
- puhaystack = grapheme_get_haystack_offset(bi, uhaystack, uhaystack_len, offset);
- uhaystack_len = (uhaystack_len - ( puhaystack - uhaystack));
+ STRPOS_CHECK_STATUS(status, "Failed to get iterator");
+ status = U_ZERO_ERROR;
+ ubrk_setText(bi, uhaystack, uhaystack_len, &status);
+ STRPOS_CHECK_STATUS(status, "Failed to set up iterator");
- if ( NULL == puhaystack ) {
-
- intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 TSRMLS_CC );
- if (uhaystack) {
- efree( uhaystack );
- }
- ubrk_close (bi);
-
- return -1;
- }
+ status = U_ZERO_ERROR;
+ src = usearch_open(uneedle, uneedle_len, uhaystack, uhaystack_len, "", bi, &status);
+ STRPOS_CHECK_STATUS(status, "Error creating search object");
- if ( f_ignore_case ) {
- grapheme_intl_case_fold(&uhaystack, &puhaystack, &uhaystack_len, &status );
+ if(f_ignore_case) {
+ coll = usearch_getCollator(src);
+ status = U_ZERO_ERROR;
+ ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_SECONDARY, &status);
+ STRPOS_CHECK_STATUS(status, "Error setting collation strength");
+ usearch_reset(src);
}
- uneedle = NULL;
- uneedle_len = 0;
- status = U_ZERO_ERROR;
- intl_convert_utf8_to_utf16(&uneedle, &uneedle_len, (char *) needle, needle_len, &status );
+ if(offset != 0) {
+ offset_pos = grapheme_get_haystack_offset(bi, offset);
+ if(offset_pos == -1) {
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ STRPOS_CHECK_STATUS(status, "Invalid search offset");
+ }
+ status = U_ZERO_ERROR;
+ usearch_setOffset(src, offset_pos, &status);
+ STRPOS_CHECK_STATUS(status, "Invalid search offset");
+ }
- if ( U_FAILURE( status ) ) {
- /* Set global error code. */
- intl_error_set_code( NULL, status TSRMLS_CC );
- /* Set error messages. */
- intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
- if (uhaystack) {
- efree( uhaystack );
+ if(last) {
+ char_pos = usearch_last(src, &status);
+ if(char_pos < offset_pos) {
+ /* last one is beyound our start offset */
+ char_pos = USEARCH_DONE;
}
- if (uneedle) {
- efree( uneedle );
- }
- ubrk_close (bi);
-
- return -1;
+ } else {
+ char_pos = usearch_next(src, &status);
}
-
- if ( f_ignore_case ) {
- grapheme_intl_case_fold(&uneedle, &uneedle, &uneedle_len, &status );
+ STRPOS_CHECK_STATUS(status, "Error looking up string");
+ if(char_pos != USEARCH_DONE && ubrk_isBoundary(bi, char_pos)) {
+ ret_pos = grapheme_count_graphemes(bi, uhaystack,char_pos);
+ if(puchar_pos) {
+ *puchar_pos = char_pos;
+ }
+ } else {
+ ret_pos = -1;
}
- ret_pos = grapheme_memnstr_grapheme(bi, puhaystack, uneedle, uneedle_len, puhaystack + uhaystack_len );
-
- *puchar_pos = ubrk_current(bi);
-
if (uhaystack) {
efree( uhaystack );
}
@@ -373,6 +209,7 @@ grapheme_strpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned ch
efree( uneedle );
}
ubrk_close (bi);
+ usearch_close (src);
return ret_pos;
}
@@ -432,8 +269,7 @@ int grapheme_split_string(const UChar *text, int32_t text_length, int boundary_a
/* }}} */
/* {{{ grapheme_count_graphemes */
-int32_t
-grapheme_count_graphemes(UBreakIterator *bi, UChar *string, int32_t string_len)
+int32_t grapheme_count_graphemes(UBreakIterator *bi, UChar *string, int32_t string_len)
{
int ret_len = 0;
int pos = 0;
@@ -455,85 +291,16 @@ grapheme_count_graphemes(UBreakIterator *bi, UChar *string, int32_t string_len)
}
/* }}} */
-/* {{{ grapheme_memnstr_grapheme: find needle in haystack using grapheme boundaries */
-int32_t
-grapheme_memnstr_grapheme(UBreakIterator *bi, UChar *haystack, UChar *needle, int32_t needle_len, UChar *end)
-{
- UChar *p = haystack;
- UChar ne = needle[needle_len-1];
- UErrorCode status;
- int32_t grapheme_offset;
-
- end -= needle_len;
-
- while (p <= end) {
-
- if ((p = u_memchr(p, *needle, (end-p+1))) && ne == p[needle_len-1]) {
-
- if (!u_memcmp(needle, p, needle_len - 1)) { /* needle_len - 1 works because if needle_len is 1, we've already tested the char */
-
- /* does the grapheme end here? */
-
- status = U_ZERO_ERROR;
- ubrk_setText (bi, haystack, (end - haystack) + needle_len, &status);
-
- if ( ubrk_isBoundary (bi, (p - haystack) + needle_len) ) {
-
- /* found it, get grapheme count offset */
- grapheme_offset = grapheme_count_graphemes(bi, haystack, (p - haystack));
-
- return grapheme_offset;
- }
- }
- }
-
- if (p == NULL) {
- return -1;
- }
-
- p++;
- }
-
- return -1;
-}
-
-/* }}} */
-
-/* {{{ grapheme_memrstr_grapheme: reverse find needle in haystack using grapheme boundaries */
-inline void *grapheme_memrchr_grapheme(const void *s, int c, int32_t n)
-{
- register unsigned char *e;
-
- if (n <= 0) {
- return NULL;
- }
-
- for (e = (unsigned char *)s + n - 1; e >= (unsigned char *)s; e--) {
- if (*e == (unsigned char)c) {
- return (void *)e;
- }
- }
-
- return NULL;
-}
-/* }}} */
/* {{{ grapheme_get_haystack_offset - bump the haystack pointer based on the grapheme count offset */
-UChar *
-grapheme_get_haystack_offset(UBreakIterator* bi, UChar *uhaystack, int32_t uhaystack_len, int32_t offset)
+int grapheme_get_haystack_offset(UBreakIterator* bi, int32_t offset)
{
- UErrorCode status;
int32_t pos;
int32_t (*iter_op)(UBreakIterator* bi);
int iter_incr;
- if ( NULL != bi ) {
- status = U_ZERO_ERROR;
- ubrk_setText (bi, uhaystack, uhaystack_len, &status);
- }
-
if ( 0 == offset ) {
- return uhaystack;
+ return 0;
}
if ( offset < 0 ) {
@@ -558,10 +325,10 @@ grapheme_get_haystack_offset(UBreakIterator* bi, UChar *uhaystack, int32_t uhays
}
if ( offset != 0 ) {
- return NULL;
+ return -1;
}
- return uhaystack + pos;
+ return pos;
}
/* }}} */
@@ -607,8 +374,7 @@ grapheme_strrpos_ascii(unsigned char *haystack, int32_t haystack_len, unsigned c
/* }}} */
/* {{{ grapheme_get_break_iterator: get a clone of the global character break iterator */
-UBreakIterator*
-grapheme_get_break_iterator(void *stack_buffer, UErrorCode *status TSRMLS_DC )
+UBreakIterator* grapheme_get_break_iterator(void *stack_buffer, UErrorCode *status TSRMLS_DC )
{
int32_t buffer_size;
diff --git a/ext/intl/grapheme/grapheme_util.h b/ext/intl/grapheme/grapheme_util.h
index c91aeaff7..14f3f22c4 100644
--- a/ext/intl/grapheme/grapheme_util.h
+++ b/ext/intl/grapheme/grapheme_util.h
@@ -23,35 +23,25 @@
/* get_break_interator: get a break iterator from the global structure */
UBreakIterator* grapheme_get_break_iterator(void *stack_buffer, UErrorCode *status TSRMLS_DC );
-void
-grapheme_substr_ascii(char *str, int32_t str_len, int32_t f, int32_t l, int argc, char **sub_str, int *sub_str_len);
+void grapheme_substr_ascii(char *str, int32_t str_len, int32_t f, int32_t l, int argc, char **sub_str, int *sub_str_len);
-int
-grapheme_strrpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned char*needle, int32_t needle_len, int32_t offset, int f_ignore_case TSRMLS_DC);
+int grapheme_strrpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned char*needle, int32_t needle_len, int32_t offset, int f_ignore_case TSRMLS_DC);
-int
-grapheme_strpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned char*needle, int32_t needle_len, int32_t offset, int *puchar_pos, int f_ignore_case TSRMLS_DC);
+int grapheme_strpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned char*needle, int32_t needle_len, int32_t offset, int *puchar_pos, int f_ignore_case, int last TSRMLS_DC);
int grapheme_ascii_check(const unsigned char *day, int32_t len);
int grapheme_split_string(const UChar *text, int32_t text_length, int boundary_array[], int boundary_array_len TSRMLS_DC );
-int32_t
-grapheme_count_graphemes(UBreakIterator *bi, UChar *string, int32_t string_len);
-
-int32_t
-grapheme_memnstr_grapheme(UBreakIterator *bi, UChar *haystack, UChar *needle, int32_t needle_len, UChar *end);
+int32_t grapheme_count_graphemes(UBreakIterator *bi, UChar *string, int32_t string_len);
inline void *grapheme_memrchr_grapheme(const void *s, int c, int32_t n);
-UChar *
-grapheme_get_haystack_offset(UBreakIterator* bi, UChar *uhaystack, int32_t uhaystack_len, int32_t offset);
+int grapheme_get_haystack_offset(UBreakIterator* bi, int32_t offset);
-int32_t
-grapheme_strrpos_ascii(unsigned char *haystack, int32_t haystack_len, unsigned char *needle, int32_t needle_len, int32_t offset);
+int32_t grapheme_strrpos_ascii(unsigned char *haystack, int32_t haystack_len, unsigned char *needle, int32_t needle_len, int32_t offset);
-UBreakIterator*
-grapheme_get_break_iterator(void *stack_buffer, UErrorCode *status TSRMLS_DC );
+UBreakIterator* grapheme_get_break_iterator(void *stack_buffer, UErrorCode *status TSRMLS_DC );
/* OUTSIDE_STRING: check if (possibly negative) long offset is outside the string with int32_t length */
#define OUTSIDE_STRING(offset, max_len) ( offset <= INT32_MIN || offset > INT32_MAX || (offset < 0 ? -offset > (long) max_len : offset >= (long) max_len) )
diff --git a/ext/intl/resourcebundle/resourcebundle.c b/ext/intl/resourcebundle/resourcebundle.c
index 6d39dfb7e..f5475faf1 100644
--- a/ext/intl/resourcebundle/resourcebundle.c
+++ b/ext/intl/resourcebundle/resourcebundle.c
@@ -41,7 +41,7 @@ void resourcebundle_extract_value( zval *return_value, ResourceBundle_object *so
case URES_STRING:
ufield = ures_getString( source->child, &ilen, &INTL_DATA_ERROR_CODE(source) );
INTL_METHOD_CHECK_STATUS(source, "Failed to retrieve string value");
- INTL_METHOD_RETVAL_UTF8(source, ufield, ilen, 0);
+ INTL_METHOD_RETVAL_UTF8(source, (UChar *)ufield, ilen, 0);
break;
case URES_BINARY:
diff --git a/ext/intl/resourcebundle/resourcebundle_class.c b/ext/intl/resourcebundle/resourcebundle_class.c
index a6a73f5f0..7c1a5c28b 100644
--- a/ext/intl/resourcebundle/resourcebundle_class.c
+++ b/ext/intl/resourcebundle/resourcebundle_class.c
@@ -162,8 +162,8 @@ PHP_FUNCTION( resourcebundle_create )
/* {{{ resourcebundle_array_fetch */
static void resourcebundle_array_fetch(zval *object, zval *offset, zval *return_value, int fallback TSRMLS_DC)
{
- int32_t meindex;
- char * mekey;
+ int32_t meindex = 0;
+ char * mekey = NULL;
long mekeylen;
zend_bool is_numeric = 0;
char *pbuf;
diff --git a/ext/intl/tests/badargs.phpt b/ext/intl/tests/badargs.phpt
index 9232bbf0c..264af73ac 100644
--- a/ext/intl/tests/badargs.phpt
+++ b/ext/intl/tests/badargs.phpt
@@ -1,5 +1,5 @@
--TEST--
-Check that bad argumens return the same
+Check that bad arguments return the same
--SKIPIF--
<?php if( !extension_loaded( 'intl' ) ) print 'skip'; ?>
--FILE--
diff --git a/ext/intl/tests/bug61860.phpt b/ext/intl/tests/bug61860.phpt
new file mode 100644
index 000000000..123d9ff23
--- /dev/null
+++ b/ext/intl/tests/bug61860.phpt
@@ -0,0 +1,18 @@
+--TEST--
+Bug #61860: Offsets may be wrong for grapheme_stri* functions
+--SKIPIF--
+<?php if( !extension_loaded( 'intl' ) ) print 'skip'; ?>
+--FILE--
+<?php
+$haystack = 'Auf der Straße nach Paris habe ich mit dem Fahrer gesprochen';
+var_dump(
+ grapheme_stripos($haystack, 'pariS '),
+ grapheme_stristr($haystack, 'paRis '),
+ grapheme_substr($haystack, grapheme_stripos($haystack, 'Paris'))
+);
+
+?>
+--EXPECT--
+int(20)
+string(40) "Paris habe ich mit dem Fahrer gesprochen"
+string(40) "Paris habe ich mit dem Fahrer gesprochen"
diff --git a/ext/intl/tests/bug62759.phpt b/ext/intl/tests/bug62759.phpt
new file mode 100644
index 000000000..d4126b752
--- /dev/null
+++ b/ext/intl/tests/bug62759.phpt
@@ -0,0 +1,24 @@
+--TEST--
+Bug #62759: Buggy grapheme_substr() on edge case
+--SKIPIF--
+<?php if( !extension_loaded( 'intl' ) ) print 'skip'; ?>
+--FILE--
+<?php
+var_dump(substr('deja', 1, -4));
+var_dump(substr('deja', -1, 0));
+var_dump(grapheme_substr('deja', 1, -4));
+var_dump(intl_get_error_message());
+var_dump(grapheme_substr('deja', -1, 0));
+var_dump(grapheme_substr('déjà', 1, -4));
+var_dump(intl_get_error_message());
+var_dump(grapheme_substr('déjà', -1, 0));
+?>
+--EXPECT--
+bool(false)
+string(0) ""
+bool(false)
+string(61) "grapheme_substr: invalid parameters: U_ILLEGAL_ARGUMENT_ERROR"
+string(0) ""
+bool(false)
+string(65) "grapheme_substr: length is beyond start: U_ILLEGAL_ARGUMENT_ERROR"
+string(0) ""
diff --git a/ext/intl/tests/transliterator_create_error.phpt b/ext/intl/tests/transliterator_create_error.phpt
index 31aef68fe..4f2d04ae7 100644
--- a/ext/intl/tests/transliterator_create_error.phpt
+++ b/ext/intl/tests/transliterator_create_error.phpt
@@ -6,15 +6,15 @@ Transliterator::create (error)
<?php
ini_set("intl.error_level", E_WARNING);
-Transliterator::create("inexistant id");
+Transliterator::create("inexistent id");
echo intl_get_error_message(), "\n";
Transliterator::create("bad UTF-8 \x8F");
echo intl_get_error_message(), "\n";
echo "Done.\n";
--EXPECTF--
-Warning: Transliterator::create(): transliterator_create: unable to open ICU transliterator with id "inexistant id" in %s on line %d
-transliterator_create: unable to open ICU transliterator with id "inexistant id": U_INVALID_ID
+Warning: Transliterator::create(): transliterator_create: unable to open ICU transliterator with id "inexistent id" in %s on line %d
+transliterator_create: unable to open ICU transliterator with id "inexistent id": U_INVALID_ID
Warning: Transliterator::create(): String conversion of id to UTF-16 failed in %s on line %d
String conversion of id to UTF-16 failed: U_INVALID_CHAR_FOUND
diff --git a/ext/intl/tests/transliterator_transliterate_variant1.phpt b/ext/intl/tests/transliterator_transliterate_variant1.phpt
index fc77a4e3a..dcd01efd4 100644
--- a/ext/intl/tests/transliterator_transliterate_variant1.phpt
+++ b/ext/intl/tests/transliterator_transliterate_variant1.phpt
@@ -13,7 +13,7 @@ echo transliterator_transliterate("\x8F", $str), "\n";
echo intl_get_error_message(), "\n";
class A {
-function __toString() { return "inexistant id"; }
+function __toString() { return "inexistent id"; }
}
echo transliterator_transliterate(new A(), $str), "\n";
@@ -29,9 +29,9 @@ Warning: transliterator_transliterate(): Could not create transliterator with ID
String conversion of id to UTF-16 failed: U_INVALID_CHAR_FOUND
-Warning: transliterator_transliterate(): transliterator_create: unable to open ICU transliterator with id "inexistant id" in %s on line %d
+Warning: transliterator_transliterate(): transliterator_create: unable to open ICU transliterator with id "inexistent id" in %s on line %d
-Warning: transliterator_transliterate(): Could not create transliterator with ID "inexistant id" (transliterator_create: unable to open ICU transliterator with id "inexistant id": U_INVALID_ID) in %s on line %d
+Warning: transliterator_transliterate(): Could not create transliterator with ID "inexistent id" (transliterator_create: unable to open ICU transliterator with id "inexistent id": U_INVALID_ID) in %s on line %d
-transliterator_create: unable to open ICU transliterator with id "inexistant id": U_INVALID_ID
+transliterator_create: unable to open ICU transliterator with id "inexistent id": U_INVALID_ID
Done.