1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
|
$NetBSD: patch-CVE-2014-7923+7926,v 1.1 2015/03/06 14:43:15 spz Exp $
patches for CVE-2014-7923 and CVE-2014-7926 from
https://chromium.googlesource.com/chromium/deps/icu52/+/6242e2fbb36f486f2c0addd1c3cef67fc4ed33fb
--- i18n/regexcmp.cpp.orig 2014-10-03 16:10:36.000000000 +0000
+++ i18n/regexcmp.cpp
@@ -2132,6 +2132,10 @@ void RegexCompile::handleCloseParen() {
int32_t patEnd = fRXPat->fCompiledPat->size() - 1;
int32_t minML = minMatchLength(fMatchOpenParen, patEnd);
int32_t maxML = maxMatchLength(fMatchOpenParen, patEnd);
+ if (URX_TYPE(maxML) != 0) {
+ error(U_REGEX_LOOK_BEHIND_LIMIT);
+ break;
+ }
if (maxML == INT32_MAX) {
error(U_REGEX_LOOK_BEHIND_LIMIT);
break;
@@ -2165,6 +2169,10 @@ void RegexCompile::handleCloseParen() {
int32_t patEnd = fRXPat->fCompiledPat->size() - 1;
int32_t minML = minMatchLength(fMatchOpenParen, patEnd);
int32_t maxML = maxMatchLength(fMatchOpenParen, patEnd);
+ if (URX_TYPE(maxML) != 0) {
+ error(U_REGEX_LOOK_BEHIND_LIMIT);
+ break;
+ }
if (maxML == INT32_MAX) {
error(U_REGEX_LOOK_BEHIND_LIMIT);
break;
@@ -2328,7 +2336,15 @@ UBool RegexCompile::compileInlineInterva
int32_t topOfBlock = blockTopLoc(FALSE);
if (fIntervalUpper == 0) {
// Pathological case. Attempt no matches, as if the block doesn't exist.
+ // Discard the generated code for the block.
+ // If the block included parens, discard the info pertaining to them as well.
fRXPat->fCompiledPat->setSize(topOfBlock);
+ if (fMatchOpenParen >= topOfBlock) {
+ fMatchOpenParen = -1;
+ }
+ if (fMatchCloseParen >= topOfBlock) {
+ fMatchCloseParen = -1;
+ }
return TRUE;
}
--- i18n/regexcmp.h.orig 2014-10-03 16:10:36.000000000 +0000
+++ i18n/regexcmp.h
@@ -187,7 +187,9 @@ private:
int32_t fMatchOpenParen; // The position in the compiled pattern
// of the slot reserved for a state save
// at the start of the most recently processed
- // parenthesized block.
+ // parenthesized block. Updated when processing
+ // a close to the location for the corresponding open.
+
int32_t fMatchCloseParen; // The position in the pattern of the first
// location after the most recently processed
// parenthesized block.
--- test/testdata/regextst.txt.orig 2014-10-03 16:09:58.000000000 +0000
+++ test/testdata/regextst.txt
@@ -1178,6 +1178,24 @@
"(?<=a{1,})bc" E "aaaa<0>bc</0>def" # U_REGEX_LOOK_BEHIND_LIMIT error.
"(?<=(?:){11})bc" "<0>bc</0>" # Empty (?:) expression.
+# Bug 11369
+# Incorrect optimization of patterns with a zero length quantifier {0}
+
+"(.|b)(|b){0}\$(?#xxx){3}(?>\D*)" "AAAAABBBBBCCCCCDDDDEEEEE"
+"(|b)ab(c)" "<0><1></1>ab<2>c</2></0>"
+"(|b){0}a{3}(D*)" "<0>aaa<2></2></0>"
+"(|b){0,1}a{3}(D*)" "<0><1></1>aaa<2></2></0>"
+"((|b){0})a{3}(D*)" "<0><1></1>aaa<3></3></0>"
+
+# Bug 11370
+# Max match length computation of look-behind expression gives result that is too big to fit in the
+# 24 bit operand portion of the compiled code. Expressions should fail to compile
+# (Look-behind match length must be bounded. This case is treated as unbounded, an error.)
+
+"(?<!(0123456789a){10000000})x" E "no match"
+"(?<!\\ubeaf(\\ubeaf{11000}){11000})" E "no match"
+
+
# Bug 10835
# Match Start Set not being correctly computed for case insensitive patterns.
# (Test here is to dump the compiled pattern & manually check the start set.)
|