1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
|
$NetBSD: patch-bd,v 1.1 2008/03/20 17:07:54 drochner Exp $
--- i18n/rematch.cpp.orig 2005-08-25 20:02:20.000000000 +0200
+++ i18n/rematch.cpp
@@ -30,6 +30,15 @@
U_NAMESPACE_BEGIN
+// Limit the size of the back track stack, to avoid system failures caused
+// by heap exhaustion. Units are in 32 bit words, not bytes.
+// This value puts ICU's limits higher than most other regexp implementations,
+// which use recursion rather than the heap, and take more storage per
+// backtrack point.
+// This constant is _temporary_. Proper API to control the value will added.
+//
+static const int32_t BACKTRACK_STACK_CAPACITY = 8000000;
+
//-----------------------------------------------------------------------------
//
// Constructor and Destructor
@@ -53,6 +62,8 @@ RegexMatcher::RegexMatcher(const RegexPa
}
if (fStack == NULL || fData == NULL) {
fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
+ } else {
+ fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY);
}
reset(*RegexStaticSets::gStaticSets->fEmptyString);
@@ -78,6 +89,8 @@ RegexMatcher::RegexMatcher(const Unicode
}
if (fStack == NULL || fData == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
+ } else {
+ fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY);
}
reset(input);
}
@@ -102,6 +115,8 @@ RegexMatcher::RegexMatcher(const Unicode
}
if (fStack == NULL || fData == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
+ } else {
+ fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY);
}
reset(*RegexStaticSets::gStaticSets->fEmptyString);
}
@@ -1015,6 +1030,14 @@ UBool RegexMatcher::isUWordBoundary(int3
inline REStackFrame *RegexMatcher::StateSave(REStackFrame *fp, int32_t savePatIdx, int32_t frameSize, UErrorCode &status) {
// push storage for a new frame.
int32_t *newFP = fStack->reserveBlock(frameSize, status);
+ if (newFP == NULL) {
+ // Heap allocation error on attempted stack expansion.
+ // We need to return a writable stack frame, so just return the
+ // previous frame. The match operation will stop quickly
+ // becuase of the error status, after which the frame will never
+ // be looked at again.
+ return fp;
+ }
fp = (REStackFrame *)(newFP - frameSize); // in case of realloc of stack.
// New stack frame = copy of old top frame.
@@ -1030,8 +1053,8 @@ inline REStackFrame *RegexMatcher::State
fp->fPatIdx = savePatIdx;
return (REStackFrame *)newFP;
}
-
-
+
+
//--------------------------------------------------------------------------------
//
// MatchAt This is the actual matching engine.
@@ -2262,6 +2285,7 @@ GC_Done:
}
if (U_FAILURE(status)) {
+ isMatch = FALSE;
break;
}
}
|