1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
|
/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1985-2009 AT&T Intellectual Property *
* and is licensed under the *
* Common Public License, Version 1.0 *
* by AT&T Intellectual Property *
* *
* A copy of the License is available at *
* http://www.opensource.org/licenses/cpl1.0.txt *
* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* Glenn Fowler <gsf@research.att.com> *
* David Korn <dgk@research.att.com> *
* Phong Vo <kpv@research.att.com> *
* *
***********************************************************************/
#pragma prototyped
/*
* posix regex record executor
* multiple record sized-buffer interface
*/
#include "reglib.h"
/*
 * call regnexec() on records selected by Boyer-Moore
 *
 * The buffer s[0..len) is treated as a sequence of records delimited by
 * the byte sep. A Boyer-Moore scan locates candidate occurrences of the
 * literal carried by the compiled pattern; each candidate is widened to
 * its enclosing record, and the record is confirmed with regnexec()
 * unless the literal alone decides the match and no submatches were
 * requested (nmatch == 0).
 *
 *	p		compiled pattern; p->env->rex must be a REX_BM node
 *	s,len		buffer to scan
 *	nmatch,match	passed through to regnexec()
 *	flags		passed through to regnexec(); REG_INVERT selects the
 *			records that do not match instead of those that do
 *	sep		record separator byte
 *	handle,record	callback invoked as (*record)(handle, start, length)
 *			for every selected record; the separator is not
 *			included in length
 *
 * Returns REG_BADPAT if s or p is null or the pattern is not headed by a
 * REX_BM node. Otherwise returns 0 when the scan runs to completion, the
 * first non-zero value returned by record (which stops the scan), or the
 * first regnexec() result that is neither 0 nor REG_NOMATCH.
 *
 * env->rex is temporarily redirected while regnexec() runs and is
 * restored before returning (except on the REG_BADPAT return, where it
 * has not been modified).
 */
int
regrexec(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags, int sep, void* handle, regrecord_t record)
{
	register unsigned char*	buf;
	register unsigned char*	beg;	/* start of the first record not yet reported/skipped */
	register unsigned char*	l;	/* left edge of the candidate record */
	register unsigned char*	r;	/* right edge (separator or end) of the candidate record */
	register unsigned char*	x;
	register size_t*	skip;
	register size_t*	fail;
	register Bm_mask_t**	mask;
	register size_t		index;	/* offset in buf of the byte aligned with the literal's last byte */
	register int		n;
	unsigned char*		end;
	size_t			mid;
	int			complete;
	int			exactlen;
	int			leftlen;
	int			rightlen;
	int			inv;
	Bm_mask_t		m;
	Env_t*			env;
	Rex_t*			e;

	if (!s || !p || !(env = p->env) || (e = env->rex)->type != REX_BM)
		return REG_BADPAT;
	inv = (flags & REG_INVERT) != 0;
	buf = beg = (unsigned char*)s;
	end = buf + len;
	/* the skip loop stops once index reaches mid: len less bm.right, clamped at 0 */
	mid = (len < e->re.bm.right) ? 0 : (len - e->re.bm.right);
	skip = e->re.bm.skip;
	fail = e->re.bm.fail;
	mask = e->re.bm.mask;
	/* the literal alone decides the match only if no submatches are wanted */
	complete = e->re.bm.complete && !nmatch;
	exactlen = e->re.bm.size;
	leftlen = e->re.bm.left + exactlen;
	rightlen = exactlen + e->re.bm.right;
	/* first offset to probe; leftlen then becomes a length (offset + 1) */
	index = leftlen++;

	/*
	 * The skip loop below reads buf[index] before testing any bound, so a
	 * buffer too short to hold even the first probe position must be
	 * rejected here to avoid reading past buf + len. Every later re-entry
	 * to the loop is already preceded by an index < len (or < mid) check.
	 */
	if (index >= len)
		goto impossible;
	for (;;)
	{
		/*
		 * Bad-character skip. NOTE(review): HIT is defined outside this
		 * file; the code relies on skip[] adding HIT when the probed byte
		 * can end the literal, which both terminates this loop and is
		 * undone just below -- confirm against the table builder.
		 */
		while ((index += skip[buf[index]]) < mid);
		if (index < HIT)
			goto impossible;
		index -= HIT;
		/* verify the literal right to left using the per-position masks */
		m = mask[n = exactlen - 1][buf[index]];
		do
		{
			if (!n--)
				goto possible;
		} while (m &= mask[n][buf[--index]]);
		/* mismatch: shift by the failure table and retry if still inside the buffer */
		if ((index += fail[n + 1]) < len)
			continue;
	impossible:
		/* no further candidate can exist in the buffer */
		if (inv)
		{
			/* every remaining record is a non-match: report them all */
			l = r = buf + len;
			goto invert;
		}
		n = 0;
		goto done;
	possible:
		/* literal found at buf + index: widen to the enclosing record */
		r = (l = buf + index) + exactlen;
		while (l > beg)
			if (*--l == sep)
			{
				l++;
				break;
			}
		/* not enough room in this record to the left of the literal's end */
		if ((r - l) < leftlen)
			goto spanned;
		while (r < end && *r != sep)
			r++;
		/* not enough room in this record from the literal's start rightwards */
		if ((r - (buf + index)) < rightlen)
			goto spanned;
		/*
		 * Confirm the record unless the literal is decisive. Records longer
		 * than 128 bytes are matched starting from the REX_BM node itself,
		 * shorter ones from the node that follows it.
		 * NOTE(review): if env->rex is ever assigned null here, regnexec()
		 * is not called and n still holds the verification loop counter
		 * when it is compared with REG_NOMATCH below -- confirm e->next is
		 * never null on this path.
		 */
		if (complete || ((env->rex = ((r - l) > 128) ? e : e->next) && !(n = regnexec(p, (char*)l, r - l, nmatch, match, flags))))
		{
			if (inv)
			{
			invert:
				/* report each record in [beg, l): those passed over without a match */
				x = beg;
				while (beg < l)
				{
					while (x < l && *x != sep)
						x++;
					if ((n = (*record)(handle, (char*)beg, x - beg)))
						goto done;
					beg = ++x;
				}
			}
			else if ((n = (*record)(handle, (char*)l, r - l)))
				goto done;
			/* resume probing in the record after r; stop if it cannot fit */
			if ((index = (r - buf) + leftlen) >= len)
			{
				/* inverted mode: whatever follows r is reported in a single call */
				n = (inv && (++r - buf) < len) ? (*record)(handle, (char*)r, (buf + len) - r): 0;
				goto done;
			}
			beg = r + 1;
		}
		else if (n != REG_NOMATCH)
			goto done;
		else
		{
		spanned:
			/* candidate rejected: step past it and keep scanning */
			if ((index += exactlen) >= mid)
				goto impossible;
		}
	}
 done:
	/* undo the temporary redirection made for regnexec() */
	env->rex = e;
	return n;
}
|