1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
|
/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1985-2009 AT&T Intellectual Property *
* and is licensed under the *
* Common Public License, Version 1.0 *
* by AT&T Intellectual Property *
* *
* A copy of the License is available at *
* http://www.opensource.org/licenses/cpl1.0.txt *
* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* Glenn Fowler <gsf@research.att.com> *
* David Korn <dgk@research.att.com> *
* Phong Vo <kpv@research.att.com> *
* *
***********************************************************************/
#pragma prototyped
/*
* D. G. Korn
* G. S. Fowler
* AT&T Research
*
* match shell file patterns
* this interface is a wrapper on regex
*
*	sh pattern      egrep RE        description
*	----------      --------        -----------
*	*               .*              0 or more chars
*	?               .               any single char
*	[.]             [.]             char class
*	[!.]            [^.]            negated char class
*	[[:.:]]         [[:.:]]         ctype class
*	[[=.=]]         [[=.=]]         equivalence class
*	[[...]]         [[...]]         collation element
*	*(.)            (.)*            0 or more of
*	+(.)            (.)+            1 or more of
*	?(.)            (.)?            0 or 1 of
*	(.)             (.)             1 of
*	@(.)            (.)             1 of
*	a|b             a|b             a or b
*	\#                              () subgroup back reference [1-9]
*	a&b                             a and b
*	!(.)                            none of
*
* \ used to escape metacharacters
*
* *, ?, (, |, &, ), [, \ must be \'d outside of [...]
* only ] must be \'d inside [...]
*
*/
#include <ast.h>
#include <regex.h>
/*
 * scratch match array shared by every strgrpmatch() call
 * it is only ever grown, never released, by the code in this file
 */
struct State_s
{
	regmatch_t*	match;		/* regexec() result slots	*/
	int		nmatch;		/* number of slots in match	*/
};

static struct State_s	matchstate;
/*
 * subgroup match
 *
 * b is the subject string, p the shell pattern, sub an optional
 * array with room for n (begin,end) offset pairs and flags a set
 * of STR_* bits; if REG_ADVANCE is set in flags then the remaining
 * bits are handed to the regex layer as is instead of being
 * translated from STR_* bits
 *
 * 0 returned if no match, or if the pattern cannot be compiled or
 * the match array cannot be grown
 * 1 returned on a match when sub is 0 or n <= 0
 * otherwise the number of subgroups in the pattern plus one (for
 * the whole match) is returned; at most n pairs are stored in sub,
 * so the return value may exceed the number of pairs stored
 *
 * match group begin offsets are even elements of sub
 * match group end offsets are odd elements of sub
 * the matched string is from b+sub[0] up to but not
 * including b+sub[1]
 */

int
strgrpmatch(const char* b, const char* p, int* sub, int n, register int flags)
{
	register regex_t*	re;
	register int*		end;
	register int		i;
	register regflags_t	reflags;
	regmatch_t*		grown;

	/*
	 * 0 and empty patterns are special
	 * when both b and p are 0 regcache() is called with a 0
	 * pattern -- presumably a cache flush request, confirm
	 * against the regcache() documentation
	 */

	if (!p || !b)
	{
		if (!p && !b)
			regcache(NiL, 0, NiL);
		return 0;
	}
	if (!*p)
	{
		/*
		 * the empty pattern only matches the empty string
		 */

		if (sub && n > 0)
			sub[0] = sub[1] = 0;
		return *b == 0;
	}

	/*
	 * convert flags
	 */

	if (flags & REG_ADVANCE)
		reflags = flags & ~REG_ADVANCE;
	else
	{
		reflags = REG_SHELL|REG_AUGMENTED;
		if (!(flags & STR_MAXIMAL))
			reflags |= REG_MINIMAL;
		if (flags & STR_GROUP)
			reflags |= REG_SHELL_GROUP;
		if (flags & STR_LEFT)
			reflags |= REG_LEFT;
		if (flags & STR_RIGHT)
			reflags |= REG_RIGHT;
		if (flags & STR_ICASE)
			reflags |= REG_ICASE;
	}
	if (!sub || n <= 0)
		reflags |= REG_NOSUB;
	if (!(re = regcache(p, reflags, NiL)))
		return 0;
	if (n > matchstate.nmatch)
	{
		/*
		 * grow through a temporary so that matchstate.match and
		 * matchstate.nmatch always describe the same array;
		 * storing a failed result directly would leave match 0
		 * while nmatch still claimed the old capacity, and a
		 * later call with a smaller n would then hand regexec()
		 * a 0 array with a non-zero count
		 */

		if (!(grown = newof(matchstate.match, regmatch_t, n, 0)))
			return 0;
		matchstate.match = grown;
		matchstate.nmatch = n;
	}

	/*
	 * the compile-only bits are masked out of the exec flags
	 */

	if (regexec(re, b, n, matchstate.match, reflags & ~(REG_MINIMAL|REG_SHELL_GROUP|REG_LEFT|REG_RIGHT|REG_ICASE)))
		return 0;
	if (!sub || n <= 0)
		return 1;

	/*
	 * copy out the whole match and each subgroup, stopping at
	 * whichever runs out first: the caller's n pairs or the
	 * re_nsub+1 groups in the pattern
	 */

	i = re->re_nsub;
	end = sub + n * 2;
	for (n = 0; sub < end && n <= i; n++)
	{
		*sub++ = matchstate.match[n].rm_so;
		*sub++ = matchstate.match[n].rm_eo;
	}
	return i + 1;
}
/*
 * whole string shell pattern match
 * s is matched against p anchored at both ends, longest match,
 * with no subgroup offsets requested
 * whatever strgrpmatch() returns is returned:
 * 1 for match 0 otherwise
 */

int
strmatch(const char* s, const char* p)
{
	int	anchored;

	anchored = STR_MAXIMAL|STR_LEFT|STR_RIGHT;
	return strgrpmatch(s, p, NiL, 0, anchored);
}
/*
 * leading substring match
 * p is matched against s anchored on the left only
 * non-zero flags selects the longest match
 * pointer to the first char after the matched prefix returned
 * 0 returned if no match
 *
 * OBSOLETE: use strgrpmatch()
 */

char*
strsubmatch(const char* s, const char* p, int flags)
{
	int	span[2];
	int	mode;

	mode = STR_LEFT;
	if (flags)
		mode |= STR_MAXIMAL;
	if (!strgrpmatch(s, p, span, 1, mode))
		return (char*)0;

	/*
	 * span[1] is the end offset of the whole match
	 */

	return (char*)s + span[1];
}
|