diff options
Diffstat (limited to 'usr/src/lib/libast/common/regex/regclass.c')
-rw-r--r-- | usr/src/lib/libast/common/regex/regclass.c | 243 |
1 files changed, 243 insertions, 0 deletions
diff --git a/usr/src/lib/libast/common/regex/regclass.c b/usr/src/lib/libast/common/regex/regclass.c new file mode 100644 index 0000000000..85eac41f55 --- /dev/null +++ b/usr/src/lib/libast/common/regex/regclass.c @@ -0,0 +1,243 @@ +/*********************************************************************** +* * +* This software is part of the ast package * +* Copyright (c) 1985-2007 AT&T Knowledge Ventures * +* and is licensed under the * +* Common Public License, Version 1.0 * +* by AT&T Knowledge Ventures * +* * +* A copy of the License is available at * +* http://www.opensource.org/licenses/cpl1.0.txt * +* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * +* * +* Information and Software Systems Research * +* AT&T Research * +* Florham Park NJ * +* * +* Glenn Fowler <gsf@research.att.com> * +* David Korn <dgk@research.att.com> * +* Phong Vo <kpv@research.att.com> * +* * +***********************************************************************/ +#pragma prototyped +/* + * RE character class support + */ + +#include "reglib.h" + +struct Ctype_s; typedef struct Ctype_s Ctype_t; + +struct Ctype_s +{ + const char* name; + size_t size; + regclass_t ctype; + Ctype_t* next; +#if _lib_wctype + wctype_t wtype; +#endif +}; + +static Ctype_t* ctypes; + +#define CTYPES 12 +#if _lib_wctype +#define WTYPES 8 +#else +#define WTYPES 0 +#endif + +/* + * this stuff gets around posix failure to define isblank, + * and the fact that ctype functions are macros + * and any local extensions that may not even have functions or macros + */ + +#if _need_iswblank + +int +_reg_iswblank(wint_t wc) +{ + static int initialized; + static wctype_t wt; + + if (!initialized) + { + initialized = 1; + wt = wctype("blank"); + } + return iswctype(wc, wt); +} + +#endif + +static int Isalnum(int c) { return iswalnum(c); } +static int Isalpha(int c) { return iswalpha(c); } +static int Isblank(int c) { return iswblank(c); } +static int Iscntrl(int c) { return iswcntrl(c); } +static int Isdigit(int c) { return iswdigit(c); } +static int Notdigit(int c) { return !iswdigit(c); } +static int Isgraph(int c) { return iswgraph(c); } +static int Islower(int c) { return iswlower(c); } +static int Isprint(int c) { return iswprint(c); } +static int Ispunct(int c) { return iswpunct(c); } +static int Isspace(int c) { return iswspace(c); } +static int Notspace(int c) { return !iswspace(c); } +static int Isupper(int c) { return iswupper(c); } +static int Isword(int c) { return iswalnum(c) || c == '_'; } +static int Notword(int c) { return !iswalnum(c) && c != '_'; } +static int Isxdigit(int c) { return iswxdigit(c);} + +#if _lib_wctype + +static int Is_wc_1(int); +static int Is_wc_2(int); +static int Is_wc_3(int); +static int Is_wc_4(int); +static int Is_wc_5(int); +static int Is_wc_6(int); +static int Is_wc_7(int); +static int Is_wc_8(int); + +#endif + +#define SZ(s) s,(sizeof(s)-1) + +static Ctype_t ctype[] = +{ + { SZ("alnum"), Isalnum }, + { SZ("alpha"), Isalpha }, + { SZ("blank"), Isblank }, + { SZ("cntrl"), Iscntrl }, + { SZ("digit"), Isdigit }, + { SZ("graph"), Isgraph }, + { SZ("lower"), Islower }, + { SZ("print"), Isprint }, + { SZ("punct"), Ispunct }, + { SZ("space"), Isspace }, + { SZ("upper"), Isupper }, + { SZ("word"), Isword }, + { SZ("xdigit"),Isxdigit}, +#if _lib_wctype + { 0, 0, Is_wc_1 }, + { 0, 0, Is_wc_2 }, + { 0, 0, Is_wc_3 }, + { 0, 0, Is_wc_4 }, + { 0, 0, Is_wc_5 }, + { 0, 0, Is_wc_6 }, + { 0, 0, Is_wc_7 }, + { 0, 0, Is_wc_8 }, +#endif +}; + +#if _lib_wctype + +static int Is_wc_1(int c) { return iswctype(c, ctype[CTYPES+0].wtype); } +static int Is_wc_2(int c) { return iswctype(c, ctype[CTYPES+1].wtype); } +static int Is_wc_3(int c) { return iswctype(c, ctype[CTYPES+2].wtype); } +static int Is_wc_4(int c) { return iswctype(c, ctype[CTYPES+3].wtype); } +static int Is_wc_5(int c) { return iswctype(c, ctype[CTYPES+4].wtype); } +static int Is_wc_6(int c) { return iswctype(c, ctype[CTYPES+5].wtype); } +static int Is_wc_7(int c) { return iswctype(c, ctype[CTYPES+6].wtype); } +static int Is_wc_8(int c) { return iswctype(c, ctype[CTYPES+7].wtype); } + +#endif + +/* + * return pointer to ctype function for :class:] in s + * s points to the first char after the initial [ + * if e!=0 it points to next char in s + * 0 returned on error + */ + +regclass_t +regclass(const char* s, char** e) +{ + register Ctype_t* cp; + register int c; + register size_t n; + register const char* t; + + if (c = *s++) + { + for (t = s; *t && (*t != c || *(t + 1) != ']'); t++); + if (*t != c) + return 0; + n = t - s; + for (cp = ctypes; cp; cp = cp->next) + if (n == cp->size && strneq(s, cp->name, n)) + goto found; + for (cp = ctype; cp < &ctype[elementsof(ctype)]; cp++) + { +#if _lib_wctype + if (!cp->size && (cp->name = (const char*)memdup(s, n + 1))) + { + *((char*)cp->name + n) = 0; + /* mvs.390 needs the (char*) cast -- barf */ + if (!(cp->wtype = wctype((char*)cp->name))) + { + free((char*)cp->name); + return 0; + } + cp->size = n; + goto found; + } +#endif + if (n == cp->size && strneq(s, cp->name, n)) + goto found; + } + } + return 0; + found: + if (e) + *e = (char*)t + 2; + return cp->ctype; +} + +/* + * associate the ctype function fun with name + */ + +int +regaddclass(const char* name, regclass_t fun) +{ + register Ctype_t* cp; + register Ctype_t* np; + register size_t n; + + n = strlen(name); + for (cp = ctypes; cp; cp = cp->next) + if (cp->size == n && strneq(name, cp->name, n)) + { + cp->ctype = fun; + return 0; + } + if (!(np = newof(0, Ctype_t, 1, n + 1))) + return REG_ESPACE; + np->size = n; + np->name = strcpy((char*)(np + 1), name); + np->ctype = fun; + np->next = ctypes; + ctypes = np; + return 0; +} + +/* + * return pointer to ctype function for token + */ + +regclass_t +classfun(int type) +{ + switch (type) + { + case T_ALNUM: return Isword; + case T_ALNUM_NOT: return Notword; + case T_DIGIT: return Isdigit; + case T_DIGIT_NOT: return Notdigit; + case T_SPACE: return Isspace; + case T_SPACE_NOT: return Notspace; + } + return 0; +} |