diff options
Diffstat (limited to 'usr/src/lib/libast/common/comp/regcmp.c')
-rw-r--r-- | usr/src/lib/libast/common/comp/regcmp.c | 211 |
1 files changed, 211 insertions, 0 deletions
diff --git a/usr/src/lib/libast/common/comp/regcmp.c b/usr/src/lib/libast/common/comp/regcmp.c new file mode 100644 index 0000000000..bba7a45697 --- /dev/null +++ b/usr/src/lib/libast/common/comp/regcmp.c @@ -0,0 +1,211 @@ +/*********************************************************************** +* * +* This software is part of the ast package * +* Copyright (c) 1985-2007 AT&T Knowledge Ventures * +* and is licensed under the * +* Common Public License, Version 1.0 * +* by AT&T Knowledge Ventures * +* * +* A copy of the License is available at * +* http://www.opensource.org/licenses/cpl1.0.txt * +* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * +* * +* Information and Software Systems Research * +* AT&T Research * +* Florham Park NJ * +* * +* Glenn Fowler <gsf@research.att.com> * +* David Korn <dgk@research.att.com> * +* Phong Vo <kpv@research.att.com> * +* * +***********************************************************************/ +#pragma prototyped +/* + * regcmp implementation + */ + +#include <ast.h> +#include <libgen.h> +#include <regex.h> +#include <align.h> + +#define INC (2*1024) +#define TOT (16*1024) +#define SUB 10 + +typedef struct +{ + char* cur; + regex_t re; + unsigned char sub[SUB]; + int nsub; + size_t size; + char buf[ALIGN_BOUND2]; +} Regex_t; + +__DEFINE__(char*, __loc1, 0); + +static void* +block(void* handle, void* data, size_t size) +{ + register Regex_t* re = (Regex_t*)handle; + + if (data || (size = roundof(size, ALIGN_BOUND2)) > (re->buf + re->size - re->cur)) + return 0; + data = (void*)re->cur; + re->cur += size; + return data; +} + +char* +regcmp(const char* pattern, ...) +{ + register char* s; + register Regex_t* re; + register size_t n; + register int c; + register int p; + int b; + int i; + int j; + int nsub; + register Sfio_t* sp; + unsigned char paren[128]; + unsigned char sub[SUB]; + va_list ap; + + va_start(ap, pattern); + if (!pattern || !*pattern || !(sp = sfstropen())) + return 0; + memset(paren, 0, sizeof(paren)); + n = 0; + p = -1; + b = 0; + nsub = 0; + s = (char*)pattern; + do + { + while (c = *s++) + { + if (c == '\\') + { + sfputc(sp, c); + if (!(c = *s++)) + break; + } + else if (b) + { + if (c == ']') + b = 0; + } + else if (c == '[') + { + b = 1; + if (*s == '^') + { + sfputc(sp, c); + c = *s++; + } + if (*s == ']') + { + sfputc(sp, c); + c = *s++; + } + } + else if (c == '(') + { + /* + * someone explain in one sentence why + * a cast is needed to make this work + */ + + if (p < (int)(elementsof(paren) - 1)) + p++; + paren[p] = ++n; + } + else if (c == ')' && p >= 0) + { + for (i = p; i > 0; i--) + if (paren[i]) + break; + if (*s == '$' && (j = *(s + 1)) >= '0' && j <= '9') + { + s += 2; + j -= '0'; + if (nsub <= j) + { + if (!nsub) + memset(sub, 0, sizeof(sub)); + nsub = j + 1; + } + sub[j] = paren[i] + 1; + } + paren[i] = 0; + } + sfputc(sp, c); + } + } while (s = va_arg(ap, char*)); + va_end(ap); + if (!(s = sfstruse(sp))) + { + sfstrclose(sp); + return 0; + } + re = 0; + n = 0; + do + { + if ((n += INC) > TOT || !(re = newof(re, Regex_t, 0, n))) + { + if (re) + free(re); + sfstrclose(sp); + return 0; + } + re->cur = re->buf; + re->size = n + ALIGN_BOUND2 - sizeof(Regex_t); + regalloc(re, block, REG_NOFREE); + c = regcomp(&re->re, s, REG_EXTENDED|REG_LENIENT|REG_NULL); + regalloc(NiL, NiL, 0); + } while (c == REG_ESPACE); + sfstrclose(sp); + if (c) + { + free(re); + return 0; + } + if (re->nsub = nsub) + memcpy(re->sub, sub, (nsub + 1) * sizeof(sub[0])); + return (char*)re; +} + +char* +regex(const char* handle, const char* subject, ...) +{ + register Regex_t* re; + register int n; + register int i; + register int k; + char* sub[SUB + 1]; + regmatch_t match[SUB + 1]; + va_list ap; + + va_start(ap, subject); + if (!(re = (Regex_t*)handle) || !subject) + return 0; + for (n = 0; n < re->nsub; n++) + sub[n] = va_arg(ap, char*); + va_end(ap); + if (regexec(&re->re, subject, SUB + 1, match, 0)) + return 0; + for (n = 0; n < re->nsub; n++) + if (i = re->sub[n]) + { + i--; + k = match[i].rm_eo - match[i].rm_so; + strncpy(sub[n], subject + match[i].rm_so, k); + *(sub[n] + k) = 0; + } + __loc1 = (char*)subject + match[0].rm_so; + return (char*)subject + match[0].rm_eo; +} |