diff options
Diffstat (limited to 'usr/src/lib/libast/common/regex/reglib.h')
-rw-r--r-- | usr/src/lib/libast/common/regex/reglib.h | 582 |
1 files changed, 0 insertions, 582 deletions
diff --git a/usr/src/lib/libast/common/regex/reglib.h b/usr/src/lib/libast/common/regex/reglib.h deleted file mode 100644 index 8d2d3ea707..0000000000 --- a/usr/src/lib/libast/common/regex/reglib.h +++ /dev/null @@ -1,582 +0,0 @@ -/*********************************************************************** -* * -* This software is part of the ast package * -* Copyright (c) 1985-2010 AT&T Intellectual Property * -* and is licensed under the * -* Common Public License, Version 1.0 * -* by AT&T Intellectual Property * -* * -* A copy of the License is available at * -* http://www.opensource.org/licenses/cpl1.0.txt * -* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * -* * -* Information and Software Systems Research * -* AT&T Research * -* Florham Park NJ * -* * -* Glenn Fowler <gsf@research.att.com> * -* David Korn <dgk@research.att.com> * -* Phong Vo <kpv@research.att.com> * -* * -***********************************************************************/ -#pragma prototyped - -/* - * posix regex implementation - * - * based on Doug McIlroy's C++ implementation - * Knuth-Morris-Pratt adapted from Corman-Leiserson-Rivest - * Boyer-Moore from conversations with David Korn, Phong Vo, Andrew Hume - */ - -#ifndef _REGLIB_H -#define _REGLIB_H - -#define REG_VERSION_EXEC 20020509L -#define REG_VERSION_MAP 20030916L /* regdisc_t.re_map */ - -#define re_info env - -#define alloc _reg_alloc -#define classfun _reg_classfun -#define drop _reg_drop -#define fatal _reg_fatal -#define state _reg_state - -typedef struct regsubop_s -{ - int op; /* REG_SUB_LOWER,REG_SUB_UPPER */ - int off; /* re_rhs or match[] offset */ - int len; /* re_rhs len or len==0 match[] */ -} regsubop_t; - -#define _REG_SUB_PRIVATE_ \ - char* re_cur; /* re_buf cursor */ \ - char* re_end; /* re_buf end */ \ - regsubop_t* re_ops; /* rhs ops */ \ - char re_rhs[1]; /* substitution rhs */ - -#include <ast.h> -#include <cdt.h> -#include <stk.h> - -#include "regex.h" - -#include <ctype.h> -#include <errno.h> - -#if _BLD_DEBUG && !defined(_AST_REGEX_DEBUG) -#define _AST_REGEX_DEBUG 1 -#endif - -#define MBSIZE(p) ((ast.tmp_int=mbsize(p))>0?ast.tmp_int:1) - -#undef RE_DUP_MAX /* posix puts this in limits.h! */ -#define RE_DUP_MAX (INT_MAX/2-1) /* 2*RE_DUP_MAX won't overflow */ -#define RE_DUP_INF (RE_DUP_MAX+1) /* infinity, for * */ -#define BACK_REF_MAX 9 - -#define REG_COMP (REG_DELIMITED|REG_ESCAPE|REG_EXTENDED|REG_FIRST|REG_ICASE|REG_NOSUB|REG_NEWLINE|REG_SHELL|REG_AUGMENTED|REG_LEFT|REG_LITERAL|REG_MINIMAL|REG_MULTIREF|REG_NULL|REG_RIGHT|REG_LENIENT|REG_MUSTDELIM) -#define REG_EXEC (REG_ADVANCE|REG_INVERT|REG_NOTBOL|REG_NOTEOL|REG_STARTEND) - -#define REX_NULL 0 /* null string (internal) */ -#define REX_ALT 1 /* a|b */ -#define REX_ALT_CATCH 2 /* REX_ALT catcher */ -#define REX_BACK 3 /* \1, \2, etc */ -#define REX_BEG 4 /* initial ^ */ -#define REX_BEG_STR 5 /* initial ^ w/ no newline */ -#define REX_BM 6 /* Boyer-Moore */ -#define REX_CAT 7 /* catenation catcher */ -#define REX_CLASS 8 /* [...] */ -#define REX_COLL_CLASS 9 /* collation order [...] */ -#define REX_CONJ 10 /* a&b */ -#define REX_CONJ_LEFT 11 /* REX_CONJ left catcher */ -#define REX_CONJ_RIGHT 12 /* REX_CONJ right catcher */ -#define REX_DONE 13 /* completed match (internal) */ -#define REX_DOT 14 /* . */ -#define REX_END 15 /* final $ */ -#define REX_END_STR 16 /* final $ before tail newline */ -#define REX_EXEC 17 /* call re.re_exec() */ -#define REX_FIN_STR 18 /* final $ w/ no newline */ -#define REX_GROUP 19 /* \(...\) */ -#define REX_GROUP_CATCH 20 /* REX_GROUP catcher */ -#define REX_GROUP_AHEAD 21 /* 0-width lookahead */ -#define REX_GROUP_AHEAD_CATCH 22 /* REX_GROUP_AHEAD catcher */ -#define REX_GROUP_AHEAD_NOT 23 /* inverted 0-width lookahead */ -#define REX_GROUP_BEHIND 24 /* 0-width lookbehind */ -#define REX_GROUP_BEHIND_CATCH 25 /* REX_GROUP_BEHIND catcher */ -#define REX_GROUP_BEHIND_NOT 26 /* inverted 0-width lookbehind */ -#define REX_GROUP_BEHIND_NOT_CATCH 27 /* REX_GROUP_BEHIND_NOT catcher */ -#define REX_GROUP_COND 28 /* conditional group */ -#define REX_GROUP_COND_CATCH 29 /* conditional group catcher */ -#define REX_GROUP_CUT 30 /* don't backtrack over this */ -#define REX_GROUP_CUT_CATCH 31 /* REX_GROUP_CUT catcher */ -#define REX_KMP 32 /* Knuth-Morris-Pratt */ -#define REX_NEG 33 /* negation */ -#define REX_NEG_CATCH 34 /* REX_NEG catcher */ -#define REX_NEST 35 /* nested match */ -#define REX_ONECHAR 36 /* a single-character literal */ -#define REX_REP 37 /* Kleene closure */ -#define REX_REP_CATCH 38 /* REX_REP catcher */ -#define REX_STRING 39 /* some chars */ -#define REX_TRIE 40 /* alternation of strings */ -#define REX_WBEG 41 /* \< */ -#define REX_WEND 42 /* \> */ -#define REX_WORD 43 /* word boundary */ -#define REX_WORD_NOT 44 /* not word boundary */ - -#define T_META ((int)UCHAR_MAX+1) -#define T_STAR (T_META+0) -#define T_PLUS (T_META+1) -#define T_QUES (T_META+2) -#define T_BANG (T_META+3) -#define T_AT (T_META+4) -#define T_TILDE (T_META+5) -#define T_PERCENT (T_META+6) -#define T_LEFT (T_META+7) -#define T_OPEN (T_META+8) -#define T_CLOSE (T_OPEN+1) -#define T_RIGHT (T_OPEN+2) -#define T_CFLX (T_OPEN+3) -#define T_DOT (T_OPEN+4) -#define T_DOTSTAR (T_OPEN+5) -#define T_END (T_OPEN+6) -#define T_BAD (T_OPEN+7) -#define T_DOLL (T_OPEN+8) -#define T_BRA (T_OPEN+9) -#define T_BAR (T_OPEN+10) -#define T_AND (T_OPEN+11) -#define T_LT (T_OPEN+12) -#define T_GT (T_OPEN+13) -#define T_SLASHPLUS (T_OPEN+14) -#define T_GROUP (T_OPEN+15) -#define T_WORD (T_OPEN+16) -#define T_WORD_NOT (T_WORD+1) -#define T_BEG_STR (T_WORD+2) -#define T_END_STR (T_WORD+3) -#define T_FIN_STR (T_WORD+4) -#define T_ESCAPE (T_WORD+5) -#define T_ALNUM (T_WORD+6) -#define T_ALNUM_NOT (T_ALNUM+1) -#define T_DIGIT (T_ALNUM+2) -#define T_DIGIT_NOT (T_ALNUM+3) -#define T_SPACE (T_ALNUM+4) -#define T_SPACE_NOT (T_ALNUM+5) -#define T_BACK (T_ALNUM+6) - -#define BRE 0 -#define ERE 3 -#define ARE 6 -#define SRE 9 -#define KRE 12 - -#define HIT SSIZE_MAX - -#define bitclr(p,c) ((p)[((c)>>3)&037]&=(~(1<<((c)&07)))) -#define bitset(p,c) ((p)[((c)>>3)&037]|=(1<<((c)&07))) -#define bittst(p,c) ((p)[((c)>>3)&037]&(1<<((c)&07))) - -#define setadd(p,c) bitset((p)->bits,c) -#define setclr(p,c) bitclr((p)->bits,c) -#define settst(p,c) bittst((p)->bits,c) - -#if _hdr_wchar && _lib_wctype && _lib_iswctype - -#include <stdio.h> /* because <wchar.h> includes it and we generate it */ -#include <wchar.h> -#if _hdr_wctype -#include <wctype.h> -#endif - -#if !defined(iswblank) && !_lib_iswblank -#define _need_iswblank 1 -#define iswblank(x) _reg_iswblank(x) -extern int _reg_iswblank(wint_t); -#endif - -#if !defined(towupper) && !_lib_towupper -#define towupper(x) toupper(x) -#endif - -#if !defined(towlower) && !_lib_towlower -#define towlower(x) tolower(x) -#endif - -#else - -#undef _lib_wctype - -#ifndef iswalnum -#define iswalnum(x) isalnum(x) -#endif -#ifndef iswalpha -#define iswalpha(x) isalpha(x) -#endif -#ifndef iswcntrl -#define iswcntrl(x) iscntrl(x) -#endif -#ifndef iswdigit -#define iswdigit(x) isdigit(x) -#endif -#ifndef iswgraph -#define iswgraph(x) isgraph(x) -#endif -#ifndef iswlower -#define iswlower(x) islower(x) -#endif -#ifndef iswprint -#define iswprint(x) isprint(x) -#endif -#ifndef iswpunct -#define iswpunct(x) ispunct(x) -#endif -#ifndef iswspace -#define iswspace(x) isspace(x) -#endif -#ifndef iswupper -#define iswupper(x) isupper(x) -#endif -#ifndef iswxdigit -#define iswxdigit(x) isxdigit(x) -#endif - -#ifndef towlower -#define towlower(x) tolower(x) -#endif -#ifndef towupper -#define towupper(x) toupper(x) -#endif - -#endif - -#ifndef iswblank -#define iswblank(x) ((x)==' '||(x)=='\t') -#endif - -#ifndef iswgraph -#define iswgraph(x) (iswprint(x)&&!iswblank(x)) -#endif - -#define isword(x) (isalnum(x)||(x)=='_') - -/* - * collation element support - */ - -#define COLL_KEY_MAX 32 - -#if COLL_KEY_MAX < MB_LEN_MAX -#undef COLL_KEY_MAX -#define COLL_KEY_MAX MB_LEN_MAX -#endif - -typedef unsigned char Ckey_t[COLL_KEY_MAX+1]; - -#define COLL_end 0 -#define COLL_call 1 -#define COLL_char 2 -#define COLL_range 3 -#define COLL_range_lc 4 -#define COLL_range_uc 5 - -typedef struct Celt_s -{ - short typ; - short min; - short max; - regclass_t fun; - Ckey_t beg; - Ckey_t end; -} Celt_t; - -/* - * private stuff hanging off regex_t - */ - -typedef struct Stk_pos_s -{ - off_t offset; - char* base; -} Stk_pos_t; - -typedef struct Vector_s -{ - Stk_t* stk; /* stack pointer */ - char* vec; /* the data */ - int inc; /* growth increment */ - int siz; /* element size */ - int max; /* max index */ - int cur; /* current index -- user domain */ -} Vector_t; - -/* - * Rex_t subtypes - */ - -typedef struct Cond_s -{ - unsigned char* beg; /* beginning of next match */ - struct Rex_s* next[2]; /* 0:no 1:yes next pattern */ - struct Rex_s* cont; /* right catcher */ - int yes; /* yes condition hit */ -} Cond_t; - -typedef struct Conj_left_s -{ - unsigned char* beg; /* beginning of left match */ - struct Rex_s* right; /* right pattern */ - struct Rex_s* cont; /* right catcher */ -} Conj_left_t; - -typedef struct Conj_right_s -{ - unsigned char* end; /* end of left match */ - struct Rex_s* cont; /* ambient continuation */ -} Conj_right_t; - -typedef unsigned int Bm_mask_t; - -typedef struct Bm_s -{ - Bm_mask_t** mask; - size_t* skip; - size_t* fail; - size_t size; - ssize_t back; - ssize_t left; - ssize_t right; - size_t complete; -} Bm_t; - -typedef struct String_s -{ - int* fail; - unsigned char* base; - size_t size; -} String_t; - -typedef struct Set_s -{ - unsigned char bits[(UCHAR_MAX+1)/CHAR_BIT]; -} Set_t; - -typedef struct Collate_s -{ - int invert; - Celt_t* elements; -} Collate_t; - -typedef struct Binary_s -{ - struct Rex_s* left; - struct Rex_s* right; - int serial; -} Binary_t; - -typedef struct Group_s -{ - int number; /* group number */ - int last; /* last contained group number */ - int size; /* lookbehind size */ - int back; /* backreferenced */ - regflags_t flags; /* group flags */ - union - { - Binary_t binary; - struct Rex_s* rex; - } expr; -} Group_t; - -typedef struct Exec_s -{ - void* data; - const char* text; - size_t size; -} Exec_t; - -#define REX_NEST_open 0x01 -#define REX_NEST_close 0x02 -#define REX_NEST_escape 0x04 -#define REX_NEST_quote 0x08 -#define REX_NEST_literal 0x10 -#define REX_NEST_delimiter 0x20 -#define REX_NEST_terminator 0x40 -#define REX_NEST_separator 0x80 - -#define REX_NEST_SHIFT 8 - -typedef struct Nest_s -{ - int primary; - unsigned short none; /* for Nest_t.type[-1] */ - unsigned short type[1]; -} Nest_t; - -/* - * REX_ALT catcher, solely to get control at the end of an - * alternative to keep records for comparing matches. - */ - -typedef struct Alt_catch_s -{ - struct Rex_s* cont; -} Alt_catch_t; - -typedef struct Group_catch_s -{ - struct Rex_s* cont; - regoff_t* eo; -} Group_catch_t; - -typedef struct Behind_catch_s -{ - struct Rex_s* cont; - unsigned char* beg; - unsigned char* end; -} Behind_catch_t; - -/* - * REX_NEG catcher determines what string lengths can be matched, - * then Neg investigates continuations of other lengths. - * This is inefficient. For !POSITIONS expressions, we can do better: - * since matches to rex will be enumerated in decreasing order, - * we can investigate continuations whenever a length is skipped. - */ - -typedef struct Neg_catch_s -{ - unsigned char* beg; - unsigned char* index; -} Neg_catch_t; - -/* - * REX_REP catcher. One is created on the stack for - * each iteration of a complex repetition. - */ - -typedef struct Rep_catch_s -{ - struct Rex_s* cont; - struct Rex_s* ref; - unsigned char* beg; - int n; -} Rep_catch_t; - -/* - * data structure for an alternation of pure strings - * son points to a subtree of all strings with a common - * prefix ending in character c. sib links alternate - * letters in the same position of a word. end=1 if - * some word ends with c. the order of strings is - * irrelevant, except long words must be investigated - * before short ones. - */ - -typedef struct Trie_node_s -{ - unsigned char c; - unsigned char end; - struct Trie_node_s* son; - struct Trie_node_s* sib; -} Trie_node_t; - -typedef struct Trie_s -{ - Trie_node_t** root; - int min; - int max; -} Trie_t; - -/* - * Rex_t is a node in a regular expression - */ - -typedef struct Rex_s -{ - unsigned char type; /* node type */ - unsigned char marked; /* already marked */ - short serial; /* subpattern number */ - regflags_t flags; /* scoped flags */ - int explicit; /* scoped explicit match*/ - struct Rex_s* next; /* remaining parts */ - int lo; /* lo dup count */ - int hi; /* hi dup count */ - unsigned char* map; /* fold and/or ccode map*/ - union - { - Alt_catch_t alt_catch; /* alt catcher */ - Bm_t bm; /* bm */ - Behind_catch_t behind_catch; /* behind catcher */ - Set_t* charclass; /* char class */ - Collate_t collate; /* collation class */ - Cond_t cond_catch; /* cond catcher */ - Conj_left_t conj_left; /* conj left catcher */ - Conj_right_t conj_right; /* conj right catcher */ - void* data; /* data after Rex_t */ - Exec_t exec; /* re.re_exec() args */ - Group_t group; /* a|b or rep */ - Group_catch_t group_catch; /* group catcher */ - Neg_catch_t neg_catch; /* neg catcher */ - Nest_t nest; /* nested match */ - unsigned char onechar; /* single char */ - Rep_catch_t rep_catch; /* rep catcher */ - String_t string; /* string/kmp */ - Trie_t trie; /* trie */ - } re; -} Rex_t; - -typedef struct reglib_s /* library private regex_t info */ -{ - struct Rex_s* rex; /* compiled expression */ - regdisc_t* disc; /* REG_DISCIPLINE discipline */ - const regex_t* regex; /* from regexec */ - unsigned char* beg; /* beginning of string */ - unsigned char* end; /* end of string */ - Vector_t* pos; /* posns of certain subpatterns */ - Vector_t* bestpos; /* ditto for best match */ - regmatch_t* match; /* subexrs in current match */ - regmatch_t* best; /* ditto in best match yet */ - Stk_pos_t stk; /* exec stack pos */ - size_t min; /* minimum match length */ - size_t nsub; /* internal re_nsub */ - regflags_t flags; /* flags from regcomp() */ - int error; /* last error */ - int explicit; /* explicit match on this char */ - int leading; /* leading match on this char */ - int refs; /* regcomp()+regdup() references*/ - Rex_t done; /* the last continuation */ - regstat_t stats; /* for regstat() */ - unsigned char fold[UCHAR_MAX+1]; /* REG_ICASE map */ - unsigned char hard; /* hard comp */ - unsigned char once; /* if 1st parse fails, quit */ - unsigned char separate; /* cannot combine */ - unsigned char stack; /* hard comp or exec */ - unsigned char sub; /* re_sub is valid */ - unsigned char test; /* debug/test bitmask */ -} Env_t; - -typedef struct State_s /* shared state */ -{ - regmatch_t nomatch; - struct - { - unsigned char key; - short val[15]; - } escape[52]; - short* magic[UCHAR_MAX+1]; - regdisc_t disc; - int fatal; - int initialized; - Dt_t* attrs; - Dt_t* names; - Dtdisc_t dtdisc; -} State_t; - -extern State_t state; - -extern void* alloc(regdisc_t*, void*, size_t); -extern regclass_t classfun(int); -extern void drop(regdisc_t*, Rex_t*); -extern int fatal(regdisc_t*, int, const char*); - -#endif |