diff options
Diffstat (limited to 'usr/src/lib/libpp/common/ppfsm.c')
-rw-r--r-- | usr/src/lib/libpp/common/ppfsm.c | 946 |
1 files changed, 946 insertions, 0 deletions
diff --git a/usr/src/lib/libpp/common/ppfsm.c b/usr/src/lib/libpp/common/ppfsm.c
new file mode 100644
index 0000000000..5cef65db2a
--- /dev/null
+++ b/usr/src/lib/libpp/common/ppfsm.c
@@ -0,0 +1,946 @@
+/***********************************************************************
+*                                                                      *
+*               This software is part of the ast package               *
+*           Copyright (c) 1986-2007 AT&T Knowledge Ventures            *
+*                      and is licensed under the                       *
+*                  Common Public License, Version 1.0                  *
+*                      by AT&T Knowledge Ventures                      *
+*                                                                      *
+*                A copy of the License is available at                 *
+*            http://www.opensource.org/licenses/cpl1.0.txt             *
+*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
+*                                                                      *
+*              Information and Software Systems Research               *
+*                            AT&T Research                             *
+*                           Florham Park NJ                            *
+*                                                                      *
+*                 Glenn Fowler <gsf@research.att.com>                  *
+*                                                                      *
+***********************************************************************/
+#pragma prototyped
+/*
+ * Glenn Fowler
+ * AT&T Research
+ *
+ * preprocessor and proto lexical analyzer fsm
+ * define PROTOMAIN for standalone proto
+ */
+
+#include "pplib.h"
+#include "ppfsm.h"
+
+/*
+ * lexical FSM encoding
+ * derived from a standalone ansi cpp by Dennis Ritchie
+ * modified for libpp by Glenn Fowler
+ *
+ * fsm[] is initialized from fsminit[]. The encoding is blown out into
+ * fsm[] for time efficiency. When in state `state', and one of the
+ * characters in `ch' arrives, enter `nextstate'. States >= TERMINAL are
+ * either final, or at least require special action. In fsminit[] there
+ * is a line for each <state,charset,nextstate>. Early entries are
+ * overwritten by later ones. C_XXX is the universal set and should
+ * always be first. Some of the fsminit[] entries are templates for
+ * groups of states. The OP entries trigger the state copies. States
+ * above TERMINAL are represented in fsm[] as negative values. S_TOK and
+ * S_TOKB encode the resulting token type in the upper bits. These actions
+ * differ in that S_TOKB has a lookahead char.
+ *
+ * fsm[] has three start states:
+ *
+ *	PROTO		proto (ANSI -> K&R,C++,ANSI)
+ *	QUICK		standalone ppcpp()
+ *	TOKEN		tokenizing pplex()
+ *
+ * If the next state remains the same then the fsm[] transition value is 0.
+ * MAX+1 is a power of 2 so that fsm[state][EOF==MAX+1] actually accesses
+ * fsm[state+1][0] which is ~S_EOB for all states. This preserves the
+ * power of 2 fsm[] row size for efficient array indexing. Thanks to
+ * D. G. Korn for the last two observations. The pseudo non-terminal state
+ * fsm[TERMINAL][state+1] is used to differentiate EOB from EOF.
+ *
+ * The bit layout is:
+ *
+ *	TERM	arg	SPLICE	next
+ *	15	14-8	7	6-0
+ *
+ * How the FSM_INIT case of ppfsm() reads an fsminit[] row:
+ *
+ *	- a nextstate >= TERMINAL is stored complemented (~nextstate)
+ *	- the ch[] list is scanned up to its first 0 entry (at most 4 entries)
+ *	- the C_* codes below stand for whole character classes
+ *	- a row whose state is OP is an operation: COPY (only when
+ *	  !PROTOMAIN) copies state ch[0] onto states ch[1]..ch[2]; any
+ *	  other OP row (END) stops the scan
+ */
+
+/*
+ * NOTE: these must be `control' characters for all native codesets
+ * currently ok for {ascii,ebcdic1,ebcdic2,ebcdic3}
+ *
+ * they appear in fsminit[].ch[] and are expanded by ppfsm(FSM_INIT)
+ */
+
+#define C_DEC 001 /* each char of dec[] ("9876543210") */
+#define C_EOF 002 /* sets only the fsm[TERMINAL][state+1] slot */
+#define C_HEX 003 /* each char of hex[] ("fedcbaFEDCBA9876543210") */
+#define C_LET 021 /* each char of let[] (aln[] from index 10: letters, '_', and "$@" if PROTOMAIN) */
+#define C_OCT 022 /* each char of oct[] ("76543210") */
+#define C_XXX 023 /* every char 0..MAX, then falls through to the C_EOF action */
+
+#define OP (-1) /* fsminit.state value marking an operation row */
+#define END 0 /* OP nextstate: end of fsminit[] */
+#define COPY 1 /* OP nextstate: copy state ch[0] to states ch[1]..ch[2] */
+
+#define copy(t,f) (memcpy(&fsm[t][1],&fsm[f][1],(MAX+1)*sizeof(short)),fsm[TERMINAL][(t)+1]=fsm[TERMINAL][(f)+1]) /* NOTE: MAX+1 shorts starting at column 1, so the last one copied lands in fsm[(t)+1][0] */
+
+struct fsminit /* fsm initialization row */
+{
+	int state; /* if in this state */
+	unsigned char ch[4]; /* and see one of these */
+	int nextstate; /* enter this state if <TERMINAL*/
+};
+
+static struct fsminit fsminit[] =
+{
+	/* proto start state */
+	{ PROTO, { C_XXX }, S_CHR, },
+	{ PROTO, { C_EOF }, S_EOF, },
+	{ PROTO, { C_DEC }, BAD1, },
+	{ PROTO, { '.' }, DOT, },
+	{ PROTO, { C_LET }, NID, },
+	{ PROTO, { 'L' }, LIT, },
+	{ PROTO, { 'd', 'e', 'f', 'i' }, RES1, },
+	{ PROTO, { 'r', 's', 't', 'v' }, RES1, },
+	{ PROTO, { 'w', 'N' }, RES1, },
+	{ PROTO, { '"', '\'' }, S_LITBEG, },
+	{ PROTO, { '/' }, COM1, },
+	{ PROTO, { '\n' }, S_NL, },
+	{ PROTO, { ' ','\t','\f','\v' }, WS1, },
+
+/* proto {do,else,extern,for,if,inline,return,static,typedef,va_start,void,while,NoN} */
+	{ RES1, { C_XXX }, S_MACRO, },
+	{ RES1, { C_LET, C_DEC }, NID, },
+	{ RES1, { 'a' }, RES1a, },
+	{ RES1, { 'e' }, RES1e, },
+	{ RES1, { 'f' }, RES1f, },
+	{ RES1, { 'h' }, RES1h, },
+	{ RES1, { 'l' }, RES1l, },
+	{ RES1, { 'n' }, RES1n, },
+	{ RES1, { 'o' }, RES1o, },
+	{ RES1, { 't' }, RES1t, },
+	{ RES1, { 'x' }, RES1x, },
+	{ RES1, { 'y' }, RES1y, },
+
+	/* proto reserved {va_start} */
+	{ RES1a, { C_XXX }, S_RESERVED, },
+	{ RES1a, { C_LET, C_DEC }, NID, },
+	{ RES1a, { '_','s','t','a' }, RES1a, },
+	{ RES1a, { 'r' }, RES1a, },
+
+	/* proto reserved {return} */
+	{ RES1e, { C_XXX }, S_RESERVED, },
+	{ RES1e, { C_LET, C_DEC }, NID, },
+	{ RES1e, { 't','u','r','n' }, RES1e, },
+
+	/* proto reserved {if} */
+	{ RES1f, { C_XXX }, S_RESERVED, },
+	{ RES1f, { C_LET, C_DEC }, NID, },
+
+	/* proto reserved {while} */
+	{ RES1h, { C_XXX }, S_RESERVED, },
+	{ RES1h, { C_LET, C_DEC }, NID, },
+	{ RES1h, { 'i','l','e' }, RES1h, },
+
+	/* proto reserved {else} */
+	{ RES1l, { C_XXX }, S_RESERVED, },
+	{ RES1l, { C_LET, C_DEC }, NID, },
+	{ RES1l, { 's','e' }, RES1l, },
+
+	/* proto reserved {inline} */
+	{ RES1n, { C_XXX }, S_RESERVED, },
+	{ RES1n, { C_LET, C_DEC }, NID, },
+	{ RES1n, { 'l','i','n','e' }, RES1n, },
+
+	/* proto reserved {do,for,void} */
+	{ RES1o, { C_XXX }, S_RESERVED, },
+	{ RES1o, { C_LET, C_DEC }, NID, },
+	{ RES1o, { 'r','i','d','N' }, RES1o, },
+
+	/* proto reserved {static} */
+	{ RES1t, { C_XXX }, S_RESERVED, },
+	{ RES1t, { C_LET, C_DEC }, NID, },
+	{ RES1t, { 'a','t','i','c' }, RES1t, },
+
+	/* proto reserved {extern} */
+	{ RES1x, { C_XXX }, S_RESERVED, },
+	{ RES1x, { C_LET, C_DEC }, NID, },
+	{ RES1x, { 't','e','r','n' }, RES1x, },
+
+	/* proto reserved {typedef} */
+	{ RES1y, { C_XXX }, S_RESERVED, },
+	{ RES1y, { C_LET, C_DEC }, NID, },
+	{ RES1y, { 'p','e','d','f' }, RES1y, },
+
+	/* saw /, perhaps start of comment */
+	{ COM1, { C_XXX }, S_CHRB, },
+	{ COM1, { '*' }, COM2, },
+#if PROTOMAIN
+	{ COM1, { '/' }, COM5, },
+#endif
+
+	/* saw / *, start of comment */
+	{ COM2, { C_XXX }, COM2, },
+	{ COM2, { '\n', C_EOF }, S_COMMENT, },
+	{ COM2, { '/' }, COM4, },
+	{ COM2, { '*' }, COM3, },
+	{ COM2, { '#', ';', ')' }, QUAL(COM2), },
+
+	/* saw the * possibly ending a comment */
+	{ COM3, { C_XXX }, COM2, },
+	{ COM3, { '\n', C_EOF }, S_COMMENT, },
+	{ COM3, { '#', ';', ')' }, QUAL(COM2), },
+	{ COM3, { '*' }, COM3, },
+	{ COM3, { '/' }, S_COMMENT, },
+
+	/* saw / in / * comment, possible malformed nest */
+	{ COM4, { C_XXX }, COM2, },
+	{ COM4, { '*', '\n', C_EOF }, S_COMMENT, },
+	{ COM4, { '/' }, COM4, },
+
+	/* saw / /, start of comment */
+	{ COM5, { C_XXX }, COM5, },
+	{ COM5, { '\n', C_EOF }, S_COMMENT, },
+	{ COM5, { '/' }, COM6, },
+	{ COM5, { '*' }, COM7, },
+
+	/* saw / in / / comment, possible malformed nest */
+	{ COM6, { C_XXX }, COM5, },
+	{ COM6, { '*', '\n', C_EOF }, S_COMMENT, },
+	{ COM6, { '/' }, COM6, },
+
+	/* saw * in / /, possible malformed nest */
+	{ COM7, { C_XXX }, COM5, },
+	{ COM7, { '\n', C_EOF }, S_COMMENT, },
+	{ COM7, { '*' }, COM7, },
+	{ COM7, { '/' }, S_COMMENT, },
+
+	/* normal identifier -- always a macro candidate */
+	{ NID, { C_XXX }, S_MACRO, },
+	{ NID, { C_LET, C_DEC }, NID, },
+
+	/* saw ., operator or dbl constant */
+	{ DOT, { C_XXX }, S_CHRB, },
+	{ DOT, { '.' }, DOT2, },
+	{ DOT, { C_DEC }, BAD1, },
+
+	/* saw .., possible ... */
+	{ DOT2, { C_XXX }, BACK(T_INVALID), },
+	{ DOT2, { '.' }, KEEP(T_VARIADIC), },
+
+	/* saw L (possible start of normal wide literal) */
+	{ LIT, { C_XXX }, S_MACRO, },
+	{ LIT, { C_LET, C_DEC }, NID, },
+	{ LIT, { '"', '\'' }, QUAL(LIT1), },
+
+	/* saw " or ' beginning literal */
+	{ LIT1, { C_XXX }, LIT1, },
+	{ LIT1, { '"', '\'' }, S_LITEND, },
+	{ LIT1, { '\n', C_EOF }, S_LITEND, },
+	{ LIT1, { '\\' }, LIT2, },
+
+	/* saw \ in literal */
+	{ LIT2, { C_XXX }, S_LITESC, },
+	{ LIT2, { '\n', C_EOF }, S_LITEND, },
+
+	/* eat malformed numeric constant */
+	{ BAD1, { C_XXX }, BACK(T_INVALID), },
+	{ BAD1, { C_LET, C_DEC, '.' }, BAD1, },
+	{ BAD1, { 'e', 'E' }, BAD2, },
+
+	/* eat malformed numeric fraction|exponent */
+	{ BAD2, { C_XXX }, BACK(T_INVALID), },
+	{ BAD2, { C_LET, C_DEC, '.' }, BAD1, },
+	{ BAD2, { '+', '-' }, BAD1, },
+
+	/* saw white space, eat it up */
+	{ WS1, { C_XXX }, S_WS, },
+	{ WS1, { ' ', '\t' }, WS1, },
+	{ WS1, { '\f', '\v' }, S_VS, },
+
+#if !PROTOMAIN
+
+	/* quick template */
+	{ QUICK, { C_XXX }, QTOK, },
+	{ QUICK, { C_EOF, MARK }, S_CHRB, },
+	{ QUICK, { C_LET, C_DEC }, QID, },
+	{ QUICK, { 'L' }, LIT0, },
+	{ QUICK, { '"', '\'' }, S_LITBEG, },
+	{ QUICK, { '/' }, S_CHRB, },
+	{ QUICK, { '*' }, QCOM, },
+	{ QUICK, { '#' }, SHARP1, },
+	{ QUICK, { '\n' }, S_NL, },
+	{ QUICK, { '\f', '\v' }, S_VS, },
+
+	/* copy QUICK to QUICK+1 through MAC0+1 */
+	{ OP, {QUICK,QUICK+1,MAC0+1}, COPY, },
+
+	/* quick start state */
+	{ QUICK, { C_EOF }, S_EOF, },
+	{ QUICK, { C_DEC }, QNUM, },
+	{ QUICK, { MARK }, QTOK, },
+	{ QUICK, { '/' }, COM1, },
+	{ QUICK, { ' ', '\t' }, QUICK, },
+
+	/* grab non-macro tokens */
+	{ QTOK, { C_DEC }, QNUM, },
+
+	/* grab numeric and invalid tokens */
+	{ QNUM, { C_LET, C_DEC, '.' }, QNUM, },
+	{ QNUM, { 'e', 'E' }, QEXP, },
+
+	/* grab exponent token */
+	{ QEXP, { C_LET, C_DEC, '.' }, QNUM, },
+	{ QEXP, { '+', '-' }, QNUM, },
+
+	/* saw *, grab possible bad comment terminator */
+	{ QCOM, { C_DEC }, QNUM, },
+	{ QCOM, { '/' }, S_COMMENT, },
+
+	/* saw L (possible start of wide string or first macro char) */
+	{ MAC0, { 'L' }, QID, },
+	{ MAC0, { '"', '\'' }, QUAL(LIT1), },
+
+	/* macro candidate template */
+	{ MAC0+1, { 'L' }, QID, },
+
+	/* copy MAC0+1 to MAC0+2 through MACN */
+	{ OP, {MAC0+1,MAC0+2,MACN}, COPY },
+
+	/* saw L (possible start of wide string or macro L) */
+	{ HIT0, { C_XXX }, S_MACRO, },
+	{ HIT0, { C_LET, C_DEC }, QID, },
+	{ HIT0, { '"', '\'' }, QUAL(LIT1), },
+
+	/* macro hit template */
+	{ HIT0+1, { C_XXX }, S_MACRO, },
+	{ HIT0+1, { C_LET, C_DEC }, QID, },
+
+	/* copy HIT0+1 to HIT0+2 through HITN */
+	{ OP, {HIT0+1,HIT0+2,HITN}, COPY },
+
+	/* saw L (possible start of wide literal) */
+	{ LIT0, { C_XXX }, S_MACRO, },
+	{ LIT0, { C_LET, C_DEC }, QID, },
+	{ LIT0, { '"', '\'' }, QUAL(LIT1), },
+
+	/* (!PROTOMAIN COM1) saw /, perhaps start of comment or /= */
+	{ COM1, { '=' }, KEEP(T_DIVEQ), },
+
+	/* normal start state */
+	{ TOKEN, { C_XXX }, S_HUH, },
+	{ TOKEN, { C_EOF }, S_EOF, },
+	{ TOKEN, { C_DEC }, DEC1, },
+	{ TOKEN, { '0' }, OCT1, },
+	{ TOKEN, { '.' }, DOT1, },
+	{ TOKEN, { C_LET }, NID, },
+	{ TOKEN, { 'L' }, LIT, },
+	{ TOKEN, { '"', '\'', '<' }, S_LITBEG, },
+	{ TOKEN, { '/' }, COM1, },
+	{ TOKEN, { '\n' }, S_NL, },
+	{ TOKEN, { ' ', '\t' }, WS1, },
+	{ TOKEN, { '\f', '\v' }, S_VS, },
+	{ TOKEN, { '#' }, SHARP1, },
+	{ TOKEN, { ':' }, COLON1, },
+	{ TOKEN, { '%' }, PCT1, },
+	{ TOKEN, { '&' }, AND1, },
+	{ TOKEN, { '*' }, STAR1, },
+	{ TOKEN, { '+' }, PLUS1, },
+	{ TOKEN, { '-' }, MINUS1, },
+	{ TOKEN, { '=' }, EQ1, },
+	{ TOKEN, { '!' }, NOT1, },
+	{ TOKEN, { '>' }, GT1, },
+	{ TOKEN, { '^' }, CIRC1, },
+	{ TOKEN, { '|' }, OR1, },
+	{ TOKEN, { '(', ')', '[', ']' }, S_CHR, },
+	{ TOKEN, { '{', '}', ',', ';' }, S_CHR, },
+	{ TOKEN, { '~', '?' }, S_CHR, },
+
+	/* saw 0, possible oct|hex|dec|dbl constant */
+	{ OCT1, { C_XXX }, BACK(T_DECIMAL), },
+	{ OCT1, { C_LET, C_DEC }, BAD1, },
+	{ OCT1, { C_OCT }, OCT2, },
+	{ OCT1, { 'e', 'E' }, DBL2, },
+	{ OCT1, { 'l', 'L', 'u', 'U' }, QUAL(DEC2), },
+	{ OCT1, { 'x', 'X' }, HEX1, },
+	{ OCT1, { '.' }, DBL1, },
+
+	/* saw 0<oct>, oct constant */
+	{ OCT2, { C_XXX }, BACK(T_OCTAL), },
+	{ OCT2, { C_LET, C_DEC }, BAD1, },
+	{ OCT2, { C_OCT }, OCT2, },
+	{ OCT2, { 'e', 'E' }, DBL2, },
+	{ OCT2, { 'l', 'L', 'u', 'U' }, QUAL(OCT3), },
+	{ OCT2, { '.' }, DBL1, },
+
+	/* oct constant qualifier */
+	{ OCT3, { C_XXX }, BACK(T_OCTAL), },
+	{ OCT3, { C_LET, C_DEC, '.' }, BAD1, },
+	{ OCT3, { 'l', 'L', 'u', 'U' }, QUAL(OCT3), },
+
+	/* saw 0 [xX], hex constant */
+	{ HEX1, { C_XXX }, BACK(T_HEXADECIMAL), },
+	{ HEX1, { C_LET }, BAD1, },
+	{ HEX1, { C_HEX }, HEX1, },
+	{ HEX1, { 'e', 'E' }, HEX3, },
+	{ HEX1, { 'l', 'L', 'u', 'U' }, QUAL(HEX2), },
+	{ HEX1, { '.' }, HEX4, },
+	{ HEX1, { 'p', 'P' }, HEX5, },
+
+	/* hex constant qualifier */
+	{ HEX2, { C_XXX }, BACK(T_HEXADECIMAL), },
+	{ HEX2, { C_LET, C_DEC, '.' }, BAD1, },
+	{ HEX2, { 'l', 'L', 'u', 'U' }, QUAL(HEX2), },
+
+	/* hex [eE][-+] botch */
+	{ HEX3, { C_XXX }, BACK(T_HEXADECIMAL), },
+	{ HEX3, { C_LET, '.', '-', '+'},BAD1, },
+	{ HEX3, { C_HEX }, HEX1, },
+	{ HEX3, { 'e', 'E' }, HEX3, },
+	{ HEX3, { 'l', 'L', 'u', 'U' }, QUAL(HEX2), },
+
+	/* hex dbl fraction */
+	{ HEX4, { C_XXX }, BACK(T_HEXDOUBLE), },
+	{ HEX4, { C_LET, '.' }, BAD1, },
+	{ HEX4, { C_HEX }, HEX4, },
+	{ HEX4, { 'p', 'P' }, HEX5, },
+	{ HEX4, { 'f', 'F', 'l', 'L' }, QUAL(HEX8), },
+
+	/* optional hex dbl exponent sign */
+	{ HEX5, { C_XXX }, BACK(T_INVALID), },
+	{ HEX5, { C_LET, '.' }, BAD1, },
+	{ HEX5, { '+', '-' }, HEX6, },
+	{ HEX5, { C_DEC }, HEX7, },
+
+	/* mandatory hex dbl exponent first digit */
+	{ HEX6, { C_XXX }, BACK(T_INVALID), },
+	{ HEX6, { C_LET, '.' }, BAD1, },
+	{ HEX6, { C_DEC }, HEX7, },
+
+	/* hex dbl exponent digits */
+	{ HEX7, { C_XXX }, BACK(T_HEXDOUBLE), },
+	{ HEX7, { C_LET, '.' }, BAD1, },
+	{ HEX7, { C_DEC }, HEX7, },
+	{ HEX7, { 'f', 'F', 'l', 'L' }, QUAL(HEX8), },
+
+	/* hex dbl constant qualifier */
+	{ HEX8, { C_XXX }, BACK(T_HEXDOUBLE), },
+	{ HEX8, { C_LET, '.' }, BAD1, },
+	{ HEX8, { 'f', 'F', 'l', 'L' }, QUAL(HEX8), },
+
+	/* saw <dec>, dec constant */
+	{ DEC1, { C_XXX }, BACK(T_DECIMAL), },
+	{ DEC1, { C_LET }, BAD1, },
+	{ DEC1, { C_DEC }, DEC1, },
+	{ DEC1, { 'e', 'E' }, DBL2, },
+	{ DEC1, { 'l', 'L', 'u', 'U' }, QUAL(DEC2), },
+	{ DEC1, { '.' }, DBL1, },
+
+	/* dec constant qualifier */
+	{ DEC2, { C_XXX }, BACK(T_DECIMAL), },
+	{ DEC2, { C_LET, C_DEC }, BAD1, },
+	{ DEC2, { 'l', 'L', 'u', 'U' }, QUAL(DEC2), },
+
+	/* saw ., operator or dbl constant */
+	{ DOT1, { C_XXX }, S_CHRB, },
+	{ DOT1, { '.' }, DOT2, },
+	{ DOT1, { C_DEC }, DBL1, },
+
+	/* dbl fraction */
+	{ DBL1, { C_XXX }, BACK(T_DOUBLE), },
+	{ DBL1, { C_LET, '.' }, BAD1, },
+	{ DBL1, { C_DEC }, DBL1, },
+	{ DBL1, { 'e', 'E' }, DBL2, },
+	{ DBL1, { 'f', 'F', 'l', 'L' }, QUAL(DBL5), },
+
+	/* optional dbl exponent sign */
+	{ DBL2, { C_XXX }, BACK(T_INVALID), },
+	{ DBL2, { C_LET, '.' }, BAD1, },
+	{ DBL2, { '+', '-' }, DBL3, },
+	{ DBL2, { C_DEC }, DBL4, },
+
+	/* mandatory dbl exponent first digit */
+	{ DBL3, { C_XXX }, BACK(T_INVALID), },
+	{ DBL3, { C_LET, '.' }, BAD1, },
+	{ DBL3, { C_DEC }, DBL4, },
+
+	/* dbl exponent digits */
+	{ DBL4, { C_XXX }, BACK(T_DOUBLE), },
+	{ DBL4, { C_LET, '.' }, BAD1, },
+	{ DBL4, { C_DEC }, DBL4, },
+	{ DBL4, { 'f', 'F', 'l', 'L' }, QUAL(DBL5), },
+
+	/* dbl constant qualifier */
+	{ DBL5, { C_XXX }, BACK(T_DOUBLE), },
+	{ DBL5, { C_LET, '.' }, BAD1, },
+	{ DBL5, { 'f', 'F', 'l', 'L' }, QUAL(DBL5), },
+
+	/* saw < starting include header */
+	{ HDR1, { C_XXX }, HDR1, },
+	{ HDR1, { '>', '\n', C_EOF }, S_LITEND, },
+
+	/* saw <binop><space> expecting = */
+	{ BIN1, { C_XXX }, S_HUH, },
+	{ BIN1, { ' ', '\t' }, BIN1, },
+
+	/* 2-char ops */
+
+	{ SHARP1, { C_XXX }, S_SHARP, },
+
+	{ PCT1, { C_XXX }, S_CHRB, },
+	{ PCT1, { '=' }, KEEP(T_MODEQ), },
+
+	{ AND1, { C_XXX }, S_CHRB, },
+	{ AND1, { '=' }, KEEP(T_ANDEQ), },
+	{ AND1, { '&' }, KEEP(T_ANDAND), },
+
+	{ STAR1, { C_XXX }, S_CHRB, },
+	{ STAR1, { '=' }, KEEP(T_MPYEQ), },
+	{ STAR1, { '/' }, S_COMMENT, },
+
+	{ PLUS1, { C_XXX }, S_CHRB, },
+	{ PLUS1, { '=' }, KEEP(T_ADDEQ), },
+	{ PLUS1, { '+' }, KEEP(T_ADDADD), },
+
+	{ MINUS1, { C_XXX }, S_CHRB, },
+	{ MINUS1, { '=' }, KEEP(T_SUBEQ), },
+	{ MINUS1, { '-' }, KEEP(T_SUBSUB), },
+	{ MINUS1, { '>' }, KEEP(T_PTRMEM), },
+
+	{ COLON1, { C_XXX }, S_CHRB, },
+	{ COLON1, { '=', '>' }, S_HUH, },
+
+	{ LT1, { C_XXX }, S_CHRB, },
+	{ LT1, { '=' }, KEEP(T_LE), },
+	{ LT1, { '<' }, LSH1, },
+
+	{ EQ1, { C_XXX }, S_CHRB, },
+	{ EQ1, { '=' }, KEEP(T_EQ), },
+
+	{ NOT1, { C_XXX }, S_CHRB, },
+	{ NOT1, { '=' }, KEEP(T_NE), },
+
+	{ GT1, { C_XXX }, S_CHRB, },
+	{ GT1, { '=' }, KEEP(T_GE), },
+	{ GT1, { '>' }, RSH1, },
+
+	{ CIRC1, { C_XXX }, S_CHRB, },
+	{ CIRC1, { '=' }, KEEP(T_XOREQ), },
+
+	{ OR1, { C_XXX }, S_CHRB, },
+	{ OR1, { '=' }, KEEP(T_OREQ), },
+	{ OR1, { '|' }, KEEP(T_OROR), },
+
+	/* 3-char ops */
+
+	{ ARROW1, { C_XXX }, BACK(T_PTRMEM), },
+	{ ARROW1, { '*' }, KEEP(T_PTRMEMREF), },
+
+	{ LSH1, { C_XXX }, BACK(T_LSHIFT), },
+	{ LSH1, { '=' }, KEEP(T_LSHIFTEQ), },
+
+	{ RSH1, { C_XXX }, BACK(T_RSHIFT), },
+	{ RSH1, { '=' }, KEEP(T_RSHIFTEQ), },
+
+#endif
+
+	/* end */
+	{ OP, { 0 }, END, }
+};
+
+short fsm[TERMINAL+1][MAX+1]; /* transition table, filled in by ppfsm(FSM_INIT) from fsminit[] */
+
+char trigraph[MAX+1]; /* trigraph third char -> replacement; set by ppfsm(FSM_INIT) when !PROTOMAIN */
+
+#if PROTOMAIN
+static char spl[] = { '\\', '\r', 0 }; /* chars given splice handling in every state by ppfsm(FSM_INIT) */
+static char aln[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_$@"; /* digits, then the C_LET chars */
+#else
+static char spl[] = { MARK, '?', '\\', '\r', CC_sub, 0 }; /* chars given splice handling by ppfsm(FSM_INIT) */
+static char aln[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_"; /* digits, then the C_LET chars */
+#endif
+static char* let = &aln[10]; /* C_LET expansion: aln[] past the 10 digits */
+static char hex[] = "fedcbaFEDCBA9876543210"; /* C_HEX expansion */
+static char* dec = &hex[12]; /* C_DEC expansion: "9876543210" */
+static char* oct = &hex[14]; /* C_OCT expansion: "76543210" */
+
+/*
+ * runtime FSM modifications
+ * ppfsm(FSM_INIT,0) must be called first
+ *
+ * op selects one of the FSM_* cases below; an op with no matching case
+ * is silently ignored (the switch has no default). s is the operand:
+ *
+ *	FSM_INIT		s is not read (it is reused as a scratch pointer)
+ *	FSM_IDADD		chars to add to the identifier set
+ *	FSM_IDDEL		chars to remove from the identifier set
+ *	FSM_PLUSPLUS		s is not read; the choice comes from pp.option
+ *	FSM_COMPATIBILITY	s is not read; the choice comes from pp.state
+ *	FSM_QUOTADD		chars that should behave like '"'
+ *	FSM_QUOTDEL		chars that should stop behaving like '"'
+ *	FSM_OPSPACE		non-null selects BIN1 for space/tab after an operator
+ *	FSM_MACRO		macro name to enter into the QUICK macro states
+ *
+ * every case except FSM_INIT is compiled only when !PROTOMAIN, and
+ * FSM_COMPATIBILITY additionally requires COMPATIBLE
+ *
+ * NOTE(review): the cases index fsm[][c] with c read from a plain char;
+ * presumably callers only pass chars in 0..MAX -- confirm for signed char
+ */
+
+void
+ppfsm(int op, register char* s)
+{
+	register int c;
+	register int n;
+	register int i;
+	register short* rp;
+	register struct fsminit* fp;
+#if !PROTOMAIN
+	char* t;
+	int x;
+#endif
+
+	switch (op)
+	{
+
+#if !PROTOMAIN
+
+	case FSM_IDADD:
+		while (c = *s++)
+			if (!ppisid(c))
+			{
+				if (fsm[TOKEN][c] == ~S_HUH) /* only chars that TOKEN currently maps to S_HUH may be added */
+				{
+					setid(c);
+					for (i = 0; i < TERMINAL; i++)
+						fsm[i][c] = IDSTATE(fsm[i]['_']); /* derived from the '_' transition of the same state */
+				}
+				else error(2, "%c: cannot add to identifier set", c);
+			}
+		break;
+
+	case FSM_IDDEL:
+		while (c = *s++)
+			if (ppisid(c))
+			{
+				clrid(c);
+				for (i = 0; i < TERMINAL; i++)
+					fsm[i][c] = ~S_HUH;
+			}
+		break;
+
+#endif
+
+	case FSM_INIT:
+		for (fp = fsminit;; fp++)
+		{
+			if ((n = fp->nextstate) >= TERMINAL) n = ~n; /* states >= TERMINAL are stored complemented (negative) */
+			if (fp->state == OP)
+			{
+#if !PROTOMAIN
+				switch (n)
+				{
+				case COPY:
+					c = fp->ch[0]; /* source state */
+					n = fp->ch[2]; /* last destination state */
+					for (i = fp->ch[1]; i <= n; i++)
+						copy(i, c);
+					continue; /* next fsminit[] row */
+				default:
+					break;
+				}
+#endif
+				break; /* any other OP row ends the fsminit[] scan */
+			}
+			rp = fsm[fp->state];
+			for (i = 0; i < sizeof(fp->ch) && (c = fp->ch[i]); i++) /* ch[] ends at its first 0 entry */
+			{
+				switch (c)
+				{
+				case C_XXX:
+					for (c = 0; c <= MAX; c++)
+						rp[c] = n;
+					/*FALLTHROUGH*/
+
+				case C_EOF:
+					fsm[TERMINAL][fp->state+1] = n < 0 ? ~n : n; /* this slot holds the uncomplemented value */
+					continue;
+
+				case C_LET:
+					s = let;
+					break;
+
+				case C_HEX:
+					s = hex;
+					break;
+
+				case C_DEC:
+					s = dec;
+					break;
+
+				case C_OCT:
+					s = oct;
+					break;
+
+				default:
+					rp[c] = n; /* literal character */
+					continue;
+				}
+				while (c = *s++) /* character class: apply to each member */
+					rp[c] = n;
+			}
+		}
+
+		/*
+		 * install splice special cases
+		 * and same non-terminal transitions
+		 */
+
+		for (i = 0; i < TERMINAL; i++)
+		{
+			rp = fsm[i];
+			s = spl;
+			while (c = *s++)
+				if (c != MARK || !INCOMMENT(rp))
+				{
+					if (rp[c] >= 0) rp[c] = ~rp[c]; /* make the entry negative */
+					rp[c] &= ~SPLICE; /* clears SPLICE in the stored complement, i.e. sets it in the uncomplemented value */
+				}
+			rp[EOB] = ~S_EOB;
+			for (c = 0; c <= MAX; c++)
+				if (rp[c] == i)
+					rp[c] = 0; /* staying in the same state is encoded as 0 */
+		}
+		fsm[TERMINAL][0] = ~S_EOB;
+
+#if !PROTOMAIN
+
+		/*
+		 * default character types
+		 */
+
+		s = let;
+		while (c = *s++)
+			setid(c);
+		s = dec;
+		while (c = *s++)
+			setdig(c);
+		s = spl;
+		do setsplice(c = *s++); while (c); /* do/while: the terminating 0 is passed to setsplice() too */
+
+		/*
+		 * trigraph map
+		 */
+
+		trigraph['='] = '#';
+		trigraph['('] = '[';
+		trigraph['/'] = '\\';
+		trigraph[')'] = ']';
+		trigraph['\''] = '^';
+		trigraph['<'] = '{';
+		trigraph['!'] = '|';
+		trigraph['>'] = '}';
+		trigraph['-'] = '~';
+#endif
+		break;
+
+#if !PROTOMAIN
+
+	case FSM_PLUSPLUS:
+		if (pp.option & PLUSPLUS)
+		{
+			fsm[COLON1][':'] = ~KEEP(T_SCOPE);
+			fsm[DOT1]['*'] = ~KEEP(T_DOTREF);
+			fsm[MINUS1]['>'] = ARROW1;
+			fsm[COM1]['/'] = COM5;
+			t = "%<:";
+			for (i = 0; i < TERMINAL; i++)
+			{
+				rp = fsm[i];
+				if (!INCOMMENT(rp) && !INQUOTE(rp))
+				{
+					s = t;
+					while (c = *s++)
+					{
+						if (rp[c] > 0) rp[c] = ~rp[c];
+						else if (!rp[c]) rp[c] = ~i; /* 0 means "stay in state i", so complement i itself */
+						rp[c] &= ~SPLICE;
+					}
+				}
+			}
+			s = t;
+			while (c = *s++) setsplice(c);
+		}
+		else
+		{
+			fsm[COLON1][':'] = ~S_CHRB;
+			fsm[DOT1]['*'] = ~S_CHRB;
+			fsm[MINUS1]['>'] = ~KEEP(T_PTRMEM);
+			fsm[COM1]['/'] = (pp.option & PLUSCOMMENT) ? COM5 : ~S_CHRB;
+		}
+		break;
+
+#if COMPATIBLE
+
+	case FSM_COMPATIBILITY:
+		if (pp.state & COMPATIBILITY)
+		{
+			fsm[HEX1]['e'] = HEX1;
+			fsm[HEX1]['E'] = HEX1;
+			fsm[QNUM]['e'] = QNUM;
+			fsm[QNUM]['E'] = QNUM;
+			fsm[QNUM]['u'] = ~QUAL(QNUM);
+			fsm[QNUM]['U'] = ~QUAL(QNUM);
+		}
+		else
+		{
+			fsm[HEX1]['e'] = HEX3;
+			fsm[HEX1]['E'] = HEX3;
+			fsm[QNUM]['e'] = QEXP;
+			fsm[QNUM]['E'] = QEXP;
+			fsm[QNUM]['u'] = QNUM;
+			fsm[QNUM]['U'] = QNUM;
+		}
+		break;
+
+#endif
+
+	case FSM_QUOTADD:
+		while (c = *s++)
+			if (fsm[TOKEN][c] == ~S_HUH)
+				for (i = 0; i < TERMINAL; i++)
+					fsm[i][c] = fsm[i]['"']; /* c now takes the '"' transition in every state */
+			else error(2, "%c: cannot add to quote set", c);
+		break;
+
+	case FSM_QUOTDEL:
+		while (c = *s++)
+			if (c != '"' && fsm[TOKEN][c] == fsm[TOKEN]['"'])
+				for (i = 0; i < TERMINAL; i++)
+					fsm[i][c] = fsm[i]['_']; /* c now takes the '_' transition in every state */
+		break;
+
+	case FSM_OPSPACE:
+		n = s ? BIN1 : ~S_CHRB;
+		fsm[COM1][' '] = fsm[COM1]['\t'] = n;
+		fsm[AND1][' '] = fsm[AND1]['\t'] = n;
+		fsm[STAR1][' '] = fsm[STAR1]['\t'] = n;
+		fsm[PCT1][' '] = fsm[PCT1]['\t'] = n;
+		fsm[PLUS1][' '] = fsm[PLUS1]['\t'] = n;
+		fsm[MINUS1][' '] = fsm[MINUS1]['\t'] = n;
+		fsm[CIRC1][' '] = fsm[CIRC1]['\t'] = n;
+		fsm[OR1][' '] = fsm[OR1]['\t'] = n;
+		fsm[LSH1][' '] = fsm[LSH1]['\t'] = s ? BIN1 : ~BACK(T_LSHIFT);
+		fsm[RSH1][' '] = fsm[RSH1]['\t'] = s ? BIN1 : ~BACK(T_RSHIFT);
+		break;
+
+	case FSM_MACRO:
+		if (pp.truncate && strlen(s) >= pp.truncate)
+		{
+			x = s[pp.truncate]; /* remember the char overwritten by the temporary terminator */
+			s[pp.truncate] = 0;
+		}
+		else x = -1; /* no truncation in effect */
+		i = MAC0 + ((c = *s++) != 'L'); /* MAC0 when the name starts with 'L', MAC0+1 otherwise */
+		if ((n = fsm[QUICK][c]) != (i + NMAC))
+		{
+			n = i;
+			if (!*s) n += NMAC; /* one-char name */
+		}
+		if (fsm[QUICK][c] != n)
+			fsm[QUICK][c] = fsm[QCOM][c] = fsm[QTOK][c] = n;
+		if (c = *s++) /* NOTE(review): when this is 0 and x >= 0 the saved char is never written back -- confirm pp.truncate cannot be 1 */
+		{
+			for (;;)
+			{
+				if ((i = n) < HIT0)
+				{
+					if (n < MACN) n++;
+					if (!*s)
+					{
+						n += NMAC;
+						break;
+					}
+					if (fsm[i][c] < HIT0)
+						fsm[i][c] = n;
+					if (fsm[i + NMAC][c] < HIT0)
+						fsm[i + NMAC][c] = n;
+				}
+				else
+				{
+					if (n < HITN) n++;
+					if (!*s) break;
+					if (fsm[i][c] < HIT0)
+					{
+						n -= NMAC;
+						fsm[i][c] = n;
+					}
+				}
+				c = *s++;
+			}
+			if (x >= 0)
+			{
+				*s = x; /* s is at the terminating 0 here: put the saved char back */
+				for (n = CHAR_MIN; n <= CHAR_MAX; n++)
+					if (ppisidig(n))
+						fsm[HITN][n] = HITN;
+				n = HITN;
+			}
+			if (fsm[i][c] < n)
+				fsm[i][c] = n;
+			if (i < HIT0 && fsm[i + NMAC][c] < n)
+				fsm[i + NMAC][c] = n;
+		}
+		break;
+
+#endif
+
+	}
+}
+
+#if !PROTOMAIN
+
+/*
+ * file buffer refill
+ * c is current input char
+ *
+ * works on the current input pp.in and returns nothing: on exit
+ * pp.in->nextchr points at the new data, which is 0-terminated
+ *
+ * if IN_eof is already set the buffer is not read again: nextchr is
+ * backed up by one and an empty buffer is presented. otherwise c is
+ * stored in the byte just before buffer+PPBAKSIZ and up to PPBUFSIZ
+ * bytes are read there. a zero-length read sets IN_eof (and, when the
+ * IN_c flag differs from the previous input's, supplies "}\n" first);
+ * a read error is reported and treated as an empty buffer
+ *
+ * NOTE(review): nextchr[c] is written with c as large as PPBUFSIZ, so
+ * the buffer presumably has a spare byte after PPBAKSIZ+PPBUFSIZ -- confirm
+ */
+
+void
+refill(register int c)
+{
+	if (pp.in->flags & IN_eof)
+	{
+		pp.in->nextchr--;
+		c = 0;
+	}
+	else
+	{
+		*((pp.in->nextchr = pp.in->buffer + PPBAKSIZ) - 1) = c; /* keep c just in front of the fresh data */
+		c =
+#if PROTOTYPE
+		(pp.in->flags & IN_prototype) ? pppread(pp.in->nextchr) :
+#endif
+		read(pp.in->fd, pp.in->nextchr, PPBUFSIZ);
+	}
+	if (c > 0)
+	{
+		if (pp.in->nextchr[c - 1] == '\n') pp.in->flags |= IN_newline; /* IN_newline records whether the data ends in '\n' */
+		else pp.in->flags &= ~IN_newline;
+#if PROTOTYPE
+		if (!(pp.in->flags & IN_prototype))
+#endif
+		if (c < PPBUFSIZ && (pp.in->flags & IN_regular)) /* short read on an IN_regular input: treat as last buffer */
+		{
+			pp.in->flags |= IN_eof;
+			close(pp.in->fd);
+			pp.in->fd = -1;
+		}
+	}
+	else
+	{
+		if (c < 0)
+		{
+			error(ERROR_SYSTEM|3, "read error");
+			c = 0;
+		}
+		else if ((pp.in->flags ^ pp.in->prev->flags) & IN_c)
+		{
+			static char ket[] = { 0, '}', '\n', 0 }; /* leading 0 sits in front of the data, trailing 0 terminates it */
+
+			pp.in->flags ^= IN_c;
+			pp.in->nextchr = ket + 1;
+			c = 2;
+		}
+		pp.in->flags |= IN_eof;
+	}
+#if CHECKPOINT
+	pp.in->buflen = c;
+#endif
+	pp.in->nextchr[c] = 0; /* 0-terminate the data */
+	debug((-7, "refill(\"%s\") = %d = \"%-.*s%s\"", error_info.file, c, (c > 32 ? 32 : c), pp.in->nextchr, c > 32 ? "..." : ""));
+	if (pp.test & 0x0080)
+		sfprintf(sfstderr, "===== refill(\"%s\") = %d =====\n%s\n===== eob(\"%s\") =====\n", error_info.file, c, pp.in->nextchr, error_info.file);
+}
+
+#endif |