summaryrefslogtreecommitdiff
path: root/usr/src/lib/libpp/common/ppfsm.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/lib/libpp/common/ppfsm.c')
-rw-r--r--usr/src/lib/libpp/common/ppfsm.c946
1 files changed, 946 insertions, 0 deletions
diff --git a/usr/src/lib/libpp/common/ppfsm.c b/usr/src/lib/libpp/common/ppfsm.c
new file mode 100644
index 0000000000..5cef65db2a
--- /dev/null
+++ b/usr/src/lib/libpp/common/ppfsm.c
@@ -0,0 +1,946 @@
+/***********************************************************************
+* *
+* This software is part of the ast package *
+* Copyright (c) 1986-2007 AT&T Knowledge Ventures *
+* and is licensed under the *
+* Common Public License, Version 1.0 *
+* by AT&T Knowledge Ventures *
+* *
+* A copy of the License is available at *
+* http://www.opensource.org/licenses/cpl1.0.txt *
+* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
+* *
+* Information and Software Systems Research *
+* AT&T Research *
+* Florham Park NJ *
+* *
+* Glenn Fowler <gsf@research.att.com> *
+* *
+***********************************************************************/
+#pragma prototyped
+/*
+ * Glenn Fowler
+ * AT&T Research
+ *
+ * preprocessor and proto lexical analyzer fsm
+ * define PROTOMAIN for standalone proto
+ */
+
+#include "pplib.h"
+#include "ppfsm.h"
+
+/*
+ * lexical FSM encoding
+ * derived from a standalone ansi cpp by Dennis Ritchie
+ * modified for libpp by Glenn Fowler
+ *
+ * fsm[] is initialized from fsminit[]. The encoding is blown out into
+ * fsm[] for time efficiency. When in state state, and one of the
+ * characters in ch arrives, enter nextstate. States >= TERMINAL are
+ * either final, or at least require special action. In fsminit[] there
+ * is a line for each <state,charset,nextstate>. Early entries are
+ * overwritten by later ones. C_XXX is the universal set and should
+ * always be first. Some of the fsminit[] entries are templates for
+ * groups of states. The OP entries trigger the state copies. States
+ * above TERMINAL are represented in fsm[] as negative values. S_TOK and
+ * S_TOKB encode the resulting token type in the upper bits. These actions
+ * differ in that S_TOKB has a lookahead char.
+ *
+ * fsm[] has three start states:
+ *
+ * PROTO proto (ANSI -> K&R,C++,ANSI)
+ * QUICK standalone ppcpp()
+ * TOKEN tokenizing pplex()
+ *
+ * If the next state remains the same then the fsm[] transition value is 0.
+ * MAX+1 is a power of 2 so that fsm[state][EOF==MAX+1] actually accesses
+ * fsm[state+1][0] which is ~S_EOB for all states. This preserves the
+ * power of 2 fsm[] row size for efficient array indexing. Thanks to
+ * D. G. Korn for the last two observations. The pseudo non-terminal state
+ * fsm[TERMINAL][state+1] is used to differentiate EOB from EOF.
+ *
+ * The bit layout is:
+ *
+ * TERM arg SPLICE next
+ * 15 14-8 7 6-0
+ */
+
+/*
+ * character class codes that may appear in fsminit[].ch[]
+ * ppfsm(FSM_INIT) expands each code into the characters it stands for
+ *
+ * NOTE: these must be `control' characters for all native codesets
+ * currently ok for {ascii,ebcdic1,ebcdic2,ebcdic3}
+ */
+
+#define C_DEC 001 /* every char of dec[] */
+#define C_EOF 002 /* no char column: only records the row's fsm[TERMINAL][state+1] entry */
+#define C_HEX 003 /* every char of hex[] */
+#define C_LET 021 /* every char of let[] */
+#define C_OCT 022 /* every char of oct[] */
+#define C_XXX 023 /* every char 0..MAX, plus the fsm[TERMINAL][state+1] entry */
+
+#define OP (-1) /* fsminit[].state value marking an operation row */
+#define END 0 /* OP nextstate: stop processing fsminit[] */
+#define COPY 1 /* OP nextstate: copy state ch[0] into states ch[1]..ch[2] */
+/*
+ * copy(t,f): duplicate state f into state t
+ * MAX+1 shorts are copied starting at column 1, i.e. columns 1..MAX plus
+ * the short that follows the row (the slot addressed as fsm[state][MAX+1]);
+ * column 0 of row t is left alone
+ * the fsm[TERMINAL][state+1] entry is copied as well
+ */
+#define copy(t,f) (memcpy(&fsm[t][1],&fsm[f][1],(MAX+1)*sizeof(short)),fsm[TERMINAL][(t)+1]=fsm[TERMINAL][(f)+1])
+
+struct fsminit /* one <state,charset,nextstate> row of fsminit[] */
+{
+ int state; /* if in this state (OP marks an operation row) */
+ unsigned char ch[4]; /* and see one of these: chars or C_* codes, a 0 entry ends the list */
+ int nextstate; /* enter this state; values >= TERMINAL are stored complemented (~) in fsm[] */
+};
+
+static struct fsminit fsminit[] = /* applied top to bottom by ppfsm(FSM_INIT); later rows overwrite earlier ones */
+{
+ /* proto start state */
+ { PROTO, { C_XXX }, S_CHR, },
+ { PROTO, { C_EOF }, S_EOF, },
+ { PROTO, { C_DEC }, BAD1, },
+ { PROTO, { '.' }, DOT, },
+ { PROTO, { C_LET }, NID, },
+ { PROTO, { 'L' }, LIT, },
+ { PROTO, { 'd', 'e', 'f', 'i' }, RES1, },
+ { PROTO, { 'r', 's', 't', 'v' }, RES1, },
+ { PROTO, { 'w', 'N' }, RES1, },
+ { PROTO, { '"', '\'' }, S_LITBEG, },
+ { PROTO, { '/' }, COM1, },
+ { PROTO, { '\n' }, S_NL, },
+ { PROTO, { ' ','\t','\f','\v' }, WS1, },
+
+/* proto {do,else,extern,for,if,inline,return,static,typedef,va_start,void,while,NoN} */
+ { RES1, { C_XXX }, S_MACRO, },
+ { RES1, { C_LET, C_DEC }, NID, },
+ { RES1, { 'a' }, RES1a, },
+ { RES1, { 'e' }, RES1e, },
+ { RES1, { 'f' }, RES1f, },
+ { RES1, { 'h' }, RES1h, },
+ { RES1, { 'l' }, RES1l, },
+ { RES1, { 'n' }, RES1n, },
+ { RES1, { 'o' }, RES1o, },
+ { RES1, { 't' }, RES1t, },
+ { RES1, { 'x' }, RES1x, },
+ { RES1, { 'y' }, RES1y, },
+
+ /* proto reserved {va_start} */
+ { RES1a, { C_XXX }, S_RESERVED, },
+ { RES1a, { C_LET, C_DEC }, NID, },
+ { RES1a, { '_','s','t','a' }, RES1a, },
+ { RES1a, { 'r' }, RES1a, },
+
+ /* proto reserved {return} */
+ { RES1e, { C_XXX }, S_RESERVED, },
+ { RES1e, { C_LET, C_DEC }, NID, },
+ { RES1e, { 't','u','r','n' }, RES1e, },
+
+ /* proto reserved {if} */
+ { RES1f, { C_XXX }, S_RESERVED, },
+ { RES1f, { C_LET, C_DEC }, NID, },
+
+ /* proto reserved {while} */
+ { RES1h, { C_XXX }, S_RESERVED, },
+ { RES1h, { C_LET, C_DEC }, NID, },
+ { RES1h, { 'i','l','e' }, RES1h, },
+
+ /* proto reserved {else} */
+ { RES1l, { C_XXX }, S_RESERVED, },
+ { RES1l, { C_LET, C_DEC }, NID, },
+ { RES1l, { 's','e' }, RES1l, },
+
+ /* proto reserved {inline} */
+ { RES1n, { C_XXX }, S_RESERVED, },
+ { RES1n, { C_LET, C_DEC }, NID, },
+ { RES1n, { 'l','i','n','e' }, RES1n, },
+
+ /* proto reserved {do,for,void} */
+ { RES1o, { C_XXX }, S_RESERVED, },
+ { RES1o, { C_LET, C_DEC }, NID, },
+ { RES1o, { 'r','i','d','N' }, RES1o, },
+
+ /* proto reserved {static} */
+ { RES1t, { C_XXX }, S_RESERVED, },
+ { RES1t, { C_LET, C_DEC }, NID, },
+ { RES1t, { 'a','t','i','c' }, RES1t, },
+
+ /* proto reserved {extern} */
+ { RES1x, { C_XXX }, S_RESERVED, },
+ { RES1x, { C_LET, C_DEC }, NID, },
+ { RES1x, { 't','e','r','n' }, RES1x, },
+
+ /* proto reserved {typedef} */
+ { RES1y, { C_XXX }, S_RESERVED, },
+ { RES1y, { C_LET, C_DEC }, NID, },
+ { RES1y, { 'p','e','d','f' }, RES1y, },
+
+ /* saw /, perhaps start of comment */
+ { COM1, { C_XXX }, S_CHRB, },
+ { COM1, { '*' }, COM2, },
+#if PROTOMAIN
+ { COM1, { '/' }, COM5, },
+#endif
+
+ /* saw / *, start of comment */
+ { COM2, { C_XXX }, COM2, },
+ { COM2, { '\n', C_EOF }, S_COMMENT, },
+ { COM2, { '/' }, COM4, },
+ { COM2, { '*' }, COM3, },
+ { COM2, { '#', ';', ')' }, QUAL(COM2), },
+
+ /* saw the * possibly ending a comment */
+ { COM3, { C_XXX }, COM2, },
+ { COM3, { '\n', C_EOF }, S_COMMENT, },
+ { COM3, { '#', ';', ')' }, QUAL(COM2), },
+ { COM3, { '*' }, COM3, },
+ { COM3, { '/' }, S_COMMENT, },
+
+ /* saw / in / * comment, possible malformed nest */
+ { COM4, { C_XXX }, COM2, },
+ { COM4, { '*', '\n', C_EOF }, S_COMMENT, },
+ { COM4, { '/' }, COM4, },
+
+ /* saw / /, start of comment */
+ { COM5, { C_XXX }, COM5, },
+ { COM5, { '\n', C_EOF }, S_COMMENT, },
+ { COM5, { '/' }, COM6, },
+ { COM5, { '*' }, COM7, },
+
+ /* saw / in / / comment, possible malformed nest */
+ { COM6, { C_XXX }, COM5, },
+ { COM6, { '*', '\n', C_EOF }, S_COMMENT, },
+ { COM6, { '/' }, COM6, },
+
+ /* saw * in / /, possible malformed nest */
+ { COM7, { C_XXX }, COM5, },
+ { COM7, { '\n', C_EOF }, S_COMMENT, },
+ { COM7, { '*' }, COM7, },
+ { COM7, { '/' }, S_COMMENT, },
+
+ /* normal identifier -- always a macro candidate */
+ { NID, { C_XXX }, S_MACRO, },
+ { NID, { C_LET, C_DEC }, NID, },
+
+ /* saw ., operator or dbl constant */
+ { DOT, { C_XXX }, S_CHRB, },
+ { DOT, { '.' }, DOT2, },
+ { DOT, { C_DEC }, BAD1, },
+
+ /* saw .., possible ... */
+ { DOT2, { C_XXX }, BACK(T_INVALID), },
+ { DOT2, { '.' }, KEEP(T_VARIADIC), },
+
+ /* saw L (possible start of normal wide literal) */
+ { LIT, { C_XXX }, S_MACRO, },
+ { LIT, { C_LET, C_DEC }, NID, },
+ { LIT, { '"', '\'' }, QUAL(LIT1), },
+
+ /* saw " or ' beginning literal */
+ { LIT1, { C_XXX }, LIT1, },
+ { LIT1, { '"', '\'' }, S_LITEND, },
+ { LIT1, { '\n', C_EOF }, S_LITEND, },
+ { LIT1, { '\\' }, LIT2, },
+
+ /* saw \ in literal */
+ { LIT2, { C_XXX }, S_LITESC, },
+ { LIT2, { '\n', C_EOF }, S_LITEND, },
+
+ /* eat malformed numeric constant */
+ { BAD1, { C_XXX }, BACK(T_INVALID), },
+ { BAD1, { C_LET, C_DEC, '.' }, BAD1, },
+ { BAD1, { 'e', 'E' }, BAD2, },
+
+ /* eat malformed numeric fraction|exponent */
+ { BAD2, { C_XXX }, BACK(T_INVALID), },
+ { BAD2, { C_LET, C_DEC, '.' }, BAD1, },
+ { BAD2, { '+', '-' }, BAD1, },
+
+ /* saw white space, eat it up */
+ { WS1, { C_XXX }, S_WS, },
+ { WS1, { ' ', '\t' }, WS1, },
+ { WS1, { '\f', '\v' }, S_VS, },
+
+#if !PROTOMAIN
+
+ /* quick template */
+ { QUICK, { C_XXX }, QTOK, },
+ { QUICK, { C_EOF, MARK }, S_CHRB, },
+ { QUICK, { C_LET, C_DEC }, QID, },
+ { QUICK, { 'L' }, LIT0, },
+ { QUICK, { '"', '\'' }, S_LITBEG, },
+ { QUICK, { '/' }, S_CHRB, },
+ { QUICK, { '*' }, QCOM, },
+ { QUICK, { '#' }, SHARP1, },
+ { QUICK, { '\n' }, S_NL, },
+ { QUICK, { '\f', '\v' }, S_VS, },
+
+ /* copy QUICK to QUICK+1 through MAC0+1 */
+ { OP, {QUICK,QUICK+1,MAC0+1}, COPY, },
+
+ /* quick start state */
+ { QUICK, { C_EOF }, S_EOF, },
+ { QUICK, { C_DEC }, QNUM, },
+ { QUICK, { MARK }, QTOK, },
+ { QUICK, { '/' }, COM1, },
+ { QUICK, { ' ', '\t' }, QUICK, },
+
+ /* grab non-macro tokens */
+ { QTOK, { C_DEC }, QNUM, },
+
+ /* grab numeric and invalid tokens */
+ { QNUM, { C_LET, C_DEC, '.' }, QNUM, },
+ { QNUM, { 'e', 'E' }, QEXP, },
+
+ /* grab exponent token */
+ { QEXP, { C_LET, C_DEC, '.' }, QNUM, },
+ { QEXP, { '+', '-' }, QNUM, },
+
+ /* saw *, grab possible bad comment terminator */
+ { QCOM, { C_DEC }, QNUM, },
+ { QCOM, { '/' }, S_COMMENT, },
+
+ /* saw L (possible start of wide string or first macro char) */
+ { MAC0, { 'L' }, QID, },
+ { MAC0, { '"', '\'' }, QUAL(LIT1), },
+
+ /* macro candidate template */
+ { MAC0+1, { 'L' }, QID, },
+
+ /* copy MAC0+1 to MAC0+2 through MACN */
+ { OP, {MAC0+1,MAC0+2,MACN}, COPY },
+
+ /* saw L (possible start of wide string or macro L) */
+ { HIT0, { C_XXX }, S_MACRO, },
+ { HIT0, { C_LET, C_DEC }, QID, },
+ { HIT0, { '"', '\'' }, QUAL(LIT1), },
+
+ /* macro hit template */
+ { HIT0+1, { C_XXX }, S_MACRO, },
+ { HIT0+1, { C_LET, C_DEC }, QID, },
+
+ /* copy HIT0+1 to HIT0+2 through HITN */
+ { OP, {HIT0+1,HIT0+2,HITN}, COPY },
+
+ /* saw L (possible start of wide literal) */
+ { LIT0, { C_XXX }, S_MACRO, },
+ { LIT0, { C_LET, C_DEC }, QID, },
+ { LIT0, { '"', '\'' }, QUAL(LIT1), },
+
+ /* (!PROTOMAIN COM1) saw /, perhaps start of comment or /= */
+ { COM1, { '=' }, KEEP(T_DIVEQ), },
+
+ /* normal start state */
+ { TOKEN, { C_XXX }, S_HUH, },
+ { TOKEN, { C_EOF }, S_EOF, },
+ { TOKEN, { C_DEC }, DEC1, },
+ { TOKEN, { '0' }, OCT1, },
+ { TOKEN, { '.' }, DOT1, },
+ { TOKEN, { C_LET }, NID, },
+ { TOKEN, { 'L' }, LIT, },
+ { TOKEN, { '"', '\'', '<' }, S_LITBEG, },
+ { TOKEN, { '/' }, COM1, },
+ { TOKEN, { '\n' }, S_NL, },
+ { TOKEN, { ' ', '\t' }, WS1, },
+ { TOKEN, { '\f', '\v' }, S_VS, },
+ { TOKEN, { '#' }, SHARP1, },
+ { TOKEN, { ':' }, COLON1, },
+ { TOKEN, { '%' }, PCT1, },
+ { TOKEN, { '&' }, AND1, },
+ { TOKEN, { '*' }, STAR1, },
+ { TOKEN, { '+' }, PLUS1, },
+ { TOKEN, { '-' }, MINUS1, },
+ { TOKEN, { '=' }, EQ1, },
+ { TOKEN, { '!' }, NOT1, },
+ { TOKEN, { '>' }, GT1, },
+ { TOKEN, { '^' }, CIRC1, },
+ { TOKEN, { '|' }, OR1, },
+ { TOKEN, { '(', ')', '[', ']' }, S_CHR, },
+ { TOKEN, { '{', '}', ',', ';' }, S_CHR, },
+ { TOKEN, { '~', '?' }, S_CHR, },
+
+ /* saw 0, possible oct|hex|dec|dbl constant */
+ { OCT1, { C_XXX }, BACK(T_DECIMAL), },
+ { OCT1, { C_LET, C_DEC }, BAD1, },
+ { OCT1, { C_OCT }, OCT2, },
+ { OCT1, { 'e', 'E' }, DBL2, },
+ { OCT1, { 'l', 'L', 'u', 'U' }, QUAL(DEC2), },
+ { OCT1, { 'x', 'X' }, HEX1, },
+ { OCT1, { '.' }, DBL1, },
+
+ /* saw 0<oct>, oct constant */
+ { OCT2, { C_XXX }, BACK(T_OCTAL), },
+ { OCT2, { C_LET, C_DEC }, BAD1, },
+ { OCT2, { C_OCT }, OCT2, },
+ { OCT2, { 'e', 'E' }, DBL2, },
+ { OCT2, { 'l', 'L', 'u', 'U' }, QUAL(OCT3), },
+ { OCT2, { '.' }, DBL1, },
+
+ /* oct constant qualifier */
+ { OCT3, { C_XXX }, BACK(T_OCTAL), },
+ { OCT3, { C_LET, C_DEC, '.' }, BAD1, },
+ { OCT3, { 'l', 'L', 'u', 'U' }, QUAL(OCT3), },
+
+ /* saw 0 [xX], hex constant */
+ { HEX1, { C_XXX }, BACK(T_HEXADECIMAL), },
+ { HEX1, { C_LET }, BAD1, },
+ { HEX1, { C_HEX }, HEX1, },
+ { HEX1, { 'e', 'E' }, HEX3, },
+ { HEX1, { 'l', 'L', 'u', 'U' }, QUAL(HEX2), },
+ { HEX1, { '.' }, HEX4, },
+ { HEX1, { 'p', 'P' }, HEX5, },
+
+ /* hex constant qualifier */
+ { HEX2, { C_XXX }, BACK(T_HEXADECIMAL), },
+ { HEX2, { C_LET, C_DEC, '.' }, BAD1, },
+ { HEX2, { 'l', 'L', 'u', 'U' }, QUAL(HEX2), },
+
+ /* hex [eE][-+] botch */
+ { HEX3, { C_XXX }, BACK(T_HEXADECIMAL), },
+ { HEX3, { C_LET, '.', '-', '+'},BAD1, },
+ { HEX3, { C_HEX }, HEX1, },
+ { HEX3, { 'e', 'E' }, HEX3, },
+ { HEX3, { 'l', 'L', 'u', 'U' }, QUAL(HEX2), },
+
+ /* hex dbl fraction */
+ { HEX4, { C_XXX }, BACK(T_HEXDOUBLE), },
+ { HEX4, { C_LET, '.' }, BAD1, },
+ { HEX4, { C_HEX }, HEX4, },
+ { HEX4, { 'p', 'P' }, HEX5, },
+ { HEX4, { 'f', 'F', 'l', 'L' }, QUAL(HEX8), },
+
+ /* optional hex dbl exponent sign */
+ { HEX5, { C_XXX }, BACK(T_INVALID), },
+ { HEX5, { C_LET, '.' }, BAD1, },
+ { HEX5, { '+', '-' }, HEX6, },
+ { HEX5, { C_DEC }, HEX7, },
+
+ /* mandatory hex dbl exponent first digit */
+ { HEX6, { C_XXX }, BACK(T_INVALID), },
+ { HEX6, { C_LET, '.' }, BAD1, },
+ { HEX6, { C_DEC }, HEX7, },
+
+ /* hex dbl exponent digits */
+ { HEX7, { C_XXX }, BACK(T_HEXDOUBLE), },
+ { HEX7, { C_LET, '.' }, BAD1, },
+ { HEX7, { C_DEC }, HEX7, },
+ { HEX7, { 'f', 'F', 'l', 'L' }, QUAL(HEX8), },
+
+ /* hex dbl constant qualifier */
+ { HEX8, { C_XXX }, BACK(T_HEXDOUBLE), },
+ { HEX8, { C_LET, '.' }, BAD1, },
+ { HEX8, { 'f', 'F', 'l', 'L' }, QUAL(HEX8), },
+
+ /* saw <dec>, dec constant */
+ { DEC1, { C_XXX }, BACK(T_DECIMAL), },
+ { DEC1, { C_LET }, BAD1, },
+ { DEC1, { C_DEC }, DEC1, },
+ { DEC1, { 'e', 'E' }, DBL2, },
+ { DEC1, { 'l', 'L', 'u', 'U' }, QUAL(DEC2), },
+ { DEC1, { '.' }, DBL1, },
+
+ /* dec constant qualifier */
+ { DEC2, { C_XXX }, BACK(T_DECIMAL), },
+ { DEC2, { C_LET, C_DEC }, BAD1, },
+ { DEC2, { 'l', 'L', 'u', 'U' }, QUAL(DEC2), },
+
+ /* saw ., operator or dbl constant */
+ { DOT1, { C_XXX }, S_CHRB, },
+ { DOT1, { '.' }, DOT2, },
+ { DOT1, { C_DEC }, DBL1, },
+
+ /* dbl fraction */
+ { DBL1, { C_XXX }, BACK(T_DOUBLE), },
+ { DBL1, { C_LET, '.' }, BAD1, },
+ { DBL1, { C_DEC }, DBL1, },
+ { DBL1, { 'e', 'E' }, DBL2, },
+ { DBL1, { 'f', 'F', 'l', 'L' }, QUAL(DBL5), },
+
+ /* optional dbl exponent sign */
+ { DBL2, { C_XXX }, BACK(T_INVALID), },
+ { DBL2, { C_LET, '.' }, BAD1, },
+ { DBL2, { '+', '-' }, DBL3, },
+ { DBL2, { C_DEC }, DBL4, },
+
+ /* mandatory dbl exponent first digit */
+ { DBL3, { C_XXX }, BACK(T_INVALID), },
+ { DBL3, { C_LET, '.' }, BAD1, },
+ { DBL3, { C_DEC }, DBL4, },
+
+ /* dbl exponent digits */
+ { DBL4, { C_XXX }, BACK(T_DOUBLE), },
+ { DBL4, { C_LET, '.' }, BAD1, },
+ { DBL4, { C_DEC }, DBL4, },
+ { DBL4, { 'f', 'F', 'l', 'L' }, QUAL(DBL5), },
+
+ /* dbl constant qualifier */
+ { DBL5, { C_XXX }, BACK(T_DOUBLE), },
+ { DBL5, { C_LET, '.' }, BAD1, },
+ { DBL5, { 'f', 'F', 'l', 'L' }, QUAL(DBL5), },
+
+ /* saw < starting include header (no row in this table enters HDR1) */
+ { HDR1, { C_XXX }, HDR1, },
+ { HDR1, { '>', '\n', C_EOF }, S_LITEND, },
+
+ /* saw <binop><space> expecting = (transitions into BIN1 are installed by ppfsm(FSM_OPSPACE)) */
+ { BIN1, { C_XXX }, S_HUH, },
+ { BIN1, { ' ', '\t' }, BIN1, },
+
+ /* 2-char ops */
+
+ { SHARP1, { C_XXX }, S_SHARP, },
+
+ { PCT1, { C_XXX }, S_CHRB, },
+ { PCT1, { '=' }, KEEP(T_MODEQ), },
+
+ { AND1, { C_XXX }, S_CHRB, },
+ { AND1, { '=' }, KEEP(T_ANDEQ), },
+ { AND1, { '&' }, KEEP(T_ANDAND), },
+
+ { STAR1, { C_XXX }, S_CHRB, },
+ { STAR1, { '=' }, KEEP(T_MPYEQ), },
+ { STAR1, { '/' }, S_COMMENT, },
+
+ { PLUS1, { C_XXX }, S_CHRB, },
+ { PLUS1, { '=' }, KEEP(T_ADDEQ), },
+ { PLUS1, { '+' }, KEEP(T_ADDADD), },
+
+ { MINUS1, { C_XXX }, S_CHRB, },
+ { MINUS1, { '=' }, KEEP(T_SUBEQ), },
+ { MINUS1, { '-' }, KEEP(T_SUBSUB), },
+ { MINUS1, { '>' }, KEEP(T_PTRMEM), },
+
+ { COLON1, { C_XXX }, S_CHRB, },
+ { COLON1, { '=', '>' }, S_HUH, },
+ /* no row in this table enters LT1 */
+ { LT1, { C_XXX }, S_CHRB, },
+ { LT1, { '=' }, KEEP(T_LE), },
+ { LT1, { '<' }, LSH1, },
+
+ { EQ1, { C_XXX }, S_CHRB, },
+ { EQ1, { '=' }, KEEP(T_EQ), },
+
+ { NOT1, { C_XXX }, S_CHRB, },
+ { NOT1, { '=' }, KEEP(T_NE), },
+
+ { GT1, { C_XXX }, S_CHRB, },
+ { GT1, { '=' }, KEEP(T_GE), },
+ { GT1, { '>' }, RSH1, },
+
+ { CIRC1, { C_XXX }, S_CHRB, },
+ { CIRC1, { '=' }, KEEP(T_XOREQ), },
+
+ { OR1, { C_XXX }, S_CHRB, },
+ { OR1, { '=' }, KEEP(T_OREQ), },
+ { OR1, { '|' }, KEEP(T_OROR), },
+
+ /* 3-char ops (the transition into ARROW1 is installed by ppfsm(FSM_PLUSPLUS)) */
+
+ { ARROW1, { C_XXX }, BACK(T_PTRMEM), },
+ { ARROW1, { '*' }, KEEP(T_PTRMEMREF), },
+
+ { LSH1, { C_XXX }, BACK(T_LSHIFT), },
+ { LSH1, { '=' }, KEEP(T_LSHIFTEQ), },
+
+ { RSH1, { C_XXX }, BACK(T_RSHIFT), },
+ { RSH1, { '=' }, KEEP(T_RSHIFTEQ), },
+
+#endif
+
+ /* end */
+ { OP, { 0 }, END, }
+};
+
+short fsm[TERMINAL+1][MAX+1]; /* expanded transition table, filled in by ppfsm(FSM_INIT) */
+
+char trigraph[MAX+1]; /* trigraph map, e.g. trigraph['='] is '#'; set by ppfsm(FSM_INIT) when !PROTOMAIN */
+
+#if PROTOMAIN
+static char spl[] = { '\\', '\r', 0 }; /* 0 terminated list of chars given splice handling in every state */
+static char aln[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_$@"; /* digits followed by the C_LET chars */
+#else
+static char spl[] = { MARK, '?', '\\', '\r', CC_sub, 0 }; /* 0 terminated list of chars given splice handling (MARK is skipped for INCOMMENT states) */
+static char aln[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_"; /* digits followed by the C_LET chars */
+#endif
+static char* let = &aln[10]; /* C_LET expansion: aln[] past its 10 digits */
+static char hex[] = "fedcbaFEDCBA9876543210"; /* C_HEX expansion */
+static char* dec = &hex[12]; /* C_DEC expansion: "9876543210" */
+static char* oct = &hex[14]; /* C_OCT expansion: "76543210" */
+
+/*
+ * runtime FSM modifications
+ * ppfsm(FSM_INIT,0) must be called first
+ *
+ * op selects the modification, s is its argument:
+ *
+ * FSM_IDADD add each char of s to the identifier set
+ * FSM_IDDEL drop each char of s from the identifier set
+ * FSM_INIT expand fsminit[] into fsm[] (s is not read)
+ * FSM_PLUSPLUS install or remove the :: .* ->* and // transitions
+ * according to pp.option
+ * FSM_COMPATIBILITY select the e/E and u/U handling in HEX1 and QNUM
+ * according to pp.state (only #if COMPATIBLE)
+ * FSM_QUOTADD give each char of s the transitions of '"'
+ * FSM_QUOTDEL give each quote char of s the transitions of '_'
+ * FSM_OPSPACE s!=0 routes space and tab after an operator char to
+ * BIN1, s==0 restores the default transitions
+ * FSM_MACRO thread the name s through the MAC0..MACN and
+ * HIT0..HITN states; s must be writable because it
+ * may be truncated in place while it is scanned
+ *
+ * only FSM_INIT is compiled when PROTOMAIN is set
+ * an op that matches no case is silently ignored
+ */
+
+void
+ppfsm(int op, register char* s)
+{
+ register int c; /* current char or C_* class code */
+ register int n; /* state value being stored */
+ register int i; /* state number or ch[] index */
+ register short* rp; /* fsm[] row being edited */
+ register struct fsminit* fp; /* current fsminit[] row */
+#if !PROTOMAIN
+ char* t; /* FSM_PLUSPLUS: chars that get splice handling */
+ int x; /* FSM_MACRO: char displaced by truncation, -1 if none */
+#endif
+
+ switch (op)
+ {
+
+#if !PROTOMAIN
+
+ case FSM_IDADD:
+ while (c = *s++)
+ if (!ppisid(c))
+ {
+ if (fsm[TOKEN][c] == ~S_HUH) /* only chars the TOKEN state does not already use */
+ {
+ setid(c);
+ for (i = 0; i < TERMINAL; i++)
+ fsm[i][c] = IDSTATE(fsm[i]['_']); /* derive the entry from the '_' column */
+ }
+ else error(2, "%c: cannot add to identifier set", c);
+ }
+ break;
+
+ case FSM_IDDEL:
+ while (c = *s++)
+ if (ppisid(c))
+ {
+ clrid(c);
+ for (i = 0; i < TERMINAL; i++)
+ fsm[i][c] = ~S_HUH;
+ }
+ break;
+
+#endif
+
+ case FSM_INIT:
+ for (fp = fsminit;; fp++)
+ {
+ if ((n = fp->nextstate) >= TERMINAL) n = ~n; /* terminal states are stored negative */
+ if (fp->state == OP)
+ {
+#if !PROTOMAIN
+ switch (n)
+ {
+ case COPY:
+ c = fp->ch[0]; /* source state */
+ n = fp->ch[2]; /* last destination state */
+ for (i = fp->ch[1]; i <= n; i++)
+ copy(i, c);
+ continue;
+ default:
+ break;
+ }
+#endif
+ break; /* any other OP row ends the table */
+ }
+ rp = fsm[fp->state];
+ for (i = 0; i < sizeof(fp->ch) && (c = fp->ch[i]); i++) /* a 0 entry ends the char list */
+ {
+ switch (c)
+ {
+ case C_XXX:
+ for (c = 0; c <= MAX; c++)
+ rp[c] = n;
+ /*FALLTHROUGH*/
+
+ case C_EOF:
+ fsm[TERMINAL][fp->state+1] = n < 0 ? ~n : n; /* kept uncomplemented in the TERMINAL row */
+ continue;
+
+ case C_LET:
+ s = let;
+ break;
+
+ case C_HEX:
+ s = hex;
+ break;
+
+ case C_DEC:
+ s = dec;
+ break;
+
+ case C_OCT:
+ s = oct;
+ break;
+
+ default:
+ rp[c] = n; /* literal char */
+ continue;
+ }
+ while (c = *s++) /* class code: apply to every member */
+ rp[c] = n;
+ }
+ }
+
+ /*
+ * install splice special cases
+ * and same non-terminal transitions
+ */
+
+ for (i = 0; i < TERMINAL; i++)
+ {
+ rp = fsm[i];
+ s = spl;
+ while (c = *s++)
+ if (c != MARK || !INCOMMENT(rp))
+ {
+ if (rp[c] >= 0) rp[c] = ~rp[c]; /* force a negative entry */
+ rp[c] &= ~SPLICE; /* clears SPLICE in the stored value, so it is set in the ~ decoded value */
+ }
+ rp[EOB] = ~S_EOB;
+ for (c = 0; c <= MAX; c++)
+ if (rp[c] == i)
+ rp[c] = 0; /* staying in the same state is encoded as 0 */
+ }
+ fsm[TERMINAL][0] = ~S_EOB;
+
+#if !PROTOMAIN
+
+ /*
+ * default character types
+ */
+
+ s = let;
+ while (c = *s++)
+ setid(c);
+ s = dec;
+ while (c = *s++)
+ setdig(c);
+ s = spl;
+ do setsplice(c = *s++); while (c); /* the terminating 0 is marked too */
+
+ /*
+ * trigraph map
+ */
+
+ trigraph['='] = '#';
+ trigraph['('] = '[';
+ trigraph['/'] = '\\';
+ trigraph[')'] = ']';
+ trigraph['\''] = '^';
+ trigraph['<'] = '{';
+ trigraph['!'] = '|';
+ trigraph['>'] = '}';
+ trigraph['-'] = '~';
+#endif
+ break;
+
+#if !PROTOMAIN
+
+ case FSM_PLUSPLUS:
+ if (pp.option & PLUSPLUS)
+ {
+ fsm[COLON1][':'] = ~KEEP(T_SCOPE);
+ fsm[DOT1]['*'] = ~KEEP(T_DOTREF);
+ fsm[MINUS1]['>'] = ARROW1;
+ fsm[COM1]['/'] = COM5;
+ t = "%<:";
+ for (i = 0; i < TERMINAL; i++)
+ {
+ rp = fsm[i];
+ if (!INCOMMENT(rp) && !INQUOTE(rp))
+ {
+ s = t;
+ while (c = *s++)
+ {
+ if (rp[c] > 0) rp[c] = ~rp[c];
+ else if (!rp[c]) rp[c] = ~i; /* 0 meant stay in state i */
+ rp[c] &= ~SPLICE;
+ }
+ }
+ }
+ s = t;
+ while (c = *s++) setsplice(c);
+ }
+ else
+ {
+ fsm[COLON1][':'] = ~S_CHRB;
+ fsm[DOT1]['*'] = ~S_CHRB;
+ fsm[MINUS1]['>'] = ~KEEP(T_PTRMEM);
+ fsm[COM1]['/'] = (pp.option & PLUSCOMMENT) ? COM5 : ~S_CHRB; /* // comments may be enabled on their own */
+ }
+ break;
+
+#if COMPATIBLE
+
+ case FSM_COMPATIBILITY:
+ if (pp.state & COMPATIBILITY)
+ {
+ fsm[HEX1]['e'] = HEX1;
+ fsm[HEX1]['E'] = HEX1;
+ fsm[QNUM]['e'] = QNUM;
+ fsm[QNUM]['E'] = QNUM;
+ fsm[QNUM]['u'] = ~QUAL(QNUM);
+ fsm[QNUM]['U'] = ~QUAL(QNUM);
+ }
+ else
+ {
+ fsm[HEX1]['e'] = HEX3;
+ fsm[HEX1]['E'] = HEX3;
+ fsm[QNUM]['e'] = QEXP;
+ fsm[QNUM]['E'] = QEXP;
+ fsm[QNUM]['u'] = QNUM;
+ fsm[QNUM]['U'] = QNUM;
+ }
+ break;
+
+#endif
+
+ case FSM_QUOTADD:
+ while (c = *s++)
+ if (fsm[TOKEN][c] == ~S_HUH) /* only chars the TOKEN state does not already use */
+ for (i = 0; i < TERMINAL; i++)
+ fsm[i][c] = fsm[i]['"'];
+ else error(2, "%c: cannot add to quote set", c);
+ break;
+
+ case FSM_QUOTDEL:
+ while (c = *s++)
+ if (c != '"' && fsm[TOKEN][c] == fsm[TOKEN]['"']) /* '"' itself is never removed */
+ for (i = 0; i < TERMINAL; i++)
+ fsm[i][c] = fsm[i]['_'];
+ break;
+
+ case FSM_OPSPACE:
+ n = s ? BIN1 : ~S_CHRB; /* s is only tested for null here */
+ fsm[COM1][' '] = fsm[COM1]['\t'] = n;
+ fsm[AND1][' '] = fsm[AND1]['\t'] = n;
+ fsm[STAR1][' '] = fsm[STAR1]['\t'] = n;
+ fsm[PCT1][' '] = fsm[PCT1]['\t'] = n;
+ fsm[PLUS1][' '] = fsm[PLUS1]['\t'] = n;
+ fsm[MINUS1][' '] = fsm[MINUS1]['\t'] = n;
+ fsm[CIRC1][' '] = fsm[CIRC1]['\t'] = n;
+ fsm[OR1][' '] = fsm[OR1]['\t'] = n;
+ fsm[LSH1][' '] = fsm[LSH1]['\t'] = s ? BIN1 : ~BACK(T_LSHIFT);
+ fsm[RSH1][' '] = fsm[RSH1]['\t'] = s ? BIN1 : ~BACK(T_RSHIFT);
+ break;
+
+ case FSM_MACRO:
+ if (pp.truncate && strlen(s) >= pp.truncate)
+ {
+ x = s[pp.truncate]; /* cut the name in place, remembering the displaced char */
+ s[pp.truncate] = 0;
+ }
+ else x = -1;
+ i = MAC0 + ((c = *s++) != 'L'); /* first char 'L' starts at MAC0, any other at MAC0+1 */
+ if ((n = fsm[QUICK][c]) != (i + NMAC)) /* an existing i+NMAC entry is kept */
+ {
+ n = i;
+ if (!*s) n += NMAC; /* one char name */
+ }
+ if (fsm[QUICK][c] != n)
+ fsm[QUICK][c] = fsm[QCOM][c] = fsm[QTOK][c] = n;
+ if (c = *s++) /* NOTE(review): skipped for a one char name, so a char saved in x is not restored on that path -- confirm pp.truncate is never 1 */
+ {
+ for (;;)
+ {
+ if ((i = n) < HIT0)
+ {
+ if (n < MACN) n++; /* advance, saturating at MACN */
+ if (!*s)
+ {
+ n += NMAC; /* c is the last char of the name */
+ break;
+ }
+ if (fsm[i][c] < HIT0)
+ fsm[i][c] = n;
+ if (fsm[i + NMAC][c] < HIT0)
+ fsm[i + NMAC][c] = n;
+ }
+ else
+ {
+ if (n < HITN) n++; /* advance, saturating at HITN */
+ if (!*s) break;
+ if (fsm[i][c] < HIT0)
+ {
+ n -= NMAC;
+ fsm[i][c] = n;
+ }
+ }
+ c = *s++;
+ }
+ if (x >= 0)
+ {
+ *s = x; /* undo the truncation: s is at the 0 written above */
+ for (n = CHAR_MIN; n <= CHAR_MAX; n++)
+ if (ppisidig(n))
+ fsm[HITN][n] = HITN; /* HITN loops on every ppisidig() char */
+ n = HITN;
+ }
+ if (fsm[i][c] < n)
+ fsm[i][c] = n;
+ if (i < HIT0 && fsm[i + NMAC][c] < n)
+ fsm[i + NMAC][c] = n;
+ }
+ break;
+
+#endif
+
+ }
+}
+
+#if !PROTOMAIN
+
+/*
+ * reload the buffer of the current input (pp.in)
+ *
+ * c is the current input char; on a real read it is stored in the
+ * byte just before the fresh data
+ * the new data is always 0 terminated, and IN_eof / IN_newline in
+ * pp.in->flags are updated to describe what was read
+ */
+
+void
+refill(register int c)
+{
+	if (!(pp.in->flags & IN_eof))
+	{
+		/* restart at the data area with c parked right before it */
+		pp.in->nextchr = pp.in->buffer + PPBAKSIZ;
+		*(pp.in->nextchr - 1) = c;
+#if PROTOTYPE
+		if (pp.in->flags & IN_prototype)
+			c = pppread(pp.in->nextchr);
+		else
+#endif
+		c = read(pp.in->fd, pp.in->nextchr, PPBUFSIZ);
+	}
+	else
+	{
+		/* eof was already seen: step back one char, nothing new */
+		pp.in->nextchr--;
+		c = 0;
+	}
+	if (c > 0)
+	{
+		/* remember whether the data ends on a line boundary */
+		if (pp.in->nextchr[c - 1] != '\n')
+			pp.in->flags &= ~IN_newline;
+		else
+			pp.in->flags |= IN_newline;
+
+		/* a short read from a regular file is treated as eof */
+		if (
+#if PROTOTYPE
+			!(pp.in->flags & IN_prototype) &&
+#endif
+			c < PPBUFSIZ && (pp.in->flags & IN_regular))
+		{
+			pp.in->flags |= IN_eof;
+			close(pp.in->fd);
+			pp.in->fd = -1;
+		}
+	}
+	else if (c < 0)
+	{
+		error(ERROR_SYSTEM|3, "read error");
+		c = 0;
+		pp.in->flags |= IN_eof;
+	}
+	else
+	{
+		/* IN_c differs from the previous input: feed "}\n" and flip it */
+		if ((pp.in->flags ^ pp.in->prev->flags) & IN_c)
+		{
+			static char	ket[] = { 0, '}', '\n', 0 };
+
+			pp.in->flags ^= IN_c;
+			pp.in->nextchr = ket + 1;
+			c = 2;
+		}
+		pp.in->flags |= IN_eof;
+	}
+#if CHECKPOINT
+	pp.in->buflen = c;
+#endif
+	pp.in->nextchr[c] = 0;
+	debug((-7, "refill(\"%s\") = %d = \"%-.*s%s\"", error_info.file, c, (c > 32 ? 32 : c), pp.in->nextchr, c > 32 ? "..." : ""));
+	if (pp.test & 0x0080)
+		sfprintf(sfstderr, "===== refill(\"%s\") = %d =====\n%s\n===== eob(\"%s\") =====\n", error_info.file, c, pp.in->nextchr, error_info.file);
+}
+
+#endif