/* * yylex.h -- the lexical analyzer. * * This source file contains the lexical analyzer, yylex(), and its * support routines. It is built by inclusion in ../icont/tlex.c and * ../iconc/clex.c, with slight variations depending on whether "Iconc" * is defined. */ #if !defined(Iconc) #include "../h/esctab.h" #endif /* !Iconc */ /* * Prototypes. */ static int bufcmp (char *s); static struct toktab *findres (void); static struct toktab *getident (int ac,int *cc); static struct toktab *getnum (int ac,int *cc); static struct toktab *getstring (int ac,int *cc); static int setfilenm (int c); static int setlineno (void); #if !defined(Iconc) static int ctlesc (void); static int hexesc (void); static int octesc (int ac); #endif /* !Iconc */ #define isletter(s) (isupper(c) | islower(c)) #define tonum(c) (isdigit(c) ? (c - '0') : ((c & 037) + 9)) struct node tok_loc = {0, NULL, 0, 0}; /* "model" node containing location of current token */ struct str_buf lex_sbuf; /* string buffer for lexical analyzer */ /* * yylex - find the next token in the input stream, and return its token * type and value to the parser. * * Variables of interest: * * cc - character following last token. * nlflag - set if a newline was between the last token and the current token * lastend - set if the last token was an Ender. * lastval - when a semicolon is inserted and returned, lastval gets the * token value that would have been returned if the semicolon hadn't * been inserted. */ static struct toktab *lasttok = NULL; static int lastend = 0; static int eofflag = 0; static int cc = '\n'; int yylex() { register struct toktab *t; register int c; int n; int nlflag; static nodeptr lastval; static struct node semi_loc; if (lasttok != NULL) { /* * A semicolon was inserted and returned on the last call to yylex, * instead of going to the input, return lasttok and set the * appropriate variables. */ yylval = lastval; tok_loc = *lastval; t = lasttok; goto ret; } nlflag = 0; loop: c = cc; /* * Remember where a semicolon will go if we insert one. */ semi_loc.n_file = tok_loc.n_file; semi_loc.n_line = in_line; if (cc == '\n') --semi_loc.n_line; semi_loc.n_col = incol; /* * Skip whitespace and comments and process #line directives. */ while (c == Comment || isspace(c)) { if (c == '\n') { nlflag++; c = NextChar; if (c == Comment) { /* * Check for #line directive at start of line. */ if (('l' == (c = NextChar)) && ('i' == (c = NextChar)) && ('n' == (c = NextChar)) && ('e' == (c = NextChar))) { c = setlineno(); while ((c == ' ') || (c == '\t')) c = NextChar; if (c != EOF && c != '\n') c = setfilenm(c); } while (c != EOF && c != '\n') c = NextChar; } } else { if (c == Comment) { while (c != EOF && c != '\n') c = NextChar; } else { c = NextChar; } } } /* * A token is the next thing in the input. Set token location to * the current line and column. */ tok_loc.n_line = in_line; tok_loc.n_col = incol; if (c == EOF) { /* * End of file has been reached. Set eofflag, return T_Eof, and * set cc to EOF so that any subsequent scans also return T_Eof. */ if (eofflag++) { eofflag = 0; cc = '\n'; yylval = NULL; return 0; } cc = EOF; t = T_Eof; yylval = NULL; goto ret; } /* * Look at current input character to determine what class of token * is next and take the appropriate action. Note that the various * token gathering routines write a value into cc. */ if (isalpha(c) || (c == '_')) { /* gather ident or reserved word */ if ((t = getident(c, &cc)) == NULL) goto loop; } else if (isdigit(c) || (c == '.')) { /* gather numeric literal or "." */ if ((t = getnum(c, &cc)) == NULL) goto loop; } else if (c == '"' || c == '\'') { /* gather string or cset literal */ if ((t = getstring(c, &cc)) == NULL) goto loop; } else { /* gather longest legal operator */ if ((n = getopr(c, &cc)) == -1) goto loop; t = &(optab[n].tok); yylval = OpNode(n); } if (nlflag && lastend && (t->t_flags & Beginner)) { /* * A newline was encountered between the current token and the last, * the last token was an Ender, and the current token is a Beginner. * Return a semicolon and save the current token in lastval. */ lastval = yylval; lasttok = t; tok_loc = semi_loc; yylval = OpNode(semicol_loc); return SEMICOL; } ret: /* * Clear lasttok, set lastend if the token being returned is an * Ender, and return the token. */ lasttok = 0; lastend = t->t_flags & Ender; return (t->t_type); } /* * getident - gather an identifier beginning with ac. The character * following identifier goes in cc. */ static struct toktab *getident(ac, cc) int ac; int *cc; { register int c; register struct toktab *t; c = ac; /* * Copy characters into string space until a non-alphanumeric character * is found. */ do { AppChar(lex_sbuf, c); c = NextChar; } while (isalnum(c) || (c == '_')); *cc = c; /* * If the identifier is a reserved word, make a ResNode for it and return * the token value. Otherwise, install it with putid, make an * IdNode for it, and return. */ if ((t = findres()) != NULL) { lex_sbuf.endimage = lex_sbuf.strtimage; yylval = ResNode(t->t_type); return t; } else { yylval = IdNode(str_install(&lex_sbuf)); return (struct toktab *)T_Ident; } } /* * findres - if the string just copied into the string space by getident * is a reserved word, return a pointer to its entry in the token table. * Return NULL if the string isn't a reserved word. */ static struct toktab *findres() { register struct toktab *t; register char c; c = *lex_sbuf.strtimage; if (!islower(c)) return NULL; /* * Point t at first reserved word that starts with c (if any). */ if ((t = restab[c - 'a']) == NULL) return NULL; /* * Search through reserved words, stopping when a match is found * or when the current reserved word doesn't start with c. */ while (t->t_word[0] == c) { if (bufcmp(t->t_word)) return t; t++; } return NULL; } /* * bufcmp - compare a null terminated string to what is in the string buffer. */ static int bufcmp(s) char *s; { register char *s1; s1 = lex_sbuf.strtimage; while (s != '\0' && s1 < lex_sbuf.endimage && *s == *s1) { ++s; ++s1; } if (*s == '\0' && s1 == lex_sbuf.endimage) return 1; else return 0; } /* * getnum - gather a numeric literal starting with ac and put the * character following the literal into *cc. * * getnum also handles the "." operator, which is distinguished from * a numeric literal by what follows it. */ static struct toktab *getnum(ac, cc) int ac; int *cc; { register int c, r, state; int realflag, n, dummy; c = ac; if (c == '.') { r = 0; state = 7; realflag = 1; } else { r = tonum(c); state = 0; realflag = 0; } for (;;) { AppChar(lex_sbuf, c); c = NextChar; switch (state) { case 0: /* integer part */ if (isdigit(c)) { r = r * 10 + tonum(c); continue; } if (c == '.') { state = 1; realflag++; continue; } if (c == 'e' || c == 'E') { state = 2; realflag++; continue; } if (c == 'r' || c == 'R') { state = 5; if (r < 2 || r > 36) tfatal("invalid radix for integer literal", (char *)NULL); continue; } break; case 1: /* fractional part */ if (isdigit(c)) continue; if (c == 'e' || c == 'E') { state = 2; continue; } break; case 2: /* optional exponent sign */ if (c == '+' || c == '-') { state = 3; continue; } case 3: /* first digit after e, e+, or e- */ if (isdigit(c)) { state = 4; continue; } tfatal("invalid real literal", (char *)NULL); break; case 4: /* remaining digits after e */ if (isdigit(c)) continue; break; case 5: /* first digit after r */ if ((isdigit(c) || isletter(c)) && tonum(c) < r) { state = 6; continue; } tfatal("invalid integer literal", (char *)NULL); break; case 6: /* remaining digits after r */ if (isdigit(c) || isletter(c)) { if (tonum(c) >= r) { /* illegal digit for radix r */ tfatal("invalid digit in integer literal", (char *)NULL); r = tonum('z'); /* prevent more messages */ } continue; } break; case 7: /* token began with "." */ if (isdigit(c)) { state = 1; /* followed by digit is a real const */ realflag = 1; continue; } *cc = c; /* anything else is just a dot */ lex_sbuf.endimage--; /* remove dot (undo AppChar) */ n = getopr((int)'.', &dummy); yylval = OpNode(n); return &(optab[n].tok); } break; } *cc = c; if (realflag) { yylval = RealNode(str_install(&lex_sbuf)); return T_Real; } yylval = IntNode(str_install(&lex_sbuf)); return T_Int; } /* * getstring - gather a string literal starting with ac and place the * character following the literal in *cc. */ static struct toktab *getstring(ac, cc) int ac; int *cc; { register int c, sc; int sav_indx; int len; sc = ac; sav_indx = -1; c = NextChar; while (c != sc && c != '\n' && c != EOF) { /* * If a '_' is the last non-white space before a new-line, * we must remember where it is. */ if (c == '_') sav_indx = lex_sbuf.endimage - lex_sbuf.strtimage; else if (!isspace(c)) sav_indx = -1; if (c == Escape) { c = NextChar; if (c == EOF) break; #if defined(Iconc) AppChar(lex_sbuf, Escape); if (c == '^') { c = NextChar; if (c == EOF) break; AppChar(lex_sbuf, '^'); } #else /* Iconc */ if (isoctal(c)) c = octesc(c); else if (c == 'x') c = hexesc(); else if (c == '^') c = ctlesc(); else c = esctab[c]; #endif /* Iconc */ } AppChar(lex_sbuf, c); c = NextChar; /* * If a '_' is the last non-white space before a new-line, the * string continues at the first non-white space on the next line * and everything from the '_' to the end of this line is ignored. */ if (c == '\n' && sav_indx >= 0) { lex_sbuf.endimage = lex_sbuf.strtimage + sav_indx; while ((c = NextChar) != EOF && isspace(c)) ; } } if (c == sc) *cc = ' '; else { tfatal("unclosed quote", (char *)NULL); *cc = c; } len = lex_sbuf.endimage - lex_sbuf.strtimage; if (ac == '"') { /* a string literal */ yylval = StrNode(str_install(&lex_sbuf), len); return T_String; } else { /* a cset literal */ yylval = CsetNode(str_install(&lex_sbuf), len); return T_Cset; } } #if !defined(Iconc) /* * ctlesc - translate a control escape -- backslash followed by * caret and one character. */ static int ctlesc() { register int c; c = NextChar; if (c == EOF) return EOF; return (c & 037); } /* * octesc - translate an octal escape -- backslash followed by * one, two, or three octal digits. */ static int octesc(ac) int ac; { register int c, nc, i; c = 0; nc = ac; i = 1; do { c = (c << 3) | (nc - '0'); nc = NextChar; if (nc == EOF) return EOF; } while (isoctal(nc) && i++ < 3); PushChar(nc); return (c & 0377); } /* * hexesc - translate a hexadecimal escape -- backslash-x * followed by one or two hexadecimal digits. */ static int hexesc() { register int c, nc, i; c = 0; i = 0; while (i++ < 2) { nc = NextChar; if (nc == EOF) return EOF; if (nc >= 'a' && nc <= 'f') nc -= 'a' - 10; else if (nc >= 'A' && nc <= 'F') nc -= 'A' - 10; else if (isdigit(nc)) nc -= '0'; else { PushChar(nc); break; } c = (c << 4) | nc; } return c; } #endif /* !Iconc */ /* * setlineno - set line number from #line comment, return following char. */ static int setlineno() { register int c; while ((c = NextChar) == ' ' || c == '\t') ; if (c < '0' || c > '9') { tfatal("no line number in #line directive", ""); while (c != EOF && c != '\n') c = NextChar; return c; } in_line = 0; while (c >= '0' && c <= '9') { in_line = in_line * 10 + (c - '0'); c = NextChar; } return c; } /* * setfilenm - set file name from #line comment, return following char. */ static int setfilenm(c) register int c; { while (c == ' ' || c == '\t') c = NextChar; if (c != '"') { tfatal("'\"' missing from file name in #line directive", ""); while (c != EOF && c != '\n') c = NextChar; return c; } while ((c = NextChar) != '"' && c != EOF && c != '\n') AppChar(lex_sbuf, c); if (c == '"') { tok_loc.n_file = str_install(&lex_sbuf); return NextChar; } else { tfatal("'\"' missing from file name in #line directive", ""); return c; } } /* * nextchar - return the next character in the input. * * Called from the lexical analyzer; interfaces it to the preprocessor. */ int nextchar() { register int c; if ((c = peekc) != 0) { peekc = 0; return c; } c = ppch(); switch (c) { case EOF: if (incol) { c = '\n'; in_line++; incol = 0; peekc = EOF; break; } else { in_line = 0; incol = 0; break; } case '\n': in_line++; incol = 0; break; case '\t': incol = (incol | 7) + 1; break; case '\b': if (incol) incol--; break; default: incol++; } return c; }