/*
 * This file contains routines for building tokens out of characters from a
 * "character source". This source is the top element on the source stack.
 */
#include "../preproc/preproc.h"
#include "../preproc/ptoken.h"

/*
 * Prototypes for static functions.
 */
static int pp_tok_id(char *s);
static struct token *chck_wh_sp(struct char_src *cs);
static struct token *pp_number(void);
static struct token *char_str(int delim, int tok_id);
static struct token *hdr_tok(int delim, int tok_id, struct char_src *cs);

int whsp_image = NoSpelling;    /* indicate what is in white space tokens */
struct token *zero_tok;         /* token for literal 0 */
struct token *one_tok;          /* token for literal 1 */

#include "../preproc/pproto.h"

/*
 * IsWhSp(c) - true if c is a white space character. The argument is
 * evaluated several times, so it must be free of side effects.
 */
#define IsWhSp(c) \
    ((c) == ' ' || (c) == '\n' || (c) == '\t' || (c) == '\v' || (c) == '\f')

/*
 * AdvChar() - advance to next character from buffer, filling the buffer
 * if needed. Wrapped in do/while(0) so that it behaves as one statement
 * and cannot capture an else that follows it.
 */
#define AdvChar() \
    do { \
        if (++next_char == last_char) \
            fill_cbuf(); \
    } while (0)

static int line;                    /* current line number */
static char *fname;                 /* current file name */
static struct str_buf tknize_sbuf;  /* string buffer */

/*
 * List of preprocessing directives and the corresponding token ids.
 * The table is terminated by an entry whose string is NULL and whose
 * token id is Invalid.
 */
static struct rsrvd_wrd pp_rsrvd[] = {
    PPDirectives
    {"if",      PpIf},
    {"else",    PpElse},
    {"ifdef",   PpIfdef},
    {"ifndef",  PpIfndef},
    {"elif",    PpElif},
    {"endif",   PpEndif},
    {"include", PpInclude},
    {"define",  PpDefine},
    {"undef",   PpUndef},
    {"begdef",  PpBegdef},
    {"enddef",  PpEnddef},
    {"line",    PpLine},
    {"error",   PpError},
    {"pragma",  PpPragma},
    {NULL,      Invalid}};

/*
 * init_tok - initialize tokenizer. Only the first call does any work:
 * it initializes the string buffer, replaces every directive name in
 * pp_rsrvd with the value returned by spec_str(), and creates the
 * tokens for the literals 0 and 1.
 */
void init_tok(void)
{
    struct rsrvd_wrd *rw;
    static int first_time = 1;

    if (first_time) {
        first_time = 0;
        init_sbuf(&tknize_sbuf);    /* initialize string buffer */

        /*
         * install reserved words into the string table
         */
        for (rw = pp_rsrvd; rw->s != NULL; ++rw)
            rw->s = spec_str(rw->s);

        zero_tok = new_token(PpNumber, spec_str("0"), "", 0);
        one_tok = new_token(PpNumber, spec_str("1"), "", 0);
    }
}

/*
 * pp_tok_id - see if s is the name of a preprocessing directive.
 * Returns the token id of the matching table entry, or the id stored in
 * the terminating entry (Invalid) when there is no match.
 *
 * Strings are compared by address, not by content; this presumably
 * relies on s coming from the same string table that init_tok() used
 * for the directive names -- confirm against callers.
 */
static int pp_tok_id(char *s)
{
    struct rsrvd_wrd *rw;

    for (rw = pp_rsrvd; rw->s != NULL && rw->s != s; ++rw)
        ;
    return rw->tok_id;
}

/*
 * chk_eq_sign - look ahead to next character to see if it is an equal sign.
 * It is used for processing -D options. When an equal sign is found it is
 * consumed and 1 is returned; otherwise nothing is consumed and 0 is
 * returned.
 */
int chk_eq_sign(void)
{
    if (*next_char == '=') {
        AdvChar();
        return 1;
    }
    else
        return 0;
}

/*
 * chck_wh_sp - If the input is at white space, construct a white space token
 * and return it, otherwise return NULL. This function also helps keep track
 * of preprocessor directive boundaries.
 *
 * The returned token has id WhiteSpace, or PpDirEnd when a new-line is
 * seen while cs->dir_state is Within. NULL is also returned when the
 * white space is immediately followed by a ## operator (and the white
 * space did not end a directive); in that case the white space has been
 * consumed and discarded.
 */
static struct token *chck_wh_sp(struct char_src *cs)
{
    int c1, c2;
    struct token *t;
    int tok_id;

    /*
     * See if we are at white space or a comment.
     */
    c1 = *next_char;
    if (!IsWhSp(c1) && (c1 != '/' || next_char[1] != '*'))
        return NULL;

    /*
     * Find the line number of the current character in the line number
     * buffer, and correct it if we have encountered any #line directives.
     */
    line = cs->line_buf[next_char - first_char] + cs->line_adj;
    if (c1 == '\n')
        --line;     /* a new-line really belongs to the previous line */

    tok_id = WhiteSpace;
    for (;;) {
        if (IsWhSp(c1)) {
            /*
             * The next character is a white space. If we are retaining the
             * image of the white space in the token, copy the character to
             * the string buffer. If we are in the midst of a preprocessor
             * directive and find a new-line, indicate the end of the
             * directive.
             */
            AdvChar();
            if (whsp_image != NoSpelling)
                AppChar(tknize_sbuf, c1);
            if (c1 == '\n') {
                if (cs->dir_state == Within)
                    tok_id = PpDirEnd;
                cs->dir_state = CanStart;
                if (tok_id == PpDirEnd)
                    break;
            }
        }
        else if (c1 == '/' && next_char[1] == '*') {
            /*
             * Start of comment. If we are retaining the image of comments,
             * copy the characters into the string buffer.
             */
            if (whsp_image == FullImage) {
                AppChar(tknize_sbuf, '/');
                AppChar(tknize_sbuf, '*');
            }
            AdvChar();
            AdvChar();

            /*
             * Look for the end of the comment. c1 is the current character
             * and c2 is the one after it.
             */
            c1 = *next_char;
            c2 = next_char[1];
            while (c1 != '*' || c2 != '/') {
                if (c1 == EOF)
                    errfl1(fname, line, "eof encountered in comment");
                AdvChar();
                if (whsp_image == FullImage)
                    AppChar(tknize_sbuf, c1);
                c1 = c2;
                c2 = next_char[1];
            }

            /*
             * Determine if we are retaining the image of a comment, replacing
             * a comment by one space character, or ignoring comments.
             */
            if (whsp_image == FullImage) {
                AppChar(tknize_sbuf, '*');
                AppChar(tknize_sbuf, '/');
            }
            else if (whsp_image == NoComment)
                AppChar(tknize_sbuf, ' ');
            AdvChar();
            AdvChar();
        }
        else
            break;      /* end of white space */
        c1 = *next_char;
    }

    /*
     * If we are not retaining the image of white space, replace it all
     * with one space character.
     */
    if (whsp_image == NoSpelling)
        AppChar(tknize_sbuf, ' ');

    t = new_token(tok_id, str_install(&tknize_sbuf), fname, line);

    /*
     * Look ahead to see if a ## operator is next.
     */
    if (*next_char == '#' && next_char[1] == '#') {
        if (tok_id == PpDirEnd)
            errt1(t, "## expressions must not cross directive boundaries");
        else {
            /*
             * Discard white space before a ## operator.
             */
            free_t(t);
            return NULL;
        }
    }
    return t;
}

/*
 * pp_number - Create a token for a preprocessing number (See ANSI C Standard
 * for the syntax of such a number). The caller has already placed the
 * leading character(s) of the number in the string buffer; this function
 * appends the rest, starting at the current input character.
 */
static struct token *pp_number(void)
{
    int c;

    c = *next_char;
    for (;;) {
        if (c == 'e' || c == 'E') {
            /*
             * An exponent marker may be followed directly by a sign.
             */
            AppChar(tknize_sbuf, c);
            AdvChar();
            c = *next_char;
            if (c == '+' || c == '-') {
                AppChar(tknize_sbuf, c);
                AdvChar();
                c = *next_char;
            }
        }
        else if (isdigit(c) || c == '.' || islower(c) || isupper(c) ||
                 c == '_') {
            AppChar(tknize_sbuf, c);
            AdvChar();
            c = *next_char;
        }
        else {
            return new_token(PpNumber, str_install(&tknize_sbuf), fname, line);
        }
    }
}

/*
 * char_str - construct a token for a character constant or string literal.
 * The opening delimiter has already been consumed by the caller. The
 * token image is the text between the delimiters, with backslash escapes
 * copied through unchanged.
 *
 *   delim  - closing delimiter character (' or ").
 *   tok_id - token id to give the new token.
 */
static struct token *char_str(int delim, int tok_id)
{
    int c;

    for (c = *next_char; c != EOF && c != '\n' && c != delim; c = *next_char) {
        AppChar(tknize_sbuf, c);
        if (c == '\\') {
            /*
             * Copy the escaped character as well so that an escaped
             * delimiter does not end the literal. Leaving the loop here
             * keeps c set to the EOF or new-line for the checks below.
             */
            c = next_char[1];
            if (c == EOF || c == '\n')
                break;
            else {
                AppChar(tknize_sbuf, c);
                AdvChar();
            }
        }
        AdvChar();
    }
    if (c == EOF)
        errfl1(fname, line, "End-of-file encountered within a literal");
    if (c == '\n')
        errfl1(fname, line, "New-line encountered within a literal");
    AdvChar();      /* step over the closing delimiter */
    return new_token(tok_id, str_install(&tknize_sbuf), fname, line);
}

/*
 * hdr_tok - create a token for an #include header. The delimiter may be
 * > or ". On entry the current character is the opening delimiter; it is
 * skipped, and the token image is the text up to the closing delimiter.
 *
 *   delim  - closing delimiter character.
 *   tok_id - token id to give the new token.
 *   cs     - character source, used to find the current line number.
 */
static struct token *hdr_tok(int delim, int tok_id, struct char_src *cs)
{
    int c;

    line = cs->line_buf[next_char - first_char] + cs->line_adj;
    AdvChar();      /* step over the opening delimiter */

    for (c = *next_char; c != delim; c = *next_char) {
        if (c == EOF)
            errfl1(fname, line, "End-of-file encountered within a header name");
        if (c == '\n')
            errfl1(fname, line, "New-line encountered within a header name");
        AppChar(tknize_sbuf, c);
        AdvChar();
    }
    AdvChar();      /* step over the closing delimiter */
    return new_token(tok_id, str_install(&tknize_sbuf), fname, line);
}

/*
 * tokenize - return the next token from the character source on the top
 * of the source stack. Returns NULL when the current character is EOF.
 */
struct token *tokenize(void)
{
    struct char_src *cs;
    struct token *t1, *t2;
    int c;
    int tok_id;

    cs = src_stack->u.cs;

    /*
     * Check to see if the last call left a token from a look ahead.
     */
    if (cs->tok_sav != NULL) {
        t1 = cs->tok_sav;
        cs->tok_sav = NULL;
        return t1;
    }

    if (*next_char == EOF)
        return NULL;

    /*
     * Find the current line number and file name for the character
     * source and check for white space.
     */
    line = cs->line_buf[next_char - first_char] + cs->line_adj;
    fname = cs->fname;
    if ((t1 = chck_wh_sp(cs)) != NULL)
        return t1;

    c = *next_char;     /* look at next character */
    AdvChar();

    /*
     * If the last thing we saw in this character source was white space
     * containing a new-line, then we must look for the start of a
     * preprocessing directive.
     */
    if (cs->dir_state == CanStart) {
        cs->dir_state = Reset;
        if (c == '#' && *next_char != '#') {
            /*
             * Assume we are within a preprocessing directive and check
             * for white space to discard.
             */
            cs->dir_state = Within;
            if ((t1 = chck_wh_sp(cs)) != NULL) {
                if (t1->tok_id == PpDirEnd) {
                    /*
                     * We found a new-line, this is a null preprocessor
                     * directive. The directive-end token is saved for
                     * the next call.
                     */
                    cs->tok_sav = t1;
                    AppChar(tknize_sbuf, '#');
                    return new_token(PpNull, str_install(&tknize_sbuf), fname,
                                     line);
                }
                else
                    free_t(t1);     /* discard white space */
            }
            c = *next_char;
            if (islower(c) || isupper(c) || c == '_') {
                /*
                 * Tokenize the identifier following the #
                 */
                t1 = tokenize();
                if ((tok_id = pp_tok_id(t1->image)) == Invalid) {
                    /*
                     * We have a stringizing operation, not a preprocessing
                     * directive. Return the # now and save the identifier
                     * for the next call.
                     */
                    cs->dir_state = Reset;
                    cs->tok_sav = t1;
                    AppChar(tknize_sbuf, '#');
                    return new_token('#', str_install(&tknize_sbuf), fname,
                                     line);
                }
                else {
                    t1->tok_id = tok_id;
                    if (tok_id == PpInclude) {
                        /*
                         * A header name has to be tokenized specially. Find
                         * it, then save the token.
                         */
                        if ((t2 = chck_wh_sp(cs)) != NULL) {
                            if (t2->tok_id == PpDirEnd)
                                errt1(t2, "file name missing from #include");
                            else
                                free_t(t2);
                        }
                        c = *next_char;
                        if (c == '"')
                            cs->tok_sav = hdr_tok('"', StrLit, cs);
                        else if (c == '<')
                            cs->tok_sav = hdr_tok('>', PpHeader, cs);
                    }
                    /*
                     * Return the token indicating the kind of preprocessor
                     * directive we have started.
                     */
                    return t1;
                }
            }
            else
                errfl1(fname, line,
                       "# must be followed by an identifier or keyword");
        }
    }

    /*
     * Check for literals containing wide characters.
     */
    if (c == 'L') {
        if (*next_char == '\'') {
            AdvChar();
            t1 = char_str('\'', LCharConst);
            if (t1->image[0] == '\0')
                errt1(t1, "invalid character constant");
            return t1;
        }
        else if (*next_char == '"') {
            AdvChar();
            return char_str('"', LStrLit);
        }
    }

    /*
     * Check for identifier.
     */
    if (islower(c) || isupper(c) || c == '_') {
        AppChar(tknize_sbuf, c);
        c = *next_char;
        while (islower(c) || isupper(c) || isdigit(c) || c == '_') {
            AppChar(tknize_sbuf, c);
            AdvChar();
            c = *next_char;
        }
        return new_token(Identifier, str_install(&tknize_sbuf), fname, line);
    }

    /*
     * Check for number.
     */
    if (isdigit(c)) {
        AppChar(tknize_sbuf, c);
        return pp_number();
    }

    /*
     * Check for character constant.
     */
    if (c == '\'') {
        t1 = char_str(c, CharConst);
        if (t1->image[0] == '\0')
            errt1(t1, "invalid character constant");
        return t1;
    }

    /*
     * Check for string constant.
     */
    if (c == '"')
        return char_str(c, StrLit);

    /*
     * Check for operators and punctuation. Anything that does not fit these
     * categories is a single character token.
     */
    AppChar(tknize_sbuf, c);
    switch (c) {
    case '.':
        c = *next_char;
        if (isdigit(c)) {
            /*
             * Number
             */
            AppChar(tknize_sbuf, c);
            AdvChar();
            return pp_number();
        }
        else if (c == '.' && next_char[1] == '.') {
            /*
             * ...
             */
            AdvChar();
            AdvChar();
            AppChar(tknize_sbuf, '.');
            AppChar(tknize_sbuf, '.');
            return new_token(Ellipsis, str_install(&tknize_sbuf), fname, line);
        }
        else
            return new_token('.', str_install(&tknize_sbuf), fname, line);

    case '+':
        c = *next_char;
        if (c == '+') {
            /*
             * ++
             */
            AppChar(tknize_sbuf, '+');
            AdvChar();
            return new_token(Incr, str_install(&tknize_sbuf), fname, line);
        }
        else if (c == '=') {
            /*
             * +=
             */
            AppChar(tknize_sbuf, '=');
            AdvChar();
            return new_token(PlusAsgn, str_install(&tknize_sbuf), fname, line);
        }
        else
            return new_token('+', str_install(&tknize_sbuf), fname, line);

    case '-':
        c = *next_char;
        if (c == '>') {
            /*
             * ->
             */
            AppChar(tknize_sbuf, '>');
            AdvChar();
            return new_token(Arrow, str_install(&tknize_sbuf), fname, line);
        }
        else if (c == '-') {
            /*
             * --
             */
            AppChar(tknize_sbuf, '-');
            AdvChar();
            return new_token(Decr, str_install(&tknize_sbuf), fname, line);
        }
        else if (c == '=') {
            /*
             * -=
             */
            AppChar(tknize_sbuf, '=');
            AdvChar();
            return new_token(MinusAsgn, str_install(&tknize_sbuf), fname,
                             line);
        }
        else
            return new_token('-', str_install(&tknize_sbuf), fname, line);

    case '<':
        c = *next_char;
        if (c == '<') {
            AppChar(tknize_sbuf, '<');
            AdvChar();
            if (*next_char == '=') {
                /*
                 * <<=
                 */
                AppChar(tknize_sbuf, '=');
                AdvChar();
                return new_token(LShftAsgn, str_install(&tknize_sbuf), fname,
                                 line);
            }
            else
                /*
                 * <<
                 */
                return new_token(LShft, str_install(&tknize_sbuf), fname,
                                 line);
        }
        else if (c == '=') {
            /*
             * <=
             */
            AppChar(tknize_sbuf, '=');
            AdvChar();
            return new_token(Leq, str_install(&tknize_sbuf), fname, line);
        }
        else
            return new_token('<', str_install(&tknize_sbuf), fname, line);

    case '>':
        c = *next_char;
        if (c == '>') {
            AppChar(tknize_sbuf, '>');
            AdvChar();
            if (*next_char == '=') {
                /*
                 * >>=
                 */
                AppChar(tknize_sbuf, '=');
                AdvChar();
                return new_token(RShftAsgn, str_install(&tknize_sbuf), fname,
                                 line);
            }
            else
                /*
                 * >>
                 */
                return new_token(RShft, str_install(&tknize_sbuf), fname,
                                 line);
        }
        else if (c == '=') {
            /*
             * >=
             */
            AppChar(tknize_sbuf, '=');
            AdvChar();
            return new_token(Geq, str_install(&tknize_sbuf), fname, line);
        }
        else
            return new_token('>', str_install(&tknize_sbuf), fname, line);

    case '=':
        if (*next_char == '=') {
            /*
             * ==
             */
            AppChar(tknize_sbuf, '=');
            AdvChar();
            return new_token(TokEqual, str_install(&tknize_sbuf), fname, line);
        }
        else
            return new_token('=', str_install(&tknize_sbuf), fname, line);

    case '!':
        if (*next_char == '=') {
            /*
             * !=
             */
            AppChar(tknize_sbuf, '=');
            AdvChar();
            return new_token(Neq, str_install(&tknize_sbuf), fname, line);
        }
        else
            return new_token('!', str_install(&tknize_sbuf), fname, line);

    case '&':
        c = *next_char;
        if (c == '&') {
            /*
             * &&
             */
            AppChar(tknize_sbuf, '&');
            AdvChar();
            return new_token(And, str_install(&tknize_sbuf), fname, line);
        }
        else if (c == '=') {
            /*
             * &=
             */
            AppChar(tknize_sbuf, '=');
            AdvChar();
            return new_token(AndAsgn, str_install(&tknize_sbuf), fname, line);
        }
        else
            return new_token('&', str_install(&tknize_sbuf), fname, line);

    case '|':
        c = *next_char;
        if (c == '|') {
            /*
             * ||
             */
            AppChar(tknize_sbuf, '|');
            AdvChar();
            return new_token(Or, str_install(&tknize_sbuf), fname, line);
        }
        else if (c == '=') {
            /*
             * |=
             */
            AppChar(tknize_sbuf, '=');
            AdvChar();
            return new_token(OrAsgn, str_install(&tknize_sbuf), fname, line);
        }
        else
            return new_token('|', str_install(&tknize_sbuf), fname, line);

    case '*':
        if (*next_char == '=') {
            /*
             * *=
             */
            AppChar(tknize_sbuf, '=');
            AdvChar();
            return new_token(MultAsgn, str_install(&tknize_sbuf), fname, line);
        }
        else
            return new_token('*', str_install(&tknize_sbuf), fname, line);

    case '/':
        if (*next_char == '=') {
            /*
             * /=
             */
            AppChar(tknize_sbuf, '=');
            AdvChar();
            return new_token(DivAsgn, str_install(&tknize_sbuf), fname, line);
        }
        else
            return new_token('/', str_install(&tknize_sbuf), fname, line);

    case '%':
        if (*next_char == '=') {
            /*
             * %=
             */
            AppChar(tknize_sbuf, '=');
            AdvChar();
            return new_token(ModAsgn, str_install(&tknize_sbuf), fname, line);
        }
        else
            return new_token('%', str_install(&tknize_sbuf), fname, line);

    case '^':
        if (*next_char == '=') {
            /*
             * ^=
             */
            AppChar(tknize_sbuf, '=');
            AdvChar();
            return new_token(XorAsgn, str_install(&tknize_sbuf), fname, line);
        }
        else
            return new_token('^', str_install(&tknize_sbuf), fname, line);

    case '#':
        /*
         * Token pasting or stringizing operator.
         */
        if (*next_char == '#') {
            /*
             * ##
             */
            AppChar(tknize_sbuf, '#');
            AdvChar();
            t1 = new_token(PpPaste, str_install(&tknize_sbuf), fname, line);
        }
        else
            t1 = new_token('#', str_install(&tknize_sbuf), fname, line);

        /*
         * The operand must be in the same preprocessing directive.
         */
        if ((t2 = chck_wh_sp(cs)) != NULL) {
            if (t2->tok_id == PpDirEnd)
                errt2(t2, t1->image,
                      " preprocessing expression must not cross directive boundary");
            else
                free_t(t2);
        }
        return t1;

    default:
        return new_token(c, str_install(&tknize_sbuf), fname, line);
    }
}