summaryrefslogtreecommitdiff
path: root/src/parse/lex.hpp
blob: 522d2d31806728d315badaecb922cb61e6941948 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
/*
 * MRustC - Rust Compiler
 * - By John Hodge (Mutabah/thePowersGang)
 *
 * parse/lex.hpp
 * - Lexer header
 */
#ifndef LEX_HPP_INCLUDED
#define LEX_HPP_INCLUDED

#include "../types.hpp"
#include <string>
#include <fstream>

// Enumeration of token kinds.
// The enumerator list is produced with an X-macro: `_(t)` is temporarily defined to
// expand to `t,`, so each `_(NAME)` entry in "eTokenType.enum.h" becomes one enumerator
// (presumably that file is where names such as TOK_NULL and TOK_IDENT come from).
enum eTokenType
{
    #define _(t)    t,
    #include "eTokenType.enum.h"
    #undef _
};

/// A location in source text: a file name paired with a line number.
struct Position
{
    ::std::string   filename;
    unsigned int    line;
    
    /// Builds an empty position: empty file name, line 0.
    Position():
        filename(),
        line(0)
    {}
    /// Builds a position from a file name and a line number.
    /// `filename` is taken by value (sink parameter) and moved into the member,
    /// so an rvalue argument is never copied and an lvalue is copied exactly once.
    Position(::std::string filename, unsigned int line):
        filename( ::std::move(filename) ),
        line(line)
    {
    }
};
/// Stream-insertion operator for Position; only declared here, the definition lives outside this header.
extern ::std::ostream& operator<<(::std::ostream& os, const Position& p);

/// A single lexical token: its kind, an optional string payload, an optional numeric
/// payload (integer or float, tagged with a core type) and the position it was read at.
class Token:
    public Serialisable
{
    enum eTokenType m_type;
    ::std::string   m_str;
    enum eCoreType  m_datatype;
    // Numeric payload; the integer and floating-point values share the same storage.
    union {
        uint64_t    m_intval;
        double  m_floatval;
    };
    Position    m_pos;
public:
    Token();
    Token(const Token& t) = default;
    Token& operator =(const Token& t) = default;
    /// Move constructor: steals the string and position, copies the scalar fields,
    /// and resets the source token's type to TOK_NULL.
    Token(Token&& t):
        m_type(t.m_type),
        m_str( ::std::move(t.m_str) ),
        m_datatype( t.m_datatype ),
        // The union is transferred through its integer member.
        m_intval( t.m_intval ),
        m_pos( ::std::move(t.m_pos) )
    {
        t.m_type = TOK_NULL;
    }
    Token(enum eTokenType type);
    Token(enum eTokenType type, ::std::string str);
    Token(uint64_t val, enum eCoreType datatype);
    Token(double val, enum eCoreType datatype);

    enum eTokenType type() const { return m_type; }
    const ::std::string& str() const { return m_str; }
    enum eCoreType  datatype() const { return m_datatype; }
    uint64_t intval() const { return m_intval; }
    double floatval() const { return m_floatval; }

    /// Equality on kind and payload; the position is not compared.
    /// - Different kinds are never equal.
    /// - TOK_STRING / TOK_IDENT / TOK_LIFETIME compare their string payload.
    /// - TOK_INTEGER / TOK_FLOAT compare their numeric value and core type.
    /// - Every other kind is equal whenever the kinds match.
    bool operator==(const Token& r) const {
        if(type() != r.type())
            return false;
        switch(type())
        {
        case TOK_STRING:
        case TOK_IDENT:
        case TOK_LIFETIME:
            return str() == r.str();
        case TOK_INTEGER:
            return intval() == r.intval() && datatype() == r.datatype();
        case TOK_FLOAT:
            return floatval() == r.floatval() && datatype() == r.datatype();
        default:
            return true;
        }
    }
    /// Negation of operator==. Const-qualified so it can be used on const tokens,
    /// matching operator==.
    bool operator!=(const Token& r) const { return !(*this == r); }

    ::std::string to_str() const;
    
    /// Replaces the stored position. `pos` is a by-value sink, so it is moved into place.
    void set_pos(Position pos) { m_pos = ::std::move(pos); }
    const Position& get_pos() const { return m_pos; }
    
    // NOTE(review): presumably convert between a token kind and its textual name;
    // the definitions are not in this header — confirm there.
    static const char* typestr(enum eTokenType type);
    static eTokenType typefromstr(const ::std::string& s);
    
    // Project macro; presumably declares the members required by Serialisable.
    SERIALISABLE_PROTOTYPES();
};

/// Stream-insertion operator for Token; only declared here, the definition lives outside this header.
extern ::std::ostream&  operator<<(::std::ostream& os, const Token& tok);

/// Parser flags that are handed down through the token stream as a side channel.
struct ParseState
{
    /// Set for "for/if/while" so the struct-literal ambiguity can be handled.
    bool disallow_struct_literal = false;
    /// Debugging hook: when set, macro expansion is disabled.
    bool no_expand_macros = false;
    
    /// Prints "ParseState {", then the name of each flag that is set (each preceded
    /// by a space), then " }".
    friend ::std::ostream& operator<<(::std::ostream& os, const ParseState& ps)
    {
        os << "ParseState {";
        if( ps.disallow_struct_literal ) {
            os << " disallow_struct_literal";
        }
        if( ps.no_expand_macros ) {
            os << " no_expand_macros";
        }
        return os << " }";
    }
};

/// Abstract source of tokens.
/// Derived classes must implement realGetToken() and getPosition(); the public
/// getToken()/putback()/lookahead() entry points are declared here and defined
/// outside this header. The stream also owns the ParseState flags.
class TokenStream
{
    friend class TTLexer;   // needs access to internals to know what was consumed
    
    // NOTE(review): m_cache/m_cache_valid look like storage for a single put-back token
    // and m_lookahead like a buffer of tokens read ahead — confirm against the implementation.
    bool    m_cache_valid;
    Token   m_cache;
    ::std::vector<Token>    m_lookahead;
    // Flags returned (by mutable reference) from parse_state().
    ParseState  m_parse_state;
public:
    TokenStream();
    virtual ~TokenStream();
    // Returns a token by value (presumably the next one in the stream — confirm).
    Token   getToken();
    // Hands a token back to the stream (presumably so the next getToken() yields it — confirm).
    void    putback(Token tok);
    // Returns only a token type for the given count; whether `count` is 0- or 1-based
    // is not visible here.
    eTokenType  lookahead(unsigned int count);
    // Position reporting is left to the concrete stream.
    virtual Position getPosition() const = 0;
    
    // Mutable access to the parse flags; callers modify them through this reference.
    ParseState& parse_state() { return m_parse_state; }
    
protected:
    // Token production hook implemented by each concrete stream.
    virtual Token   realGetToken() = 0;
private:
    // Internal helper; defined outside this header.
    Token innerGetToken();
};

class SavedParseState
{
    TokenStream&    m_lex;
    ParseState  m_state;
public:
    SavedParseState(TokenStream& lex, ParseState state):
        m_lex(lex),
        m_state(state)
    {
    }
    ~SavedParseState()
    {
        DEBUG("Restoring " << m_state);
        m_lex.parse_state() = m_state;
    }
};

// Parse-flag helpers. `lex` is any expression yielding an object with parse_state();
// `flag` is the name of a member of the state it returns.
//
// SET_PARSE_FLAG / CLEAR_PARSE_FLAG declare a SavedParseState guard named `_sps` in the
// enclosing scope, initialised with the current state, and then set / clear `flag`.
// The guard writes the saved state back when that scope ends. Because each expands to
// a declaration plus a statement, use at most one per scope and never as the sole body
// of an unbraced `if`/loop.
//
// CHECK_PARSE_FLAG is an expression that is true when `flag` is currently set.
//
// The `lex` argument is parenthesised in every expansion so that arguments such as
// `*ptr` bind correctly (unparenthesised, `*ptr.parse_state()` would mis-parse).
#define SET_PARSE_FLAG(lex, flag)    SavedParseState _sps((lex), (lex).parse_state()); (lex).parse_state().flag = true
#define CLEAR_PARSE_FLAG(lex, flag)    SavedParseState _sps((lex), (lex).parse_state()); (lex).parse_state().flag = false
#define CHECK_PARSE_FLAG(lex, flag) ((lex).parse_state().flag == true)

/// TokenStream implementation that owns an input file stream (m_istream) and is
/// constructed from a file name. All member functions are defined outside this header.
class Lexer:
    public TokenStream
{
    // NOTE(review): presumably the path and current line reported by getPosition() — confirm.
    ::std::string   m_path;
    unsigned int m_line;

    ::std::ifstream m_istream;
    // NOTE(review): looks like a one-character pushback slot paired with ungetc() — confirm.
    bool    m_last_char_valid;
    char    m_last_char;
    Token   m_next_token;   // Used when lexing generated two tokens
public:
    Lexer(::std::string filename);

    // Overrides of the TokenStream pure virtuals.
    virtual Position getPosition() const override;
    virtual Token realGetToken() override;

private:
    // Internal tokenising helper (presumably the core routine behind realGetToken() — confirm).
    Token getTokenInt();
    
    // Sub-lexers; their exact contracts (return values, what input they consume) are
    // defined with the implementation, not here.
    signed int getSymbol();
    double parseFloat(uint64_t whole);
    uint32_t parseEscape(char enclosing);

    // Character-level input helpers.
    char getc();
    char getc_num();
    void ungetc();

    // Empty private tag type; presumably thrown to signal end of input — confirm.
    class EndOfFile {};
};

#endif // LEX_HPP_INCLUDED