summaryrefslogtreecommitdiff
path: root/src/parse/lex.hpp
blob: a626a37495b8b337d7408d10b885f09d8b6a4874 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
/*
 * MRustC - Rust Compiler
 * - By John Hodge (Mutabah/thePowersGang)
 *
 * parse/lex.hpp
 * - Lexer header
 */
#ifndef LEX_HPP_INCLUDED
#define LEX_HPP_INCLUDED

#include <debug.hpp>
#include <serialise.hpp>
#include "../coretypes.hpp"
#include <string>
#include <fstream>

#include "../include/span.hpp"

#include "token.hpp"

// Forward declarations: these AST types are only handled through pointers
// and references by ParseState, so their full definitions are not needed.
namespace AST {
    class Module;
    class MetaItems;
}

/// Out-of-band state that the parser threads through the token stream.
///
/// All members have defaults, so a default-constructed ParseState has both
/// flags cleared and both pointers null.
struct ParseState
{
    /// Used for "for/if/while" to handle the struct-literal ambiguity
    bool disallow_struct_literal = false;
    /// Debugging hook: when set, expansion of macros is disabled
    bool no_expand_macros = false;

    /// Current module; null by default. This struct never frees it.
    ::AST::Module*  module = nullptr;
    /// Parent attributes; null by default. This struct never frees it.
    ::AST::MetaItems*   parent_attrs = nullptr;

    /// Returns the module pointed to by `module`.
    /// Asserts that the pointer has been set before dereferencing it.
    ::AST::Module& get_current_mod()
    {
        assert(this->module);
        return *this->module;
    }

    /// Debug formatter: writes "ParseState {", then the name of every flag
    /// that is currently set (each preceded by one space), then " }".
    /// The two pointer members are not printed.
    friend ::std::ostream& operator<<(::std::ostream& os, const ParseState& ps)
    {
        os << "ParseState {";
        if( ps.disallow_struct_literal ) {
            os << " disallow_struct_literal";
        }
        if( ps.no_expand_macros ) {
            os << " no_expand_macros";
        }
        return os << " }";
    }
};

/// Abstract base class for token sources.
///
/// Concrete streams implement the pure virtual `realGetToken()` and
/// `getPosition()`. Every other member function declared here except
/// `parse_state()` is defined outside this header.
class TokenStream
{
    friend class TTLexer;   // needs access to internals to know what was consumed
    
    // NOTE(review): m_cache_valid/m_cache look like a one-token cache slot
    // (m_cache meaningful only while m_cache_valid is set) - confirm in the implementation.
    bool    m_cache_valid;
    Token   m_cache;
    // NOTE(review): presumably tokens buffered on behalf of lookahead() - confirm.
    ::std::vector<Token>    m_lookahead;
    // Parser state carried with this stream; exposed by reference through parse_state().
    ParseState  m_parse_state;
public:
    TokenStream();
    // Virtual so derived streams can be destroyed through a TokenStream pointer.
    virtual ~TokenStream();
    // NOTE(review): presumably yields the next token, honouring putback/lookahead buffers - confirm.
    Token   getToken();
    // NOTE(review): presumably hands `tok` back so a later getToken() returns it - confirm.
    void    putback(Token tok);
    // NOTE(review): presumably reports the type of the token `count` positions ahead
    // without consuming it; whether `count` is zero-based is not visible here - confirm.
    eTokenType  lookahead(unsigned int count);
    // Position query; must be supplied by the concrete stream.
    virtual Position getPosition() const = 0;
    
    // Mutable access to the parse state; the SavedParseState guard and the
    // SET_*/CLEAR_*/CHECK_* macros in this header go through this accessor.
    ParseState& parse_state() { return m_parse_state; }
    
    // Span helpers: start_span() produces a ProtoSpan that end_span() turns into a Span.
    // NOTE(review): presumably these bracket a region of source text - confirm.
    ProtoSpan   start_span() const;
    Span    end_span(ProtoSpan ps) const;
    
protected:
    // Token producer implemented by each concrete stream.
    virtual Token   realGetToken() = 0;
private:
    // Internal helper defined outside this header.
    // NOTE(review): its relationship to getToken()/realGetToken() is not visible here.
    Token innerGetToken();
};

class SavedParseState
{
    TokenStream&    m_lex;
    ParseState  m_state;
public:
    SavedParseState(TokenStream& lex, ParseState state):
        m_lex(lex),
        m_state(state)
    {
    }
    ~SavedParseState()
    {
        DEBUG("Restoring " << m_state);
        m_lex.parse_state() = m_state;
    }
};

// Helpers for temporarily changing a TokenStream's ParseState.
//
// SET_MODULE / SET_ATTRS / SET_PARSE_FLAG / CLEAR_PARSE_FLAG each expand to TWO
// statements: a `SavedParseState _sps` guard that captures the current state,
// followed by the assignment of the new value. The guard restores the captured
// state when the enclosing scope ends. Consequences for callers:
//  - use them only directly inside a `{ ... }` scope, never as the unbraced
//    body of an if/for/while;
//  - at most one of them per scope, since each declares a local named `_sps`.
//
// The `lex` argument is parenthesised at every use so that expressions such as
// `*plex` bind correctly (unparenthesised, `*plex.parse_state()` would apply
// `.parse_state()` to the pointer). `lex` is evaluated more than once, so it
// must be free of side effects.
#define SET_MODULE(lex, mod)    SavedParseState _sps((lex), (lex).parse_state()); (lex).parse_state().module = &(mod)
#define SET_ATTRS(lex, attrs)    SavedParseState _sps((lex), (lex).parse_state()); (lex).parse_state().parent_attrs = &(attrs)
#define SET_PARSE_FLAG(lex, flag)    SavedParseState _sps((lex), (lex).parse_state()); (lex).parse_state().flag = true
#define CLEAR_PARSE_FLAG(lex, flag)    SavedParseState _sps((lex), (lex).parse_state()); (lex).parse_state().flag = false
// Evaluates to true when the named ParseState flag is currently set.
#define CHECK_PARSE_FLAG(lex, flag) ((lex).parse_state().flag == true)

/// Wrapper that gives a 32-bit code point value its own distinct type.
struct Codepoint
{
    /// The wrapped 32-bit value
    uint32_t    v;

    /// Not `explicit`, so a plain uint32_t converts to Codepoint implicitly.
    Codepoint(uint32_t value)
        : v(value)
    {
    }
};
/// Append operator taking a string and a Codepoint; declared here, defined in another translation unit.
/// NOTE(review): presumably appends an encoded form of `cp` (likely UTF-8) to `s`
/// and returns `s` - confirm against the definition.
extern ::std::string& operator+=(::std::string& s, const Codepoint& cp);

/// TokenStream implementation backed by a `::std::ifstream`.
///
/// Only declarations appear here; every member function is defined outside
/// this header.
class Lexer:
    public TokenStream
{
    // NOTE(review): presumably the path of the file being lexed plus the current
    // line number and offset within that line, as reported by getPosition() - confirm.
    RcString    m_path;
    unsigned int m_line;
    unsigned int m_line_ofs;

    // Input file stream this lexer reads from.
    ::std::ifstream m_istream;
    // NOTE(review): m_last_char_valid/m_last_char look like a one-character
    // pushback slot used by ungetc() - confirm in the implementation.
    bool    m_last_char_valid;
    char    m_last_char;
    Token   m_next_token;   // Used when lexing generated two tokens
public:
    // NOTE(review): presumably opens `filename` for reading; behaviour on failure is not visible here.
    // Not `explicit`, so a ::std::string converts to Lexer implicitly.
    Lexer(const ::std::string& filename);

    // Implementations of the TokenStream pure virtuals.
    virtual Position getPosition() const override;
    virtual Token realGetToken() override;

private:
    // Internal tokenising entry point. NOTE(review): its exact relationship to realGetToken() is not visible here.
    Token getTokenInt();
    
    // NOTE(review): presumably matches an operator/punctuation symbol; the meaning
    // of the signed return value (e.g. a negative "no match") is not visible here.
    signed int getSymbol();
    // Helpers for specific token kinds. NOTE(review): `is_byte` presumably selects a
    // byte raw string; `ch`/`ch2` are presumably already-consumed leading characters.
    Token getTokenInt_RawString(bool is_byte);
    Token getTokenInt_Identifier(char ch, char ch2='\0');
    // NOTE(review): `whole` is presumably the already-parsed integer part of the literal - confirm.
    double parseFloat(uint64_t whole);
    // NOTE(review): `enclosing` is presumably the quote character of the literal being parsed - confirm.
    uint32_t parseEscape(char enclosing);

    // Character-level input helpers.
    // NOTE(review): getc_num() presumably differs from getc() in how it treats
    // characters inside numeric literals - confirm.
    char getc();
    char getc_num();
    // Reads one full code point, returned as a Codepoint.
    // NOTE(review): the input encoding (presumably UTF-8) is not visible here.
    Codepoint getc_codepoint();
    // NOTE(review): presumably pushes the last-read character back - confirm.
    void ungetc();

    // Empty private tag type. NOTE(review): presumably thrown to signal end of input - confirm.
    class EndOfFile {};
};

#endif // LEX_HPP_INCLUDED