summaryrefslogtreecommitdiff
path: root/src/parse/lex.hpp
blob: 7ff0003e1f570cc36a2d040c299163fb080a5b2a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
/*
 * MRustC - Rust Compiler
 * - By John Hodge (Mutabah/thePowersGang)
 *
 * parse/lex.hpp
 * - Lexer header
 */
#ifndef LEX_HPP_INCLUDED
#define LEX_HPP_INCLUDED

#include <string>
#include <fstream>
#include "tokenstream.hpp"

// A single codepoint value, stored as a 32-bit unsigned integer.
//
// Implicitly constructible from `uint32_t` (and therefore from character
// literals), which other declarations in this header rely on for default
// arguments such as `Codepoint ch2='\0'`.
struct Codepoint {
    // Raw numeric value of the codepoint.
    uint32_t    v;

    // Default-constructs to the value 0.
    Codepoint(): v(0) { }
    // Wraps the given raw value (intentionally non-explicit, see above).
    Codepoint(uint32_t v): v(v) { }

    // Character-class predicates; their definitions live outside this header.
    bool isspace() const;
    bool isdigit() const;
    bool isxdigit() const;

    // Comparisons are `const` so they can be used on const objects and
    // through `const Codepoint&` (e.g. inside the stream/append operators).
    //
    // NOTE: the `char` overloads convert with static_cast<uint32_t>, so where
    // plain `char` is signed a negative `char` sign-extends before comparing.
    bool operator==(char x) const { return v == static_cast<uint32_t>(x); }
    bool operator!=(char x) const { return v != static_cast<uint32_t>(x); }
    bool operator==(Codepoint x) const { return v == x.v; }
    bool operator!=(Codepoint x) const { return v != x.v; }
};
// Appends the codepoint `cp` to the string `s`. The encoding used is decided
// by the out-of-line definition (presumably UTF-8 - confirm in the .cpp), and
// the returned reference is presumably `s` itself.
::std::string& operator+=(::std::string& s, const Codepoint& cp);

// Writes the codepoint `cp` to the output stream `s`; the exact formatting is
// decided by the out-of-line definition.
::std::ostream& operator<<(::std::ostream& s, const Codepoint& cp);

// Token lookups keyed by the string `s`; both are defined outside this header.
// NOTE(review): the behaviour when `s` matches nothing is not visible here -
// check the definitions before relying on it.
Token Lex_FindOperator(const ::std::string& s);
Token Lex_FindReservedWord(const ::std::string& s);

// Short alias for Codepoint.
using uchar = Codepoint;

// File-backed lexer: a TokenStream implementation that reads from an
// ::std::ifstream and produces Token values.
//
// Only the two hygiene hooks are implemented inline; everything else is
// defined outside this header.
class Lexer : public TokenStream
{
    // Source position bookkeeping (presumably reported via getPosition() -
    // confirm in the implementation).
    RcString m_path;
    unsigned m_line;
    unsigned m_line_ofs;

    // Underlying input stream.
    ::std::ifstream m_istream;
    // One stored character plus a validity flag; presumably the pushback slot
    // used by ungetc() - confirm in the implementation.
    bool m_last_char_valid;
    Codepoint m_last_char;
    // Buffered tokens (exact usage is defined in the implementation).
    ::std::vector<Token> m_next_tokens;

    // Current hygiene scope; replaced by push_hygine()/pop_hygine() below.
    Ident::Hygiene m_hygiene;

public:
    // Constructs a lexer for the file named `filename`.
    Lexer(const ::std::string& filename);

    // TokenStream interface.
    Position getPosition() const override;
    Ident::Hygiene realGetHygiene() const override;
    Token realGetToken() override;

private:
    // Internal tokenisation helpers (defined out of line).
    Token getTokenInt();

    int getSymbol();
    Token getTokenInt_RawString(bool is_byte);
    Token getTokenInt_Identifier(Codepoint ch, Codepoint ch2 = '\0');
    double parseFloat(uint64_t whole);
    uint32_t parseEscape(char enclosing);

    // Enters a new hygiene scope chained onto the current one.
    void push_hygine() override
    {
        m_hygiene = Ident::Hygiene::new_scope_chained(m_hygiene);
    }

    // Leaves the current hygiene scope by switching to its parent.
    void pop_hygine() override
    {
        m_hygiene = m_hygiene.get_parent();
    }

    // Character-level input primitives (defined out of line).
    void ungetc();
    Codepoint getc_num();
    Codepoint getc();
    Codepoint getc_cp();
    char getc_byte();

    // Private marker type; presumably thrown/caught internally to signal end
    // of input - confirm in the implementation.
    class EndOfFile {};
};

#endif // LEX_HPP_INCLUDED