1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
|
# Scanner for the input language used by TCLL1,
# an LL(1) parser generator.
# (written by Dr. Thomas W. Christopher)
#
# File the scanner reads from; set by initScanner(), or defaulted to &input
# on the first call to scan() if it is still null.
global inputFile
# inputLine:       text of the line currently being scanned
# inputLineNumber: count of lines read so far (incremented before each read)
# inputColumn:     position in inputLine at which the next scan() resumes
# eoiToken:        the "EOI" token once end of input has been hit, else null
global inputLine,inputLineNumber,inputColumn,eoiToken
# Table mapping each punctuation string (".", ":", "=", ...) to the name of
# its token type ("DOT", "COLON", "EQ", ...); filled in by scan().
global tokenTypes
# initScanner(filename) -- select the file that scan() will read.
# Opens filename for reading and stores the file in the global inputFile.
# Fails (leaving inputFile untouched) if the file cannot be opened;
# otherwise returns the null value.
procedure initScanner(filename)
    local f

    if not (f := open(filename, "r")) then
        fail
    inputFile := f
    return
end
# scan() -- return the next token from inputFile.
#
# Each token is built as Token(type, text, lineNumber, column), where Token
# is a record constructor declared outside this procedure.  Token kinds:
#   "ID"   an identifier (letter or '_' followed by letters, digits, '_'),
#          or the text between a pair of double quotes (quotes excluded;
#          column is that of the opening quote)
#   punctuation types taken from tokenTypes ("DOT", "COLON", "EQ", ...)
#   "EOI"  end of input; once produced, every later call returns the same
#          token again
# A '#' starts a comment that runs to the end of the line.  Unexpected
# characters and unterminated quotes are reported with write() and
# scanning continues.
#
# On the first call the scanner state (line, column, tokenTypes table) is
# initialised, and inputFile defaults to &input if initScanner() was not
# called.
procedure scan()
    local t,c,b,pairs
    static whiteSpace,initIdChars,idChars
    initial {
        /inputFile := &input
        inputLineNumber := 0
        inputColumn := 1
        inputLine := ""
        eoiToken := &null
        # control characters ++ blank (character codes 0 through 32);
        # converted to a cset once instead of on every many()/~ use
        whiteSpace := cset(&ascii[1:34])
        initIdChars := &letters ++ '_'
        idChars := &letters ++ &digits ++ '_'
        tokenTypes := table()
        # Flat list of (text, type) pairs.  A dedicated local is used so
        # that t never holds anything but a token.
        pairs := [ ".","DOT",
                   ":","COLON",
                   "=","EQ",
                   "|","BAR",
                   "(","LPAR",
                   ")","RPAR",
                   "[","LBRACK",
                   "]","RBRACK",
                   "{","LBRACE",
                   "}","RBRACE",
                   "!","BANG"]
        while tokenTypes[get(pairs)] := get(pairs)
    }
    # End of input is sticky.
    if \eoiToken then return eoiToken
    # Each iteration re-scans the (possibly newly read) inputLine, resuming
    # at inputColumn.
    repeat inputLine ? {
        tab(inputColumn)
        tab(many(whiteSpace))
        c := &pos                       # column where the token starts
        if any(initIdChars) then {
            # identifier
            t := Token("ID",tab(many(idChars)),
                       inputLineNumber,c)
            inputColumn := &pos
            return t
        } else
        if b := tab(any('.:=()[]{}|!')) then {
            # single-character punctuation
            inputColumn := &pos
            return Token(tokenTypes[b],b,inputLineNumber,c)
        } else
        if ="#" | pos(0) then {
            # comment or end of line: advance to the next input line
            inputColumn := 1
            inputLineNumber +:= 1
            if not (inputLine := read(inputFile)) then {
                eoiToken := Token("EOI","EOI",
                                  inputLineNumber,1)
                return eoiToken
            }
        } else
        if ="\"" then {
            # quoted text: everything up to the closing quote
            if t := Token("ID",tab(find("\"")),
                          inputLineNumber,c) then {
                move(1)                 # skip the closing quote
            } else {
                write("unterminated quote at ",
                      inputLineNumber," ",c)
                # Take the run of non-blank characters after the quote.
                # If there is none (quote followed by whitespace or end of
                # line) fall back to an empty body, as for "", so that a
                # Token is always returned rather than a stale value of t.
                t := Token("ID",(tab(many(~whiteSpace)) | ""),
                           inputLineNumber,c)
            }
            inputColumn := &pos
            return t
        } else
        {
            # anything else: report it, skip one character, keep scanning
            write("unexpected character: ",move(1),
                  " at ",inputLineNumber," ",c)
            inputColumn := &pos
        }
    }
end
|