summaryrefslogtreecommitdiff
path: root/src/cmd/gc/bisonerrors
blob: 1f97fc8cec427610b332e149f1d06b9a2ef56538 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#!/usr/bin/awk -f
# Copyright 2010 The Go Authors.  All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

# This program implements the core idea from
#
#	Clinton L. Jeffery, Generating LR syntax error messages from examples,
#	ACM TOPLAS 25(5) (September 2003).  http://doi.acm.org/10.1145/937563.937566
# 
# It reads Bison's summary of a grammar followed by a file
# like go.errors, replacing lines beginning with % by the 
# yystate and yychar that will be active when an error happens
# while parsing that line.  
#
# Unlike the system described in the paper, the lines in go.errors
# give grammar symbol name lists, not actual program fragments.
# This is a little less programmer-friendly but doesn't require being
# able to run the text through lex.c.

BEGIN{
	bison = 1
	grammar = 0
	states = 0
}

# In Grammar section of y.output,
# record lhs and length of rhs for each rule.
bison && /^Grammar/ { grammar = 1 }
bison && /^(Terminals|state 0)/ { grammar = 0 }
grammar && NF>0 {
	if($2 != "|") {
		r = $2
		sub(/:$/, "", r)
	}
	rulelhs[$1] = r
	rulesize[$1] = NF-2
	if(rulesize[$1] == 1 && $3 == "%empty") {
		rulesize[$1] = 0
	}
	if(rulesize[$1] == 3 && $3 $4 $5 == "/*empty*/") {
		rulesize[$1] = 0
	}
}

# In state dumps, record shift/reduce actions.
bison && /^[Ss]tate 0/ { grammar = 0; states = 1 }

states && /^[Ss]tate / { state = $2 }
states { statetext[state] = statetext[state] $0 "\n" }

states && / shift/ {
	n = nshift[state]++
	if($0 ~ /and go to/)
		shift[state,n] = $7 # GNU Bison
	else
		shift[state,n] = $3 # Plan 9 Yacc
	shifttoken[state,n] = $1
	next
}
states && / (go to|goto)/ {
	n = nshift[state]++
	if($0 ~ /go to/)
		shift[state,n] = $5 # GNU Bison
	else
		shift[state,n] = $3 # Plan 9 Yacc
	shifttoken[state,n] = $1
	next
}
states && / reduce/ {
	n = nreduce[state]++
	if($0 ~ /reduce using rule/)
		reduce[state,n] = $5 # GNU Bison
	else
		reduce[state,n] = $3 # Plan 9 yacc
	reducetoken[state,n] = $1
	next
}

# Skip over the summary information printed by Plan 9 yacc.
/nonterminals$/,/^maximum spread/ { next }

# First // comment marks the beginning of the pattern file.
/^\/\// { bison = 0; grammar = 0; state = 0 }
bison { next }

# Treat % as first field on line as introducing a pattern (token sequence).
# Run it through the LR machine and print the induced "yystate, yychar,"
# at the point where the error happens.
$1 == "%" {
	nstack = 0
	state = 0
	f = 2
	tok = ""
	for(;;) {
		if(tok == "" && f <= NF) {
			tok = $f
			f++
		}
		found = 0
		for(j=0; j<nshift[state]; j++) {
			if(shifttoken[state,j] == tok) {
				# print "SHIFT " tok " " state " -> " shift[state,j]
				stack[nstack++] = state
				state = shift[state,j]
				found = 1
				tok = ""
				break
			}
		}
		if(found)
			continue
		for(j=0; j<nreduce[state]; j++) {
			t = reducetoken[state,j]
			if(t == tok || t == "$default" || t == ".") {
				stack[nstack++] = state
				rule = reduce[state,j]
				nstack -= rulesize[rule]
				state = stack[--nstack]
				lhs = rulelhs[rule]
				if(tok != "")
					--f
				tok = rulelhs[rule]
				# print "REDUCE " nstack " " state " " tok " rule " rule " size " rulesize[rule]
				found = 1
				break
			}
		}
		if(found)
			continue

		# No shift or reduce applied - found the error.
		printf("\t%s, %s,\n", state, tok);
		break
	}
	next
}

# Print other lines verbatim.
{print}