src/cmd/gc/bisonerrors


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124

#!/usr/bin/awk -f
# Copyright 2010 The Go Authors.  All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

# This program implements the core idea from
#
#	Clinton L. Jeffery, Generating LR syntax error messages from examples,
#	ACM TOPLAS 25(5) (September 2003).  http://doi.acm.org/10.1145/937563.937566
# 
# It reads Bison's summary of a grammar followed by a file
# like go.errors, replacing lines beginning with % by the 
# yystate and yychar that will be active when an error happens
# while parsing that line.  
#
# Unlike the system described in the paper, the lines in go.errors
# give grammar symbol name lists, not actual program fragments.
# This is a little less programmer-friendly but doesn't require being
# able to run the text through lex.c.

BEGIN{
	bison = 1
	grammar = 0
	states = 0
}

# In Grammar section of y.output,
# record lhs and length of rhs for each rule.
bison && /^Grammar/ { grammar = 1 }
bison && /^(Terminals|state 0)/ { grammar = 0 }
grammar && NF>0 {
	if($2 != "|") {
		r = $2
		sub(/:$/, "", r)
	}
	rulelhs[$1] = r
	rulesize[$1] = NF-2
	if(rulesize[$1] == 3 && $3 $4 $5 == "/*empty*/") {
		rulesize[$1] = 0
	}
}

# In state dumps, record shift/reduce actions.
bison && /^state 0/ { grammar = 0; states = 1 }

states && /^state / { state = $2 }
states { statetext[state] = statetext[state] $0 "\n" }

states && / shift, and go to state/ {
	n = nshift[state]++
	shift[state,n] = $7
	shifttoken[state,n] = $1
	next
}
states && / go to state/ {
	n = nshift[state]++
	shift[state,n] = $5
	shifttoken[state,n] = $1
	next
}
states && / reduce using rule/ {
	n = nreduce[state]++
	reduce[state,n] = $5
	reducetoken[state,n] = $1
	next
}	

# First // comment marks the beginning of the pattern file.
/^\/\// { bison = 0; grammar = 0; state = 0 }
bison { next }

# Treat % as first field on line as introducing a pattern (token sequence).
# Run it through the LR machine and print the induced "yystate, yychar,"
# at the point where the error happens.
$1 == "%" {
	nstack = 0
	state = 0
	f = 2
	tok = ""
	for(;;) {
		if(tok == "" && f <= NF) {
			tok = $f
			f++
		}
		found = 0
		for(j=0; j<nshift[state]; j++) {
			if(shifttoken[state,j] == tok) {
				# print "SHIFT " tok " " state " -> " shift[state,j]
				stack[nstack++] = state
				state = shift[state,j]
				found = 1
				tok = ""
				break
			}
		}
		if(found)
			continue
		for(j=0; j<nreduce[state]; j++) {
			if(reducetoken[state,j] == tok || reducetoken[state,j] == "$default") {
				stack[nstack++] = state
				rule = reduce[state,j]
				nstack -= rulesize[rule]
				state = stack[--nstack]
				lhs = rulelhs[rule]
				if(tok != "")
					--f
				tok = rulelhs[rule]
				# print "REDUCE " nstack " " state " " tok " rule " rule " size " rulesize[rule]
				found = 1
				break
			}
		}
		if(found)
			continue

		# No shift or reduce applied - found the error.
		printf("\t%s, %s,\n", state, tok);
		break
	}
	next
}

# Print other lines verbatim.
{print}