diff options
Diffstat (limited to 'src/cmd/gc/bisonerrors')
-rwxr-xr-x | src/cmd/gc/bisonerrors | 124 |
1 files changed, 124 insertions, 0 deletions
#!/usr/bin/awk -f
# Copyright 2010 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

# This program implements the core idea from
#
#	Clinton L. Jeffery, Generating LR syntax error messages from examples,
#	ACM TOPLAS 25(5) (September 2003). http://doi.acm.org/10.1145/937563.937566
#
# It reads Bison's summary of a grammar followed by a file
# like go.errors, replacing lines beginning with % by the
# yystate and yychar that will be active when an error happens
# while parsing that line.
#
# Unlike the system described in the paper, the lines in go.errors
# give grammar symbol name lists, not actual program fragments.
# This is a little less programmer-friendly but doesn't require being
# able to run the text through lex.c.
#
# Input layout: Bison's summary comes first; the first line starting
# with // switches the program over to the pattern file.
#
# Mode flags:
#	bison	non-zero while Bison's summary is still being read
#	grammar	non-zero inside the Grammar section of the summary
#	states	non-zero inside the state dumps of the summary
#
# Tables built from the summary:
#	rulelhs[r], rulesize[r]		left-hand side and rhs length of rule r
#	shift[s,i], shifttoken[s,i]	i'th shift/goto of state s (nshift[s] entries)
#	reduce[s,i], reducetoken[s,i]	i'th reduction of state s (nreduce[s] entries)

BEGIN{
	bison = 1
	grammar = 0
	states = 0
}

# In Grammar section of y.output,
# record lhs and length of rhs for each rule.
# $1 is the rule number.  $2 is either "lhs:" or "|"; a "|" line is
# another alternative for the most recently seen lhs, which is why r
# is deliberately carried over from the previous line.
bison && /^Grammar/ { grammar = 1 }
bison && /^(Terminals|[Ss]tate 0)/ { grammar = 0 }
grammar && NF>0 {
	if($2 != "|") {
		r = $2
		sub(/:$/, "", r)
	}
	rulelhs[$1] = r
	rulesize[$1] = NF-2
	# An empty right-hand side is printed as a marker, not as nothing.
	# "/* empty */" splits into three fields; "%empty" is the single-field
	# spelling that newer Bison releases are expected to use.
	if(rulesize[$1] == 3 && $3 $4 $5 == "/*empty*/") {
		rulesize[$1] = 0
	}
	if(rulesize[$1] == 1 && $3 == "%empty") {
		rulesize[$1] = 0
	}
}

# In state dumps, record shift/reduce actions.
# The header is matched with either capitalisation ("state 0" / "State 0")
# so that reports using the capitalised form are understood too.
bison && /^[Ss]tate 0/ { grammar = 0; states = 1 }

states && /^[Ss]tate / { state = $2 }

# Raw text of each state.  Nothing in this script reads it back;
# it is kept only as a debugging aid.
states { statetext[state] = statetext[state] $0 "\n" }

# "TOKEN shift, and go to state N": token is $1, N is $7.
# This rule must precede the plain "go to state" rule below, whose
# pattern also matches shift lines; the next keeps each line from
# being recorded twice.
states && / shift, and go to state/ {
	n = nshift[state]++
	shift[state,n] = $7
	shifttoken[state,n] = $1
	next
}

# "NONTERMINAL go to state N": gotos are stored in the same table as
# shifts, so that after a reduction the lhs can be "shifted" like a token.
states && / go to state/ {
	n = nshift[state]++
	shift[state,n] = $5
	shifttoken[state,n] = $1
	next
}

# "TOKEN reduce using rule R (lhs)": token (or $default) is $1, R is $5.
states && / reduce using rule/ {
	n = nreduce[state]++
	reduce[state,n] = $5
	reducetoken[state,n] = $1
	next
}

# First // comment marks the beginning of the pattern file.
# Every summary mode flag is cleared here.  In particular states must be
# cleared: otherwise the state-dump rules above stay armed, append every
# pattern line to statetext, and swallow (via next) any pattern-file line
# that happens to contain " go to state" or " reduce using rule".
/^\/\// { bison = 0; grammar = 0; states = 0 }
bison { next }

# Treat % as first field on line as introducing a pattern (token sequence).
# Run it through the LR machine and print the induced "yystate, yychar,"
# at the point where the error happens.
$1 == "%" {
	nstack = 0
	state = 0
	f = 2		# index of the next unread symbol on the line
	tok = ""	# current lookahead; "" means fetch the next field
	for(;;) {
		if(tok == "" && f <= NF) {
			tok = $f
			f++
		}

		# Try to shift (or, for a nonterminal, take the goto on) tok.
		found = 0
		for(j=0; j<nshift[state]; j++) {
			if(shifttoken[state,j] == tok) {
				# print "SHIFT " tok " " state " -> " shift[state,j]
				stack[nstack++] = state
				state = shift[state,j]
				found = 1
				tok = ""
				break
			}
		}
		if(found)
			continue

		# Otherwise try to reduce, either on tok or by default.
		for(j=0; j<nreduce[state]; j++) {
			if(reducetoken[state,j] == tok || reducetoken[state,j] == "$default") {
				# Push the current state, drop one entry per rhs
				# symbol, then pop the state that was active
				# before the rhs began.
				stack[nstack++] = state
				rule = reduce[state,j]
				nstack -= rulesize[rule]
				state = stack[--nstack]
				# The pending lookahead has not been consumed:
				# step back so it is re-read after the goto.
				if(tok != "")
					--f
				# The next iteration takes the goto on the lhs.
				tok = rulelhs[rule]
				# print "REDUCE " nstack " " state " " tok " rule " rule " size " rulesize[rule]
				found = 1
				break
			}
		}
		if(found)
			continue

		# No shift or reduce applied - found the error.
		printf("\t%s, %s,\n", state, tok)
		break
	}
	next
}

# Print other lines verbatim.
{print}