diff options
Diffstat (limited to 'ipl/packs/ibpag2/follow.icn')
-rw-r--r-- | ipl/packs/ibpag2/follow.icn | 332 |
1 files changed, 332 insertions, 0 deletions
diff --git a/ipl/packs/ibpag2/follow.icn b/ipl/packs/ibpag2/follow.icn new file mode 100644 index 0000000..fa3c8c6 --- /dev/null +++ b/ipl/packs/ibpag2/follow.icn @@ -0,0 +1,332 @@ +############################################################################ +# +# Name: follow.icn +# +# Title: compute follow sets for grammar +# +# Author: Richard L. Goerwitz +# +# Version: 1.15 +# +############################################################################ +# +# This file contains FIRST(st, symbol...) and FOLLOW(start_symbol, +# st, symbol). For FIRST(), arg1 is a list of productions. Arg 2 is +# a string (nonterminal) or an integer (terminal). FIRST may take +# more than one symbol argument. FOLLOW takes a string as its first +# argument, a list of productions as its second, and a symbol as its +# third. There is never any need to call FOLLOW with any more than +# one symbol. The return values for FIRST() and FOLLOW() may be +# described as follows: +# +# FIRST returns the set of all terminal symbols that begin valid +# prefixes of the first symbol argument, or, if this contains +# epsilon, of the first symbol -- <epsilon> ++ the set of terminals +# beginning valid prefixes of the second symbol, etc.... The first +# argument, st, contains the production list over which FIRST is to +# be computed. +# +# FOLLOW is similar, except that it accepts only one symbol argument, +# and returns the set of nonterminals that begin valid prefixes of +# symbols that may follow symbol in the grammar defined by the +# productions in st. +# +# Both FIRST() and FOLLOW() are optimized. When called for the first +# time with a specific production list (st), both FIRST() and +# FOLLOW() create the necessary data structures to calculate their +# respective return values. Once created, these data structures are +# saved, and re-used for subsequent calls with the same st argument. +# The implications for the user are two: 1) The first call to FOLLOW +# or FIRST for a given production list will take a while to return, +# but 2) subsequent calls will return much faster. Naturally, you +# can call both FIRST() and FOLLOW() with various st arguments +# throughout the life of a given program. +# +############################################################################ +# +# Links: none +# +############################################################################ + + +# +# FIRST: list|set x string|integer... -> set +# (st, symbols...) -> FIRST_set +# +# Where symbols are strings or integers (nonterminal or terminal +# symbols in a production in the list or set of productions, st), +# and where FIRST_set is a set of integers corresponding to +# terminal symbols that begin valid prefixes of symbols[1], or if +# that derives epsilon, of symbols[1] -- epsilon ++ symbols[2], +# unless that derives epsilon, etc... +# +procedure FIRST(st, symbols[]) + + local i, result, FIRST_tbl + static FIRST_tbl_tbl + initial FIRST_tbl_tbl := table() + + /FIRST_tbl_tbl[st] := make_FIRST_sets(st) + FIRST_tbl := FIRST_tbl_tbl[st] + + result := set() + i := 0 + while *symbols >= (i +:= 1) do { + /FIRST_tbl[symbols[i]] & iohno(90, image(symbols[i])) + if not member(FIRST_tbl[symbols[i]], -2) then { + # We're done if no epsilons. + result ++:= FIRST_tbl[symbols[i]] + break + } else { + # Remove the epsilon & try the next symbol in p.RHS. + result ++:= FIRST_tbl[symbols[i]] -- FIRST_tbl[-2] + } + } + # If we get to here without finding a symbol that doesn't derive + # epsilon, then give up and insert <epsilon> into result. + if i > *symbols then + result ++:= FIRST_tbl[-2] + + return result + +end + + +# +# FOLLOW: list|set x string|integer -> set +# (st, symbol) -> FOLLOW_set +# +procedure FOLLOW(start_symbol, st, symbol) + + static FOLLOW_tbl_tbl + initial FOLLOW_tbl_tbl := table() + + /FOLLOW_tbl_tbl[st] := make_slr_FOLLOW_sets(start_symbol, st) + return FOLLOW_tbl_tbl[st][symbol] + +end + + +# +# Below is the procedure make_slr_FOLLOW_sets(start_symbol, st), +# which accepts a string, a set, and a table as its arguments and +# returns another table. The first argument must contain the start +# symbol for the set (or list) of productions contained in the second +# argument. Returns a table of FOLLOW sets, where keys = symbols and +# values = follow sets for those symbols. +# +# The algorithm - somewhat inefficiently implemented here - works out +# as follows: +# +# 1. Place $ (internal 0) in FOLLOW_tbl[start_symbol]. +# 2. Initialize FOLLOW_tbl[symbol] to { } for every other symbol. +# 3. For each production A -> aBb do FOLLOW_tbl[B] ++:= FIRST(b) -- +# FIRST(<epsilon>). +# 4. For each production A -> aBb where FIRST(b) contains +# <epsilon> and for each production A -> aB, do FOLLOW_tbl[B] ++:= +# FOLLOW_tbl[A]. +# +# Repeat steps 3 and 4 until no FOLLOW set can be expanded, at which +# point return the FOLLOW table. +# +# Note that <epsilon> is represented internally by -2. +# + + +# +# make_slr_FOLLOW_sets: string x set/list -> table +# (start_symbol, st) -> FOLLOW_tbl +# +# Where start_symbol is the start symbol for the grammar defined +# by the set/list of productions in st, and where FOLLOW_tbl is a +# table of follow sets (keys = symbols, values = follow sets for +# the symbols). +# +procedure make_slr_FOLLOW_sets(start_symbol, st) + + local FOLLOW_tbl, k, size, old_size, p, i, j + + FOLLOW_tbl := table() + # step 1 above; note that 0 = EOF + FOLLOW_tbl[start_symbol] := set([0]) + + # step 2 + every k := (!st).LHS do + /FOLLOW_tbl[k] := set() + + # steps 3 and 4 + size := 0 + # + # When the old size of the FOLLOW sets equals the new size, we are + # done because nothing was added to the FOLLOW sets on the last + # pass. + # + while old_size ~===:= size do { + size := 0 + every p := !st do { + every i := 1 to *p.RHS-1 do { + type(p.RHS[i]) == "string" | next + /FOLLOW_tbl[p.RHS[i]] & iohno(90, image(p.RHS[i])) + # Go through every RHS symbol until we get a FIRST set + # without an epsilon move. + every j := i+1 to *p.RHS do { + if member(FIRST(st, p.RHS[j]), -2) then { + FOLLOW_tbl[p.RHS[i]] ++:= + FIRST(st, p.RHS[j]) -- FIRST(st, -2) + } else { + FOLLOW_tbl[p.RHS[i]] ++:= FIRST(st, p.RHS[j]) + size +:= *FOLLOW_tbl[p.RHS[i]] + break next + } + } + # If we get past "break next" then b in A -> aBb =>* + # <epsilon>; add FOLLOW_tbl[A] to FOLLOW_tbl[B]. + FOLLOW_tbl[p.RHS[i]] ++:= FOLLOW_tbl[p.LHS] + size +:= *FOLLOW_tbl[p.RHS[i]] + } + # Add FOLLOW_tbl[A] to FOLLOW_tbl[B] for the last symbol in the + # RHS of every rule. + type(p.RHS[*p.RHS]) == "string" | next + /FOLLOW_tbl[p.RHS[*p.RHS]] & iohno(90, image(p.RHS[*p.RHS])) + FOLLOW_tbl[p.RHS[*p.RHS]] ++:= FOLLOW_tbl[p.LHS] + size +:= *FOLLOW_tbl[p.RHS[*p.RHS]] + } + } + + # Print human-readable version of FOLLOW_tbl if instructed to do so. + if \DEBUG then + print_follow_sets(FOLLOW_tbl) + + # check for useless nonterminal symbols + every k := (!st).LHS do + *FOLLOW_tbl[k] = 0 & iohno(91, k) + + return FOLLOW_tbl + +end + + +# +# Below is the routine make_FIRST_sets(st), which accepts as its one +# argument a list or set of production records, and which returns a +# table t, where t's keys are symbols from the grammar defined by the +# productions in st, and where the values assocated with each of +# these keys is the FIRST set for that key. +# +# Production records are structures where the first two fields, LHS +# and RHS, contain the left-hand and right-hand side of each rule in +# a given grammar. The right-hand side is a linked list of integers +# (used for terminals) and strings (used for nonterminals). LHS must +# contain a string. Terminals below 1 are reserved. Currently three +# are actually used: +# +# 0 EOF +# -1 error +# -2 epsilon +# +# For a description of the FIRST() construction algorithm, see Alfred +# Aho, Ravi Sethi, and Jeffrey D. Ullman _Compilers_ (Reading, +# Massachusetts: Addison & Wesley, 1986), section 4.4, page 189. +# Their algorithm is not strictly suitable, as is, for use here. I +# thank Dave Schaumann of the University of Arizona at Tuscon for +# explaining to me the iterative construction algorithm that in fact +# *is* suitable. +# +# FIRST is computed on an iterative basis as follows: +# +# 1. For every terminal symbol a, FIRST(a) = { a } +# 2. For every non-terminal symbol A, initialize FIRST(A) = { } +# 3. For every production A -> <epsilon>, add <epsilon> to FIRST(A) +# 4. For each production of the grammar having the form X -> Y1 +# Y2 ... Yn, perform the following procedure: +# i := 1 +# while i <= number-of-RHS-symbols do { +# if <epsilon> is not in FIRST(Y[i]) then { +# FIRST(X) ++:= FIRST(Y[i]) +# break +# } else { +# FIRST(X) ++:= FIRST(Y[i]) -- FIRST[<epsilon>] +# i +:= 1 +# } +# } +# if i > number-of-RHS-symbols then +# # <epsilon> is in FIRST(Y[i]) +# FIRST(X) ++:= FIRST[epsilon] +# 5. Repeat step 3 until no new symbols or <epsilon> can be added +# to any FIRST set +# + + +# +# make_FIRST_sets: set/list -> table +# st -> t +# +# Where st is a set or list of production records, and t is a +# table of FIRST sets, where the keys = terminal or nonterminal +# symbols and the values = sets of terminal symbols. +# +# Epsilon move is -2; terminals are positive integers; +# nonterminals are strings. Error is -1; EOF is 0. +# +procedure make_FIRST_sets(st) + + local FIRST_tbl, symbol, p, old_size, size, i + + FIRST_tbl := table() + FIRST_tbl[0] := set([0]) + + # steps 1, 2, and 3 above + every p := !st do { + # check for empty RHS (an error) + *p.RHS = 0 & iohno(11, production_2_string(p)) + # step 1 + every symbol := !p.RHS do { + if type(symbol) == "integer" + then FIRST_tbl[symbol] := set([symbol]) + } + # step 2 + /FIRST_tbl[p.LHS] := set() & + # step 3 + if *p.RHS = 1 then { + if p.RHS[1] === -2 # -2 is epsilon + then insert(FIRST_tbl[p.LHS], -2) + } + } + + # steps 4 and 5 above + size := 0 + # + # When the old size of the FIRST sets equals the new size, we are + # done. As long as they're unequal, set old_size to size and try + # to add to the FIRST sets. + # + while old_size ~===:= size do { + size := 0 + every p := !st do { + every i := 1 to *p.RHS do { + \FIRST_tbl[p.RHS[i]] | iohno(90, image(p.RHS[i])) + if not member(FIRST_tbl[p.RHS[i]], -2) then { + # We're done with this pass if no epsilons. + FIRST_tbl[p.LHS] ++:= FIRST_tbl[p.RHS[i]] + size +:= *FIRST_tbl[p.LHS] + break next + } else { + # Remove the epsilon & try the next symbol in p.RHS. + FIRST_tbl[p.LHS] ++:= FIRST_tbl[p.RHS[i]] -- FIRST_tbl[-2] + } + } + # If we get past the every...do structure without + # break+next-ing, then we are still finding epsilons. In + # this case, add epsilon to FIRST_tbl[p.LHS]. + FIRST_tbl[p.LHS] ++:= FIRST_tbl[-2] + size +:= *FIRST_tbl[p.LHS] + } + } + + # Print human-readable version of FIRST_tbl if instructed to do so. + if \DEBUG then + print_first_sets(FIRST_tbl) + + return FIRST_tbl + +end |