summaryrefslogtreecommitdiff
path: root/ipl/packs/ibpag2/follow.icn
diff options
context:
space:
mode:
Diffstat (limited to 'ipl/packs/ibpag2/follow.icn')
-rw-r--r--ipl/packs/ibpag2/follow.icn332
1 files changed, 332 insertions, 0 deletions
diff --git a/ipl/packs/ibpag2/follow.icn b/ipl/packs/ibpag2/follow.icn
new file mode 100644
index 0000000..fa3c8c6
--- /dev/null
+++ b/ipl/packs/ibpag2/follow.icn
@@ -0,0 +1,332 @@
+############################################################################
+#
+# Name: follow.icn
+#
+# Title: compute follow sets for grammar
+#
+# Author: Richard L. Goerwitz
+#
+# Version: 1.15
+#
+############################################################################
+#
+# This file contains FIRST(st, symbol...) and FOLLOW(start_symbol,
+# st, symbol). For FIRST(), arg1 is a list of productions. Arg 2 is
+# a string (nonterminal) or an integer (terminal). FIRST may take
+# more than one symbol argument. FOLLOW takes a string as its first
+# argument, a list of productions as its second, and a symbol as its
+# third. There is never any need to call FOLLOW with any more than
+# one symbol. The return values for FIRST() and FOLLOW() may be
+# described as follows:
+#
+# FIRST returns the set of all terminal symbols that begin valid
+# prefixes of the first symbol argument, or, if this contains
+# epsilon, of the first symbol -- <epsilon> ++ the set of terminals
+# beginning valid prefixes of the second symbol, etc.... The first
+# argument, st, contains the production list over which FIRST is to
+# be computed.
+#
+# FOLLOW is similar, except that it accepts only one symbol argument,
+# and returns the set of nonterminals that begin valid prefixes of
+# symbols that may follow symbol in the grammar defined by the
+# productions in st.
+#
+# Both FIRST() and FOLLOW() are optimized. When called for the first
+# time with a specific production list (st), both FIRST() and
+# FOLLOW() create the necessary data structures to calculate their
+# respective return values. Once created, these data structures are
+# saved, and re-used for subsequent calls with the same st argument.
+# The implications for the user are two: 1) The first call to FOLLOW
+# or FIRST for a given production list will take a while to return,
+# but 2) subsequent calls will return much faster. Naturally, you
+# can call both FIRST() and FOLLOW() with various st arguments
+# throughout the life of a given program.
+#
+############################################################################
+#
+# Links: none
+#
+############################################################################
+
+
+#
+# FIRST: list|set x string|integer... -> set
+# (st, symbols...) -> FIRST_set
+#
+# Where symbols are strings or integers (nonterminal or terminal
+# symbols in a production in the list or set of productions, st),
+# and where FIRST_set is a set of integers corresponding to
+# terminal symbols that begin valid prefixes of symbols[1], or if
+# that derives epsilon, of symbols[1] -- epsilon ++ symbols[2],
+# unless that derives epsilon, etc...
+#
+procedure FIRST(st, symbols[])
+
+ local i, result, FIRST_tbl
+ static FIRST_tbl_tbl
+ initial FIRST_tbl_tbl := table()
+
+ /FIRST_tbl_tbl[st] := make_FIRST_sets(st)
+ FIRST_tbl := FIRST_tbl_tbl[st]
+
+ result := set()
+ i := 0
+ while *symbols >= (i +:= 1) do {
+ /FIRST_tbl[symbols[i]] & iohno(90, image(symbols[i]))
+ if not member(FIRST_tbl[symbols[i]], -2) then {
+ # We're done if no epsilons.
+ result ++:= FIRST_tbl[symbols[i]]
+ break
+ } else {
+ # Remove the epsilon & try the next symbol in p.RHS.
+ result ++:= FIRST_tbl[symbols[i]] -- FIRST_tbl[-2]
+ }
+ }
+ # If we get to here without finding a symbol that doesn't derive
+ # epsilon, then give up and insert <epsilon> into result.
+ if i > *symbols then
+ result ++:= FIRST_tbl[-2]
+
+ return result
+
+end
+
+
+#
+# FOLLOW: list|set x string|integer -> set
+# (st, symbol) -> FOLLOW_set
+#
+procedure FOLLOW(start_symbol, st, symbol)
+
+ static FOLLOW_tbl_tbl
+ initial FOLLOW_tbl_tbl := table()
+
+ /FOLLOW_tbl_tbl[st] := make_slr_FOLLOW_sets(start_symbol, st)
+ return FOLLOW_tbl_tbl[st][symbol]
+
+end
+
+
+#
+# Below is the procedure make_slr_FOLLOW_sets(start_symbol, st),
+# which accepts a string, a set, and a table as its arguments and
+# returns another table. The first argument must contain the start
+# symbol for the set (or list) of productions contained in the second
+# argument. Returns a table of FOLLOW sets, where keys = symbols and
+# values = follow sets for those symbols.
+#
+# The algorithm - somewhat inefficiently implemented here - works out
+# as follows:
+#
+# 1. Place $ (internal 0) in FOLLOW_tbl[start_symbol].
+# 2. Initialize FOLLOW_tbl[symbol] to { } for every other symbol.
+# 3. For each production A -> aBb do FOLLOW_tbl[B] ++:= FIRST(b) --
+# FIRST(<epsilon>).
+# 4. For each production A -> aBb where FIRST(b) contains
+# <epsilon> and for each production A -> aB, do FOLLOW_tbl[B] ++:=
+# FOLLOW_tbl[A].
+#
+# Repeat steps 3 and 4 until no FOLLOW set can be expanded, at which
+# point return the FOLLOW table.
+#
+# Note that <epsilon> is represented internally by -2.
+#
+
+
+#
+# make_slr_FOLLOW_sets: string x set/list -> table
+# (start_symbol, st) -> FOLLOW_tbl
+#
+# Where start_symbol is the start symbol for the grammar defined
+# by the set/list of productions in st, and where FOLLOW_tbl is a
+# table of follow sets (keys = symbols, values = follow sets for
+# the symbols).
+#
+procedure make_slr_FOLLOW_sets(start_symbol, st)
+
+ local FOLLOW_tbl, k, size, old_size, p, i, j
+
+ FOLLOW_tbl := table()
+ # step 1 above; note that 0 = EOF
+ FOLLOW_tbl[start_symbol] := set([0])
+
+ # step 2
+ every k := (!st).LHS do
+ /FOLLOW_tbl[k] := set()
+
+ # steps 3 and 4
+ size := 0
+ #
+ # When the old size of the FOLLOW sets equals the new size, we are
+ # done because nothing was added to the FOLLOW sets on the last
+ # pass.
+ #
+ while old_size ~===:= size do {
+ size := 0
+ every p := !st do {
+ every i := 1 to *p.RHS-1 do {
+ type(p.RHS[i]) == "string" | next
+ /FOLLOW_tbl[p.RHS[i]] & iohno(90, image(p.RHS[i]))
+ # Go through every RHS symbol until we get a FIRST set
+ # without an epsilon move.
+ every j := i+1 to *p.RHS do {
+ if member(FIRST(st, p.RHS[j]), -2) then {
+ FOLLOW_tbl[p.RHS[i]] ++:=
+ FIRST(st, p.RHS[j]) -- FIRST(st, -2)
+ } else {
+ FOLLOW_tbl[p.RHS[i]] ++:= FIRST(st, p.RHS[j])
+ size +:= *FOLLOW_tbl[p.RHS[i]]
+ break next
+ }
+ }
+ # If we get past "break next" then b in A -> aBb =>*
+ # <epsilon>; add FOLLOW_tbl[A] to FOLLOW_tbl[B].
+ FOLLOW_tbl[p.RHS[i]] ++:= FOLLOW_tbl[p.LHS]
+ size +:= *FOLLOW_tbl[p.RHS[i]]
+ }
+ # Add FOLLOW_tbl[A] to FOLLOW_tbl[B] for the last symbol in the
+ # RHS of every rule.
+ type(p.RHS[*p.RHS]) == "string" | next
+ /FOLLOW_tbl[p.RHS[*p.RHS]] & iohno(90, image(p.RHS[*p.RHS]))
+ FOLLOW_tbl[p.RHS[*p.RHS]] ++:= FOLLOW_tbl[p.LHS]
+ size +:= *FOLLOW_tbl[p.RHS[*p.RHS]]
+ }
+ }
+
+ # Print human-readable version of FOLLOW_tbl if instructed to do so.
+ if \DEBUG then
+ print_follow_sets(FOLLOW_tbl)
+
+ # check for useless nonterminal symbols
+ every k := (!st).LHS do
+ *FOLLOW_tbl[k] = 0 & iohno(91, k)
+
+ return FOLLOW_tbl
+
+end
+
+
+#
+# Below is the routine make_FIRST_sets(st), which accepts as its one
+# argument a list or set of production records, and which returns a
+# table t, where t's keys are symbols from the grammar defined by the
+# productions in st, and where the values assocated with each of
+# these keys is the FIRST set for that key.
+#
+# Production records are structures where the first two fields, LHS
+# and RHS, contain the left-hand and right-hand side of each rule in
+# a given grammar. The right-hand side is a linked list of integers
+# (used for terminals) and strings (used for nonterminals). LHS must
+# contain a string. Terminals below 1 are reserved. Currently three
+# are actually used:
+#
+# 0 EOF
+# -1 error
+# -2 epsilon
+#
+# For a description of the FIRST() construction algorithm, see Alfred
+# Aho, Ravi Sethi, and Jeffrey D. Ullman _Compilers_ (Reading,
+# Massachusetts: Addison & Wesley, 1986), section 4.4, page 189.
+# Their algorithm is not strictly suitable, as is, for use here. I
+# thank Dave Schaumann of the University of Arizona at Tuscon for
+# explaining to me the iterative construction algorithm that in fact
+# *is* suitable.
+#
+# FIRST is computed on an iterative basis as follows:
+#
+# 1. For every terminal symbol a, FIRST(a) = { a }
+# 2. For every non-terminal symbol A, initialize FIRST(A) = { }
+# 3. For every production A -> <epsilon>, add <epsilon> to FIRST(A)
+# 4. For each production of the grammar having the form X -> Y1
+# Y2 ... Yn, perform the following procedure:
+# i := 1
+# while i <= number-of-RHS-symbols do {
+# if <epsilon> is not in FIRST(Y[i]) then {
+# FIRST(X) ++:= FIRST(Y[i])
+# break
+# } else {
+# FIRST(X) ++:= FIRST(Y[i]) -- FIRST[<epsilon>]
+# i +:= 1
+# }
+# }
+# if i > number-of-RHS-symbols then
+# # <epsilon> is in FIRST(Y[i])
+# FIRST(X) ++:= FIRST[epsilon]
+# 5. Repeat step 3 until no new symbols or <epsilon> can be added
+# to any FIRST set
+#
+
+
+#
+# make_FIRST_sets: set/list -> table
+# st -> t
+#
+# Where st is a set or list of production records, and t is a
+# table of FIRST sets, where the keys = terminal or nonterminal
+# symbols and the values = sets of terminal symbols.
+#
+# Epsilon move is -2; terminals are positive integers;
+# nonterminals are strings. Error is -1; EOF is 0.
+#
+procedure make_FIRST_sets(st)
+
+ local FIRST_tbl, symbol, p, old_size, size, i
+
+ FIRST_tbl := table()
+ FIRST_tbl[0] := set([0])
+
+ # steps 1, 2, and 3 above
+ every p := !st do {
+ # check for empty RHS (an error)
+ *p.RHS = 0 & iohno(11, production_2_string(p))
+ # step 1
+ every symbol := !p.RHS do {
+ if type(symbol) == "integer"
+ then FIRST_tbl[symbol] := set([symbol])
+ }
+ # step 2
+ /FIRST_tbl[p.LHS] := set() &
+ # step 3
+ if *p.RHS = 1 then {
+ if p.RHS[1] === -2 # -2 is epsilon
+ then insert(FIRST_tbl[p.LHS], -2)
+ }
+ }
+
+ # steps 4 and 5 above
+ size := 0
+ #
+ # When the old size of the FIRST sets equals the new size, we are
+ # done. As long as they're unequal, set old_size to size and try
+ # to add to the FIRST sets.
+ #
+ while old_size ~===:= size do {
+ size := 0
+ every p := !st do {
+ every i := 1 to *p.RHS do {
+ \FIRST_tbl[p.RHS[i]] | iohno(90, image(p.RHS[i]))
+ if not member(FIRST_tbl[p.RHS[i]], -2) then {
+ # We're done with this pass if no epsilons.
+ FIRST_tbl[p.LHS] ++:= FIRST_tbl[p.RHS[i]]
+ size +:= *FIRST_tbl[p.LHS]
+ break next
+ } else {
+ # Remove the epsilon & try the next symbol in p.RHS.
+ FIRST_tbl[p.LHS] ++:= FIRST_tbl[p.RHS[i]] -- FIRST_tbl[-2]
+ }
+ }
+ # If we get past the every...do structure without
+ # break+next-ing, then we are still finding epsilons. In
+ # this case, add epsilon to FIRST_tbl[p.LHS].
+ FIRST_tbl[p.LHS] ++:= FIRST_tbl[-2]
+ size +:= *FIRST_tbl[p.LHS]
+ }
+ }
+
+ # Print human-readable version of FIRST_tbl if instructed to do so.
+ if \DEBUG then
+ print_first_sets(FIRST_tbl)
+
+ return FIRST_tbl
+
+end