1 files changed, 515 insertions, 0 deletions
diff --git a/ipl/packs/ibpag2/ibreader.icn b/ipl/packs/ibpag2/ibreader.icn
new file mode 100644
index 0000000..8401159
--- /dev/null
+++ b/ipl/packs/ibpag2/ibreader.icn
@@ -0,0 +1,515 @@
+############################################################################
+#
+#	Name:	 ibreader.icn
+#
+#	Title:	 reader for Ibpag2 source files
+#
+#	Author:	 Richard L. Goerwitz
+#
+#	Version: 1.29
+#
+############################################################################
+#
+#  This file contains a collection of procedures that 1) read in an
+#  Ibpag2 source file, 2) output token defines, 3) emit action code,
+#  and finally 4) pass a start symbol, list of productions, and token
+#  table back to the calling procedure.  Described formally:
+#
+#      ibreader:  file x file x string  -> ib_grammar record
+#                 (in,   out,   module) -> grammar
+#
+#  In is the input stream; out is the output stream; module is an
+#  optional string that distinguishes this grammar from others that
+#  might also be running simultaneously.  Grammar is an ib_grammar
+#  record containing the start symbol in its first field and the
+#  production list in its second.  Its third field contains a table
+#  used to map integers to actual token names or character literals,
+#  i.e. its keys are things like -1, 0, etc. and its values are things
+#  like "error," "EOF," etc.
+#
+#  Note that if a module argument is supplied to ibreader(), one must
+#  also be supplied to ibwriter().  See ibwriter.icn.
+#
+#  The format of the input file is highly reminiscent of YACC.  It
+#  consists of three basic sections, the first two of which are
+#  followed by %%.  See the main documentation to Ibpag2 for
+#  specifics.  Major differences between Ibpag2 and YACC input format
+#  include:
+#
+#      1) "$$ = x" constructs are replaced by "return x" (e.g. "$$ =
+#         $1 + $3" -> "return $1 + $3")
+#
+#      2) all variables within a given action are, by default, local
+#         to that action; i.e. they cannot be accessed by other
+#         actions unless you declare them global elsewhere (e.g. in
+#         the pass-through part of the declarations section %{ ... %})
+#
+#      3) the %union declaration is not needed by Ibpag2
+#
+#      4) tokens and symbols are separated from each other by a comma
+#         (e.g. %token '+', '-' and S : NP, VP)
+#
+#      5) epsilon is indicated by the keyword "epsilon" (e.g. REL :
+#         epsilon)
+#
+#      6) both epsilon and error *may* be declared as %tokens for
+#         reasons of precedence, although they retain hard-coded
+#         internal values (-2 and -1, respectively)
+#
+#      7) all actions must follow the last RHS symbol of the rule they
+#         apply to (preceded by an optional %prec directive); to
+#         achieve S : NP { action1 }, VP { action2 }, insert a dummy
+#         rule: S : NP, dummy, VP { action2 }; dummy : epsilon {
+#         action1 } ;
+#
+#      8) YYERROR, YYACCEPT, yyclearin, and yyerrok are the same,
+#         except they are written IIERROR, IIACCEPT, iiclearin, and
+#         iierrok (i.e. "ii" replaces "yy")
+#
+#      9) Ibpag2's input files are tokenized like modified Icon files,
+#         and, as a consequence, Icon's reserved words must not be
+#         used as symbols (e.g. "if : if, then" is no go)
+#
+############################################################################
+#
+#  Links: itokens, escape
+#
+#  See also: ibwriter
+#
+############################################################################
+
+#link itokens, escape
+link escape
+
+record ib_grammar(start, rules, tbl)	# start symbol, production list, token number -> name table
+record tokstats(str, no, prec, assoc)	# token name (or char code), token number, precedence, "l"/"r"/"n"/&null
+
+# Declared in itokens.icn:
+# global line_number
+
+#
+# ibreader:  file x file x string x string        -> ib_grammar record
+#            (in,   out,   module,  source_fname) -> grammar
+#
+#     Where in is an input stream, out is an output stream, module is
+#     some string uniquely identifying this module (optional), and
+#     where grammar is an ib_grammar record containing the start
+#     symbol in its first field and a list of production records in
+#     its second.  Source_fname is the string name of Ibpag2's input
+#     grammar file.  Defaults to "source file."
+#
+procedure ibreader(in, out, module, source_fname)
+
+    local tmp, grammar, toktbl, next_token, next_token_no_nl,
+	token, LHS, t
+
+    /source_fname    := "source file"
+    grammar          := ib_grammar(&null, list(), table())
+    toktbl           := table()		# declared tokens: name or char code -> tokstats
+    next_token       := create itokens(in, 1)
+    next_token_no_nl := create 1(tmp := |@next_token, \tmp.sym)  # only tokens with non-null sym
+    token            := @next_token_no_nl | iohno(4)	# iohno(4): unexpected end of input
+
+    # Do the %{ %} and %token stuff, i.e. everything up to %%
+    # (NEWSECT).
+    #
+    until token.sym == "NEWSECT" do {
+	case token.sym of {
+	    default     : {
+		iohno(48, "token "||image(token.str) ||"; line "|| line_number)
+	    }
+	    "SEMICOL"   :  {
+		# Skip semicolon.  Get another token while we're at it.
+		token := @next_token_no_nl | iohno(47, "line "||line_number)
+	    }
+	    "BEGGLOB" : {
+		write(out, "\n$line ", line_number, " ", image(source_fname))
+		# A failed comparison resumes copy_icon_stuff, so this runs until "%}" (ENDGLOB).
+		(token := copy_icon_stuff(next_token, out)).sym == "ENDGLOB"
+		token := @next_token_no_nl
+	    }
+	    "MOD"     : {
+		(token := @next_token_no_nl).sym == "IDENT" |
+		    iohno(30, "line " || line_number)
+		#
+		# Read in token declarations, set associativity and
+		# precedences, and enter the tokens into toktbl.  Each
+		# branch yields the first token that follows the declaration.
+		token := {
+		    case token.str of {
+			 default  : iohno(30, "line " || line_number)
+			"token"   : read_decl(next_token_no_nl, toktbl, &null)
+			"right"   : read_decl(next_token_no_nl, toktbl, "r")
+			"left"    : read_decl(next_token_no_nl, toktbl, "l")
+			"nonassoc": read_decl(next_token_no_nl, toktbl, "n")
+			"union"   : iohno(45, "line "|| line_number)
+			"start"   : {
+			    (token := @next_token_no_nl).sym == "IDENT" |
+				iohno(31, "line " || line_number)
+			    /grammar.start := token.str |
+				iohno(32, "line " || line_number)  # start symbol was already set
+			    @next_token_no_nl | iohno(4)
+			}
+		    }
+		}
+	    }
+	}
+    }
+    # Skip past %% (NEWSECT) and semicolon (if present).
+    token := @next_token_no_nl | iohno(47, "line "|| line_number)
+    (token := token | @next_token_no_nl | iohno(4)).sym ~== "SEMICOL"  # on SEMICOL, backtrack to next token
+    token.sym == "NEWSECT" & iohno(47, "line "|| line_number)
+
+    #
+    # Fetch start symbol if it wasn't defined above via %start; by
+    # default the start symbol is the LHS of rule 1.
+    #
+    /grammar.start := token.str
+
+    # Having reached the end of the declarations section, we can now
+    # copy out a define for each token number, not counting character
+    # literals (which are stored as integers).  While we're at it,
+    # create a table that maps token numbers back to character
+    # literals and strings (for use in later verbose and debugging
+    # displays).
+    #
+    write(out, "\n")
+    every t := !toktbl do {
+	if type(t.str) == "integer" then
+	    insert(grammar.tbl, t.no, image(char(t.str)))
+	else {
+	    insert(grammar.tbl, t.no, t.str)
+	    write(out, "$define ", t.str, "\t", t.no)
+	}
+    }
+
+    # Now, finally, read in rules up until we reach EOF or %% (i.e.
+    # NEWSECT).  EOF is signaled below by failure of read_RHS().
+    #
+    until token.sym == "NEWSECT" do {
+	token.sym == "IDENT" | iohno(33, token.str ||" line "|| line_number)
+	LHS := token.str
+	token := @next_token_no_nl | iohno(4)
+	token.sym == "COLON" | iohno(34, token.str ||" line "|| line_number)
+	#
+	# Read in RHS, then the action (if any) then the prec (if
+	# any).  If we see a BAR, then repeat, re-using the same
+	# left-hand side symbol.
+	#
+	while token := 
+	    read_RHS(next_token, next_token_no_nl, out, toktbl, LHS,
+		     grammar, module, source_fname) |
+	    # if read_RHS fails, we're at EOF; leave both loops
+	    break break
+	do token.sym == "BAR" | break
+    }
+
+    # Copy the remainder of the file to out as Icon code.
+    write(out, "\n$line ", line_number, " ", image(source_fname))
+    every copy_icon_stuff(next_token, out, "EOFX")
+
+    # Do final setup on the reverse token table.  This table will be
+    # used later to map integers to their original names in verbose or
+    # debugging displays.
+    #
+    insert(grammar.tbl,  0, "$")
+
+    return grammar
+
+end
+
+
+#
+# copy_icon_stuff:  coexpression x file x string  -> ib_TOK records
+#                   (next_token,   out,   except) -> token records
+#
+#     Copy Icon code to output stream, also suspending as we go.
+#     Insert separators between tokens where needed.  Do not output
+#     any token whose sym field matches except.  The point in
+#     suspending tokens as we go is to enable the calling procedure to
+#     look for signal tokens that indicate insertion or termination
+#     points.
+#
+procedure copy_icon_stuff(next_token, out, except)
+    local gap, tok, wordish, edge
+
+    wordish := &digits ++ &letters ++ '_.'
+    gap := ""
+    while tok := @next_token do {
+	# Hand every token that has a sym to the caller before writing it.
+	suspend (\tok.sym & tok)
+	if \tok.sym == \except then next
+	# Only non-DOT tokens with a sym ever need a blank beside them.
+	edge := if \tok.sym ~== "DOT" then wordish else ''
+	if any(edge, \tok.str, 1, 2) then writes(out, gap)
+	writes(out, tok.str)
+	gap := if any(edge, \tok.str, -1, 0) then " " else ""
+    }
+
+    # Token stream ran dry; that is legitimate only in "EOFX" mode.
+    (except === "EOFX") | iohno(4)
+end
+
+
+#
+# read_decl:  coexpression     x table x string -> ib_TOK
+#             (next_token_no_nl, toktbl, assoc) -> token
+#
+#     Read in token declarations, assigning them the correct
+#     precedence and associativity.  Number the tokens for later
+#     $define preprocessor directives.  When done, return the last
+#     token processed.  Toktbl is the table that holds the stats for
+#     each declared token.
+#
+procedure read_decl(next_token_no_nl, toktbl, assoc)
+
+    # token_no: last number handed out to a named token (first is 257).
+    # prec:     precedence level of the most recent declaration line.
+    local   tok, lit, code, num
+    static  token_no, prec
+    initial {
+	token_no := 256
+	prec := 0
+    }
+
+    # Every call opens a new precedence level; all the tokens read
+    # during this call share it, along with the associativity the
+    # caller passed in as arg 3.
+    #
+    prec +:= 1
+    assoc === ("n"|"r"|"l"|&null) | iohno(5, image(assoc))
+
+    # Consume "name , name , ..." until a token other than a comma
+    # follows a name.  Running out of input anywhere in here is an
+    # error.
+    #
+    repeat {
+	tok := @next_token_no_nl | iohno(4)
+	case tok.sym of {
+	    "IDENT"  : {
+		# error and epsilon keep their fixed numbers; any other
+		# name takes the next free token number.
+		num := case tok.str of {
+		    "error"  : -1
+		    "epsilon": -2
+		    default  : token_no +:= 1
+		}
+		toktbl[tok.str] := tokstats(tok.str, num, prec, assoc)
+	    }
+	    "CSETLIT" | "STRING": {
+		# A quoted literal must denote exactly one character; it
+		# is keyed and numbered by that character's code.
+		lit := escape(tok.str[2:-1])
+		*lit = 1 | iohno(49, tok.str)
+		code := ord(lit)
+		toktbl[code] := tokstats(code, code, prec, assoc)
+	    }
+	    default  : iohno(31, tok.str ||" line "|| line_number)
+	}
+	# Anything but a comma ends the list.
+	tok := @next_token_no_nl | iohno(4)
+	if tok.sym ~== "COMMA" then break
+    }
+
+    # Skip past semicolon, if present (as set up now, it shouldn't be).
+    (tok := tok | @next_token_no_nl | iohno(4)).sym ~== "SEMICOL"
+    return tok
+
+end
+
+
+#
+# read_RHS:  coexpression x coexpression x file x table x
+#            string x ib_grammar record x string x string -> token
+#
+#     Read_RHS goes through the RHS of rule definitions, inserting the
+#     resulting productions into a master rule list.  At the same
+#     time, it outputs the actions corresponding to those productions
+#     as procedures that are given names corresponding to the numbers
+#     of the productions.  I.e. production 1, if endowed with an {
+#     action }, will correspond to procedure _1_.  Prec and assoc are
+#     automatically set to those of the last declared token (terminal)
+#     in the RHS, but may be overridden by the %prec keyword, as in YACC.
+#     Source_fname is the name of the source grammar file we're pro-
+#     cessing (caller will give us some reasonable default if we're
+#     reading &input).
+#
+#     Fails on EOF.
+#
+procedure read_RHS(next_token, next_token_no_nl, out, toktbl, LHS,
+		   grammar, module, source_fname)
+    # Parse one alternative of LHS into a production; return the token after it.
+    local   token, rule, c
+    static  rule_no
+    initial rule_no := 0
+    # Productions are numbered from 1 in the order they are read.
+    rule_no +:= 1
+    #                  LHS  RHS     POS    LOOK   no       prec   assoc
+    rule := production(LHS, list(), &null, &null, rule_no, &null, &null)
+    put(grammar.rules, rule)
+
+    # Read in RHS symbols.
+    #
+    repeat {
+	token := @next_token_no_nl | iohno(4)
+	case token.sym of {
+	    default  :
+		iohno(35, "token "|| image(token.str)||"; line "|| line_number)
+	    "CSETLIT" | "STRING": {
+		*escape(token.str[2:-1]) = 1 | iohno(49, token.str)
+		c := ord(escape(token.str[2:-1]))
+		if \toktbl[c] then {
+		    rule.prec  := toktbl[c].prec
+		    rule.assoc := toktbl[c].assoc
+		}
+		# literals not declared earlier will get caught here
+		else insert(grammar.tbl, c, image(char(c)))
+		put(rule.RHS, c)
+	    }
+	    "IDENT"  : {
+		# If it's a terminal (i.e. a declared token), assign
+		# this rule its precedence and associativity.  If it's
+		# not in toktbl, then it's not a declared token....
+		if \toktbl[token.str] then {
+		    rule.prec  := toktbl[token.str].prec
+		    rule.assoc := toktbl[token.str].assoc
+		    put(rule.RHS, toktbl[token.str].no)
+		    if toktbl[token.str].no = -2 then {
+			*rule.RHS > 1 & iohno(44, "line " || line_number)
+		        rule.POS := 2
+		    }
+		}
+		# ...undeclared stuff.  Could be a nonterminal.  If
+		# error and/or epsilon weren't declared as tokens,
+		# they will get caught here, too.
+		else {
+		    case token.str of {
+			&null     : stop("What is going on here?")
+			default   : put(rule.RHS, token.str)
+			"error"   : {
+			    put(rule.RHS, -1)
+			    insert(grammar.tbl, -1, "error")
+			}
+			"epsilon" : {
+			    if *put(rule.RHS, -2) > 1
+			    then iohno(44, "line " || line_number)
+			    else rule.POS := 2
+			    insert(grammar.tbl, -2, "epsilon")
+			}
+		    }
+		}
+	    }
+	}
+	# Comma means:  Go back for another RHS symbol.
+	token := @next_token_no_nl | fail
+	token.sym == "COMMA" | break
+    }
+
+    # Skip semicolon token, if present.
+    (token := token | @next_token_no_nl | fail).sym ~== "SEMICOL"
+
+    # Read and set (optional) precedence.
+    #
+    if token.sym == "MOD" then {
+	token := @next_token_no_nl | iohno(4)
+	(token.sym == "IDENT" & token.str == "prec") |
+	    iohno(43, token.str || " line " || line_number)
+	token := @next_token_no_nl | iohno(4)
+	case token.sym of {
+	    "CSETLIT" | "STRING" : {
+		*escape(token.str[2:-1]) = 1 | iohno(49, token.str)
+		c := ord(escape(token.str[2:-1])) &
+		rule.prec  := (\toktbl[c]).prec &	# undeclared literal: fail -> iohno(43)
+		rule.assoc := toktbl[c].assoc
+	    }
+	    "IDENT"    : {
+		\toktbl[token.str] |
+		    iohno(43, token.str || " line " || line_number)
+		rule.prec  := toktbl[token.str].prec &
+		rule.assoc := toktbl[token.str].assoc
+	    }
+	    default    : 1 = 4	# deliberate failure
+	} | iohno(43, "line " || line_number)
+	token := @next_token_no_nl | fail
+    }
+
+    # Skip semicolon token, if present.
+    (token := token | @next_token_no_nl | fail).sym ~== "SEMICOL"
+
+    # Read in (optional) action.
+    #
+    if token.sym == "LBRACE" then {
+	write_action_as_procedure(next_token, out, rule,
+				  module, source_fname)
+	token := @next_token_no_nl | fail
+    }
+
+    # Skip semicolon token, if present.
+    (token := token | @next_token_no_nl | fail).sym ~== "SEMICOL"
+    return token
+
+end
+
+
+# write_action_as_procedure:  emit the { action } now being read as the
+#     procedure _<rule.no>_<module>(arg1,...); $n becomes argn, and $0,
+#     $-1, ... become value_stack<module> references.  Returns last token.
+procedure write_action_as_procedure(next_token, out, rule,
+				    module, source_fname)
+
+    local argstr, bracelevel, token, i, neg, nargs
+
+    /module := ""
+     argstr := ""
+    #
+    # Decide the number of arguments based on the length of the RHS of
+    # rule.  Exception: Epsilon productions are empty, and pop nothing
+    # off the stack, so take zero args.
+    #
+    nargs := if rule.RHS[1] === -2 then 0 else *rule.RHS
+    every argstr ||:= "arg" || (1 to nargs) || ","
+    argstr := trim(argstr, ',')
+    # nargs also bounds $n below, so $1 in an epsilon action is rejected.
+    write(out, "procedure _", rule.no, "_", module, "(", argstr, ")")
+    write(out, "\n$line ", line_number, " ", image(source_fname))
+
+    bracelevel := 1
+    until bracelevel = 0 do {
+	every token := copy_icon_stuff(next_token, out, "RHSARG") do {
+	    case token.sym of {
+		default   : next
+		"LBRACE"  : bracelevel +:= 1
+		"RBRACE"  : bracelevel -:= 1
+		"RHSARG"  : {
+		    until \ (token := @next_token).sym do
+			writes(out, token.str)
+		    if neg := (token.sym == "MINUS") then
+			until \ (token := @next_token).sym do 
+			    writes(out, token.str)
+		    else neg := &null
+		    token.sym == "INTLIT"  | iohno(37, "$"||token.str)
+		    if /neg & token.str ~== "0" then {
+			token.str <= nargs | iohno(38, "$"||token.str)
+			writes(out, " arg", token.str, " ")
+		    } else {
+			# Code for $0, $-1, etc.
+			#
+			# Warning!  If the name of the stack is changed
+			# in iiparse.lib, it has to be changed here, too.
+			#
+			i := abs(token.str)+1
+			writes(out, " value_stack", module, "[", i, "] ")
+		    }
+	        }
+	    }
+	    if bracelevel = 0 then {
+		write(out, "\nend\n")
+		return token
+	    }
+        }
+    }
+	    
+    iohno(39, "line "|| line_number)
+
+end
+