summaryrefslogtreecommitdiff
path: root/ipl/packs/ibpag2
diff options
context:
space:
mode:
authorIgor Pashev <pashev.igor@gmail.com>2013-01-27 23:51:56 +0000
committerIgor Pashev <pashev.igor@gmail.com>2013-01-27 23:51:56 +0000
commit6ab0c0f5bf14ed9c15370407b9ee7e0b4b089ae1 (patch)
tree926065cf45450116098db664e3c61dced9e1f21a /ipl/packs/ibpag2
downloadicon-6ab0c0f5bf14ed9c15370407b9ee7e0b4b089ae1.tar.gz
Initial upstream version 9.4.3upstream/9.4.3
Diffstat (limited to 'ipl/packs/ibpag2')
-rw-r--r--ipl/packs/ibpag2/Makefile107
-rw-r--r--ipl/packs/ibpag2/README1093
-rw-r--r--ipl/packs/ibpag2/beta2ref.ibp117
-rw-r--r--ipl/packs/ibpag2/follow.icn332
-rw-r--r--ipl/packs/ibpag2/iacc.ibp495
-rw-r--r--ipl/packs/ibpag2/ibpag2.icn303
-rw-r--r--ipl/packs/ibpag2/ibreader.icn515
-rw-r--r--ipl/packs/ibpag2/ibutil.icn296
-rw-r--r--ipl/packs/ibpag2/ibwriter.icn110
-rw-r--r--ipl/packs/ibpag2/iiglrpar.lib946
-rw-r--r--ipl/packs/ibpag2/iiparse.lib419
-rw-r--r--ipl/packs/ibpag2/iohno.icn95
-rw-r--r--ipl/packs/ibpag2/itokens.icn925
-rw-r--r--ipl/packs/ibpag2/outbits.icn100
-rw-r--r--ipl/packs/ibpag2/rewrap.icn144
-rw-r--r--ipl/packs/ibpag2/sample.ibp111
-rw-r--r--ipl/packs/ibpag2/shrnktbl.icn131
-rw-r--r--ipl/packs/ibpag2/slritems.icn244
-rw-r--r--ipl/packs/ibpag2/slrtbls.icn370
-rw-r--r--ipl/packs/ibpag2/slshupto.icn79
-rw-r--r--ipl/packs/ibpag2/sortff.icn82
-rw-r--r--ipl/packs/ibpag2/version.icn19
22 files changed, 7033 insertions, 0 deletions
diff --git a/ipl/packs/ibpag2/Makefile b/ipl/packs/ibpag2/Makefile
new file mode 100644
index 0000000..56d917e
--- /dev/null
+++ b/ipl/packs/ibpag2/Makefile
@@ -0,0 +1,107 @@
+##########################################################################
+#
+ PROGNAME = ibpag2
+#
+##########################################################################
+#
+# User-modifiable section. Read carefully! You will almost
+# certainly have to change some settings here.
+#
+
+#
+# Destination directory for binaries files. Owner and group for
+# public executables. Leave the trailing slash off of directory
+# names.
+#
+OWNER = richard # root
+GROUP = group # root
+DESTDIR = /usr/local/bin
+# Put this path into your LPATH variable (on which, see the Icon
+# documentation). Make sure that the directory exists.
+LIBDIR = /usr/local/lib/icon/data
+
+#
+# Name of your icon compiler and compiler flags.
+#
+ICONC = icont
+IFLAGS = -u -s #-Sc 400 -Sg 400 -Si 2000 -Sn 4000 -SF 40
+
+SHAR = /usr/local/bin/shar
+COMPRESS = /usr/bin/compress
+# COMPRESS = /usr/local/bin/gzip
+
+###########################################################################
+#
+# Don't change anything below this line unless you're really sure of
+# what you're doing.
+#
+
+AUX = slshupto.icn rewrap.icn outbits.icn sortff.icn itokens.icn
+SRC = $(PROGNAME).icn $(AUX) slrtbls.icn slritems.icn follow.icn \
+ ibutil.icn iohno.icn ibreader.icn ibwriter.icn shrnktbl.icn \
+ version.icn
+PARSER = iiparse.lib
+GLRPARSER = iiglrpar.lib
+SHARFILES = $(SRC) $(PARSER) $(GLRPARSER) sample.ibp beta2ref.ibp \
+ iacc.ibp Makefile.dist README
+
+all: $(PROGNAME)
+
+$(PROGNAME): $(SRC)
+ $(ICONC) $(IFLAGS) -o $(PROGNAME) $(SRC)
+
+
+##########################################################################
+#
+# Pseudo-target names (shar, install, clean, clobber)
+#
+
+#
+# Assumes you have a shar program like mine.
+#
+shar: $(SHARFILES)
+ @echo ""
+ @echo "Removing any old shars in this directory."
+ @echo ""
+ -rm -f $(PROGNAME).[0-9][0-9].Z
+ @echo ""
+ $(SHAR) -fVc -o$(PROGNAME) -L32 $(SHARFILES)
+ $(COMPRESS) -f $(PROGNAME).[0-9][0-9]
+ @echo ""
+ @echo "Shell archive finished."
+ @echo ""
+
+# Pessimistic assumptions regarding the environment (in particular,
+# I don't assume you have the BSD "install" shell script).
+install: all
+ @echo ""
+ -test -d $(DESTDIR) || mkdir $(DESTDIR) && chmod 755 $(DESTDIR)
+ cp $(PROGNAME) $(DESTDIR)/$(PROGNAME)
+ -chgrp $(GROUP) $(DESTDIR)/$(PROGNAME)
+ -chown $(OWNER) $(DESTDIR)/$(PROGNAME)
+ -chmod 755 $(DESTDIR)/$(PROGNAME)
+ -test -d $(LIBDIR) || mkdir $(LIBDIR) && chmod 755 $(LIBDIR)
+ cp $(PARSER) $(LIBDIR)/$(PARSER)
+ cp $(GLRPARSER) $(LIBDIR)/$(GLRPARSER)
+ -chgrp $(GROUP) $(LIBDIR)/$(PARSER)
+ -chown $(OWNER) $(LIBDIR)/$(PARSER)
+ -chgrp $(GROUP) $(LIBDIR)/$(GLRPARSER)
+ -chown $(OWNER) $(LIBDIR)/$(GLRPARSER)
+ -chmod 644 $(LIBDIR)/$(PARSER)
+ -chmod 644 $(LIBDIR)/$(GLRPARSER)
+ @echo ""
+ @echo "Done installing."
+ @echo ""
+
+# Build executable and copy to ../../iexe.
+# Nothing done in this case because the executable doesn't stand alone.
+Iexe:
+
+
+#
+# Cleanup
+#
+clean:
+ -rm -f *~ #*# core *.u[12] $(PROGNAME).output
+Clean clobber: clean
+ -rm -f $(PROGNAME)
diff --git a/ipl/packs/ibpag2/README b/ipl/packs/ibpag2/README
new file mode 100644
index 0000000..c2f5d82
--- /dev/null
+++ b/ipl/packs/ibpag2/README
@@ -0,0 +1,1093 @@
+
+
+
+
+
+
+ A User's Manual for Ibpag2
+ (Icon-Based Parser Generation System 2)
+ Version 1.2
+
+ - or -
+
+ How to Use an LR-based Parser Generator
+
+
+ Richard L. Goerwitz, III
+ University of Chicago
+
+
+
+
+
+
+1.__What_is_Ibpag2?
+
+ Ibpag2 is a so-called "parser generator," i.e. a tool for
+automating the process of generating a recognizer and/or parser from
+abstract structural descriptions of an input language. Put in more
+practical terms, Ibpag2 is a piece of software that a) reads a source
+file containing a grammar that defines an input language, and then b)
+outputs an automaton that recognizes that language. The user may, at
+his or her option, specify actions this automaton should take when it
+sees various substructures within its input language. By default,
+however, the parser simply recognizes a given sequence as belonging,
+or not, to that language.
+
+ Ibpag2 utilizes so-called "LR" table generation and parsing
+algorithms. These algorithms facilitate construction of reasonably
+fast deterministic pushdown automata that are powerful enough to
+handle most commonly used programming language constructs. LR-based
+systems come in three main flavors: SLR(1), LALR(1), and LR(1). The
+LR(1) flavor is fairly easy to implement, but uses too many resources
+to be practical. LALR(1) algorithms are harder to implement, but much
+faster, and the parse tables they construct use considerably less
+memory than do those of their LR(1) counterparts. SLR(1) algorithms
+are the easiest to implement, compile the fastest, and use about as
+much memory as LALR(1)s. SLR(1) is the least powerful of the three,
+though, so there is a tradeoff. Ibpag2 is an "enhanced" SLR(1) parser
+generator. It is enhanced in the sense that it can operate both in
+its native SLR(1) mode, and in a more powerful "quasi-GLR" mode (on
+which, see section 5 below).
+
+ As its full title ("Icon-Based Parser Generator 2") implies,
+Ibpag2 is written in Icon [2,3], as are the automata it creates.
+Ibpag2 has been tested with Icon version 8.10. So far I have only run
+it on an i386 box running Xenix 2.3.3, and on a Sun 4 running some
+version of SunOS. I have many reports, though, of it running under
+other UNIX variants. It will probably also run under other operating
+systems, though modifications will in some instances be required.
+Using Ibpag2 under MS-DOS may not be possible, on account of the way
+it manages memory.
+
+ The Ibpag2 distribution adheres to de facto UNIX installation
+standards: Just set the appropriate variables in the makefile, and
+then "make install." For those who are using a non-UNIX system, or
+who have not installed such a package before, there is a section at
+the end entitled "Installing Ibpag2" that details the installation
+procedure (section 6).
+
+ Aside from the above-mentioned installation section (6), the
+remainder of this document aims to provide the reader a) with a
+simple, practical explanation of what LR-family parser generators are
+and how they work (section 2), and b) with a set of directions
+specifically on how to use Ibpag2 (section 3). There is also an
+advanced section on debugging (4), and one on using Ibpag2 with non-LR
+and/or ambiguous languages (5). The discussion is geared for those
+that have little or no experience in parsing or automaton theory. For
+very advanced reading, consult the bibliography. For a brief summary
+of Ibpag's command-line options, see the main Ibpag2 source file,
+ibpag2.icn, or invoke ibpag2 with the -h (help) option.
+
+ In general, be warned that Ibpag2 works best with small or
+medium-sized grammars. Its parse tables have to be reconstructed at
+run-time, and the code for doing this can become a bit cumbersome for
+grammars with more than 100 rules and fifty or so terminal symbols. I
+myself have processed grammars with as many as 300 terminals and 400
+rules. Although the resulting automata run well enough, the output
+files are over 300k, and Ibpag2 takes a long time to create them. If
+you must use Ibpag2 with a very large grammar, try the -c
+command-line option (which produces compressed parse tables). This
+option is discussed below, in section 4. Compiling (rather than
+interpreting) Ibpag2 may result in much faster processing, as will
+resetting your BLOCKSIZE and STRSIZE environment variables. See the
+installation section (6) below on using the Icon compiler to create
+the Ibpag2 executable. Good starting values for BLOCKSIZE and STRSIZE
+are triple their default values (i.e. 3 x 65000). These variables are
+discussed in the Icon manual page.
+
+ My ultimate aim in writing this document has been to make
+accessible to the non-CS portion of the Icon community what for them
+might seem an inaccessible branch of applied parsing and automaton
+theory. I am a philologist myself, and feel that there is a great
+deal that can and ought to be done to make advanced tools accessible
+to people with other interests than twiddling bits or pondering the
+true meaning of epsilon closures :-).
+
+ Any comments on the Ibpag2 system itself or its documentation
+will be gratefully received. Write to me at the address appended to
+the final section (6).
+
+
+2.__What_is_an_LR_Parser_Generator?
+
+ Back in the late 50s and 60s, linguists, mathematicians, and
+software engineers all became intensely interested in the formal
+properties of languages: Can they be described as a series of logical
+structures and relations? Can computers recognize and manipulate
+these structures efficiently? Linguists, in particular, quickly
+realized that the amount of structural complexity, ambiguity, and pure
+noise in natural language would render it computationally intractable,
+especially given the limited memory/throughput of then available CPUs.
+Mathematicians and engineers, however, found that many of the
+formalized notations they dealt with could, in fact, be (re)designed
+in such a way that efficient computer processing could - at least in
+principle - be achieved.
+
+ Principle, in this case, did not squarely meet reality until
+viable parser generation tools came into being. Parser generation
+tools map an abstract structural description of a formal notation or
+"language" to working computer code. Ideally, the designer simply
+makes assertions like:
+
+ an expression is composed of either
+ 1) a term (e.g. 10), or
+ 2) an expression, a "+" or "-", and another expression
+
+Parser generator systems translate these assertions (the "grammar")
+into a machine, i.e. automaton, that can recognize and/or manipulate
+input streams that conform to the "language" so described.
+
+ Let me dwell, for a moment, on the toy expression grammar
+offered above. Note that it describes a set of simple mathematical
+constructs like:
+
+ 9
+ 9 + 3
+ 9 + 3 - 8
+
+According to the specifications given above, the nine, three, and
+eight alone constitute terms - which are also expressions (via rule
+1). Because these terms are also expressions, "9 + 3" can be reduced
+to a larger expression by rule 2. The same is true for "9 + 3 - 8,"
+except that there rule 2 must apply twice - once for "9 + 3," and then
+again for that and the remainder of the line - in effect grouping the
+expressions as ( ( (9) + (3) ) - (8) ). It is also possible to group
+the expression ( (9) + ( (3) - (8) ) ), although for the discussion
+that immediately follows this second grouping will be ignored (see
+below on the terms "precedence" and "associativity").
+
+ If we add actions to the above grammar specification, we can
+create a calculator-like automaton. Traditionally, LR-family automata
+(like the ones Ibpag2 creates) contain a parser, one or more stacks,
+and a set of action tables. The parser reads from an input stream
+segmented into "tokens" (e.g. TERM, '+', '-'), and then manipulates
+its stacks according to directives contained in so-called "action" and
+"goto" tables. As it reads the input stream, the parser matches rules
+with action code specified by the programmer, e.g. rule 2 above might
+be matched with code that added/subtracted the expressions on either
+side of the '+'/'-' operator, and produced (in calculator style) the
+result. Alternatively, it might be matched with code that generated
+an equivalent construct in another language.
+
+ In the case of our toy expression grammar above, the
+corresponding LR automaton operates as follows. Omitting and/or
+simplifying some of the inner details, it first looks at the input
+stream to see what the next token is. If the next token is an
+operator or end-of-input, it checks the top of its stack. If the top
+of the stack has a term on it, that term is popped off, and pushed
+back on, this time renamed as an expression (rule 1 above). The input
+token is then shifted from the input stream onto the stack, unless it
+is the end-of-input token, in which case the parser returns with a
+result. If the top of the stack has an expression on it (rather than
+a term), the parser pops the top three elements off of the stack, and
+then either subtracts the third element from the first or adds the two
+together, depending on whether the second element down was the
+addition or subtraction operator, and the result is pushed onto the
+stack as yet another expression.
+
+ Even in this much-simplified form, the automaton's structure
+is complex. Let us look briefly, therefore, at a practical example of
+its actual workings. If we were to feed it "9 + 3 + 8," our
+calculator would take the following actions:
+
+ 1) read the 9, and push it onto the stack as a term
+ 2) see a plus sign on the input stream
+ 3) pop the term (9) off of the stack and push it back on again
+ (this time calling it an expression)
+ 4) push the plus sign onto the stack
+ 5) read the 3, and push it onto the stack as a term
+ 6) see a minus sign on the input stream
+ 7) pop the 3 off of the stack and push it back on again (this
+ time calling it an expression)
+ 8) see a minus sign still waiting on the input stream
+     9) pop 9, +, and 3 off of the stack, apply the plus operator
+        to 9 and 3, then push the result onto the stack again as a
+        single expression (the stack now has 12 on top)
+ 10) read the minus sign, and push it onto the stack
+ 11) read the 8, and push it onto the stack as a term
+ 12) see the end of input coming up on the input stream
+ 13) pop the 8 off of the stack and push it back on again as an
+ expression
+ 14) see the end-of-input token still sitting on the input
+ stream
+ 15) pop 12, -, and 8 off of the stack, apply the minus operator
+ to 12 and 8, then push the result onto the stack again (the
+ stack now has 4 on top)
+ 16) return the "answer" (i.e. 4)
+
+ This series of actions is hard to describe, and even more so
+to model as part of a hand-written computer program. And, even if
+such a program were written by hand, this program would have to be
+modified, at times radically, every time the grammar it assumes was
+augmented or changed. What I am leading up to is that, with a parser
+generator, the hand compilation stage can be eliminated by allowing
+the programmer simply to declare his/her tokens and language specs,
+then have the appropriate automaton constructed with little, or no,
+human intervention. This is why parser generation tools were critical
+to the development of not just theoretically feasible, but truly
+*practical*, LR-based computer language design systems.
+
+
+3.__Using_Ibpag2
+
+ To recode the above toy expression grammar in
+Ibpag2-compatible format is relatively simple, especially if we omit
+the actions initially, and concentrate on simple recognition. We need
+only a set of token declarations and three rules. Certain
+modifications will have to be made to the token declarations later on.
+For general illustration's sake, however, the following will suffice:
+
+ %token TERM, '+', '-'
+ %%
+ expression : TERM
+ expression : expression, '+', expression
+ expression : expression, '-', expression
+
+TERM, and the addition and subtraction operators, are the tokens (i.e.
+the terminal symbols out of which the grammar is constructed - the
+things that the input stream is segmented into). Note the %token
+keyword used to declare them. The colon means "is composed of." The
+double percent sign separates token declarations from the grammar
+proper.
+
+ Adding in our actions - which above were keyed to a complex
+set of decisions based on input tokens and stack conditions - requires
+just a few extra lines of Ibpag2 action code, set off in curly braces:
+
+ %token TERM, '+', '-'
+ %%
+ expression : TERM { return arg1 }
+ expression : expression, '+', expression { return arg1 + arg3 }
+ expression : expression, '-', expression { return arg1 - arg3 }
+
+Using a "|" shorthand for repeated left-hand sides of rules, we may
+reformat this as:
+
+ %token TERM, '+', '-'
+ %%
+ expression : TERM { return arg1 }
+ | expression, '+', expression { return arg1 + arg3 }
+ | expression, '-', expression { return arg1 - arg3 }
+
+ ArgX above refers to the Xth element of the right-hand side of
+the preceding rule. So, for example, arg1 in "{ return arg1 }" above
+refers to TERM - the only right-hand side element of the first rule.
+The action "{ return arg1 }" means, "once you find a TERM and have
+renamed it as an expression, use the value of TERM as the value for
+that expression." By way of contrast, the action "{ return arg1 +
+arg3 }" means, in conjunction with the rule it follows: "When you find
+an expression consisting of a sub-expression, a plus operator, and
+another sub-expression, use the value of sub-expression 1 + the value
+of sub-expression 2 as the value for the expression as a whole."
+Technically, the action "{ return arg1 }" for expression : TERM is not
+necessary, since the Ibpag2 parser, by default, pushes the value of
+the last RHS arg onto the stack. For epsilon productions (to be
+discussed below), it pushes &null.
+
+ One serious problem with this set of specifications is that
+the operators '-' and '+' are left associative. We humans take this
+for granted, because correct algebraic grouping is something our
+high-school math teachers burned into us. The computer, though, has
+to be told, pedantically, how to group addition and subtraction
+expressions. It has to be explicitly instructed, in other words, to
+group expressions like "9 + 32 - 4" as (9 + 32) - 4. Without
+instructions of this kind, the parser does not know, after it has read
+"9 + 32" and is looking at a minus sign, whether to shift the minus
+sign onto the stack, and eventually try to group as 9 + (32 - 4), or
+to reduce "9 + 32" to an expression and group as (9 + 32) - 4.
+Although in this case the grouping may not seem to matter, it
+sometimes does. Some operators group right to left. The unary minus
+sign, for example, is one such operator (--4 groups as (- (- 4))). To
+include the unary minus sign in our grammar, we might append yet
+another rule:
+
+ %token TERM
+ %left '+', '-'
+ %right '-'
+ %%
+ expression : TERM { return arg1 }
+ | expression, '+', expression { return arg1 + arg3 }
+ | expression, '-', expression { return arg1 - arg3 }
+ | '-', expression { return - arg2 }
+
+The trouble with this arrangement is that the minus sign was already
+declared as left associative. To get around the conflict we use a
+"dummy" token declaration, and a %prec declaration in the applicable
+rule:
+
+ %token TERM
+ %left '+', '-'
+ %right UMINUS
+ %%
+ expression : TERM { return arg1 }
+ | expression, '+', expression { return arg1 + arg3 }
+ | expression, '-', expression { return arg1 - arg3 }
+ | '-', expression %prec UMINUS { return - arg2 }
+
+The %prec declaration simply tells the parser that, even though the
+rule contains a '-' operator, the rule should be handled as if the
+operator were UMINUS. UMINUS is not actually used as a symbol in the
+right-hand side of any rule (hence the designation "dummy"). It is
+there simply to make the last rule behave as if the minus sign in the
+last rule were different than in the second-to-last rule.
+
+ Let us now add in multiplication and division operators to our
+calculator specifications, and see what happens. Let me reiterate
+here that the action "{ return arg1 }" for rule 1 (expression : TERM)
+is not strictly necessary, since the default is to push the last RHS
+arg onto the value stack:
+
+ %token TERM
+ %left '+', '-'
+ %left '*', '/'
+ %right UMINUS
+ %%
+ expression : TERM { return arg1 }
+ | expression, '+', expression { return arg1 + arg3 }
+ | expression, '-', expression { return arg1 - arg3 }
+ | expression, '*', expression { return arg1 * arg3 }
+ | expression, '/', expression { return arg1 / arg3 }
+ | '-', expression %prec UMINUS { return - arg2 }
+
+Note that the multiplication and division operators were defined
+*after* the addition and subtraction operators. The reason for this
+is that, technically speaking, the grammar itself is ambiguous. If we
+treat all operators identically, the parser will not be able to tell
+whether "9 + 1 * 3" should be parsed as (9 + 1) * 3 or as 9 + (1 * 3).
+As we all know from our high-school algebra, multiplication has a
+higher precedence than addition. You do the multiplications before
+the additions, in other words, no matter where they occur. To tell
+the parser to behave in this same manner, we declare '*' after '+'.
+Note that, despite their higher priority, the '*' and '/' operators
+are still left associative. Hence, given "3 / 4 * 7," the parser will
+group its input as (3 / 4) * 7. As a brain teaser, try to figure out
+how the parser might group the input "9 + 3 / 4 * 7." Remember that
+higher-precedence rules get done first, but that same-precedence rules
+get done according to associativity.
+
+ The only fundamental problem remaining with the above grammar
+is that it assumes that the end of the input coincides with the end of
+the line. Is it possible to redefine the language described as
+consisting of arbitrary many lines? The answer to this question is
+"yes." One can simply add another set of productions to the grammar
+that state, essentially, that the input language consists of lines
+made up of an expression and a carriage return or of nothing. Nothing
+is indicated by the keyword epsilon. Note that only the first rule
+has an action field:
+
+ lines : lines, expression, '\n' { write(arg2) }
+ | lines, '\n'
+ | epsilon
+
+This rule-series may seem rather abstruse, but it becomes a bit
+clearer when you think about what happens on actual input. If there
+is no input (epsilon), nothing gets printed, because lines : epsilon
+has no action field. If the parser sees an expression and a newline,
+the parser takes this as an instance of epsilon, plus an expression,
+plus a newline. This, then, becomes the first component of rule 1 if
+another expression + newline follows, or of rule two if just a newline
+occurs. Every time an instance of rule 1 occurs, the action "{
+write(arg2) }" is executed, i.e. the value of the expression gets
+printed. If this still seems hard to fathom, try walking through
+step-by-step. Even experienced hands may find these sorts of rules
+difficult to construct and debug.
+
+ Note that "lines" is now the so-called "start symbol" of our
+grammar. It is, in other words, the goal of every parse. By default
+the left-hand side symbol of the first rule is the start symbol. This
+may be overridden with a %start declaration in the tokens section (on
+which, see the sample Ibpag2 input file below).
+
+ With our new, multi-line start symbol in place, the only piece
+that needs to be added, in order to make our calculator specification
+a full working input to Ibpag2, is a tokenizer. A tokenizer is a
+routine that reads input from a file or from some other stream (e.g.
+the user's console), and then segments this input into tokens that its
+parser can understand. In some cases, the tokens must be accompanied
+by a literal value. For example, if we encounter a TERM, we return
+TERM, just as it is listed in the %token declaration. But what is the
+literal value of a TERM token? It could be, for example, 9, or 5, or
+700. The tokenizer returns the symbol TERM, in this case, but then
+records that TERM's actual value by setting some global variable. In
+Ibpag2's parser, this variable is assumed to be "iilval." In the
+tokenizer, therefore, one might write
+
+ iilval := (literal value)
+ suspend TERM
+
+For literal operators like '+' and '*', there is no need to set
+iilval, since their literal value is irrelevant. One simply returns
+these as integers (usually via "suspend ord(c)").
+
+ The tokenizer routine is normally appended to the grammar
+after another double percent sign. Everything after this second
+double percent sign is copied literally to the output file.
+Alternatively, the tokenizer can be $included via Icon's preprocessor.
+Ibpag2 demands that the tokenizer be called iilex, and that it take a
+single file argument, that it be a generator, and that it fail when it
+reaches end-of-input. Combined with our "lines" productions, the
+addition of an iilex routine to our calculator grammar yields the
+following Ibpag2 input file:
+
+ %token TERM
+ %left '+', '-'
+ %left '*', '/'
+ %right UMINUS
+
+ %start lines
+
+ %%
+
+ expression : TERM { return arg1 }
+ | expression, '+', expression { return arg1 + arg3 }
+ | expression, '-', expression { return arg1 - arg3 }
+ | expression, '*', expression { return arg1 * arg3 }
+ | expression, '/', expression { return arg1 / arg3 }
+ | '-', expression %prec UMINUS { return - arg2 }
+
+ lines : lines, expression, '\n' { write(arg2) }
+ | lines, '\n'
+ | epsilon
+
+ %%
+
+ procedure iilex(infile)
+
+ local nextchar, c, num
+
+ nextchar := create !(!infile || "\n" || "\n")
+ c := @nextchar | fail
+
+ repeat {
+ if any(&digits, c) then {
+ if not (\num ||:= c) then
+ num := c
+ } else {
+ if iilval := \num then {
+ suspend TERM
+ num := &null
+ }
+ if any('+-*/()\n', c) then {
+ iilval := c
+ suspend ord(c)
+ } else {
+ if not any(' \t', c) then {
+ # deliberate error - will be handled later
+ suspend &null
+ }
+ }
+ }
+ c := @nextchar | break
+ }
+ if iilval := \num then {
+ return TERM
+ num := &null
+ }
+
+ end
+
+ procedure main()
+ return iiparse(&input, 1)
+ end
+
+As noted above, the tokenizer (iilex) must be a generator. It must
+suspend integers either directly (e.g. ord(c)), or else via symbolic
+defines like TERM, created by Ibpag2 on the basis of %token, %right,
+%left, and %nonassoc declarations. The tokenizer must fail on end of
+input.
+
+ If you like, cut the above code out, place it in a temporary
+file, tmp.ibp, and then feed this file to Ibpag2 by typing "ibpag2 -f
+tmp.ibp -o tmp.icn." If your system supports input and output
+redirection, type: "ibpag2 < tmp.ibp > tmp.icn." Ibpag2 will turn
+your grammar specifications and actions into a routine called iiparse.
+If you look above, you will see that I appended a main procedure that,
+in fact, calls iiparse(). Iiparse() takes two arguments: 1) an input
+stream, and 2) a switch that, if nonnull, tells the parser to fail
+rather than abort on unrecoverable errors. When Ibpag2 is finished
+creating its output file (tmp.icn above), compile that file the way
+you would compile any other Icon program (e.g. "icont tmp"). Finally,
+run the executable. You should be able to type in various simple
+arithmetic expressions and have the program spit back answers each
+time you hit a return. The only problem you might encounter is that
+the parser aborts on erroneous input.
+
+ The issue of erroneous input brings up yet another point of
+general Ibpag2 usage. Normally, if one is processing input, one does
+not want to abort on errors, but rather just emit an error message,
+and to continue processing - if this is at all possible. To do this,
+Ibpag2 provides a simple but fairly effective mechanism: A reserved
+"error" token.
+
+ When Ibpag2 encounters an error, it will remove symbols from
+its stack until it has backtracked to a point where the error token is
+legal. It then shifts the error token onto the stack, and tries to
+re-start the token stream at the point where it left off, discarding
+tokens if necessary in order to get itself resynchronized. The parser
+considers itself resynchronized when it has successfully read and
+shifted three tokens after shifting the error token. Until then it
+remains in an error state, and will not output additional error
+messages as it discards tokens.
+
+ This explanation may sound a bit abstruse, but in practice it
+turns out to be quite simple.  To implement error handling for our
+calculator, we really have to add only one production to the end of
+the "lines" section:
+
+ lines : lines, expression, '\n' { write(arg2) }
+ | lines, '\n'
+ | epsilon
+ | error, '\n' {
+ write("syntax error; try again:")
+ iierrok
+ }
+
+Given the above grammar, the parser will handle errors as follows: If
+an error occurs (say it has an expression then an operator on its
+stack and sees a newline on the input stream) the parser will throw
+out the operator, then check if the error token would be OK in this
+state (which it would not). Then it would throw out the expression.
+At this point, the stack is in the ready-to-read-a-lines state - the
+state it was in before it read the last expression. Since "lines" may
+consist of error and '\n,' the error token is legal here, and so the
+parser pushes error onto the stack, then looks back at the input
+stream (where a newline is still waiting). Since the newline now
+completes the rule lines : error, '\n', the parser pushes the newline
+onto its stack, then executes the action associated with this
+production, i.e. it writes "syntax error; try again:" to the console,
+prompting the user for additional input.
+
+ The keyword "iierrok" in the above error production's action
+field is there for a subtle, but important, reason: It tells the
+parser to consider itself resynchronized, even if three tokens have
+not yet been shifted. If iierrok were not in the action code for this
+rule, and the user were to supply more bad input after the prompt,
+then the parser would simply discard those tokens, without emitting
+another error message. Why? Because, as you will recall, the parser
+discards tokens after an error, in efforts to resynchronize itself.
+Until it reads and shifts three tokens successfully, it considers
+itself in an error state, and will not emit additional error messages.
+The three-token resync rule is there to prevent a cascade of
+irrelevant error messages touched off by a single error. In our
+calculator's case above, though, we are smarter than the parser. We
+know that it is resynchronized as soon as it reduces error, '\n' to
+lines. So if a syntax error occurs on the next token, it should be
+reported.  Adding "iierrok" to the action ensures that the parser will
+do just this.
+
+ In addition to iierrok, there are several other directives
+Ibpag2 accepts as part of the action code segments. These are as
+follows:
+
+ iiclearin clear the current input token
+ IIERROR perform error recovery
+ IIACCEPT simulate an accept action
+
+There are several other directives (all implemented as macros) that
+Ibpag2 accepts in GLR mode. For a discussion of GLR mode, see below,
+section 5. IIERROR in particular, and error recovery in general, work
+a bit differently in that mode than they do in Ibpag2's normal (i.e.
+LR) mode.
+
+ There are admittedly many other topics that might be covered
+here. This treatment, however, is intended as a general nontechnical
+introduction, and not as a complete textbook on parser generation use.
+If you want to learn more about this topic, consult the bibliography.
+Also, check the UNIX manual pages on the YACC utility (Yet Another
+Compiler Compiler).  Ibpag2's input format is fairly close (too close,
+perhaps) to YACC's. In fact, most of what is said about YACC in UNIX
+documentation can be carried directly over to Ibpag2. Several salient
+differences, though, should be kept in mind:
+
+ 1) YACC's "$$ = x" constructs are replaced by "return x" (e.g.
+ "$$ = $1 + $3" -> "return $1 + $3" [$1 is a synonym for
+ "arg1", $3 for "arg3", etc.])
+
+ 2) all variables within a given action are, by default, local
+ to that action; i.e. they cannot be accessed by other
+ actions unless you declare them global elsewhere (e.g. in
+ the pass-through part of the declarations section %{ ...
+ %})
+
+ 3) the %union and %type declarations/tags are not needed by
+ Ibpag2 (both for better and for worse)
+
+ 4) tokens and symbols are separated from each other by a comma
+ in Ibpag2 files (e.g. %token '+', '-' and S : NP, VP)
+
+ 5) epsilon is indicated by the keyword "epsilon" (e.g. REL :
+ epsilon), and not by an empty RHS
+
+ 6) both epsilon and error *may* be declared as %tokens for
+ reasons of precedence, although they retain hard-coded
+ internal values (-2 and -1, respectively)
+
+ 7) all actions must follow the last RHS symbol of the rule
+ they apply to (preceded by an optional %prec directive); to
+ achieve S : NP { action1 }, VP { action2 }, insert a dummy
+ rule: S : NP, dummy, VP { action2 }; dummy : epsilon {
+ action1 } ;
+
+ 8) YYERROR, YYACCEPT, yyclearin, and yyerrok are the same,
+ except they are written IIERROR, IIACCEPT, iiclearin, and
+ iierrok (i.e. "ii" replaces "yy")
+
+ 9) Ibpag2's input files are tokenized as modified Icon files,
+ and, as a consequence, Icon's reserved words must not be
+ used as symbols (e.g. "if : if, then" is no go)
+
+I myself find YACC to be ugly. As a result, Ibpag2 is not an exact
+YACC clone. I would like to underscore the fact that I have no
+intention to move in this direction, either. It's as YACC-like as
+it's going to get!
+
+ Both YACC and non-YACC users should note number 9 in the above
+list. Don't use things like "while," "every," "do," etc. as symbols
+in your grammar! Just use the same rules for Ibpag2 nonterminals as
+for Icon variables, and you'll be OK.
+
+ For those that just can't bear using anything but a strictly
+YACC-conformant system, I've included a preprocessor with the Ibpag2
+distribution called (at one user's recommendation) "iacc." Iacc reads
+&input - assumed to be a YACCish grammar - and sends to &output an
+Ibpag2-conformant file. I have not tested this file extensively, and
+there are likely to be bugs in the way I've handled the necessary 2
+token lookaheads and value stack references. Give it a whirl, though,
+if you are feeling adventurous. The only reason I personally use Iacc
+is that some YACCs (e.g. BSD YACC) have particularly nice debugging
+messages and help. If my grammar is particularly complex, I just run
+it through YACC without action code first, then use Iacc to convert it
+to Ibpag2 format. Iacc's output, as I noted, is not meant to be
+pretty, so I invariably end up doing a little editing - usually just
+respacing a few rules, and re-inserting any comments that I might have
+put in the original YACC file.
+
+ In general, Ibpag2 (like YACC) handles epsilon moves and
+indirect cycles. LR-mode shift-reduce conflicts are also handled in
+the normal way (i.e. pick the rule with the highest priority, and, in
+cases where the priority is the same, check the associativities). In
+contrast to YACC, Ibpag2 flags reduce/reduce conflicts as errors
+(since these often conceal deeper precedence problems and just plain
+kludges). Reduce/reduce conflict errors are easily enough remedied,
+if need be, via (dummy) precedences. One can convert these errors to
+warnings by specifying -y on the command line. With the -y option,
+reduce/reduce conflicts are resolved in favor of the rule that occurs
+first in the grammar. The -y switch also prevents Ibpag2 from
+aborting on shift/reduce conflicts, telling it instead to resolve in
+favor of shift. Basically, -y is a partial YACC compatibility switch.
+Normally (i.e. in SLR mode) Ibpag2 is much more finicky than YACC
+about conflicts in its grammars.
+
+ Also in contrast to YACC, Ibpag2 supports multiple
+simultaneous parsers. Ibpag2 normally names its main parser routine
+iiparse(). By using the -m command-line option, however, you can
+override this default behavior, and force Ibpag2 to augment this name
+in some uniquely identifiable fashion. For example, "ibpag2 -m _1 <
+tmp.ibp > tmp.icn" will force Ibpag2 to write a parser called
+"iiparse_1" to tmp.icn. Note that, instead of calling iilex, this
+iiparse_1() routine will now call iilex_1, and all necessary global
+variables will have _1 appended to them (e.g. errors will become
+errors_1). I don't expect that many people will have occasion to use
+this feature. It is there, though, for those that want it.
+
+
+4.__Debugging
+
+ Constructing and debugging LR(1) family parsers can sometimes
+be hair raising, even with a parser generator. Several precautions
+can be taken, however, to minimize the agony. The first is to declare
+all tokens initially as part of a single %token declaration, i.e. with
+no precedences, and with the same associativities. Also, leave out
+action code until the grammar seems to be working. In this stage, you
+can even run the grammar through (BSD)YACC or GNU Bison. All you
+would need to do is remove the commas between tokens and symbols, and
+place a semicolon at the end of every rule. During this and all
+debugging stages, supply Ibpag2 with a -v command-line switch. This
+will cause Ibpag2 to write a summary of rules, tokens, and its two
+state tables to "ibpag2.output" (a bit like GNU Bison, but with a
+hard-coded name). If you get messages about conflicts in your parse
+tables (e.g. "unresolvable reduce/reduce conflict, state 5, token
+257, rules 4,5"), this file will tell you what rules these are, and
+what token number 257 is. Use precedences and associativities to
+clear these problems up as they arise. If you are comfortable having
+reduce/reduce errors resolved by the order in which the conflicting
+rules occur, then use the -y command-line switch. With -y on the
+command line, Ibpag2 will always resolve in favor of the earlier rule.
+This option will also cause it to resolve all shift/reduce conflicts
+in favor of shift.
+
+ There are certain languages that are not ambiguous that SLR(1)
+parsers like Ibpag2 will fail to produce an unambiguous parse table
+for. The classic example is
+
+ expr : lval, '=', rval | rval
+ lval : '*', rval | ID
+ rval : lval
+
+C programmers will recognize this as a toy expression grammar with
+code for identifiers, assignments, and pointers. The problem is that
+if we feed this grammar to Ibpag2, it will claim that there is a
+conflict on lookahead '='. In truth, there is no ambiguity. The SLR
+parser simply doesn't remember the pathway the parser used to get to
+the state it is in when it sees '=' on the input stream. Whether the
+parser gets into this state by seeing '*' plus an ID, or by seeing
+just an ID, it knows to turn the ID into an lval. Then it knows to
+turn lval into rval. At this point, though, it doesn't know whether
+to shift the = sign via rule 1, or to turn rval and the preceding '*'
+into an lval. The parser has "forgotten" that the '*' is there
+waiting one level down on the stack!
+
+ The solution to this problem is actually quite simple (at
+least in concept). Just provide a unique pathway in the grammar for
+the conflicting rules. In this case, they are rules 1 and 5 (the
+first and last):
+
+ expr : lval, '=', rval | rval
+ lval : '*', pval | ID
+ pval : lval
+ rval : lval
+
+Now when the parser sees '*,' it can only have a pval after it. Never
+mind that pval is composed of precisely the same things as rval. The
+point is that the parser generator follows a different route after
+seeing '*' than if it starts with ID and no preceding '*'. Hence it
+"remembers" that that the '*' is back on the stack, waiting for the
+"lval : '*', pval" rule to apply. There is no more conflict.
+
+ Go ahead and run these grammars through Ibpag2 if you aren't
+sure what is going on. Remember to declare ID as a token, and to
+place "%%" in the appropriate spot!
+
+ If you get your parser up and running, but find that it is not
+functioning quite the way you expect, add the following line somewhere
+near the start of Ibpag2's output file:
+
+ $define IIDEBUG
+
+If you like, you can add it to the beginning of your Ibpag2 input
+file. Place it in the declarations section (before the first double
+percent sign), and surround it by %{ and %}, e.g.:
+
+ %{
+ $define IIDEBUG
+ %}
+
+This tells Ibpag2 to send $define IIDEBUG straight through to the
+output file.
+
+ What defining IIDEBUG does is tell iiparse, once compiled, to
+emit profuse debugging messages about the parser's actions, and about
+the state of its stacks. This display will not make a whole lot of
+sense to anyone who doesn't understand LR-family parsers, so those who
+want to access this feature should perhaps go through a standard
+reference like Aho, Sethi, and Ullman [1].
+
+ If, after you are finished debugging your grammar, you find
+that Ibpag2's output files are rather large, you may try saving space
+by compressing the action and goto tables. This is accomplished by
+invoking Ibpag2 with the -c (compress) option. Using this option
+makes debugging difficult, and makes the parser run a bit more slowly.
+It also only works for rather large grammars with long nonterminal
+symbol names. Don't even consider it until the grammar is thoroughly
+debugged and you have determined that the output file's size is just
+too great for practical use. Even then, compression may or may not
+help, depending on how long your nonterminal names are. In general,
+Ibpag2 is best as a teaching tool, or as a production system for
+medium or small grammars.
+
+
+5.__Using_Ibpag2_with_Non-LR_Grammars
+
+ There may be times when you *want* to parse languages that no
+LR-based algorithm can handle. There may be times, that is, when the
+grammar you want to use contains conflicts or ambiguities that are
+there by design, and not by oversight. For example, you may want to
+parse a natural language. Full-blown natural languages involve many
+highly ambiguous constructs, and are not LR-parsable. By invoking it
+with the -a option, Ibpag2 can parse or recognize certain natural
+languages, or, more practically speaking, certain NL subsets. The
+letter "a" in -a is supposed to stand for "ambiguous," although what
+this option really does is put Ibpag2 into a quasi-GLR mode - i.e.
+into a kind of "generalized" LR mode in which it can accept non-LR
+grammars [4,5].
+
+ User-visible changes to Ibpag2's operation in quasi-GLR mode
+(i.e. with the -a option) are as follows:
+
+ 1) iiparse() is now a generator
+ 2) action code can use suspend as well as return
+ 3) IIERROR places the current thread in an error state (i.e.
+ it doesn't *necessarily* trigger error recovery; see below)
+ 4) there are two new action-code directives (iiprune and
+ iiisolate) and a general define (AUTO_PRUNE)
+ 5) conflicts due to ambiguities in the grammar no longer
+ result in aborted processing (so, e.g., if you do not
+ specify the -y option on a grammar with reduce/reduce
+ conflicts, Ibpag2 will simply generate a parser capable of
+ producing multiple parses for the same input)
+
+ In quasi-GLR mode, iiparse() should be invoked in a way that
+will render multiple results usable, if they are available (e.g.
+"every result := iiparse(&input) do...". Action code is also allowed
+to produce more than one value (i.e. to use suspend). When it does
+so, iiparse() creates separate parse threads for each value. So, for
+instance, if your action code for some production suspends both of the
+following lists,
+
+ ["noun", "will", "gloss: desire"]
+ ["noun", "will", "gloss: legal document mandating how _
+ one's possessions are to be disposed _
+ of after one's death"],
+
+iiparse() would create two separate parse threads - one for each
+result. Note that in this case, the syntactic structure of each
+thread is the same. It is their semantics (i.e. the stuff on the
+value stack) that differs.
+
+ If you use the iierrok and iiclearin macros in your action
+code before suspending any result, their effect persists through all
+subsequent suspensions and resulting parse threads.  If you use these
+macros after suspending one or more times, however, they are valid
+only for the parse thread generated by the next suspension. By way of
+contrast, the IIERROR macro *always* flags only the next parse thread
+as erroneous. Likewise, IIACCEPT always simulates an accept action on
+the next suspension only. IIERROR and IIACCEPT, in other words, never
+have any effect on subsequent suspensions and parse threads other than
+the one that immediately follows them. This is true of iierrok and
+iiclearin only when used after the first suspension.
+
+ In quasi-GLR mode, IIERROR (number three in the difference
+list above) becomes a mechanism for placing the current parse thread
+in error mode. This is similar to, but not quite identical to, how
+IIERROR functions in straight LR mode. In quasi-GLR mode, if other
+threads can carry on the parse without error the erroneous parse
+thread is quietly clobbered. Full-blown error recovery only occurs if
+all of the other parsers halt as well. This makes sense if you think
+about it. Why keep erroneous threads around when there are threads
+still continuing a valid parse? For some large interactive systems,
+it might be necessary to keep bogus threads around longer, and weed
+them out only after a lengthy grading process. If you are
+constructing a system such as this, you'll have to modify Ibpag2's
+iiglrpar.lib file. In particular, you'll need to change the segment
+in iiparse() that takes out the trash, so to speak, in such a way that
+it does so only if the error count in a given parser either rises
+above a specific threshold or else exceeds the number of errors in
+the "most correct" parser by a certain amount. This is not that hard
+to do. I just don't expect that most parsers people generate with
+Ibpag2 will use IIERROR or error recovery in general in so involved a
+fashion.
+
+ Iiprune and iiisolate (number 4 above) are used to control the
+growth of the parallel parser array. In order to give straightforward
+(read "implementationally trivial") support for action code, Ibpag2
+cannot create a parse "forest" in the sense that a standard GLR parser
+does. Instead, it simply duplicates the current parser environment
+whenever it encounters a conflict in its action table. Even if the
+conflict turns out to reflect only a local ambiguity, the parsers, by
+default, remain separate. Put differently, Ibpag2's quasi-GLR parser,
+by default, makes no direct effort to reduce the size of its parser
+arrays or to alter the essentially linear structure of their value and
+state stacks. Size reduction, where necessary and/or desirable, is up
+to the programmer. What the iiprune macro is there to do is to give
+the programmer a way of pruning a given thread out of the active
+parser list. Iiisolate allows him or her to prune out every thread
+*but* the current one. AUTO_PRUNE makes the parser behave more like a
+standard GLR parser, instructing it to prune parse threads that are
+essentially duplicating another parse thread's efforts. The parser,
+though, does not build a parse tree per se, the way most GLR parsers
+typically do, but rather manipulates its value stack like a
+traditional LR-family parser.
+
+ Iiprune is useful when, for example, the semantics (i.e. your
+"action" code segments) determine that a given parse thread is no
+longer viable, and you want to signal the syntactic analyzer not to
+continue pursuing it. The difference between iiprune and IIERROR is
+that iiprune clobbers the current parser immediately. IIERROR only
+puts it into an error state. If all active parsers end up in an error
+state, and none can shift additional input symbols, then the IIERROR
+macro induces error recovery. Iiprune does not. NB: iiprune, if used
+in action code that suspends multiple results, cancels the current and
+remaining results (i.e. it does not clobber parsers already spun off
+by previous suspensions by invocation of that same code; it merely
+cuts the result sequence). Iiprune essentially stands in for "fail"
+in this situation. Fail itself can be used in the code, but be warned
+that iiparse() will still push *at least one* value onto its value
+stack, even if a given action code segment fails. This keeps the
+value stack in sync with the syntax. To avoid confusion, I recommend
+not using "fail" in any action code.
+
+ Iiisolate is useful if, during error recovery, you prompt the
+user interactively, or do something else that cannot be elegantly done
+in parallel for two or more distinct parse threads. Iiisolate allows
+you to preserve only the current parse thread, and to clobber the
+rest. Iiisolate can also be useful as a way of making sure that only
+one thread carries on the parse in non-error situations. Suppose that
+we have a series of productions:
+
+ sentences : sentences sentence '\n'
+ { print_parse(arg2) }
+ | sentences '\n'
+ | error '\n'
+ | epsilon
+
+If we get a sentence with more than one parse, all of the underlying
+threads that produced these parses will be active for the next
+sentence as well. In many situations this will not be what we want.
+If our desire is to have only one active parse thread at the start of
+each sentence, we simply tell our lexical analyzer to suspend two
+newlines every time it sees a newline on the input stream. This
+insures that the second rule will always apply right after the first.
+We then insert iiisolate directives for both it and the one error
+production:
+
+ sentences : sentences sentence '\n'
+ { print_parse(arg2) }
+ | sentences '\n'
+ { iiisolate }
+ | error '\n'
+ { iiisolate; iierrok }
+ | epsilon
+
+The effect here is to allow multiple parsers to be generated only
+while parsing "sentence". The iiisolate directive, in other words,
+sees to it that no sentence parse will ever begin with multiple active
+parsers. As with LR mode, iierrok clears the error flag for the
+(current) parser.
+
+ Note that if you use iiisolate in action code that suspends
+multiple results, iiisolate will clobber all parsers but the one
+generated by the next suspension.
+
+ If there is no need for close control over the details of the
+parser array, and you wish only to clobber parsers that end up doing
+the same thing as some other parser (and hence returning identical
+values), then just make sure you add "$define AUTO_PRUNE" to the
+pass-through code section at the top of the file. Put differently,
+defining AUTO_PRUNE instructs the quasi-GLR parser to weed out parsers
+that are in the same state, and which have identical value stacks.
+AUTO_PRUNE can often be used in place of iiisolate in situations like
+the one discussed just above. Its only drawback is that it slows
+the parser a bit.
+
+ Other than these deviations (action code and iiparse becoming
+generators, IIERROR's altered behavior, and the addition of iiprune,
+iiisolate, and AUTO_PRUNE), Ibpag2's quasi-GLR mode - at least on the
+surface - works pretty much like its straight LR mode. In fact, if
+you take one of your SLR(1) grammars, and run it through Ibpag2 using
+the -a option, you probably won't notice any difference in the
+resulting automaton unless you do some debugging or perform some
+timing tests (the GLR parser is slower, though for straight SLR(1)
+grammars not by much). Even with non-SLR(1) grammars, the quasi-GLR
+parser will clip along merrily, using all the same sorts of rules,
+action code, and macros that you would typically use in LR mode!
+
+
+6.__Installing_Ibpag2
+
+ If you are a UNIX user, or have a generic "make" utility, you
+are in luck. Just edit Makefile.dist according to the directions
+given in that file, rename it as "makefile," then execute "make."
+Ibpag2 should be created automatically. If everything goes smoothly,
+then "make install" (su-ing root, if both possible and necessary for
+correct installation of the iiparse.icn file). Check with your system
+administrator if you are on a public system, and aren't sure what to
+do.
+
+ Please be sure to read the directions in the makefile
+carefully, and set DESTDIR and LIBDIR to the directory where you want
+the executable and parser file to reside. Also, make sure the paths
+you specify are correct for your Icon executables. Although Ibpag2
+will apparently compile using iconc, I would recommend using the
+interpreter, icont, first, unless you are planning on working with a
+large grammar.
+
+ If you are using some other system - one that lacks "make" -
+then shame on your manufacturer :-). You'll be a bit inconvenienced.
+Try typing:
+
+ icont -o ibpag2 follow.icn ibpag2.icn ibreader.icn \
+	itokens.icn ibutil.icn ibwriter.icn iohno.icn \
+ outbits.icn slritems.icn slrtbls.icn shrnktbl.icn \
+ version.icn slshupto.icn
+
+The backslashes merely indicate that the next line is a continuation.
+The whole thing should, in other words, be on a single line. As noted
+above, you may compile rather than interpret - if your OS supports the
+Icon compiler. Just replace "icont" above with "iconc." The
+resulting executable will run considerably faster than with "icont,"
+although the time required to compile it may be large, and the (still
+somewhat experimental) compiler may not work smoothly in all
+environments.
+
+	If your operating system supports environment variables, and
+you have set up your LPATH according to the specifications in the Icon
+distribution (see below), then you may copy iiparse.lib and
+iiglrpar.lib to some file in your LPATH. If you do not do this, or if
+your OS does not support environment variables, then you must be in
+the directory where you keep your Ibpag2 files when you use it, or
+else invoke Ibpag2 with the -p dirname option (where dirname is the
+directory that holds the iiparse.lib and iiglrpar.lib files that come
+with the Ibpag2 distribution). The .lib files contain template
+parsers that are critical to Ibpag2's operation. Ibpag2 will abort if
+it cannot find them.
+
+ If your operating system permits the creation of macros or
+batch files, it might be useful to create one that changes
+automatically to the Ibpag2 source directory, and runs the executable.
+This has the side-benefit of making it easier for Ibpag2 to find the
+parser library files, iiparse.lib and iiglrpar.lib. Under DOS, for
+instance, one might create a batch file that says:
+
+ c:
+ cd c:\ibpag2
+ iconx ibpag2 %1 %2 %3 %4 %5 %6 %7 %8 %9
+
+DOS, it turns out, has to execute Icon files indirectly through iconx,
+so this technique has yet another advantage in that it hides the
+second level of indirection - although it prevents you from using
+input and output redirection. Naturally, the above example assumes
+that Ibpag2 is in c:\ibpag2.
+
+ Ibpag2 assumes the existence on your system, not only of an
+Icon interpreter or compiler, but also of an up-to-date Icon Program
+Library.  There are several routines included in the IPL that Ibpag2
+uses. Make sure you (or the local system administrators) have put the
+IPL online, and have translated the appropriate object modules. Set
+your IPATH environment variable to point to the place where the object
+modules reside. Set LPATH to point to the modules' source files.
+Both IPATH and LPATH are documented in the doc directory of the Icon
+source tree (ipd224.doc). If your system does not support environment
+variables, copy ximage.icn, options.icn, ebcdic.icn, and escape.icn
+from the IPL into the Ibpag2 source directory, and compile them in
+with the rest of the Ibpag2 source files, either by adding them to the
+SRC variable in the makefile, or by adding them manually to the "icont
+-o ..." command line given above.
+
+ If you have any problems installing or using Ibpag2, please
+feel free to drop me, Richard Goerwitz, an e-mail message at
+goer@midway.uchicago.edu, or (via the post) at:
+
+ 5410 S. Ridgewood Ct., 2E
+ Chicago, IL 60615
+
+
+7.__Bibliography
+
+1. Aho, Alfred V., Sethi, Ravi, and Ullman, Jeffrey D. Compilers.
+ Addison-Wesley: Reading, Massachusetts, second printing, 1988.
+
+2. Griswold, Ralph E. and Griswold, Madge T. The Icon Programming
+ Language. Prentice-Hall, Inc.: Englewood Cliffs, New Jersey, USA,
+ second edition, 1990.
+
+3. Griswold, Ralph E., Jeffery, Clinton L., and Townsend, Gregg M.
+ Version 8.10 of the Icon Programming Language. Univ. of Arizona
+ Icon Project Document 212, 1993. (obtain via anonymous FTP from
+ cs.arizona.edu ~ftp/icon/docs/ipd212.doc)
+
+4. Tomita, Masaru. Efficient Parsing for Natural Language. Boston:
+ Kluwer Academic Publishers, c. 1985.
+
+5. Tomita, Masaru editor. Generalized LR Parsing. Boston: Kluwer
+ Academic Publishers, 1991.
diff --git a/ipl/packs/ibpag2/beta2ref.ibp b/ipl/packs/ibpag2/beta2ref.ibp
new file mode 100644
index 0000000..62fa62b
--- /dev/null
+++ b/ipl/packs/ibpag2/beta2ref.ibp
@@ -0,0 +1,117 @@
+#
+# Ibpag2 source file for OT betacode-to-English converter.
+#
+# "Betacode" is the name used for the markers that the Thesaurus
+# Linguae Graecae uses to segment texts into works, books, chapters,
+# verses, etc. The Michigan-Claremont scan of the Hebrew OT (BHS)
+# uses a subset of the betacode "language." This file contains a
+# parser for that language that converts it into human readable form.
+#
+# Reads the standard input. Sends the original text, with betacode
+# markers converted to human-readable form, to the standard output.
+#
+
+%{
+
+# These need to be global, because all of the actions modify them.
+# Remember that the default scope for a variable used in an action is
+# that action.
+#
+global betavals, blev
+
+%}
+
+%token INTVAL, STRVAL, LINE
+
+%%
+
+# A betacode document is a sequence of betalines (possibly empty).
+betalines	: betalines, betaline
+		| epsilon
+		;
+
+# A betaline is either a "~" reference line carrying book (cvalue),
+# chapter (xvalue), and verse (yvalue) fields, or an ordinary text
+# LINE, which is echoed unchanged.  After a reference line, print
+# "book chapter:verse" (only once the chapter slot holds an integer),
+# then reset the global blev to 4 -- i.e. past the deepest level --
+# so the epsilon defaults below stay inert until some field on a
+# later reference line actually changes.
+betaline	: '~', cvalue, xvalue, yvalue, '\n'
+		{ if integer(betavals[2]) then {
+		      write(betavals[1], " ",
+			    betavals[2], ":",
+			    betavals[3])
+		  }
+		  blev := 4	# global
+		}
+		| LINE, '\n'	{ write($1) }
+		;
+
+# cvalue: the book field (level 1).  Either the full a/b/c form (the
+# 'c' value, $6, is what is kept) or a bare 'c' value; epsilon means
+# the field was absent, and betavals[1] is left alone.
+cvalue	: 'a', value, 'b', value, 'c', value
+		{ betavals[blev := 1] := $6 }
+	| 'c', value	{ betavals[blev := 1] := $2 }
+	| epsilon
+	;
+
+# xvalue: the chapter field (level 2).  'x' with a value sets it;
+# bare 'x' increments it (or starts it at 1 if it is not yet an
+# integer); if the field is absent, it resets to 1 only when a
+# shallower level (blev < 2, i.e. the book) changed on this line.
+xvalue	: 'x', value	{ betavals[blev := 2] := $2 }
+	| 'x'		{ if integer(betavals[2])
+			  then betavals[blev := 2] +:= 1
+			  else betavals[blev := 2] := 1
+			}
+	| epsilon	{ if blev < 2 then
+			      betavals[2] := 1
+			}
+	;
+
+# yvalue: the verse field (level 3); same idea as xvalue, one level
+# deeper.  (NOTE(review): the bare 'y' case increments without the
+# integer() guard used for bare 'x' -- it assumes betavals[3] already
+# holds an integer.)
+yvalue	: 'y', value	{ betavals[blev := 3] := $2 }
+	| 'y'		{ betavals[blev := 3] +:= 1 }
+	| epsilon	{ if blev < 3 then
+			      betavals[3] := 1
+			}
+	;
+
+# A field value is either an integer or a string token; the lexeme
+# itself (in iilval) is passed through unchanged.
+value	: INTVAL	{ return $1 }
+	| STRVAL	{ return $1 }
+	;
+
+
+%%
+
+
+# iilex(infile) -- lexical analyzer for the betacode parser; suspends
+# one token code at a time for iiparse() to consume.  A line that
+# begins with "~" is broken into: ord("~"), then, for each field, the
+# ord() of its letter (a, b, c, x, or y) followed -- when a value is
+# present -- by STRVAL (a quoted string, quotes stripped) or INTVAL
+# (a digit run), with the value itself left in the global iilval.
+# Any other line is handed over whole as a single LINE token.  Every
+# line, betacode or not, is terminated by an ord("\n") token.  Stops
+# the program on an unrecognized betacode field letter.
+procedure iilex(infile)
+
+    local line
+    # betavals is global; holds the current [book, chapter, verse]
+    initial betavals := ["", 0, 0]
+
+    while line := read(infile) do {
+	line ? {
+	    if ="~" then {
+		suspend ord("~")
+		until pos(0) do {
+		    case move(1) of {
+			"a" : suspend ord("a")
+			"b" : suspend ord("b")
+			"c" : suspend ord("c")
+			"x" : suspend ord("x")
+			"y" : suspend ord("y")
+			default : stop("betacode error: ", line)
+		    }
+		    # optional value after the letter: quoted string...
+		    if ="\"" then {
+			iilval := tab(find("\""))
+			suspend STRVAL
+			move(1)		# skip the closing quote
+		    } else {
+			# ...or a bare integer
+			if iilval := integer(tab(many(&digits)))
+			then suspend INTVAL
+		    }
+		}
+		suspend ord("\n")
+	    }
+	    else {
+		# ordinary text line: pass it through verbatim
+		iilval := line
+		suspend LINE
+		suspend ord("\n")
+	    }
+	}
+    }
+
+end
+
+
+# main() -- parse the standard input; the grammar's action code
+# writes the converted text to the standard output.
+procedure main()
+    return iiparse(&input)
+end
diff --git a/ipl/packs/ibpag2/follow.icn b/ipl/packs/ibpag2/follow.icn
new file mode 100644
index 0000000..fa3c8c6
--- /dev/null
+++ b/ipl/packs/ibpag2/follow.icn
@@ -0,0 +1,332 @@
+############################################################################
+#
+# Name: follow.icn
+#
+# Title: compute follow sets for grammar
+#
+# Author: Richard L. Goerwitz
+#
+# Version: 1.15
+#
+############################################################################
+#
+# This file contains FIRST(st, symbol...) and FOLLOW(start_symbol,
+# st, symbol). For FIRST(), arg1 is a list of productions. Arg 2 is
+# a string (nonterminal) or an integer (terminal). FIRST may take
+# more than one symbol argument. FOLLOW takes a string as its first
+# argument, a list of productions as its second, and a symbol as its
+# third. There is never any need to call FOLLOW with any more than
+# one symbol. The return values for FIRST() and FOLLOW() may be
+# described as follows:
+#
+# FIRST returns the set of all terminal symbols that begin valid
+# prefixes of the first symbol argument, or, if this contains
+# epsilon, of the first symbol -- <epsilon> ++ the set of terminals
+# beginning valid prefixes of the second symbol, etc.... The first
+# argument, st, contains the production list over which FIRST is to
+# be computed.
+#
+# FOLLOW is similar, except that it accepts only one symbol argument,
+# and returns the set of nonterminals that begin valid prefixes of
+# symbols that may follow symbol in the grammar defined by the
+# productions in st.
+#
+# Both FIRST() and FOLLOW() are optimized. When called for the first
+# time with a specific production list (st), both FIRST() and
+# FOLLOW() create the necessary data structures to calculate their
+# respective return values. Once created, these data structures are
+# saved, and re-used for subsequent calls with the same st argument.
+# The implications for the user are two: 1) The first call to FOLLOW
+# or FIRST for a given production list will take a while to return,
+# but 2) subsequent calls will return much faster. Naturally, you
+# can call both FIRST() and FOLLOW() with various st arguments
+# throughout the life of a given program.
+#
+############################################################################
+#
+# Links: none
+#
+############################################################################
+
+
+#
+# FIRST: list|set x string|integer... -> set
+# (st, symbols...) -> FIRST_set
+#
+# Where symbols are strings or integers (nonterminal or terminal
+# symbols in a production in the list or set of productions, st),
+# and where FIRST_set is a set of integers corresponding to
+# terminal symbols that begin valid prefixes of symbols[1], or if
+# that derives epsilon, of symbols[1] -- epsilon ++ symbols[2],
+# unless that derives epsilon, etc...
+#
+procedure FIRST(st, symbols[])
+
+ local i, result, FIRST_tbl
+ static FIRST_tbl_tbl
+ initial FIRST_tbl_tbl := table()
+
+ # Memoize one FIRST table per production list: the /x := y idiom
+ # assigns only when the entry for st is still &null, so
+ # make_FIRST_sets() runs once per distinct st argument.
+ /FIRST_tbl_tbl[st] := make_FIRST_sets(st)
+ FIRST_tbl := FIRST_tbl_tbl[st]
+
+ # Accumulate terminals left to right across the symbol arguments,
+ # stopping at the first symbol whose FIRST set lacks <epsilon> (-2).
+ result := set()
+ i := 0
+ while *symbols >= (i +:= 1) do {
+ # iohno(90): symbol has no FIRST set, i.e. never defined (fatal).
+ /FIRST_tbl[symbols[i]] & iohno(90, image(symbols[i]))
+ if not member(FIRST_tbl[symbols[i]], -2) then {
+ # We're done if no epsilons.
+ result ++:= FIRST_tbl[symbols[i]]
+ break
+ } else {
+ # Remove the epsilon & try the next symbol in p.RHS.
+ result ++:= FIRST_tbl[symbols[i]] -- FIRST_tbl[-2]
+ }
+ }
+ # If we get to here without finding a symbol that doesn't derive
+ # epsilon, then give up and insert <epsilon> into result.
+ if i > *symbols then
+ result ++:= FIRST_tbl[-2]
+
+ return result
+
+end
+
+
+#
+# FOLLOW: list|set x string|integer -> set
+# (st, symbol) -> FOLLOW_set
+#
+procedure FOLLOW(start_symbol, st, symbol)
+
+ # Returns the FOLLOW set for symbol in the grammar defined by the
+ # production list st, with start_symbol as the start symbol.
+ # FOLLOW tables are memoized per st argument (the /x := y idiom
+ # builds the table only on the first call for a given st), so only
+ # the first call for a given grammar is expensive.
+ static FOLLOW_tbl_tbl
+ initial FOLLOW_tbl_tbl := table()
+
+ /FOLLOW_tbl_tbl[st] := make_slr_FOLLOW_sets(start_symbol, st)
+ return FOLLOW_tbl_tbl[st][symbol]
+
+end
+
+
+#
+# Below is the procedure make_slr_FOLLOW_sets(start_symbol, st),
+# which accepts a string, a set, and a table as its arguments and
+# returns another table. The first argument must contain the start
+# symbol for the set (or list) of productions contained in the second
+# argument. Returns a table of FOLLOW sets, where keys = symbols and
+# values = follow sets for those symbols.
+#
+# The algorithm - somewhat inefficiently implemented here - works out
+# as follows:
+#
+# 1. Place $ (internal 0) in FOLLOW_tbl[start_symbol].
+# 2. Initialize FOLLOW_tbl[symbol] to { } for every other symbol.
+# 3. For each production A -> aBb do FOLLOW_tbl[B] ++:= FIRST(b) --
+# FIRST(<epsilon>).
+# 4. For each production A -> aBb where FIRST(b) contains
+# <epsilon> and for each production A -> aB, do FOLLOW_tbl[B] ++:=
+# FOLLOW_tbl[A].
+#
+# Repeat steps 3 and 4 until no FOLLOW set can be expanded, at which
+# point return the FOLLOW table.
+#
+# Note that <epsilon> is represented internally by -2.
+#
+
+
+#
+# make_slr_FOLLOW_sets: string x set/list -> table
+# (start_symbol, st) -> FOLLOW_tbl
+#
+# Where start_symbol is the start symbol for the grammar defined
+# by the set/list of productions in st, and where FOLLOW_tbl is a
+# table of follow sets (keys = symbols, values = follow sets for
+# the symbols).
+#
+procedure make_slr_FOLLOW_sets(start_symbol, st)
+
+ # Builds the full FOLLOW table for the grammar in st, following
+ # steps 1-4 documented in the comment block above.  Iterates to a
+ # fixed point: a pass that adds nothing to any FOLLOW set ends the
+ # loop.
+ local FOLLOW_tbl, k, size, old_size, p, i, j
+
+ FOLLOW_tbl := table()
+ # step 1 above; note that 0 = EOF
+ FOLLOW_tbl[start_symbol] := set([0])
+
+ # step 2
+ every k := (!st).LHS do
+ /FOLLOW_tbl[k] := set()
+
+ # steps 3 and 4
+ size := 0
+ #
+ # When the old size of the FOLLOW sets equals the new size, we are
+ # done because nothing was added to the FOLLOW sets on the last
+ # pass.  (~===:= succeeds, and updates old_size, only while the
+ # two values differ.)
+ #
+ while old_size ~===:= size do {
+ size := 0
+ every p := !st do {
+ every i := 1 to *p.RHS-1 do {
+ # Only nonterminals (strings) have FOLLOW sets; integer
+ # RHS members are terminals and are skipped.
+ type(p.RHS[i]) == "string" | next
+ /FOLLOW_tbl[p.RHS[i]] & iohno(90, image(p.RHS[i]))
+ # Go through every RHS symbol until we get a FIRST set
+ # without an epsilon move.
+ every j := i+1 to *p.RHS do {
+ if member(FIRST(st, p.RHS[j]), -2) then {
+ FOLLOW_tbl[p.RHS[i]] ++:=
+ FIRST(st, p.RHS[j]) -- FIRST(st, -2)
+ } else {
+ FOLLOW_tbl[p.RHS[i]] ++:= FIRST(st, p.RHS[j])
+ size +:= *FOLLOW_tbl[p.RHS[i]]
+ break next
+ }
+ }
+ # If we get past "break next" then b in A -> aBb =>*
+ # <epsilon>; add FOLLOW_tbl[A] to FOLLOW_tbl[B].
+ FOLLOW_tbl[p.RHS[i]] ++:= FOLLOW_tbl[p.LHS]
+ size +:= *FOLLOW_tbl[p.RHS[i]]
+ }
+ # Add FOLLOW_tbl[A] to FOLLOW_tbl[B] for the last symbol in the
+ # RHS of every rule.
+ type(p.RHS[*p.RHS]) == "string" | next
+ /FOLLOW_tbl[p.RHS[*p.RHS]] & iohno(90, image(p.RHS[*p.RHS]))
+ FOLLOW_tbl[p.RHS[*p.RHS]] ++:= FOLLOW_tbl[p.LHS]
+ size +:= *FOLLOW_tbl[p.RHS[*p.RHS]]
+ }
+ }
+
+ # Print human-readable version of FOLLOW_tbl if instructed to do so.
+ if \DEBUG then
+ print_follow_sets(FOLLOW_tbl)
+
+ # check for useless nonterminal symbols
+ every k := (!st).LHS do
+ *FOLLOW_tbl[k] = 0 & iohno(91, k)
+
+ return FOLLOW_tbl
+
+end
+
+
+#
+# Below is the routine make_FIRST_sets(st), which accepts as its one
+# argument a list or set of production records, and which returns a
+# table t, where t's keys are symbols from the grammar defined by the
+# productions in st, and where the values associated with each of
+# these keys is the FIRST set for that key.
+#
+# Production records are structures where the first two fields, LHS
+# and RHS, contain the left-hand and right-hand side of each rule in
+# a given grammar. The right-hand side is a linked list of integers
+# (used for terminals) and strings (used for nonterminals). LHS must
+# contain a string. Terminals below 1 are reserved. Currently three
+# are actually used:
+#
+# 0 EOF
+# -1 error
+# -2 epsilon
+#
+# For a description of the FIRST() construction algorithm, see Alfred
+# Aho, Ravi Sethi, and Jeffrey D. Ullman _Compilers_ (Reading,
+# Massachusetts: Addison & Wesley, 1986), section 4.4, page 189.
+# Their algorithm is not strictly suitable, as is, for use here. I
+# thank Dave Schaumann of the University of Arizona at Tucson for
+# explaining to me the iterative construction algorithm that in fact
+# *is* suitable.
+#
+# FIRST is computed on an iterative basis as follows:
+#
+# 1. For every terminal symbol a, FIRST(a) = { a }
+# 2. For every non-terminal symbol A, initialize FIRST(A) = { }
+# 3. For every production A -> <epsilon>, add <epsilon> to FIRST(A)
+# 4. For each production of the grammar having the form X -> Y1
+# Y2 ... Yn, perform the following procedure:
+# i := 1
+# while i <= number-of-RHS-symbols do {
+# if <epsilon> is not in FIRST(Y[i]) then {
+# FIRST(X) ++:= FIRST(Y[i])
+# break
+# } else {
+# FIRST(X) ++:= FIRST(Y[i]) -- FIRST[<epsilon>]
+# i +:= 1
+# }
+# }
+# if i > number-of-RHS-symbols then
+# # <epsilon> is in FIRST(Y[i])
+# FIRST(X) ++:= FIRST[epsilon]
+# 5. Repeat step 3 until no new symbols or <epsilon> can be added
+# to any FIRST set
+#
+
+
+#
+# make_FIRST_sets: set/list -> table
+# st -> t
+#
+# Where st is a set or list of production records, and t is a
+# table of FIRST sets, where the keys = terminal or nonterminal
+# symbols and the values = sets of terminal symbols.
+#
+# Epsilon move is -2; terminals are positive integers;
+# nonterminals are strings. Error is -1; EOF is 0.
+#
+procedure make_FIRST_sets(st)
+
+ # Builds the table of FIRST sets for every symbol in the grammar
+ # st, per the five-step iterative algorithm documented above.
+ # Keys are symbols (strings for nonterminals, integers for
+ # terminals); values are sets of integer terminal codes.
+ local FIRST_tbl, symbol, p, old_size, size, i
+
+ FIRST_tbl := table()
+ FIRST_tbl[0] := set([0])
+
+ # steps 1, 2, and 3 above
+ every p := !st do {
+ # check for empty RHS (an error)
+ *p.RHS = 0 & iohno(11, production_2_string(p))
+ # step 1
+ every symbol := !p.RHS do {
+ if type(symbol) == "integer"
+ then FIRST_tbl[symbol] := set([symbol])
+ }
+ # step 2
+ /FIRST_tbl[p.LHS] := set() &
+ # step 3
+ if *p.RHS = 1 then {
+ if p.RHS[1] === -2 # -2 is epsilon
+ then insert(FIRST_tbl[p.LHS], -2)
+ }
+ }
+
+ # steps 4 and 5 above
+ size := 0
+ #
+ # When the old size of the FIRST sets equals the new size, we are
+ # done. As long as they're unequal, set old_size to size and try
+ # to add to the FIRST sets.  (~===:= both tests and updates
+ # old_size; this is a fixed-point iteration.)
+ #
+ while old_size ~===:= size do {
+ size := 0
+ every p := !st do {
+ every i := 1 to *p.RHS do {
+ \FIRST_tbl[p.RHS[i]] | iohno(90, image(p.RHS[i]))
+ if not member(FIRST_tbl[p.RHS[i]], -2) then {
+ # We're done with this pass if no epsilons.
+ FIRST_tbl[p.LHS] ++:= FIRST_tbl[p.RHS[i]]
+ size +:= *FIRST_tbl[p.LHS]
+ break next
+ } else {
+ # Remove the epsilon & try the next symbol in p.RHS.
+ FIRST_tbl[p.LHS] ++:= FIRST_tbl[p.RHS[i]] -- FIRST_tbl[-2]
+ }
+ }
+ # If we get past the every...do structure without
+ # break+next-ing, then we are still finding epsilons. In
+ # this case, add epsilon to FIRST_tbl[p.LHS].
+ FIRST_tbl[p.LHS] ++:= FIRST_tbl[-2]
+ size +:= *FIRST_tbl[p.LHS]
+ }
+ }
+
+ # Print human-readable version of FIRST_tbl if instructed to do so.
+ if \DEBUG then
+ print_first_sets(FIRST_tbl)
+
+ return FIRST_tbl
+
+end
diff --git a/ipl/packs/ibpag2/iacc.ibp b/ipl/packs/ibpag2/iacc.ibp
new file mode 100644
index 0000000..a169db8
--- /dev/null
+++ b/ipl/packs/ibpag2/iacc.ibp
@@ -0,0 +1,495 @@
+############################################################################
+#
+# Name: iacc.ibp
+#
+# Title: YACC-like front-end for Ibpag2 (experimental)
+#
+# Author: Richard L. Goerwitz
+#
+# Version: 1.6
+#
+############################################################################
+#
+# Summary:
+#
+# Iacc is a YACC-like Ibpag2 preprocessor (very experimental).
+# Iacc simply reads &input (assumed to be a YACC file, but with Icon
+# code in the action fields), and writes an Ibpag2 file to &output.
+#
+############################################################################
+#
+# Installation:
+#
+# This file is not an Icon file, but rather an Ibpag2 file. You
+# must have Ibpag2 installed in order to run it. To create the iacc
+# executable, first create iacc.icn by typing "ibpag2 -f iacc.ibp -o
+# iacc.icn," then compile iacc.icn as you would any other Icon file
+# to create iacc (or on systems without direct execution, iacc.icx).
+# Put more simply, iacc.ibp not only outputs Ibpag2 files, but is
+# itself generated using Ibpag2 + icon{t,c}.
+#
+############################################################################
+#
+# Implementation notes:
+#
+# Iacc uses an YACC grammar that is actually LR(2), and not
+# LR(1), as Ipbag2 would normally require in standard mode. Iacc
+# obtains the additional token lookahead via the lexical analyzer.
+# The place it uses that lookahead is when it sees an identifier. If
+# the next token is a colon, then it is the LHS of a rule (C_IDENT
+# below); otherwise it's an IDENT in the RHS of some rule. Crafting
+# the lexical analyzer in this fashion makes semicolons totally
+# superfluous (good riddance!), but it makes it necessary for the
+# lexical analyzer to suspend some dummy tokens whose only purpose is
+# to make sure that it doesn't eat up C or Icon action code while
+# trying to satisfy the grammar's two-token lookahead requirements
+# (see how RCURL and '}' are used below in the cdef and act
+# productions).
+#
+# Iacc does its work by making six basic changes to the input
+# stream: 1) puts commas between tokens and symbols in rules, 2)
+# removes superfluous union and type declarations/tags, 3) inserts
+# "epsilon" into the RHS of empty rules, 4) turns "$$ = x" into
+# "return x", 5) rewrites rules so that all actions appear at the end
+# of a production, and 6) strips all comments.
+#
+# Although Iacc is really meant for grammars with Icon action
+# code, Iacc can, in fact, accept straight YACC files, with C action
+# code. There isn't much point to using it this way, though, since
+# its output is not meant to be human readable. Rather, it is to be
+# passed directly to Ibpag2 for processing. Iacc is simply a YACCish
+# front end. Its output can be piped directly to Ibpag2 in most
+# cases: iacc < infile.iac | ibpag2 > infile.icn.
+#
+############################################################################
+#
+# Links: longstr, strings
+# See also: ibpag2
+#
+############################################################################
+
+%{
+
+link strings, longstr
+global newrules, lval, symbol_no
+
+%}
+
+# basic entities
+%token C_IDENT, IDENT # identifiers and literals
+%token NUMBER # [0-9]+
+
+# reserved words: %type -> TYPE, %left -> LEFT, etc.
+%token LEFT, RIGHT, NONASSOC, TOKEN, PREC, TYPE, START, UNION
+
+# miscellaneous
+%token MARK # %%
+%token LCURL # %{
+%token RCURL # dummy token used to start processing of C code
+
+%start yaccf
+
+%%
+
+yaccf : front, back
+front : defs, MARK { write(arg2) }
+back : rules, tail {
+ every write(!\newrules)
+ if write(\arg2) then
+ every write(!&input)
+ }
+tail : epsilon { return &null }
+ | MARK { return arg1 }
+
+defs : epsilon
+ | defs, def { write(\arg2) }
+ | defs, cdef { write(\arg2) }
+
+def : START, IDENT { return arg1 || " " || arg2 }
+ | rword, tag, nlist {
+ if arg1 == "%type"
+ then return &null
+ else return arg1 || " " || arg3
+ }
+cdef : stuff, RCURL, RCURL { return arg1 }
+stuff : UNION { get_icon_code("%}"); return &null }
+ | LCURL { return "%{ " || get_icon_code("%}") }
+
+rword : TOKEN | LEFT | RIGHT | NONASSOC | TYPE
+
+tag : epsilon { return &null }
+ | '<', IDENT, '>' { return "<" || arg2 || ">" }
+
+nlist : nmno { return arg1 }
+ | nlist, nmno { return arg1 || ", " || arg2 }
+ | nlist, ',', nmno { return arg1 || ", " || arg3 }
+
+nmno : IDENT { return arg1 }
+ | IDENT, NUMBER { return arg1 }
+
+rules : LHS, ':', RHS { write(arg1, "\t: ", arg3) }
+ | rules, rule { write(arg2) }
+
+RHS : rbody, prec { return arg1 || " " || arg2 }
+
+rule : LHS, '|', RHS { return "\t| " || arg3 }
+ | LHS, ':', RHS { return arg1 || "\t: " || arg3 }
+
+LHS : C_IDENT { symbol_no := 0 ; return arg1 }
+ | epsilon { symbol_no := 0 }
+
+rbody : IDENT { symbol_no +:= 1; return arg1 }
+ | act { return "epsilon " || arg1 }
+ | middle, IDENT { return arg1 || ", " || arg2 }
+ | middle, act { return arg1 || " " || arg2 }
+ | middle, ',', IDENT { return arg1 || ", " || arg3 }
+ | epsilon { return "epsilon" }
+
+middle : IDENT { symbol_no +:= 1; return arg1 }
+ | act { symbol_no +:= 1; return arg1 }
+ | middle, IDENT { symbol_no +:= 1; return arg1 || ", "||arg2 }
+ | middle, ',', IDENT { symbol_no +:= 1; return arg1 || ", "||arg3 }
+ | middle, act {
+ local i, l1, l2
+ static actno
+ initial { actno := 0; newrules := [] }
+ actno +:= 1
+ l1 := []; l2 := []
+ every i := 1 to symbol_no do {
+ every put(l1, ("arg"|"$") || i)
+ if symbol_no-i = 0 then i := "0"
+ else i := "-" || symbol_no - i
+ every put(l2, ("$"|"$") || i)
+ }
+ put(newrules, "ACT_"|| actno ||
+ "\t: epsilon "|| mapargs(arg2, l1, l2))
+ symbol_no +:= 1
+ return arg1 || ", " || "ACT_" || actno
+ }
+
+act : '{', cstuff, '}', '}' { return "{" || arg2 }
+cstuff : epsilon { return get_icon_code("}") }
+
+prec : epsilon { return "" }
+ | PREC, IDENT { return arg1 || arg2 }
+ | PREC, IDENT, act { return arg1 || arg2 || arg3 }
+
+
+%%
+
+
+procedure iilex()
+
+ # iilex: () -> integer token stream (a generator)
+ #
+ # One-token buffering layer over next_token().  Each token is held
+ # back until its successor is seen; this gives the parser the
+ # extra lookahead described in the header comments: an IDENT that
+ # turns out to be followed by a colon is re-suspended as C_IDENT
+ # (the LHS of a rule).  The buffered token's value is restored
+ # into the global iilval just before the token is suspended.
+ local t
+ static last_token, last_lval, colon
+ initial colon := ord(":")
+
+ every t := next_token() do {
+ iilval := last_lval
+ if \last_token then {
+ if t = colon then {
+ if last_token = IDENT
+ then suspend C_IDENT
+ else suspend last_token
+ } else
+ suspend last_token
+ }
+ last_token := t
+ last_lval := lval
+ }
+ # Flush the final buffered token (if any) at end of input.
+ iilval := last_lval
+ suspend \last_token
+
+end
+
+
+procedure next_token()
+
+ # next_token: () -> integer token codes (a generator)
+ #
+ # Character-at-a-time lexer over &input (via reads()).  Suspends
+ # one token code per lexeme, leaving the lexeme's text in the
+ # global lval.  Single-character operators are suspended as their
+ # ord() values.  After "{" and after %{/%union, extra dummy tokens
+ # ("}" / RCURL) are suspended so the grammar's actions can invoke
+ # get_icon_code() before the lexer eats the embedded code - the
+ # two-token-lookahead trick described in the header comments.
+ local reserveds, UNreserveds, c, idchars, marks
+
+ reserveds := ["break","by","case","create","default","do",
+ "else","end","every","fail","global","if",
+ "initial","invocable","link","local","next",
+ "not","of","procedure","record","repeat",
+ "return","static","suspend","then","to","until",
+ "while"]
+
+ # Identifiers that collide with Icon reserved words are rewritten
+ # with a trailing underscore (via mapargs below) so they remain
+ # legal in generated Icon code.
+ UNreserveds := ["break_","by_","case_","create_","default_","do_",
+ "else_","end_","every_","fail_","global_","if_",
+ "initial_","invocable_","link_","local_","next_",
+ "not_","of_","procedure_","record_","repeat_",
+ "return_","static_","suspend_","then_","to_",
+ "until_","while_"]
+
+ idchars := &letters ++ '._'
+ marks := 0
+
+ c := reads()
+ repeat {
+ lval := &null
+ case c of {
+ "#" : { do_icon_comment(); c := reads() | break }
+ "<" : { suspend ord(c); c := reads() | break }
+ ">" : { suspend ord(c); c := reads() | break }
+ ":" : { suspend ord(c); c := reads() | break }
+ "|" : { suspend ord(c); c := reads() | break }
+ "," : { suspend ord(c); c := reads() | break }
+ "{" : { suspend ord(c | "}" | "}"); c := reads() }
+ "/" : {
+ reads() == "*" | stop("unknown YACC operator, \"/\"")
+ do_c_comment()
+ c := reads() | break
+ }
+ "'" : {
+ # Character literal: accumulate through the closing
+ # quote (honoring backslash escapes) as an IDENT.
+ lval := "'"
+ while lval ||:= (c := reads()) do {
+ if c == "\\"
+ then lval ||:= reads()
+ else if c == "'" then {
+ suspend IDENT
+ break
+ }
+ }
+ c := reads() | break
+ }
+ "%" : {
+ lval := "%"
+ while any(&letters, c := reads()) do
+ lval ||:= c
+ if *lval = 1 then {
+ if c == "%" then {
+ lval := "%%"
+ suspend MARK
+ # Stop generating after the second %%; the
+ # remainder of the file is passed through verbatim.
+ if (marks +:= 1) > 1 then
+ fail
+ } else {
+ if c == "{" then {
+ lval := "%{"
+ suspend LCURL | RCURL | RCURL
+ }
+ else stop("malformed %declaration")
+ }
+ c := reads() | break
+ } else {
+ case lval of {
+ "%prec" : suspend PREC
+ "%left" : suspend LEFT
+ "%token" : suspend TOKEN
+ "%right" : suspend RIGHT
+ "%type" : suspend TYPE
+ "%start" : suspend START
+ "%union" : suspend UNION | RCURL | RCURL
+ "%nonassoc" : suspend NONASSOC
+ default : stop("unknown % code in def section")
+ }
+ }
+ }
+ default : {
+ if any(&digits, c) then {
+ lval := c
+ while any(&digits, c := reads()) do
+ lval ||:= c
+ suspend NUMBER
+ }
+ else {
+ if any(idchars, c) then {
+ lval := c
+ while any(&digits ++ idchars, c := reads()) do
+ lval ||:= c
+ lval := mapargs(lval, reserveds, UNreserveds)
+ suspend IDENT
+ }
+ else {
+ # whitespace
+ c := reads() | break
+ }
+ }
+ }
+ }
+ }
+
+
+end
+
+
+procedure get_icon_code(endmark, comment)
+
+ # get_icon_code: string -> string
+ #
+ # Collects embedded action/declaration code up to endmark ("}" or
+ # "%}"; &null means read to EOF), translating as it goes: "$$ = x"
+ # becomes "return x", YACC macro names are mapped to their Ibpag2
+ # equivalents, and C-style and Icon comments are stripped.  Nested
+ # braces are tracked via count when endmark is "}".
+ # (The comment parameter is accepted but unused here.)
+ local yaccwords, ibpagwords, count, c, c2, s
+
+ yaccwords := ["YYDEBUG", "YYACCEPT", "YYERROR", "yyclearin", "yyerrok"]
+ ibpagwords := ["IIDEBUG", "IIACCEPT", "IIERROR", "iiclearin", "iierrok"]
+
+ s := ""
+ count := 1
+ c := reads()
+
+ repeat {
+ case c of {
+ "\"" : s ||:= c || do_string()
+ "'" : s ||:= c || do_charlit()
+ "$" : {
+ c2 := reads() | break
+ if c2 == "$" then {
+ # "$$ ... =" collapses to "return " (YACC's $$ = x).
+ until (c := reads()) == "="
+ s ||:= "return "
+ } else {
+ s ||:= c
+ c := c2
+ next
+ }
+ }
+ "#" : {
+ # Strip Icon comment; drop the newline it swallowed.
+ if s[-1] == "\n"
+ then s[-1] := ""
+ do_icon_comment()
+ }
+ "/" : {
+ c := reads() | break
+ if c == "*" then
+ do_c_comment()
+ else {
+ s ||:= c
+ next
+ }
+ }
+ "{" : {
+ s ||:= c
+ if endmark == "}" then
+ count +:= 1
+ }
+ "}" : {
+ s ||:= c
+ if endmark == "}" then {
+ count -:= 1
+ count = 0 & (return mapargs(s, yaccwords, ibpagwords))
+ }
+ }
+ "%" : {
+ s ||:= c
+ if endmark == "%}" then {
+ if (c := reads()) == "}"
+ then return mapargs(s || c, yaccwords, ibpagwords)
+ else next
+ }
+ }
+ default : s ||:= c
+ }
+ c := reads() | break
+ }
+
+ # if there is no endmark, just go to EOF
+ if \endmark
+ then stop("input file has mis-braced { code }")
+ else return mapargs(s, yaccwords, ibpagwords)
+
+end
+
+
+procedure do_string()
+
+ # Consumes the remainder of a double-quoted string literal (the
+ # opening quote was already read by the caller), honoring
+ # backslash escapes.  Returns the literal's tail including the
+ # closing quote PLUS the one character read past it, which the
+ # caller appends to its accumulated code as-is.  Stops (fatal) on
+ # EOF inside the literal.
+ local c, s
+
+ s := ""
+ while c := reads() do {
+ case c of {
+ "\\" : s ||:= c || reads()
+ "\"" : return s || c || reads()
+ default : s ||:= c
+ }
+ }
+
+ stop("malformed string literal")
+
+end
+
+
+procedure do_charlit()
+
+ # Same contract as do_string(), but for single-quoted character
+ # literals: returns everything through the closing quote plus one
+ # lookahead character; fatal on EOF inside the literal.
+ local c, s
+
+ s := ""
+ while c := reads() do {
+ case c of {
+ "\\" : s ||:= c || reads()
+ "'" : return s || c || reads()
+ default : s ||:= c
+ }
+ }
+
+ stop("malformed character literal")
+
+end
+
+
+procedure do_c_comment()
+
+ # Consumes a C-style comment body up to and including the closing
+ # "*/" (the opening "/*" was already read by the caller) and
+ # returns the consumed text.  EOF before "*/" is tolerated: the
+ # text read so far is returned.
+ local c, s
+
+ s := c := reads() |
+ stop("malformed C-style /* comment */")
+
+ repeat {
+ if c == "*" then {
+ s ||:= (c := reads() | break)
+ if c == "/" then
+ return s
+ }
+ else s ||:= (c := reads() | break)
+ }
+
+ return s # EOF okay
+
+end
+
+
+procedure do_icon_comment()
+
+ # Consumes an Icon "#" comment through the end of the line (the
+ # "#" itself was already read by the caller) and returns its text.
+ # Backslash-newline continues the comment; EOF ends it quietly.
+ local c, s
+
+ s := ""
+ while c := reads() do {
+ case c of {
+ "\\" : s ||:= c || (reads() | break)
+ "\n" : return s
+ default : s ||:= c
+ }
+ }
+
+ return s # EOF okay
+
+end
+
+
+procedure mapargs(s, l1, l2)
+
+ # mapargs: string x list x list -> string
+ #
+ # Returns a copy of s in which every whole-word occurrence of
+ # l1[i] is replaced by l2[i] (or deleted, if l2 is &null or too
+ # short).  A match counts as a whole word only if the following
+ # character is not a letter, digit, or underscore.  The cset of
+ # word-initial characters (cs) and the replacement table (tbl) are
+ # cached across calls for the same l1/l2 pair.
+ #
+ local i, s2
+ static cs, tbl, last_l1, last_l2
+
+ if /l1 | *l1 = 0 then return s
+
+ # Rebuild the cache only when the argument lists change.  BUG FIX:
+ # last_l1/last_l2 were never assigned, so the original rebuilt cs
+ # and tbl on every single call; recording the arguments here makes
+ # the intended memoization actually take effect.
+ if not (last_l1 === l1, last_l2 === l2) then {
+ cs := ''
+ every cs ++:= (!l1)[1]
+ tbl := table()
+ every i := 1 to *l1 do
+ insert(tbl, l1[i], (\l2)[i] | "")
+ last_l1 := l1
+ last_l2 := l2
+ }
+
+ s2 := ""
+ s ? {
+ # Advance to each possible word start; tentatively substitute
+ # via the reversible assignment s2 <- ..., backing out (and
+ # copying one character) when the match is not a whole word.
+ while s2 ||:= tab(upto(cs)) do {
+ (s2 <- (s2 || tbl[tab(longstr(l1))]),
+ not any(&letters++&digits++'_')) |
+ (s2 ||:= move(1))
+ }
+ s2 ||:= tab(0)
+ }
+
+ return s2
+
+end
+
+
+procedure main()
+ # Run the Ibpag2-generated parser over &input; iilex above feeds
+ # it tokens, and the grammar's actions write the converted file.
+ iiparse()
+end
diff --git a/ipl/packs/ibpag2/ibpag2.icn b/ipl/packs/ibpag2/ibpag2.icn
new file mode 100644
index 0000000..994cff6
--- /dev/null
+++ b/ipl/packs/ibpag2/ibpag2.icn
@@ -0,0 +1,303 @@
+############################################################################
+#
+# Name: ibpag2.icn
+#
+# Title: Icon-based parser generator (version 2)
+#
+# Author: Richard L. Goerwitz
+#
+# Version: 1.22
+#
+############################################################################
+#
+# The Basics
+#
+# Ibpag2 is a simple tool for generating parsers from grammar
+# specifications. This may sound pretty arcane to those who have
+# never used a parser generator. In fact, though, this kind of tool
+# forms the basis of most programming language implementations.
+# Parser generators are also used in preprocessors, transducers,
+# compilers, interpreters, calculators and in fact for just about any
+# situation where some form of structured input needs to be read into
+# an internal data structure and/or converted into some form of
+# structured output. This might include something as mundane as
+# reading in recipes or mailing addresses from a file, or turning
+# dates of one type (e.g. "September 3, 1993") into another
+# ("9/3/93"). For more information on how to use it, see the README
+# file included with the Ibpag2 distribution.
+#
+############################################################################
+#
+# Running Ibpag2:
+#
+# Invoking Ibpag2 is very, very simple. There are quite a few
+# command-line switches, but all are optional:
+#
+# ibpag2 [-f infile] [-m module] [-o outfile] [-p iiparse.lib dir]
+# [-a] [-c] [-v] [-y]
+#
+# Where infile is the Ibpag2 source file (default &input), outfile is
+# the output file (default &output), module is an optional string
+# appended to all global variables and all procedure calls (to allow
+# multiple running parsers), and where -v instructs Ibpag2 to write a
+# summary of its operations to ibpag2.output. Normally all of these
+# arguments can be ignored. Ibpag2 can usually be run using simple
+# shell redirection symbols (if your OS supports them). See the next
+# paragraph for an explanation of the -p option. The -c option is
+# for compressed tables, and -a is for non-LR or ambiguous grammars.
+# See the advanced sections of README file. -y directs Ibpag2 to
+# resolve reduce/reduce conflicts by their order of occurrence in the
+# grammar, and to resolve shift/reduce conflicts in favor of shift -
+# just like YACC. Invoking Ibpag with -h causes it to abort with a
+# brief help message.
+#
+# Make sure that the iiparse.lib and iiglrpar.lib files are in
+# some path listed in your LPATH directory, or else in a data
+# directory adjacent to some IPL "procs" directory in your LPATH.
+# Basically, LPATH is just a space-separated list of places where
+# .icn library source files reside. If your system does not support
+# environment variables, then there are two ways to tell Ibpag2 where
+# the .lib files are without using LPATH. The first is to move into
+# the directory that contains these files. The second is to supply
+# the files' location using Ibpag's -p option (e.g. ibpag2 -p
+# /usr/local/lib/icon/data).
+#
+############################################################################
+#
+# More Technical Details
+#
+# Technically speaking, Ibpag2 is a preprocessor that accepts a
+# YACC-like source file containing grammar productions and actions,
+# then 1) converts these into parse tables and associated code, 2)
+# adds to them an LR parser, and a few debugging tools, and 3) writes
+# the combination to the standard output, along with the necessary
+# action and goto table construction code. The user must $include,
+# or hard-code into the Ibpag2 source file, a lexical analyzer that
+# returns integers via symbolic $defines generated by %token, %right,
+# etc. declarations in the Ibpag2 source file.
+#
+# Cycles and epsilon moves are handled correctly (to my
+# knowledge). Shift-reduce conflicts are handled in the normal way
+# (i.e. pick the rule with the highest priority, and, in cases where
+# the priority is the same, check the associativities) I decided to
+# flag reduce/reduce conflicts as errors by default, since these
+# often conceal deeper precedence problems. They are easily enough
+# handled, if need be, via dummy precedences. The -y command-line
+# switch turns off this behavior, causing Ibpag2 to resolve
+# reduce/reduce conflicts in a YACCish manner (i.e. favoring the rule
+# that occurs first in the grammar). Ibpag2 normally aborts on
+# shift/reduce conflicts. The -y switch makes Ibpag resolve these in
+# favor of shift, and to keep on processing - again, just like YACC.
+#
+# For more information, see the README file.
+#
+############################################################################
+#
+# Links: ibreader, ibwriter, slrtbls, ibutil, version, options
+#
+############################################################################
+
+# link ibreader, ibwriter, slrtbls, ibutil, version, options
+link options
+
+global DEBUG
+
+procedure main(a)
+
+ # Ibpag2 driver: parse command-line options, read the grammar,
+ # build (and optionally compress) the SLR parse tables, locate the
+ # appropriate parser library file, and write the generated parser.
+ local infile, outfile, verbosefile, atbl, gtbl, grammar, opttbl,
+ module, abort_on_conflict, paths, path, parser_name,
+ iiparse_file
+
+ # Get command-line options.
+ opttbl := options(a, "f:o:vdm:p:hcay", bad_arg)
+
+ # Abort with help message if -h is supplied.
+ if \opttbl["h"] then {
+ write(&errout, ib_version())
+ return ib_help_()
+ }
+
+ # If an input file was specified, open it. Otherwise use stdin.
+ #
+ if \opttbl["f"] then
+ infile := open(opttbl["f"], "r") |
+ bad_arg("can't open " || opttbl["f"])
+ else infile := &input
+
+ # If an output file was specified, use it. Otherwise use stdout.
+ #
+ if \opttbl["o"] then
+ outfile := open(opttbl["o"], "w") |
+ bad_arg("can't open " || opttbl["o"])
+ else outfile := &output
+
+ # If a module name was specified (-m), then use it.
+ #
+ module := opttbl["m"] | ""
+
+ # If the debug option was specified, set all verbose output to go
+ # to errout.
+ #
+ if \opttbl["d"] then {
+ verbosefile := &errout
+ DEBUG := 1
+ }
+
+ # If the verbose option was specified, send all verbose output to
+ # "ibpag2.output" (a bit like YACC's yacc.output file).
+ #
+ # NOTE(review): on failure this error message interpolates
+ # opttbl["v"] (the -v flag's value) rather than the literal file
+ # name "ibpag2.output" - confirm intent.
+ else if \opttbl["v"] then
+ verbosefile := open("ibpag2.output", "w") |
+ bad_arg("can't open " || opttbl["v"])
+
+ # Output defines for YACC-like macros. Output iiisolate and
+ # iiprune if -a option is specified. Sorry for the ugly code.
+ #
+ write_defines(opttbl, outfile, module)
+
+ # Whew! Now fetch the grammar from the input file.
+ #
+ # Emit line directives keyed to actual line numbers in the
+ # original file. Pass its name as arg4. If opttbl["f"] is
+ # null (and the input file is &input), ibreader will default
+ # to something else.
+ #
+ grammar := ibreader(infile, outfile, module, opttbl["f"])
+ if \verbosefile then
+ # grammar contains start symbol, rules, and terminal token table
+ print_grammar(grammar, verbosefile)
+
+ # Fill in parse tables, atbl and gtbl. Abort if there is a
+ # conflict caused by an ambiguity in the grammar or by some
+ # precedence/associativity problem, unless the -a option is
+ # supplied (telling Ibpag2 that ambiguous tables are okay).
+ #
+ if /opttbl["a"] then
+ abort_on_conflict := "yes"
+ atbl := table(); gtbl := table()
+ make_slr_tables(grammar, atbl, gtbl, abort_on_conflict, opttbl["y"])
+ if \verbosefile then
+ # grammar.tbl maps integer terminal symbols to human-readable strings
+ print_action_goto_tables(atbl, gtbl, grammar.tbl, verbosefile)
+
+ # If -c was specified on the command line, compress the action and
+ # goto tables.
+ #
+ if \opttbl["c"] then {
+ write(outfile, "\n$define COMPRESSED_TABLES\n")
+ if \verbosefile then
+ write(verbosefile, "\nNote: parse tables are compressed")
+ shrink_tables(grammar, atbl, gtbl)
+ }
+
+ # Try to find the .lib file using LPATH.  The GLR parser library
+ # is used when -a (ambiguous grammars) was given.
+ #
+ parser_name := {
+ if \opttbl["a"] then "iiglrpar.lib"
+ else "iiparse.lib"
+ }
+
+ # Search order: -p dir (if given), the current directory, then
+ # each LPATH component (with "procs" mapped to "data" first).
+ paths := []
+ put(paths, trim(\opttbl["p"], '/') || "/")
+ put(paths, "")
+ (\getenv)("LPATH") ? {
+ while path := trim(tab(find(" ") | 0), '/') || "/" do {
+ tab(many(' '))
+ if find("procs", path) then
+ put(paths, ibreplace(path, "procs", "data"))
+ put(paths, path)
+ pos(0) & break
+ }
+ }
+ iiparse_file := open(!paths || parser_name, "r") | iohno(2)
+
+ # Write .lib file (contains the iiparse() parser routine), along
+ # with the start symbol, action table, goto table, and a list of
+ # productions.
+ #
+ # grammar contains start symbol, rules, and terminal token table
+ #
+ ibwriter(iiparse_file, outfile, grammar, atbl, gtbl, module)
+
+ return exit(0)
+
+end
+
+
+#
+# write_defines
+#
+procedure write_defines(opttbl, outfile, module)
+
+ # Emits the $define preprocessor lines for the YACC-like macros
+ # (iierrok, iiclearin, IIERROR, IIACCEPT) into the generated file.
+ # The module string is appended to the global names so several
+ # generated parsers can coexist in one program.
+ #
+ # Output defines for YACC-like macros. Output iiisolate and
+ # iiprune if -a option is specified. Sorry for the ugly code.
+ #
+ if \opttbl["a"] then {
+ write(outfile,
+ "$define iiisolate (iidirective", module, " ||:= \"isolate\")")
+ write(outfile,
+ "$define iiprune (iidirective", module, " ||:= \"prune\")")
+ write(outfile,
+ "$define iierrok (iidirective", module, " ||:= \"errok\")")
+ } else {
+ # Standard (non-GLR) parser: iierrok manipulates the error
+ # recovery globals directly.
+ write(outfile,
+ "$define iierrok (recover_shifts", module, " := &null &",
+ " discards", module, " := 0)")
+ }
+ write(outfile,
+ "$define iiclearin (iidirective", module, " ||:= \"clearin\")")
+ write(outfile,
+ "$define IIERROR (iidirective", module, " ||:= \"error\")")
+ write(outfile,
+ "$define IIACCEPT (iidirective", module, " ||:= \"accept\")")
+end
+
+
+#
+# bad_arg
+#
+# Simple routine called if command-line arguments are bad.
+#
+procedure bad_arg(s)
+
+ # Error callback for options() and for file-open failures: prints
+ # the message s plus a usage summary to &errout, then terminates.
+ write(&errout, "ibpag2: ",s)
+ write(&errout,
+ "usage: ibpag2 [-f inf] [-m str ] [-o outf] _
+ [-p dir] [-a] [-c] [-v] [-y]")
+ write(&errout, " for help, type \"ibpag2 -h\"")
+ stop()
+
+end
+
+
+#
+# ib_help_
+#
+procedure ib_help_()
+
+ # Prints the full option summary to &errout and terminates (the -h
+ # help text).
+ write(&errout, "")
+ write(&errout,
+ "usage: ibpag2 [-f inf] [-m str] [-o outf] [-p dir] _
+ [-a] [-c] [-v] [-y]")
+ write(&errout, "")
+ write(&errout, " -f inf........where inf = Ibpag2's input file (default")
+ write(&errout, " &input)")
+ write(&errout, " -m str........where str = a string to be appended to")
+ write(&errout, " global identifiers and procedures")
+ write(&errout, " -o outf.......where outf = Ibpag2's output file (default")
+ write(&errout, " &output)")
+ write(&errout, " -p dir........where dir = directory in which the")
+ write(&errout, " iiparse.lib file resides (mainly for")
+ write(&errout, " systems lacking LPATH support)")
+ write(&errout, " -a............permits ambiguous grammars and multiple")
+ write(&errout, " parses (makes iiparse() a generator).")
+ write(&errout, " -c............compresses action/goto tables (obstructs")
+ write(&errout, " debugging somewhat).")
+ write(&errout, " -v............sends debugging info to ibpag2.output")
+ write(&errout, " -y............tells Ibpag2 to resolve reduce/reduce")
+ write(&errout, " conflicts by order of occurrence in")
+ write(&errout, " the grammar, and to resolve shift/")
+ write(&errout, " reduce conflicts in favor of shift")
+ stop("")
+
+end
diff --git a/ipl/packs/ibpag2/ibreader.icn b/ipl/packs/ibpag2/ibreader.icn
new file mode 100644
index 0000000..8401159
--- /dev/null
+++ b/ipl/packs/ibpag2/ibreader.icn
@@ -0,0 +1,515 @@
+############################################################################
+#
+# Name: ibreader.icn
+#
+# Title: reader for Ibpag2 source files
+#
+# Author: Richard L. Goerwitz
+#
+# Version: 1.29
+#
+############################################################################
+#
+# This file contains a collection of procedures that 1) read in an
+# Ibpag2 source file, 2) output token defines, 3) emit action code,
+# and finally 4) pass a start symbol, list of productions, and token
+# table back to the calling procedure. Described formally:
+#
+# ibreader: file x file x string -> ib_grammar record
+# (in, out, module) -> grammar
+#
+# In is the input stream; out is the output stream; module is an
+# optional string that distinguishes this grammar from others that
+# might also be running simultaneously. Grammar is an ib_grammar
+# record containing the start symbol in its first field and the
+# production list in its second. Its third field contains a table
+# used to map integers to actual token names or character literals,
+# i.e. its keys are things like -1, 0, etc. and its values are things
+# like "error," "EOF," etc.
+#
+# Note that if a module argument is supplied to ibreader(), one must
+# also be supplied to ibwriter(). See ibwriter.icn.
+#
+# The format of the input file is highly reminiscent of YACC. It
+# consists of three basic sections, the first two of which are
+# followed by %%. See the main documentation to Ibpag2 for
+# specifics. Major differences between Ibpag2 and YACC input format
+# include:
+#
+# 1) "$$ = x" constructs are replaced by "return x" (e.g. "$$ =
+# $1 + $3" -> "return $1 + $3")
+#
+# 2) all variables within a given action are, by default, local
+# to that action; i.e. they cannot be accessed by other
+# actions unless you declare them global elsewhere (e.g. in
+# the pass-through part of the declarations section %{ ... %})
+#
+#      3) the %union declaration is not needed by Ibpag2
+#
+# 4) tokens and symbols are separated from each other by a comma
+# (e.g. %token '+', '-' and S : NP, VP)
+#
+# 5) epsilon is indicated by the keyword "epsilon" (e.g. REL :
+# epsilon)
+#
+# 6) both epsilon and error *may* be declared as %tokens for
+# reasons of precedence, although they retain hard-coded
+# internal values (-2 and -1, respectively)
+#
+# 7) all actions must follow the last RHS symbol of the rule they
+# apply to (preceded by an optional %prec directive); to
+# achieve S : NP { action1 }, VP { action2 }, insert a dummy
+# rule: S : NP, dummy, VP { action2 }; dummy : epsilon {
+# action1 } ;
+#
+# 8) YYERROR, YYACCEPT, yyclearin, and yyerrok are the same,
+# except they are written IIERROR, IIACCEPT, iiclearin, and
+# iierrok (i.e. "ii" replaces "yy")
+#
+# 9) Ibpag2's input files are tokenized like modified Icon files,
+# and, as a consequence, Icon's reserved words must not be
+# used as symbols (e.g. "if : if, then" is no go)
+#
+############################################################################
+#
+# Links: itokens, escape
+#
+# See also: ibwriter
+#
+############################################################################
+
+#link itokens, escape
+link escape
+
+record ib_grammar(start, rules, tbl)
+record tokstats(str, no, prec, assoc)
+
+# Declared in itokens.icn:
+# global line_number
+
+#
+# ibreader: file x file x string x string -> ib_grammar record
+# (in, out, module, source_fname) -> grammar
+#
+# Where in is an input stream, out is an output stream, module is
+# some string uniquely identifying this module (optional), and
+# where grammar is an ib_grammar record containing the start
+# symbol in its first field and a list of production records in
+# its second. Source_fname is the string name of Ibpag2's input
+# grammar file. Defaults to "source file."
+#
+procedure ibreader(in, out, module, source_fname)
+
+ local tmp, grammar, toktbl, next_token, next_token_no_nl,
+ token, LHS, t
+
+ /source_fname := "source file"
+ grammar := ib_grammar(&null, list(), table())
+ toktbl := table()
+ next_token := create itokens(in, 1)
+ next_token_no_nl := create 1(tmp := |@next_token, \tmp.sym)
+ token := @next_token_no_nl | iohno(4)
+
+ # Do the %{ $} and %token stuff, i.e. everything up to %%
+ # (NEWSECT).
+ #
+ until token.sym == "NEWSECT" do {
+ case token.sym of {
+ default : {
+ iohno(48, "token "||image(token.str) ||"; line "|| line_number)
+ }
+ "SEMICOL" : {
+ # Skip semicolon. Get another token while we're at it.
+ token := @next_token_no_nl | iohno(47, "line "||line_number)
+ }
+ "BEGGLOB" : {
+ write(out, "\n$line ", line_number, " ", image(source_fname))
+ # Copy token values to out until we reach "%}" (ENDGLOB).
+ (token := copy_icon_stuff(next_token, out)).sym == "ENDGLOB"
+ token := @next_token_no_nl
+ }
+ "MOD" : {
+ (token := @next_token_no_nl).sym == "IDENT" |
+ iohno(30, "line " || line_number)
+ #
+ # Read in token declarations, set associativity and
+ # precedences, and enter the tokens into toktbl.
+ #
+ token := {
+ case token.str of {
+ default : iohno(30, "line " || line_number)
+ "token" : read_decl(next_token_no_nl, toktbl, &null)
+ "right" : read_decl(next_token_no_nl, toktbl, "r")
+ "left" : read_decl(next_token_no_nl, toktbl, "l")
+ "nonassoc": read_decl(next_token_no_nl, toktbl, "n")
+ "union" : iohno(45, "line "|| line_number)
+ "start" : {
+ (token := @next_token_no_nl).sym == "IDENT" |
+ iohno(31, "line " || line_number)
+ /grammar.start := token.str |
+ iohno(32, "line " || line_number)
+ @next_token_no_nl | iohno(4)
+ }
+ }
+ }
+ }
+ }
+ }
+ # Skip past %% (NEWSECT) and semicolon (if present).
+ token := @next_token_no_nl | iohno(47, "line "|| line_number)
+ (token := token | @next_token_no_nl | iohno(4)).sym ~== "SEMICOL"
+ token.sym == "NEWSECT" & iohno(47, "line "|| line_number)
+
+ #
+ # Fetch start symbol if it wasn't defined above via %start; by
+ # default the start symbol is the LHS of rule 1.
+ #
+ /grammar.start := token.str
+
+ # Having reached the end of the declarations section, we can now
+ # copy out a define for each token number, not counting character
+ # literals (which are stored as integers). While we're at it,
+ # create a table that maps token numbers back to character
+ # literals and strings (for use in later verbose and debugging
+ # displays).
+ #
+ write(out, "\n")
+ every t := !toktbl do {
+ if type(t.str) == "integer" then
+ insert(grammar.tbl, t.no, image(char(t.str)))
+ else {
+ insert(grammar.tbl, t.no, t.str)
+ write(out, "$define ", t.str, "\t", t.no)
+ }
+ }
+
+ # Now, finally, read in rules up until we reach EOF or %% (i.e.
+ # NEWSECT). EOF is signaled below by failure of read_RHS().
+ #
+ until token.sym == "NEWSECT" do {
+ token.sym == "IDENT" | iohno(33, token.str ||" line "|| line_number)
+ LHS := token.str
+ token := @next_token_no_nl | iohno(4)
+ token.sym == "COLON" | iohno(34, token.str ||" line "|| line_number)
+ #
+ # Read in RHS, then the action (if any) then the prec (if
+ # any). If we see a BAR, then repeat, re-using the same
+ # left-hand side symbol.
+ #
+ while token :=
+ read_RHS(next_token, next_token_no_nl, out, toktbl, LHS,
+ grammar, module, source_fname) |
+ # if read_RHS fails, we're at EOF
+ break break
+ do token.sym == "BAR" | break
+ }
+
+ # Copy the remainder of the file to out as Icon code.
+ write(out, "\n$line ", line_number, " ", image(source_fname))
+ every copy_icon_stuff(next_token, out, "EOFX")
+
+ # Do final setup on the reverse token table. This table will be
+ # used later to map integers to their original names in verbose or
+ # debugging displays.
+ #
+ insert(grammar.tbl, 0, "$")
+
+ return grammar
+
+end
+
+
+#
+# copy_icon_stuff: coexpression x file x string -> ib_TOK records
+# (next_token, out, except) -> token records
+#
+# Copy Icon code to output stream, also suspending as we go.
+# Insert separators between tokens where needed. Do not output
+# any token whose sym field matches except. The point in
+# suspending tokens as we go is to enable the calling procedure to
+# look for signal tokens that indicate insertion or termination
+# points.
+#
+procedure copy_icon_stuff(next_token, out, except)
+
+ local separator, T
+
+ separator := ""
+ while T := @next_token do {
+ if \T.sym then suspend T
+ if \T.sym == \except then next
+ if any(&digits ++ &letters ++ '_.', \T.str, 1, 2) & \T.sym ~== "DOT"
+ then writes(out, separator)
+ writes(out, T.str)
+ if any(&digits ++ &letters ++ '_.', \T.str, -1, 0) & \T.sym ~== "DOT"
+ then separator := " " else separator := ""
+ }
+
+ # unexpected EOF error
+ (except === "EOFX") | iohno(4)
+
+end
+
+
+#
+# read_decl: coexpression x table x string -> ib_TOK
+# (next_token_no_nl, toktbl, assoc) -> token
+#
+# Read in token declarations, assigning them the correct
+# precedence and associativity. Number the tokens for later
+# $define preprocessor directives. When done, return the last
+# token processed. Toktbl is the table that holds the stats for
+# each declared token.
+#
+procedure read_decl(next_token_no_nl, toktbl, assoc)
+
+ local token, c
+ static token_no, prec
+ initial {
+ token_no := 256
+ prec := 0
+ }
+
+ # All tokens in this list have the same prec and assoc.
+ # Precedence is determined by order. Associativity is determined
+ # by keyword in the calling procedure, and is passed as arg 3.
+ #
+ prec +:= 1
+ assoc === ("n"|"r"|"l"|&null) | iohno(5, image(assoc))
+
+ # As long as we find commas and token names, keep on adding tokens
+ # to the token table. Return the unused token when done. If we
+ # reach EOF, there's been an error.
+ #
+ repeat {
+ token := @next_token_no_nl | iohno(4)
+ case token.sym of {
+ default : iohno(31, token.str ||" line "|| line_number)
+ "CSETLIT" | "STRING": {
+ # Enter character literals as integers.
+ *escape(token.str[2:-1]) = 1 | iohno(49, token.str)
+ c := ord(escape(token.str[2:-1]))
+ toktbl[c] := tokstats(c, c, prec, assoc)
+ }
+ "IDENT" : {
+ case token.str of {
+ "error" :
+ toktbl[token.str] := tokstats("error", -1, prec, assoc)
+ "epsilon":
+ toktbl[token.str] := tokstats("epsilon",-2,prec, assoc)
+ default : {
+ # Enter TOKENs as string-keyed records in toktbl.
+ token_no +:= 1
+ toktbl[token.str] :=
+ tokstats(token.str, token_no, prec, assoc)
+ }
+ }
+ }
+ }
+ # As long as we're seeing commas, go back for more tokens.
+ token := @next_token_no_nl | iohno(4)
+ token.sym == "COMMA" | break
+ }
+
+ # Skip past semicolon, if present (as set up now, it shouldn't be).
+ (token := token | @next_token_no_nl | iohno(4)).sym ~== "SEMICOL"
+ return token
+
+end
+
+
+#
+# read_RHS: coexpression x coexpression x file x table x
+# string x ib_grammar record x string x string -> token
+#
+# Read_RHS goes through the RHS of rule definitions, inserting the
+# resulting productions into a master rule list. At the same
+# time, it outputs the actions corresponding to those productions
+# as procedures that are given names corresponding to the numbers
+# of the productions. I.e. production 1, if endowed with an {
+# action }, will correspond to procedure _1_. Prec and assoc are
+# automatically set to that of the last RHS nonterminal, but this
+# may be changed explicitly by the %prec keyword, as in YACC.
+# Source_fname is the name of the source grammar file we're pro-
+# cessing (caller will give us some reasonable default if we're
+# reading &input).
+#
+# Fails on EOF.
+#
+procedure read_RHS(next_token, next_token_no_nl, out, toktbl, LHS,
+ grammar, module, source_fname)
+
+ local token, rule, c
+ static rule_no
+ initial rule_no := 0
+
+ rule_no +:= 1
+ # LHS RHS POS LOOK no prec assoc
+ rule := production(LHS, list(), &null, &null, rule_no, &null, &null)
+ put(grammar.rules, rule)
+
+ # Read in RHS symbols.
+ #
+ repeat {
+ token := @next_token_no_nl | iohno(4)
+ case token.sym of {
+ default :
+ iohno(35, "token "|| image(token.str)||"; line "|| line_number)
+ "CSETLIT" | "STRING": {
+ *escape(token.str[2:-1]) = 1 | iohno(49, token.str)
+ c := ord(escape(token.str[2:-1]))
+ if \toktbl[c] then {
+ rule.prec := toktbl[c].prec
+ rule.assoc := toktbl[c].assoc
+ }
+ # literals not declared earlier will get caught here
+ else insert(grammar.tbl, c, image(char(c)))
+ put(rule.RHS, c)
+ }
+ "IDENT" : {
+ # If it's a terminal (i.e. a declared token), assign
+ # this rule its precedence and associativity. If it's
+ # not in toktbl, then it's not a declared token....
+ if \toktbl[token.str] then {
+ rule.prec := toktbl[token.str].prec
+ rule.assoc := toktbl[token.str].assoc
+ put(rule.RHS, toktbl[token.str].no)
+ if toktbl[token.str].no = -2 then {
+ *rule.RHS > 1 & iohno(44, "line ", line_number)
+ rule.POS := 2
+ }
+ }
+ # ...undeclared stuff. Could be a nonterminal. If
+ # error and/or epsilon weren't declared as tokens,
+ # they will get caught here, too.
+ else {
+ case token.str of {
+ &null : stop("What is going on here?")
+ default : put(rule.RHS, token.str)
+ "error" : {
+ put(rule.RHS, -1)
+ insert(grammar.tbl, -1, "error")
+ }
+ "epsilon" : {
+ if *put(rule.RHS, -2) > 1
+ then iohno(44, "line ", line_number)
+ else rule.POS := 2
+ insert(grammar.tbl, -2, "epsilon")
+ }
+ }
+ }
+ }
+ }
+ # Comma means: Go back for another RHS symbol.
+ token := @next_token_no_nl | fail
+ token.sym == "COMMA" | break
+ }
+
+ # Skip semicolon token, if present.
+ (token := token | @next_token_no_nl | fail).sym ~== "SEMICOL"
+
+ # Read and set (optional) precedence.
+ #
+ if token.sym == "MOD" then {
+ token := @next_token_no_nl | iohno(4)
+ (token.sym == "IDENT" & token.str == "prec") |
+ iohno(43, token.str || " line " || line_number)
+ token := @next_token_no_nl | iohno(4)
+ case token.sym of {
+ "CSETLIT" | "STRING" : {
+ *escape(token.str[2:-1]) = 1 | iohno(49, token.str)
+ c := ord(escape(token.str[2:-1])) &
+ rule.prec := toktbl[c].prec &
+ rule.assoc := toktbl[c].assoc
+ }
+ "IDENT" : {
+ \toktbl[token.str] |
+ iohno(43, token.str || " line " || line_number)
+ rule.prec := toktbl[token.str].prec &
+ rule.assoc := toktbl[token.str].assoc
+ }
+ default : 1 = 4 # deliberate failure
+ } | iohno(43, "line ", line_number)
+ token := @next_token_no_nl | fail
+ }
+
+ # Skip semicolon token, if present.
+ (token := token | @next_token_no_nl | fail).sym ~== "SEMICOL"
+
+ # Read in (optional) action.
+ #
+ if token.sym == "LBRACE" then {
+ write_action_as_procedure(next_token, out, rule,
+ module, source_fname)
+ token := @next_token_no_nl | fail
+ }
+
+ # Skip semicolon token, if present.
+ (token := token | @next_token_no_nl | fail).sym ~== "SEMICOL"
+ return token
+
+end
+
+
+#
+# write_action_as_procedure
+#
+procedure write_action_as_procedure(next_token, out, rule,
+ module, source_fname)
+
+ local argstr, bracelevel, token, i, neg
+
+ /module := ""
+ argstr := ""
+ #
+ # Decide the number of arguments based on the length of the RHS of
+ # rule. Exception: Epsilon productions are empty, and pop nothing
+ # off the stack, so take zero args.
+ #
+ if rule.RHS[1] ~=== -2 then {
+ every argstr ||:= "arg" || (1 to *rule.RHS) || ","
+ argstr := trim(argstr, ',')
+ }
+ write(out, "procedure _", rule.no, "_", module, "(", argstr, ")")
+ write(out, "\n$line ", line_number, " ", image(source_fname))
+
+ bracelevel := 1
+ until bracelevel = 0 do {
+ every token := copy_icon_stuff(next_token, out, "RHSARG") do {
+ case token.sym of {
+ default : next
+ "LBRACE" : bracelevel +:= 1
+ "RBRACE" : bracelevel -:= 1
+ "RHSARG" : {
+ until \ (token := @next_token).sym do
+ writes(out, token.str)
+ if neg := (token.sym == "MINUS") then
+ until \ (token := @next_token).sym do
+ writes(out, token.str)
+ else neg := &null
+ token.sym == "INTLIT" | iohno(37, "$"||token.str)
+ if /neg & token.str ~== "0" then {
+ token.str <= *rule.RHS | iohno(38, "$"||token.str)
+ writes(out, " arg", token.str, " ")
+ } else {
+ # Code for $0, $-1, etc.
+ #
+ # Warning! If the name of the stack is changed
+ # in iiparse.lib, it has to be changed here, too.
+ #
+ i := abs(token.str)+1
+ writes(out, " value_stack", module, "[", i, "] ")
+ }
+ }
+ }
+ if bracelevel = 0 then {
+ write(out, "\nend\n")
+ return token
+ }
+ }
+ }
+
+ iohno(39, "line "|| line_number)
+
+end
+
diff --git a/ipl/packs/ibpag2/ibutil.icn b/ipl/packs/ibpag2/ibutil.icn
new file mode 100644
index 0000000..d16e511
--- /dev/null
+++ b/ipl/packs/ibpag2/ibutil.icn
@@ -0,0 +1,296 @@
+############################################################################
+#
+# Name: ibutil.icn
+#
+# Title: utilities for Ibpag2
+#
+# Author: Richard L. Goerwitz
+#
+# Version: 1.21
+#
+############################################################################
+#
+# Contains:
+#
+# production_2_string(p) makes production or item p human-
+# readable
+#
+#      print_item_list(C, i)        prints to &errout a human-readable
+#                                   version of item list C
+#
+#      print_grammar(grammar, f)    sends to file f (default &errout)
+#                                   a human-readable printout of a grammar,
+#                                   as recorded in an ib_grammar structure
+#
+# print_action_goto_tables(atbl, gtbl, ibtoktbl, f)
+#      print_action_goto_tables(atbl, gtbl, ibtoktbl, f)
+#                                   sends to file f (default &errout)
+#                                   a human-readable printout of action
+#                                   table atbl and goto table gtbl
+# print_follow_sets(FOLLOW_table)
+#                                   prints to &errout a human-readable
+#                                   version of a FOLLOW table (table of sets)
+#
+#                                   prints to &errout a human-readable
+#                                   version of a FIRST table (a table of sets)
+# of a FIRST table (a table of sets)
+#
+# ibreplace(s1, s2, s3) replaces s2 with s3 in s1
+#
+# equivalent_items(i1, i2) succeeds if item i1 is structurally
+# identical to item i2
+#
+# equivalent_item_lists(l1,l2) same as equivalent_items, but for
+# lists of items, not individual items
+#
+############################################################################
+#
+# Links: none
+#
+############################################################################
+
+
+record production(LHS, RHS, POS, LOOK, no, prec, assoc)
+
+#
+# production_2_string: production record -> string
+# p -> s
+#
+# Stringizes an image of the LHS and RHS of production p in
+# human-readable form.
+#
+procedure production_2_string(p, ibtoktbl)
+
+ local s, m, t
+
+ s := image(p.LHS) || " -> "
+ every m := !p.RHS do {
+ if t := \ (\ibtoktbl)[m]
+ then s ||:= t || " "
+ else s ||:= image(m) || " "
+ }
+ # if the POS field is nonnull, print it
+ s ||:= "(POS = " || image(\p.POS) || ") "
+ # if the LOOK field is nonnull, print it, too
+ s ||:= "lookahead = " || image(\p.LOOK)
+
+ return trim(s)
+
+end
+
+
+#
+# print_item_list: makes item list human readable
+#
+procedure print_item_list(C, i)
+
+ write(&errout, "Productions for item list ", i, ":")
+ every write(&errout, "\t", production_2_string(!C[i]))
+ write(&errout)
+ return
+
+end
+
+
+#
+# print_grammar: makes entire grammar human readable
+#
+procedure print_grammar(grammar, f)
+
+ local p, i, sl
+
+ /f := &errout
+
+ write(f, "Start symbol:")
+ write(f, "\t", grammar.start)
+ write(f)
+ write(f, "Rules:")
+ every p := !grammar.rules do {
+ writes(f, "\tRule ", right(p.no, 3, " "), " ")
+ write(f, production_2_string(p, grammar.tbl))
+ }
+ write(f)
+ write(f, "Tokens:")
+ sl := sort(grammar.tbl, 3)
+ every i := 1 to *sl-1 by 2 do
+ write(f, "\t", left(sl[i], 5, "."), right(sl[i+1], 20, "."))
+ write(f)
+ return
+
+end
+
+
+#
+# print_action_goto_tables
+#
+# Makes action & goto tables human readable. If a table mapping
+# integer (i.e. char) literals to token names is supplied, the
+# token names themselves are printed.
+#
+procedure print_action_goto_tables(atbl, gtbl, ibtoktbl, f)
+
+ local TAB, tbl, key_set, size, i, column, k
+
+ /f := &errout
+ TAB := "\t"
+
+ every tbl := atbl|gtbl do {
+
+ key_set := set(); every insert(key_set, key(tbl))
+ writes(f, TAB)
+ every k := !key_set do
+ writes(f, \(\ibtoktbl)[k] | k, TAB)
+ write(f)
+
+ size := 0; every size <:= key(!tbl)
+ every i := 1 to size do {
+ writes(f, i, TAB)
+ every column := tbl[!key_set] do {
+ # action lists may have more than one element
+ if /column[i] then
+ writes(f, " ", TAB) & next
+ \column[i] ? {
+ if any('asr') then {
+ while any('asr') do {
+ writes(f, ="a") & next
+ writes(f, tab(upto('.<')))
+ if ="<" then tab(find(">")+1) else ="."
+ tab(many(&digits))
+ }
+ writes(f, TAB)
+ }
+ else writes(f, tab(many(&digits)), TAB)
+ }
+ }
+ write(f)
+ }
+ write(f)
+ }
+
+ return
+
+end
+
+
+#
+# print_follow_sets: make FOLLOW table human readable
+#
+procedure print_follow_sets(FOLLOW_table)
+
+ local FOLLOW_sets, i
+
+ FOLLOW_sets := sort(FOLLOW_table, 3)
+ write(&errout, "FOLLOW sets are as follows:")
+ every i := 1 to *FOLLOW_sets-1 by 2 do {
+ writes(&errout, "\tFOLLOW(", image(FOLLOW_sets[i]), ") = ")
+ every writes(&errout, image(! FOLLOW_sets[i+1]), " ")
+ write(&errout)
+ }
+ write(&errout)
+ return
+
+end
+
+
+#
+# print_first_sets: make FIRST table human readable
+#
+procedure print_first_sets(FIRST_table)
+
+ local FIRST_sets, i
+
+ FIRST_sets := sort(FIRST_table, 3)
+ write(&errout, "FIRST sets are as follows:")
+ every i := 1 to *FIRST_sets-1 by 2 do {
+ writes(&errout, "\tFIRST(", image(FIRST_sets[i]), ") = ")
+ every writes(&errout, image(! FIRST_sets[i+1]), " ")
+ write(&errout)
+ }
+ write(&errout)
+ return
+
+end
+
+
+#
+# ibreplace: string x string x string -> string
+# (s1, s2, s3) -> s4
+#
+# Where s4 is s1, with every instance of s2 stripped out and
+# replaced by s3. E.g. replace("hello there; hello", "hello",
+# "hi") yields "hi there; hi". Taken straight from the IPL.
+#
+procedure ibreplace(s1,s2,s3)
+
+ local result, i
+
+ result := ""
+ i := *s2
+
+ s1 ? {
+ while result ||:= tab(find(s2)) do {
+ result ||:= s3
+ move(i)
+ }
+ return result || tab(0)
+ }
+
+end
+
+
+#
+# equivalent_items: record x record -> record or failure
+# (item1, item2) -> item1 or failure
+#
+# Where item1 and item2 are records having LHS, RHS, POS, & LOOK
+# fields (and possibly others, though they aren't used). Returns
+# item1 if item1 and item2 are structurally identical as far as
+# their LHS, RHS, LOOK, and POS fields are concerned. For SLR
+# table generators, LOOK will always be null.
+#
+procedure equivalent_items(item1, item2)
+
+ local i
+
+ item1 === item2 & (return item1)
+
+ if item1.LHS == item2.LHS &
+ item1.POS = item2.POS &
+ #
+ # This comparison doesn't have to be recursive, since I take
+ # care never to alter RHS structures. Identical RHSs should
+ # always be *the same underlying structure*.
+ #
+ item1.RHS === item2.RHS &
+ item1.LOOK === item2.LOOK
+ then
+ return item1
+
+end
+
+
+#
+# equivalent_item_lists: list x list -> list or fail
+# (il1, il2) -> il1
+#
+# Where il1 is one sorted list-of-items (as returned by goto() or
+# by closure()), where il2 is another such list. Returns the
+# first list if the LHS, RHS, and POS fields of the constituent
+# items are all structurally identical, i.e. if the two lists
+# contain the structurally identical items.
+#
+procedure equivalent_item_lists(il1, il2)
+
+ local i
+
+ il1 === il2 & (return il1)
+ if *il1 = *il2
+ then {
+ every i := 1 to *il1 do
+ equivalent_items(il1[i], il2[i]) | fail
+ }
+ else fail
+
+ return il1
+
+end
diff --git a/ipl/packs/ibpag2/ibwriter.icn b/ipl/packs/ibpag2/ibwriter.icn
new file mode 100644
index 0000000..8bf0263
--- /dev/null
+++ b/ipl/packs/ibpag2/ibwriter.icn
@@ -0,0 +1,110 @@
+############################################################################
+#
+# Name: ibwriter.icn
+#
+# Title: Ibpag2 parser/library writer
+#
+# Author: Richard L. Goerwitz
+#
+# Version: 1.7
+#
+############################################################################
+#
+# Given a grammar, an action table, a goto table, an open output
+# file, an open iiparser file, and a module name, sends to the output
+# file a fully loaded LR parser with run-time constructible action
+# and goto tables. The iiparser file contains the base LR parser
+# that the output file uses.
+#
+############################################################################
+#
+# Links: itokens, ximage
+#
+# See also: iiparse.icn
+#
+############################################################################
+
+#link itokens, ximage
+link ximage
+
+# defined in itokens.icn
+# record ib_TOK(sym, str)
+
+procedure ibwriter(iiparse_file, outfile, grammar, atbl, gtbl, module)
+
+ local token, next_token, start_symbol, rule_list, ttbl
+
+ /module := ""
+ start_symbol := grammar.start
+ rule_list := grammar.rules
+ ttbl := grammar.tbl
+ next_token := create itokens(iiparse_file, 1)
+
+ #
+ # Copy tokens in iiparse_file to outfile. Whenever we find a $
+ # (RHSARG), process: If we find $$, output $; If we find $module,
+ # output image(module); and other such stuff. Note that
+ # copy_iiparse_tokens suspends tokens before writing them. It
+ # also blocks writing of any token whose sym field matches the
+ # string given as arg 3.
+ #
+ every token := copy_iiparse_tokens(next_token, outfile, "RHSARG")
+ do {
+ if token.sym == "RHSARG" then {
+ if (token := @next_token).sym == "RHSARG" then {
+ writes(outfile, token.str)
+ next
+ }
+ token.sym == "IDENT" | iohno(60, "line "|| line_number)
+ writes(outfile, " ")
+ case token.str of {
+ # copy $module name over as a literal
+ "module" : writes(outfile, image(module))
+ # use ximage to copy over action, goto, and token tables,
+ # as well as the production list (used only for debugging)
+ "atbl_insertion_point": writes(outfile, ximage(atbl))
+ "gtbl_insertion_point": writes(outfile, ximage(gtbl))
+ "ttbl_insertion_point": writes(outfile, ximage(ttbl))
+ "rule_list_insertion_point" :
+ writes(outfile, ximage(rule_list))
+ # use image to copy the start symbol into the output file
+ "start_symbol_insertion_point" :
+ writes(outfile, image(start_symbol))
+ # add the module name to anything else beginning with $
+ default : writes(outfile, token.str, module, " ")
+ }
+ }
+ }
+
+ return
+
+end
+
+
+#
+# copy_iiparse_tokens: coexpression x file x string -> ib_TOK records
+# (next_token, out, except) -> token records
+#
+# Copy Icon code to output stream, also suspending as we go.
+# Insert separators between tokens where needed. Do not output
+# any token whose sym field matches except. The point in
+# suspending tokens as we go is to enable the calling procedure to
+# look for signal tokens that indicate insertion or termination
+# points. Fail on EOF.
+#
+procedure copy_iiparse_tokens(next_token, out, except)
+
+ local separator, T
+
+ separator := ""
+ while T := @next_token do {
+ if \T.sym then suspend T
+ if \T.sym == \except then next
+ if any(&digits ++ &letters ++ '_.', \T.str, 1, 2) & \T.sym ~== "DOT"
+ then writes(out, separator)
+ writes(out, T.str)
+ if any(&digits ++ &letters ++ '_.', \T.str, -1, 0) & \T.sym ~== "DOT"
+ then separator := " " else separator := ""
+ }
+
+end
diff --git a/ipl/packs/ibpag2/iiglrpar.lib b/ipl/packs/ibpag2/iiglrpar.lib
new file mode 100644
index 0000000..059b0bf
--- /dev/null
+++ b/ipl/packs/ibpag2/iiglrpar.lib
@@ -0,0 +1,946 @@
+############################################################################
+#
+# Name: iiglrpar.lib
+#
+# Title: Quasi-GLR parser code
+#
+# Author: Richard L. Goerwitz
+#
+# Version: 1.20
+#
+############################################################################
+#
+# This file contains quasi-GLR parser code for use by Ibpag2's
+# output. See below on what I mean by "quasi-GLR." Entry point is
+# iiparse(infile, fail_on_error). Infile is the stream from which
+# input is to be taken. Infile is passed as argument 1 to the
+# user-supplied lexical analyzer, iilex_module() (where _module is
+# the string supplied with the -m option to Ibpag2). If
+# fail_on_error is nonnull, the parser, iiparse, will fail on errors,
+# rather than abort. Iiparse() returns the top element on its value
+# stack on a successful parse (which can be handy).
+#
+# Iilex_module() must suspend integers for tokens and may also set
+# iilval_module to the actual string values. Tokens -2, -1, and 0
+# are reserved. -2 is epsilon, and -1 is error. 0 is EOF, and is
+# automatically appended to the token stream when iilex_module, the
+# tokenizer, fails. These values should not normally be returned by
+# the analyzer. In general, it is a good idea to $include
+# iilex_module from your Ibpag2 source files, so that it can use the
+# symbolic %token names declared in the original Ibpag2 source file.
+# As implied above ("suspend"), iilex_module must be a generator,
+# failing on EOF.
+#
+# If desired, you may include your own error-handling routine.  It
+# must be called iierror_module (where _module is once again the
+# module name supplied to ibpag2 via the -m option).  The global
+# variable line_number_module is automatically defined below, so a
+# typical arrangement would be for the lexical analyzer to initialize
+# line_number_module to 0, and increment by 1 for each line read.
+# The error handler, iierror_module() can then display this variable.
+# Note that the error handler should accept a single string argument
+# (set by iiparse to describe the token on the input stream when the
+# error was encountered).
+#
+# I label this parser "GLR" because it does support multiple parallel
+# parsers (like GLR parsers are supposed to). I use the qualifier
+# "quasi," though, because it does not use a graph-structured stack.
+# Instead it copies both value and state stacks (in fact, the whole
+# parser environment) when creating new automata to handle
+# alternative parse paths. Slower, yes. But it enables the user to
+# use almost precisely the action and input format that is used for
+# the standard parser.
+#
+# Note that iiparse(), as implemented here, may suspend multiple
+# results. So be sure to call it in some context where multiple
+# results can be used (e.g. every parse := iiparse(&input, 1), or the
+# like). Note also that when new parser "edges" get created, a
+# rather cumbersome recursive copy routine is used. Sorry, but it's
+# necessary to prevent unintended side-effects.
+#
+############################################################################
+#
+# The algorithm:
+#
+# A = list of active parsers needing action lookup
+# S = list of parsers to be shifted
+# R = list of parsers to be reduced
+# B = list of parsers that "choked"
+#
+# for every token on the input stream
+# begin
+# until length of R = 0 and length of A = 0
+# begin
+# - pop successive parsers off of A, and placing them in S,
+# R, or B, depending on parse table directives; suspend a
+# result for each parser that has reached an accepting
+# state
+# - pop successive parsers off of R, reducing them, and
+# placing them back in A; perform the action code
+# associated with each reduction
+# end
+# - pop successive parsers off of S, shifting them, and placing
+# them back in A; mark recovering parsers as recovered when
+# they have successfully shifted three tokens
+# if length of A = 0 and token not = EOF
+# then
+# - initiate error recovery on the parsers in B, i.e. for
+# each parser in B that is not already recovering, pop its
+# stack until error (-1) can legally be shifted, then shift
+# error, mark the parser as recovering from an error, and
+# place it back in A; if the parser is already recovering,
+# discard the current token
+# else
+# - clobber the parsers in B
+# end
+# end
+#
+# Note that when a given active parser in A is being classified
+# as needing a reduction, shift, suspension, or entry into the error
+# list (B), more than one action may apply due to ambiguity in the
+# grammar. At such points, the parser environment is duplicated,
+# once for each alternative pathway, and each of the new parsers is
+# then entered into the appropriate list (R or S; if accept is an
+# alternative, the classification routine suspends).
+#
+# Note also that when performing the action code associated with
+# reductions, parsers may be reclassified as erroneous, accepting,
+# etc. via "semantic" directives like IIERROR and IIACCEPT. See the
+# README file. Multiple-result action code will cause new parser
+# threads to be created, just as ambiguities in the grammar do within
+# the classification routine above.
+#
+#############################################################################
+#
+# See also: ibpag2.icn, iiparse.icn
+#
+############################################################################
+
+$$line 119 "iiglrpar.lib"
+
+$$ifndef IIDEBUG
+ $$define $iidebug 1
+ $$define show_new_forest 1
+$$endif # not IIDEBUG
+
+# These defines are output by Ibpag2 ahead of time (with the module
+# name appended, if need be):
+#
+# IIERROR
+# IIACCEPT
+# iiprune - GLR mode only
+# iiisolate - GLR mode only
+# iierrok
+# iiclearin
+
+# Parser environment + lookahead and pending action field.
+# One ib_pe record is a complete parser "thread": its state stack and
+# value stack, the pending action string snipped from the action
+# table, a running error count, error-recovery bookkeeping
+# (recover_shifts = shifts made while resynchronizing, discards =
+# tokens thrown away), and the iiclearin flag (clearin).
+#
+record $ib_pe(state_stack, value_stack, action, errors,
+    recover_shifts, discards, clearin)
+
+# Warning! If you change the name of the value stack, change it also
+# in ibreader.icn, procedure write_action_as_procedure().
+#
+global $iilval, $line_number, $state_stack, $value_stack,
+    $iidirective, $ttbl, $errors, $discard_token
+
+#
+# iiparse: file x anything -> ?s (a generator)
+# (stream, fail_on_error) -> ?
+#
+# Where stream is an open file, where fail_on_error is a switch
+# that (if nonnull) tells the iiparse to fail, rather than abort,
+# on error, and where ?s represent the user-defined results of a
+# completed parse of file, from the current location up to the
+# point where the parser executes an "accept" action. Note that
+# iiparse, as implemented here, is a generator.
+#
+procedure $iiparse(stream, fail_on_error)
+
+    local token, next_token, actives, reducers, shifters, barfers
+    #global ttbl, errors
+    static atbl
+    initial {
+	$iidirective := ""
+	atbl := $atbl_insertion_point
+	$ttbl := $ttbl_insertion_point
+$$line 166 "iiglrpar.lib"
+	\$iilex | stop("no iilex tokenizer defined")
+    }
+
+    # Start with a single parser thread: state 1 on the state stack,
+    # an empty value stack, no pending action, zero errors.
+    actives := [ $ib_pe([1], [], &null, 0) ]
+    $state_stack := actives[1].state_stack
+    $value_stack := actives[1].value_stack
+    $errors := actives[1].errors
+    reducers := list()
+    shifters := list()
+    # I get tired of bland error code.  We'll call the list of
+    # parsers in an error state "barfers" :-).
+    barfers := list()
+
+    # 0 (EOF) is appended when the user-supplied tokenizer fails.
+    next_token := create $iilex(stream, fail_on_error) | 0
+
+    token := @next_token
+    #
+    # After this ^, new tokens are read in near the end of the repeat
+    # loop.  None is read in on an error, since then we will try again
+    # on the token that caused the error.
+    #
+    repeat {
+	until *actives = *reducers = 0
+	do {
+
+	    # Prune out parsers that are doing the same thing as some
+	    # other parser.
+	    #
+$$ifdef AUTO_PRUNE
+	    auto_prune(actives)
+$$endif
+
+	    # Suspends $value_stack[1] on accept actions.  Otherwise,
+	    # puts parsers that need shifting into the shifters list,
+	    # parsers that need reducing into the reducers list, and
+	    # error-state parsers into the barfers list.  Creates new
+	    # parser environments as needed.
+	    #
+	    suspend $ib_action(atbl, token, actives, shifters,
+			       reducers, barfers)
+
+	    # Perform reductions.  If instructed via the iiaccept
+	    # macro, simulate an accept action, and suspend with a
+	    # result.
+	    #
+	    suspend $perform_reductions(token, actives, shifters,
+					reducers, barfers)
+	}
+
+	# Shift token for every parser in the shifters list.  This
+	# will create a bunch of new active parsers.
+	#
+	$perform_shifts(token, actives, shifters)
+	#
+	# If we get to here and have no actives, and we're not at the
+	# end of the input stream, then we are at an error impasse.
+	# Do formal error recovery.
+	#
+	if *actives = 0 & token ~=== 0 then {
+	    suspend $perform_barfs(atbl, token, actives, barfers,fail_on_error)
+	    #
+	    # Perform_barfs sets discard_token if recovery was
+	    # unsuccessful on the last token, and it needs discarding.
+	    # (\x := &null tests the flag and clears it in one step.)
+	    #
+	    if \$discard_token := &null then
+		token := @next_token | break
+	    #
+	    # If there *still* aren't any active parsers, we've
+	    # reached an impasse (or there are no error productions).
+	    # Abort.
+	    #
+	    if *actives = 0 then {
+		if \fail_on_error then fail
+		else stop()
+	    }
+	}
+	else {
+	    #
+	    # Parsers in an error state should be weeded out, since if
+	    # we get to here, we have some valid parsers still going.
+	    # I.e. only use them if there are *no* actives (see above).
+	    #
+$$ifdef IIDEBUG
+	    write(&errout, "+++ pruning ", *barfers, " erroneous parsers")
+	    # NOTE(review): "parser" is not declared local in this
+	    # procedure (it is only used under IIDEBUG) -- confirm it
+	    # cannot clash with a global of the same name.
+	    while parser := pop(barfers)
+	    do $iidebug("p", token, &null, parser)
+$$else
+	    while pop(barfers)
+$$endif				#IIDEBUG
+	    #
+	    # Get the next token.  Only do this if we have active
+	    # parsers not recovering from an error, i.e., if we're here.
+	    #
+	    token := @next_token | break
+	}
+    }
+
+end
+
+
+#
+# ib_action
+#
+# Classify each active parser by the action-table entry for (token,
+# current state): shift actions go to shifters, reduce actions to
+# reducers, erroneous parsers to barfers; accept actions suspend the
+# top of the value stack.  Grammar ambiguities (several actions packed
+# into one table entry) fork new parser environments.
+#
+procedure $ib_action(atbl, token, actives, shifters, reducers,
+		     barfers)
+
+    local a, act, num, parser, new_parser
+
+    # While there is an active parser, take it off the actives list,
+    # and...
+    while parser := pop(actives) do {
+
+	# ...check for a valid action (if none, then there is an
+	# error; put it into the barfers list).
+	#
+	if a := \ (\atbl[token])[parser.state_stack[1]]
+	then {
+	    a ? {
+		# Keep track of how many actions we've seen.
+		num := 0
+
+		# Snip off successive actions.  If there's no
+		# ambiguity, there will be only one action, & no
+		# additional parser environments will be created.
+		#
+		while {
+$$ifdef COMPRESSED_TABLES
+		    # "\x80" is the accept action; uncompress_action
+		    # does its own move()ing
+		    act := $uncompress_action()
+$$else
+		    act := ="a" | {
+			tab(any('sr')) || tab(upto('.<')) ||
+			((="<" || tab(find(">")+1)) | =".") ||
+			tab(many(&digits))
+		    }
+$$endif #COMPRESSED TABLES
+		}
+		do {
+		    # New parser environment only needed for num > 1.
+		    #
+		    if (num +:= 1) > 1 then {
+			new_parser := $fullcopy(parser)
+			show_new_forest("=== table conflict; new parser",
+			    actives, shifters, reducers, barfers, new_parser)
+		    }
+		    else new_parser := parser
+		    new_parser.action := act
+
+		    # Classify the action as s, r, or a, and place in
+		    # the appropriate list (or suspend a result if a).
+		    #
+		    case act[1] of {
+			"s" : put(shifters, new_parser)
+			"r" : put(reducers, new_parser)
+			"a" : {
+			    # NOTE(review): ruleno is never assigned in
+			    # this procedure, so under IIDEBUG this call
+			    # passes a null/global value -- confirm
+			    # intended.
+			    $iidebug("a", token, ruleno, parser)
+			    suspend parser.value_stack[1]
+			}
+		    }
+		}
+	    }
+	}
+	else {
+	    #
+	    # Error.  Parser will get garbage collected before another
+	    # token is read from iilex, unless the parsers all fail -
+	    # in which case, error recovery will be tried.
+	    #
+	    $iidebug("e", token, &null, parser)
+	    put(barfers, parser)
+	}
+    }
+
+end
+
+
+#
+# perform_reductions
+#
+# Pop each parser off the reducers list, perform its pending reduce
+# action (r<ruleno><LHS><rhsize>), run any user action code for that
+# rule, honor iidirectives (iierror/iiaccept/iierrok/iiprune/
+# iiisolate/iiclearin), and put the parser back on actives.  May
+# suspend results when action code forces an accept.
+#
+procedure $perform_reductions(token, actives, shifters, reducers, barfers)
+
+    local parser, ruleno, newsym, rhsize, arglist, result, num,
+	new_parser, tmp, p
+    static gtbl
+    initial {
+	gtbl := $gtbl_insertion_point
+$$line 336 "iiglrpar.lib"
+    }
+
+    while parser := get(reducers)
+    do {
+
+	# Set up global state and value stacks, so that the action
+	# code can access them.
+	#
+	$state_stack := parser.state_stack
+	$value_stack := parser.value_stack
+	$errors := parser.errors
+
+	# Finally, perform the given action:
+	#
+	parser.action ? {
+	    #
+	    # Reduce action format, e.g. r1<S>2 = reduce by rule 1
+	    # (LHS = S, RHS length = 2).
+	    #
+	    move(1)
+	    ruleno := integer(1(tab(find("<")), move(1)))
+	    newsym := 1(tab(find(">")), move(1))
+	    rhsize := integer(tab(many(&digits)))
+	    arglist := []
+	    # Pop the RHS off both stacks; arglist collects the RHS
+	    # values in left-to-right order.
+	    every 1 to rhsize do {
+		pop($state_stack)
+		push(arglist, pop($value_stack))
+	    }
+	    # Gtbl is "backwards," i.e. token first, state second.
+	    # The value produced is the "goto" state.
+	    #
+	    push($state_stack, gtbl[newsym][$state_stack[1]])
+	    #
+	    # The actions are in procedures having the same name as
+	    # the number of their rule, bracketed by underscores, &
+	    # followed by the current module name.  If there is such a
+	    # procedure associated with the current reduce action,
+	    # call it.
+	    #
+	    # NOTE(review): "func" is not declared local -- confirm it
+	    # does not collide with a global.
+	    if func := proc("_" || ruleno || "_" || $module)
+	    then {
+		num := 0
+		#
+		# For every valid result from the action code for the
+		# current reduction, create a new parser if need be
+		# (i.e. if num > 1), and check iidirective.  Push the
+		# result onto the stack of the new parser & put the
+		# new parser into the actives list.
+		#
+		every result := func!arglist do {
+		    # For all but the first result, create a new parser.
+		    if (num +:= 1) > 1 then {
+			new_parser := $fullcopy(parser)
+			pop(new_parser.value_stack) # take off pushed result
+			show_new_forest("=== multi-result action; new parser",
+			    actives, shifters, reducers, barfers, new_parser)
+		    }
+		    else new_parser := parser
+		    #
+		    # IIERROR, IIACCEPT, iierrok, iiisolate, and iiprune
+		    # are all implemented using a search through a global
+		    # iidirective variable; see the $defines described
+		    # above.
+		    #
+		    tmp := $iidirective
+		    $iidirective := ""
+		    if *tmp > 0 then {
+			if find("clearin", tmp) then {
+			    # see perform_shifts() below
+			    new_parser.clearin := 1
+			}
+			if find("error", tmp) then {
+			    $iidebug("e", token, ruleno, new_parser)
+			    put(barfers, new_parser)
+			    next
+			}
+			if find("errok", tmp) then {
+			    new_parser.recover_shifts := &null
+			    new_parser.discards := 0
+			}
+			if find("prune", tmp) then {
+			    # Garden path.
+			    $iidebug("p", token, ruleno, new_parser)
+			    break next
+			}
+			if find("isolate", tmp) then {
+			    # Prune all but the current parser.
+$$ifdef IIDEBUG
+			    write(&errout, "+++ isolating by pruning")
+			    while p := pop(actives) do
+				$iidebug("p", token, ruleno, p)
+			    while p := pop(reducers) do
+				$iidebug("p", token, ruleno, p)
+			    while p := pop(shifters) do
+				$iidebug("p", token, ruleno, p)
+			    while p := pop(barfers) do
+				$iidebug("p", token, ruleno, p)
+$$else
+			    while pop(actives)
+			    while pop(reducers)
+			    while pop(shifters)
+			    while pop(barfers)
+$$endif				#IIDEBUG
+			    push(new_parser.value_stack, result)
+			    $iidebug("r", token, ruleno, new_parser)
+			    put(actives, new_parser)
+			    break next
+			}
+			if find("accept", tmp) then {
+			    $iidebug("a", token, ruleno, new_parser)
+			    suspend result
+			    next
+			}
+		    }
+		    #
+		    # Push result onto the new parser thread's value
+		    # stack.
+		    #
+		    push(new_parser.value_stack, result)
+		    $iidebug("r", token, ruleno, new_parser)
+		    put(actives, new_parser)
+		    #
+		    # Action code must have the stack in its original
+		    # form.  So restore the stack's old form before
+		    # going back to the action code.
+		    #
+		    if num = 1 then
+			$value_stack := parser.value_stack[2:0]
+		}
+		#
+		# If the action code for this rule failed, push &null.
+		# But first check $iidirective.
+		#
+		if num = 0 then {
+		    #
+		    # Same $iidirective code as above repeated
+		    # (inelegantly) because it accesses too many
+		    # variables to be easily isolated.
+		    #
+		    tmp := $iidirective
+		    $iidirective := ""
+		    if *tmp > 0 then {
+			if find("clearin", tmp) then {
+			    # see perform_shifts() below
+			    parser.clearin := 1
+			}
+			if find("error", tmp) then {
+			    $iidebug("e", token, ruleno, parser)
+			    put(barfers, parser)
+			    next
+			}
+			if find("errok", tmp) then {
+			    parser.recover_shifts := &null
+			    parser.discards := 0
+			}
+			if find("prune", tmp) then {
+			    # Garden path.
+			    $iidebug("p", token, ruleno, parser)
+			    next # go back to enclosing while pop...
+			}
+			if find("isolate", tmp) then {
+			    # Prune all but the current parser.
+$$ifdef IIDEBUG
+			    write(&errout, "+++ isolating by pruning")
+			    while p := pop(actives) do
+				$iidebug("p", token, ruleno, p)
+			    while p := pop(reducers) do
+				$iidebug("p", token, ruleno, p)
+			    while p := pop(shifters) do
+				$iidebug("p", token, ruleno, p)
+			    while p := pop(barfers) do
+				$iidebug("p", token, ruleno, p)
+$$else
+			    while pop(actives)
+			    while pop(reducers)
+			    while pop(shifters)
+			    while pop(barfers)
+$$endif				#IIDEBUG
+			}
+			if find("accept", tmp) then {
+			    $iidebug("a", token, ruleno, parser)
+			    suspend arglist[-1] | &null
+			    next
+			}
+		    }
+		    # Finally, push the result!
+		    result := arglist[-1] | &null
+		    push(parser.value_stack, result)
+		    $iidebug("r", token, ruleno, parser)
+		    put(actives, parser)
+		}
+	    }
+	    # If there is no action code for this rule...
+	    else {
+		# ...push the value of the last RHS arg.
+		# For 0-length e-productions, push &null.
+		result := arglist[-1] | &null
+		push(parser.value_stack, result)
+		$iidebug("r", token, ruleno, parser)
+		put(actives, parser)
+	    }
+	}
+    }
+
+end
+
+
+#
+# perform_shifts
+#
+# Pop each parser off the shifters list, apply its pending shift
+# action (s<state>.<ruleno>), push the new state and the current
+# token's lexical value, and return it to the actives list.  Parsers
+# flagged by iiclearin skip the shift so the next token is retried.
+#
+procedure $perform_shifts(token, actives, shifters)
+
+    local parser, ruleno
+
+    *shifters = 0 & fail
+
+    while parser := pop(shifters) do {
+	#
+	# One of the iidirectives is iiclearin, i.e. clear the input
+	# token and try again on the next token.
+	#
+	\parser.clearin := &null & {
+	    put(actives, parser)
+	    next
+	}
+	parser.action ? {
+	    #
+	    # Shift action format, e.g. s2.1 = shift and go to state 2
+	    # by rule 1.
+	    #
+	    move(1)
+	    push(parser.state_stack, integer(tab(find("."))))
+	    push(parser.value_stack, $iilval)
+	    ="."; ruleno := integer(tab(many(&digits)))
+	    # NOTE(review): "act" is not defined in this procedure, so
+	    # this stop() would itself error on a malformed action --
+	    # presumably parser.action was meant; confirm upstream.
+	    pos(0) | stop("malformed action: ", act)
+	    #
+	    # If, while recovering, we can manage to shift 3 tokens,
+	    # then we consider ourselves resynchronized.  Don't count
+	    # the error token (-1).
+	    #
+	    if token ~= -1 then {
+		if \parser.recover_shifts +:= 1 then {
+		    # 3 shifts make a successful recovery
+		    if parser.recover_shifts > 4 then {
+			parser.recover_shifts := &null
+			parser.discards := 0
+		    }
+		}
+	    }
+	    $iidebug("s", token, ruleno, parser)
+	}
+	put(actives, parser)
+    }
+
+    return
+
+end
+
+
+#
+# perform_barfs
+#
+# Formal error recovery.  For each erroneous parser: if it is already
+# recovering, discard the offending token (aborting after 500
+# discards); otherwise pop its stacks until error (-1) can be shifted,
+# then shift error and return it to the actives list.  Sets
+# $discard_token when the current token must be thrown away.
+#
+procedure $perform_barfs(atbl, token, actives, barfers, fail_on_error)
+
+    #
+    # Note how this procedure has its own local reducers and shifters
+    # list.  These are *not* passed from the parent environment!
+    #
+    local parser, count, reducers, shifters, recoverers
+
+    # To hold the list of parsers that need to shift error (-1).
+    recoverers := list()
+
+    count := 0
+    while parser := pop(barfers) do {
+	count +:= 1
+	# Nonnull recover_shifts means this parser was already
+	# recovering; \x := 0 tests that and resets its shift count.
+	if \parser.recover_shifts := 0 then {
+	    #
+	    # If we're already in an error state, discard the
+	    # current token, and increment the number of discards
+	    # we have made.  500 is too many; abort.
+	    #
+	    if (parser.discards +:= 1) > 500 then {
+		if proc($iierror)
+		then $iierror("fatal error: can't resynchronize")
+		else write(&errout, "fatal error: can't resynchronize")
+		if \fail_on_error then fail
+		else stop()
+	    }
+	    # try again on this one with the next token
+	    put(actives, parser)
+	} else {
+	    parser.errors +:= 1 # error count for this parser
+	    parser.discards := parser.recover_shifts := 0
+	    # If this is our first erroneous parser, print a message.
+	    if count = 1 then {
+		if proc($iierror)
+		then $iierror(image(\$ttbl[token]) | image(token))
+		else write(&errout, "parse error")
+	    }
+	    #
+	    # If error appears in a RHS, pop states until we get to a
+	    # spot where error (-1) is a valid lookahead token:
+	    #
+	    if \$ttbl[-1] then {
+		until *parser.state_stack = 0 do {
+		    if \atbl[-1][parser.state_stack[1]] then {
+			put(recoverers, parser)
+			break next
+		    } else pop(parser.state_stack) & pop(parser.value_stack)
+		}
+	    }
+	    # If we get past here, the stack is now empty or there
+	    # are no error productions.  Abandon this parser.
+	    $iidebug("p", token, &null, parser)
+	}
+    }
+
+    # Parsers still recovering are in the actives list; those that
+    # need to shift error (-1) are in the recoverers list.  The
+    # following turns recoverers into actives:
+    #
+    if *recoverers > 0 then {
+	reducers := list() # a scratch list
+	shifters := list() # ditto
+	until *recoverers = *reducers = 0 do {
+$$ifdef AUTO_PRUNE
+	    auto_prune(actives)
+$$endif
+	    suspend $ib_action(atbl, -1, recoverers, shifters,
+			       reducers, barfers)
+	    suspend $perform_reductions(-1, recoverers, shifters,
+					reducers, barfers)
+	}
+	$perform_shifts(-1, recoverers, shifters)
+	every put(actives, !recoverers)
+    }
+    #
+    # If there were no recoverers, we've already shifted the error
+    # token, and are discarding tokens from the input stream.  Note
+    # that if one parser was recovering, they *all* should be
+    # recovering, since if one was not recovering, the erroneous
+    # parsers should all have been discarded by the calling proc.
+    #
+    else
+	$discard_token := 1
+
+end
+
+
+$$ifdef IIDEBUG
+
+# Mirror of the production record used by the generator (ibutil.icn)
+# so that the debugging rule list can be reconstituted here.
+record production(LHS, RHS, POS, LOOK, no, prec, assoc)
+#
+# iidebug: trace one parser event (accept/error/prune/reduce/shift)
+# to &errout, including the lookahead, current state, matching rule
+# (if any), and both stacks of the given parser environment.
+#
+procedure $iidebug(action, token, ruleno, parser)
+
+    local p, t, state
+    static rule_list
+    initial {
+	rule_list := $rule_list_insertion_point
+$$line 693 "iiglrpar.lib"
+    }
+
+    write(&errout, "--- In parser ", image(parser), ":")
+    case action of {
+	"a" : writes(&errout, "accepting ") &
+	    state := parser.state_stack[1]
+	"e" : writes(&errout, "***ERROR***\n") &
+	    write(&errout, "recover shifts = ",
+		  parser.recover_shifts) &
+	    write(&errout, "discarded tokens = ",
+		  parser.discards) &
+	    writes(&errout, "error action ") &
+	    state := parser.state_stack[1]
+	"p" : writes(&errout, "***PRUNING***\n") &
+	    writes(&errout, "prune action ") &
+	    state := parser.state_stack[1]
+	# reduce/shift have already pushed a state; report the prior one
+	"r" : writes(&errout, "reducing ") &
+	    state := parser.state_stack[2]
+	"s" : writes(&errout, "shifting ") &
+	    state := parser.state_stack[2]
+	default : stop("malformed action argument to iidebug")
+    }
+
+    t := image(token) || (" (" || (\$ttbl[token] | "unknown") || ")")
+    writes(&errout, "on lookahead ", t, ", in state ", state)
+    if \ruleno then {
+	(p := !rule_list).no === ruleno &
+	    write(&errout, "; rule ", $production_2_string(p, $ttbl))
+    }
+    # for errors, ruleno is null
+    else write(&errout)
+
+    write(&errout, " state stack now: ")
+    every write(&errout, "\t", image(!parser.state_stack))
+    write(&errout, " value stack now: ")
+    if *parser.value_stack > 0
+    then every write(&errout, "\t", image(!parser.value_stack))
+    else write(&errout, "\t(empty)")
+
+    return
+
+end
+
+
+#
+# production_2_string: production record -> string
+# p -> s
+#
+# Stringizes an image of the LHS and RHS of production p in
+# human-readable form.
+#
+procedure $production_2_string(p, ibtoktbl)
+
+    local s, m, t
+
+    s := image(p.LHS) || " -> "
+    # Render each RHS member: use its symbolic token name from
+    # ibtoktbl when one exists, otherwise its image.
+    every m := !p.RHS do {
+	if t := \ (\ibtoktbl)[m]
+	then s ||:= t || " "
+	else s ||:= image(m) || " "
+    }
+    # if the POS field is nonnull, print it
+    s ||:= "(POS = " || image(\p.POS) || ") "
+    # if the LOOK field is nonnull, print it, too
+    s ||:= "lookahead = " || image(\p.LOOK)
+
+    return trim(s)
+
+end
+
+#
+# show_new_forest
+#
+procedure show_new_forest(msg, actives, shifters, reducers, barfers, parser)
+    # Debugging aid: announce msg, dump every live parser in all four
+    # queues, then show the newly forked parser environment.
+    write(&errout, msg)
+    write(&errout, " List of active parsers:")
+    every write(&errout, "\t", image(!actives))
+    every write(&errout, "\t", image(!shifters))
+    every write(&errout, "\t", image(!reducers))
+    every write(&errout, "\t", image(!barfers), " (error)")
+    write(&errout, "\tnew -> ", image(parser))
+end
+$$endif # IIDEBUG
+
+
+$$ifdef COMPRESSED_TABLES
+
+#
+# uncompress_action
+#
+# Decode one compressed action from &subject at &pos into the textual
+# form used by the uncompressed tables: a 2-bit tag selects shift
+# ("s<state>.<rule>", 3 bytes), reduce ("r<rule><LHS#><rhsize>",
+# 4 bytes), or accept ("a", 1 byte).  Fails on any other tag.
+#
+procedure $uncompress_action()
+
+    local next_chunk, full_action
+
+    # Generator yielding the ordinal of each remaining byte.
+    next_chunk := create ord(!&subject[&pos:0])
+    case $in_ib_bits(next_chunk, 2) of {
+	0: {
+	    full_action := "s"
+	    full_action ||:= $in_ib_bits(next_chunk, 11)
+	    full_action ||:= "."
+	    full_action ||:= $in_ib_bits(next_chunk, 11)
+	    move(3)
+	}
+	1: {
+	    full_action := "r"
+	    full_action ||:= $in_ib_bits(next_chunk, 11)
+	    full_action ||:= "<"
+	    full_action ||:= $in_ib_bits(next_chunk, 11)
+	    full_action ||:= ">"
+	    full_action ||:= $in_ib_bits(next_chunk, 8)
+	    move(4)
+	}
+	2: {
+	    full_action := "a"
+	    move(1)
+	}
+    } | fail
+
+    return full_action
+
+end
+
+
+#
+# in_ib_bits: like inbits (IPL), but with coexpression for file
+#
+#
+# in_ib_bits: like inbits (IPL), but with coexpression for file.
+# Reads len bits from the byte generator next_chunk and returns them
+# as an integer; leftover bits from the last byte are carried between
+# calls in the statics old_byte/old_len.  Fails when the generator
+# is exhausted mid-read.
+#
+procedure $in_ib_bits(next_chunk, len)
+
+    local i, byte, old_byte_mask
+    static old_byte, old_len, byte_length
+    initial {
+	old_byte := old_len := 0
+	byte_length := 8
+    }
+
+    # Mask off the bits of the previous byte already consumed.
+    old_byte_mask := (0 < 2^old_len - 1) | 0
+    old_byte := iand(old_byte, old_byte_mask)
+    i := ishift(old_byte, len-old_len)
+
+    # If the leftover bits satisfy the request, we're done.
+    len -:= (len > old_len) | {
+	old_len -:= len
+	return i
+    }
+
+    # Otherwise keep pulling whole bytes until len is satisfied.
+    while byte := @next_chunk do {
+	i := ior(i, ishift(byte, len-byte_length))
+	len -:= (len > byte_length) | {
+	    old_len := byte_length-len
+	    old_byte := byte
+	    return i
+	}
+    }
+
+end
+
+$$endif # COMPRESSED_TABLES
+
+#
+# fullcopy: make full recursive copy of object obj
+#
+#
+# fullcopy: make full recursive copy of object obj.  Immutable and
+# shared-by-design values (csets, strings, files, co-expressions,
+# procedures, numbers) are returned as-is; lists, sets, tables, and
+# records are copied element by element, recursively.
+#
+procedure $fullcopy(obj)
+
+    local retval, i, k
+
+    case type(obj) of {
+	"co-expression" : return obj
+	"cset" : return obj
+	"file" : return obj
+	"integer" : return obj
+	"list" : {
+	    retval := list(*obj)
+	    every i := 1 to *obj do
+		retval[i] := $fullcopy(obj[i])
+	    return retval
+	}
+	"null" : return &null
+	"procedure" : return obj
+	"real" : return obj
+	"set" : {
+	    retval := set()
+	    every insert(retval, $fullcopy(!obj))
+	    return retval
+	}
+	"string" : return obj
+	"table" : {
+	    # obj[[]] retrieves the table's default value.
+	    retval := table(obj[[]])
+	    every k := key(obj) do
+		insert(retval, $fullcopy(k), $fullcopy(obj[k]))
+	    return retval
+	}
+	# probably a record; if not, we're dealing with a new
+	# version of Icon or a nonstandard implementation, and
+	# we're screwed
+	default : {
+	    retval := copy(obj)
+	    every i := 1 to *obj do
+		retval[i] := $fullcopy(obj[i])
+	    return retval
+	}
+    }
+
+end
+
+
+$$ifdef AUTO_PRUNE
+# Remove parsers that duplicate another parser's work: two parsers
+# with the same current state and element-wise identical value stacks
+# are redundant, so only one survives (the one with the lower error
+# count).  Rebuilds the actives list in place.
+#
+procedure auto_prune(actives)
+
+    # NOTE(review): new_actives, parser1, parser2, i and j are not
+    # declared local here -- confirm they cannot collide with globals.
+    new_actives := []
+    while parser1 := pop(actives) do {
+	every parser2 := actives[j := 1 to *actives] do {
+	    parser1.state_stack[1] = parser2.state_stack[1] | next
+	    *parser1.value_stack = *parser2.value_stack | next
+	    every i := 1 to *parser1.value_stack do {
+		parser1.value_stack[i] === parser2.value_stack[i] |
+		    break next
+	    }
+	    # Keep the less-erroneous of the two duplicates.
+	    if parser1.errors < parser2.errors then
+		actives[j] := parser1
+	    break next
+	}
+	put(new_actives, parser1)
+    }
+
+    every put(actives, !new_actives)
+    return &null
+
+end
+$$endif # AUTO_PRUNE
diff --git a/ipl/packs/ibpag2/iiparse.lib b/ipl/packs/ibpag2/iiparse.lib
new file mode 100644
index 0000000..7367735
--- /dev/null
+++ b/ipl/packs/ibpag2/iiparse.lib
@@ -0,0 +1,419 @@
+############################################################################
+#
+# Name: iiparse.lib
+#
+# Title: LR parser code
+#
+# Author: Richard L. Goerwitz
+#
+# Version: 1.31
+#
+############################################################################
+#
+# LR parser code for use by Ibpag2-generated files. Entry point is
+# iiparse(infile, fail_on_error). Infile is the stream from which
+# input is to be taken. Infile is passed as argument 1 to the
+# user-supplied lexical analyzer, iilex_module() (where _module is
+# the string supplied with the -m option to Ibpag2). If
+# fail_on_error is nonnull, the parser, iiparse, will fail on errors,
+# rather than abort. Iiparse() returns the top element on its value
+# stack on a successful parse (which can be handy).
+#
+# Iilex_module() must suspend integers for tokens and may also set
+# iilval_module to the actual string values. Tokens -2, -1, and 0
+# are reserved. -2 is epsilon, and -1 is error. 0 is EOF, and is
+# automatically appended to the token stream when iilex_module, the
+# tokenizer, fails. These values should not normally be returned by
+# the analyzer. In general, it is a good idea to $include
+# iilex_module from your Ibpag2 source files, so that it can use the
+# symbolic %token names declared in the original Ibpag2 source file.
+# As implied above ("suspend"), iilex_module must be a generator,
+# failing on EOF.
+#
+# If desired, the user may include his or her own error-handling
+# routine. It must be called iiparse_module (where _module is once
+# again the module name supplied to ibpag2 via the -m option). The
+# global variable line_number_module is automatically defined below,
+# so a typical arrangement would be for the lexical analyzer to
+# initialize line_number_module to 0, and increment by 1 for each
+# line read. The error handler, iierror_module() can then display
+# this variable. Note that the error handler should accept a single
+# string argument (set by iiparse to describe the error just
+# encountered).
+#
+############################################################################
+#
+# See also: ibpag2.icn
+#
+############################################################################
+
+$$line 50 "iiparse.lib"
+
+# These defines are output by Ibpag2 ahead of time (with the module
+# name appended, if need be):
+#
+# $define iierrok recover_shifts := &null;
+# $define IIERROR iidirective ||:= "error";
+# $define IIACCEPT iidirective ||:= "accept";
+# $define iiclearin iidirective ||:= "clearin";
+
+# Warning! If you change the name of the value stack, change it also
+# in ibreader.icn, procedure write_action_as_procedure().
+#
+global $iilval, $errors, $line_number, $state_stack, $value_stack,
+ $iidirective, $recover_shifts, $discards
+
+#
+# iiparse: file x anything -> ?
+# (stream, fail_on_error) -> ?
+#
+# Where stream is an open file, where fail_on_error is a switch
+# that (if nonnull) tells the iiparse to fail, rather than abort,
+# on error, and where ? represents the user-defined result of a
+# completed parse of file, from the current location up to the
+# point where the parser executes an "accept" action.
+#
+procedure $iiparse(stream, fail_on_error)
+
+    local token, next_token, act, ruleno, newsym, rhsize, arglist,
+        result, tmp, func
+    static atbl, gtbl, ttbl
+
+    initial {
+        $iidirective := ""
+        # action, goto, and token-name tables are spliced in here by
+        # Ibpag2 when it generates the output parser
+        atbl := $atbl_insertion_point
+        gtbl := $gtbl_insertion_point
+        ttbl := $ttbl_insertion_point
+        $$line 86 "iiparse.lib"
+        \$iilex | stop("no iilex tokenizer defined")
+    }
+
+$$ifndef IIDEBUG
+    # With debugging compiled out, bind $iidebug to the integer 1.
+    # In Icon, an integer i applied to arguments (mutual evaluation)
+    # just returns its i-th argument, so every $iidebug(...) call
+    # below becomes a harmless no-op.
+    $iidebug := 1
+$$endif                         # not IIDEBUG
+
+    $state_stack := [1]
+    $value_stack := []
+
+    $errors := 0                # errors is global
+    # token source: the user's tokenizer, with EOF (0) appended
+    next_token := create $iilex(stream, fail_on_error) | 0
+
+    token := @next_token
+    repeat {
+        #
+        # Begin cycle by checking whether there is a valid action
+        # for state $state_stack[1] and lookahead token.  Atbl and
+        # gtbl here have a "backwards" structure:  t[token][state]
+        # (usually they go t[state][token]).
+        #
+        if act := \ (\atbl[token])[$state_stack[1]] then {
+$$ifdef COMPRESSED_TABLES
+            act := $uncompress_action(act)
+$$endif                         #COMPRESSED TABLES
+            act ? {
+                # There's a valid action:  Perform it.
+                case move(1) of {
+                    "s": {
+                        #
+                        # Shift action format, e.g. s2.1 = shift and
+                        # go to state 2 by rule 1.
+                        #
+                        push($state_stack, integer(tab(find("."))))
+                        push($value_stack, $iilval)
+                        ="."; ruleno := integer(tab(many(&digits)))
+                        pos(0) | stop("malformed action: ", act)
+                        #
+                        # If, while recovering, we can manage to
+                        # shift 3 tokens, then we consider ourselves
+                        # resynchronized.  Don't count error (-1).
+                        #
+                        # NOTE(review): the "> 4" test below actually
+                        # requires 5 shifts, not the 3 the comments
+                        # claim -- confirm the intended threshold.
+                        #
+                        if token ~= -1 then {
+                            if \$recover_shifts +:= 1 then {
+                                # 3 shifts = successful recovery
+                                if $recover_shifts > 4 then {
+                                    $recover_shifts := &null
+                                    $discards := 0
+                                }
+                            }
+                        }
+                        $iidebug("s", ttbl, token, ruleno)
+                        token := @next_token | break
+                    }
+                    "r": {
+                        #
+                        # Reduce action format, e.g. r1<S>2 = reduce
+                        # by rule 1 (LHS = S, RHS length = 2).
+                        #
+                        ruleno := integer(1(tab(find("<")), move(1)))
+                        newsym := 1(tab(find(">")), move(1))
+                        rhsize := integer(tab(many(&digits)))
+                        arglist := []
+                        every 1 to rhsize do {
+                            pop($state_stack)
+                            push(arglist, pop($value_stack))
+                        }
+                        # on the structure of gtbl, see above on atbl
+                        push($state_stack, gtbl[newsym][$state_stack[1]])
+                        #
+                        # The actions are in procedures having the same
+                        # name as the number of their rule, bracketed
+                        # by underscores followed by the current module.
+                        #
+                        if func := proc("_" || ruleno || "_" || $module)
+                        then {
+                            # run the semantic action; if it fails, fall
+                            # back on the last RHS value (or &null)
+                            result := func!arglist | arglist[-1] | &null
+                            tmp := $iidirective
+                            $iidirective := ""
+                            #
+                            # IIERROR, IIACCEPT, iierrok, and iiclearin
+                            # are implemented using a search through a global
+                            # iidirective variable; see the $defines
+                            # above
+                            #
+                            if *tmp > 0 then {
+                                if find("clearin", tmp) then
+                                    token := @next_token
+                                if find("error", tmp) then {
+                                    # restore stacks & fake an error
+                                    pop($state_stack)
+                                    every 1 to rhsize do
+                                        push($value_stack, !arglist)
+                                    $errors +:= 1
+                                    # rebuild the stream with the current
+                                    # token pushed back in front of the
+                                    # remaining input
+                                    next_token := create (token |
+                                        (|@next_token))
+                                    token := -1
+                                    next
+                                }
+                                if find("accept", tmp) then {
+                                    $iidebug("a", ttbl, token, ruleno)
+                                    return result
+                                }
+                            }
+                        }
+                        # If there is no action code for this rule...
+                        else {
+                            # ...push the value of the last RHS arg.
+                            # For 0-length e-productions, push &null.
+                            result := arglist[-1] | &null
+                        }
+                        push($value_stack, result)
+                        $iidebug("r", ttbl, token, ruleno)
+                    }
+                    # We're done.  Return the last-generated value.
+                    "a": {
+                        $iidebug("a", ttbl, token, ruleno)
+                        return $value_stack[1]
+                    }
+                }
+            }
+        }
+        #
+        # ...but if there is *no* action for atbl[token][$state_stack[1]],
+        # then we have an error.
+        #
+        else {
+            # nonnull recover_shifts means we are already mid-recovery;
+            # the \-test assigns 0 (resetting the shift count) only then
+            if \$recover_shifts := 0 then {
+                #
+                # If we're already in an error state, discard the
+                # current token, and increment the number of discards
+                # we have made.  500 is too many; abort.
+                #
+                if ($discards +:= 1) > 500 then {
+                    if \$iierror
+                    then $iierror("fatal error: can't resynchronize")
+                    else write(&errout, "fatal error: can't resynchronize")
+                    if \fail_on_error then fail
+                    else stop()
+                }
+                $iidebug("e", ttbl, token)
+                #
+                # We were in the process of recovering, and the late
+                # token didn't help; discard it and try again.
+                #
+                token := @next_token | break
+            } else {
+                $errors +:= 1       # global error count
+                $discards := $recover_shifts := 0
+                if \$iierror
+                then $iierror(image(\ttbl[token]) | image(token))
+                else write(&errout, "parse error")
+                #
+                # If error appears in a RHS, pop states until we get to
+                # a spot where error (-1) is a valid lookahead token:
+                #
+                if \ttbl[-1] then {
+                    until *$state_stack = 0 do {
+                        if \atbl[-1][$state_stack[1]] then {
+                            $iidebug("e", ttbl, token)
+                            next_token := create (token | (|@next_token))
+                            token := -1
+                            break next
+                        } else pop($state_stack) & pop($value_stack)
+                    }
+                    # If we get past here, the stack is now empty.  Abort.
+                }
+                if \fail_on_error then fail
+                else stop()
+            }
+        }
+    }
+
+    #
+    # If we get to here without hitting a final state, then we aren't
+    # going to get a valid parse.  Abort.
+    #
+    if \$iierror
+    then $iierror("unexpected EOF")
+    else write(&errout, "unexpected EOF")
+
+    if \fail_on_error then fail
+    else stop()
+
+end
+
+
+$$ifdef IIDEBUG
+
+record production(LHS, RHS, POS, LOOK, no, prec, assoc)
+#
+# iidebug
+#
+procedure $iidebug(action, ttbl, token, ruleno)
+
+    local p, t, state
+    static rule_list
+    initial {
+        # list of production records, spliced in by Ibpag2
+        rule_list := $rule_list_insertion_point
+        $$line 279 "iiparse.lib"
+    }
+
+    # Emit a trace line for the action just taken ("s"hift, "r"educe,
+    # "a"ccept, or "e"rror), followed by dumps of the state and value
+    # stacks.  For shifts and reductions the state of interest is the
+    # one beneath the newly pushed state, i.e. $state_stack[2].
+    case action of {
+        "a" : writes(&errout, "accepting ") & state := $state_stack[1]
+        "e" : writes(&errout, "***ERROR***\n") &
+              writes(&errout, "recovery shifts = ", $recover_shifts,"\n") &
+              writes(&errout, "discarded tokens = ", $discards, "\n") &
+              writes(&errout, "total error count = ", $errors, "\n") &
+              writes(&errout, "error action ") & state := $state_stack[1]
+        "r" : writes(&errout, "reducing ") & state := $state_stack[2]
+        "s" : writes(&errout, "shifting ") & state := $state_stack[2]
+        default : stop("malformed action argument to iidebug")
+    }
+
+    t := image(token) || (" (" || (\ttbl[token] | "unknown") || ")")
+    writes(&errout, "on lookahead ", t, ", in state ", state)
+    if \ruleno then {
+        # Goal-directed search for the production whose number matches
+        # ruleno.  BUG FIX: the failure diagnostic formerly read
+        # "tbl[symbol][state]" -- tbl and symbol are undefined (null)
+        # here, so the stop() itself would have died with a run-time
+        # error; report the missing rule number instead.
+        (p := !rule_list).no = ruleno |
+            stop("no rule number ", ruleno)
+        write(&errout, "; rule ", $production_2_string(p, ttbl))
+    }
+    # for errors, ruleno is null
+    else write(&errout)
+
+    write(&errout, " state stack now: ")
+    every write(&errout, "\t", image(!$state_stack))
+    write(&errout, " value stack now: ")
+    if *$value_stack > 0
+    then every write(&errout, "\t", image(!$value_stack))
+    else write(&errout, "\t(empty)")
+
+    return
+
+end
+
+
+#
+# production_2_string: production record -> string
+# p -> s
+#
+# Stringizes an image of the LHS and RHS of production p in
+# human-readable form.
+#
+procedure $production_2_string(p, ibtoktbl)
+
+    local s, rhs_sym
+
+    # Render production record p as "LHS -> sym sym ...".  Each RHS
+    # symbol is mapped through the token table when one is supplied
+    # and has a (nonnull) entry for it; otherwise image() is used.
+    s := image(p.LHS) || " -> "
+    every rhs_sym := !p.RHS do
+        s ||:= (\ (\ibtoktbl)[rhs_sym] | image(rhs_sym)) || " "
+
+    # Append the dot position and lookahead only when present: the
+    # null-test makes the whole augmented assignment fail -- and so
+    # append nothing -- when the field is null.
+    s ||:= "(POS = " || image(\p.POS) || ") "
+    s ||:= "lookahead = " || image(\p.LOOK)
+
+    return trim(s)
+
+end
+$$endif # IIDEBUG
+
+
+$$ifdef COMPRESSED_TABLES
+
+#
+# uncompress_action
+#
+procedure $uncompress_action(action)
+
+    local bits, decoded
+
+    # Reconstitute a human-readable action string from its packed
+    # bit-stream encoding.  A leading 2-bit tag selects the action
+    # type: 0 = shift ("sSTATE.RULE"), 1 = reduce ("rRULE<LHS>LEN"),
+    # 2 = accept ("a").  Shift/reduce fields are 11 bits apiece,
+    # except reduce's trailing RHS length, which is 8 bits.
+    bits := create ord(!action)
+    case $in_ib_bits(bits, 2) of {
+        0: decoded := "s" || $in_ib_bits(bits, 11) ||
+                      "." || $in_ib_bits(bits, 11)
+        1: decoded := "r" || $in_ib_bits(bits, 11) ||
+                      "<" || $in_ib_bits(bits, 11) ||
+                      ">" || $in_ib_bits(bits, 8)
+        2: decoded := "a"
+    }
+
+    return decoded
+
+end
+
+
+#
+# in_ib_bits: like inbits (IPL), but with coexpression for file
+#
+procedure $in_ib_bits(next_chunk, len)
+
+    local i, byte, old_byte_mask
+    static old_byte, old_len, byte_length
+    initial {
+        old_byte := old_len := 0
+        byte_length := 8
+    }
+
+    # Pull len bits off the byte stream produced by coexpression
+    # next_chunk, high-order bits first, buffering the unconsumed
+    # remainder of the last byte in the statics old_byte/old_len
+    # between calls.  (Like the IPL's inbits(), but driven by a
+    # coexpression instead of a file.)  Fails when the stream runs
+    # dry before len bits are available.
+
+    # keep only the old_len still-unread low-order bits
+    old_byte_mask := (0 < 2^old_len - 1) | 0
+    old_byte := iand(old_byte, old_byte_mask)
+    i := ishift(old_byte, len-old_len)
+
+    # if the buffered bits alone satisfy the request, debit old_len
+    # and return; otherwise reduce len by what the buffer supplied
+    len -:= (len > old_len) | {
+        old_len -:= len
+        return i
+    }
+
+    # consume whole bytes until the request is filled; the byte that
+    # overshoots becomes the new buffered remainder
+    while byte := @next_chunk do {
+        i := ior(i, ishift(byte, len-byte_length))
+        len -:= (len > byte_length) | {
+            old_len := byte_length-len
+            old_byte := byte
+            return i
+        }
+    }
+
+end
+
+$$endif # COMPRESSED_TABLES
diff --git a/ipl/packs/ibpag2/iohno.icn b/ipl/packs/ibpag2/iohno.icn
new file mode 100644
index 0000000..dcf54d0
--- /dev/null
+++ b/ipl/packs/ibpag2/iohno.icn
@@ -0,0 +1,95 @@
+############################################################################
+#
+# Name: iohno.icn
+#
+# Title: iohno (error handler, with hard-coded messages)
+#
+# Author: Richard L. Goerwitz
+#
+# Version: 1.20
+#
+############################################################################
+#
+# This file contains iohno(n, s) - an error handler taking two
+# arguments: 1) an integer and 2) a string. The string (2) is an
+# optional error message. The integer (1) is one of several
+# hard-coded error numbers (see below).
+#
+############################################################################
+#
+# Links: rewrap
+#
+############################################################################
+
+#
+# iohno: print error message s to stderr; abort with exit status n
+#
+procedure iohno(n, s)
+
+    local i, msg
+    static errlist
+    initial {
+        # hard-coded error numbers and their messages, grouped
+        # roughly by processing phase
+        errlist := [[100, "unspecified failure"],
+
+                [2, "can't find iiparse.lib file"],
+
+                [4, "unexpected EOF"],
+                [5, "unknown associativity value"],
+
+                [11, "malformed right-hand side"],
+                [12, "unexpected RHS symbol type"],
+
+                [21, "malformed left-hand side"],
+
+                [30, "unknown or unimplemented % declaration"],
+                [31, "malformed token declaration"],
+                [32, "start symbol redefined"],
+                [33, "LHS symbol expected"],
+                [34, "colon missing"],
+                [35, "malformed RHS in rule declaration"],
+                [36, "undeclared character literal"],
+                [37, "illegal $integer reference"],
+                [38, "out-of-range $reference"],
+                [39, "unterminated brace { in action"],
+                [43, "bogus precedence"],
+                [44, "superfluous epsilon"],
+                [45, "superfluous %union declaration"],
+                [47, "empty or missing rules section"],
+                [48, "garbled declarations section"],
+                [49, "multiple characters within quotes"],
+
+                [40, "same prec, different (or perhaps lacking) assoc"],
+                [41, "conflict between nonassociative rules"],
+                [42, "reduce -- reduce conflict"],
+                [46, "unresolvable shift/reduce conflict"],
+
+                [50, "illegal conflict for nonassociative rules"],
+                [51, "reduce/reduce conflict"],
+                [52, "nonterminal useless and/or declared as a terminal"],
+
+                [60, "malformed $insertion point in iiparse file"],
+
+                [70, "bad action format"],
+                [71, "nonexistent rule number specified in old action"],
+                [72, "nonexistent rule number specified in new action"],
+
+                [80, "conflict in goto table"],
+
+                [90, "RHS nonterminal appears in no LHS"],
+                [91, "useless nonterminal"]
+                ]
+    }
+
+    # default the error number; NOTE(review): 0 has no errlist entry,
+    # so msg stays null below -- presumably writes() renders null as
+    # an empty string, giving "error 0 ()"; confirm against Icon docs
+    /n := 0
+    # linear lookup of n's message (no early exit; last match wins)
+    every i := 1 to *errlist do
+        if errlist[i][1] = n then msg := errlist[i][2]
+    writes(&errout, "error ", n, " (", msg, ")")
+    if \s then {
+        write(&errout, ": ")
+        # rewrap(s) re-breaks the message into lines; the trailing
+        # argumentless rewrap() flushes its buffered remainder
+        every write(&errout, "\t", rewrap(s) | rewrap())
+    }
+    else write(&errout)
+
+    # the error number doubles as the process exit status
+    exit(n)
+
+end
diff --git a/ipl/packs/ibpag2/itokens.icn b/ipl/packs/ibpag2/itokens.icn
new file mode 100644
index 0000000..1bb9cd1
--- /dev/null
+++ b/ipl/packs/ibpag2/itokens.icn
@@ -0,0 +1,925 @@
+############################################################################
+#
+# Name: itokens.icn
+#
+# Title: itokens (Icon source-file tokenizer)
+#
+# Author: Richard L. Goerwitz
+#
+# Version: 1.11
+#
+############################################################################
+#
+# This file contains itokens() - a utility for breaking Icon source
+# files up into individual tokens. This is the sort of routine one
+# needs to have around when implementing things like pretty printers,
+# preprocessors, code obfuscators, etc. It would also be useful for
+# implementing cut-down implementations of Icon written in Icon - the
+# sort of thing one might use in an interactive tutorial.
+#
+# Itokens(f, x) takes, as its first argument, f, an open file, and
+# suspends successive TOK records. TOK records contain two fields.
+# The first field, sym, contains a string that represents the name of
+# the next token (e.g. "CSET", "STRING", etc.). The second field,
+# str, gives that token's literal value. E.g. the TOK for a literal
+# semicolon is TOK("SEMICOL", ";"). For a mandatory newline, itokens
+# would suspend TOK("SEMICOL", "\n").
+#
+# Unlike Icon's own tokenizer, itokens() does not return an EOFX
+# token on end-of-file, but rather simply fails. It also can be
+# instructed to return syntactically meaningless newlines by passing
+# it a nonnull second argument (e.g. itokens(infile, 1)). These
+# meaningless newlines are returned as TOK records with a null sym
+# field (i.e. TOK(&null, "\n")).
+#
+# NOTE WELL: If new reserved words or operators are added to a given
+# implementation, the tables below will have to be altered. Note
+# also that &keywords should be implemented on the syntactic level -
+# not on the lexical one. As a result, a keyword like &features will
+# be suspended as TOK("CONJUNC", "&") and TOK("IDENT", "features").
+#
+############################################################################
+#
+# Links: slshupto
+#
+# Requires: coexpressions
+#
+############################################################################
+
+#link ximage, slshupto
+link slshupto #make sure you have version 1.2 or above
+
+global next_c, line_number
+record TOK(sym, str)
+
+#
+# main: an Icon source code uglifier
+#
+# Stub main for testing; uncomment & compile. The resulting
+# executable will act as an Icon file compressor, taking the
+# standard input and outputting Icon code stripped of all
+# unnecessary whitespace. Guaranteed to make the code a visual
+# mess :-).
+#
+#procedure main()
+#
+# local separator, T
+# separator := ""
+# every T := itokens(&input) do {
+# if any(&digits ++ &letters ++ '_.', \T.str, 1, 2) & \T.sym ~== "DOT"
+# then writes(separator)
+# if T.sym == "SEMICOL" then writes(";") else writes(T.str)
+# if any(&digits ++ &letters ++ '_.', \T.str, -1, 0) & \T.sym ~== "DOT"
+# then separator := " " else separator := ""
+# }
+#
+#end
+
+
+#
+# itokens: file x anything -> TOK records (a generator)
+# (stream, nostrip) -> Rs
+#
+# Where stream is an open file, anything is any object (it only
+# matters whether it is null or not), and Rs are TOK records.
+# Note that itokens strips out useless newlines. If the second
+# argument is nonnull, itokens does not strip out superfluous
+# newlines. It may be useful to keep them when the original line
+# structure of the input file must be maintained.
+#
+procedure itokens(stream, nostrip)
+
+    local tok, prev
+
+    # Generate the tokens of the Icon source on stream, suspending one
+    # TOK record per token.  EOF ends the sequence (no EOFX record is
+    # passed along; the procedure just fails).  Syntactically
+    # meaningless newlines -- TOKs with a null sym field -- are
+    # stripped unless nostrip is nonnull.
+
+    # seed with an empty record so prev.sym is always accessible
+    prev := TOK()
+
+    every tok := \iparse_tokens(stream) do {
+        if /tok.sym then {
+            # meaningless newline: pass it through only in nostrip mode
+            if \nostrip then
+                suspend prev := tok
+            next
+        }
+        if tok.sym == "EOFX" then fail
+        #
+        # Directly after a semicolon, ambiguously unary/binary
+        # sequences like "**" must be read as runs of unary operators
+        # (** could be two unary stars or the [c]set intersection
+        # operator); expand_fake_beginner() splits them apart.
+        #
+        if \prev.sym == "SEMICOL" then
+            suspend prev := expand_fake_beginner(tok)
+        else
+            suspend prev := tok
+    }
+
+end
+
+
+#
+# expand_fake_beginner: TOK record -> TOK records
+#
+# Some "beginner" tokens aren't really beginners. They are token
+# sequences that could be either a single binary operator or a
+# series of unary operators. The tokenizer's job is just to snap
+# up as many characters as could logically constitute an operator.
+# Here is where we decide whether to break the sequence up into
+# more than one op or not.
+#
+procedure expand_fake_beginner(next_token)
+
+    local seq
+    static exptbl
+    initial {
+        # Map each ambiguous "beginner" token onto the sequence of
+        # unary-operator tokens it decomposes into.
+        exptbl := table()
+        insert(exptbl, "CONCAT",  [TOK("BAR", "|"), TOK("BAR", "|")])
+        insert(exptbl, "DIFF",    [TOK("MINUS", "-"), TOK("MINUS", "-")])
+        insert(exptbl, "EQUIV",   [TOK("NUMEQ", "="), TOK("NUMEQ", "="),
+                                   TOK("NUMEQ", "=")])
+        insert(exptbl, "INTER",   [TOK("STAR", "*"), TOK("STAR", "*")])
+        insert(exptbl, "LCONCAT", [TOK("BAR", "|"), TOK("BAR", "|"),
+                                   TOK("BAR", "|")])
+        insert(exptbl, "LEXEQ",   [TOK("NUMEQ", "="), TOK("NUMEQ", "=")])
+        insert(exptbl, "LEXNE",   [TOK("TILDE", "~"), TOK("NUMEQ", "="),
+                                   TOK("NUMEQ", "=")])
+        insert(exptbl, "NOTEQUIV",[TOK("TILDE", "~"), TOK("NUMEQ","="),
+                                   TOK("NUMEQ", "="), TOK("NUMEQ", "=")])
+        insert(exptbl, "NUMNE",   [TOK("TILDE", "~"), TOK("NUMEQ","=")])
+        insert(exptbl, "UNION",   [TOK("PLUS", "+"), TOK("PLUS", "+")])
+    }
+
+    # If the token has an expansion, suspend its pieces one by one
+    # (and fail after the last); otherwise hand the token back intact.
+    if seq := \exptbl[next_token.sym] then
+        suspend !seq
+    else
+        return next_token
+
+end
+
+
+#
+# iparse_tokens: file -> TOK records (a generator)
+# (stream) -> tokens
+#
+# Where file is an open input stream, and tokens are TOK records
+# holding both the token type and actual token text.
+#
+# TOK records contain two parts, a preterminal symbol (the first
+# "sym" field), and the actual text of the token ("str"). The
+# parser only pays attention to the sym field, although the
+# strings themselves get pushed onto the value stack.
+#
+# Note the following kludge: Unlike real Icon tokenizers, this
+# procedure returns syntactically meaningless newlines as TOK
+# records with a null sym field. Normally they would be ignored.
+# I wanted to return them so they could be printed on the output
+# stream, thus preserving the line structure of the original
+# file, and making later diagnostic messages more usable.
+#
+procedure iparse_tokens(stream, getchar)
+
+    local elem, whitespace, token, last_token, primitives, reserveds
+    static be_tbl, reserved_tbl, operators
+    initial {
+
+        # Primitive Tokens
+        #
+        # The third field of each entry holds "b"/"e" flags telling
+        # whether the token may begin and/or end an expression; these
+        # drive semicolon insertion at newlines (see do_newline).
+        #
+        primitives := [
+            ["identifier",      "IDENT",     "be"],
+            ["integer-literal", "INTLIT",    "be"],
+            ["real-literal",    "REALLIT",   "be"],
+            ["string-literal",  "STRINGLIT", "be"],
+            ["cset-literal",    "CSETLIT",   "be"],
+            ["end-of-file",     "EOFX",      "" ]]
+
+        # Reserved Words
+        #
+        reserveds  := [
+            ["break",           "BREAK",     "be"],
+            ["by",              "BY",        "" ],
+            ["case",            "CASE",      "b" ],
+            ["create",          "CREATE",    "b" ],
+            ["default",         "DEFAULT",   "b" ],
+            ["do",              "DO",        "" ],
+            ["else",            "ELSE",      "" ],
+            ["end",             "END",       "b" ],
+            ["every",           "EVERY",     "b" ],
+            ["fail",            "FAIL",      "be"],
+            ["global",          "GLOBAL",    "" ],
+            ["if",              "IF",        "b" ],
+            ["initial",         "INITIAL",   "b" ],
+            ["invocable",       "INVOCABLE", "" ],
+            ["link",            "LINK",      "" ],
+            ["local",           "LOCAL",     "b" ],
+            ["next",            "NEXT",      "be"],
+            ["not",             "NOT",       "b" ],
+            ["of",              "OF",        "" ],
+            ["procedure",       "PROCEDURE", "" ],
+            ["record",          "RECORD",    "" ],
+            ["repeat",          "REPEAT",    "b" ],
+            ["return",          "RETURN",    "be"],
+            ["static",          "STATIC",    "b" ],
+            ["suspend",         "SUSPEND",   "be"],
+            ["then",            "THEN",      "" ],
+            ["to",              "TO",        "" ],
+            ["until",           "UNTIL",     "b" ],
+            ["while",           "WHILE",     "b" ]]
+
+        # Operators
+        #
+        operators  := [
+            [":=",              "ASSIGN",    "" ],
+            ["@",               "AT",        "b" ],
+            ["@:=",             "AUGACT",    "" ],
+            ["&:=",             "AUGAND",    "" ],
+            ["=:=",             "AUGEQ",     "" ],
+            ["===:=",           "AUGEQV",    "" ],
+            [">=:=",            "AUGGE",     "" ],
+            [">:=",             "AUGGT",     "" ],
+            ["<=:=",            "AUGLE",     "" ],
+            ["<:=",             "AUGLT",     "" ],
+            ["~=:=",            "AUGNE",     "" ],
+            ["~===:=",          "AUGNEQV",   "" ],
+            ["==:=",            "AUGSEQ",    "" ],
+            [">>=:=",           "AUGSGE",    "" ],
+            [">>:=",            "AUGSGT",    "" ],
+            ["<<=:=",           "AUGSLE",    "" ],
+            ["<<:=",            "AUGSLT",    "" ],
+            ["~==:=",           "AUGSNE",    "" ],
+            ["\\",              "BACKSLASH", "b" ],
+            ["!",               "BANG",      "b" ],
+            ["|",               "BAR",       "b" ],
+            ["^",               "CARET",     "b" ],
+            ["^:=",             "CARETASGN", "b" ],
+            [":",               "COLON",     "" ],
+            [",",               "COMMA",     "" ],
+            ["||",              "CONCAT",    "b" ],
+            ["||:=",            "CONCATASGN","" ],
+            ["&",               "CONJUNC",   "b" ],
+            [".",               "DOT",       "b" ],
+            ["--",              "DIFF",      "b" ],
+            ["--:=",            "DIFFASGN",  "" ],
+            ["===",             "EQUIV",     "b" ],
+            ["**",              "INTER",     "b" ],
+            ["**:=",            "INTERASGN", "" ],
+            ["{",               "LBRACE",    "b" ],
+            ["[",               "LBRACK",    "b" ],
+            ["|||",             "LCONCAT",   "b" ],
+            ["|||:=",           "LCONCATASGN","" ],
+            ["==",              "LEXEQ",     "b" ],
+            [">>=",             "LEXGE",     "" ],
+            [">>",              "LEXGT",     "" ],
+            ["<<=",             "LEXLE",     "" ],
+            ["<<",              "LEXLT",     "" ],
+            ["~==",             "LEXNE",     "b" ],
+            ["(",               "LPAREN",    "b" ],
+            ["-:",              "MCOLON",    "" ],
+            ["-",               "MINUS",     "b" ],
+            ["-:=",             "MINUSASGN", "" ],
+            ["%",               "MOD",       "" ],
+            ["%:=",             "MODASGN",   "" ],
+            ["~===",            "NOTEQUIV",  "b" ],
+            ["=",               "NUMEQ",     "b" ],
+            [">=",              "NUMGE",     "" ],
+            [">",               "NUMGT",     "" ],
+            ["<=",              "NUMLE",     "" ],
+            ["<",               "NUMLT",     "" ],
+            ["~=",              "NUMNE",     "b" ],
+            ["+:",              "PCOLON",    "" ],
+            ["+",               "PLUS",      "b" ],
+            ["+:=",             "PLUSASGN",  "" ],
+            ["?",               "QMARK",     "b" ],
+            ["<-",              "REVASSIGN", "" ],
+            ["<->",             "REVSWAP",   "" ],
+            ["}",               "RBRACE",    "e" ],
+            ["]",               "RBRACK",    "e" ],
+            [")",               "RPAREN",    "e" ],
+            [";",               "SEMICOL",   "" ],
+            ["?:=",             "SCANASGN",  "" ],
+            ["/",               "SLASH",     "b" ],
+            ["/:=",             "SLASHASGN", "" ],
+            ["*",               "STAR",      "b" ],
+            ["*:=",             "STARASGN",  "" ],
+            [":=:",             "SWAP",      "" ],
+            ["~",               "TILDE",     "b" ],
+            ["++",              "UNION",     "b" ],
+            ["++:=",            "UNIONASGN", "" ],
+            ["$(",              "LBRACE",    "b" ],
+            ["$)",              "RBRACE",    "e" ],
+            ["$<",              "LBRACK",    "b" ],
+            ["$>",              "RBRACK",    "e" ],
+            ["$",               "RHSARG",    "b" ],
+            ["%$(",             "BEGGLOB",   "b" ],
+            ["%$)",             "ENDGLOB",   "e" ],
+            ["%{",              "BEGGLOB",   "b" ],
+            ["%}",              "ENDGLOB",   "e" ],
+            ["%%",              "NEWSECT",   "be"]]
+
+        # static be_tbl, reserved_tbl
+        reserved_tbl := table()
+        every elem := !reserveds do
+            insert(reserved_tbl, elem[1], elem[2])
+        be_tbl := table()
+        every elem := !primitives | !reserveds | !operators do {
+            insert(be_tbl, elem[2], elem[3])
+        }
+    }
+
+    # Default character source: every character of every line of
+    # stream, with the stripped "\n" re-appended, while line_number
+    # tracks the line being read.
+    /getchar := create {
+        line_number := 0
+        ! ( 1(!stream, line_number +:=1) || "\n" )
+    }
+    whitespace := ' \t'
+    /next_c := @getchar | {
+        if \stream then
+            return TOK("EOFX")
+        else fail
+    }
+
+    # Main loop: dispatch on the (global) one-character lookahead
+    # next_c.  Each do_* helper consumes its token and leaves next_c
+    # holding the first character *after* that token.
+    repeat {
+        case next_c of {
+
+            "." : {
+                # Could be a real literal *or* a dot operator.  Check
+                # following character to see if it's a digit.  If so,
+                # it's a real literal.  We can only get away with
+                # doing the dot here because it is not a substring of
+                # any longer identifier.  If this gets changed, we'll
+                # have to move this code into do_operator().
+                #
+                last_token := do_dot(getchar)
+                suspend last_token
+#               write(&errout, "next_c == ", image(next_c))
+                next
+            }
+
+            "\n" : {
+                # If do_newline fails, it means we're at the end of
+                # the input stream, and we should break out of the
+                # repeat loop.
+                #
+                every last_token := do_newline(getchar, last_token, be_tbl)
+                do suspend last_token
+                if next_c === &null then break
+                next
+            }
+
+            "\#" : {
+                # Just a comment.  Strip it by reading every character
+                # up to the next newline.  The global var next_c
+                # should *always* == "\n" when this is done.
+                #
+                do_number_sign(getchar)
+#               write(&errout, "next_c == ", image(next_c))
+                next
+            }
+
+            "\"" : {
+                # Suspend as STRINGLIT everything from here up to the
+                # next non-backslashed quotation mark, inclusive
+                # (accounting for the _ line-continuation convention).
+                #
+                last_token := do_quotation_mark(getchar)
+                suspend last_token
+#               write(&errout, "next_c == ", image(next_c))
+                next
+            }
+
+            "'" : {
+                # Suspend as CSETLIT everything from here up to the
+                # next non-backslashed apostrophe, inclusive.
+                #
+                last_token := do_apostrophe(getchar)
+                suspend last_token
+#               write(&errout, "next_c == ", image(next_c))
+                next
+            }
+
+            &null : stop("iparse_tokens (lexer):  unexpected EOF")
+
+            default : {
+                # If we get to here, we have either whitespace, an
+                # integer or real literal, an identifier or reserved
+                # word (both get handled by do_identifier), or an
+                # operator.  The question of which we have can be
+                # determined by checking the first character.
+                #
+                if any(whitespace, next_c) then {
+                    # Like all of the TOK forming procedures,
+                    # do_whitespace resets next_c.
+                    do_whitespace(getchar, whitespace)
+                    # don't suspend any tokens
+                    next
+                }
+                if any(&digits, next_c) then {
+                    last_token := do_digits(getchar)
+                    suspend last_token
+                    next
+                }
+                if any(&letters ++ '_', next_c) then {
+                    last_token := do_identifier(getchar, reserved_tbl)
+                    suspend last_token
+                    next
+                }
+#               write(&errout, "it's an operator")
+                last_token := do_operator(getchar, operators)
+                suspend last_token
+                next
+            }
+        }
+    }
+
+    # If stream argument is nonnull, then we are in the top-level
+    # iparse_tokens().  If not, then we are in a recursive call, and
+    # we should not emit all this end-of-file crap.
+    #
+    if \stream then {
+        return TOK("EOFX")
+    }
+    else fail
+
+end
+
+
+#
+# do_dot: coexpression -> TOK record
+# getchar -> t
+#
+# Where getchar is the coexpression that produces the next
+# character from the input stream and t is a token record whose
+# sym field contains either "REALLIT" or "DOT". Essentially,
+# do_dot checks the next char on the input stream to see if it's
+# an integer. Since the preceding char was a dot, an integer
+# tips us off that we have a real literal. Otherwise, it's just
+# a dot operator. Note that do_dot resets next_c for the next
+# cycle through the main case loop in the calling procedure.
+#
+procedure do_dot(getchar)
+
+    local token
+    # global next_c
+
+    # A dot has just been read.  If the next character is a digit, we
+    # have a real literal like ".25" (possibly with an exponent);
+    # otherwise it is just the dot operator.  Either way, next_c is
+    # left holding the first character after the token, ready for the
+    # next cycle of the main case loop in the calling procedure.
+
+    if any(&digits, next_c := @getchar) then {
+        # fractional digits
+        token := "." || next_c
+        while any(&digits, next_c := @getchar) do
+            token ||:= next_c
+        # optional exponent; leading zeros in it are skipped (not
+        # copied into the token)
+        if token ||:= (next_c == ("e"|"E")) then {
+            while (next_c := @getchar) == "0"
+            while any(&digits, next_c) do {
+                token ||:= next_c
+                # BUG FIX: this line formerly read "next_c = @getchar"
+                # -- a numeric comparison, not an assignment -- so
+                # next_c never advanced and the loop spun forever on
+                # exponents with a nonzero digit (e.g. ".5e12").
+                next_c := @getchar
+            }
+        }
+        return TOK("REALLIT", token)
+    }
+
+    # Dot not followed by an integer; so we just have a dot operator,
+    # and not a real literal.
+    return TOK("DOT", ".")
+
+end
+
+
+#
+# do_newline: coexpression x TOK record x table -> TOK records
+# (getchar, last_token, be_tbl) -> Ts (a generator)
+#
+# Where getchar is the coexpression that returns the next
+# character from the input stream, last_token is the last TOK
+# record suspended by the calling procedure, be_tbl is a table of
+# tokens and their "beginner/ender" status, and Ts are TOK
+# records. Note that do_newline resets next_c. Do_newline is a
+# mess. What it does is check the last token suspended by the
+# calling procedure to see if it was a beginner or ender. It
+# then gets the next token by calling iparse_tokens again. If
+# the next token is a beginner and the last token is an ender,
+# then we have to suspend a SEMICOL token. In either event, both
+# the last and next token are suspended.
+#
+procedure do_newline(getchar, last_token, be_tbl)
+
+    local next_token
+    # global next_c
+
+#   write(&errout, "it's a newline")
+
+    # Go past any additional newlines.
+    #
+    while next_c == "\n" do {
+        # NL can be the last char in the getchar stream; if it *is*,
+        # then signal that it's time to break out of the repeat loop
+        # in the calling procedure.
+        #
+        next_c := @getchar | {
+            next_c := &null
+            fail
+        }
+        # next_c == "\n" yields "\n" when it succeeds, so each extra
+        # blank line is passed along as a meaningless-newline TOK
+        suspend TOK(&null, next_c == "\n")
+    }
+
+    # If there was a last token (i.e. if a newline wasn't the first
+    # character of significance in the input stream), then check to
+    # see if it was an ender.  If so, then check to see if the next
+    # token is a beginner.  If so, then suspend a TOK("SEMICOL")
+    # record before suspending the next token.
+    #
+    if find("e", be_tbl[(\last_token).sym]) then {
+#       write(&errout, "calling iparse_tokens via do_newline")
+#       &trace := -1
+        # First arg to iparse_tokens can be null here.  (Recursive
+        # call: fetch the token following the newline from the same
+        # getchar stream.)
+        \ (next_token := iparse_tokens(&null, getchar)).sym
+        if \next_token then {
+#           write(&errout, "call of iparse_tokens via do_newline yields ",
+#                 ximage(next_token))
+            if find("b", be_tbl[next_token.sym])
+            then suspend TOK("SEMICOL", "\n")
+            #
+            # See below.  If this were like the real Icon parser,
+            # the following line would be commented out.
+            #
+            else suspend TOK(&null, "\n")
+            return next_token
+        }
+        else {
+            #
+            # If this were a *real* Icon tokenizer, it would not emit
+            # any record here, but would simply fail.  Instead, we'll
+            # emit a dummy record with a null sym field.
+            #
+            return TOK(&null, "\n")
+#           &trace := 0
+#           fail
+        }
+    }
+
+    # See above.  Again, if this were like Icon's own tokenizer, we
+    # would just fail here, and not return any TOK record.
+    #
+#   &trace := 0
+    return TOK(&null, "\n")
+#   fail
+
+end
+
+
+#
+# do_number_sign: coexpression -> &null
+# getchar ->
+#
+# Where getchar is the coexpression that pops characters off the
+# main input stream. Sets the global variable next_c. This
+# procedure simply reads characters until it gets a newline, then
+# returns with next_c == "\n". Since the starting character was
+# a number sign, this has the effect of stripping comments.
+#
+procedure do_number_sign(getchar)
+
+    # global next_c
+
+    # Strip a comment: having just seen "#", consume characters up to
+    # (and leaving in next_c) the newline that terminates it.  The
+    # caller's main loop then handles that newline on its next cycle.
+    until next_c == "\n" do
+        next_c := @getchar
+
+    # Return to calling procedure to cycle around again with the new
+    # next_c already set.  Next_c should always be "\n" at this point.
+    return
+
+end
+
+
+#
+# do_quotation_mark: coexpression -> TOK record
+# getchar -> t
+#
+# Where getchar is the coexpression that yields another character
+# from the input stream, and t is a TOK record with "STRINGLIT"
+# as its sym field. Puts everything up to and including the next
+# non-backslashed quotation mark into the str field. Handles the
+# underscore continuation convention.
+#
+procedure do_quotation_mark(getchar)
+
+ local token
+ # global next_c
+
+ # write(&errout, "it's a string literal")
+ token := "\""
+ next_c := @getchar
+ repeat {
+ # A newline right after a trailing underscore is Icon's string
+ # continuation convention: drop the underscore and skip the
+ # leading whitespace of the next line.
+ if next_c == "\n" & token[-1] == "_" then {
+ token := token[1:-1]
+ while any('\t ', next_c := @getchar)
+ next
+ } else {
+ # slshupto (defined elsewhere) finds an unescaped '"';
+ # succeeding means the literal is complete.
+ if slshupto('"', token ||:= next_c, 2)
+ then {
+ next_c := @getchar
+ # resume outermost (repeat) loop in calling procedure,
+ # with the new (here explicitly set) next_c
+ return TOK("STRINGLIT", token)
+ }
+ next_c := @getchar
+ }
+ }
+
+end
+
+
+#
+# do_apostrophe: coexpression -> TOK record
+# getchar -> t
+#
+# Where getchar is the coexpression that yields another character
+# from the input stream, and t is a TOK record with "CSETLIT"
+# as its sym field. Puts everything up to and including the next
+# non-backslashed apostrophe into the str field.
+#
+procedure do_apostrophe(getchar)
+
+ local token
+ # global next_c
+
+# write(&errout, "it's a cset literal")
+ token := "'"
+ next_c := @getchar
+ repeat {
+ # Underscore-before-newline continuation, as for string
+ # literals: remove the underscore, skip leading whitespace.
+ if next_c == "\n" & token[-1] == "_" then {
+ token := token[1:-1]
+ while any('\t ', next_c := @getchar)
+ next
+ } else {
+ # An unescaped apostrophe (per slshupto) ends the literal.
+ if slshupto("'", token ||:= next_c, 2)
+ then {
+ next_c := @getchar
+ # Return & resume outermost containing loop in calling
+ # procedure w/ new next_c.
+ return TOK("CSETLIT", token)
+ }
+ next_c := @getchar
+ }
+ }
+
+end
+
+
+#
+# do_digits: coexpression -> TOK record
+# getchar -> t
+#
+# Where getchar is the coexpression that produces the next char
+# on the input stream, and where t is a TOK record containing
+# either "REALLIT" or "INTLIT" in its sym field, and the text of
+# the numeric literal in its str field.
+#
+procedure do_digits(getchar)
+
+ local token, tok_record, extras, digits, over
+ # global next_c
+
+ # For bases > 16
+ extras := "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz"
+ # Assume integer literal until proven otherwise....
+ tok_record := TOK("INTLIT")
+
+# write(&errout, "it's an integer or real literal")
+ # Drop a leading "0" (restored at the end if nothing else is seen).
+ token := ("0" ~== next_c) | ""
+ while any(&digits, next_c := @getchar) do
+ token ||:= next_c
+ # Radix literal, e.g. 16rFF: extend the digit cset for bases > 10.
+ if token ||:= (next_c == ("R"|"r")) then {
+ digits := &digits
+ if over := ((10 < token[1:-1]) - 10) * 2 then
+ digits ++:= extras[1:over+1] | extras
+ next_c := @getchar
+ if next_c == "-" then {
+ token ||:= next_c
+ next_c := @getchar
+ }
+ while any(digits, next_c) do {
+ token ||:= next_c
+ next_c := @getchar
+ }
+ } else {
+ # Fractional part makes it a real literal.
+ if token ||:= (next_c == ".") then {
+ while any(&digits, next_c := @getchar) do
+ token ||:= next_c
+ tok_record := TOK("REALLIT")
+ }
+ # Exponent (with optional minus sign) also makes it real.
+ if token ||:= (next_c == ("e"|"E")) then {
+ next_c := @getchar
+ if next_c == "-" then {
+ token ||:= next_c
+ next_c := @getchar
+ }
+ while any(&digits, next_c) do {
+ token ||:= next_c
+ next_c := @getchar
+ }
+ tok_record := TOK("REALLIT")
+ }
+ }
+ # An empty token means the literal was just "0".
+ tok_record.str := ("" ~== token) | "0"
+ return tok_record
+
+end
+
+
+#
+# do_whitespace: coexpression x cset -> &null
+# getchar x whitespace -> &null
+#
+# Where getchar is the coexpression producing the next char on
+# the input stream. Do_whitespace just repeats until it finds a
+# non-whitespace character, whitespace being defined as
+# membership of a given character in the whitespace argument (a
+# cset).
+#
+procedure do_whitespace(getchar, whitespace)
+
+# write(&errout, "it's junk")
+ # Pop characters off the stream until one falls outside the
+ # caller-supplied whitespace cset; next_c is left on that char.
+ until not any(whitespace, next_c) do
+ next_c := @getchar
+ return
+
+end
+
+
+#
+# do_identifier: coexpression x table -> TOK record
+# (getchar, reserved_tbl) -> t
+#
+# Where getchar is the coexpression that pops off characters from
+# the input stream, reserved_tbl is a table of reserved words
+# (keys = the string values, values = the names qua symbols in
+# the grammar), and t is a TOK record containing all subsequent
+# letters, digits, or underscores after next_c (which must be a
+# letter or underscore). Note that next_c is global and gets
+# reset by do_identifier.
+#
+procedure do_identifier(getchar, reserved_tbl)
+
+ local token
+ # global next_c
+
+# write(&errout, "it's an identifier")
+ token := next_c
+ # Gather subsequent letters, digits, and underscores.
+ while any(&letters ++ &digits ++ '_', next_c := @getchar)
+ do token ||:= next_c
+ # Reserved words map to their grammar symbol; anything else is IDENT.
+ return TOK(\reserved_tbl[token], token) | TOK("IDENT", token)
+
+end
+
+
+#
+# do_operator: coexpression x list -> TOK record
+# (getchar, operators) -> t
+#
+# Where getchar is the coexpression that produces the next
+# character on the input stream, operators is the operator list,
+# and where t is a TOK record describing the operator just
+# scanned. Calls recognop, which creates a DFSA to recognize
+# valid Icon operators. Arg2 (operators) is the list of lists
+# containing valid Icon operator string values and names (see
+# above).
+#
+procedure do_operator(getchar, operators)
+
+ local token, elem
+
+ token := next_c
+
+ # Go until recognop fails.
+ while elem := recognop(operators, token, 1) do
+ token ||:= (next_c := @getchar)
+# write(&errout, ximage(elem))
+ # Exactly one surviving candidate means an unambiguous operator;
+ # elem[1] holds [string-value, symbol-name, ...].
+ if *\elem = 1 then
+ return TOK(elem[1][2], elem[1][1])
+ else fail
+
+end
+
+
+# A state in the lazily built operator-recognizing DFSA: b and e
+# delimit the slice of the sorted master list covered by this state;
+# tbl maps a next character to a successor state (&null = arcs not yet
+# computed by create_arcs, 0 = no outgoing arcs).
+record dfstn_state(b, e, tbl)
+# The start state additionally caches master_list, the operator list
+# sorted on the string-value field.
+record start_state(b, e, tbl, master_list)
+#
+# recognop: list x string x integer -> list
+# (l, s, i) -> l2
+#
+# Where l is the list of lists created by the calling procedure
+# (each element contains a token string value, name, and
+# beginner/ender string), where s is a string possibly
+# corresponding to a token in the list, where i is the position in
+# the elements of l where the operator string values are recorded,
+# and where l2 is a list of elements from l that contain operators
+# for which string s is an exact match. Fails if there are no
+# operators that s is a prefix of, but returns an empty list if
+# there just aren't any that happen to match exactly.
+#
+# What this does is let the calling procedure just keep adding
+# characters to s until recognop fails, then check the last list
+# it returned to see if it is of length 1. If it is, then it
+# contains list with the vital stats for the operator last
+# recognized. If it is of length 0, then string s did not
+# contain any recognizable operator.
+#
+procedure recognop(l, s, i)
+
+ local current_state, master_list, c, result, j
+ static dfstn_table
+ initial dfstn_table := table()
+
+ # Default to matching on field 1 of each element.
+ /i := 1
+ # See if we've created an automaton for l already. (Automata are
+ # cached per operator list, keyed by list identity.)
+ /dfstn_table[l] := start_state(1, *l, &null, &null) & {
+ dfstn_table[l].master_list := sortf(l, i)
+ }
+
+ current_state := dfstn_table[l]
+ # Save master_list, as current_state will change later on.
+ master_list := current_state.master_list
+
+ s ? {
+ while c := move(1) do {
+
+ # Null means that this part of the automaton isn't
+ # complete.
+ #
+ if /current_state.tbl then
+ create_arcs(master_list, i, current_state, &pos)
+
+ # If the table has been clobbered, then there are no arcs
+ # leading out of the current state. Fail.
+ #
+ if current_state.tbl === 0 then
+ fail
+
+# write(&errout, "c = ", image(c))
+# write(&errout, "table for current state = ",
+# ximage(current_state.tbl))
+
+ # If we get to here, the current state has arcs leading
+ # out of it. See if c is one of them. If so, make the
+ # node to which arc c is connected the current state.
+ # Otherwise fail.
+ #
+ current_state := \current_state.tbl[c] | fail
+ }
+ }
+
+ # Return possible completions: elements in the current state's
+ # range whose operator string is exactly as long as s.
+ #
+ result := list()
+ every j := current_state.b to current_state.e do {
+ if *master_list[j][i] = *s then
+ put(result, master_list[j])
+ }
+ # return empty list if nothing the right length is found
+ return result
+
+end
+
+
+#
+# create_arcs: fill out a table of arcs leading out of the current
+# state, and place that table in the tbl field for
+# current_state
+#
+procedure create_arcs(master_list, field, current_state, POS)
+
+ local elem, i, first_char, old_first_char
+
+ current_state.tbl := table()
+ old_first_char := ""
+
+ every elem := master_list[i := current_state.b to current_state.e][field]
+ do {
+
+ # Get the first character for the current position (note that
+ # we're one character behind the calling routine; hence
+ # POS-1).
+ #
+ first_char := elem[POS-1] | next
+
+ # If we have a new first character, create a new arc out of
+ # the current state.
+ #
+ if first_char ~== old_first_char then {
+ # Store the start position for the current character.
+ current_state.tbl[first_char] := dfstn_state(i)
+ # Store the end position for the old character.
+ (\current_state.tbl[old_first_char]).e := i-1
+ old_first_char := first_char
+ }
+ }
+ # NOTE(review): relies on i retaining its final value after the
+ # every loop above — confirm this holds for the empty-range case.
+ (\current_state.tbl[old_first_char]).e := i
+
+ # Clobber table with 0 if no arcs were added.
+ current_state.tbl := (*current_state.tbl = 0)
+ return current_state
+
+end
diff --git a/ipl/packs/ibpag2/outbits.icn b/ipl/packs/ibpag2/outbits.icn
new file mode 100644
index 0000000..cf3f597
--- /dev/null
+++ b/ipl/packs/ibpag2/outbits.icn
@@ -0,0 +1,100 @@
+############################################################################
+#
+# Name: outbits.icn
+#
+# Title: output variable-length characters in byte-size chunks
+#
+# Author: Richard L. Goerwitz
+#
+# Version: 1.5
+#
+############################################################################
+#
+# In any number of instances (e.g. when outputting variable-length
+# characters or fixed-length encoded strings), the programmer must
+# fit variable and/or non-byte-sized blocks into standard 8-bit
+# bytes. Outbits() performs this task.
+#
+# Pass to outbits(i, len) an integer i, and a length parameter (len),
+# and outbits will suspend byte-sized chunks of i converted to
+# characters (most significant bits first) until there is not enough
+# left of i to fill up an 8-bit character. The remaining portion is
+# stored in a buffer until outbits() is called again, at which point
+# the buffer is combined with the new i and then output in the same
+# manner as before. The buffer is flushed by calling outbits() with
+# a null i argument. Note that len gives the number of bits there
+# are in i (or at least the number of bits you want preserved; those
+# that are discarded are the most significant ones).
+#
+# A trivial example of how outbits() might be used:
+#
+# outtext := open("some.file.name","w")
+# l := [1,2,3,4]
+# every writes(outtext, outbits(!l,3))
+# writes(outtext, outbits(&null,3)) # flush buffer
+#
+# List l may be reconstructed with inbits() (see inbits.icn):
+#
+# intext := open("some.file.name")
+# l := []
+# while put(l, inbits(intext, 3))
+#
+# Note that outbits() is a generator, while inbits() is not.
+#
+############################################################################
+#
+# Links: none
+# See also: inbits.icn
+#
+############################################################################
+
+
+procedure outbits(i, len)
+
+ local old_part, new_part, window, old_byte_mask
+ static old_i, old_len, byte_length, byte_mask
+ initial {
+ old_i := old_len := 0
+ byte_length := 8
+ byte_mask := (2^byte_length)-1
+ }
+
+ # Left-justify the old_len buffered bits within a byte; window is
+ # how many bits of new input are needed to complete that byte.
+ old_byte_mask := (0 < 2^old_len - 1) | 0
+ window := byte_length - old_len
+ old_part := ishift(iand(old_i, old_byte_mask), window)
+
+ # If we have a no-arg invocation, then flush buffer (old_i).
+ if /i then {
+ if old_len > 0 then {
+ old_i := old_len := 0
+ return char(old_part)
+ } else {
+ old_i := old_len := 0
+ fail
+ }
+ } else {
+ new_part := ishift(i, window-len)
+ # Not enough new bits to fill a byte: just buffer them and fail.
+ len -:= (len >= window) | {
+ old_len +:= len
+ old_i := ior(ishift(old_part, len-window), i)
+ fail
+ }
+# For debugging purposes.
+# write("old_byte_mask = ", old_byte_mask)
+# write("window = ", image(window))
+# write("old_part = ", image(old_part))
+# write("new_part = ", image(new_part))
+# write("outputting ", image(ior(old_part, new_part)))
+ suspend char(ior(old_part, new_part))
+ }
+
+ # Emit further whole bytes from i, most significant bits first.
+ until len < byte_length do {
+ suspend char(iand(ishift(i, byte_length-len), byte_mask))
+ len -:= byte_length
+ }
+
+ # Stash the leftover (< 8) bits for the next call.
+ old_len := len
+ old_i := i
+ fail
+
+end
diff --git a/ipl/packs/ibpag2/rewrap.icn b/ipl/packs/ibpag2/rewrap.icn
new file mode 100644
index 0000000..9ceff0c
--- /dev/null
+++ b/ipl/packs/ibpag2/rewrap.icn
@@ -0,0 +1,144 @@
+############################################################################
+#
+# Name: rewrap.icn
+#
+# Title: advanced line rewrap utility
+#
+# Author: Richard L. Goerwitz
+#
+# Version: 1.4
+#
+############################################################################
+#
+# The procedure rewrap(s,i), included in this file, reformats text
+# fed to it into strings < i in length. Rewrap utilizes a static
+# buffer, so it can be called repeatedly with different s arguments,
+# and still produce homogeneous output. This buffer is flushed by
+# calling rewrap with a null first argument. The default for
+# argument 2 (i) is 70.
+#
+# Here's a simple example of how rewrap could be used. The following
+# program reads the standard input, producing fully rewrapped output.
+#
+# procedure main()
+# every write(rewrap(!&input))
+# write(rewrap())
+# end
+#
+# Naturally, in practice you would want to do things like check for in-
+# dentation or blank lines in order to wrap only on a paragraph-by para-
+# graph basis, as in
+#
+# procedure main()
+# while line := read(&input) do {
+# if line == "" then {
+# write("" ~== rewrap())
+# write(line)
+# } else {
+# if match("\t", line) then {
+# write(rewrap())
+# write(rewrap(line))
+# } else {
+# write(rewrap(line))
+# }
+# }
+# }
+# end
+#
+# Fill-prefixes can be implemented simply by prepending them to the
+# output of rewrap:
+#
+# i := 70; fill_prefix := " > "
+# while line := read(input_file) do {
+# line ?:= (f_bit := tab(many('> ')) | "", tab(0))
+# write(fill_prefix || f_bit || rewrap(line, i - *fill_prefix))
+# etc.
+#
+# Obviously, these examples are fairly simplistic. Putting them to
+# actual use would certainly require a few environment-specific
+# modifications and/or extensions. Still, I hope they offer some
+# indication of the kinds of applications rewrap might be used in.
+#
+# Note: If you want leading and trailing tabs removed, map them to
+# spaces first. Rewrap only fools with spaces, leaving tabs intact.
+# This can be changed easily enough, by running its input through the
+# Icon detab() function.
+#
+############################################################################
+#
+# See also: wrap.icn
+#
+############################################################################
+
+
+procedure rewrap(s,i)
+
+ local extra_bit, line
+ static old_line
+ initial old_line := ""
+
+ # Default column to wrap on is 70.
+ /i := 70
+ # Flush buffer on null first argument.
+ if /s then {
+ extra_bit := old_line
+ old_line := ""
+ return "" ~== extra_bit
+ }
+
+ # Prepend to s anything that is in the buffer (leftovers from the last s).
+ s ?:= { tab(many(' ')); old_line || trim(tab(0)) }
+
+ # If the line isn't long enough, just add everything to old_line.
+ # (& fail makes this a buffer-and-fail, not a return.)
+ if *s < i then old_line := s || " " & fail
+
+ s ? {
+
+ # While it is possible to find places to break s, do so.
+ # EndToFront generates candidate slices of width <= i ending
+ # in a space or hyphen.
+ while any(' -',line := EndToFront(i),-1) do {
+ # Clean up and suspend the last piece of s tabbed over.
+ line ?:= (tab(many(' ')), trim(tab(0)))
+ if *&subject - &pos + *line > i
+ then suspend line
+ else {
+ old_line := ""
+ return line || tab(0)
+ }
+ }
+
+ # Keep the extra section of s in a buffer.
+ old_line := tab(0)
+
+ # If the reason the remaining section of s was unrewrapable was
+ # that it was too long, and couldn't be broken up, then just return
+ # the thing as-is.
+ if *old_line > i then {
+ old_line ? {
+ if extra_bit := tab(upto(' -')+1) || (tab(many(' ')) | "")
+ then old_line := tab(0)
+ else extra_bit := old_line & old_line := ""
+ return trim(extra_bit)
+ }
+ }
+ # Otherwise, clean up the buffer for prepending to the next s.
+ else {
+ # If old_line is blank, then don't mess with it. Otherwise,
+ # add whatever is needed in order to link it with the next s.
+ if old_line ~== "" then {
+ # If old_line ends in a dash, then there's no need to add a
+ # space to it.
+ if old_line[-1] ~== "-"
+ then old_line ||:= " "
+ }
+ }
+ }
+
+end
+
+
+
+procedure EndToFront(i)
+ # Goes with rewrap(s,i)
+ # Fail unless at least i characters remain in &subject.
+ *&subject+1 - &pos >= i | fail
+ # Generate slices starting at &pos, widest (i chars) first,
+ # shrinking toward &pos; <- restores &pos when backtracked into.
+ suspend &subject[.&pos:&pos <- &pos+i to &pos by -1]
+end
diff --git a/ipl/packs/ibpag2/sample.ibp b/ipl/packs/ibpag2/sample.ibp
new file mode 100644
index 0000000..ab8358f
--- /dev/null
+++ b/ipl/packs/ibpag2/sample.ibp
@@ -0,0 +1,111 @@
+#
+# Sample Ibpag2 grammar file.
+#
+
+#
+# The code between %{ and %} gets copied directly. Note the Iconish
+# comment syntax.
+#
+%{
+
+# Note: If IIDEBUG is defined in the output file, debugging messages
+# about the stacks and actions get displayed.
+#
+$define IIDEBUG 1
+
+%}
+
+#
+# Here we declare the tokens returned by the lexical analyzer.
+# Precedences increase as we go on. Note how (unlike YACC), tokens
+# are separated by commas. Note also how UMINUS is used only for its
+# %prec later.
+#
+%token NUMBER
+%left '+', '-'
+%left '*', '/'
+%right UMINUS
+
+%%
+
+#
+# After this point, and up to the next %%, we have the grammar itself.
+# By default, the start symbol is the left-hand side of the first
+# rule.
+#
+
+lines : lines, expr, '\n' { write($2) }
+ | lines, '\n'
+ | epsilon # Note use of epsilon/error tokens.
+ | error, '\n' {
+ write("syntax error; try again:")
+ # like YACC's yyerrok macro
+ iierrok
+ }
+ ;
+
+expr : expr, '+', expr { return $1 + $3 }
+ | expr, '-', expr { return $1 - $3 }
+ | expr, '*', expr { return $1 * $3 }
+ | expr, '/', expr { return $1 / $3 }
+ | '(', expr, ')' { return $2 }
+ | '-', expr %prec UMINUS { return -$2 }
+ | NUMBER { return $1 }
+ ;
+
+%%
+
+#
+# From here on, code gets copied directly to the output file. We are
+# no longer in the grammar proper.
+#
+
+#
+# The lexical analyzer must be called iilex, with the module name
+# appended (if there is one). It must take one argument, infile (an
+# input stream). It must be a generator, and fail on EOF (not return
+# something <= 0, as is the case for YACC + Lex). Iilval holds the
+# literal string value of the token just suspended by iilex().
+#
+#
+# iilex: file -> token codes (a generator)
+#
+# Lexical analyzer for the sample grammar. Suspends NUMBER for each
+# maximal digit run and ord(c) for the single-character tokens
+# declared above; the global iilval always holds the literal text of
+# the token just suspended. Suspends &null for any other
+# non-whitespace character (a deliberate lexical error, handled by
+# the parser's error productions). Fails on EOF, as iiparse
+# requires.
+#
+procedure iilex(infile)
+
+ local nextchar, c, num
+ initial {
+ # Here's where you'd initialize any %{ globals %} declared
+ # above.
+ }
+
+ # Pad the input with newlines so the final token is terminated.
+ nextchar := create !(!infile || "\n" || "\n")
+
+ c := @nextchar | fail
+ repeat {
+ if any(&digits, c) then {
+ # Accumulate a digit run; \num fails on the first digit.
+ if not (\num ||:= c) then
+ num := c
+ } else {
+ # A non-digit ends any pending number: emit it first.
+ if iilval := \num then {
+ suspend NUMBER
+ num := &null
+ }
+ if any('+-*/()\n', c) then {
+ iilval := c
+ suspend ord(c)
+ } else {
+ if not any(' \t', c) then {
+ # deliberate error - will be handled later
+ suspend &null
+ }
+ }
+ }
+ c := @nextchar | break
+ }
+ # Flush a number still pending at EOF. (An unreachable
+ # "num := &null" that followed this return has been removed.)
+ if iilval := \num then
+ return NUMBER
+
+end
+
+# Entry point: run the generated parser over standard input.
+# NOTE(review): assumes the second iiparse argument is the
+# fail-on-error switch expected by the Ibpag2-generated parser —
+# confirm against iiparse.lib.
+procedure main()
+ return iiparse(&input, 1)
+end
diff --git a/ipl/packs/ibpag2/shrnktbl.icn b/ipl/packs/ibpag2/shrnktbl.icn
new file mode 100644
index 0000000..a91ca3d
--- /dev/null
+++ b/ipl/packs/ibpag2/shrnktbl.icn
@@ -0,0 +1,131 @@
+############################################################################
+#
+# Name: shrnktbl.icn
+#
+# Title: table shrinker
+#
+# Author: Richard L. Goerwitz
+#
+# Version: 1.4 (later modified 4-Aug-2000/gmt)
+#
+############################################################################
+#
+# Action/goto table shrinking routine.
+#
+# Entry point: shrink_tables(start_symbol, st, atbl, gtbl), where
+# start_symbol is the start symbol for the grammar whose productions
+# are contained in the list/set st, and where atbl and gtbl are the
+# action and goto tables, respectively. Returns &null, for lack of
+# anything better.
+#
+# Basically, this routine merges duplicate structures in atbl and
+# gtbl (if there are any), replaces the nonterminal symbols in the
+# action table with integers (including the start symbol), then
+# resets the goto table so that its keys point to these integers,
+# instead of to the original nonterminal symbol strings.
+#
+############################################################################
+#
+# Links: equiv, lists, sets, tables, outbits
+#
+############################################################################
+#
+# See also: ibpag2, slrtbls
+#
+############################################################################
+
+# structs has equiv; outbits is for outputting variable-width integers
+# as 8-bit characters
+#
+link equiv
+link lists
+link sets
+link tables
+link outbits
+
+#
+# shrink_tables
+#
+procedure shrink_tables(grammar, atbl, gtbl)
+
+ local t, k, seen, nontermtbl, r, a, action, state, by_rule,
+ rule_len, LHS, keys
+
+ # Create a table mapping nonterminal symbols to integers.
+ nontermtbl := table()
+ every r := !grammar.rules do
+ # r is a production; production records have LHS, RHS,...no
+ # fields, where the no field contains the rule number; we can
+ # use this as an arbitrary representation for that rule's LHS
+ # nonterminal
+ insert(nontermtbl, r.LHS, r.no)
+
+ # Replace old start symbol.
+ grammar.start := nontermtbl[grammar.start]
+
+ # Re-form the goto table to use the new integer values for
+ # nonterminals. (Keys are snapshotted first, since the loop
+ # inserts new keys into gtbl.)
+ keys := set()
+ every insert(keys, key(gtbl))
+ every k := !keys do {
+ # first create a column for the new integer-valued nonterminal
+ insert(gtbl, string(nontermtbl[k]), gtbl[k])
+ # then clobber the old column with a string-valued nonterminal
+ gtbl[k] := &null
+ }
+
+ # Rewrite actions using a fixed field-width format: a 2-bit
+ # action tag (0=shift, 1=reduce, 2=accept) followed by the
+ # action's numeric fields, packed into bytes via outbits().
+ every t := !atbl do {
+ every k := key(t) do {
+ a := ""
+ t[k] ? {
+ while action := tab(any('sra')) do {
+ case action of {
+ "s": {
+ # "sSTATE.RULE": 11-bit state, 11-bit rule.
+ outbits(0, 2)
+ state := integer(tab(find(".")))
+ every a ||:= outbits(state, 11)
+ move(1)
+ by_rule := integer(tab(many(&digits)))
+ every a ||:= outbits(by_rule, 11)
+ outbits()
+ }
+ "r": {
+ # "rSTATE<LHS>LEN": 11-bit state, 11-bit
+ # (integer-mapped) LHS, 8-bit RHS length.
+ outbits(1, 2)
+ state := integer(tab(find("<")))
+ every a ||:= outbits(state, 11)
+ move(1)
+ LHS := nontermtbl[tab(find(">"))]
+ every a ||:= outbits(LHS, 11)
+ move(1)
+ rule_len := integer(tab(many(&digits)))
+ every a ||:= outbits(rule_len, 8)
+ outbits()
+ }
+ "a": {
+ # "a" (accept): tag only.
+ outbits(2, 2)
+ a ||:= outbits()
+ }
+ }
+ }
+ }
+ t[k] := a
+ }
+ }
+
+ #
+ # Turn pointers to identical structures into pointers to the same
+ # structure.
+ #
+ seen := set()
+ every t := atbl | gtbl do {
+ every k := key(t) do {
+ if t[k] := equiv(t[k], !seen)
+ then next else insert(seen, t[k])
+ }
+ }
+
+ # signal success
+ return &null
+
+end
diff --git a/ipl/packs/ibpag2/slritems.icn b/ipl/packs/ibpag2/slritems.icn
new file mode 100644
index 0000000..2a87f2c
--- /dev/null
+++ b/ipl/packs/ibpag2/slritems.icn
@@ -0,0 +1,244 @@
+############################################################################
+#
+# Name: slritems.icn
+#
+# Title: compute item sets for a grammar
+#
+# Author: Richard L. Goerwitz
+#
+# Version: 1.10
+#
+############################################################################
+#
+# Contains make_slr_item_sets(start_symbol, st), slr_goto(l, symbol,
+# st), slr_closure(l, st). The user need only worry about
+# make_slr_item_sets() initially. The slr_goto() routine may be
+# useful later when constructing action and goto tables.
+#
+# Slr_closure(l, st) accepts a list of items as its first argument, a
+# list or set of the productions in the grammar as its second, and
+# returns the closure of item list l, in the form of another item
+# list.
+#
+# Note also that the production record structure (LHS, RHS, POS,
+# LOOK...) has a POS field, and therefore can serve also as an item.
+# In fact, any structure can be used, as long as its first three
+# fields are LHS, RHS, and POS.
+#
+# See the "Dragon Book" (cited in first.icn) p. 222 ff.
+#
+# Slr_goto(l, symbol, st) accepts a list as its first argument, a
+# string or integer as its second (string = nonterminal, integer =
+# terminal), and a list or set for its third, returning another list.
+# Arg 1 must be an item list, as generated either by another call to
+# slr_goto() or by closure of the start production of the augmented
+# grammar. Arg 2, symbol, is some terminal or nonterminal symbol.
+# Arg 3 is the list or set of all productions in the current grammar.
+# The return value is the closure of the set of all items [A -> aX.b]
+# such that [A -> a.Xb] is in l (arg 1).
+#
+# make_slr_item_sets(start_sym, st) takes a string, start_sym, as its
+# first argument, and a list or set of productions as its second.
+# Returns a list of canonical LR(0) item sets or states. It returns,
+# in other words, a list of lists of items. Items can be any record
+# type that has LHS, RHS, and POS as its first three fields.
+#
+# See the "Dragon Book," example 4.35 (p. 224).
+#
+############################################################################
+#
+# Links: ibutil
+#
+############################################################################
+
+# link ibutil
+
+#
+# slr_closure: list x list/set -> list
+# (l2, st) -> l2
+#
+# Where l is a list of items, where st is a list/set of all
+# productions in the grammar from which l was derived, and where
+# l(2) is the SLR closure of l, as constructed using the standard
+# SLR closure operation.
+#
+# Ignore the third to fifth arguments, len to added. They are
+# used internally by recursive calls to slr_closure().
+#
+procedure slr_closure(l, st, len, LHS_tbl, added)
+
+ local p, i, new_p, symbol
+ static LHS_tbl_tbl
+ initial LHS_tbl_tbl := table()
+
+ # Build (once per grammar st) an index from LHS nonterminal to
+ # the list of its productions.
+ if /LHS_tbl then {
+ if /LHS_tbl_tbl[st] := table() then {
+ # makes looking up all rules with a given LHS easier
+ every p := !st do {
+ /LHS_tbl_tbl[st][p.LHS] := list()
+ put(LHS_tbl_tbl[st][p.LHS], p)
+ }
+ }
+ LHS_tbl := LHS_tbl_tbl[st]
+ }
+
+ /len := 0
+ /added := set()
+
+ # Len tells us where the elements in l start that we haven't yet
+ # tried to generate more items from. These elements are basically
+ # the items added on the last recursive call (or the "core," if
+ # there has not yet been a recursive call).
+ #
+ every i := len+1 to *l do {
+ /l[i].POS := 1
+ # Fails if dot (i.e. l[i].POS) is at the end of the RHS;
+ # also fails if the current symbol (i.e. l[i].RHS[l[i].POS])
+ # is a nonterminal.
+ symbol := l[i].RHS[l[i].POS]
+ # No need to add productions having symbol as their LHS if
+ # we've already done so for this particular l.
+ member(added, symbol) & next
+ every p := !\LHS_tbl[symbol] do {
+ # Make a copy of p, but with dot set to position 1.
+ new_p := copy(p)
+ # Set POS to 1 for non-epsilon productions; otherwise to 2.
+ # (-2 is the internal epsilon marker.)
+ if *new_p.RHS = 1 & new_p.RHS[1] === -2 then
+ new_p.POS := 2
+ else new_p.POS := 1
+ # if new_p isn't in l, add it to the end of l
+ if not equivalent_items(new_p, !l) then
+ put(l, new_p)
+ }
+ insert(added, symbol)
+ }
+ return {
+ # If nothing new has been added, sort the result and return...
+ if *l = i then sortff(l, 1, 2, 3)
+ # ...otherwise, try to add more items to l.
+ else slr_closure(l, st, i, LHS_tbl, added)
+ }
+
+end
+
+
+#
+# slr_goto: list x string|integer x list|set -> list
+# (l, symbol, st) -> l2
+#
+# Where l is an item set previously returned by slr_goto or (for
+# the start symbol of the augmented grammar) by slr_closure(),
+# where symbol is a string (nonterminal) or integer (terminal),
+# where st is a list or set of all productions in the current
+# grammar, and where l2 is the SLR closure of the set of all items
+# [A -> aX.b] such that [A -> a.Xb] is in l.
+#
+# The idea is just to move the dots for all productions where the
+# dots precede "symbol," creating a new item list for the "moved"
+# items, and then performing a slr_closure() on that new item list.
+# Note that items can be represented by any structure where fields
+# 1, 2, and 3 are LHS, RHS, and POS.
+#
+# Note that slr_goto(l, symbol, st) may yield a result that's
+# structurally equivalent to one already in the sets of items thus
+# far generated. This won't normally happen, because slr_goto()
+# saves old results, never re-calcing for the same l x symbol
+# combination. Still, a duplicate result could theoretically
+# happen.
+#
+procedure slr_goto(l, symbol, st)
+
+ local item, item2, l2, iteml_symbol_table
+ static iteml_symbol_table_table
+ initial iteml_symbol_table_table := table()
+
+ # Keep old results for this grammar (st) in a table of tables of
+ # tables!
+ #
+ /iteml_symbol_table_table[st] := table()
+ iteml_symbol_table := iteml_symbol_table_table[st]
+
+ # See if we've already performed this same calculation.
+ #
+ if l2 := \(\iteml_symbol_table[l])[symbol]
+ then return l2
+
+ # Advance the dot past "symbol" in every item where it precedes
+ # the dot position.
+ l2 := list()
+ every item := !l do {
+ # Subscripting operation fails if the dot's at end.
+ if item.RHS[item.POS] === symbol
+ then {
+ item2 := copy(item) # copy is nonrecursive
+ item2.POS +:= 1
+ put(l2, item2)
+ }
+ }
+ if *l2 = 0 then fail
+ else l2 := slr_closure(l2, st)
+ #
+ # Keep track of item lists and symbols we've already seen.
+ #
+ /iteml_symbol_table[l] := table()
+ /iteml_symbol_table[l][symbol] := l2
+
+ if *l2 > 0 then
+ return l2
+ else fail
+
+end
+
+
+#
+# make_slr_item_sets: string x list|set -> list
+# (start_sym, st) -> l
+#
+# Where start_sym is the start symbol for the grammar defined by
+# the productions contained in st, and where l is the list of item
+# lists generated by the standard LR(0) set-of-items construction
+# algorithm.
+#
+# Ignore the third and fourth arguments. They are used internally
+# by recursive calls.
+#
+procedure make_slr_item_sets(start_sym, st, C, len)
+
+ local i, next_items, item_list, new_list, item, symbol
+
+ #
+ # First extend the old start symbol and use the result as the new
+ # start symbol for the augmented grammar to which the set-of-items
+ # construction will be applied.
+ #
+ # &trace := -1
+ /C := [slr_closure(
+ [production("`_" || start_sym || "_'", [start_sym], 1)],st)]
+ /len := 0
+
+ # Iterate through C (the list of item-lists), doing gotos, and adding
+ # new states, until no more states can be added to C.
+ #
+ every item_list := C[i := len+1 to *C] do {
+ # DEBUG is a global switch set elsewhere in the package.
+ if \DEBUG then
+ print_item_list(C, i)
+ # collect all symbols after the dot for the items in C[i]...
+ next_items := set()
+ every item := !item_list do
+ insert(next_items, item.RHS[item.POS])
+ # ...now, try to do a slr_goto() for every collected symbol.
+ every symbol := !next_items do {
+ new_list := slr_goto(item_list, symbol, st) | next
+ if not equivalent_item_lists(new_list, !C)
+ then put(C, new_list)
+ }
+ }
+ # If nothing has been inserted, return C and quit; otherwise, call
+ # recursively and try again.
+ #
+ return {
+ if i = *C then C
+ else make_slr_item_sets(&null, st, C, i)
+ }
+
+end
+
+
diff --git a/ipl/packs/ibpag2/slrtbls.icn b/ipl/packs/ibpag2/slrtbls.icn
new file mode 100644
index 0000000..8d00f12
--- /dev/null
+++ b/ipl/packs/ibpag2/slrtbls.icn
@@ -0,0 +1,370 @@
+############################################################################
+#
+# Name: slrtbls.icn
+#
+# Title: slr table generation routines
+#
+# Author: Richard L. Goerwitz
+#
+# Version: 1.20
+#
+############################################################################
+#
+# Contains make_slr_tables(grammar, atbl, gtbl, noconflict,
+# like_yacc), where grammar is an ib_grammar record (as returned by
+# ibreader), where atbl and gtbl are initialized (default &null) hash
+# tables, and where noconflict is a switch that, if nonnull, directs
+# the resolver to abort on unresolvable conflicts. Returns &null if
+# successful in filling out atbl and gtbl.  If like_yacc is nonnull,
+# make_slr_tables will resolve reduce/reduce conflicts by order of
+# occurrence in the grammar, just like YACC. Shift/reduce conflicts
+# will be resolved in favor of shift.
+#
+# The reason for the noconflict switch is that there are parsers that
+# can accept tables with multiple action entries, i.e. parsers that
+# can use tables generated by ambiguous grammars.
+#
+# In this routine's case, success is identified with creating a
+# standard SLR action and goto table. Note that both tables end up
+# as tables of tables, with symbols being the primary or first key,
+# and state numbers being the second. This is the reverse of the
+# usual arrangement, but turns out to save a lot of space. Atbl
+# values are of the form "s2.3", "r4<A>10", "a", etc. The string
+# "s2.3" means "shift the current lookahead token, and enter state 2
+# via rule 3." By way of contrast, "r4<A>10" means "reduce by rule
+# number 4, which has A as its LHS symbol and 10 RHS symbols." A
+# single "a" means "accept."
+
+# Atbl entries may contain more than one action. The actions are
+# simply concatenated: "s2.3r4<A>10a". Conflicts may be resolved
+# later by associativity or precedence, if available. Unresolvable
+# conflicts only cause error termination if the 5th and final
+# argument is nonnull (see above on "noconflict").
+#
+# Gtbl entries are simpler than atbl entries, consisting of a single
+# integer.
+#
+############################################################################
+#
+# Links: follow, slritems, iohno
+#
+############################################################################
+
+# declared in ibreader.icn
+# record ib_grammar(start, rules, tbl)
+
+#link follow, slritems, iohno#, ximage
+
+#
+# make_slr_tables
+#
+procedure make_slr_tables(grammar, atbl, gtbl, noconflict, like_yacc)
+
+    local start_symbol, st, C, i, augmented_start_symbol, item,
+	symbol, new_item_list, j, action
+
+    # Initialize start symbol and rule list/set (either is okay).
+    start_symbol := grammar.start
+    st := grammar.rules
+
+    # Number the rules, and then construct the canonical LR(0) item sets.
+    every i := 1 to *st do st[i].no := i
+    C := make_slr_item_sets(start_symbol, st)
+
+    # Now, go through each item in each item set in C filling out the
+    # action (atbl) and goto table (gtbl) as we go.  Note that both
+    # tables are keyed first by symbol, then by state number i.
+    #
+    augmented_start_symbol := "`_" || start_symbol || "_'"
+    every i := 1 to *C do {
+	every item := !C[i] do {
+	    # if the dot's *not* at the end of the production...
+	    if symbol := item.RHS[item.POS] then {
+		# if we're looking at a terminal, enter a shift action
+		if type(symbol) == "integer" then {
+		    if symbol = -2 then next	# Never shift epsilon!
+		    # find the state (j) that goto(C[i], symbol) equals
+		    new_item_list := slr_goto(C[i], symbol, st)
+		    every j := 1 to *C do {
+			if equivalent_item_lists(new_item_list, C[j]) then {
+			    action := "s" || j || "." || item.no
+			    resolve(st, atbl, symbol, i, action,
+				    noconflict, like_yacc)
+			    break next
+			}
+		    }
+		# if we're looking at a nonterminal, add action to gtbl
+		} else {
+		    new_item_list := slr_goto(C[i], symbol, st)
+		    every j := 1 to *C do {
+			if equivalent_item_lists(new_item_list, C[j]) then {
+			    # iohno(80) aborts if a *different* goto
+			    # entry is already present for (symbol, i)
+			    /gtbl[symbol] := table()
+			    /gtbl[symbol][i] := j |
+				gtbl[symbol][i] =:= j |
+				iohno(80, image(symbol), ".", image(i), ":", j)
+			    break next
+			}
+		    }
+		}
+	    # ...else if the dot *is* at the end of the production
+	    } else {
+		if item.LHS == augmented_start_symbol then {
+		    action := "a"
+		    # 0 = EOF
+		    resolve(st, atbl, 0, i, action, noconflict, like_yacc)
+		} else {
+		    # add a reduce for every symbol in FOLLOW(item.LHS)
+		    every symbol := !FOLLOW(start_symbol, st, item.LHS) do {
+			# RHS size is 0 for epsilon.
+			if item.RHS[1] === -2 then {
+			    action := "r" || item.no || "<" || item.LHS ||
+				">0"
+			} else
+			    action := "r" || item.no || "<" || item.LHS ||
+				">" || *item.RHS
+			resolve(st, atbl, symbol, i, action,
+				noconflict, like_yacc)
+		    }
+		}
+	    }
+	}
+    }
+
+    return
+
+end
+
+
+#
+# resolve: list|set x table x string|integer, integer, anything, anything
+# -> string
+# (st, tbl, symbol, state, action, noconflict, like_yacc)
+# -> new_action_list
+#
+# Add action to action table, resolving conflicts by precedence
+# and associativity, if need be. If noconflict is nonnull, abort
+# on unresolvable conflicts. Fails on shift/shift "conflicts," or
+# if an identical action is already present in the table entry to
+# be modified. If like_yacc is nonnull, resolve reduce/reduce
+# conflicts by their order of occurrence in the grammar; resolve
+# shift/reduce conflicts in favor of shift.
+#
+procedure resolve(st, tbl, symbol, state, action, noconflict, like_yacc)
+
+    local actions, a, ruleno, p, newp
+
+    # Create the entry for this symbol/state pair on first use.
+    /tbl[symbol] := table()
+    /tbl[symbol][state] := ""
+
+    # If this action is already present, then don't re-enter it.  Just
+    # fail.
+    #
+    tbl[symbol][state] ? {
+	while a := tab(any('sra')) do {
+	    # snip one action ("sJ.R", "rR<LHS>N", or "a") out of the
+	    # concatenated entry and compare it with the new one
+	    a ||:= tab(upto('.<'))
+	    a ||:= { (="<" || tab(find(">")+1)) | ="." }
+	    a ||:= tab(many(&digits))
+	    if a == action then fail
+	}
+    }
+
+    # Get rule number for the new action specified as arg 5, and
+    # fetch its source production (newp).
+    action ? {
+	case move(1) of {
+	    "s": ruleno := (tab(find(".")+1), tab(many(&digits)))
+	    "r": ruleno := 1(tab(find("<")), move(1))
+	    # accept actions always go in front; no conflict check needed
+	    "a": return tbl[symbol][state] := action || tbl[symbol][state]
+	} | iohno(70, tbl[symbol][state])
+	(newp := !st).no = ruleno |
+	    iohno(72, tbl[symbol][state])
+    }
+
+    # Resolve any conflicts that might be present.
+    #
+    actions := ""
+    tbl[symbol][state] ? {
+	while a := tab(any('sra')) do {
+	    # Snip out the old action, and put it into a.
+	    a ||:= tab(upto('.<'))
+	    a ||:= { (="<" || tab(find(">")+1)) | ="." }
+	    a ||:= tab(many(&digits))
+	    #
+	    # Get the old action's rule number, and use it to fetch
+	    # the full production that it is keyed to.
+	    #
+	    a ? {
+		case move(1) of {
+		    "s": ruleno := (tab(find(".")+1), tab(many(&digits)))
+		    "r": ruleno := 1(tab(find("<")), move(1))
+		    "a": return tbl[symbol][state] := a || actions || action
+		} | iohno(70, tbl[symbol][state])
+		# Go through rule list; find the one whose number is ruleno.
+		(p := !st).no = ruleno |
+		    iohno(71, tbl[symbol][state])
+	    }
+
+	    # Check precedences to see if we can resolve the conflict
+	    # this way.
+	    #
+	    if \newp.prec > \p.prec then
+		# discard the old action, a
+		return tbl[symbol][state] := actions || action || tab(0)
+	    else if \newp.prec < \p.prec then
+		# discard the new action, action
+		return tbl[symbol][state] := actions || a || tab(0)
+	    else {
+		#
+		# If, however, both precedences are the same (i.e.
+		# newp.prec === p.prec), then we must check the
+		# associativities.  Right implies shift; left, reduce.
+		# If there is no associativity, then we have a
+		# conflict.  Nonassociative ("n") implies error.
+		#
+		case action[1] of {
+		    default: iohno(70, tbl[symbol][state])
+		    # case "a" is handled above; look for "s" & "r"
+		    "s" : {
+			if a[1] == "s" then fail # no shift/shift "conflict"
+			else if a[1] == "r" then {
+			    # associativities must agree, else abort
+			    newp.assoc === p.assoc | {
+				iohno(40, "state " || state || "; token " ||
+				      symbol || "; rules " || newp.no ||
+				      "," || p.no)
+			    }
+			    case newp.assoc of {
+				"n" : iohno(41, production_2_string(newp))
+				&null: { # no associativity given
+				    if \noconflict & /like_yacc then
+					iohno(46, "state " || state ||
+					      "; token " || symbol ||
+					      "; rules " || newp.no ||
+					      "," || p.no)
+				    else {
+					write(&errout, "warning: shift/reduce",
+					      " conflict in state " || state ||
+					      "; token " || symbol ||
+					      "; rules " || newp.no ||
+					      "," || p.no)
+					if \like_yacc then {
+					    write(&errout, "resolving in _
+                                            favor of shift.")
+					    return tbl[symbol][state] :=
+						actions || action || tab(0)
+					} else {
+					    write(&errout, "creating multi-_
+                                            action table entry")
+					    return tbl[symbol][state] :=
+						actions || action || a || tab(0)
+					}
+				    }
+				}
+				"l" : { # left associative
+				    # discard new action, action
+				    return tbl[symbol][state] :=
+					actions || a || tab(0)
+				}
+				"r" : { # right associative
+				    # remove old action, a
+				    return tbl[symbol][state] :=
+					actions || action || tab(0)
+				}
+			    }
+			}
+		    }
+		    "r" : {
+			if a[1] == "r" then {
+			    #
+			    # If conflicts in general, and reduce-reduce
+			    # conflicts in specific are not okay...
+			    #
+			    if \noconflict & /like_yacc then {
+				# ...abort, otherwise...
+				# (fixed: removed a duplicated "; "
+				# segment from this message)
+				iohno(42, "state " || state || "; token " ||
+				      symbol || "; rules " ||
+				      newp.no || "," || p.no)
+			    } else {
+				#
+				# ...flag reduce-reduce conficts, and
+				# then resolve them by their order of
+				# occurrence in the grammar.
+				#
+				write(&errout, "warning: reduce/reduce",
+				      " conflict in state ", state,
+				      "; token ", symbol, "; rules ",
+				      newp.no, ",", p.no)
+				if \like_yacc then {
+				    write(&errout, "resolving by order of _
+                                    occurrence in the grammar")
+				    # (fixed: was "return return")
+				    if newp.no > p.no
+				    # discard later production (newp)
+				    then return tbl[symbol][state] :=
+					actions || a || tab(0)
+				    # discard later production (old p)
+				    else return tbl[symbol][state] :=
+					actions || action || tab(0)
+				} else {
+				    #
+				    # If conflicts ok, but we aren't supposed
+				    # to resolve reduce-reduce conflicts by
+				    # order of rule occurrence:
+				    #
+				    write(&errout, "creating multi-action _
+                                    table entry")
+				    return tbl[symbol][state] :=
+					actions || action || a || tab(0)
+				}
+			    }
+			} else {
+			    # old action is a shift, new is a reduce;
+			    # associativities must be the same for both rules:
+			    newp.assoc === p.assoc | {
+				iohno(40, "state " || state || "; token " ||
+				      symbol || "; rules " || newp.no ||
+				      "," || p.no)
+			    }
+			    case newp.assoc of {
+				"n" : iohno(41, production_2_string(newp))
+				&null: {
+				    if \noconflict & /like_yacc then
+					iohno(46, "state " || state ||
+					      "; token " || symbol ||
+					      "; rules " || newp.no ||
+					      "," || p.no)
+				    else {
+					write(&errout, "warning: shift/reduce",
+					      " conflict in state " || state ||
+					      "; token " || symbol ||
+					      "; rules " || newp.no ||
+					      "," || p.no)
+					if \like_yacc then {
+					    write(&errout, "resolving in _
+                                            favor of shift.")
+					    return tbl[symbol][state] :=
+						actions || a || tab(0)
+					} else {
+					    write(&errout, "creating multi-_
+                                            action table entry")
+					    return tbl[symbol][state] :=
+						actions || action || a || tab(0)
+					}
+				    }
+				}
+				"r" : {
+				    # right assoc -> shift; discard new
+				    # (reduce) action
+				    return tbl[symbol][state] :=
+					actions || a || tab(0)
+				}
+				"l" : {
+				    # left assoc -> reduce; remove old
+				    # (shift) action, a
+				    return tbl[symbol][state] :=
+					actions || action || tab(0)
+				}
+			    }
+			}
+		    }
+		}
+	    }
+	}
+    }
+
+    # No conflict found; simply append the new action.
+    return tbl[symbol][state] ||:= action
+
+end
diff --git a/ipl/packs/ibpag2/slshupto.icn b/ipl/packs/ibpag2/slshupto.icn
new file mode 100644
index 0000000..07cbece
--- /dev/null
+++ b/ipl/packs/ibpag2/slshupto.icn
@@ -0,0 +1,79 @@
+############################################################################
+#
+# Name: slshupto.icn
+#
+# Title: slshupto (upto with backslash escaping)
+#
+# Author: Richard L. Goerwitz
+#
+# Version: 1.4
+#
+############################################################################
+#
+# Slshupto works just like upto, except that it ignores backslash
+# escaped characters. I can't even begin to express how often I've
+# run into problems applying Icon's string scanning facilities to
+# input that uses backslash escaping.  Normally, I tokenize first,
+# and then work with lists. With slshupto() I can now postpone or
+# even eliminate the traditional tokenizing step, and let Icon's
+# string scanning facilities do more of the work.
+#
+# If you're confused:
+#
+# Typically UNIX utilities (and probably others) use backslashes to
+# "escape" (i.e. remove the special meaning of) metacharacters. For
+# instance, UNIX shells normally accept "*" as a shorthand for "any
+# series of zero or more characters. You can make the "*" a literal
+# "*," with no special meaning, by prepending a backslash. The rou-
+# tine slshupto() understands these backslashing conventions. You
+# can use it to find the "*" and other special characters because it
+# will ignore "escaped" characters.
+#
+############################################################################
+#
+# Links: none
+#
+# See also: slashbal.icn
+#
+############################################################################
+
+# for compatibility with the original name
+#
+procedure slashupto(c, s, i, j)
+    # Deprecated spelling kept for compatibility; generates exactly
+    # what slshupto(c, s, i, j) generates.
+    suspend slshupto(c, s, i, j)
+end
+
+#
+# slshupto: cset x string x integer x integer -> integers
+# (c, s, i, j) -> Is (a generator)
+# where Is are the integer positions in s[i:j] before characters
+#      in c that are not preceded by a backslash escape
+#
+procedure slshupto(c, s, i, j)
+
+    local c2
+
+    # Default s and i from the current scanning environment when s is
+    # omitted; if s was given explicitly, start at position 1 instead.
+    if /s := &subject
+    then /i := &pos
+    else /i := 1
+    /j := *s + 1
+
+    /c := &cset
+    # Scan for members of c *and* for backslashes, so escapes can be
+    # detected and skipped.
+    c2 := '\\' ++ c
+    s[1:j] ? {
+	tab(i)
+	while tab(upto(c2)) do {
+	    if ="\\" then {
+		# Backslash: skip the escaped character.  A bare
+		# trailing backslash at end of string yields its own
+		# position, but only when backslash itself is in c.
+		move(1) | {
+		    if find("\\", c)
+		    then return &pos - 1
+		}
+		next
+	    }
+	    # Unescaped member of c: suspend the position before it.
+	    suspend .&pos
+	    move(1)
+	}
+    }
+
+end
+
diff --git a/ipl/packs/ibpag2/sortff.icn b/ipl/packs/ibpag2/sortff.icn
new file mode 100644
index 0000000..c198c55
--- /dev/null
+++ b/ipl/packs/ibpag2/sortff.icn
@@ -0,0 +1,82 @@
+############################################################################
+#
+# Name: sortff.icn
+#
+# Title: sortf with multiple field arguments
+#
+# Author: Bob Alexander and Richard L. Goerwitz
+#
+# Date: July 14, 1993
+#
+############################################################################
+#
+# Sortff is like sortf(), except takes an unlimited number of field
+# arguments. E.g. if you want to sort a list of structures on field
+# 5, and (for those objects that have the same field 5) do a sub-sort
+# on field 2, you would use "sortff(list_of_objects, 5, 2)."
+#
+############################################################################
+
+#
+# sortff: structure [x integer [x integer...]] -> structure
+# (L, [fields ...]) -> new_L
+#
+# Where L is any subscriptable structure, and fields are any
+# number of integer subscripts in any desired order. Returns
+# a copy of structure L with its elements sorted on field 1,
+# and, for those elements having an identical field 1, sub-
+# sorted on field 2, etc.
+#
+procedure sortff(L, fields[])
+    # A list of zero or one elements is already sorted; hand back a copy.
+    if *L <= 1 then
+	return copy(L)
+    # Otherwise delegate to the recursive worker, starting at the first
+    # sort field, with a unique sentinel for elements lacking a field.
+    return sortff_1(L, fields, 1, [])
+end
+
+procedure sortff_1(L, fields, k, uniqueObject)
+
+    local sortField, cachedKeyValue, i, startOfRun, thisKey
+
+    # uniqueObject is a sentinel (a list made in sortff) standing in
+    # for "element has no such field"; === comparison guarantees it
+    # never equals a real key value.
+    sortField := fields[k]
+    L := sortf(L, sortField)	# initial sort using fields[k]
+    #
+    # If more than one sort field is given, use each field successively
+    # as the current key, and, where members in L have the same value for
+    # this key, do a subsort using fields[k+1].
+    #
+    if fields[k +:= 1] then {
+	#
+	# Set the equal-key-run pointer to the start of the list and
+	# save the value of the first key in the run.
+	#
+	startOfRun := 1
+	cachedKeyValue := L[startOfRun][sortField] | uniqueObject
+	every i := 2 to *L do {
+	    thisKey := L[i][sortField] | uniqueObject
+	    if not (thisKey === cachedKeyValue) then {
+		#
+		# We have an element with a sort key different from the
+		# previous.  If there's a run of more than one equal keys,
+		# sort the sublist.
+		#
+		if i - startOfRun > 1 then {
+		    # splice the recursively subsorted run back in place
+		    L := L[1:startOfRun] |||
+			sortff_1(L[startOfRun:i], fields, k, uniqueObject) |||
+			L[i:0]
+		}
+		# Reset the equal-key-run pointer to this key and cache.
+		startOfRun := i
+		cachedKeyValue := L[startOfRun][sortField] | uniqueObject
+	    }
+	}
+	#
+	# Sort a final run if it exists.
+	#
+	if i - startOfRun > 1 then {
+	    L := L[1:startOfRun] |||
+		sortff_1(L[startOfRun:0], fields, k, uniqueObject)
+	}
+    }
+
+    return L
+
+end
diff --git a/ipl/packs/ibpag2/version.icn b/ipl/packs/ibpag2/version.icn
new file mode 100644
index 0000000..597a4f4
--- /dev/null
+++ b/ipl/packs/ibpag2/version.icn
@@ -0,0 +1,19 @@
+############################################################################
+#
+# Name: version.icn
+#
+# Title: return Ibpag2 version number
+#
+# Author: Richard L. Goerwitz
+#
+# Version: 1.13
+#
+############################################################################
+#
+# See also: ibpag2.icn
+#
+############################################################################
+
+procedure ib_version()
+    # Returns the Ibpag2 version banner.  NOTE(review): the file header
+    # above says "Version: 1.13" while this string says "1.3.7" --
+    # confirm which numbering is authoritative before changing either.
+    return "Ibpag2, version 1.3.7"
+end