summaryrefslogtreecommitdiff
path: root/ipl/progs/rsg.icn
diff options
context:
space:
mode:
Diffstat (limited to 'ipl/progs/rsg.icn')
-rw-r--r--ipl/progs/rsg.icn391
1 files changed, 391 insertions, 0 deletions
diff --git a/ipl/progs/rsg.icn b/ipl/progs/rsg.icn
new file mode 100644
index 0000000..747e78b
--- /dev/null
+++ b/ipl/progs/rsg.icn
@@ -0,0 +1,391 @@
+############################################################################
+#
+# File: rsg.icn
+#
+# Subject: Program to generate randomly selected sentences
+#
+# Author: Ralph E. Griswold
+#
+# Date: March 26, 2002
+#
+############################################################################
+#
+# This file is in the public domain.
+#
+############################################################################
+#
+# This program generates randomly selected strings (``sen-
+# tences'') from a grammar specified by the user. Grammars are
+# basically context-free and resemble BNF in form, although there
+# are a number of extensions.
+#
+############################################################################
+#
+# The program works interactively, allowing the user to build,
+# test, modify, and save grammars. Input to rsg consists of various
+# kinds of specifications, which can be intermixed:
+#
+# Productions define nonterminal symbols in a syntax similar to
+# the rewriting rules of BNF with various alternatives consisting
+# of the concatenation of nonterminal and terminal symbols. Gen-
+# eration specifications cause the generation of a specified number
+# of sentences from the language defined by a given nonterminal
+# symbol. Grammar output specifications cause the definition of a
+# specified nonterminal or the entire current grammar to be written
+# to a given file. Source specifications cause subsequent input to
+# be read from a specified file.
+#
+# In addition, any line beginning with # is considered to be a
+# comment, while any line beginning with = causes the rest of that
+# line to be used subsequently as a prompt to the user whenever rsg
+# is ready for input (there normally is no prompt). A line consist-
+# ing of a single = stops prompting.
+#
+# Productions: Examples of productions are:
+#
+# <expr>::=<term>|<term>+<expr>
+# <term>::=<elem>|<elem>*<term>
+# <elem>::=x|y|z|(<expr>)
+#
+# Productions may occur in any order. The definition for a nonter-
+# minal symbol can be changed by specifying a new production for
+# it.
+#
+# There are a number of special devices to facilitate the defin-
+# ition of grammars, including eight predefined, built-in nontermi-
+# nal symbols:
+# symbol definition
+# <lb> <
+# <rb> >
+# <vb> |
+# <nl> newline
+# <> empty string
+# <&lcase> any single lowercase letter
+# <&ucase> any single uppercase letter
+# <&digit> any single digit
+#
+# In addition, if the string between a < and a > begins and ends
+# with a single quotation mark, it stands for any single character
+# between the quotation marks. For example,
+#
+# <'xyz'>
+#
+# is equivalent to
+#
+# x|y|z
+#
+# Generation Specifications: A generation specification consists of
+# a nonterminal symbol followed by a nonnegative integer. An exam-
+# ple is
+#
+# <expr>10
+#
+# which specifies the generation of 10 <expr>s. If the integer is
+# omitted, it is assumed to be 1. Generated sentences are written
+# to standard output.
+#
+# Grammar Output Specifications: A grammar output specification
+# consists of a nonterminal symbol, followed by ->, followed by a
+# file name. Such a specification causes the current definition of
+# the nonterminal symbol to be written to the given file. If the
+# file is omitted, standard output is assumed. If the nonterminal
+# symbol is omitted, the entire grammar is written out. Thus,
+#
+# ->
+#
+# causes the entire grammar to be written to standard output.
+#
+# Source Specifications: A source specification consists of @ fol-
+# lowed by a file name. Subsequent input is read from that file.
+# When an end of file is encountered, input reverts to the previous
+# file. Input files can be nested.
+#
+# Options: The following options are available:
+#
+# -s n Set the seed for random generation to n.
+#
+# -r In the absence of -s, set the seed to 0 for repeatable
+# results. Otherwise the seed is set to a different value
+# for each run (as far as this is possible). -r is equivalent
+# to -s 0.
+#
+# -l n Terminate generation if the number of symbols remaining
+# to be processed exceeds n. The default is limit is 1000.
+#
+# -t Trace the generation of sentences. Trace output goes to
+# standard error output.
+#
+# Diagnostics: Syntactically erroneous input lines are noted but
+# are otherwise ignored. Specifications for a file that cannot be
+# opened are noted and treated as erroneous.
+#
+# If an undefined nonterminal symbol is encountered during gen-
+# eration, an error message that identifies the undefined symbol is
+# produced, followed by the partial sentence generated to that
+# point. Exceeding the limit of symbols remaining to be generated
+# as specified by the -l option is handled similarly.
+#
+# Caveats: Generation may fail to terminate because of a loop in
+# the rewriting rules or, more seriously, because of the progres-
+# sive accumulation of nonterminal symbols. The latter problem can
+# be identified by using the -t option and controlled by using the
+# -l option. The problem often can be circumvented by duplicating
+# alternatives that lead to fewer rather than more nonterminal sym-
+# bols. For example, changing
+#
+# <term>::=<elem>|<elem>*<term>
+#
+# to
+#
+# <term>::=<elem>|<elem>|<elem>*<term>
+#
+# increases the probability of selecting <elem> from 1/2 to 2/3.
+#
+# There are many possible extensions to the program. One of the
+# most useful would be a way to specify the probability of select-
+# ing an alternative.
+#
+############################################################################
+#
+# Links: options, random
+#
+############################################################################
+
+link options
+link random
+
+global defs, ifile, in, limit, prompt, tswitch
+
+record nonterm(name)
+record charset(chars)
+
+procedure main(args)
+ local line, plist, s, opts
+ # procedures to try on input lines
+ plist := [define,generate,grammar,source,comment,prompter,error]
+ defs := table() # table of definitions
+ defs["lb"] := [["<"]] # built-in definitions
+ defs["rb"] := [[">"]]
+ defs["vb"] := [["|"]]
+ defs["nl"] := [["\n"]]
+ defs[""] := [[""]]
+ defs["&lcase"] := [[charset(&lcase)]]
+ defs["&ucase"] := [[charset(&ucase)]]
+ defs["&digit"] := [[charset(&digits)]]
+
+ opts := options(args,"tl+s+r")
+ limit := \opts["l"] | 1000
+ tswitch := \opts["t"]
+ &random := \opts["s"]
+ if /opts["s"] & /opts["r"] then randomize()
+
+ ifile := [&input] # stack of input files
+ prompt := ""
+ while in := pop(ifile) do { # process all files
+ repeat {
+ if *prompt ~= 0 then writes(prompt)
+ line := read(in) | break
+ while line[-1] == "\\" do line := line[1:-1] || read(in) | break
+ (!plist)(line)
+ }
+ close(in)
+ }
+end
+
+# process alternatives
+#
+procedure alts(defn)
+ local alist
+ alist := []
+ defn ? while put(alist,syms(tab(upto('|') | 0))) do move(1) | break
+ return alist
+end
+
+# look for comment
+#
+procedure comment(line)
+ if line[1] == "#" then return
+end
+
+# look for definition
+#
+procedure define(line)
+ return line ?
+ defs[(="<",tab(find(">::=")))] := (move(4),alts(tab(0)))
+end
+
+# define nonterminal
+#
+procedure defnon(sym)
+ local chars, name
+ if sym ? {
+ ="'" &
+ chars := cset(tab(-1)) &
+ ="'"
+ }
+ then return charset(chars)
+ else return nonterm(sym)
+end
+
+# note erroneous input line
+#
+procedure error(line)
+ write("*** erroneous line: ",line)
+ return
+end
+
+# generate sentences
+#
+procedure gener(goal)
+ local pending, symbol
+ pending := [nonterm(goal)]
+ while symbol := get(pending) do {
+ if \tswitch then
+ write(&errout,symimage(symbol),listimage(pending))
+ case type(symbol) of {
+ "string": writes(symbol)
+ "charset": writes(?symbol.chars)
+ "nonterm": {
+ pending := ?\defs[symbol.name] ||| pending | {
+ write(&errout,"*** undefined nonterminal: <",symbol.name,">")
+ break
+ }
+ if *pending > \limit then {
+ write(&errout,"*** excessive symbols remaining")
+ break
+ }
+ }
+ }
+ }
+ write()
+end
+
+# look for generation specification
+#
+procedure generate(line)
+ local goal, count
+ if line ? {
+ ="<" &
+ goal := tab(upto('>')) \ 1 &
+ move(1) &
+ count := (pos(0) & 1) | integer(tab(0))
+ }
+ then {
+ every 1 to count do
+ gener(goal)
+ return
+ }
+ else fail
+end
+
+# get right hand side of production
+#
+procedure getrhs(a)
+ local rhs
+ rhs := ""
+ every rhs ||:= listimage(!a) || "|"
+ return rhs[1:-1]
+end
+
+# look for request to write out grammar
+#
+procedure grammar(line)
+ local file, out, name
+ if line ? {
+ name := tab(find("->")) &
+ move(2) &
+ file := tab(0) &
+ out := if *file = 0 then &output else {
+ open(file,"w") | {
+ write(&errout,"*** cannot open ",file)
+ fail
+ }
+ }
+ }
+ then {
+ (*name = 0) | (name[1] == "<" & name[-1] == ">") | fail
+ pwrite(name,out)
+ if *file ~= 0 then close(out)
+ return
+ }
+ else fail
+end
+
+# produce image of list of grammar symbols
+#
+procedure listimage(a)
+ local s, x
+ s := ""
+ every x := !a do
+ s ||:= symimage(x)
+ return s
+end
+
+# look for new prompt symbol
+#
+procedure prompter(line)
+ if line[1] == "=" then {
+ prompt := line[2:0]
+ return
+ }
+end
+
+# write out grammar
+#
+procedure pwrite(name,ofile)
+ local nt, a
+ static builtin
+ initial builtin := ["lb","rb","vb","nl","","&lcase","&ucase","&digit"]
+ if *name = 0 then {
+ a := sort(defs,3)
+ while nt := get(a) do {
+ if nt == !builtin then {
+ get(a)
+ next
+ }
+ write(ofile,"<",nt,">::=",getrhs(get(a)))
+ }
+ }
+ else write(ofile,name,"::=",getrhs(\defs[name[2:-1]])) |
+ write("*** undefined nonterminal: ",name)
+end
+
+# look for file with input
+#
+procedure source(line)
+ local file, new
+
+ return line ? {
+ if ="@" then {
+ new := open(file := tab(0)) | {
+ write(&errout,"*** cannot open ",file)
+ fail
+ }
+ push(ifile,in) &
+ in := new
+ return
+ }
+ }
+end
+
+# produce string image of grammar symbol
+#
+procedure symimage(x)
+ return case type(x) of {
+ "string": x
+ "nonterm": "<" || x.name || ">"
+ "charset": "<'" || x.chars || "'>"
+ }
+end
+
+# process the symbols in an alternative
+#
+procedure syms(alt)
+ local slist
+ static nonbrack
+ initial nonbrack := ~'<'
+ slist := []
+ alt ? while put(slist,tab(many(nonbrack)) |
+ defnon(2(="<",tab(upto('>')),move(1))))
+ return slist
+end