diff options
author | Igor Pashev <pashev.igor@gmail.com> | 2013-01-27 23:51:56 +0000 |
---|---|---|
committer | Igor Pashev <pashev.igor@gmail.com> | 2013-01-27 23:51:56 +0000 |
commit | 6ab0c0f5bf14ed9c15370407b9ee7e0b4b089ae1 (patch) | |
tree | 926065cf45450116098db664e3c61dced9e1f21a /ipl/progs/rsg.icn | |
download | icon-6ab0c0f5bf14ed9c15370407b9ee7e0b4b089ae1.tar.gz |
Initial upstream version 9.4.3upstream/9.4.3
Diffstat (limited to 'ipl/progs/rsg.icn')
-rw-r--r-- | ipl/progs/rsg.icn | 391 |
1 files changed, 391 insertions, 0 deletions
diff --git a/ipl/progs/rsg.icn b/ipl/progs/rsg.icn new file mode 100644 index 0000000..747e78b --- /dev/null +++ b/ipl/progs/rsg.icn @@ -0,0 +1,391 @@ +############################################################################ +# +# File: rsg.icn +# +# Subject: Program to generate randomly selected sentences +# +# Author: Ralph E. Griswold +# +# Date: March 26, 2002 +# +############################################################################ +# +# This file is in the public domain. +# +############################################################################ +# +# This program generates randomly selected strings (``sen- +# tences'') from a grammar specified by the user. Grammars are +# basically context-free and resemble BNF in form, although there +# are a number of extensions. +# +############################################################################ +# +# The program works interactively, allowing the user to build, +# test, modify, and save grammars. Input to rsg consists of various +# kinds of specifications, which can be intermixed: +# +# Productions define nonterminal symbols in a syntax similar to +# the rewriting rules of BNF with various alternatives consisting +# of the concatenation of nonterminal and terminal symbols. Gen- +# eration specifications cause the generation of a specified number +# of sentences from the language defined by a given nonterminal +# symbol. Grammar output specifications cause the definition of a +# specified nonterminal or the entire current grammar to be written +# to a given file. Source specifications cause subsequent input to +# be read from a specified file. +# +# In addition, any line beginning with # is considered to be a +# comment, while any line beginning with = causes the rest of that +# line to be used subsequently as a prompt to the user whenever rsg +# is ready for input (there normally is no prompt). A line consist- +# ing of a single = stops prompting. +# +# Productions: Examples of productions are: +# +# <expr>::=<term>|<term>+<expr> +# <term>::=<elem>|<elem>*<term> +# <elem>::=x|y|z|(<expr>) +# +# Productions may occur in any order. The definition for a nonter- +# minal symbol can be changed by specifying a new production for +# it. +# +# There are a number of special devices to facilitate the defin- +# ition of grammars, including eight predefined, built-in nontermi- +# nal symbols: +# symbol definition +# <lb> < +# <rb> > +# <vb> | +# <nl> newline +# <> empty string +# <&lcase> any single lowercase letter +# <&ucase> any single uppercase letter +# <&digit> any single digit +# +# In addition, if the string between a < and a > begins and ends +# with a single quotation mark, it stands for any single character +# between the quotation marks. For example, +# +# <'xyz'> +# +# is equivalent to +# +# x|y|z +# +# Generation Specifications: A generation specification consists of +# a nonterminal symbol followed by a nonnegative integer. An exam- +# ple is +# +# <expr>10 +# +# which specifies the generation of 10 <expr>s. If the integer is +# omitted, it is assumed to be 1. Generated sentences are written +# to standard output. +# +# Grammar Output Specifications: A grammar output specification +# consists of a nonterminal symbol, followed by ->, followed by a +# file name. Such a specification causes the current definition of +# the nonterminal symbol to be written to the given file. If the +# file is omitted, standard output is assumed. If the nonterminal +# symbol is omitted, the entire grammar is written out. Thus, +# +# -> +# +# causes the entire grammar to be written to standard output. +# +# Source Specifications: A source specification consists of @ fol- +# lowed by a file name. Subsequent input is read from that file. +# When an end of file is encountered, input reverts to the previous +# file. Input files can be nested. +# +# Options: The following options are available: +# +# -s n Set the seed for random generation to n. +# +# -r In the absence of -s, set the seed to 0 for repeatable +# results. Otherwise the seed is set to a different value +# for each run (as far as this is possible). -r is equivalent +# to -s 0. +# +# -l n Terminate generation if the number of symbols remaining +# to be processed exceeds n. The default is limit is 1000. +# +# -t Trace the generation of sentences. Trace output goes to +# standard error output. +# +# Diagnostics: Syntactically erroneous input lines are noted but +# are otherwise ignored. Specifications for a file that cannot be +# opened are noted and treated as erroneous. +# +# If an undefined nonterminal symbol is encountered during gen- +# eration, an error message that identifies the undefined symbol is +# produced, followed by the partial sentence generated to that +# point. Exceeding the limit of symbols remaining to be generated +# as specified by the -l option is handled similarly. +# +# Caveats: Generation may fail to terminate because of a loop in +# the rewriting rules or, more seriously, because of the progres- +# sive accumulation of nonterminal symbols. The latter problem can +# be identified by using the -t option and controlled by using the +# -l option. The problem often can be circumvented by duplicating +# alternatives that lead to fewer rather than more nonterminal sym- +# bols. For example, changing +# +# <term>::=<elem>|<elem>*<term> +# +# to +# +# <term>::=<elem>|<elem>|<elem>*<term> +# +# increases the probability of selecting <elem> from 1/2 to 2/3. +# +# There are many possible extensions to the program. One of the +# most useful would be a way to specify the probability of select- +# ing an alternative. +# +############################################################################ +# +# Links: options, random +# +############################################################################ + +link options +link random + +global defs, ifile, in, limit, prompt, tswitch + +record nonterm(name) +record charset(chars) + +procedure main(args) + local line, plist, s, opts + # procedures to try on input lines + plist := [define,generate,grammar,source,comment,prompter,error] + defs := table() # table of definitions + defs["lb"] := [["<"]] # built-in definitions + defs["rb"] := [[">"]] + defs["vb"] := [["|"]] + defs["nl"] := [["\n"]] + defs[""] := [[""]] + defs["&lcase"] := [[charset(&lcase)]] + defs["&ucase"] := [[charset(&ucase)]] + defs["&digit"] := [[charset(&digits)]] + + opts := options(args,"tl+s+r") + limit := \opts["l"] | 1000 + tswitch := \opts["t"] + &random := \opts["s"] + if /opts["s"] & /opts["r"] then randomize() + + ifile := [&input] # stack of input files + prompt := "" + while in := pop(ifile) do { # process all files + repeat { + if *prompt ~= 0 then writes(prompt) + line := read(in) | break + while line[-1] == "\\" do line := line[1:-1] || read(in) | break + (!plist)(line) + } + close(in) + } +end + +# process alternatives +# +procedure alts(defn) + local alist + alist := [] + defn ? while put(alist,syms(tab(upto('|') | 0))) do move(1) | break + return alist +end + +# look for comment +# +procedure comment(line) + if line[1] == "#" then return +end + +# look for definition +# +procedure define(line) + return line ? + defs[(="<",tab(find(">::=")))] := (move(4),alts(tab(0))) +end + +# define nonterminal +# +procedure defnon(sym) + local chars, name + if sym ? { + ="'" & + chars := cset(tab(-1)) & + ="'" + } + then return charset(chars) + else return nonterm(sym) +end + +# note erroneous input line +# +procedure error(line) + write("*** erroneous line: ",line) + return +end + +# generate sentences +# +procedure gener(goal) + local pending, symbol + pending := [nonterm(goal)] + while symbol := get(pending) do { + if \tswitch then + write(&errout,symimage(symbol),listimage(pending)) + case type(symbol) of { + "string": writes(symbol) + "charset": writes(?symbol.chars) + "nonterm": { + pending := ?\defs[symbol.name] ||| pending | { + write(&errout,"*** undefined nonterminal: <",symbol.name,">") + break + } + if *pending > \limit then { + write(&errout,"*** excessive symbols remaining") + break + } + } + } + } + write() +end + +# look for generation specification +# +procedure generate(line) + local goal, count + if line ? { + ="<" & + goal := tab(upto('>')) \ 1 & + move(1) & + count := (pos(0) & 1) | integer(tab(0)) + } + then { + every 1 to count do + gener(goal) + return + } + else fail +end + +# get right hand side of production +# +procedure getrhs(a) + local rhs + rhs := "" + every rhs ||:= listimage(!a) || "|" + return rhs[1:-1] +end + +# look for request to write out grammar +# +procedure grammar(line) + local file, out, name + if line ? { + name := tab(find("->")) & + move(2) & + file := tab(0) & + out := if *file = 0 then &output else { + open(file,"w") | { + write(&errout,"*** cannot open ",file) + fail + } + } + } + then { + (*name = 0) | (name[1] == "<" & name[-1] == ">") | fail + pwrite(name,out) + if *file ~= 0 then close(out) + return + } + else fail +end + +# produce image of list of grammar symbols +# +procedure listimage(a) + local s, x + s := "" + every x := !a do + s ||:= symimage(x) + return s +end + +# look for new prompt symbol +# +procedure prompter(line) + if line[1] == "=" then { + prompt := line[2:0] + return + } +end + +# write out grammar +# +procedure pwrite(name,ofile) + local nt, a + static builtin + initial builtin := ["lb","rb","vb","nl","","&lcase","&ucase","&digit"] + if *name = 0 then { + a := sort(defs,3) + while nt := get(a) do { + if nt == !builtin then { + get(a) + next + } + write(ofile,"<",nt,">::=",getrhs(get(a))) + } + } + else write(ofile,name,"::=",getrhs(\defs[name[2:-1]])) | + write("*** undefined nonterminal: ",name) +end + +# look for file with input +# +procedure source(line) + local file, new + + return line ? { + if ="@" then { + new := open(file := tab(0)) | { + write(&errout,"*** cannot open ",file) + fail + } + push(ifile,in) & + in := new + return + } + } +end + +# produce string image of grammar symbol +# +procedure symimage(x) + return case type(x) of { + "string": x + "nonterm": "<" || x.name || ">" + "charset": "<'" || x.chars || "'>" + } +end + +# process the symbols in an alternative +# +procedure syms(alt) + local slist + static nonbrack + initial nonbrack := ~'<' + slist := [] + alt ? while put(slist,tab(many(nonbrack)) | + defnon(2(="<",tab(upto('>')),move(1)))) + return slist +end |