summaryrefslogtreecommitdiff
path: root/ipl/progs/rsg.icn
blob: 747e78b59692536523c7bb445b79a8d014258269 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
############################################################################
#
#	File:     rsg.icn
#
#	Subject:  Program to generate randomly selected sentences
#
#	Author:   Ralph E. Griswold
#
#	Date:     March 26, 2002
#
############################################################################
#
#   This file is in the public domain.
#
############################################################################
#  
#     This program generates randomly selected strings (``sen-
#  tences'') from a grammar specified by the user.  Grammars are
#  basically context-free and resemble BNF in form, although there
#  are a number of extensions.
#
############################################################################
#  
#     The program works interactively, allowing the user to build,
#  test, modify, and save grammars. Input to rsg consists of various
#  kinds of specifications, which can be intermixed:
#  
#     Productions define nonterminal symbols in a syntax similar to
#  the rewriting rules of BNF with various alternatives consisting
#  of the concatenation of nonterminal and terminal symbols.  Gen-
#  eration specifications cause the generation of a specified number
#  of sentences from the language defined by a given nonterminal
#  symbol.  Grammar output specifications cause the definition of a
#  specified nonterminal or the entire current grammar to be written
#  to a given file.  Source specifications cause subsequent input to
#  be read from a specified file.
#  
#     In addition, any line beginning with # is considered to be a
#  comment, while any line beginning with = causes the rest of that
#  line to be used subsequently as a prompt to the user whenever rsg
#  is ready for input (there normally is no prompt). A line consist-
#  ing of a single = stops prompting.
#  
#  Productions: Examples of productions are:
#  
#          <expr>::=<term>|<term>+<expr>
#          <term>::=<elem>|<elem>*<term>
#          <elem>::=x|y|z|(<expr>)
#  
#  Productions may occur in any order. The definition for a nonter-
#  minal symbol can be changed by specifying a new production for
#  it.
#  
#     There are a number of special devices to facilitate the defin-
#  ition of grammars, including eight predefined, built-in nontermi-
#  nal symbols:
#     symbol   definition
#     <lb>     <
#     <rb>     >
#     <vb>     |
#     <nl>     newline
#     <>       empty string
#     <&lcase> any single lowercase letter
#     <&ucase> any single uppercase letter
#     <&digit> any single digit
#  
#  In addition, if the string between a < and a > begins and ends
#  with a single quotation mark, it stands for any single character
#  between the quotation marks. For example,
#  
#          <'xyz'>
#  
#  is equivalent to
#  
#          x|y|z
#  
#  Generation Specifications: A generation specification consists of
#  a nonterminal symbol followed by a nonnegative integer. An exam-
#  ple is
#  
#          <expr>10
#  
#  which specifies the generation of 10 <expr>s. If the integer is
#  omitted, it is assumed to be 1. Generated sentences are written
#  to standard output.
#  
#  Grammar Output Specifications: A grammar output specification
#  consists of a nonterminal symbol, followed by ->, followed by a
#  file name. Such a specification causes the current definition of
#  the nonterminal symbol to be written to the given file. If the
#  file is omitted, standard output is assumed. If the nonterminal
#  symbol is omitted, the entire grammar is written out. Thus,
#  
#          ->
#  
#  causes the entire grammar to be written to standard output.
#  
#  Source Specifications: A source specification consists of @ fol-
#  lowed by a file name.  Subsequent input is read from that file.
#  When an end of file is encountered, input reverts to the previous
#  file. Input files can be nested.
#  
#  Options: The following options are available:
#  
#       -s n Set the seed for random generation to n.
#  
#	-r   In the absence of -s, set the seed to 0 for repeatable
#	     results.  Otherwise the seed is set to a different value
#	     for each run (as far as this is possible). -r is equivalent
#	     to -s 0.
#
#       -l n Terminate generation if the number of symbols remaining
#            to be processed exceeds n. The default is limit is 1000.
#  
#       -t   Trace the generation of sentences. Trace output goes to
#            standard error output.
#  
#  Diagnostics: Syntactically erroneous input lines are noted but
#  are otherwise ignored.  Specifications for a file that cannot be
#  opened are noted and treated as erroneous.
#  
#     If an undefined nonterminal symbol is encountered during gen-
#  eration, an error message that identifies the undefined symbol is
#  produced, followed by the partial sentence generated to that
#  point. Exceeding the limit of symbols remaining to be generated
#  as specified by the -l option is handled similarly.
#  
#  Caveats: Generation may fail to terminate because of a loop in
#  the rewriting rules or, more seriously, because of the progres-
#  sive accumulation of nonterminal symbols. The latter problem can
#  be identified by using the -t option and controlled by using the
#  -l option. The problem often can be circumvented by duplicating
#  alternatives that lead to fewer rather than more nonterminal sym-
#  bols. For example, changing
#  
#          <term>::=<elem>|<elem>*<term>
#  
#  to
#  
#          <term>::=<elem>|<elem>|<elem>*<term>
#  
#  increases the probability of selecting <elem> from 1/2 to 2/3.
#  
#     There are many possible extensions to the program. One of the
#  most useful would be a way to specify the probability of select-
#  ing an alternative.
#  
############################################################################
#
#  Links: options, random
#
############################################################################

link options
link random

global defs, ifile, in, limit, prompt, tswitch

record nonterm(name)
record charset(chars)

procedure main(args)
   local line, plist, s, opts
					# procedures to try on input lines
   plist := [define,generate,grammar,source,comment,prompter,error]
   defs := table()			# table of definitions
   defs["lb"] := [["<"]]		# built-in definitions
   defs["rb"] := [[">"]]
   defs["vb"] := [["|"]]
   defs["nl"] := [["\n"]]
   defs[""] := [[""]]
   defs["&lcase"] := [[charset(&lcase)]]
   defs["&ucase"] := [[charset(&ucase)]]
   defs["&digit"] := [[charset(&digits)]]

   opts := options(args,"tl+s+r")
   limit := \opts["l"] | 1000
   tswitch := \opts["t"]
   &random := \opts["s"]
   if /opts["s"] & /opts["r"] then randomize()

   ifile := [&input]			# stack of input files
   prompt := ""
   while in := pop(ifile) do {		# process all files
      repeat {
         if *prompt ~= 0 then writes(prompt)
         line := read(in) | break
         while line[-1] == "\\" do line := line[1:-1] || read(in) | break
         (!plist)(line)
         }
      close(in)
      }
end

#  process alternatives
#
procedure alts(defn)
   local alist
   alist := []
   defn ? while put(alist,syms(tab(upto('|') | 0))) do move(1) | break
   return alist
end

#  look for comment
#
procedure comment(line)
   if line[1] == "#" then return
end

#  look for definition
#
procedure define(line)
   return line ?
      defs[(="<",tab(find(">::=")))] := (move(4),alts(tab(0)))
end

#  define nonterminal
#
procedure defnon(sym)
   local chars, name
   if sym ? {
      ="'" &
      chars := cset(tab(-1)) &
      ="'"
      }
   then return charset(chars)
   else return nonterm(sym)
end

#  note erroneous input line
#
procedure error(line)
   write("*** erroneous line:  ",line)
   return
end

#  generate sentences
#
procedure gener(goal)
   local pending, symbol
   pending := [nonterm(goal)]
   while symbol := get(pending) do {
      if \tswitch then
         write(&errout,symimage(symbol),listimage(pending))
      case type(symbol) of {
         "string":   writes(symbol)
         "charset":  writes(?symbol.chars)
         "nonterm":  {
            pending := ?\defs[symbol.name] ||| pending | {
               write(&errout,"*** undefined nonterminal:  <",symbol.name,">")
               break 
               }
            if *pending > \limit then {
               write(&errout,"*** excessive symbols remaining")
               break 
               }
            }
         }
      }
   write()
end

#  look for generation specification
#
procedure generate(line)
   local goal, count
   if line ? {
      ="<" &
      goal := tab(upto('>')) \ 1 &
      move(1) &
      count := (pos(0) & 1) | integer(tab(0))
      }
   then {
      every 1 to count do
         gener(goal)
      return
      }
   else fail
end

#  get right hand side of production
#
procedure getrhs(a)
   local rhs
   rhs := ""
   every rhs ||:= listimage(!a) || "|"
   return rhs[1:-1]
end

#  look for request to write out grammar
#
procedure grammar(line)
   local file, out, name
   if line ? {
      name := tab(find("->")) &
      move(2) &
      file := tab(0) &
      out := if *file = 0 then &output else {
         open(file,"w") | {
            write(&errout,"*** cannot open ",file)
            fail
            }
         }
      }
   then {
      (*name = 0) | (name[1] == "<" & name[-1] == ">") | fail
      pwrite(name,out)
      if *file ~= 0 then close(out)
      return
      }
   else fail
end

#  produce image of list of grammar symbols
#
procedure listimage(a)
   local s, x
   s := ""
   every x := !a do
      s ||:= symimage(x)
   return s
end

#  look for new prompt symbol
#
procedure prompter(line)
   if line[1] == "=" then {
      prompt := line[2:0]
      return
      }
end

#  write out grammar
#
procedure pwrite(name,ofile)
   local nt, a
   static builtin
   initial builtin := ["lb","rb","vb","nl","","&lcase","&ucase","&digit"]
   if *name = 0 then {
      a := sort(defs,3)
      while nt := get(a) do {
         if nt == !builtin then {
            get(a)
            next
            }
         write(ofile,"<",nt,">::=",getrhs(get(a)))
         }
      }
   else write(ofile,name,"::=",getrhs(\defs[name[2:-1]])) |
      write("*** undefined nonterminal:  ",name)
end

#  look for file with input
#
procedure source(line)
   local file, new

   return line ? {
      if ="@" then {
         new := open(file := tab(0)) | {
            write(&errout,"*** cannot open ",file)
            fail
            }
         push(ifile,in) &
         in := new
         return
         }
      }
end

#  produce string image of grammar symbol
#
procedure symimage(x)
   return case type(x) of {
      "string":   x
      "nonterm":  "<" || x.name || ">"
      "charset":  "<'" || x.chars || "'>"
      }
end

#  process the symbols in an alternative
#
procedure syms(alt)
   local slist
   static nonbrack
   initial nonbrack := ~'<'
   slist := []
   alt ? while put(slist,tab(many(nonbrack)) |
      defnon(2(="<",tab(upto('>')),move(1))))
   return slist
end