summaryrefslogtreecommitdiff
path: root/ipl/procs/dif.icn
diff options
context:
space:
mode:
Diffstat (limited to 'ipl/procs/dif.icn')
-rw-r--r--ipl/procs/dif.icn238
1 files changed, 238 insertions, 0 deletions
diff --git a/ipl/procs/dif.icn b/ipl/procs/dif.icn
new file mode 100644
index 0000000..ea57134
--- /dev/null
+++ b/ipl/procs/dif.icn
@@ -0,0 +1,238 @@
+############################################################################
+#
+# File: dif.icn
+#
+# Subject: Procedure to check for differences
+#
+# Author: Robert J. Alexander
+#
+# Date: August 14, 1996
+#
+############################################################################
+#
+# This file is in the public domain.
+#
+############################################################################
+#
+# dif(stream, compare, eof, group)
+# generates a sequence of differences between an arbitrary
+# number of input streams. Each result is returned as a list
+# of diff_recs, one for each input stream, with each diff_rec
+# containing a list of items that differ and their position
+# in the input stream.
+#
+# The diff_rec type is declared as:
+#
+# record diff_rec(pos,diffs)
+#
+# dif() fails if there are no differences, i.e. it produces an empty
+# result sequence.
+#
+############################################################################
+#
+# For example, if two input streams are:
+#
+# a b c d e f g h
+# a b d e f i j
+#
+# the output sequence would be:
+#
+# [diff_rec(3,[c]),diff_rec(3,[])]
+# [diff_rec(7,[g,h]),diff_rec(6,[i,j])
+#
+# The arguments to dif(stream,compare,eof,group) are:
+#
+# stream A list of data objects that represent input streams
+# from which dif will extract its input "records".
+# The elements can be of several different types which
+# result in different actions, as follows:
+#
+# Type Action
+# =========== =============================
+# file file is "read" to get records
+#
+# co-expression co-expression is activated to
+# get records
+#
+# list records are "gotten" (get()) from
+# the list
+#
+# diff_proc a record type defined in "dif" to
+# allow a procedure (or procedures)
+# suppled by dif's caller to be called
+# to get records. Diff_proc has two
+# fields, the procedure to call and the
+# argument to call it with. Its
+# definition looks like this:
+#
+# record diff_proc(proc,arg)
+#
+#
+# Optional arguments:
+#
+# compare Item comparison procedure -- succeeds if
+# "equal", otherwise fails (default is the
+# identity "===" comparison). The comparison
+# must allow for the fact that the eof object
+# (see next) might be an argument, and a pair of
+# eofs must compare equal.
+#
+# eof An object that is distinguishable from other
+# objects in the stream. Default is &null.
+#
+# group A procedure that is called with the current number
+# of unmatched items as its argument. It must
+# return the number of matching items required
+# for file synchronization to occur. Default is
+# the formula Trunc((2.0 * Log(M)) + 2.0) where
+# M is the number of unmatched items.
+#
+############################################################################
+
+invocable all
+
+record diff_rec(pos,diffs)
+record diff_proc(proc,arg)
+record diff_file(stream,queue)
+
+
+procedure dif(stream,compare,eof,group)
+ local f,linenbr,line,difflist,gf,i,j,k,l,m,n,x,test,
+ result,synclist,nsyncs,syncpoint
+ #
+ # Provide default arguments and initialize data.
+ #
+ /compare := proc("===",2)
+ /group := groupfactor
+ f := []
+ every put(f,diff_file(!stream,[]))
+ linenbr := list(*stream,0)
+ line := list(*stream)
+ test := list(*stream)
+ difflist := list(*stream)
+ every !difflist := []
+ #
+ # Loop to process all records of all input streams.
+ #
+ repeat {
+ #
+ # This is the "idle loop" where we spin until we find a discrepancy
+ # among the data streams. A line is read from each stream, with a
+ # check for eof on all streams. Then the line from the first
+ # stream is compared to the lines from all the others.
+ #
+ repeat {
+ every i := 1 to *stream do
+ line[i] := diffread(f[i]) | eof
+ if not (every x := !line do
+ (x === eof) | break) then break break
+ every !linenbr +:= 1
+ if (every x := !line[2:0] do
+ compare(x,line[1]) | break) then break
+ }
+ #
+ # Aha! We have found a difference. Create a difference list,
+ # one entry per stream, primed with the differing line we just found.
+ #
+ every i := 1 to *stream do
+ difflist[i] := [line[i]]
+ repeat {
+ #
+ # Add a new input line from each stream to the difference list.
+ # Then build lists of the subset of different lines we need to
+ # actually compare.
+ #
+ every i := 1 to *stream do
+ put(difflist[i],diffread(f[i]) | eof)
+ gf := group(*difflist[1])
+ every i := 1 to *stream do
+ test[i] := difflist[i][-gf:0]
+ #
+ # Create a "synchronization matrix", with a row and column for
+ # each input stream. The entries will be initially &null, then
+ # will be set to the synchronization position if sync is
+ # achieved between the two streams. Another list is created to
+ # keep track of how many syncs have been achieved for each stream.
+ #
+ j := *difflist[1] - gf + 1
+ synclist := list(*stream)
+ every !synclist := list(*stream)
+ every k := 1 to *stream do
+ synclist[k][k] := j
+ nsyncs := list(*stream,1)
+ #
+ # Loop through positions to start comparing lines. This set of
+ # nested loops will be exited when a stream achieves sync with
+ # all other streams.
+ #
+ every i := 1 to j do {
+ #
+ # Loop through all streams.
+ #
+ every k := 1 to *stream do {
+ #
+ # Loop through all streams.
+ #
+ every l := 1 to *stream do {
+ if /synclist[k][l] then { # avoid unnecessary comparisons
+ #
+ # Compare items of the test list to the differences list
+ # at all possible positions. If they compare, store the
+ # current position in the sync matrix and bump the count
+ # of streams sync'd to this stream. If all streams are in
+ # sync, exit all loops but the outer one.
+ #
+ m := i - 1
+ if not every n := 1 to gf do {
+ if not compare(test[k][n],difflist[l][m +:= 1]) then break
+ } then {
+ synclist[k][l] := i # store current position
+ if (nsyncs[k] +:= 1) = *stream then break break break break
+ }
+ }
+ }
+ }
+ }
+ }
+ #
+ # Prepare an output set. Since we have read the input streams past
+ # the point of synchronization, we must queue those lines before their
+ # input streams.
+ #
+ synclist := synclist[k]
+ result := list(*stream)
+ every i := 1 to *stream do {
+ j := synclist[i]
+ while difflist[i][j -:= 1] === eof # trim past eof
+ result[i] := diff_rec(linenbr[i],difflist[i][1:j + 1])
+ f[i].queue := difflist[i][synclist[i] + gf:0] ||| f[i].queue
+ linenbr[i] +:= synclist[i] + gf - 2
+ difflist[i] := []
+ }
+ suspend result
+ }
+end
+
+#
+# diffread() -- Read a line from an input stream.
+#
+procedure diffread(f)
+ local x
+ return get(f.queue) | case type(x := f.stream) of {
+ "file" | "window": read(x)
+ "co-expression": @x
+ "diff_proc": x.proc(x.arg)
+ "list": get(x)
+ }
+end
+
+#
+# groupfactor() -- Determine how many like lines we need to close
+# off a group of differences. This is the default routine -- the
+# caller may provide his own.
+#
+procedure groupfactor(m) # Compute: Trunc((2.0 * Log(m)) + 2.0)
+ m := string(m)
+ return 2 * *m + if m <<= "316227766"[1+:*m] then 0 else 1
+end
+